1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24 
25 *****************************************************************************/
26 
27 /**************************************************//**
28 @file fil/fil0fil.cc
29 The tablespace memory cache
30 
31 Created 10/25/1995 Heikki Tuuri
32 *******************************************************/
33 
34 #include "fil0fil.h"
35 
36 #include <debug_sync.h>
37 #include <my_dbug.h>
38 
39 #include "mem0mem.h"
40 #include "hash0hash.h"
41 #include "os0file.h"
42 #include "mach0data.h"
43 #include "buf0buf.h"
44 #include "buf0flu.h"
45 #include "log0recv.h"
46 #include "fsp0fsp.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "mtr0mtr.h"
50 #include "mtr0log.h"
51 #include "dict0dict.h"
52 #include "page0page.h"
53 #include "page0zip.h"
54 #include "trx0sys.h"
55 #include "row0mysql.h"
56 #ifndef UNIV_HOTBACKUP
57 # include "buf0lru.h"
58 # include "ibuf0ibuf.h"
59 # include "sync0sync.h"
60 # include "os0sync.h"
61 #else /* !UNIV_HOTBACKUP */
62 # include "srv0srv.h"
63 static ulint srv_data_read, srv_data_written;
64 #endif /* !UNIV_HOTBACKUP */
65 
66 /*
67 		IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
68 		=============================================
69 
70 The tablespace cache is responsible for providing fast read/write access to
71 tablespaces and logs of the database. File creation and deletion is done
72 in other modules which know more of the logic of the operation, however.
73 
74 A tablespace consists of a chain of files. The size of the files does not
75 have to be divisible by the database block size, because we may just leave
76 the last incomplete block unused. When a new file is appended to the
77 tablespace, the maximum size of the file is also specified. At the moment,
78 we think that it is best to extend the file to its maximum size already at
79 the creation of the file, because then we can avoid dynamically extending
80 the file when more space is needed for the tablespace.
81 
82 A block's position in the tablespace is specified with a 32-bit unsigned
83 integer. The files in the chain are thought to be catenated, and the block
84 corresponding to an address n is the nth block in the catenated file (where
85 the first block is named the 0th block, and the incomplete block fragments
86 at the end of files are not taken into account). A tablespace can be extended
87 by appending a new file at the end of the chain.
88 
89 Our tablespace concept is similar to the one of Oracle.
90 
91 To acquire more speed in disk transfers, a technique called disk striping is
92 sometimes used. This means that logical block addresses are divided in a
93 round-robin fashion across several disks. Windows NT supports disk striping,
94 so there we do not need to support it in the database. Disk striping is
95 implemented in hardware in RAID disks. We conclude that it is not necessary
96 to implement it in the database. Oracle 7 does not support disk striping,
97 either.
98 
99 Another trick used at some database sites is replacing tablespace files by
100 raw disks, that is, the whole physical disk drive, or a partition of it, is
101 opened as a single file, and it is accessed through byte offsets calculated
102 from the start of the disk or the partition. This is recommended in some
103 books on database tuning to achieve more speed in i/o. Using raw disk
104 certainly prevents the OS from fragmenting disk space, but it is not clear
105 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
106 system + EIDE Conner disk only a negligible difference in speed when reading
107 from a file, versus reading from a raw disk.
108 
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
112 
113 Some operating systems do not support many open files at the same time,
114 though NT seems to tolerate at least 900 open files. Therefore, we put the
115 open files in an LRU-list. If we need to open another file, we may close the
116 file at the end of the LRU-list. When an i/o-operation is pending on a file,
117 the file cannot be closed. We take the file nodes with pending i/o-operations
118 out of the LRU-list and keep a count of pending operations. When an operation
119 completes, we decrement the count and return the file node to the LRU-list if
120 the count drops to zero. */
121 
122 /** When mysqld is run, the default directory "." is the mysqld datadir,
123 but in the MySQL Embedded Server Library and mysqlbackup it is not the default
124 directory, and we must set the base file path explicitly */
125 UNIV_INTERN const char*	fil_path_to_mysql_datadir	= ".";
126 
127 /** The number of fsyncs done to the log */
128 UNIV_INTERN ulint	fil_n_log_flushes			= 0;
129 
130 /** Number of pending redo log flushes */
131 UNIV_INTERN ulint	fil_n_pending_log_flushes		= 0;
132 /** Number of pending tablespace flushes */
133 UNIV_INTERN ulint	fil_n_pending_tablespace_flushes	= 0;
134 
135 /** Number of files currently open */
136 UNIV_INTERN ulint	fil_n_file_opened			= 0;
137 
138 /** The null file address */
139 UNIV_INTERN fil_addr_t	fil_addr_null = {FIL_NULL, 0};
140 
141 #ifdef UNIV_PFS_MUTEX
142 /* Key to register fil_system_mutex with performance schema */
143 UNIV_INTERN mysql_pfs_key_t	fil_system_mutex_key;
144 #endif /* UNIV_PFS_MUTEX */
145 
146 #ifdef UNIV_PFS_RWLOCK
147 /* Key to register file space latch with performance schema */
148 UNIV_INTERN mysql_pfs_key_t	fil_space_latch_key;
149 #endif /* UNIV_PFS_RWLOCK */
150 
151 /** File node of a tablespace or the log data space */
152 struct fil_node_t {
153 	fil_space_t*	space;	/*!< backpointer to the space where this node
154 				belongs */
155 	char*		name;	/*!< path to the file */
156 	ibool		open;	/*!< TRUE if file open */
157 	pfs_os_file_t	handle;	/*!< OS handle to the file, if file open */
158 	os_event_t	sync_event;/*!< Condition event to group and
159 				serialize calls to fsync */
160 	ibool		is_raw_disk;/*!< TRUE if the 'file' is actually a raw
161 				device or a raw disk partition */
162 	ulint		size;	/*!< size of the file in database pages, 0 if
163 				not known yet; the possible last incomplete
164 				megabyte may be ignored if space == 0 */
165 	ulint		n_pending;
166 				/*!< count of pending i/o's on this file;
167 				closing of the file is not allowed if
168 				this is > 0 */
169 	ulint		n_pending_flushes;
170 				/*!< count of pending flushes on this file;
171 				closing of the file is not allowed if
172 				this is > 0 */
173 	ibool		being_extended;
174 				/*!< TRUE if the node is currently
175 				being extended. */
176 	ib_int64_t	modification_counter;/*!< when we write to the file we
177 				increment this by one */
178 	ib_int64_t	flush_counter;/*!< up to what
179 				modification_counter value we have
180 				flushed the modifications to disk */
181 	UT_LIST_NODE_T(fil_node_t) chain;
182 				/*!< link field for the file chain */
183 	UT_LIST_NODE_T(fil_node_t) LRU;
184 				/*!< link field for the LRU list */
185 	ulint		magic_n;/*!< FIL_NODE_MAGIC_N */
186 };
187 
188 /** Value of fil_node_t::magic_n */
189 #define	FIL_NODE_MAGIC_N	89389
190 
191 /** Tablespace or log data space: let us call them by a common name space */
192 struct fil_space_t {
193 	char*		name;	/*!< space name = the path to the first file in
194 				it */
195 	ulint		id;	/*!< space id */
196 	ib_int64_t	tablespace_version;
197 				/*!< in DISCARD/IMPORT this timestamp
198 				is used to check if we should ignore
199 				an insert buffer merge request for a
200 				page because it actually was for the
201 				previous incarnation of the space */
202 	ibool		mark;	/*!< this is set to TRUE at database startup if
203 				the space corresponds to a table in the InnoDB
204 				data dictionary; so we can print a warning of
205 				orphaned tablespaces */
206 	ibool		stop_ios;/*!< TRUE if we want to rename the
207 				.ibd file of tablespace and want to
208 				stop temporarily posting of new i/o
209 				requests on the file */
210 	ibool		stop_new_ops;
211 				/*!< we set this TRUE when we start
212 				deleting a single-table tablespace.
213 				When this is set following new ops
214 				are not allowed:
215 				* read IO request
216 				* ibuf merge
217 				* file flush
218 				Note that we can still possibly have
219 				new write operations because we don't
220 				check this flag when doing flush
221 				batches. */
222 	ulint		purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
223 				FIL_ARCH_LOG */
224 	UT_LIST_BASE_NODE_T(fil_node_t) chain;
225 				/*!< base node for the file chain */
226 	ulint		size;	/*!< space size in pages; 0 if a single-table
227 				tablespace whose size we do not know yet;
228 				last incomplete megabytes in data files may be
229 				ignored if space == 0 */
230 	ulint		flags;	/*!< tablespace flags; see
231 				fsp_flags_is_valid(),
232 				fsp_flags_get_zip_size() */
233 	ulint		n_reserved_extents;
234 				/*!< number of reserved free extents for
235 				ongoing operations like B-tree page split */
236 	ulint		n_pending_flushes; /*!< this is positive when flushing
237 				the tablespace to disk; dropping of the
238 				tablespace is forbidden if this is positive */
239 	ulint		n_pending_ops;/*!< this is positive when we
240 				have pending operations against this
241 				tablespace. The pending operations can
242 				be ibuf merges or lock validation code
243 				trying to read a block.
244 				Dropping of the tablespace is forbidden
245 				if this is positive */
246 	hash_node_t	hash;	/*!< hash chain node */
247 	hash_node_t	name_hash;/*!< hash chain the name_hash table */
248 #ifndef UNIV_HOTBACKUP
249 	rw_lock_t	latch;	/*!< latch protecting the file space storage
250 				allocation */
251 #endif /* !UNIV_HOTBACKUP */
252 	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
253 				/*!< list of spaces with at least one unflushed
254 				file we have written to */
255 	bool		is_in_unflushed_spaces;
256 				/*!< true if this space is currently in
257 				unflushed_spaces */
258 	UT_LIST_NODE_T(fil_space_t) space_list;
259 				/*!< list of all spaces */
260 	ulint		magic_n;/*!< FIL_SPACE_MAGIC_N */
261 };
262 
263 /** Value of fil_space_t::magic_n */
264 #define	FIL_SPACE_MAGIC_N	89472
265 
266 /** The tablespace memory cache; also the totality of logs (the log
267 data space) is stored here; below we talk about tablespaces, but also
268 the ib_logfiles form a 'space' and it is handled here */
269 struct fil_system_t {
270 #ifndef UNIV_HOTBACKUP
271 	ib_mutex_t		mutex;		/*!< The mutex protecting the cache */
272 #endif /* !UNIV_HOTBACKUP */
273 	hash_table_t*	spaces;		/*!< The hash table of spaces in the
274 					system; they are hashed on the space
275 					id */
276 	hash_table_t*	name_hash;	/*!< hash table based on the space
277 					name */
278 	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
279 					/*!< base node for the LRU list of the
280 					most recently used open files with no
281 					pending i/o's; if we start an i/o on
282 					the file, we first remove it from this
283 					list, and return it to the start of
284 					the list when the i/o ends;
285 					log files and the system tablespace are
286 					not put to this list: they are opened
287 					after the startup, and kept open until
288 					shutdown */
289 	UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
290 					/*!< base node for the list of those
291 					tablespaces whose files contain
292 					unflushed writes; those spaces have
293 					at least one file node where
294 					modification_counter > flush_counter */
295 	ulint		n_open;		/*!< number of files currently open */
296 	ulint		max_n_open;	/*!< n_open is not allowed to exceed
297 					this */
298 	ib_int64_t	modification_counter;/*!< when we write to a file we
299 					increment this by one */
300 	ulint		max_assigned_id;/*!< maximum space id in the existing
301 					tables, or assigned during the time
302 					mysqld has been up; at an InnoDB
303 					startup we scan the data dictionary
304 					and set here the maximum of the
305 					space id's of the tables there */
306 	ib_int64_t	tablespace_version;
307 					/*!< a counter which is incremented for
308 					every space object memory creation;
309 					every space mem object gets a
310 					'timestamp' from this; in DISCARD/
311 					IMPORT this is used to check if we
312 					should ignore an insert buffer merge
313 					request */
314 	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
315 					/*!< list of all file spaces */
316 	ibool		space_id_reuse_warned;
317 					/* !< TRUE if fil_space_create()
318 					has issued a warning about
319 					potential space_id reuse */
320 };
321 
322 /** The tablespace memory cache. This variable is NULL before the module is
323 initialized. */
324 static fil_system_t*	fil_system	= NULL;
325 
/** Determine if (i) is a user tablespace id or not: it is one when it is
nonzero and srv_is_undo_tablespace() does not claim it.
The argument is parenthesized so that an expression argument (for example
a & b) is compared against 0 as a whole. NOTE: the argument is evaluated
twice, so it must not have side effects. */
# define fil_is_user_tablespace_id(i) ((i) != 0 \
				       && !srv_is_undo_tablespace(i))
329 
/** Determine if user has explicitly disabled fsync(): this holds for a
space (s) whose purpose is FIL_TABLESPACE when the Unix file flush method is
SRV_UNIX_O_DIRECT_NO_FSYNC. With __WIN__ defined the answer is always 0. */
#ifdef __WIN__
# define fil_buffering_disabled(s)	(0)
#else /* __WIN__ */
# define fil_buffering_disabled(s)					\
	((s)->purpose == FIL_TABLESPACE					\
	 && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)
#endif /* __WIN__ */
339 
#ifdef UNIV_DEBUG
/** Run fil_validate() only on every this many calls of
fil_validate_skip() */
# define FIL_VALIDATE_SKIP	17

/******************************************************************//**
Debug-build throttle for the costly fil_validate() check: most calls only
count down and report success, and the real validation runs once the
countdown expires.
@return	TRUE if the check was skipped, otherwise the fil_validate() result */
static
ibool
fil_validate_skip(void)
/*===================*/
{
	/** Calls left until the next real validation. The type is signed
	because concurrent callers race on this counter (see below). */
	static int	calls_left = FIL_VALIDATE_SKIP;

	/* The countdown is not synchronized. That is acceptable: it is only
	a heuristic for reducing how often fil_validate() is called. */
	calls_left--;

	if (calls_left <= 0) {
		calls_left = FIL_VALIDATE_SKIP;

		return(fil_validate());
	}

	return(TRUE);
}
#endif /* UNIV_DEBUG */
368 
369 /********************************************************************//**
370 Determines if a file node belongs to the least-recently-used list.
371 @return TRUE if the file belongs to fil_system->LRU mutex. */
372 UNIV_INLINE
373 ibool
fil_space_belongs_in_lru(const fil_space_t * space)374 fil_space_belongs_in_lru(
375 /*=====================*/
376 	const fil_space_t*	space)	/*!< in: file space */
377 {
378 	return(space->purpose == FIL_TABLESPACE
379 	       && fil_is_user_tablespace_id(space->id));
380 }
381 
382 /********************************************************************//**
383 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
384 
385 Prepares a file node for i/o. Opens the file if it is closed. Updates the
386 pending i/o's field in the node and the system appropriately. Takes the node
387 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
388 mutex.
389 @return false if the file can't be opened, otherwise true */
390 static
391 bool
392 fil_node_prepare_for_io(
393 /*====================*/
394 	fil_node_t*	node,	/*!< in: file node */
395 	fil_system_t*	system,	/*!< in: tablespace memory cache */
396 	fil_space_t*	space);	/*!< in: space */
397 /********************************************************************//**
398 Updates the data structures when an i/o operation finishes. Updates the
399 pending i/o's field in the node appropriately. */
400 static
401 void
402 fil_node_complete_io(
403 /*=================*/
404 	fil_node_t*	node,	/*!< in: file node */
405 	fil_system_t*	system,	/*!< in: tablespace memory cache */
406 	ulint		type);	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
407 				the node as modified if
408 				type == OS_FILE_WRITE */
409 /*******************************************************************//**
410 Frees a space object from the tablespace memory cache. Closes the files in
411 the chain but does not delete them. There must not be any pending i/o's or
412 flushes on the files.
413 @return TRUE on success */
414 static
415 ibool
416 fil_space_free(
417 /*===========*/
418 	ulint		id,		/* in: space id */
419 	ibool		x_latched);	/* in: TRUE if caller has space->latch
420 					in X mode */
421 /********************************************************************//**
422 Reads data from a space to a buffer. Remember that the possible incomplete
423 blocks at the end of file are ignored: they are not taken into account when
424 calculating the byte offset within a space.
425 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
426 i/o on a tablespace which does not exist */
427 UNIV_INLINE
428 dberr_t
fil_read(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)429 fil_read(
430 /*=====*/
431 	bool	sync,		/*!< in: true if synchronous aio is desired */
432 	ulint	space_id,	/*!< in: space id */
433 	ulint	zip_size,	/*!< in: compressed page size in bytes;
434 				0 for uncompressed pages */
435 	ulint	block_offset,	/*!< in: offset in number of blocks */
436 	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
437 				this must be divisible by the OS block size */
438 	ulint	len,		/*!< in: how many bytes to read; this must not
439 				cross a file boundary; in aio this must be a
440 				block size multiple */
441 	void*	buf,		/*!< in/out: buffer where to store data read;
442 				in aio this must be appropriately aligned */
443 	void*	message)	/*!< in: message for aio handler if non-sync
444 				aio used, else ignored */
445 {
446 	return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
447 					  byte_offset, len, buf, message));
448 }
449 
450 /********************************************************************//**
451 Writes data to a space from a buffer. Remember that the possible incomplete
452 blocks at the end of file are ignored: they are not taken into account when
453 calculating the byte offset within a space.
454 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
455 i/o on a tablespace which does not exist */
456 UNIV_INLINE
457 dberr_t
fil_write(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)458 fil_write(
459 /*======*/
460 	bool	sync,		/*!< in: true if synchronous aio is desired */
461 	ulint	space_id,	/*!< in: space id */
462 	ulint	zip_size,	/*!< in: compressed page size in bytes;
463 				0 for uncompressed pages */
464 	ulint	block_offset,	/*!< in: offset in number of blocks */
465 	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in aio
466 				this must be divisible by the OS block size */
467 	ulint	len,		/*!< in: how many bytes to write; this must
468 				not cross a file boundary; in aio this must
469 				be a block size multiple */
470 	void*	buf,		/*!< in: buffer from which to write; in aio
471 				this must be appropriately aligned */
472 	void*	message)	/*!< in: message for aio handler if non-sync
473 				aio used, else ignored */
474 {
475 	ut_ad(!srv_read_only_mode);
476 
477 	return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
478 					   byte_offset, len, buf, message));
479 }
480 
481 /*******************************************************************//**
482 Returns the table space by a given id, NULL if not found. */
483 UNIV_INLINE
484 fil_space_t*
fil_space_get_by_id(ulint id)485 fil_space_get_by_id(
486 /*================*/
487 	ulint	id)	/*!< in: space id */
488 {
489 	fil_space_t*	space;
490 
491 	ut_ad(mutex_own(&fil_system->mutex));
492 
493 	HASH_SEARCH(hash, fil_system->spaces, id,
494 		    fil_space_t*, space,
495 		    ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
496 		    space->id == id);
497 
498 	return(space);
499 }
500 
501 /*******************************************************************//**
502 Returns the table space by a given name, NULL if not found. */
503 UNIV_INLINE
504 fil_space_t*
fil_space_get_by_name(const char * name)505 fil_space_get_by_name(
506 /*==================*/
507 	const char*	name)	/*!< in: space name */
508 {
509 	fil_space_t*	space;
510 	ulint		fold;
511 
512 	ut_ad(mutex_own(&fil_system->mutex));
513 
514 	fold = ut_fold_string(name);
515 
516 	HASH_SEARCH(name_hash, fil_system->name_hash, fold,
517 		    fil_space_t*, space,
518 		    ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
519 		    !strcmp(name, space->name));
520 
521 	return(space);
522 }
523 
524 #ifndef UNIV_HOTBACKUP
525 /*******************************************************************//**
526 Returns the version number of a tablespace, -1 if not found.
527 @return version number, -1 if the tablespace does not exist in the
528 memory cache */
529 UNIV_INTERN
530 ib_int64_t
fil_space_get_version(ulint id)531 fil_space_get_version(
532 /*==================*/
533 	ulint	id)	/*!< in: space id */
534 {
535 	fil_space_t*	space;
536 	ib_int64_t	version		= -1;
537 
538 	ut_ad(fil_system);
539 
540 	mutex_enter(&fil_system->mutex);
541 
542 	space = fil_space_get_by_id(id);
543 
544 	if (space) {
545 		version = space->tablespace_version;
546 	}
547 
548 	mutex_exit(&fil_system->mutex);
549 
550 	return(version);
551 }
552 
553 /*******************************************************************//**
554 Returns the latch of a file space.
555 @return	latch protecting storage allocation */
556 UNIV_INTERN
557 rw_lock_t*
fil_space_get_latch(ulint id,ulint * flags)558 fil_space_get_latch(
559 /*================*/
560 	ulint	id,	/*!< in: space id */
561 	ulint*	flags)	/*!< out: tablespace flags */
562 {
563 	fil_space_t*	space;
564 
565 	ut_ad(fil_system);
566 
567 	mutex_enter(&fil_system->mutex);
568 
569 	space = fil_space_get_by_id(id);
570 
571 	ut_a(space);
572 
573 	if (flags) {
574 		*flags = space->flags;
575 	}
576 
577 	mutex_exit(&fil_system->mutex);
578 
579 	return(&(space->latch));
580 }
581 
582 /*******************************************************************//**
583 Returns the type of a file space.
584 @return	FIL_TABLESPACE or FIL_LOG */
585 UNIV_INTERN
586 ulint
fil_space_get_type(ulint id)587 fil_space_get_type(
588 /*===============*/
589 	ulint	id)	/*!< in: space id */
590 {
591 	fil_space_t*	space;
592 
593 	ut_ad(fil_system);
594 
595 	mutex_enter(&fil_system->mutex);
596 
597 	space = fil_space_get_by_id(id);
598 
599 	ut_a(space);
600 
601 	mutex_exit(&fil_system->mutex);
602 
603 	return(space->purpose);
604 }
605 #endif /* !UNIV_HOTBACKUP */
606 
607 /**********************************************************************//**
608 Checks if all the file nodes in a space are flushed. The caller must hold
609 the fil_system mutex.
610 @return	true if all are flushed */
611 static
612 bool
fil_space_is_flushed(fil_space_t * space)613 fil_space_is_flushed(
614 /*=================*/
615 	fil_space_t*	space)	/*!< in: space */
616 {
617 	fil_node_t*	node;
618 
619 	ut_ad(mutex_own(&fil_system->mutex));
620 
621 	node = UT_LIST_GET_FIRST(space->chain);
622 
623 	while (node) {
624 		if (node->modification_counter > node->flush_counter) {
625 
626 			ut_ad(!fil_buffering_disabled(space));
627 			return(false);
628 		}
629 
630 		node = UT_LIST_GET_NEXT(chain, node);
631 	}
632 
633 	return(true);
634 }
635 
636 /*******************************************************************//**
637 Appends a new file to the chain of files of a space. File must be closed.
638 @return pointer to the file name, or NULL on error */
639 UNIV_INTERN
640 char*
fil_node_create(const char * name,ulint size,ulint id,ibool is_raw)641 fil_node_create(
642 /*============*/
643 	const char*	name,	/*!< in: file name (file must be closed) */
644 	ulint		size,	/*!< in: file size in database blocks, rounded
645 				downwards to an integer */
646 	ulint		id,	/*!< in: space id where to append */
647 	ibool		is_raw)	/*!< in: TRUE if a raw device or
648 				a raw disk partition */
649 {
650 	fil_node_t*	node;
651 	fil_space_t*	space;
652 
653 	ut_a(fil_system);
654 	ut_a(name);
655 
656 	mutex_enter(&fil_system->mutex);
657 
658 	node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t)));
659 
660 	node->name = mem_strdup(name);
661 
662 	ut_a(!is_raw || srv_start_raw_disk_in_use);
663 
664 	node->sync_event = os_event_create();
665 	node->is_raw_disk = is_raw;
666 	node->size = size;
667 	node->magic_n = FIL_NODE_MAGIC_N;
668 
669 	space = fil_space_get_by_id(id);
670 
671 	if (!space) {
672 		ut_print_timestamp(stderr);
673 		fprintf(stderr,
674 			"  InnoDB: Error: Could not find tablespace %lu for\n"
675 			"InnoDB: file ", (ulong) id);
676 		ut_print_filename(stderr, name);
677 		fputs(" in the tablespace memory cache.\n", stderr);
678 		mem_free(node->name);
679 
680 		mem_free(node);
681 
682 		mutex_exit(&fil_system->mutex);
683 
684 		return(NULL);
685 	}
686 
687 	space->size += size;
688 
689 	node->space = space;
690 
691 	UT_LIST_ADD_LAST(chain, space->chain, node);
692 
693 	if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
694 
695 		fil_system->max_assigned_id = id;
696 	}
697 
698 	mutex_exit(&fil_system->mutex);
699 
700 	return(node->name);
701 }
702 
703 /********************************************************************//**
704 Opens a file of a node of a tablespace. The caller must own the fil_system
705 mutex.
706 @return false if the file can't be opened, otherwise true */
707 static
708 bool
fil_node_open_file(fil_node_t * node,fil_system_t * system,fil_space_t * space)709 fil_node_open_file(
710 /*===============*/
711 	fil_node_t*	node,	/*!< in: file node */
712 	fil_system_t*	system,	/*!< in: tablespace memory cache */
713 	fil_space_t*	space)	/*!< in: space */
714 {
715 	os_offset_t	size_bytes;
716 	ibool		ret;
717 	ibool		success;
718 	byte*		buf2;
719 	byte*		page;
720 	ulint		space_id;
721 	ulint		flags;
722 	ulint		page_size;
723 
724 	ut_ad(mutex_own(&(system->mutex)));
725 	ut_a(node->n_pending == 0);
726 	ut_a(node->open == FALSE);
727 
728 	if (node->size == 0) {
729 		/* It must be a single-table tablespace and we do not know the
730 		size of the file yet. First we open the file in the normal
731 		mode, no async I/O here, for simplicity. Then do some checks,
732 		and close the file again.
733 		NOTE that we could not use the simple file read function
734 		os_file_read() in Windows to read from a file opened for
735 		async I/O! */
736 
737 		node->handle = os_file_create_simple_no_error_handling(
738 			innodb_file_data_key, node->name, OS_FILE_OPEN,
739 			OS_FILE_READ_ONLY, &success);
740 		if (!success) {
741 			/* The following call prints an error message */
742 			os_file_get_last_error(true);
743 
744 			ut_print_timestamp(stderr);
745 
746 			ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
747 				"open %s\n. InnoDB: Have you deleted .ibd "
748 				"files under a running mysqld server?\n",
749 				node->name);
750 
751 			return(false);
752 		}
753 
754 		size_bytes = os_file_get_size(node->handle);
755 		ut_a(size_bytes != (os_offset_t) -1);
756 #ifdef UNIV_HOTBACKUP
757 		if (space->id == 0) {
758 			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
759 			os_file_close(node->handle);
760 			goto add_size;
761 		}
762 #endif /* UNIV_HOTBACKUP */
763 		ut_a(space->purpose != FIL_LOG);
764 		ut_a(fil_is_user_tablespace_id(space->id));
765 
766 		if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
767 			fprintf(stderr,
768 				"InnoDB: Error: the size of single-table"
769 				" tablespace file %s\n"
770 				"InnoDB: is only " UINT64PF ","
771 				" should be at least %lu!\n",
772 				node->name,
773 				size_bytes,
774 				(ulong) (FIL_IBD_FILE_INITIAL_SIZE
775 					 * UNIV_PAGE_SIZE));
776 
777 			ut_a(0);
778 		}
779 
780 		/* Read the first page of the tablespace */
781 
782 		buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
783 		/* Align the memory for file i/o if we might have O_DIRECT
784 		set */
785 		page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
786 
787 		success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
788 		space_id = fsp_header_get_space_id(page);
789 		flags = fsp_header_get_flags(page);
790 		page_size = fsp_flags_get_page_size(flags);
791 
792 		ut_free(buf2);
793 
794 		/* Close the file now that we have read the space id from it */
795 
796 		os_file_close(node->handle);
797 
798 		if (UNIV_UNLIKELY(space_id != space->id)) {
799 			fprintf(stderr,
800 				"InnoDB: Error: tablespace id is %lu"
801 				" in the data dictionary\n"
802 				"InnoDB: but in file %s it is %lu!\n",
803 				space->id, node->name, space_id);
804 
805 			ut_error;
806 		}
807 
808 		if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
809 				  || space_id == 0)) {
810 			fprintf(stderr,
811 				"InnoDB: Error: tablespace id %lu"
812 				" in file %s is not sensible\n",
813 				(ulong) space_id, node->name);
814 
815 			ut_error;
816 		}
817 
818 		if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags)
819 				  != page_size)) {
820 			fprintf(stderr,
821 				"InnoDB: Error: tablespace file %s"
822 				" has page size 0x%lx\n"
823 				"InnoDB: but the data dictionary"
824 				" expects page size 0x%lx!\n",
825 				node->name, flags,
826 				fsp_flags_get_page_size(space->flags));
827 
828 			ut_error;
829 		}
830 
831 		if (UNIV_UNLIKELY(space->flags != flags)) {
832 			fprintf(stderr,
833 				"InnoDB: Error: table flags are 0x%lx"
834 				" in the data dictionary\n"
835 				"InnoDB: but the flags in file %s are 0x%lx!\n",
836 				space->flags, node->name, flags);
837 
838 			ut_error;
839 		}
840 
841 		if (size_bytes >= 1024 * 1024) {
842 			/* Truncate the size to whole megabytes. */
843 			size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
844 		}
845 
846 		if (!fsp_flags_is_compressed(flags)) {
847 			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
848 		} else {
849 			node->size = (ulint)
850 				(size_bytes
851 				 / fsp_flags_get_zip_size(flags));
852 		}
853 
854 #ifdef UNIV_HOTBACKUP
855 add_size:
856 #endif /* UNIV_HOTBACKUP */
857 		space->size += node->size;
858 	}
859 
860 	/* printf("Opening file %s\n", node->name); */
861 
862 	/* Open the file for reading and writing, in Windows normally in the
863 	unbuffered async I/O mode, though global variables may make
864 	os_file_create() to fall back to the normal file I/O mode. */
865 
866 	if (space->purpose == FIL_LOG) {
867 		node->handle = os_file_create(innodb_file_log_key,
868 					      node->name, OS_FILE_OPEN,
869 					      OS_FILE_AIO, OS_LOG_FILE,
870 					      &ret);
871 	} else if (node->is_raw_disk) {
872 		node->handle = os_file_create(innodb_file_data_key,
873 					      node->name,
874 					      OS_FILE_OPEN_RAW,
875 					      OS_FILE_AIO, OS_DATA_FILE,
876 						     &ret);
877 	} else {
878 		node->handle = os_file_create(innodb_file_data_key,
879 					      node->name, OS_FILE_OPEN,
880 					      OS_FILE_AIO, OS_DATA_FILE,
881 					      &ret);
882 	}
883 
884 	ut_a(ret);
885 
886 	node->open = TRUE;
887 
888 	system->n_open++;
889 	fil_n_file_opened++;
890 
891 	if (fil_space_belongs_in_lru(space)) {
892 
893 		/* Put the node to the LRU list */
894 		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
895 	}
896 
897 	return(true);
898 }
899 
900 /**********************************************************************//**
901 Closes a file. */
902 static
903 void
fil_node_close_file(fil_node_t * node,fil_system_t * system)904 fil_node_close_file(
905 /*================*/
906 	fil_node_t*	node,	/*!< in: file node */
907 	fil_system_t*	system)	/*!< in: tablespace memory cache */
908 {
909 	ibool	ret;
910 
911 	ut_ad(node && system);
912 	ut_ad(mutex_own(&(system->mutex)));
913 	ut_a(node->open);
914 	ut_a(node->n_pending == 0);
915 	ut_a(node->n_pending_flushes == 0);
916 	ut_a(!node->being_extended);
917 #ifndef UNIV_HOTBACKUP
918 	ut_a(node->modification_counter == node->flush_counter
919 	     || srv_fast_shutdown == 2);
920 #endif /* !UNIV_HOTBACKUP */
921 
922 	ret = os_file_close(node->handle);
923 	ut_a(ret);
924 
925 	/* printf("Closing file %s\n", node->name); */
926 
927 	node->open = FALSE;
928 	ut_a(system->n_open > 0);
929 	system->n_open--;
930 	fil_n_file_opened--;
931 
932 	if (fil_space_belongs_in_lru(node->space)) {
933 
934 		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
935 
936 		/* The node is in the LRU list, remove it */
937 		UT_LIST_REMOVE(LRU, system->LRU, node);
938 	}
939 }
940 
941 /********************************************************************//**
942 Tries to close a file in the LRU list. The caller must hold the fil_sys
943 mutex.
944 @return TRUE if success, FALSE if should retry later; since i/o's
945 generally complete in < 100 ms, and as InnoDB writes at most 128 pages
946 from the buffer pool in a batch, and then immediately flushes the
947 files, there is a good chance that the next time we find a suitable
948 node from the LRU list */
949 static
950 ibool
fil_try_to_close_file_in_LRU(ibool print_info)951 fil_try_to_close_file_in_LRU(
952 /*=========================*/
953 	ibool	print_info)	/*!< in: if TRUE, prints information why it
954 				cannot close a file */
955 {
956 	fil_node_t*	node;
957 
958 	ut_ad(mutex_own(&fil_system->mutex));
959 
960 	if (print_info) {
961 		fprintf(stderr,
962 			"InnoDB: fil_sys open file LRU len %lu\n",
963 			(ulong) UT_LIST_GET_LEN(fil_system->LRU));
964 	}
965 
966 	for (node = UT_LIST_GET_LAST(fil_system->LRU);
967 	     node != NULL;
968 	     node = UT_LIST_GET_PREV(LRU, node)) {
969 
970 		if (node->modification_counter == node->flush_counter
971 		    && node->n_pending_flushes == 0
972 		    && !node->being_extended) {
973 
974 			fil_node_close_file(node, fil_system);
975 
976 			return(TRUE);
977 		}
978 
979 		if (!print_info) {
980 			continue;
981 		}
982 
983 		if (node->n_pending_flushes > 0) {
984 			fputs("InnoDB: cannot close file ", stderr);
985 			ut_print_filename(stderr, node->name);
986 			fprintf(stderr, ", because n_pending_flushes %lu\n",
987 				(ulong) node->n_pending_flushes);
988 		}
989 
990 		if (node->modification_counter != node->flush_counter) {
991 			fputs("InnoDB: cannot close file ", stderr);
992 			ut_print_filename(stderr, node->name);
993 			fprintf(stderr,
994 				", because mod_count %ld != fl_count %ld\n",
995 				(long) node->modification_counter,
996 				(long) node->flush_counter);
997 
998 		}
999 
1000 		if (node->being_extended) {
1001 			fputs("InnoDB: cannot close file ", stderr);
1002 			ut_print_filename(stderr, node->name);
1003 			fprintf(stderr, ", because it is being extended\n");
1004 		}
1005 	}
1006 
1007 	return(FALSE);
1008 }
1009 
1010 /*******************************************************************//**
1011 Reserves the fil_system mutex and tries to make sure we can open at least one
1012 file while holding it. This should be called before calling
1013 fil_node_prepare_for_io(), because that function may need to open a file. */
1014 static
1015 void
fil_mutex_enter_and_prepare_for_io(ulint space_id)1016 fil_mutex_enter_and_prepare_for_io(
1017 /*===============================*/
1018 	ulint	space_id)	/*!< in: space id */
1019 {
1020 	fil_space_t*	space;
1021 	ibool		success;
1022 	ibool		print_info	= FALSE;
1023 	ulint		count		= 0;
1024 	ulint		count2		= 0;
1025 
1026 retry:
1027 	mutex_enter(&fil_system->mutex);
1028 
1029 	if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
1030 		/* We keep log files and system tablespace files always open;
1031 		this is important in preventing deadlocks in this module, as
1032 		a page read completion often performs another read from the
1033 		insert buffer. The insert buffer is in tablespace 0, and we
1034 		cannot end up waiting in this function. */
1035 
1036 		return;
1037 	}
1038 
1039 	space = fil_space_get_by_id(space_id);
1040 
1041 	if (space != NULL && space->stop_ios) {
1042 		/* We are going to do a rename file and want to stop new i/o's
1043 		for a while */
1044 
1045 		if (count2 > 20000) {
1046 			fputs("InnoDB: Warning: tablespace ", stderr);
1047 			ut_print_filename(stderr, space->name);
1048 			fprintf(stderr,
1049 				" has i/o ops stopped for a long time %lu\n",
1050 				(ulong) count2);
1051 		}
1052 
1053 		mutex_exit(&fil_system->mutex);
1054 
1055 #ifndef UNIV_HOTBACKUP
1056 
1057 		/* Wake the i/o-handler threads to make sure pending
1058 		i/o's are performed */
1059 		os_aio_simulated_wake_handler_threads();
1060 
1061 		/* The sleep here is just to give IO helper threads a
1062 		bit of time to do some work. It is not required that
1063 		all IO related to the tablespace being renamed must
1064 		be flushed here as we do fil_flush() in
1065 		fil_rename_tablespace() as well. */
1066 		os_thread_sleep(20000);
1067 
1068 #endif /* UNIV_HOTBACKUP */
1069 
1070 		/* Flush tablespaces so that we can close modified
1071 		files in the LRU list */
1072 		fil_flush_file_spaces(FIL_TABLESPACE);
1073 
1074 		os_thread_sleep(20000);
1075 
1076 		count2++;
1077 
1078 		goto retry;
1079 	}
1080 
1081 	if (fil_system->n_open < fil_system->max_n_open) {
1082 
1083 		return;
1084 	}
1085 
1086 	/* If the file is already open, no need to do anything; if the space
1087 	does not exist, we handle the situation in the function which called
1088 	this function */
1089 
1090 	if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
1091 
1092 		return;
1093 	}
1094 
1095 	if (count > 1) {
1096 		print_info = TRUE;
1097 	}
1098 
1099 	/* Too many files are open, try to close some */
1100 close_more:
1101 	success = fil_try_to_close_file_in_LRU(print_info);
1102 
1103 	if (success && fil_system->n_open >= fil_system->max_n_open) {
1104 
1105 		goto close_more;
1106 	}
1107 
1108 	if (fil_system->n_open < fil_system->max_n_open) {
1109 		/* Ok */
1110 
1111 		return;
1112 	}
1113 
1114 	if (count >= 2) {
1115 		ut_print_timestamp(stderr);
1116 		fprintf(stderr,
1117 			"  InnoDB: Warning: too many (%lu) files stay open"
1118 			" while the maximum\n"
1119 			"InnoDB: allowed value would be %lu.\n"
1120 			"InnoDB: You may need to raise the value of"
1121 			" innodb_open_files in\n"
1122 			"InnoDB: my.cnf.\n",
1123 			(ulong) fil_system->n_open,
1124 			(ulong) fil_system->max_n_open);
1125 
1126 		return;
1127 	}
1128 
1129 	mutex_exit(&fil_system->mutex);
1130 
1131 #ifndef UNIV_HOTBACKUP
1132 	/* Wake the i/o-handler threads to make sure pending i/o's are
1133 	performed */
1134 	os_aio_simulated_wake_handler_threads();
1135 
1136 	os_thread_sleep(20000);
1137 #endif
1138 	/* Flush tablespaces so that we can close modified files in the LRU
1139 	list */
1140 
1141 	fil_flush_file_spaces(FIL_TABLESPACE);
1142 
1143 	count++;
1144 
1145 	goto retry;
1146 }
1147 
1148 /*******************************************************************//**
1149 Frees a file node object from a tablespace memory cache. */
1150 static
1151 void
fil_node_free(fil_node_t * node,fil_system_t * system,fil_space_t * space)1152 fil_node_free(
1153 /*==========*/
1154 	fil_node_t*	node,	/*!< in, own: file node */
1155 	fil_system_t*	system,	/*!< in: tablespace memory cache */
1156 	fil_space_t*	space)	/*!< in: space where the file node is chained */
1157 {
1158 	ut_ad(node && system && space);
1159 	ut_ad(mutex_own(&(system->mutex)));
1160 	ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1161 	ut_a(node->n_pending == 0);
1162 	ut_a(!node->being_extended);
1163 
1164 	if (node->open) {
1165 		/* We fool the assertion in fil_node_close_file() to think
1166 		there are no unflushed modifications in the file */
1167 
1168 		node->modification_counter = node->flush_counter;
1169 		os_event_set(node->sync_event);
1170 
1171 		if (fil_buffering_disabled(space)) {
1172 
1173 			ut_ad(!space->is_in_unflushed_spaces);
1174 			ut_ad(fil_space_is_flushed(space));
1175 
1176 		} else if (space->is_in_unflushed_spaces
1177 			   && fil_space_is_flushed(space)) {
1178 
1179 			space->is_in_unflushed_spaces = false;
1180 
1181 			UT_LIST_REMOVE(unflushed_spaces,
1182 				       system->unflushed_spaces,
1183 				       space);
1184 		}
1185 
1186 		fil_node_close_file(node, system);
1187 	}
1188 
1189 	space->size -= node->size;
1190 
1191 	UT_LIST_REMOVE(chain, space->chain, node);
1192 
1193 	os_event_free(node->sync_event);
1194 	mem_free(node->name);
1195 	mem_free(node);
1196 }
1197 
#ifdef UNIV_LOG_ARCHIVE
/****************************************************************//**
Drops files from the start of a file space, so that its size is cut by
the amount given. The space must exist, and trunc_len must equal the
combined size in bytes of some initial files of the space; both conditions
are enforced with assertions. */
UNIV_INTERN
void
fil_space_truncate_start(
/*=====================*/
	ulint	id,		/*!< in: space id */
	ulint	trunc_len)	/*!< in: truncate by this much; it is an error
				if this does not equal to the combined size of
				some initial files in the space */
{
	fil_node_t*	node;
	fil_space_t*	space;

	mutex_enter(&fil_system->mutex);

	space = fil_space_get_by_id(id);

	ut_a(space);

	while (trunc_len > 0) {
		node = UT_LIST_GET_FIRST(space->chain);

		/* Running out of files before trunc_len is used up means
		the caller asked for more than the space contains; fail
		with an assertion rather than a NULL dereference. */
		ut_a(node != NULL);

		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);

		trunc_len -= node->size * UNIV_PAGE_SIZE;

		/* This unlinks the node from space->chain, so the next
		iteration sees the following file as the first one. */
		fil_node_free(node, fil_system, space);
	}

	mutex_exit(&fil_system->mutex);
}
#endif /* UNIV_LOG_ARCHIVE */
1233 
1234 /*******************************************************************//**
1235 Creates a space memory object and puts it to the 'fil system' hash table.
1236 If there is an error, prints an error message to the .err log.
1237 @return	TRUE if success */
1238 UNIV_INTERN
1239 ibool
fil_space_create(const char * name,ulint id,ulint flags,ulint purpose)1240 fil_space_create(
1241 /*=============*/
1242 	const char*	name,	/*!< in: space name */
1243 	ulint		id,	/*!< in: space id */
1244 	ulint		flags,	/*!< in: tablespace flags */
1245 	ulint		purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
1246 {
1247 	fil_space_t*	space;
1248 
1249 	DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
1250 
1251 	ut_a(fil_system);
1252 	ut_a(fsp_flags_is_valid(flags));
1253 
1254 	/* Look for a matching tablespace and if found free it. */
1255 	do {
1256 		mutex_enter(&fil_system->mutex);
1257 
1258 		space = fil_space_get_by_name(name);
1259 
1260 		if (space != 0) {
1261 			ib_logf(IB_LOG_LEVEL_WARN,
1262 				"Tablespace '%s' exists in the cache "
1263 				"with id %lu != %lu",
1264 				name, (ulong) space->id, (ulong) id);
1265 
1266 			if (id == 0 || purpose != FIL_TABLESPACE) {
1267 
1268 				mutex_exit(&fil_system->mutex);
1269 
1270 				return(FALSE);
1271 			}
1272 
1273 			ib_logf(IB_LOG_LEVEL_WARN,
1274 				"Freeing existing tablespace '%s' entry "
1275 				"from the cache with id %lu",
1276 				name, (ulong) id);
1277 
1278 			ibool	success = fil_space_free(space->id, FALSE);
1279 			ut_a(success);
1280 
1281 			mutex_exit(&fil_system->mutex);
1282 		}
1283 
1284 	} while (space != 0);
1285 
1286 	space = fil_space_get_by_id(id);
1287 
1288 	if (space != 0) {
1289 		ib_logf(IB_LOG_LEVEL_ERROR,
1290 			"Trying to add tablespace '%s' with id %lu "
1291 			"to the tablespace memory cache, but tablespace '%s' "
1292 			"with id %lu already exists in the cache!",
1293 			name, (ulong) id, space->name, (ulong) space->id);
1294 
1295 		mutex_exit(&fil_system->mutex);
1296 
1297 		return(FALSE);
1298 	}
1299 
1300 	space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space)));
1301 
1302 	space->name = mem_strdup(name);
1303 	space->id = id;
1304 
1305 	fil_system->tablespace_version++;
1306 	space->tablespace_version = fil_system->tablespace_version;
1307 	space->mark = FALSE;
1308 
1309 	if (purpose == FIL_TABLESPACE && !recv_recovery_on
1310 	    && id > fil_system->max_assigned_id) {
1311 
1312 		if (!fil_system->space_id_reuse_warned) {
1313 			fil_system->space_id_reuse_warned = TRUE;
1314 
1315 			ib_logf(IB_LOG_LEVEL_WARN,
1316 				"Allocated tablespace %lu, old maximum "
1317 				"was %lu",
1318 				(ulong) id,
1319 				(ulong) fil_system->max_assigned_id);
1320 		}
1321 
1322 		fil_system->max_assigned_id = id;
1323 	}
1324 
1325 	space->purpose = purpose;
1326 	space->flags = flags;
1327 
1328 	space->magic_n = FIL_SPACE_MAGIC_N;
1329 
1330 	rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1331 
1332 	HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1333 
1334 	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1335 		    ut_fold_string(name), space);
1336 	space->is_in_unflushed_spaces = false;
1337 
1338 	UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1339 
1340 	mutex_exit(&fil_system->mutex);
1341 
1342 	return(TRUE);
1343 }
1344 
1345 /*******************************************************************//**
1346 Assigns a new space id for a new single-table tablespace. This works simply by
1347 incrementing the global counter. If 4 billion id's is not enough, we may need
1348 to recycle id's.
1349 @return	TRUE if assigned, FALSE if not */
1350 UNIV_INTERN
1351 ibool
fil_assign_new_space_id(ulint * space_id)1352 fil_assign_new_space_id(
1353 /*====================*/
1354 	ulint*	space_id)	/*!< in/out: space id */
1355 {
1356 	ulint	id;
1357 	ibool	success;
1358 
1359 	mutex_enter(&fil_system->mutex);
1360 
1361 	id = *space_id;
1362 
1363 	if (id < fil_system->max_assigned_id) {
1364 		id = fil_system->max_assigned_id;
1365 	}
1366 
1367 	id++;
1368 
1369 	if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1370 		ut_print_timestamp(stderr);
1371 		fprintf(stderr,
1372 			"InnoDB: Warning: you are running out of new"
1373 			" single-table tablespace id's.\n"
1374 			"InnoDB: Current counter is %lu and it"
1375 			" must not exceed %lu!\n"
1376 			"InnoDB: To reset the counter to zero"
1377 			" you have to dump all your tables and\n"
1378 			"InnoDB: recreate the whole InnoDB installation.\n",
1379 			(ulong) id,
1380 			(ulong) SRV_LOG_SPACE_FIRST_ID);
1381 	}
1382 
1383 	success = (id < SRV_LOG_SPACE_FIRST_ID);
1384 
1385 	if (success) {
1386 		*space_id = fil_system->max_assigned_id = id;
1387 	} else {
1388 		ut_print_timestamp(stderr);
1389 		fprintf(stderr,
1390 			"InnoDB: You have run out of single-table"
1391 			" tablespace id's!\n"
1392 			"InnoDB: Current counter is %lu.\n"
1393 			"InnoDB: To reset the counter to zero you"
1394 			" have to dump all your tables and\n"
1395 			"InnoDB: recreate the whole InnoDB installation.\n",
1396 			(ulong) id);
1397 		*space_id = ULINT_UNDEFINED;
1398 	}
1399 
1400 	mutex_exit(&fil_system->mutex);
1401 
1402 	return(success);
1403 }
1404 
1405 /*******************************************************************//**
1406 Frees a space object from the tablespace memory cache. Closes the files in
1407 the chain but does not delete them. There must not be any pending i/o's or
1408 flushes on the files.
1409 @return	TRUE if success */
1410 static
1411 ibool
fil_space_free(ulint id,ibool x_latched)1412 fil_space_free(
1413 /*===========*/
1414 					/* out: TRUE if success */
1415 	ulint		id,		/* in: space id */
1416 	ibool		x_latched)	/* in: TRUE if caller has space->latch
1417 					in X mode */
1418 {
1419 	fil_space_t*	space;
1420 	fil_space_t*	fnamespace;
1421 
1422 	ut_ad(mutex_own(&fil_system->mutex));
1423 
1424 	space = fil_space_get_by_id(id);
1425 
1426 	if (!space) {
1427 		ut_print_timestamp(stderr);
1428 		fprintf(stderr,
1429 			"  InnoDB: Error: trying to remove tablespace %lu"
1430 			" from the cache but\n"
1431 			"InnoDB: it is not there.\n", (ulong) id);
1432 
1433 		return(FALSE);
1434 	}
1435 
1436 	HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1437 
1438 	fnamespace = fil_space_get_by_name(space->name);
1439 	ut_a(fnamespace);
1440 	ut_a(space == fnamespace);
1441 
1442 	HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1443 		    ut_fold_string(space->name), space);
1444 
1445 	if (space->is_in_unflushed_spaces) {
1446 
1447 		ut_ad(!fil_buffering_disabled(space));
1448 		space->is_in_unflushed_spaces = false;
1449 
1450 		UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1451 			       space);
1452 	}
1453 
1454 	UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1455 
1456 	ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1457 	ut_a(0 == space->n_pending_flushes);
1458 
1459 	for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
1460 	     fil_node != NULL;
1461 	     fil_node = UT_LIST_GET_FIRST(space->chain)) {
1462 
1463 		fil_node_free(fil_node, fil_system, space);
1464 	}
1465 
1466 	ut_a(0 == UT_LIST_GET_LEN(space->chain));
1467 
1468 	if (x_latched) {
1469 		rw_lock_x_unlock(&space->latch);
1470 	}
1471 
1472 	rw_lock_free(&(space->latch));
1473 
1474 	mem_free(space->name);
1475 	mem_free(space);
1476 
1477 	return(TRUE);
1478 }
1479 
1480 /*******************************************************************//**
1481 Returns a pointer to the file_space_t that is in the memory cache
1482 associated with a space id. The caller must lock fil_system->mutex.
1483 @return	file_space_t pointer, NULL if space not found */
1484 UNIV_INLINE
1485 fil_space_t*
fil_space_get_space(ulint id)1486 fil_space_get_space(
1487 /*================*/
1488 	ulint	id)	/*!< in: space id */
1489 {
1490 	fil_space_t*	space;
1491 	fil_node_t*	node;
1492 
1493 	ut_ad(fil_system);
1494 
1495 	space = fil_space_get_by_id(id);
1496 	if (space == NULL) {
1497 		return(NULL);
1498 	}
1499 
1500 	if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1501 		ut_a(id != 0);
1502 
1503 		mutex_exit(&fil_system->mutex);
1504 
1505 		/* It is possible that the space gets evicted at this point
1506 		before the fil_mutex_enter_and_prepare_for_io() acquires
1507 		the fil_system->mutex. Check for this after completing the
1508 		call to fil_mutex_enter_and_prepare_for_io(). */
1509 		fil_mutex_enter_and_prepare_for_io(id);
1510 
1511 		/* We are still holding the fil_system->mutex. Check if
1512 		the space is still in memory cache. */
1513 		space = fil_space_get_by_id(id);
1514 		if (space == NULL) {
1515 			return(NULL);
1516 		}
1517 
1518 		/* The following code must change when InnoDB supports
1519 		multiple datafiles per tablespace. */
1520 		ut_a(1 == UT_LIST_GET_LEN(space->chain));
1521 
1522 		node = UT_LIST_GET_FIRST(space->chain);
1523 
1524 		/* It must be a single-table tablespace and we have not opened
1525 		the file yet; the following calls will open it and update the
1526 		size fields */
1527 
1528 		if (!fil_node_prepare_for_io(node, fil_system, space)) {
1529 			/* The single-table tablespace can't be opened,
1530 			because the ibd file is missing. */
1531 			return(NULL);
1532 		}
1533 		fil_node_complete_io(node, fil_system, OS_FILE_READ);
1534 	}
1535 
1536 	return(space);
1537 }
1538 
1539 /*******************************************************************//**
1540 Returns the path from the first fil_node_t found for the space ID sent.
1541 The caller is responsible for freeing the memory allocated here for the
1542 value returned.
1543 @return	own: A copy of fil_node_t::path, NULL if space ID is zero
1544 or not found. */
1545 UNIV_INTERN
1546 char*
fil_space_get_first_path(ulint id)1547 fil_space_get_first_path(
1548 /*=====================*/
1549 	ulint		id)	/*!< in: space id */
1550 {
1551 	fil_space_t*	space;
1552 	fil_node_t*	node;
1553 	char*		path;
1554 
1555 	ut_ad(fil_system);
1556 	ut_a(id);
1557 
1558 	fil_mutex_enter_and_prepare_for_io(id);
1559 
1560 	space = fil_space_get_space(id);
1561 
1562 	if (space == NULL) {
1563 		mutex_exit(&fil_system->mutex);
1564 
1565 		return(NULL);
1566 	}
1567 
1568 	ut_ad(mutex_own(&fil_system->mutex));
1569 
1570 	node = UT_LIST_GET_FIRST(space->chain);
1571 
1572 	path = mem_strdup(node->name);
1573 
1574 	mutex_exit(&fil_system->mutex);
1575 
1576 	return(path);
1577 }
1578 
1579 /*******************************************************************//**
1580 Returns the size of the space in pages. The tablespace must be cached in the
1581 memory cache.
1582 @return	space size, 0 if space not found */
1583 UNIV_INTERN
1584 ulint
fil_space_get_size(ulint id)1585 fil_space_get_size(
1586 /*===============*/
1587 	ulint	id)	/*!< in: space id */
1588 {
1589 	fil_space_t*	space;
1590 	ulint		size;
1591 
1592 	ut_ad(fil_system);
1593 	mutex_enter(&fil_system->mutex);
1594 
1595 	space = fil_space_get_space(id);
1596 
1597 	size = space ? space->size : 0;
1598 
1599 	mutex_exit(&fil_system->mutex);
1600 
1601 	return(size);
1602 }
1603 
1604 /*******************************************************************//**
1605 Returns the flags of the space. The tablespace must be cached
1606 in the memory cache.
1607 @return	flags, ULINT_UNDEFINED if space not found */
1608 UNIV_INTERN
1609 ulint
fil_space_get_flags(ulint id)1610 fil_space_get_flags(
1611 /*================*/
1612 	ulint	id)	/*!< in: space id */
1613 {
1614 	fil_space_t*	space;
1615 	ulint		flags;
1616 
1617 	ut_ad(fil_system);
1618 
1619 	if (!id) {
1620 		return(0);
1621 	}
1622 
1623 	mutex_enter(&fil_system->mutex);
1624 
1625 	space = fil_space_get_space(id);
1626 
1627 	if (space == NULL) {
1628 		mutex_exit(&fil_system->mutex);
1629 
1630 		return(ULINT_UNDEFINED);
1631 	}
1632 
1633 	flags = space->flags;
1634 
1635 	mutex_exit(&fil_system->mutex);
1636 
1637 	return(flags);
1638 }
1639 
1640 /*******************************************************************//**
1641 Returns the compressed page size of the space, or 0 if the space
1642 is not compressed. The tablespace must be cached in the memory cache.
1643 @return	compressed page size, ULINT_UNDEFINED if space not found */
1644 UNIV_INTERN
1645 ulint
fil_space_get_zip_size(ulint id)1646 fil_space_get_zip_size(
1647 /*===================*/
1648 	ulint	id)	/*!< in: space id */
1649 {
1650 	ulint	flags;
1651 
1652 	flags = fil_space_get_flags(id);
1653 
1654 	if (flags && flags != ULINT_UNDEFINED) {
1655 
1656 		return(fsp_flags_get_zip_size(flags));
1657 	}
1658 
1659 	return(flags);
1660 }
1661 
1662 /*******************************************************************//**
1663 Checks if the pair space, page_no refers to an existing page in a tablespace
1664 file space. The tablespace must be cached in the memory cache.
1665 @return	TRUE if the address is meaningful */
1666 UNIV_INTERN
1667 ibool
fil_check_adress_in_tablespace(ulint id,ulint page_no)1668 fil_check_adress_in_tablespace(
1669 /*===========================*/
1670 	ulint	id,	/*!< in: space id */
1671 	ulint	page_no)/*!< in: page number */
1672 {
1673 	if (fil_space_get_size(id) > page_no) {
1674 
1675 		return(TRUE);
1676 	}
1677 
1678 	return(FALSE);
1679 }
1680 
1681 /****************************************************************//**
1682 Initializes the tablespace memory cache. */
1683 UNIV_INTERN
1684 void
fil_init(ulint hash_size,ulint max_n_open)1685 fil_init(
1686 /*=====*/
1687 	ulint	hash_size,	/*!< in: hash table size */
1688 	ulint	max_n_open)	/*!< in: max number of open files */
1689 {
1690 	ut_a(fil_system == NULL);
1691 
1692 	ut_a(hash_size > 0);
1693 	ut_a(max_n_open > 0);
1694 
1695 	fil_system = static_cast<fil_system_t*>(
1696 		mem_zalloc(sizeof(fil_system_t)));
1697 
1698 	mutex_create(fil_system_mutex_key,
1699 		     &fil_system->mutex, SYNC_ANY_LATCH);
1700 
1701 	fil_system->spaces = hash_create(hash_size);
1702 	fil_system->name_hash = hash_create(hash_size);
1703 
1704 	UT_LIST_INIT(fil_system->LRU);
1705 
1706 	fil_system->max_n_open = max_n_open;
1707 }
1708 
1709 /*******************************************************************//**
1710 Opens all log files and system tablespace data files. They stay open until the
1711 database server shutdown. This should be called at a server startup after the
1712 space objects for the log and the system tablespace have been created. The
1713 purpose of this operation is to make sure we never run out of file descriptors
1714 if we need to read from the insert buffer or to write to the log. */
1715 UNIV_INTERN
1716 void
fil_open_log_and_system_tablespace_files(void)1717 fil_open_log_and_system_tablespace_files(void)
1718 /*==========================================*/
1719 {
1720 	fil_space_t*	space;
1721 
1722 	mutex_enter(&fil_system->mutex);
1723 
1724 	for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1725 	     space != NULL;
1726 	     space = UT_LIST_GET_NEXT(space_list, space)) {
1727 
1728 		fil_node_t*	node;
1729 
1730 		if (fil_space_belongs_in_lru(space)) {
1731 
1732 			continue;
1733 		}
1734 
1735 		for (node = UT_LIST_GET_FIRST(space->chain);
1736 		     node != NULL;
1737 		     node = UT_LIST_GET_NEXT(chain, node)) {
1738 
1739 			if (!node->open) {
1740 				if (!fil_node_open_file(node, fil_system,
1741 							space)) {
1742 					/* This func is called during server's
1743 					startup. If some file of log or system
1744 					tablespace is missing, the server
1745 					can't start successfully. So we should
1746 					assert for it. */
1747 					ut_a(0);
1748 				}
1749 			}
1750 
1751 			if (fil_system->max_n_open < 10 + fil_system->n_open) {
1752 
1753 				fprintf(stderr,
1754 					"InnoDB: Warning: you must"
1755 					" raise the value of"
1756 					" innodb_open_files in\n"
1757 					"InnoDB: my.cnf! Remember that"
1758 					" InnoDB keeps all log files"
1759 					" and all system\n"
1760 					"InnoDB: tablespace files open"
1761 					" for the whole time mysqld is"
1762 					" running, and\n"
1763 					"InnoDB: needs to open also"
1764 					" some .ibd files if the"
1765 					" file-per-table storage\n"
1766 					"InnoDB: model is used."
1767 					" Current open files %lu,"
1768 					" max allowed"
1769 					" open files %lu.\n",
1770 					(ulong) fil_system->n_open,
1771 					(ulong) fil_system->max_n_open);
1772 			}
1773 		}
1774 	}
1775 
1776 	mutex_exit(&fil_system->mutex);
1777 }
1778 
1779 /*******************************************************************//**
1780 Closes all open files. There must not be any pending i/o's or not flushed
1781 modifications in the files. */
1782 UNIV_INTERN
1783 void
fil_close_all_files(void)1784 fil_close_all_files(void)
1785 /*=====================*/
1786 {
1787 	fil_space_t*	space;
1788 
1789 	mutex_enter(&fil_system->mutex);
1790 
1791 	space = UT_LIST_GET_FIRST(fil_system->space_list);
1792 
1793 	while (space != NULL) {
1794 		fil_node_t*	node;
1795 		fil_space_t*	prev_space = space;
1796 
1797 		for (node = UT_LIST_GET_FIRST(space->chain);
1798 		     node != NULL;
1799 		     node = UT_LIST_GET_NEXT(chain, node)) {
1800 
1801 			if (node->open) {
1802 				fil_node_close_file(node, fil_system);
1803 			}
1804 		}
1805 
1806 		space = UT_LIST_GET_NEXT(space_list, space);
1807 
1808 		fil_space_free(prev_space->id, FALSE);
1809 	}
1810 
1811 	mutex_exit(&fil_system->mutex);
1812 }
1813 
1814 /*******************************************************************//**
1815 Closes the redo log files. There must not be any pending i/o's or not
1816 flushed modifications in the files. */
1817 UNIV_INTERN
1818 void
fil_close_log_files(bool free)1819 fil_close_log_files(
1820 /*================*/
1821 	bool	free)	/*!< in: whether to free the memory object */
1822 {
1823 	fil_space_t*	space;
1824 
1825 	mutex_enter(&fil_system->mutex);
1826 
1827 	space = UT_LIST_GET_FIRST(fil_system->space_list);
1828 
1829 	while (space != NULL) {
1830 		fil_node_t*	node;
1831 		fil_space_t*	prev_space = space;
1832 
1833 		if (space->purpose != FIL_LOG) {
1834 			space = UT_LIST_GET_NEXT(space_list, space);
1835 			continue;
1836 		}
1837 
1838 		for (node = UT_LIST_GET_FIRST(space->chain);
1839 		     node != NULL;
1840 		     node = UT_LIST_GET_NEXT(chain, node)) {
1841 
1842 			if (node->open) {
1843 				fil_node_close_file(node, fil_system);
1844 			}
1845 		}
1846 
1847 		space = UT_LIST_GET_NEXT(space_list, space);
1848 
1849 		if (free) {
1850 			fil_space_free(prev_space->id, FALSE);
1851 		}
1852 	}
1853 
1854 	mutex_exit(&fil_system->mutex);
1855 }
1856 
1857 /*******************************************************************//**
1858 Sets the max tablespace id counter if the given number is bigger than the
1859 previous value. */
1860 UNIV_INTERN
1861 void
fil_set_max_space_id_if_bigger(ulint max_id)1862 fil_set_max_space_id_if_bigger(
1863 /*===========================*/
1864 	ulint	max_id)	/*!< in: maximum known id */
1865 {
1866 	if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1867 		fprintf(stderr,
1868 			"InnoDB: Fatal error: max tablespace id"
1869 			" is too high, %lu\n", (ulong) max_id);
1870 		ut_error;
1871 	}
1872 
1873 	mutex_enter(&fil_system->mutex);
1874 
1875 	if (fil_system->max_assigned_id < max_id) {
1876 
1877 		fil_system->max_assigned_id = max_id;
1878 	}
1879 
1880 	mutex_exit(&fil_system->mutex);
1881 }
1882 
1883 /****************************************************************//**
1884 Writes the flushed lsn and the latest archived log number to the page header
1885 of the first page of a data file of the system tablespace (space 0),
1886 which is uncompressed. */
1887 static MY_ATTRIBUTE((warn_unused_result))
1888 dberr_t
fil_write_lsn_and_arch_no_to_file(ulint space,ulint sum_of_sizes,lsn_t lsn,ulint arch_log_no MY_ATTRIBUTE ((unused)))1889 fil_write_lsn_and_arch_no_to_file(
1890 /*==============================*/
1891 	ulint	space,		/*!< in: space to write to */
1892 	ulint	sum_of_sizes,	/*!< in: combined size of previous files
1893 				in space, in database pages */
1894 	lsn_t	lsn,		/*!< in: lsn to write */
1895 	ulint	arch_log_no MY_ATTRIBUTE((unused)))
1896 				/*!< in: archived log number to write */
1897 {
1898 	byte*	buf1;
1899 	byte*	buf;
1900 	dberr_t	err;
1901 
1902 	buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE));
1903 	buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
1904 
1905 	err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
1906 		       UNIV_PAGE_SIZE, buf, NULL);
1907 	if (err == DB_SUCCESS) {
1908 		mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1909 
1910 		err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
1911 				UNIV_PAGE_SIZE, buf, NULL);
1912 	}
1913 
1914 	mem_free(buf1);
1915 
1916 	return(err);
1917 }
1918 
1919 /****************************************************************//**
1920 Writes the flushed lsn and the latest archived log number to the page
1921 header of the first page of each data file in the system tablespace.
1922 @return	DB_SUCCESS or error number */
1923 UNIV_INTERN
1924 dberr_t
fil_write_flushed_lsn_to_data_files(lsn_t lsn,ulint arch_log_no)1925 fil_write_flushed_lsn_to_data_files(
1926 /*================================*/
1927 	lsn_t	lsn,		/*!< in: lsn to write */
1928 	ulint	arch_log_no)	/*!< in: latest archived log file number */
1929 {
1930 	fil_space_t*	space;
1931 	fil_node_t*	node;
1932 	dberr_t		err;
1933 
1934 	mutex_enter(&fil_system->mutex);
1935 
1936 	for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1937 	     space != NULL;
1938 	     space = UT_LIST_GET_NEXT(space_list, space)) {
1939 
1940 		/* We only write the lsn to all existing data files which have
1941 		been open during the lifetime of the mysqld process; they are
1942 		represented by the space objects in the tablespace memory
1943 		cache. Note that all data files in the system tablespace 0
1944 		and the UNDO log tablespaces (if separate) are always open. */
1945 
1946 		if (space->purpose == FIL_TABLESPACE
1947 		    && !fil_is_user_tablespace_id(space->id)) {
1948 			ulint	sum_of_sizes = 0;
1949 
1950 			for (node = UT_LIST_GET_FIRST(space->chain);
1951 			     node != NULL;
1952 			     node = UT_LIST_GET_NEXT(chain, node)) {
1953 
1954 				mutex_exit(&fil_system->mutex);
1955 
1956 				err = fil_write_lsn_and_arch_no_to_file(
1957 					space->id, sum_of_sizes, lsn,
1958 					arch_log_no);
1959 
1960 				if (err != DB_SUCCESS) {
1961 
1962 					return(err);
1963 				}
1964 
1965 				mutex_enter(&fil_system->mutex);
1966 
1967 				sum_of_sizes += node->size;
1968 			}
1969 		}
1970 	}
1971 
1972 	mutex_exit(&fil_system->mutex);
1973 
1974 	return(DB_SUCCESS);
1975 }
1976 
1977 /*******************************************************************//**
1978 Checks the consistency of the first data page of a tablespace
1979 at database startup.
1980 @retval NULL on success, or if innodb_force_recovery is set
1981 @return pointer to an error message string */
1982 static MY_ATTRIBUTE((warn_unused_result))
1983 const char*
fil_check_first_page(const page_t * page)1984 fil_check_first_page(
1985 /*=================*/
1986 	const page_t*	page)		/*!< in: data page */
1987 {
1988 	ulint	space_id;
1989 	ulint	flags;
1990 
1991 	if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
1992 		return(NULL);
1993 	}
1994 
1995 	space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
1996 	flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
1997 
1998 	if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
1999 		return("innodb-page-size mismatch");
2000 	}
2001 
2002 	if (!space_id && !flags) {
2003 		ulint		nonzero_bytes	= UNIV_PAGE_SIZE;
2004 		const byte*	b		= page;
2005 
2006 		while (!*b && --nonzero_bytes) {
2007 			b++;
2008 		}
2009 
2010 		if (!nonzero_bytes) {
2011 			return("space header page consists of zero bytes");
2012 		}
2013 	}
2014 
2015 	if (buf_page_is_corrupted(
2016 		    false, page, fsp_flags_get_zip_size(flags))) {
2017 		return("checksum mismatch");
2018 	}
2019 
2020 	if (page_get_space_id(page) == space_id
2021 	    && page_get_page_no(page) == 0) {
2022 		return(NULL);
2023 	}
2024 
2025 	return("inconsistent data in space header");
2026 }
2027 
2028 /*******************************************************************//**
2029 Reads the flushed lsn, arch no, space_id and tablespace flag fields from
2030 the first page of a data file at database startup.
2031 @retval NULL on success, or if innodb_force_recovery is set
2032 @return pointer to an error message string */
2033 UNIV_INTERN
2034 const char*
fil_read_first_page(pfs_os_file_t data_file,ibool one_read_already,ulint * flags,ulint * space_id,ulint * min_arch_log_no,ulint * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn)2035 fil_read_first_page(
2036 /*================*/
2037 	pfs_os_file_t	data_file,		/*!< in: open data file */
2038 	ibool		one_read_already,	/*!< in: TRUE if min and max
2039 						parameters below already
2040 						contain sensible data */
2041 	ulint*		flags,			/*!< out: tablespace flags */
2042 	ulint*		space_id,		/*!< out: tablespace ID */
2043 #ifdef UNIV_LOG_ARCHIVE
2044 	ulint*		min_arch_log_no,	/*!< out: min of archived
2045 						log numbers in data files */
2046 	ulint*		max_arch_log_no,	/*!< out: max of archived
2047 						log numbers in data files */
2048 #endif /* UNIV_LOG_ARCHIVE */
2049 	lsn_t*		min_flushed_lsn,	/*!< out: min of flushed
2050 						lsn values in data files */
2051 	lsn_t*		max_flushed_lsn)	/*!< out: max of flushed
2052 						lsn values in data files */
2053 {
2054 	byte*		buf;
2055 	byte*		page;
2056 	lsn_t		flushed_lsn;
2057 	const char*	check_msg = NULL;
2058 
2059 	buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
2060 
2061 	/* Align the memory for a possible read from a raw device */
2062 
2063 	page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
2064 
2065 	os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
2066 
2067 	/* The FSP_HEADER on page 0 is only valid for the first file
2068 	in a tablespace.  So if this is not the first datafile, leave
2069 	*flags and *space_id as they were read from the first file and
2070 	do not validate the first page. */
2071 	if (!one_read_already) {
2072 		*flags = fsp_header_get_flags(page);
2073 		*space_id = fsp_header_get_space_id(page);
2074 
2075 		check_msg = fil_check_first_page(page);
2076 	}
2077 
2078 	flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2079 
2080 	ut_free(buf);
2081 
2082 	if (check_msg) {
2083 		return(check_msg);
2084 	}
2085 
2086 	if (!one_read_already) {
2087 		*min_flushed_lsn = flushed_lsn;
2088 		*max_flushed_lsn = flushed_lsn;
2089 #ifdef UNIV_LOG_ARCHIVE
2090 		*min_arch_log_no = arch_log_no;
2091 		*max_arch_log_no = arch_log_no;
2092 #endif /* UNIV_LOG_ARCHIVE */
2093 		return(NULL);
2094 	}
2095 
2096 	if (*min_flushed_lsn > flushed_lsn) {
2097 		*min_flushed_lsn = flushed_lsn;
2098 	}
2099 	if (*max_flushed_lsn < flushed_lsn) {
2100 		*max_flushed_lsn = flushed_lsn;
2101 	}
2102 #ifdef UNIV_LOG_ARCHIVE
2103 	if (*min_arch_log_no > arch_log_no) {
2104 		*min_arch_log_no = arch_log_no;
2105 	}
2106 	if (*max_arch_log_no < arch_log_no) {
2107 		*max_arch_log_no = arch_log_no;
2108 	}
2109 #endif /* UNIV_LOG_ARCHIVE */
2110 
2111 	return(NULL);
2112 }
2113 
2114 /*================ SINGLE-TABLE TABLESPACES ==========================*/
2115 
2116 #ifndef UNIV_HOTBACKUP
2117 /*******************************************************************//**
2118 Increments the count of pending operation, if space is not being deleted.
2119 @return	TRUE if being deleted, and operation should be skipped */
2120 UNIV_INTERN
2121 ibool
fil_inc_pending_ops(ulint id,ibool print_err)2122 fil_inc_pending_ops(
2123 /*================*/
2124 	ulint	id,		/*!< in: space id */
2125 	ibool	print_err)	/*!< in: need to print error or not */
2126 {
2127 	fil_space_t*	space;
2128 
2129 	mutex_enter(&fil_system->mutex);
2130 
2131 	space = fil_space_get_by_id(id);
2132 
2133 	if (space == NULL) {
2134 		if (print_err) {
2135 			fprintf(stderr,
2136 				"InnoDB: Error: trying to do an operation on a"
2137 				" dropped tablespace %lu\n",
2138 				(ulong) id);
2139 		}
2140 	}
2141 
2142 	if (space == NULL || space->stop_new_ops) {
2143 		mutex_exit(&fil_system->mutex);
2144 
2145 		return(TRUE);
2146 	}
2147 
2148 	space->n_pending_ops++;
2149 
2150 	mutex_exit(&fil_system->mutex);
2151 
2152 	return(FALSE);
2153 }
2154 
2155 /*******************************************************************//**
2156 Decrements the count of pending operations. */
2157 UNIV_INTERN
2158 void
fil_decr_pending_ops(ulint id)2159 fil_decr_pending_ops(
2160 /*=================*/
2161 	ulint	id)	/*!< in: space id */
2162 {
2163 	fil_space_t*	space;
2164 
2165 	mutex_enter(&fil_system->mutex);
2166 
2167 	space = fil_space_get_by_id(id);
2168 
2169 	if (space == NULL) {
2170 		fprintf(stderr,
2171 			"InnoDB: Error: decrementing pending operation"
2172 			" of a dropped tablespace %lu\n",
2173 			(ulong) id);
2174 	}
2175 
2176 	if (space != NULL) {
2177 		space->n_pending_ops--;
2178 	}
2179 
2180 	mutex_exit(&fil_system->mutex);
2181 }
2182 #endif /* !UNIV_HOTBACKUP */
2183 
2184 /********************************************************//**
2185 Creates the database directory for a table if it does not exist yet. */
2186 static
2187 void
fil_create_directory_for_tablename(const char * name)2188 fil_create_directory_for_tablename(
2189 /*===============================*/
2190 	const char*	name)	/*!< in: name in the standard
2191 				'databasename/tablename' format */
2192 {
2193 	const char*	namend;
2194 	char*		path;
2195 	ulint		len;
2196 
2197 	len = strlen(fil_path_to_mysql_datadir);
2198 	namend = strchr(name, '/');
2199 	ut_a(namend);
2200 	path = static_cast<char*>(mem_alloc(len + (namend - name) + 2));
2201 
2202 	memcpy(path, fil_path_to_mysql_datadir, len);
2203 	path[len] = '/';
2204 	memcpy(path + len + 1, name, namend - name);
2205 	path[len + (namend - name) + 1] = 0;
2206 
2207 	srv_normalize_path_for_win(path);
2208 
2209 	ut_a(os_file_create_directory(path, FALSE));
2210 	mem_free(path);
2211 }
2212 
2213 #ifndef UNIV_HOTBACKUP
2214 /********************************************************//**
2215 Writes a log record about an .ibd file create/rename/delete. */
2216 static
2217 void
fil_op_write_log(ulint type,ulint space_id,ulint log_flags,ulint flags,const char * name,const char * new_name,mtr_t * mtr)2218 fil_op_write_log(
2219 /*=============*/
2220 	ulint		type,		/*!< in: MLOG_FILE_CREATE,
2221 					MLOG_FILE_CREATE2,
2222 					MLOG_FILE_DELETE, or
2223 					MLOG_FILE_RENAME */
2224 	ulint		space_id,	/*!< in: space id */
2225 	ulint		log_flags,	/*!< in: redo log flags (stored
2226 					in the page number field) */
2227 	ulint		flags,		/*!< in: compressed page size
2228 					and file format
2229 					if type==MLOG_FILE_CREATE2, or 0 */
2230 	const char*	name,		/*!< in: table name in the familiar
2231 					'databasename/tablename' format, or
2232 					the file path in the case of
2233 					MLOG_FILE_DELETE */
2234 	const char*	new_name,	/*!< in: if type is MLOG_FILE_RENAME,
2235 					the new table name in the
2236 					'databasename/tablename' format */
2237 	mtr_t*		mtr)		/*!< in: mini-transaction handle */
2238 {
2239 	byte*	log_ptr;
2240 	ulint	len;
2241 
2242 	log_ptr = mlog_open(mtr, 11 + 2 + 1);
2243 
2244 	if (!log_ptr) {
2245 		/* Logging in mtr is switched off during crash recovery:
2246 		in that case mlog_open returns NULL */
2247 		return;
2248 	}
2249 
2250 	log_ptr = mlog_write_initial_log_record_for_file_op(
2251 		type, space_id, log_flags, log_ptr, mtr);
2252 	if (type == MLOG_FILE_CREATE2) {
2253 		mach_write_to_4(log_ptr, flags);
2254 		log_ptr += 4;
2255 	}
2256 	/* Let us store the strings as null-terminated for easier readability
2257 	and handling */
2258 
2259 	len = strlen(name) + 1;
2260 
2261 	mach_write_to_2(log_ptr, len);
2262 	log_ptr += 2;
2263 	mlog_close(mtr, log_ptr);
2264 
2265 	mlog_catenate_string(mtr, (byte*) name, len);
2266 
2267 	if (type == MLOG_FILE_RENAME) {
2268 		len = strlen(new_name) + 1;
2269 		log_ptr = mlog_open(mtr, 2 + len);
2270 		ut_a(log_ptr);
2271 		mach_write_to_2(log_ptr, len);
2272 		log_ptr += 2;
2273 		mlog_close(mtr, log_ptr);
2274 
2275 		mlog_catenate_string(mtr, (byte*) new_name, len);
2276 	}
2277 }
2278 #endif
2279 
2280 /*******************************************************************//**
2281 Parses the body of a log record written about an .ibd file operation. That is,
2282 the log record part after the standard (type, space id, page no) header of the
2283 log record.
2284 
2285 If desired, also replays the delete or rename operation if the .ibd file
2286 exists and the space id in it matches. Replays the create operation if a file
2287 at that path does not exist yet. If the database directory for the file to be
2288 created does not exist, then we create the directory, too.
2289 
2290 Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
2291 the datadir that we should use in replaying the file operations.
2292 
2293 InnoDB recovery does not replay these fully since it always sets the space id
2294 to zero. But mysqlbackup does replay them.  TODO: If remote tablespaces are
2295 used, mysqlbackup will only create tables in the default directory since
2296 MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path.
2297 
2298 @return end of log record, or NULL if the record was not completely
2299 contained between ptr and end_ptr */
2300 UNIV_INTERN
2301 byte*
fil_op_log_parse_or_replay(byte * ptr,byte * end_ptr,ulint type,ulint space_id,ulint log_flags)2302 fil_op_log_parse_or_replay(
2303 /*=======================*/
2304 	byte*	ptr,		/*!< in: buffer containing the log record body,
2305 				or an initial segment of it, if the record does
2306 				not fir completely between ptr and end_ptr */
2307 	byte*	end_ptr,	/*!< in: buffer end */
2308 	ulint	type,		/*!< in: the type of this log record */
2309 	ulint	space_id,	/*!< in: the space id of the tablespace in
2310 				question, or 0 if the log record should
2311 				only be parsed but not replayed */
2312 	ulint	log_flags)	/*!< in: redo log flags
2313 				(stored in the page number parameter) */
2314 {
2315 	ulint		name_len;
2316 	ulint		new_name_len;
2317 	const char*	name;
2318 	const char*	new_name	= NULL;
2319 	ulint		flags		= 0;
2320 
2321 	if (type == MLOG_FILE_CREATE2) {
2322 		if (end_ptr < ptr + 4) {
2323 
2324 			return(NULL);
2325 		}
2326 
2327 		flags = mach_read_from_4(ptr);
2328 		ptr += 4;
2329 	}
2330 
2331 	if (end_ptr < ptr + 2) {
2332 
2333 		return(NULL);
2334 	}
2335 
2336 	name_len = mach_read_from_2(ptr);
2337 
2338 	ptr += 2;
2339 
2340 	if (end_ptr < ptr + name_len) {
2341 
2342 		return(NULL);
2343 	}
2344 
2345 	name = (const char*) ptr;
2346 
2347 	ptr += name_len;
2348 
2349 	if (type == MLOG_FILE_RENAME) {
2350 		if (end_ptr < ptr + 2) {
2351 
2352 			return(NULL);
2353 		}
2354 
2355 		new_name_len = mach_read_from_2(ptr);
2356 
2357 		ptr += 2;
2358 
2359 		if (end_ptr < ptr + new_name_len) {
2360 
2361 			return(NULL);
2362 		}
2363 
2364 		new_name = (const char*) ptr;
2365 
2366 		ptr += new_name_len;
2367 	}
2368 
2369 	/* We managed to parse a full log record body */
2370 	/*
2371 	printf("Parsed log rec of type %lu space %lu\n"
2372 	"name %s\n", type, space_id, name);
2373 
2374 	if (type == MLOG_FILE_RENAME) {
2375 	printf("new name %s\n", new_name);
2376 	}
2377 	*/
2378 	if (!space_id) {
2379 		return(ptr);
2380 	}
2381 
2382 	/* Let us try to perform the file operation, if sensible. Note that
2383 	mysqlbackup has at this stage already read in all space id info to the
2384 	fil0fil.cc data structures.
2385 
2386 	NOTE that our algorithm is not guaranteed to work correctly if there
2387 	were renames of tables during the backup. See mysqlbackup code for more
2388 	on the problem. */
2389 
2390 	switch (type) {
2391 	case MLOG_FILE_DELETE:
2392 		if (fil_tablespace_exists_in_mem(space_id)) {
2393 			dberr_t	err = fil_delete_tablespace(
2394 				space_id, BUF_REMOVE_FLUSH_NO_WRITE);
2395 			ut_a(err == DB_SUCCESS);
2396 		}
2397 
2398 		break;
2399 
2400 	case MLOG_FILE_RENAME:
2401 		/* In order to replay the rename, the following must hold:
2402 		* The new name is not already used.
2403 		* A tablespace is open in memory with the old name.
2404 		* The space ID for that tablepace matches this log entry.
2405 		This will prevent unintended renames during recovery. */
2406 
2407 		if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED
2408 		    && space_id == fil_get_space_id_for_table(name)) {
2409 			/* Create the database directory for the new name, if
2410 			it does not exist yet */
2411 			fil_create_directory_for_tablename(new_name);
2412 
2413 			if (!fil_rename_tablespace(name, space_id,
2414 						   new_name, NULL)) {
2415 				ut_error;
2416 			}
2417 		}
2418 
2419 		break;
2420 
2421 	case MLOG_FILE_CREATE:
2422 	case MLOG_FILE_CREATE2:
2423 		if (fil_tablespace_exists_in_mem(space_id)) {
2424 			/* Do nothing */
2425 		} else if (fil_get_space_id_for_table(name)
2426 			   != ULINT_UNDEFINED) {
2427 			/* Do nothing */
2428 		} else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2429 			/* Temporary table, do nothing */
2430 		} else {
2431 			const char*	path = NULL;
2432 
2433 			/* Create the database directory for name, if it does
2434 			not exist yet */
2435 			fil_create_directory_for_tablename(name);
2436 
2437 			if (fil_create_new_single_table_tablespace(
2438 				    space_id, name, path, flags,
2439 				    DICT_TF2_USE_TABLESPACE,
2440 				    FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2441 				ut_error;
2442 			}
2443 		}
2444 
2445 		break;
2446 
2447 	default:
2448 		ut_error;
2449 	}
2450 
2451 	return(ptr);
2452 }
2453 
2454 /*******************************************************************//**
2455 Allocates a file name for the EXPORT/IMPORT config file name.  The
2456 string must be freed by caller with mem_free().
2457 @return own: file name */
2458 static
2459 char*
fil_make_cfg_name(const char * filepath)2460 fil_make_cfg_name(
2461 /*==============*/
2462 	const char*	filepath)	/*!< in: .ibd file name */
2463 {
2464 	char*	cfg_name;
2465 
2466 	/* Create a temporary file path by replacing the .ibd suffix
2467 	with .cfg. */
2468 
2469 	ut_ad(strlen(filepath) > 4);
2470 
2471 	cfg_name = mem_strdup(filepath);
2472 	ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
2473 	return(cfg_name);
2474 }
2475 
2476 /*******************************************************************//**
2477 Check for change buffer merges.
2478 @return 0 if no merges else count + 1. */
2479 static
2480 ulint
fil_ibuf_check_pending_ops(fil_space_t * space,ulint count)2481 fil_ibuf_check_pending_ops(
2482 /*=======================*/
2483 	fil_space_t*	space,	/*!< in/out: Tablespace to check */
2484 	ulint		count)	/*!< in: number of attempts so far */
2485 {
2486 	ut_ad(mutex_own(&fil_system->mutex));
2487 
2488 	if (space != 0 && space->n_pending_ops != 0) {
2489 
2490 		if (count > 5000) {
2491 			ib_logf(IB_LOG_LEVEL_WARN,
2492 				"Trying to close/delete tablespace "
2493 				"'%s' but there are %lu pending change "
2494 				"buffer merges on it.",
2495 				space->name,
2496 				(ulong) space->n_pending_ops);
2497 		}
2498 
2499 		return(count + 1);
2500 	}
2501 
2502 	return(0);
2503 }
2504 
2505 /*******************************************************************//**
2506 Check for pending IO.
2507 @return 0 if no pending else count + 1. */
2508 static
2509 ulint
fil_check_pending_io(fil_space_t * space,fil_node_t ** node,ulint count)2510 fil_check_pending_io(
2511 /*=================*/
2512 	fil_space_t*	space,	/*!< in/out: Tablespace to check */
2513 	fil_node_t**	node,	/*!< out: Node in space list */
2514 	ulint		count)	/*!< in: number of attempts so far */
2515 {
2516 	ut_ad(mutex_own(&fil_system->mutex));
2517 	ut_a(space->n_pending_ops == 0);
2518 
2519 	/* The following code must change when InnoDB supports
2520 	multiple datafiles per tablespace. */
2521 	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2522 
2523 	*node = UT_LIST_GET_FIRST(space->chain);
2524 
2525 	if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
2526 
2527 		ut_a(!(*node)->being_extended);
2528 
2529 		if (count > 1000) {
2530 			ib_logf(IB_LOG_LEVEL_WARN,
2531 				"Trying to close/delete tablespace '%s' "
2532 				"but there are %lu flushes "
2533 				" and %lu pending i/o's on it.",
2534 				space->name,
2535 				(ulong) space->n_pending_flushes,
2536 				(ulong) (*node)->n_pending);
2537 		}
2538 
2539 		return(count + 1);
2540 	}
2541 
2542 	return(0);
2543 }
2544 
2545 /*******************************************************************//**
2546 Check pending operations on a tablespace.
2547 @return DB_SUCCESS or error failure. */
2548 static
2549 dberr_t
fil_check_pending_operations(ulint id,fil_space_t ** space,char ** path)2550 fil_check_pending_operations(
2551 /*=========================*/
2552 	ulint		id,	/*!< in: space id */
2553 	fil_space_t**	space,	/*!< out: tablespace instance in memory */
2554 	char**		path)	/*!< out/own: tablespace path */
2555 {
2556 	ulint		count = 0;
2557 
2558 	ut_a(id != TRX_SYS_SPACE);
2559 	ut_ad(space);
2560 
2561 	*space = 0;
2562 
2563 	mutex_enter(&fil_system->mutex);
2564 	fil_space_t* sp = fil_space_get_by_id(id);
2565 	if (sp) {
2566 		sp->stop_new_ops = TRUE;
2567 	}
2568 	mutex_exit(&fil_system->mutex);
2569 
2570 	/* Check for pending change buffer merges. */
2571 
2572 	do {
2573 		mutex_enter(&fil_system->mutex);
2574 
2575 		sp = fil_space_get_by_id(id);
2576 
2577 		count = fil_ibuf_check_pending_ops(sp, count);
2578 
2579 		mutex_exit(&fil_system->mutex);
2580 
2581 		if (count > 0) {
2582 			os_thread_sleep(20000);
2583 		}
2584 
2585 	} while (count > 0);
2586 
2587 	/* Check for pending IO. */
2588 
2589 	*path = 0;
2590 
2591 	do {
2592 		mutex_enter(&fil_system->mutex);
2593 
2594 		sp = fil_space_get_by_id(id);
2595 
2596 		if (sp == NULL) {
2597 			mutex_exit(&fil_system->mutex);
2598 			return(DB_TABLESPACE_NOT_FOUND);
2599 		}
2600 
2601 		fil_node_t*	node;
2602 
2603 		count = fil_check_pending_io(sp, &node, count);
2604 
2605 		if (count == 0) {
2606 			*path = mem_strdup(node->name);
2607 		}
2608 
2609 		mutex_exit(&fil_system->mutex);
2610 
2611 		if (count > 0) {
2612 			os_thread_sleep(20000);
2613 		}
2614 
2615 	} while (count > 0);
2616 
2617 	ut_ad(sp);
2618 
2619 	*space = sp;
2620 	return(DB_SUCCESS);
2621 }
2622 
2623 /*******************************************************************//**
2624 Closes a single-table tablespace. The tablespace must be cached in the
2625 memory cache. Free all pages used by the tablespace.
2626 @return	DB_SUCCESS or error */
2627 UNIV_INTERN
2628 dberr_t
fil_close_tablespace(trx_t * trx,ulint id)2629 fil_close_tablespace(
2630 /*=================*/
2631 	trx_t*		trx,	/*!< in/out: Transaction covering the close */
2632 	ulint		id)	/*!< in: space id */
2633 {
2634 	char*		path = 0;
2635 	fil_space_t*	space = 0;
2636 
2637 	ut_a(id != TRX_SYS_SPACE);
2638 
2639 	dberr_t		err = fil_check_pending_operations(id, &space, &path);
2640 
2641 	if (err != DB_SUCCESS) {
2642 		return(err);
2643 	}
2644 
2645 	ut_a(space);
2646 	ut_a(path != 0);
2647 
2648 	rw_lock_x_lock(&space->latch);
2649 
2650 #ifndef UNIV_HOTBACKUP
2651 	/* Invalidate in the buffer pool all pages belonging to the
2652 	tablespace. Since we have set space->stop_new_ops = TRUE, readahead
2653 	or ibuf merge can no longer read more pages of this tablespace to the
2654 	buffer pool. Thus we can clean the tablespace out of the buffer pool
2655 	completely and permanently. The flag stop_new_ops also prevents
2656 	fil_flush() from being applied to this tablespace. */
2657 
2658 	buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
2659 #endif
2660 	mutex_enter(&fil_system->mutex);
2661 
2662 	/* If the free is successful, the X lock will be released before
2663 	the space memory data structure is freed. */
2664 
2665 	if (!fil_space_free(id, TRUE)) {
2666 		rw_lock_x_unlock(&space->latch);
2667 		err = DB_TABLESPACE_NOT_FOUND;
2668 	} else {
2669 		err = DB_SUCCESS;
2670 	}
2671 
2672 	mutex_exit(&fil_system->mutex);
2673 
2674 	/* If it is a delete then also delete any generated files, otherwise
2675 	when we drop the database the remove directory will fail. */
2676 
2677 	char*	cfg_name = fil_make_cfg_name(path);
2678 
2679 	os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2680 
2681 	mem_free(path);
2682 	mem_free(cfg_name);
2683 
2684 	return(err);
2685 }
2686 
2687 /*******************************************************************//**
2688 Deletes a single-table tablespace. The tablespace must be cached in the
2689 memory cache.
2690 @return	DB_SUCCESS or error */
2691 UNIV_INTERN
2692 dberr_t
fil_delete_tablespace(ulint id,buf_remove_t buf_remove)2693 fil_delete_tablespace(
2694 /*==================*/
2695 	ulint		id,		/*!< in: space id */
2696 	buf_remove_t	buf_remove)	/*!< in: specify the action to take
2697 					on the tables pages in the buffer
2698 					pool */
2699 {
2700 	char*		path = 0;
2701 	fil_space_t*	space = 0;
2702 
2703 	ut_a(id != TRX_SYS_SPACE);
2704 
2705 	dberr_t		err = fil_check_pending_operations(id, &space, &path);
2706 
2707 	if (err != DB_SUCCESS) {
2708 
2709 		ib_logf(IB_LOG_LEVEL_ERROR,
2710 			"Cannot delete tablespace %lu because it is not "
2711 			"found in the tablespace memory cache.",
2712 			(ulong) id);
2713 
2714 		return(err);
2715 	}
2716 
2717 	ut_a(space);
2718 	ut_a(path != 0);
2719 
2720 	/* Important: We rely on the data dictionary mutex to ensure
2721 	that a race is not possible here. It should serialize the tablespace
2722 	drop/free. We acquire an X latch only to avoid a race condition
2723 	when accessing the tablespace instance via:
2724 
2725 	  fsp_get_available_space_in_free_extents().
2726 
2727 	There our main motivation is to reduce the contention on the
2728 	dictionary mutex. */
2729 
2730 	rw_lock_x_lock(&space->latch);
2731 
2732 #ifndef UNIV_HOTBACKUP
2733 	/* IMPORTANT: Because we have set space::stop_new_ops there
2734 	can't be any new ibuf merges, reads or flushes. We are here
2735 	because node::n_pending was zero above. However, it is still
2736 	possible to have pending read and write requests:
2737 
2738 	A read request can happen because the reader thread has
2739 	gone through the ::stop_new_ops check in buf_page_init_for_read()
2740 	before the flag was set and has not yet incremented ::n_pending
2741 	when we checked it above.
2742 
2743 	A write request can be issued any time because we don't check
2744 	the ::stop_new_ops flag when queueing a block for write.
2745 
2746 	We deal with pending write requests in the following function
2747 	where we'd minimally evict all dirty pages belonging to this
2748 	space from the flush_list. Not that if a block is IO-fixed
2749 	we'll wait for IO to complete.
2750 
2751 	To deal with potential read requests by checking the
2752 	::stop_new_ops flag in fil_io() */
2753 
2754 	buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
2755 
2756 #endif /* !UNIV_HOTBACKUP */
2757 
2758 	/* If it is a delete then also delete any generated files, otherwise
2759 	when we drop the database the remove directory will fail. */
2760 	{
2761 		char*	cfg_name = fil_make_cfg_name(path);
2762 		os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2763 		mem_free(cfg_name);
2764 	}
2765 
2766 	/* Delete the link file pointing to the ibd file we are deleting. */
2767 	if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
2768 		fil_delete_link_file(space->name);
2769 	}
2770 
2771 	mutex_enter(&fil_system->mutex);
2772 
2773 	/* Double check the sanity of pending ops after reacquiring
2774 	the fil_system::mutex. */
2775 	if (fil_space_get_by_id(id)) {
2776 		ut_a(space->n_pending_ops == 0);
2777 		ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2778 		fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2779 		ut_a(node->n_pending == 0);
2780 	}
2781 
2782 	if (!fil_space_free(id, TRUE)) {
2783 		err = DB_TABLESPACE_NOT_FOUND;
2784 	}
2785 
2786 	mutex_exit(&fil_system->mutex);
2787 
2788 	if (err != DB_SUCCESS) {
2789 		rw_lock_x_unlock(&space->latch);
2790 	} else if (!os_file_delete(innodb_file_data_key, path)
2791 		   && !os_file_delete_if_exists(innodb_file_data_key, path)) {
2792 
2793 		/* Note: This is because we have removed the
2794 		tablespace instance from the cache. */
2795 
2796 		err = DB_IO_ERROR;
2797 	}
2798 
2799 	if (err == DB_SUCCESS) {
2800 #ifndef UNIV_HOTBACKUP
2801 		/* Write a log record about the deletion of the .ibd
2802 		file, so that mysqlbackup can replay it in the
2803 		--apply-log phase. We use a dummy mtr and the familiar
2804 		log write mechanism. */
2805 		mtr_t		mtr;
2806 
2807 		/* When replaying the operation in mysqlbackup, do not try
2808 		to write any log record */
2809 		mtr_start(&mtr);
2810 
2811 		fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2812 		mtr_commit(&mtr);
2813 #endif
2814 		err = DB_SUCCESS;
2815 	}
2816 
2817 	mem_free(path);
2818 
2819 	return(err);
2820 }
2821 
2822 /*******************************************************************//**
2823 Returns TRUE if a single-table tablespace is being deleted.
2824 @return TRUE if being deleted */
2825 UNIV_INTERN
2826 ibool
fil_tablespace_is_being_deleted(ulint id)2827 fil_tablespace_is_being_deleted(
2828 /*============================*/
2829 	ulint		id)	/*!< in: space id */
2830 {
2831 	fil_space_t*	space;
2832 	ibool		is_being_deleted;
2833 
2834 	mutex_enter(&fil_system->mutex);
2835 
2836 	space = fil_space_get_by_id(id);
2837 
2838 	ut_a(space != NULL);
2839 
2840 	is_being_deleted = space->stop_new_ops;
2841 
2842 	mutex_exit(&fil_system->mutex);
2843 
2844 	return(is_being_deleted);
2845 }
2846 
2847 #ifndef UNIV_HOTBACKUP
2848 /*******************************************************************//**
2849 Discards a single-table tablespace. The tablespace must be cached in the
2850 memory cache. Discarding is like deleting a tablespace, but
2851 
2852  1. We do not drop the table from the data dictionary;
2853 
2854  2. We remove all insert buffer entries for the tablespace immediately;
2855     in DROP TABLE they are only removed gradually in the background;
2856 
2857  3. Free all the pages in use by the tablespace.
2858 @return	DB_SUCCESS or error */
2859 UNIV_INTERN
2860 dberr_t
fil_discard_tablespace(ulint id)2861 fil_discard_tablespace(
2862 /*===================*/
2863 	ulint	id)	/*!< in: space id */
2864 {
2865 	dberr_t	err;
2866 
2867 	switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
2868 	case DB_SUCCESS:
2869 		break;
2870 
2871 	case DB_IO_ERROR:
2872 		ib_logf(IB_LOG_LEVEL_WARN,
2873 			"While deleting tablespace %lu in DISCARD TABLESPACE."
2874 			" File rename/delete failed: %s",
2875 			(ulong) id, ut_strerr(err));
2876 		break;
2877 
2878 	case DB_TABLESPACE_NOT_FOUND:
2879 		ib_logf(IB_LOG_LEVEL_WARN,
2880 			"Cannot delete tablespace %lu in DISCARD "
2881 			"TABLESPACE. %s",
2882 			(ulong) id, ut_strerr(err));
2883 		break;
2884 
2885 	default:
2886 		ut_error;
2887 	}
2888 
2889 	/* Remove all insert buffer entries for the tablespace */
2890 
2891 	ibuf_delete_for_discarded_space(id);
2892 
2893 	return(err);
2894 }
2895 #endif /* !UNIV_HOTBACKUP */
2896 
2897 /*******************************************************************//**
2898 Renames the memory cache structures of a single-table tablespace.
2899 @return	TRUE if success */
2900 static
2901 ibool
fil_rename_tablespace_in_mem(fil_space_t * space,fil_node_t * node,const char * new_name,const char * new_path)2902 fil_rename_tablespace_in_mem(
2903 /*=========================*/
2904 	fil_space_t*	space,	/*!< in: tablespace memory object */
2905 	fil_node_t*	node,	/*!< in: file node of that tablespace */
2906 	const char*	new_name,	/*!< in: new name */
2907 	const char*	new_path)	/*!< in: new file path */
2908 {
2909 	fil_space_t*	space2;
2910 	const char*	old_name	= space->name;
2911 
2912 	ut_ad(mutex_own(&fil_system->mutex));
2913 
2914 	space2 = fil_space_get_by_name(old_name);
2915 	if (space != space2) {
2916 		fputs("InnoDB: Error: cannot find ", stderr);
2917 		ut_print_filename(stderr, old_name);
2918 		fputs(" in tablespace memory cache\n", stderr);
2919 
2920 		return(FALSE);
2921 	}
2922 
2923 	space2 = fil_space_get_by_name(new_name);
2924 	if (space2 != NULL) {
2925 		fputs("InnoDB: Error: ", stderr);
2926 		ut_print_filename(stderr, new_name);
2927 		fputs(" is already in tablespace memory cache\n", stderr);
2928 
2929 		return(FALSE);
2930 	}
2931 
2932 	HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2933 		    ut_fold_string(space->name), space);
2934 	mem_free(space->name);
2935 	mem_free(node->name);
2936 
2937 	space->name = mem_strdup(new_name);
2938 	node->name = mem_strdup(new_path);
2939 
2940 	HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2941 		    ut_fold_string(new_name), space);
2942 	return(TRUE);
2943 }
2944 
2945 /*******************************************************************//**
2946 Allocates a file name for a single-table tablespace. The string must be freed
2947 by caller with mem_free().
2948 @return	own: file name */
2949 UNIV_INTERN
2950 char*
fil_make_ibd_name(const char * name,bool is_full_path)2951 fil_make_ibd_name(
2952 /*==============*/
2953 	const char*	name,		/*!< in: table name or a dir path */
2954 	bool		is_full_path)	/*!< in: TRUE if it is a dir path */
2955 {
2956 	char*	filename;
2957 	ulint	namelen		= strlen(name);
2958 	ulint	dirlen		= strlen(fil_path_to_mysql_datadir);
2959 	ulint	pathlen		= dirlen + namelen + sizeof "/.ibd";
2960 
2961 	filename = static_cast<char*>(mem_alloc(pathlen));
2962 
2963 	if (is_full_path) {
2964 		memcpy(filename, name, namelen);
2965 		memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2966 	} else {
2967 		ut_snprintf(filename, pathlen, "%s/%s.ibd",
2968 			fil_path_to_mysql_datadir, name);
2969 
2970 	}
2971 
2972 	srv_normalize_path_for_win(filename);
2973 
2974 	return(filename);
2975 }
2976 
2977 /*******************************************************************//**
2978 Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
2979 The string must be freed by caller with mem_free().
2980 @return	own: file name */
2981 UNIV_INTERN
2982 char*
fil_make_isl_name(const char * name)2983 fil_make_isl_name(
2984 /*==============*/
2985 	const char*	name)	/*!< in: table name */
2986 {
2987 	char*	filename;
2988 	ulint	namelen		= strlen(name);
2989 	ulint	dirlen		= strlen(fil_path_to_mysql_datadir);
2990 	ulint	pathlen		= dirlen + namelen + sizeof "/.isl";
2991 
2992 	filename = static_cast<char*>(mem_alloc(pathlen));
2993 
2994 	ut_snprintf(filename, pathlen, "%s/%s.isl",
2995 		fil_path_to_mysql_datadir, name);
2996 
2997 	srv_normalize_path_for_win(filename);
2998 
2999 	return(filename);
3000 }
3001 
3002 /** Test if a tablespace file can be renamed to a new filepath by checking
3003 if that the old filepath exists and the new filepath does not exist.
3004 @param[in]	space_id	tablespace id
3005 @param[in]	old_path	old filepath
3006 @param[in]	new_path	new filepath
3007 @param[in]	is_discarded	whether the tablespace is discarded
3008 @return innodb error code */
3009 dberr_t
fil_rename_tablespace_check(ulint space_id,const char * old_path,const char * new_path,bool is_discarded)3010 fil_rename_tablespace_check(
3011 	ulint		space_id,
3012 	const char*	old_path,
3013 	const char*	new_path,
3014 	bool		is_discarded)
3015 {
3016 	ulint	exists = false;
3017 	os_file_type_t	ftype;
3018 
3019 	if (!is_discarded
3020 	    && os_file_status(old_path, &exists, &ftype)
3021 	    && !exists) {
3022 		ib_logf(IB_LOG_LEVEL_ERROR,
3023 			"Cannot rename '%s' to '%s' for space ID %lu"
3024 			" because the source file does not exist.",
3025 			old_path, new_path, space_id);
3026 
3027 		return(DB_TABLESPACE_NOT_FOUND);
3028 	}
3029 
3030 	exists = false;
3031 	if (!os_file_status(new_path, &exists, &ftype) || exists) {
3032 		ib_logf(IB_LOG_LEVEL_ERROR,
3033 			"Cannot rename '%s' to '%s' for space ID %lu"
3034 			" because the target file exists."
3035 			" Remove the target file and try again.",
3036 			old_path, new_path, space_id);
3037 
3038 		return(DB_TABLESPACE_EXISTS);
3039 	}
3040 
3041 	return(DB_SUCCESS);
3042 }
3043 
3044 /*******************************************************************//**
3045 Renames a single-table tablespace. The tablespace must be cached in the
3046 tablespace memory cache.
3047 @return	TRUE if success */
3048 UNIV_INTERN
3049 ibool
fil_rename_tablespace(const char * old_name_in,ulint id,const char * new_name,const char * new_path_in)3050 fil_rename_tablespace(
3051 /*==================*/
3052 	const char*	old_name_in,	/*!< in: old table name in the
3053 					standard databasename/tablename
3054 					format of InnoDB, or NULL if we
3055 					do the rename based on the space
3056 					id only */
3057 	ulint		id,		/*!< in: space id */
3058 	const char*	new_name,	/*!< in: new table name in the
3059 					standard databasename/tablename
3060 					format of InnoDB */
3061 	const char*	new_path_in)	/*!< in: new full datafile path
3062 					if the tablespace is remotely
3063 					located, or NULL if it is located
3064 					in the normal data directory. */
3065 {
3066 	ibool		success;
3067 	fil_space_t*	space;
3068 	fil_node_t*	node;
3069 	ulint		count		= 0;
3070 	char*		new_path;
3071 	char*		old_name;
3072 	char*		old_path;
3073 	const char*	not_given	= "(name not specified)";
3074 
3075 	ut_a(id != 0);
3076 
3077 retry:
3078 	count++;
3079 
3080 	if (!(count % 1000)) {
3081 		ut_print_timestamp(stderr);
3082 		fputs("  InnoDB: Warning: problems renaming ", stderr);
3083 		ut_print_filename(stderr,
3084 				  old_name_in ? old_name_in : not_given);
3085 		fputs(" to ", stderr);
3086 		ut_print_filename(stderr, new_name);
3087 		fprintf(stderr, ", %lu iterations\n", (ulong) count);
3088 	}
3089 
3090 	mutex_enter(&fil_system->mutex);
3091 
3092 	space = fil_space_get_by_id(id);
3093 
3094 	DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
3095 
3096 	if (space == NULL) {
3097 		ib_logf(IB_LOG_LEVEL_ERROR,
3098 			"Cannot find space id %lu in the tablespace "
3099 			"memory cache, though the table '%s' in a "
3100 			"rename operation should have that id.",
3101 			(ulong) id, old_name_in ? old_name_in : not_given);
3102 		mutex_exit(&fil_system->mutex);
3103 
3104 		return(FALSE);
3105 	}
3106 
3107 	if (count > 25000) {
3108 		space->stop_ios = FALSE;
3109 		mutex_exit(&fil_system->mutex);
3110 
3111 		return(FALSE);
3112 	}
3113 
3114 	/* We temporarily close the .ibd file because we do not trust that
3115 	operating systems can rename an open file. For the closing we have to
3116 	wait until there are no pending i/o's or flushes on the file. */
3117 
3118 	space->stop_ios = TRUE;
3119 
3120 	/* The following code must change when InnoDB supports
3121 	multiple datafiles per tablespace. */
3122 	ut_a(UT_LIST_GET_LEN(space->chain) == 1);
3123 	node = UT_LIST_GET_FIRST(space->chain);
3124 
3125 	if (node->n_pending > 0
3126 	    || node->n_pending_flushes > 0
3127 	    || node->being_extended) {
3128 		/* There are pending i/o's or flushes or the file is
3129 		currently being extended, sleep for a while and
3130 		retry */
3131 
3132 		mutex_exit(&fil_system->mutex);
3133 
3134 		os_thread_sleep(20000);
3135 
3136 		goto retry;
3137 
3138 	} else if (node->modification_counter > node->flush_counter) {
3139 		/* Flush the space */
3140 
3141 		mutex_exit(&fil_system->mutex);
3142 
3143 		os_thread_sleep(20000);
3144 
3145 		fil_flush(id);
3146 
3147 		goto retry;
3148 
3149 	} else if (node->open) {
3150 		/* Close the file */
3151 
3152 		fil_node_close_file(node, fil_system);
3153 	}
3154 
3155 	/* Check that the old name in the space is right */
3156 
3157 	if (old_name_in) {
3158 		old_name = mem_strdup(old_name_in);
3159 		ut_a(strcmp(space->name, old_name) == 0);
3160 	} else {
3161 		old_name = mem_strdup(space->name);
3162 	}
3163 	old_path = mem_strdup(node->name);
3164 
3165 	/* Rename the tablespace and the node in the memory cache */
3166 	new_path = new_path_in ? mem_strdup(new_path_in)
3167 		: fil_make_ibd_name(new_name, false);
3168 
3169 	success = fil_rename_tablespace_in_mem(
3170 		space, node, new_name, new_path);
3171 
3172 	if (success) {
3173 
3174 		DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3175 			goto skip_second_rename; );
3176 
3177 		success = os_file_rename(
3178 			innodb_file_data_key, old_path, new_path);
3179 
3180 		DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3181 skip_second_rename:
3182 			success = FALSE; );
3183 
3184 		if (!success) {
3185 			/* We have to revert the changes we made
3186 			to the tablespace memory cache */
3187 
3188 			ut_a(fil_rename_tablespace_in_mem(
3189 					space, node, old_name, old_path));
3190 		}
3191 	}
3192 
3193 	space->stop_ios = FALSE;
3194 
3195 	mutex_exit(&fil_system->mutex);
3196 
3197 #ifndef UNIV_HOTBACKUP
3198 	if (success && !recv_recovery_on) {
3199 		mtr_t		mtr;
3200 
3201 		mtr_start(&mtr);
3202 
3203 		fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
3204 				 &mtr);
3205 		mtr_commit(&mtr);
3206 	}
3207 #endif /* !UNIV_HOTBACKUP */
3208 
3209 	mem_free(new_path);
3210 	mem_free(old_path);
3211 	mem_free(old_name);
3212 
3213 	return(success);
3214 }
3215 
3216 /*******************************************************************//**
3217 Creates a new InnoDB Symbolic Link (ISL) file.  It is always created
3218 under the 'datadir' of MySQL. The datadir is the directory of a
3219 running mysqld program. We can refer to it by simply using the path '.'.
3220 @return	DB_SUCCESS or error code */
3221 UNIV_INTERN
3222 dberr_t
fil_create_link_file(const char * tablename,const char * filepath)3223 fil_create_link_file(
3224 /*=================*/
3225 	const char*	tablename,	/*!< in: tablename */
3226 	const char*	filepath)	/*!< in: pathname of tablespace */
3227 {
3228 	dberr_t		err = DB_SUCCESS;
3229 	char*		link_filepath;
3230 	char*		prev_filepath = fil_read_link_file(tablename);
3231 
3232 	ut_ad(!srv_read_only_mode);
3233 
3234 	if (prev_filepath) {
3235 		/* Truncate will call this with an existing
3236 		link file which contains the same filepath. */
3237 		if (0 == strcmp(prev_filepath, filepath)) {
3238 			mem_free(prev_filepath);
3239 			return(DB_SUCCESS);
3240 		}
3241 		mem_free(prev_filepath);
3242 	}
3243 
3244 	link_filepath = fil_make_isl_name(tablename);
3245 
3246 	/** Check if the file already exists. */
3247 	FILE*                   file = NULL;
3248 	ibool                   exists;
3249 	os_file_type_t          ftype;
3250 
3251 	bool success = os_file_status(link_filepath, &exists, &ftype);
3252 
3253 	ulint error = 0;
3254 	if (success && !exists) {
3255 		file = fopen(link_filepath, "w");
3256 		if (file == NULL) {
3257 			/* This call will print its own error message */
3258 			error = os_file_get_last_error(true);
3259 		}
3260 	} else {
3261 		error = OS_FILE_ALREADY_EXISTS;
3262 	}
3263 	if (error != 0) {
3264 
3265 		ut_print_timestamp(stderr);
3266 		fputs("  InnoDB: Cannot create file ", stderr);
3267 		ut_print_filename(stderr, link_filepath);
3268 		fputs(".\n", stderr);
3269 
3270 		if (error == OS_FILE_ALREADY_EXISTS) {
3271 			fputs("InnoDB: The link file: ", stderr);
3272 			ut_print_filename(stderr, filepath);
3273 			fputs(" already exists.\n", stderr);
3274 			err = DB_TABLESPACE_EXISTS;
3275 
3276 		} else if (error == OS_FILE_DISK_FULL) {
3277 			err = DB_OUT_OF_FILE_SPACE;
3278 
3279 		} else {
3280 			err = DB_ERROR;
3281 		}
3282 
3283 		/* file is not open, no need to close it. */
3284 		mem_free(link_filepath);
3285 		return(err);
3286 	}
3287 
3288 	ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
3289 	if (rbytes != strlen(filepath)) {
3290 		os_file_get_last_error(true);
3291 		ib_logf(IB_LOG_LEVEL_ERROR,
3292 			"cannot write link file "
3293 			 "%s",filepath);
3294 		err = DB_ERROR;
3295 	}
3296 
3297 	/* Close the file, we only need it at startup */
3298 	fclose(file);
3299 
3300 	mem_free(link_filepath);
3301 
3302 	return(err);
3303 }
3304 
3305 /*******************************************************************//**
3306 Deletes an InnoDB Symbolic Link (ISL) file. */
3307 UNIV_INTERN
3308 void
fil_delete_link_file(const char * tablename)3309 fil_delete_link_file(
3310 /*=================*/
3311 	const char*	tablename)	/*!< in: name of table */
3312 {
3313 	char* link_filepath = fil_make_isl_name(tablename);
3314 
3315 	os_file_delete_if_exists(innodb_file_data_key, link_filepath);
3316 
3317 	mem_free(link_filepath);
3318 }
3319 
3320 /*******************************************************************//**
3321 Reads an InnoDB Symbolic Link (ISL) file.
3322 It is always created under the 'datadir' of MySQL.  The name is of the
3323 form {databasename}/{tablename}. and the isl file is expected to be in a
3324 '{databasename}' directory called '{tablename}.isl'. The caller must free
3325 the memory of the null-terminated path returned if it is not null.
3326 @return	own: filepath found in link file, NULL if not found. */
3327 UNIV_INTERN
3328 char*
fil_read_link_file(const char * name)3329 fil_read_link_file(
3330 /*===============*/
3331 	const char*	name)		/*!< in: tablespace name */
3332 {
3333 	char*		filepath = NULL;
3334 	char*		link_filepath;
3335 	FILE*		file = NULL;
3336 
3337 	/* The .isl file is in the 'normal' tablespace location. */
3338 	link_filepath = fil_make_isl_name(name);
3339 
3340 	file = fopen(link_filepath, "r+b");
3341 
3342 	mem_free(link_filepath);
3343 
3344 	if (file) {
3345 		filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
3346 
3347 		os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
3348 		fclose(file);
3349 
3350 		if (strlen(filepath)) {
3351 			/* Trim whitespace from end of filepath */
3352 			ulint lastch = strlen(filepath) - 1;
3353 			while (lastch > 4 && filepath[lastch] <= 0x20) {
3354 				filepath[lastch--] = 0x00;
3355 			}
3356 			srv_normalize_path_for_win(filepath);
3357 		}
3358 	}
3359 
3360 	return(filepath);
3361 }
3362 
3363 /*******************************************************************//**
3364 Opens a handle to the file linked to in an InnoDB Symbolic Link file.
3365 @return	TRUE if remote linked tablespace file is found and opened. */
3366 UNIV_INTERN
3367 ibool
fil_open_linked_file(const char * tablename,char ** remote_filepath,pfs_os_file_t * remote_file)3368 fil_open_linked_file(
3369 /*===============*/
3370 	const char*	tablename,	/*!< in: database/tablename */
3371 	char**		remote_filepath,/*!< out: remote filepath */
3372 	pfs_os_file_t*	remote_file)	/*!< out: remote file handle */
3373 
3374 {
3375 	ibool		success;
3376 
3377 	*remote_filepath = fil_read_link_file(tablename);
3378 	if (*remote_filepath == NULL) {
3379 		return(FALSE);
3380 	}
3381 
3382 	/* The filepath provided is different from what was
3383 	found in the link file. */
3384 	*remote_file = os_file_create_simple_no_error_handling(
3385 		innodb_file_data_key, *remote_filepath,
3386 		OS_FILE_OPEN, OS_FILE_READ_ONLY,
3387 		&success);
3388 
3389 	if (!success) {
3390 		char*	link_filepath = fil_make_isl_name(tablename);
3391 
3392 		/* The following call prints an error message */
3393 		os_file_get_last_error(true);
3394 
3395 		ib_logf(IB_LOG_LEVEL_ERROR,
3396 			"A link file was found named '%s' "
3397 			"but the linked tablespace '%s' "
3398 			"could not be opened.",
3399 			link_filepath, *remote_filepath);
3400 
3401 		mem_free(link_filepath);
3402 		mem_free(*remote_filepath);
3403 		*remote_filepath = NULL;
3404 	}
3405 
3406 	return(success);
3407 }
3408 
3409 /*******************************************************************//**
3410 Creates a new single-table tablespace to a database directory of MySQL.
3411 Database directories are under the 'datadir' of MySQL. The datadir is the
3412 directory of a running mysqld program. We can refer to it by simply the
3413 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
3414 dir of the mysqld server.
3415 
3416 @return	DB_SUCCESS or error code */
3417 UNIV_INTERN
3418 dberr_t
fil_create_new_single_table_tablespace(ulint space_id,const char * tablename,const char * dir_path,ulint flags,ulint flags2,ulint size)3419 fil_create_new_single_table_tablespace(
3420 /*===================================*/
3421 	ulint		space_id,	/*!< in: space id */
3422 	const char*	tablename,	/*!< in: the table name in the usual
3423 					databasename/tablename format
3424 					of InnoDB */
3425 	const char*	dir_path,	/*!< in: NULL or a dir path */
3426 	ulint		flags,		/*!< in: tablespace flags */
3427 	ulint		flags2,		/*!< in: table flags2 */
3428 	ulint		size)		/*!< in: the initial size of the
3429 					tablespace file in pages,
3430 					must be >= FIL_IBD_FILE_INITIAL_SIZE */
3431 {
3432 	pfs_os_file_t	file;
3433 
3434 	ibool		ret;
3435 	dberr_t		err;
3436 	byte*		buf2;
3437 	byte*		page;
3438 	char*		path;
3439 	ibool		success;
3440 	/* TRUE if a table is created with CREATE TEMPORARY TABLE */
3441 	bool		is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
3442 	bool		has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
3443 
3444 	ut_a(space_id > 0);
3445 	ut_ad(!srv_read_only_mode);
3446 	ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
3447 	ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
3448 	ut_a(fsp_flags_is_valid(flags));
3449 
3450 	if (is_temp) {
3451 		/* Temporary table filepath */
3452 		ut_ad(dir_path);
3453 		path = fil_make_ibd_name(dir_path, true);
3454 	} else if (has_data_dir) {
3455 		ut_ad(dir_path);
3456 		path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
3457 
3458 		/* Since this tablespace file will be created in a
3459 		remote directory, let's create the subdirectories
3460 		in the path, if they are not there already. */
3461 		success = os_file_create_subdirs_if_needed(path);
3462 		if (!success) {
3463 			err = DB_ERROR;
3464 			goto error_exit_3;
3465 		}
3466 	} else {
3467 		path = fil_make_ibd_name(tablename, false);
3468 	}
3469 
3470 	file = os_file_create(
3471 		innodb_file_data_key, path,
3472 		OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
3473 		OS_FILE_NORMAL,
3474 		OS_DATA_FILE,
3475 		&ret);
3476 
3477 	if (ret == FALSE) {
3478 		/* The following call will print an error message */
3479 		ulint	error = os_file_get_last_error(true);
3480 
3481 		ib_logf(IB_LOG_LEVEL_ERROR,
3482 			"Cannot create file '%s'\n", path);
3483 
3484 		if (error == OS_FILE_ALREADY_EXISTS) {
3485 			ib_logf(IB_LOG_LEVEL_ERROR,
3486 				"The file '%s' already exists though the "
3487 				"corresponding table did not exist "
3488 				"in the InnoDB data dictionary. "
3489 				"Have you moved InnoDB .ibd files "
3490 				"around without using the SQL commands "
3491 				"DISCARD TABLESPACE and IMPORT TABLESPACE, "
3492 				"or did mysqld crash in the middle of "
3493 				"CREATE TABLE? "
3494 				"You can resolve the problem by removing "
3495 				"the file '%s' under the 'datadir' of MySQL.",
3496 				path, path);
3497 
3498 			err = DB_TABLESPACE_EXISTS;
3499 			goto error_exit_3;
3500 		}
3501 
3502 		if (error == OS_FILE_DISK_FULL) {
3503 			err = DB_OUT_OF_FILE_SPACE;
3504 			goto error_exit_3;
3505 		}
3506 
3507 		err = DB_ERROR;
3508 		goto error_exit_3;
3509 	}
3510 
3511 	ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
3512 
3513 	if (!ret) {
3514 		err = DB_OUT_OF_FILE_SPACE;
3515 		goto error_exit_2;
3516 	}
3517 
3518 	/* printf("Creating tablespace %s id %lu\n", path, space_id); */
3519 
3520 	/* We have to write the space id to the file immediately and flush the
3521 	file to disk. This is because in crash recovery we must be aware what
3522 	tablespaces exist and what are their space id's, so that we can apply
3523 	the log records to the right file. It may take quite a while until
3524 	buffer pool flush algorithms write anything to the file and flush it to
3525 	disk. If we would not write here anything, the file would be filled
3526 	with zeros from the call of os_file_set_size(), until a buffer pool
3527 	flush would write to it. */
3528 
3529 	buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
3530 	/* Align the memory for file i/o if we might have O_DIRECT set */
3531 	page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
3532 
3533 	memset(page, '\0', UNIV_PAGE_SIZE);
3534 
3535 	/* Add the UNIV_PAGE_SIZE to the table flags and write them to the
3536 	tablespace header. */
3537 	flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
3538 	fsp_header_init_fields(page, space_id, flags);
3539 	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
3540 
3541 	if (!(fsp_flags_is_compressed(flags))) {
3542 		buf_flush_init_for_writing(page, NULL, 0);
3543 		ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
3544 	} else {
3545 		page_zip_des_t	page_zip;
3546 		ulint		zip_size;
3547 
3548 		zip_size = fsp_flags_get_zip_size(flags);
3549 
3550 		page_zip_set_size(&page_zip, zip_size);
3551 		page_zip.data = page + UNIV_PAGE_SIZE;
3552 #ifdef UNIV_DEBUG
3553 		page_zip.m_start =
3554 #endif /* UNIV_DEBUG */
3555 			page_zip.m_end = page_zip.m_nonempty =
3556 			page_zip.n_blobs = 0;
3557 		buf_flush_init_for_writing(page, &page_zip, 0);
3558 		ret = os_file_write(path, file, page_zip.data, 0, zip_size);
3559 	}
3560 
3561 	ut_free(buf2);
3562 
3563 	if (!ret) {
3564 		ib_logf(IB_LOG_LEVEL_ERROR,
3565 			"Could not write the first page to tablespace "
3566 			"'%s'", path);
3567 
3568 		err = DB_ERROR;
3569 		goto error_exit_2;
3570 	}
3571 
3572 	ret = os_file_flush(file);
3573 
3574 	if (!ret) {
3575 		ib_logf(IB_LOG_LEVEL_ERROR,
3576 			"File flush of tablespace '%s' failed", path);
3577 		err = DB_ERROR;
3578 		goto error_exit_2;
3579 	}
3580 
3581 	if (has_data_dir) {
3582 		/* Now that the IBD file is created, make the ISL file. */
3583 		err = fil_create_link_file(tablename, path);
3584 		if (err != DB_SUCCESS) {
3585 			goto error_exit_2;
3586 		}
3587 	}
3588 
3589 	success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
3590 	if (!success || !fil_node_create(path, size, space_id, FALSE)) {
3591 		err = DB_ERROR;
3592 		goto error_exit_1;
3593 	}
3594 
3595 #ifndef UNIV_HOTBACKUP
3596 	{
3597 		mtr_t		mtr;
3598 		ulint		mlog_file_flag = 0;
3599 
3600 		if (is_temp) {
3601 			mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
3602 		}
3603 
3604 		mtr_start(&mtr);
3605 
3606 		fil_op_write_log(flags
3607 				 ? MLOG_FILE_CREATE2
3608 				 : MLOG_FILE_CREATE,
3609 				 space_id, mlog_file_flag, flags,
3610 				 tablename, NULL, &mtr);
3611 
3612 		mtr_commit(&mtr);
3613 	}
3614 #endif
3615 	err = DB_SUCCESS;
3616 
3617 	/* Error code is set.  Cleanup the various variables used.
3618 	These labels reflect the order in which variables are assigned or
3619 	actions are done. */
3620 error_exit_1:
3621 	if (has_data_dir && err != DB_SUCCESS) {
3622 		fil_delete_link_file(tablename);
3623 	}
3624 error_exit_2:
3625 	os_file_close(file);
3626 	if (err != DB_SUCCESS) {
3627 		os_file_delete(innodb_file_data_key, path);
3628 	}
3629 error_exit_3:
3630 	mem_free(path);
3631 
3632 	return(err);
3633 }
3634 
3635 #ifndef UNIV_HOTBACKUP
3636 /********************************************************************//**
3637 Report information about a bad tablespace. */
3638 static
3639 void
fil_report_bad_tablespace(const char * filepath,const char * check_msg,ulint found_id,ulint found_flags,ulint expected_id,ulint expected_flags)3640 fil_report_bad_tablespace(
3641 /*======================*/
3642 	const char*	filepath,	/*!< in: filepath */
3643 	const char*	check_msg,	/*!< in: fil_check_first_page() */
3644 	ulint		found_id,	/*!< in: found space ID */
3645 	ulint		found_flags,	/*!< in: found flags */
3646 	ulint		expected_id,	/*!< in: expected space id */
3647 	ulint		expected_flags)	/*!< in: expected flags */
3648 {
3649 	if (check_msg) {
3650 		ib_logf(IB_LOG_LEVEL_ERROR,
3651 			"Error %s in file '%s',"
3652 			"tablespace id=%lu, flags=%lu. "
3653 			"Please refer to "
3654 			REFMAN "innodb-troubleshooting-datadict.html "
3655 			"for how to resolve the issue.",
3656 			check_msg, filepath,
3657 			(ulong) expected_id, (ulong) expected_flags);
3658 		return;
3659 	}
3660 
3661 	ib_logf(IB_LOG_LEVEL_ERROR,
3662 		"In file '%s', tablespace id and flags are %lu and %lu, "
3663 		"but in the InnoDB data dictionary they are %lu and %lu. "
3664 		"Have you moved InnoDB .ibd files around without using the "
3665 		"commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
3666 		"Please refer to "
3667 		REFMAN "innodb-troubleshooting-datadict.html "
3668 		"for how to resolve the issue.",
3669 		filepath, (ulong) found_id, (ulong) found_flags,
3670 		(ulong) expected_id, (ulong) expected_flags);
3671 }
3672 
3673 /********************************************************************//**
3674 Tries to open a single-table tablespace and optionally checks that the
3675 space id in it is correct. If this does not succeed, print an error message
3676 to the .err log. This function is used to open a tablespace when we start
3677 mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
3678 
3679 NOTE that we assume this operation is used either at the database startup
3680 or under the protection of the dictionary mutex, so that two users cannot
3681 race here. This operation does not leave the file associated with the
3682 tablespace open, but closes it after we have looked at the space id in it.
3683 
3684 If the validate boolean is set, we read the first page of the file and
3685 check that the space id in the file is what we expect. We assume that
3686 this function runs much faster if no check is made, since accessing the
3687 file inode probably is much faster (the OS caches them) than accessing
3688 the first page of the file.  This boolean may be initially FALSE, but if
3689 a remote tablespace is found it will be changed to true.
3690 
3691 If the fix_dict boolean is set, then it is safe to use an internal SQL
3692 statement to update the dictionary tables if they are incorrect.
3693 
3694 @return	DB_SUCCESS or error code */
3695 UNIV_INTERN
3696 dberr_t
fil_open_single_table_tablespace(bool validate,bool fix_dict,ulint id,ulint flags,const char * tablename,const char * path_in)3697 fil_open_single_table_tablespace(
3698 /*=============================*/
3699 	bool		validate,	/*!< in: Do we validate tablespace? */
3700 	bool		fix_dict,	/*!< in: Can we fix the dictionary? */
3701 	ulint		id,		/*!< in: space id */
3702 	ulint		flags,		/*!< in: tablespace flags */
3703 	const char*	tablename,	/*!< in: table name in the
3704 					databasename/tablename format */
3705 	const char*	path_in)	/*!< in: tablespace filepath */
3706 {
3707 	dberr_t		err = DB_SUCCESS;
3708 	bool		dict_filepath_same_as_default = false;
3709 	bool		link_file_found = false;
3710 	bool		link_file_is_bad = false;
3711 	fsp_open_info	def;
3712 	fsp_open_info	dict;
3713 	fsp_open_info	remote;
3714 	ulint		tablespaces_found = 0;
3715 	ulint		valid_tablespaces_found = 0;
3716 
3717 #ifdef UNIV_SYNC_DEBUG
3718 	ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3719 #endif /* UNIV_SYNC_DEBUG */
3720 	ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
3721 
3722 	if (!fsp_flags_is_valid(flags)) {
3723 		return(DB_CORRUPTION);
3724 	}
3725 
3726 	/* If the tablespace was relocated, we do not
3727 	compare the DATA_DIR flag */
3728 	ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
3729 
3730 	memset(&def, 0, sizeof(def));
3731 	memset(&dict, 0, sizeof(dict));
3732 	memset(&remote, 0, sizeof(remote));
3733 
3734 	/* Discover the correct filepath.  We will always look for an ibd
3735 	in the default location. If it is remote, it should not be here. */
3736 	def.filepath = fil_make_ibd_name(tablename, false);
3737 
3738 	/* The path_in was read from SYS_DATAFILES. */
3739 	if (path_in) {
3740 		if (strcmp(def.filepath, path_in)) {
3741 			dict.filepath = mem_strdup(path_in);
3742 			/* possibility of multiple files. */
3743 			validate = true;
3744 		} else {
3745 			dict_filepath_same_as_default = true;
3746 		}
3747 	}
3748 
3749 	link_file_found = fil_open_linked_file(
3750 		tablename, &remote.filepath, &remote.file);
3751 	remote.success = link_file_found;
3752 	if (remote.success) {
3753 		/* possibility of multiple files. */
3754 		validate = true;
3755 		tablespaces_found++;
3756 
3757 		/* A link file was found. MySQL does not allow a DATA
3758 		DIRECTORY to be be the same as the default filepath. */
3759 		ut_a(strcmp(def.filepath, remote.filepath));
3760 
3761 		/* If there was a filepath found in SYS_DATAFILES,
3762 		we hope it was the same as this remote.filepath found
3763 		in the ISL file. */
3764 		if (dict.filepath
3765 		    && (0 == strcmp(dict.filepath, remote.filepath))) {
3766 			remote.success = FALSE;
3767 			os_file_close(remote.file);
3768 			mem_free(remote.filepath);
3769 			remote.filepath = NULL;
3770 			tablespaces_found--;
3771 		}
3772 	}
3773 
3774 	/* Attempt to open the tablespace at other possible filepaths. */
3775 	if (dict.filepath) {
3776 		dict.file = os_file_create_simple_no_error_handling(
3777 			innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
3778 			OS_FILE_READ_ONLY, &dict.success);
3779 		if (dict.success) {
3780 			/* possibility of multiple files. */
3781 			validate = true;
3782 			tablespaces_found++;
3783 		}
3784 	}
3785 
3786 	/* Always look for a file at the default location. */
3787 	ut_a(def.filepath);
3788 	def.file = os_file_create_simple_no_error_handling(
3789 		innodb_file_data_key, def.filepath, OS_FILE_OPEN,
3790 		OS_FILE_READ_ONLY, &def.success);
3791 	if (def.success) {
3792 		tablespaces_found++;
3793 	}
3794 
3795 	/*  We have now checked all possible tablespace locations and
3796 	have a count of how many we found.  If things are normal, we
3797 	only found 1. */
3798 	if (!validate && tablespaces_found == 1) {
3799 		goto skip_validate;
3800 	}
3801 
3802 	/* Read the first page of the datadir tablespace, if found. */
3803 	if (def.success) {
3804 		def.check_msg = fil_read_first_page(
3805 			def.file, FALSE, &def.flags, &def.id,
3806 #ifdef UNIV_LOG_ARCHIVE
3807 			&space_arch_log_no, &space_arch_log_no,
3808 #endif /* UNIV_LOG_ARCHIVE */
3809 			&def.lsn, &def.lsn);
3810 		def.valid = !def.check_msg;
3811 
3812 		/* Validate this single-table-tablespace with SYS_TABLES,
3813 		but do not compare the DATA_DIR flag, in case the
3814 		tablespace was relocated. */
3815 		if (def.valid && def.id == id
3816 		    && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3817 			valid_tablespaces_found++;
3818 		} else {
3819 			def.valid = false;
3820 			/* Do not use this tablespace. */
3821 			fil_report_bad_tablespace(
3822 				def.filepath, def.check_msg, def.id,
3823 				def.flags, id, flags);
3824 		}
3825 	}
3826 
3827 	/* Read the first page of the remote tablespace */
3828 	if (remote.success) {
3829 		remote.check_msg = fil_read_first_page(
3830 			remote.file, FALSE, &remote.flags, &remote.id,
3831 #ifdef UNIV_LOG_ARCHIVE
3832 			&remote.arch_log_no, &remote.arch_log_no,
3833 #endif /* UNIV_LOG_ARCHIVE */
3834 			&remote.lsn, &remote.lsn);
3835 		remote.valid = !remote.check_msg;
3836 
3837 		/* Validate this single-table-tablespace with SYS_TABLES,
3838 		but do not compare the DATA_DIR flag, in case the
3839 		tablespace was relocated. */
3840 		if (remote.valid && remote.id == id
3841 		    && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3842 			valid_tablespaces_found++;
3843 		} else {
3844 			remote.valid = false;
3845 			/* Do not use this linked tablespace. */
3846 			fil_report_bad_tablespace(
3847 				remote.filepath, remote.check_msg, remote.id,
3848 				remote.flags, id, flags);
3849 			link_file_is_bad = true;
3850 		}
3851 	}
3852 
3853 	/* Read the first page of the datadir tablespace, if found. */
3854 	if (dict.success) {
3855 		dict.check_msg = fil_read_first_page(
3856 			dict.file, FALSE, &dict.flags, &dict.id,
3857 #ifdef UNIV_LOG_ARCHIVE
3858 			&dict.arch_log_no, &dict.arch_log_no,
3859 #endif /* UNIV_LOG_ARCHIVE */
3860 			&dict.lsn, &dict.lsn);
3861 		dict.valid = !dict.check_msg;
3862 
3863 		/* Validate this single-table-tablespace with SYS_TABLES,
3864 		but do not compare the DATA_DIR flag, in case the
3865 		tablespace was relocated. */
3866 		if (dict.valid && dict.id == id
3867 		    && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3868 			valid_tablespaces_found++;
3869 		} else {
3870 			dict.valid = false;
3871 			/* Do not use this tablespace. */
3872 			fil_report_bad_tablespace(
3873 				dict.filepath, dict.check_msg, dict.id,
3874 				dict.flags, id, flags);
3875 		}
3876 	}
3877 
3878 	/* Make sense of these three possible locations.
3879 	First, bail out if no tablespace files were found. */
3880 	if (valid_tablespaces_found == 0) {
3881 		/* The following call prints an error message */
3882 		os_file_get_last_error(true);
3883 
3884 		ib_logf(IB_LOG_LEVEL_ERROR,
3885 			"Could not find a valid tablespace file for '%s'. "
3886 			"See " REFMAN "innodb-troubleshooting-datadict.html "
3887 			"for how to resolve the issue.",
3888 			tablename);
3889 
3890 		err = DB_CORRUPTION;
3891 
3892 		goto cleanup_and_exit;
3893 	}
3894 
3895 	/* Do not open any tablespaces if more than one tablespace with
3896 	the correct space ID and flags were found. */
3897 	if (tablespaces_found > 1) {
3898 		ib_logf(IB_LOG_LEVEL_ERROR,
3899 			"A tablespace for %s has been found in "
3900 			"multiple places;", tablename);
3901 		if (def.success) {
3902 			ib_logf(IB_LOG_LEVEL_ERROR,
3903 				"Default location; %s, LSN=" LSN_PF
3904 				", Space ID=%lu, Flags=%lu",
3905 				def.filepath, def.lsn,
3906 				(ulong) def.id, (ulong) def.flags);
3907 		}
3908 		if (remote.success) {
3909 			ib_logf(IB_LOG_LEVEL_ERROR,
3910 				"Remote location; %s, LSN=" LSN_PF
3911 				", Space ID=%lu, Flags=%lu",
3912 				remote.filepath, remote.lsn,
3913 				(ulong) remote.id, (ulong) remote.flags);
3914 		}
3915 		if (dict.success) {
3916 			ib_logf(IB_LOG_LEVEL_ERROR,
3917 				"Dictionary location; %s, LSN=" LSN_PF
3918 				", Space ID=%lu, Flags=%lu",
3919 				dict.filepath, dict.lsn,
3920 				(ulong) dict.id, (ulong) dict.flags);
3921 		}
3922 
3923 		/* Force-recovery will allow some tablespaces to be
3924 		skipped by REDO if there was more than one file found.
3925 		Unlike during the REDO phase of recovery, we now know
3926 		if the tablespace is valid according to the dictionary,
3927 		which was not available then. So if we did not force
3928 		recovery and there is only one good tablespace, ignore
3929 		any bad tablespaces. */
3930 		if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
3931 			ib_logf(IB_LOG_LEVEL_ERROR,
3932 				"Will not open the tablespace for '%s'",
3933 				tablename);
3934 
3935 			if (def.success != def.valid
3936 			    || dict.success != dict.valid
3937 			    || remote.success != remote.valid) {
3938 				err = DB_CORRUPTION;
3939 			} else {
3940 				err = DB_ERROR;
3941 			}
3942 			goto cleanup_and_exit;
3943 		}
3944 
3945 		/* There is only one valid tablespace found and we did
3946 		not use srv_force_recovery during REDO.  Use this one
3947 		tablespace and clean up invalid tablespace pointers */
3948 		if (def.success && !def.valid) {
3949 			def.success = false;
3950 			os_file_close(def.file);
3951 			tablespaces_found--;
3952 		}
3953 		if (dict.success && !dict.valid) {
3954 			dict.success = false;
3955 			os_file_close(dict.file);
3956 			/* Leave dict.filepath so that SYS_DATAFILES
3957 			can be corrected below. */
3958 			tablespaces_found--;
3959 		}
3960 		if (remote.success && !remote.valid) {
3961 			remote.success = false;
3962 			os_file_close(remote.file);
3963 			mem_free(remote.filepath);
3964 			remote.filepath = NULL;
3965 			tablespaces_found--;
3966 		}
3967 	}
3968 
3969 	/* At this point, there should be only one filepath. */
3970 	ut_a(tablespaces_found == 1);
3971 	ut_a(valid_tablespaces_found == 1);
3972 
3973 	/* Only fix the dictionary at startup when there is only one thread.
3974 	Calls to dict_load_table() can be done while holding other latches. */
3975 	if (!fix_dict) {
3976 		goto skip_validate;
3977 	}
3978 
3979 	/* We may need to change what is stored in SYS_DATAFILES or
3980 	SYS_TABLESPACES or adjust the link file.
3981 	Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
3982 	not prevent opening and using the single_table_tablespace either
3983 	this time or the next, we do not check the return code or fail
3984 	to open the tablespace. But dict_update_filepath() will issue a
3985 	warning to the log. */
3986 	if (dict.filepath) {
3987 		if (remote.success) {
3988 			dict_update_filepath(id, remote.filepath);
3989 		} else if (def.success) {
3990 			dict_update_filepath(id, def.filepath);
3991 			if (link_file_is_bad) {
3992 				fil_delete_link_file(tablename);
3993 			}
3994 		} else if (!link_file_found || link_file_is_bad) {
3995 			ut_ad(dict.success);
3996 			/* Fix the link file if we got our filepath
3997 			from the dictionary but a link file did not
3998 			exist or it did not point to a valid file. */
3999 			fil_delete_link_file(tablename);
4000 			fil_create_link_file(tablename, dict.filepath);
4001 		}
4002 
4003 	} else if (remote.success && dict_filepath_same_as_default) {
4004 		dict_update_filepath(id, remote.filepath);
4005 
4006 	} else if (remote.success && path_in == NULL) {
4007 		/* SYS_DATAFILES record for this space ID was not found. */
4008 		dict_insert_tablespace_and_filepath(
4009 			id, tablename, remote.filepath, flags);
4010 	}
4011 
4012 skip_validate:
4013 	if (err != DB_SUCCESS) {
4014 		; // Don't load the tablespace into the cache
4015 	} else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
4016 		err = DB_ERROR;
4017 	} else {
4018 		/* We do not measure the size of the file, that is why
4019 		we pass the 0 below */
4020 
4021 		if (!fil_node_create(remote.success ? remote.filepath :
4022 				     dict.success ? dict.filepath :
4023 				     def.filepath, 0, id, FALSE)) {
4024 			err = DB_ERROR;
4025 		}
4026 	}
4027 
4028 cleanup_and_exit:
4029 	if (remote.success) {
4030 		os_file_close(remote.file);
4031 	}
4032 	if (remote.filepath) {
4033 		mem_free(remote.filepath);
4034 	}
4035 	if (dict.success) {
4036 		os_file_close(dict.file);
4037 	}
4038 	if (dict.filepath) {
4039 		mem_free(dict.filepath);
4040 	}
4041 	if (def.success) {
4042 		os_file_close(def.file);
4043 	}
4044 	mem_free(def.filepath);
4045 
4046 	return(err);
4047 }
4048 #endif /* !UNIV_HOTBACKUP */
4049 
4050 #ifdef UNIV_HOTBACKUP
4051 /*******************************************************************//**
4052 Allocates a file name for an old version of a single-table tablespace.
4053 The string must be freed by caller with mem_free()!
4054 @return	own: file name */
4055 static
4056 char*
fil_make_ibbackup_old_name(const char * name)4057 fil_make_ibbackup_old_name(
4058 /*=======================*/
4059 	const char*	name)		/*!< in: original file name */
4060 {
4061 	static const char suffix[] = "_ibbackup_old_vers_";
4062 	char*	path;
4063 	ulint	len	= strlen(name);
4064 
4065 	path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix)));
4066 
4067 	memcpy(path, name, len);
4068 	memcpy(path + len, suffix, (sizeof suffix) - 1);
4069 	ut_sprintf_timestamp_without_extra_chars(
4070 		path + len + ((sizeof suffix) - 1));
4071 	return(path);
4072 }
4073 #endif /* UNIV_HOTBACKUP */
4074 
4075 
4076 /*******************************************************************//**
4077 Determine the space id of the given file descriptor by reading a few
4078 pages from the beginning of the .ibd file.
4079 @return true if space id was successfully identified, or false. */
4080 static
4081 bool
fil_user_tablespace_find_space_id(fsp_open_info * fsp)4082 fil_user_tablespace_find_space_id(
4083 /*==============================*/
4084 	fsp_open_info*	fsp)	/* in/out: contains file descriptor, which is
4085 				used as input.  contains space_id, which is
4086 				the output */
4087 {
4088 	bool		st;
4089 	os_offset_t	file_size;
4090 
4091 	file_size = os_file_get_size(fsp->file);
4092 
4093 	if (file_size == (os_offset_t) -1) {
4094 		ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s",
4095 			fsp->filepath);
4096 		return(false);
4097 	}
4098 
4099 	/* Assuming a page size, read the space_id from each page and store it
4100 	in a map.  Find out which space_id is agreed on by majority of the
4101 	pages.  Choose that space_id. */
4102 	for (ulint page_size = UNIV_ZIP_SIZE_MIN;
4103 	     page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) {
4104 
4105 		/* map[space_id] = count of pages */
4106 		std::map<ulint, ulint> verify;
4107 
4108 		ulint page_count = 64;
4109 		ulint valid_pages = 0;
4110 
4111 		/* Adjust the number of pages to analyze based on file size */
4112 		while ((page_count * page_size) > file_size) {
4113 			--page_count;
4114 		}
4115 
4116 		ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:"
4117 			"%lu", page_size, page_count);
4118 
4119 		byte* buf = static_cast<byte*>(ut_malloc(2*page_size));
4120 		byte* page = static_cast<byte*>(ut_align(buf, page_size));
4121 
4122 		for (ulint j = 0; j < page_count; ++j) {
4123 
4124 			st = os_file_read(fsp->file, page, (j* page_size), page_size);
4125 
4126 			if (!st) {
4127 				ib_logf(IB_LOG_LEVEL_INFO,
4128 					"READ FAIL: page_no:%lu", j);
4129 				continue;
4130 			}
4131 
4132 			bool uncompressed_ok = false;
4133 
4134 			/* For uncompressed pages, the page size must be equal
4135 			to UNIV_PAGE_SIZE. */
4136 			if (page_size == UNIV_PAGE_SIZE) {
4137 				uncompressed_ok = !buf_page_is_corrupted(
4138 					false, page, 0);
4139 			}
4140 
4141 			bool compressed_ok = !buf_page_is_corrupted(
4142 				false, page, page_size);
4143 
4144 			if (uncompressed_ok || compressed_ok) {
4145 
4146 				ulint space_id = mach_read_from_4(page
4147 					+ FIL_PAGE_SPACE_ID);
4148 
4149 				if (space_id > 0) {
4150 					ib_logf(IB_LOG_LEVEL_INFO,
4151 						"VALID: space:%lu "
4152 						"page_no:%lu page_size:%lu",
4153 						space_id, j, page_size);
4154 					verify[space_id]++;
4155 					++valid_pages;
4156 				}
4157 			}
4158 		}
4159 
4160 		ut_free(buf);
4161 
4162 		ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id "
4163 			"count:%lu", page_size, (ulint) verify.size());
4164 
4165 		const ulint pages_corrupted = 3;
4166 		for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
4167 
4168 			for (std::map<ulint, ulint>::iterator
4169 			     m = verify.begin(); m != verify.end(); ++m ) {
4170 
4171 				ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, "
4172 					"Number of pages matched: %lu/%lu "
4173 					"(%lu)", m->first, m->second,
4174 					valid_pages, page_size);
4175 
4176 				if (m->second == (valid_pages - missed)) {
4177 
4178 					ib_logf(IB_LOG_LEVEL_INFO,
4179 						"Chosen space:%lu\n", m->first);
4180 
4181 					fsp->id = m->first;
4182 					return(true);
4183 				}
4184 			}
4185 
4186 		}
4187 	}
4188 
4189 	return(false);
4190 }
4191 
4192 /*******************************************************************//**
4193 Finds the given page_no of the given space id from the double write buffer,
4194 and copies it to the corresponding .ibd file.
4195 @return true if copy was successful, or false. */
4196 bool
fil_user_tablespace_restore_page(fsp_open_info * fsp,ulint page_no)4197 fil_user_tablespace_restore_page(
4198 /*==============================*/
4199 	fsp_open_info*	fsp,		/* in: contains space id and .ibd
4200 					file information */
4201 	ulint		page_no)	/* in: page_no to obtain from double
4202 					write buffer */
4203 {
4204 	bool	err;
4205 	ulint	flags;
4206 	ulint	zip_size;
4207 	ulint	page_size;
4208 	ulint	buflen;
4209 	byte*	page;
4210 
4211 	ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu",
4212 		page_no, fsp->id);
4213 
4214 	// find if double write buffer has page_no of given space id
4215 	page = recv_sys->dblwr.find_page(fsp->id, page_no);
4216 
4217 	if (!page) {
4218                 ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have "
4219 			"page_no=%lu of space: %lu", page_no, fsp->id);
4220 		err = false;
4221 		goto out;
4222 	}
4223 
4224         flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
4225 	zip_size = fsp_flags_get_zip_size(flags);
4226 	page_size = fsp_flags_get_page_size(flags);
4227 
4228 	ut_ad(page_no == page_get_page_no(page));
4229 
4230 	buflen = zip_size ? zip_size: page_size;
4231 
4232 	ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s",
4233 		buflen, fsp->filepath);
4234 
4235 	err = os_file_write(fsp->filepath, fsp->file, page,
4236 			    (zip_size ? zip_size : page_size) * page_no,
4237 			    buflen);
4238 
4239 	os_file_flush(fsp->file);
4240 out:
4241 	return(err);
4242 }
4243 
4244 /********************************************************************//**
4245 Opens an .ibd file and adds the associated single-table tablespace to the
4246 InnoDB fil0fil.cc data structures.
4247 Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
4248 static
4249 void
fil_validate_single_table_tablespace(const char * tablename,fsp_open_info * fsp)4250 fil_validate_single_table_tablespace(
4251 /*=================================*/
4252 	const char*	tablename,	/*!< in: database/tablename */
4253 	fsp_open_info*	fsp)		/*!< in/out: tablespace info */
4254 {
4255 	bool restore_attempted = false;
4256 
4257 check_first_page:
4258 	fsp->success = TRUE;
4259 	if (const char* check_msg = fil_read_first_page(
4260 		    fsp->file, FALSE, &fsp->flags, &fsp->id,
4261 #ifdef UNIV_LOG_ARCHIVE
4262 		    &fsp->arch_log_no, &fsp->arch_log_no,
4263 #endif /* UNIV_LOG_ARCHIVE */
4264 		    &fsp->lsn, &fsp->lsn)) {
4265 		ib_logf(IB_LOG_LEVEL_ERROR,
4266 			"%s in tablespace %s (table %s)",
4267 			check_msg, fsp->filepath, tablename);
4268 		fsp->success = FALSE;
4269 	}
4270 
4271 	if (!fsp->success) {
4272 		if (!restore_attempted) {
4273 			if (!fil_user_tablespace_find_space_id(fsp)) {
4274 				return;
4275 			}
4276 			restore_attempted = true;
4277 
4278 			if (fsp->id > 0
4279 			    && !fil_user_tablespace_restore_page(fsp, 0)) {
4280 				return;
4281 			}
4282 			goto check_first_page;
4283 		}
4284 		return;
4285 	}
4286 
4287 	if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4288 		ib_logf(IB_LOG_LEVEL_ERROR,
4289 			"Tablespace is not sensible;"
4290 			" Table: %s  Space ID: %lu  Filepath: %s\n",
4291 		tablename, (ulong) fsp->id, fsp->filepath);
4292 		fsp->success = FALSE;
4293 		return;
4294 	}
4295 
4296 	mutex_enter(&fil_system->mutex);
4297 	fil_space_t* space = fil_space_get_by_id(fsp->id);
4298 	mutex_exit(&fil_system->mutex);
4299 	if (space != NULL) {
4300 		char* prev_filepath = fil_space_get_first_path(fsp->id);
4301 
4302 		ib_logf(IB_LOG_LEVEL_ERROR,
4303 			"Attempted to open a previously opened tablespace. "
4304 			"Previous tablespace %s uses space ID: %lu at "
4305 			"filepath: %s. Cannot open tablespace %s which uses "
4306 			"space ID: %lu at filepath: %s",
4307 			space->name, (ulong) space->id, prev_filepath,
4308 			tablename, (ulong) fsp->id, fsp->filepath);
4309 
4310 		mem_free(prev_filepath);
4311 		fsp->success = FALSE;
4312 		return;
4313 	}
4314 
4315 	fsp->success = TRUE;
4316 }
4317 
4318 
4319 /********************************************************************//**
4320 Opens an .ibd file and adds the associated single-table tablespace to the
4321 InnoDB fil0fil.cc data structures. */
4322 static
4323 void
fil_load_single_table_tablespace(const char * dbname,const char * filename)4324 fil_load_single_table_tablespace(
4325 /*=============================*/
4326 	const char*	dbname,		/*!< in: database name */
4327 	const char*	filename)	/*!< in: file name (not a path),
4328 					including the .ibd or .isl extension */
4329 {
4330 	char*		tablename;
4331 	ulint		tablename_len;
4332 	ulint		dbname_len = strlen(dbname);
4333 	ulint		filename_len = strlen(filename);
4334 	fsp_open_info	def;
4335 	fsp_open_info	remote;
4336 	os_offset_t	size;
4337 #ifdef UNIV_HOTBACKUP
4338 	fil_space_t*	space;
4339 #endif
4340 
4341 	memset(&def, 0, sizeof(def));
4342 	memset(&remote, 0, sizeof(remote));
4343 
4344 	/* The caller assured that the extension is ".ibd" or ".isl". */
4345 	ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
4346 	      || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
4347 
4348 	/* Build up the tablename in the standard form database/table. */
4349 	tablename = static_cast<char*>(
4350 		mem_alloc(dbname_len + filename_len + 2));
4351 
4352 	/* When lower_case_table_names = 2 it is possible that the
4353 	dbname is in upper case ,but while storing it in fil_space_t
4354 	we must convert it into lower case */
4355 	sprintf(tablename, "%s" , dbname);
4356 	tablename[dbname_len] = '\0';
4357 
4358         if (lower_case_file_system) {
4359                 dict_casedn_str(tablename);
4360         }
4361 
4362 	sprintf(tablename+dbname_len,"/%s",filename);
4363 	tablename_len = strlen(tablename) - strlen(".ibd");
4364 	tablename[tablename_len] = '\0';
4365 
4366 	/* There may be both .ibd and .isl file in the directory.
4367 	And it is possible that the .isl file refers to a different
4368 	.ibd file.  If so, we open and compare them the first time
4369 	one of them is sent to this function.  So if this table has
4370 	already been loaded, there is nothing to do.*/
4371 	mutex_enter(&fil_system->mutex);
4372 	if (fil_space_get_by_name(tablename)) {
4373 		mem_free(tablename);
4374 		mutex_exit(&fil_system->mutex);
4375 		return;
4376 	}
4377 	mutex_exit(&fil_system->mutex);
4378 
4379 	/* Build up the filepath of the .ibd tablespace in the datadir.
4380 	This must be freed independent of def.success. */
4381 	def.filepath = fil_make_ibd_name(tablename, false);
4382 
4383 #ifdef __WIN__
4384 # ifndef UNIV_HOTBACKUP
4385 	/* If lower_case_table_names is 0 or 2, then MySQL allows database
4386 	directory names with upper case letters. On Windows, all table and
4387 	database names in InnoDB are internally always in lower case. Put the
4388 	file path to lower case, so that we are consistent with InnoDB's
4389 	internal data dictionary. */
4390 
4391 	dict_casedn_str(def.filepath);
4392 # endif /* !UNIV_HOTBACKUP */
4393 #endif
4394 
4395 	/* Check for a link file which locates a remote tablespace. */
4396 	remote.success = fil_open_linked_file(
4397 		tablename, &remote.filepath, &remote.file);
4398 
4399 	/* Read the first page of the remote tablespace */
4400 	if (remote.success) {
4401 		fil_validate_single_table_tablespace(tablename, &remote);
4402 		if (!remote.success) {
4403 			os_file_close(remote.file);
4404 			mem_free(remote.filepath);
4405 		}
4406 	}
4407 
4408 
4409 	/* Try to open the tablespace in the datadir. */
4410 	def.file = os_file_create_simple_no_error_handling(
4411 		innodb_file_data_key, def.filepath, OS_FILE_OPEN,
4412 		OS_FILE_READ_WRITE, &def.success);
4413 
4414 	/* Read the first page of the remote tablespace */
4415 	if (def.success) {
4416 		fil_validate_single_table_tablespace(tablename, &def);
4417 		if (!def.success) {
4418 			os_file_close(def.file);
4419 		}
4420 	}
4421 
4422 	if (!def.success && !remote.success) {
4423 		/* The following call prints an error message */
4424 		os_file_get_last_error(true);
4425 		fprintf(stderr,
4426 			"InnoDB: Error: could not open single-table"
4427 			" tablespace file %s\n", def.filepath);
4428 
4429 		if (!strncmp(filename,
4430 			     tmp_file_prefix, tmp_file_prefix_length)) {
4431 			/* Ignore errors for #sql tablespaces. */
4432 			mem_free(tablename);
4433 			if (remote.filepath) {
4434 				mem_free(remote.filepath);
4435 			}
4436 			if (def.filepath) {
4437 				mem_free(def.filepath);
4438 			}
4439 			return;
4440 		}
4441 no_good_file:
4442 		fprintf(stderr,
4443 			"InnoDB: We do not continue the crash recovery,"
4444 			" because the table may become\n"
4445 			"InnoDB: corrupt if we cannot apply the log"
4446 			" records in the InnoDB log to it.\n"
4447 			"InnoDB: To fix the problem and start mysqld:\n"
4448 			"InnoDB: 1) If there is a permission problem"
4449 			" in the file and mysqld cannot\n"
4450 			"InnoDB: open the file, you should"
4451 			" modify the permissions.\n"
4452 			"InnoDB: 2) If the table is not needed, or you"
4453 			" can restore it from a backup,\n"
4454 			"InnoDB: then you can remove the .ibd file,"
4455 			" and InnoDB will do a normal\n"
4456 			"InnoDB: crash recovery and ignore that table.\n"
4457 			"InnoDB: 3) If the file system or the"
4458 			" disk is broken, and you cannot remove\n"
4459 			"InnoDB: the .ibd file, you can set"
4460 			" innodb_force_recovery > 0 in my.cnf\n"
4461 			"InnoDB: and force InnoDB to continue crash"
4462 			" recovery here.\n");
4463 will_not_choose:
4464 		mem_free(tablename);
4465 		if (remote.filepath) {
4466 			mem_free(remote.filepath);
4467 		}
4468 		if (def.filepath) {
4469 			mem_free(def.filepath);
4470 		}
4471 
4472 		if (srv_force_recovery > 0) {
4473 			ib_logf(IB_LOG_LEVEL_INFO,
4474 				"innodb_force_recovery was set to %lu. "
4475 				"Continuing crash recovery even though we "
4476 				"cannot access the .ibd file of this table.",
4477 				srv_force_recovery);
4478 			return;
4479 		}
4480 
4481 		exit(1);
4482 	}
4483 
4484 	if (def.success && remote.success) {
4485 		ib_logf(IB_LOG_LEVEL_ERROR,
4486 			"Tablespaces for %s have been found in two places;\n"
4487 			"Location 1: SpaceID: %lu  LSN: %lu  File: %s\n"
4488 			"Location 2: SpaceID: %lu  LSN: %lu  File: %s\n"
4489 			"You must delete one of them.",
4490 			tablename, (ulong) def.id, (ulong) def.lsn,
4491 			def.filepath, (ulong) remote.id, (ulong) remote.lsn,
4492 			remote.filepath);
4493 
4494 		def.success = FALSE;
4495 		os_file_close(def.file);
4496 		os_file_close(remote.file);
4497 		goto will_not_choose;
4498 	}
4499 
4500 	/* At this point, only one tablespace is open */
4501 	ut_a(def.success == !remote.success);
4502 
4503 	fsp_open_info*	fsp = def.success ? &def : &remote;
4504 
4505 	/* Get and test the file size. */
4506 	size = os_file_get_size(fsp->file);
4507 
4508 	if (size == (os_offset_t) -1) {
4509 		/* The following call prints an error message */
4510 		os_file_get_last_error(true);
4511 
4512 		ib_logf(IB_LOG_LEVEL_ERROR,
4513 			"could not measure the size of single-table "
4514 			"tablespace file %s", fsp->filepath);
4515 
4516 		os_file_close(fsp->file);
4517 		goto no_good_file;
4518 	}
4519 
4520 	/* Every .ibd file is created >= 4 pages in size. Smaller files
4521 	cannot be ok. */
4522 	ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
4523 	if (size < minimum_size) {
4524 #ifndef UNIV_HOTBACKUP
4525 		ib_logf(IB_LOG_LEVEL_ERROR,
4526 			"The size of single-table tablespace file %s "
4527 			"is only " UINT64PF ", should be at least %lu!",
4528 			fsp->filepath, size, minimum_size);
4529 		os_file_close(fsp->file);
4530 		goto no_good_file;
4531 #else
4532 		fsp->id = ULINT_UNDEFINED;
4533 		fsp->flags = 0;
4534 #endif /* !UNIV_HOTBACKUP */
4535 	}
4536 
4537 #ifdef UNIV_HOTBACKUP
4538 	if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4539 		char*	new_path;
4540 
4541 		fprintf(stderr,
4542 			"InnoDB: Renaming tablespace %s of id %lu,\n"
4543 			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4544 			"InnoDB: because its size %" PRId64 " is too small"
4545 			" (< 4 pages 16 kB each),\n"
4546 			"InnoDB: or the space id in the file header"
4547 			" is not sensible.\n"
4548 			"InnoDB: This can happen in an mysqlbackup run,"
4549 			" and is not dangerous.\n",
4550 			fsp->filepath, fsp->id, fsp->filepath, size);
4551 		os_file_close(fsp->file);
4552 
4553 		new_path = fil_make_ibbackup_old_name(fsp->filepath);
4554 
4555 		bool	success = os_file_rename(
4556 			innodb_file_data_key, fsp->filepath, new_path);
4557 
4558 		ut_a(success);
4559 
4560 		mem_free(new_path);
4561 
4562 		goto func_exit_after_close;
4563 	}
4564 
4565 	/* A backup may contain the same space several times, if the space got
4566 	renamed at a sensitive time. Since it is enough to have one version of
4567 	the space, we rename the file if a space with the same space id
4568 	already exists in the tablespace memory cache. We rather rename the
4569 	file than delete it, because if there is a bug, we do not want to
4570 	destroy valuable data. */
4571 
4572 	mutex_enter(&fil_system->mutex);
4573 
4574 	space = fil_space_get_by_id(fsp->id);
4575 
4576 	if (space) {
4577 		char*	new_path;
4578 
4579 		fprintf(stderr,
4580 			"InnoDB: Renaming tablespace %s of id %lu,\n"
4581 			"InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4582 			"InnoDB: because space %s with the same id\n"
4583 			"InnoDB: was scanned earlier. This can happen"
4584 			" if you have renamed tables\n"
4585 			"InnoDB: during an mysqlbackup run.\n",
4586 			fsp->filepath, fsp->id, fsp->filepath,
4587 			space->name);
4588 		os_file_close(fsp->file);
4589 
4590 		new_path = fil_make_ibbackup_old_name(fsp->filepath);
4591 
4592 		mutex_exit(&fil_system->mutex);
4593 
4594 		bool	success = os_file_rename(
4595 			innodb_file_data_key, fsp->filepath, new_path);
4596 
4597 		ut_a(success);
4598 
4599 		mem_free(new_path);
4600 
4601 		goto func_exit_after_close;
4602 	}
4603 	mutex_exit(&fil_system->mutex);
4604 #endif /* UNIV_HOTBACKUP */
4605 	ibool file_space_create_success = fil_space_create(
4606 		tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
4607 
4608 	if (!file_space_create_success) {
4609 		if (srv_force_recovery > 0) {
4610 			fprintf(stderr,
4611 				"InnoDB: innodb_force_recovery was set"
4612 				" to %lu. Continuing crash recovery\n"
4613 				"InnoDB: even though the tablespace"
4614 				" creation of this table failed.\n",
4615 				srv_force_recovery);
4616 			goto func_exit;
4617 		}
4618 
4619 		/* Exit here with a core dump, stack, etc. */
4620 		ut_a(file_space_create_success);
4621 	}
4622 
4623 	/* We do not use the size information we have about the file, because
4624 	the rounding formula for extents and pages is somewhat complex; we
4625 	let fil_node_open() do that task. */
4626 
4627 	if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
4628 		ut_error;
4629 	}
4630 
4631 func_exit:
4632 	os_file_close(fsp->file);
4633 
4634 #ifdef UNIV_HOTBACKUP
4635 func_exit_after_close:
4636 #else
4637 	ut_ad(!mutex_own(&fil_system->mutex));
4638 #endif
4639 	mem_free(tablename);
4640 	if (remote.success) {
4641 		mem_free(remote.filepath);
4642 	}
4643 	mem_free(def.filepath);
4644 }
4645 
4646 /***********************************************************************//**
4647 A fault-tolerant function that tries to read the next file name in the
4648 directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
4649 idea is to read as much good data as we can and jump over bad data.
4650 @return 0 if ok, -1 if error even after the retries, 1 if at the end
4651 of the directory */
4652 static
4653 int
fil_file_readdir_next_file(dberr_t * err,const char * dirname,os_file_dir_t dir,os_file_stat_t * info)4654 fil_file_readdir_next_file(
4655 /*=======================*/
4656 	dberr_t*	err,	/*!< out: this is set to DB_ERROR if an error
4657 				was encountered, otherwise not changed */
4658 	const char*	dirname,/*!< in: directory name or path */
4659 	os_file_dir_t	dir,	/*!< in: directory stream */
4660 	os_file_stat_t*	info)	/*!< in/out: buffer where the
4661 				info is returned */
4662 {
4663 	for (ulint i = 0; i < 100; i++) {
4664 		int	ret = os_file_readdir_next_file(dirname, dir, info);
4665 
4666 		if (ret != -1) {
4667 
4668 			return(ret);
4669 		}
4670 
4671 		ib_logf(IB_LOG_LEVEL_ERROR,
4672 			"os_file_readdir_next_file() returned -1 in "
4673 			"directory %s, crash recovery may have failed "
4674 			"for some .ibd files!", dirname);
4675 
4676 		*err = DB_ERROR;
4677 	}
4678 
4679 	return(-1);
4680 }
4681 
4682 /********************************************************************//**
4683 At the server startup, if we need crash recovery, scans the database
4684 directories under the MySQL datadir, looking for .ibd files. Those files are
4685 single-table tablespaces. We need to know the space id in each of them so that
4686 we know into which file we should look to check the contents of a page stored
4687 in the doublewrite buffer, also to know where to apply log records where the
4688 space id is != 0.
4689 @return	DB_SUCCESS or error number */
4690 UNIV_INTERN
4691 dberr_t
fil_load_single_table_tablespaces(void)4692 fil_load_single_table_tablespaces(void)
4693 /*===================================*/
4694 {
4695 	int		ret;
4696 	char*		dbpath		= NULL;
4697 	ulint		dbpath_len	= 100;
4698 	os_file_dir_t	dir;
4699 	os_file_dir_t	dbdir;
4700 	os_file_stat_t	dbinfo;
4701 	os_file_stat_t	fileinfo;
4702 	dberr_t		err		= DB_SUCCESS;
4703 
4704 	/* The datadir of MySQL is always the default directory of mysqld */
4705 
4706 	dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
4707 
4708 	if (dir == NULL) {
4709 
4710 		return(DB_ERROR);
4711 	}
4712 
4713 	dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4714 
4715 	/* Scan all directories under the datadir. They are the database
4716 	directories of MySQL. */
4717 
4718 	ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
4719 					 &dbinfo);
4720 	while (ret == 0) {
4721 		ulint len;
4722 		/* printf("Looking at %s in datadir\n", dbinfo.name); */
4723 
4724 		if (dbinfo.type == OS_FILE_TYPE_FILE
4725 		    || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
4726 
4727 			goto next_datadir_item;
4728 		}
4729 
4730 		/* We found a symlink or a directory; try opening it to see
4731 		if a symlink is a directory */
4732 
4733 		len = strlen(fil_path_to_mysql_datadir)
4734 			+ strlen (dbinfo.name) + 2;
4735 		if (len > dbpath_len) {
4736 			dbpath_len = len;
4737 
4738 			if (dbpath) {
4739 				mem_free(dbpath);
4740 			}
4741 
4742 			dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4743 		}
4744 		ut_snprintf(dbpath, dbpath_len,
4745 			    "%s/%s", fil_path_to_mysql_datadir, dbinfo.name);
4746 		srv_normalize_path_for_win(dbpath);
4747 
4748 		dbdir = os_file_opendir(dbpath, FALSE);
4749 
4750 		if (dbdir != NULL) {
4751 
4752 			/* We found a database directory; loop through it,
4753 			looking for possible .ibd files in it */
4754 
4755 			ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
4756 							 &fileinfo);
4757 			while (ret == 0) {
4758 
4759 				if (fileinfo.type == OS_FILE_TYPE_DIR) {
4760 
4761 					goto next_file_item;
4762 				}
4763 
4764 				/* We found a symlink or a file */
4765 				if (strlen(fileinfo.name) > 4
4766 				    && (0 == strcmp(fileinfo.name
4767 						   + strlen(fileinfo.name) - 4,
4768 						   ".ibd")
4769 					|| 0 == strcmp(fileinfo.name
4770 						   + strlen(fileinfo.name) - 4,
4771 						   ".isl"))) {
4772 					/* The name ends in .ibd or .isl;
4773 					try opening the file */
4774 					fil_load_single_table_tablespace(
4775 						dbinfo.name, fileinfo.name);
4776 				}
4777 next_file_item:
4778 				ret = fil_file_readdir_next_file(&err,
4779 								 dbpath, dbdir,
4780 								 &fileinfo);
4781 			}
4782 
4783 			if (0 != os_file_closedir(dbdir)) {
4784 				fputs("InnoDB: Warning: could not"
4785 				      " close database directory ", stderr);
4786 				ut_print_filename(stderr, dbpath);
4787 				putc('\n', stderr);
4788 
4789 				err = DB_ERROR;
4790 			}
4791 		}
4792 
4793 next_datadir_item:
4794 		ret = fil_file_readdir_next_file(&err,
4795 						 fil_path_to_mysql_datadir,
4796 						 dir, &dbinfo);
4797 	}
4798 
4799 	mem_free(dbpath);
4800 
4801 	if (0 != os_file_closedir(dir)) {
4802 		fprintf(stderr,
4803 			"InnoDB: Error: could not close MySQL datadir\n");
4804 
4805 		return(DB_ERROR);
4806 	}
4807 
4808 	return(err);
4809 }
4810 
4811 /*******************************************************************//**
4812 Returns TRUE if a single-table tablespace does not exist in the memory cache,
4813 or is being deleted there.
4814 @return	TRUE if does not exist or is being deleted */
4815 UNIV_INTERN
4816 ibool
fil_tablespace_deleted_or_being_deleted_in_mem(ulint id,ib_int64_t version)4817 fil_tablespace_deleted_or_being_deleted_in_mem(
4818 /*===========================================*/
4819 	ulint		id,	/*!< in: space id */
4820 	ib_int64_t	version)/*!< in: tablespace_version should be this; if
4821 				you pass -1 as the value of this, then this
4822 				parameter is ignored */
4823 {
4824 	fil_space_t*	space;
4825 
4826 	ut_ad(fil_system);
4827 
4828 	mutex_enter(&fil_system->mutex);
4829 
4830 	space = fil_space_get_by_id(id);
4831 
4832 	if (space == NULL || space->stop_new_ops) {
4833 		mutex_exit(&fil_system->mutex);
4834 
4835 		return(TRUE);
4836 	}
4837 
4838 	if (version != ((ib_int64_t)-1)
4839 	    && space->tablespace_version != version) {
4840 		mutex_exit(&fil_system->mutex);
4841 
4842 		return(TRUE);
4843 	}
4844 
4845 	mutex_exit(&fil_system->mutex);
4846 
4847 	return(FALSE);
4848 }
4849 
4850 /*******************************************************************//**
4851 Returns TRUE if a single-table tablespace exists in the memory cache.
4852 @return	TRUE if exists */
4853 UNIV_INTERN
4854 ibool
fil_tablespace_exists_in_mem(ulint id)4855 fil_tablespace_exists_in_mem(
4856 /*=========================*/
4857 	ulint	id)	/*!< in: space id */
4858 {
4859 	fil_space_t*	space;
4860 
4861 	ut_ad(fil_system);
4862 
4863 	mutex_enter(&fil_system->mutex);
4864 
4865 	space = fil_space_get_by_id(id);
4866 
4867 	mutex_exit(&fil_system->mutex);
4868 
4869 	return(space != NULL);
4870 }
4871 
4872 /*******************************************************************//**
4873 Report that a tablespace for a table was not found. */
4874 static
4875 void
fil_report_missing_tablespace(const char * name,ulint space_id)4876 fil_report_missing_tablespace(
4877 /*===========================*/
4878 	const char*	name,			/*!< in: table name */
4879 	ulint		space_id)		/*!< in: table's space id */
4880 {
4881 	char index_name[MAX_FULL_NAME_LEN + 1];
4882 
4883 	innobase_format_name(index_name, sizeof(index_name), name, TRUE);
4884 
4885 	ib_logf(IB_LOG_LEVEL_ERROR,
4886 		"Table %s in the InnoDB data dictionary has tablespace id %lu, "
4887 		"but tablespace with that id or name does not exist. Have "
4888 		"you deleted or moved .ibd files? This may also be a table "
4889 		"created with CREATE TEMPORARY TABLE whose .ibd and .frm "
4890 		"files MySQL automatically removed, but the table still "
4891 		"exists in the InnoDB internal data dictionary.",
4892 		name, space_id);
4893 }
4894 
4895 /*******************************************************************//**
4896 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
4897 cache. Note that if we have not done a crash recovery at the database startup,
4898 there may be many tablespaces which are not yet in the memory cache.
4899 @return	TRUE if a matching tablespace exists in the memory cache */
4900 UNIV_INTERN
4901 ibool
fil_space_for_table_exists_in_mem(ulint id,const char * name,ibool mark_space,ibool print_error_if_does_not_exist,bool adjust_space,mem_heap_t * heap,table_id_t table_id)4902 fil_space_for_table_exists_in_mem(
4903 /*==============================*/
4904 	ulint		id,		/*!< in: space id */
4905 	const char*	name,		/*!< in: table name used in
4906 					fil_space_create().  Either the
4907 					standard 'dbname/tablename' format
4908 					or table->dir_path_of_temp_table */
4909 	ibool		mark_space,	/*!< in: in crash recovery, at database
4910 					startup we mark all spaces which have
4911 					an associated table in the InnoDB
4912 					data dictionary, so that
4913 					we can print a warning about orphaned
4914 					tablespaces */
4915 	ibool		print_error_if_does_not_exist,
4916 					/*!< in: print detailed error
4917 					information to the .err log if a
4918 					matching tablespace is not found from
4919 					memory */
4920 	bool		adjust_space,	/*!< in: whether to adjust space id
4921 					when find table space mismatch */
4922 	mem_heap_t*	heap,		/*!< in: heap memory */
4923 	table_id_t	table_id)	/*!< in: table id */
4924 {
4925 	fil_space_t*	fnamespace;
4926 	fil_space_t*	space;
4927 
4928 	ut_ad(fil_system);
4929 
4930 	mutex_enter(&fil_system->mutex);
4931 
4932 	/* Look if there is a space with the same id */
4933 
4934 	space = fil_space_get_by_id(id);
4935 
4936 	/* Look if there is a space with the same name; the name is the
4937 	directory path from the datadir to the file */
4938 
4939 	fnamespace = fil_space_get_by_name(name);
4940 	if (space && space == fnamespace) {
4941 		/* Found */
4942 
4943 		if (mark_space) {
4944 			space->mark = TRUE;
4945 		}
4946 
4947 		mutex_exit(&fil_system->mutex);
4948 
4949 		return(TRUE);
4950 	}
4951 
4952 	/* Info from "fnamespace" comes from the ibd file itself, it can
4953 	be different from data obtained from System tables since it is
4954 	not transactional. If adjust_space is set, and the mismatching
4955 	space are between a user table and its temp table, we shall
4956 	adjust the ibd file name according to system table info */
4957 	if (adjust_space
4958 	    && space != NULL
4959 	    && row_is_mysql_tmp_table_name(space->name)
4960 	    && !row_is_mysql_tmp_table_name(name)) {
4961 
4962 		mutex_exit(&fil_system->mutex);
4963 
4964 		DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
4965 				DBUG_SUICIDE(););
4966 
4967 		if (fnamespace) {
4968 			char*	tmp_name;
4969 
4970 			tmp_name = dict_mem_create_temporary_tablename(
4971 				heap, name, table_id);
4972 
4973 			fil_rename_tablespace(fnamespace->name, fnamespace->id,
4974 					      tmp_name, NULL);
4975 		}
4976 
4977 		DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
4978 				DBUG_SUICIDE(););
4979 
4980 		fil_rename_tablespace(space->name, id, name, NULL);
4981 
4982 		DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
4983 				DBUG_SUICIDE(););
4984 
4985 		mutex_enter(&fil_system->mutex);
4986 		fnamespace = fil_space_get_by_name(name);
4987 		ut_ad(space == fnamespace);
4988 		mutex_exit(&fil_system->mutex);
4989 
4990 		return(TRUE);
4991 	}
4992 
4993 	if (!print_error_if_does_not_exist) {
4994 
4995 		mutex_exit(&fil_system->mutex);
4996 
4997 		return(FALSE);
4998 	}
4999 
5000 	if (space == NULL) {
5001 		if (fnamespace == NULL) {
5002 			if (print_error_if_does_not_exist) {
5003 				fil_report_missing_tablespace(name, id);
5004 			}
5005 		} else {
5006 			ut_print_timestamp(stderr);
5007 			fputs("  InnoDB: Error: table ", stderr);
5008 			ut_print_filename(stderr, name);
5009 			fprintf(stderr, "\n"
5010 				"InnoDB: in InnoDB data dictionary has"
5011 				" tablespace id %lu,\n"
5012 				"InnoDB: but a tablespace with that id"
5013 				" does not exist. There is\n"
5014 				"InnoDB: a tablespace of name %s and id %lu,"
5015 				" though. Have\n"
5016 				"InnoDB: you deleted or moved .ibd files?\n",
5017 				(ulong) id, fnamespace->name,
5018 				(ulong) fnamespace->id);
5019 		}
5020 error_exit:
5021 		fputs("InnoDB: Please refer to\n"
5022 		      "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
5023 		      "InnoDB: for how to resolve the issue.\n", stderr);
5024 
5025 		mutex_exit(&fil_system->mutex);
5026 
5027 		return(FALSE);
5028 	}
5029 
5030 	if (0 != strcmp(space->name, name)) {
5031 		ut_print_timestamp(stderr);
5032 		fputs("  InnoDB: Error: table ", stderr);
5033 		ut_print_filename(stderr, name);
5034 		fprintf(stderr, "\n"
5035 			"InnoDB: in InnoDB data dictionary has"
5036 			" tablespace id %lu,\n"
5037 			"InnoDB: but the tablespace with that id"
5038 			" has name %s.\n"
5039 			"InnoDB: Have you deleted or moved .ibd files?\n",
5040 			(ulong) id, space->name);
5041 
5042 		if (fnamespace != NULL) {
5043 			fputs("InnoDB: There is a tablespace"
5044 			      " with the right name\n"
5045 			      "InnoDB: ", stderr);
5046 			ut_print_filename(stderr, fnamespace->name);
5047 			fprintf(stderr, ", but its id is %lu.\n",
5048 				(ulong) fnamespace->id);
5049 		}
5050 
5051 		goto error_exit;
5052 	}
5053 
5054 	mutex_exit(&fil_system->mutex);
5055 
5056 	return(FALSE);
5057 }
5058 
5059 /*******************************************************************//**
5060 Checks if a single-table tablespace for a given table name exists in the
5061 tablespace memory cache.
5062 @return	space id, ULINT_UNDEFINED if not found */
5063 UNIV_INTERN
5064 ulint
fil_get_space_id_for_table(const char * tablename)5065 fil_get_space_id_for_table(
5066 /*=======================*/
5067 	const char*	tablename)	/*!< in: table name in the standard
5068 				'databasename/tablename' format */
5069 {
5070 	fil_space_t*	fnamespace;
5071 	ulint		id		= ULINT_UNDEFINED;
5072 
5073 	ut_ad(fil_system);
5074 
5075 	mutex_enter(&fil_system->mutex);
5076 
5077 	/* Look if there is a space with the same name. */
5078 
5079 	fnamespace = fil_space_get_by_name(tablename);
5080 
5081 	if (fnamespace) {
5082 		id = fnamespace->id;
5083 	}
5084 
5085 	mutex_exit(&fil_system->mutex);
5086 
5087 	return(id);
5088 }
5089 
5090 /**********************************************************************//**
5091 Tries to extend a data file so that it would accommodate the number of pages
5092 given. The tablespace must be cached in the memory cache. If the space is big
5093 enough already, does nothing.
5094 @return	TRUE if success */
5095 UNIV_INTERN
5096 ibool
fil_extend_space_to_desired_size(ulint * actual_size,ulint space_id,ulint size_after_extend)5097 fil_extend_space_to_desired_size(
5098 /*=============================*/
5099 	ulint*	actual_size,	/*!< out: size of the space after extension;
5100 				if we ran out of disk space this may be lower
5101 				than the desired size */
5102 	ulint	space_id,	/*!< in: space id */
5103 	ulint	size_after_extend)/*!< in: desired size in pages after the
5104 				extension; if the current space size is bigger
5105 				than this already, the function does nothing */
5106 {
5107 	fil_node_t*	node;
5108 	fil_space_t*	space;
5109 	byte*		buf2;
5110 	byte*		buf;
5111 	ulint		buf_size;
5112 	ulint		start_page_no;
5113 	ulint		file_start_page_no;
5114 	ulint		page_size;
5115 	ulint		pages_added;
5116 	ibool		success;
5117 
5118 	ut_ad(!srv_read_only_mode);
5119 
5120 retry:
5121 	pages_added = 0;
5122 	success = TRUE;
5123 
5124 	fil_mutex_enter_and_prepare_for_io(space_id);
5125 
5126 	space = fil_space_get_by_id(space_id);
5127 	ut_a(space);
5128 
5129 	if (space->size >= size_after_extend) {
5130 		/* Space already big enough */
5131 
5132 		*actual_size = space->size;
5133 
5134 		mutex_exit(&fil_system->mutex);
5135 
5136 		return(TRUE);
5137 	}
5138 
5139 	page_size = fsp_flags_get_zip_size(space->flags);
5140 	if (!page_size) {
5141 		page_size = UNIV_PAGE_SIZE;
5142 	}
5143 
5144 	node = UT_LIST_GET_LAST(space->chain);
5145 
5146 	if (!node->being_extended) {
5147 		/* Mark this node as undergoing extension. This flag
5148 		is used by other threads to wait for the extension
5149 		opereation to finish. */
5150 		node->being_extended = TRUE;
5151 	} else {
5152 		/* Another thread is currently extending the file. Wait
5153 		for it to finish.
5154 		It'd have been better to use event driven mechanism but
5155 		the entire module is peppered with polling stuff. */
5156 		mutex_exit(&fil_system->mutex);
5157 		os_thread_sleep(100000);
5158 		goto retry;
5159 	}
5160 
5161 	if (!fil_node_prepare_for_io(node, fil_system, space)) {
5162 		/* The tablespace data file, such as .ibd file, is missing */
5163 		node->being_extended = false;
5164 		mutex_exit(&fil_system->mutex);
5165 
5166 		return(false);
5167 	}
5168 
5169 	/* At this point it is safe to release fil_system mutex. No
5170 	other thread can rename, delete or close the file because
5171 	we have set the node->being_extended flag. */
5172 	mutex_exit(&fil_system->mutex);
5173 
5174 	start_page_no = space->size;
5175 	file_start_page_no = space->size - node->size;
5176 
5177 	/* Extend at most 64 pages at a time */
5178 	buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
5179 	buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
5180 	buf = static_cast<byte*>(ut_align(buf2, page_size));
5181 
5182 	memset(buf, 0, buf_size);
5183 
5184 	while (start_page_no < size_after_extend) {
5185 		ulint		n_pages
5186 			= ut_min(buf_size / page_size,
5187 				 size_after_extend - start_page_no);
5188 
5189 		os_offset_t	offset
5190 			= ((os_offset_t) (start_page_no - file_start_page_no))
5191 			* page_size;
5192 #ifdef UNIV_HOTBACKUP
5193 		success = os_file_write(node->name, node->handle, buf,
5194 					offset, page_size * n_pages);
5195 #else
5196 		success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
5197 				 node->name, node->handle, buf,
5198 				 offset, page_size * n_pages,
5199 				 NULL, NULL);
5200 #endif /* UNIV_HOTBACKUP */
5201 		if (success) {
5202 			os_has_said_disk_full = FALSE;
5203 		} else {
5204 			/* Let us measure the size of the file to determine
5205 			how much we were able to extend it */
5206 			os_offset_t	size;
5207 
5208 			size = os_file_get_size(node->handle);
5209 			ut_a(size != (os_offset_t) -1);
5210 
5211 			n_pages = ((ulint) (size / page_size))
5212 				- node->size - pages_added;
5213 
5214 			pages_added += n_pages;
5215 			break;
5216 		}
5217 
5218 		start_page_no += n_pages;
5219 		pages_added += n_pages;
5220 	}
5221 
5222 	mem_free(buf2);
5223 
5224 	mutex_enter(&fil_system->mutex);
5225 
5226 	ut_a(node->being_extended);
5227 
5228 	space->size += pages_added;
5229 	node->size += pages_added;
5230 	node->being_extended = FALSE;
5231 
5232 	fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
5233 
5234 	*actual_size = space->size;
5235 
5236 #ifndef UNIV_HOTBACKUP
5237 	if (space_id == 0) {
5238 		ulint pages_per_mb = (1024 * 1024) / page_size;
5239 
5240 		/* Keep the last data file size info up to date, rounded to
5241 		full megabytes */
5242 
5243 		srv_data_file_sizes[srv_n_data_files - 1]
5244 			= (node->size / pages_per_mb) * pages_per_mb;
5245 	}
5246 #endif /* !UNIV_HOTBACKUP */
5247 
5248 	/*
5249 	printf("Extended %s to %lu, actual size %lu pages\n", space->name,
5250 	size_after_extend, *actual_size); */
5251 	mutex_exit(&fil_system->mutex);
5252 
5253 	fil_flush(space_id);
5254 
5255 	return(success);
5256 }
5257 
#ifdef UNIV_HOTBACKUP
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
mysqlbackup --apply-log phase we extended the spaces on-demand so that log
records could be applied, but that may have left spaces still too small
compared to the size stored in the space header.

For every space in fil_system->space_list the first page is read, the size
recorded in its header is extracted with fsp_get_size_low(), and the space
is extended to that size. Failure to read the page or to extend the space
is fatal (asserted with ut_a). */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
	fil_space_t*	space;
	byte*		buf;
	ulint		actual_size;
	ulint		size_in_header;
	dberr_t		error;
	ibool		success;

	/* Explicit cast, as done for the other mem_alloc() result in this
	file: C++ does not implicitly convert a void pointer to byte*. */
	buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));

	mutex_enter(&fil_system->mutex);

	space = UT_LIST_GET_FIRST(fil_system->space_list);

	while (space) {
		ut_a(space->purpose == FIL_TABLESPACE);

		mutex_exit(&fil_system->mutex); /* no need to protect with a
					      mutex, because this is a
					      single-threaded operation */
		error = fil_read(TRUE, space->id,
				 fsp_flags_get_zip_size(space->flags),
				 0, 0, UNIV_PAGE_SIZE, buf, NULL);
		ut_a(error == DB_SUCCESS);

		size_in_header = fsp_get_size_low(buf);

		success = fil_extend_space_to_desired_size(
			&actual_size, space->id, size_in_header);
		if (!success) {
			/* The sizes are cast so that the argument types
			match the %lu conversions. */
			fprintf(stderr,
				"InnoDB: Error: could not extend the"
				" tablespace of %s\n"
				"InnoDB: to the size stored in header,"
				" %lu pages;\n"
				"InnoDB: size after extension %lu pages\n"
				"InnoDB: Check that you have free disk space"
				" and retry!\n",
				space->name, (ulong) size_in_header,
				(ulong) actual_size);
			/* Deliberately fails: abort after printing the
			diagnostics. */
			ut_a(success);
		}

		mutex_enter(&fil_system->mutex);

		space = UT_LIST_GET_NEXT(space_list, space);
	}

	mutex_exit(&fil_system->mutex);

	mem_free(buf);
}
#endif
5320 
5321 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
5322 
5323 /*******************************************************************//**
5324 Tries to reserve free extents in a file space.
5325 @return	TRUE if succeed */
5326 UNIV_INTERN
5327 ibool
fil_space_reserve_free_extents(ulint id,ulint n_free_now,ulint n_to_reserve)5328 fil_space_reserve_free_extents(
5329 /*===========================*/
5330 	ulint	id,		/*!< in: space id */
5331 	ulint	n_free_now,	/*!< in: number of free extents now */
5332 	ulint	n_to_reserve)	/*!< in: how many one wants to reserve */
5333 {
5334 	fil_space_t*	space;
5335 	ibool		success;
5336 
5337 	ut_ad(fil_system);
5338 
5339 	mutex_enter(&fil_system->mutex);
5340 
5341 	space = fil_space_get_by_id(id);
5342 
5343 	ut_a(space);
5344 
5345 	if (space->n_reserved_extents + n_to_reserve > n_free_now) {
5346 		success = FALSE;
5347 	} else {
5348 		space->n_reserved_extents += n_to_reserve;
5349 		success = TRUE;
5350 	}
5351 
5352 	mutex_exit(&fil_system->mutex);
5353 
5354 	return(success);
5355 }
5356 
5357 /*******************************************************************//**
5358 Releases free extents in a file space. */
5359 UNIV_INTERN
5360 void
fil_space_release_free_extents(ulint id,ulint n_reserved)5361 fil_space_release_free_extents(
5362 /*===========================*/
5363 	ulint	id,		/*!< in: space id */
5364 	ulint	n_reserved)	/*!< in: how many one reserved */
5365 {
5366 	fil_space_t*	space;
5367 
5368 	ut_ad(fil_system);
5369 
5370 	mutex_enter(&fil_system->mutex);
5371 
5372 	space = fil_space_get_by_id(id);
5373 
5374 	ut_a(space);
5375 	ut_a(space->n_reserved_extents >= n_reserved);
5376 
5377 	space->n_reserved_extents -= n_reserved;
5378 
5379 	mutex_exit(&fil_system->mutex);
5380 }
5381 
5382 /*******************************************************************//**
5383 Gets the number of reserved extents. If the database is silent, this number
5384 should be zero. */
5385 UNIV_INTERN
5386 ulint
fil_space_get_n_reserved_extents(ulint id)5387 fil_space_get_n_reserved_extents(
5388 /*=============================*/
5389 	ulint	id)		/*!< in: space id */
5390 {
5391 	fil_space_t*	space;
5392 	ulint		n;
5393 
5394 	ut_ad(fil_system);
5395 
5396 	mutex_enter(&fil_system->mutex);
5397 
5398 	space = fil_space_get_by_id(id);
5399 
5400 	ut_a(space);
5401 
5402 	n = space->n_reserved_extents;
5403 
5404 	mutex_exit(&fil_system->mutex);
5405 
5406 	return(n);
5407 }
5408 
5409 /*============================ FILE I/O ================================*/
5410 
5411 /********************************************************************//**
5412 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
5413 
5414 Prepares a file node for i/o. Opens the file if it is closed. Updates the
5415 pending i/o's field in the node and the system appropriately. Takes the node
5416 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
5417 mutex.
5418 @return false if the file can't be opened, otherwise true */
5419 static
5420 bool
fil_node_prepare_for_io(fil_node_t * node,fil_system_t * system,fil_space_t * space)5421 fil_node_prepare_for_io(
5422 /*====================*/
5423 	fil_node_t*	node,	/*!< in: file node */
5424 	fil_system_t*	system,	/*!< in: tablespace memory cache */
5425 	fil_space_t*	space)	/*!< in: space */
5426 {
5427 	ut_ad(node && system && space);
5428 	ut_ad(mutex_own(&(system->mutex)));
5429 
5430 	if (system->n_open > system->max_n_open + 5) {
5431 		ut_print_timestamp(stderr);
5432 		fprintf(stderr,
5433 			"  InnoDB: Warning: open files %lu"
5434 			" exceeds the limit %lu\n",
5435 			(ulong) system->n_open,
5436 			(ulong) system->max_n_open);
5437 	}
5438 
5439 	if (node->open == FALSE) {
5440 		/* File is closed: open it */
5441 		ut_a(node->n_pending == 0);
5442 
5443 		if (!fil_node_open_file(node, system, space)) {
5444 			return(false);
5445 		}
5446 	}
5447 
5448 	if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
5449 		/* The node is in the LRU list, remove it */
5450 
5451 		ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
5452 
5453 		UT_LIST_REMOVE(LRU, system->LRU, node);
5454 	}
5455 
5456 	node->n_pending++;
5457 
5458 	return(true);
5459 }
5460 
5461 /********************************************************************//**
5462 Updates the data structures when an i/o operation finishes. Updates the
5463 pending i/o's field in the node appropriately. */
5464 static
5465 void
fil_node_complete_io(fil_node_t * node,fil_system_t * system,ulint type)5466 fil_node_complete_io(
5467 /*=================*/
5468 	fil_node_t*	node,	/*!< in: file node */
5469 	fil_system_t*	system,	/*!< in: tablespace memory cache */
5470 	ulint		type)	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
5471 				the node as modified if
5472 				type == OS_FILE_WRITE */
5473 {
5474 	ut_ad(node);
5475 	ut_ad(system);
5476 	ut_ad(mutex_own(&(system->mutex)));
5477 
5478 	ut_a(node->n_pending > 0);
5479 
5480 	node->n_pending--;
5481 
5482 	if (type == OS_FILE_WRITE) {
5483 		ut_ad(!srv_read_only_mode);
5484 		system->modification_counter++;
5485 		node->modification_counter = system->modification_counter;
5486 
5487 		if (fil_buffering_disabled(node->space)) {
5488 
5489 			/* We don't need to keep track of unflushed
5490 			changes as user has explicitly disabled
5491 			buffering. */
5492 			ut_ad(!node->space->is_in_unflushed_spaces);
5493 			node->flush_counter = node->modification_counter;
5494 
5495 		} else if (!node->space->is_in_unflushed_spaces) {
5496 
5497 			node->space->is_in_unflushed_spaces = true;
5498 			UT_LIST_ADD_FIRST(unflushed_spaces,
5499 					  system->unflushed_spaces,
5500 					  node->space);
5501 		}
5502 	}
5503 
5504 	if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
5505 
5506 		/* The node must be put back to the LRU list */
5507 		UT_LIST_ADD_FIRST(LRU, system->LRU, node);
5508 	}
5509 }
5510 
5511 /********************************************************************//**
5512 Report information about an invalid page access. */
5513 static
5514 void
fil_report_invalid_page_access(ulint block_offset,ulint space_id,const char * space_name,ulint byte_offset,ulint len,ulint type)5515 fil_report_invalid_page_access(
5516 /*===========================*/
5517 	ulint		block_offset,	/*!< in: block offset */
5518 	ulint		space_id,	/*!< in: space id */
5519 	const char*	space_name,	/*!< in: space name */
5520 	ulint		byte_offset,	/*!< in: byte offset */
5521 	ulint		len,		/*!< in: I/O length */
5522 	ulint		type)		/*!< in: I/O type */
5523 {
5524 	fprintf(stderr,
5525 		"InnoDB: Error: trying to access page number %lu"
5526 		" in space %lu,\n"
5527 		"InnoDB: space name %s,\n"
5528 		"InnoDB: which is outside the tablespace bounds.\n"
5529 		"InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
5530 		"InnoDB: If you get this error at mysqld startup,"
5531 		" please check that\n"
5532 		"InnoDB: your my.cnf matches the ibdata files"
5533 		" that you have in the\n"
5534 		"InnoDB: MySQL server.\n",
5535 		(ulong) block_offset, (ulong) space_id, space_name,
5536 		(ulong) byte_offset, (ulong) len, (ulong) type);
5537 }
5538 
/********************************************************************//**
Reads or writes data. This operation is asynchronous (aio) unless sync is
set, in which case the i/o has completed when the function returns.

The flag bits OS_FILE_LOG, OS_AIO_SIMULATED_WAKE_LATER and
BUF_READ_IGNORE_NONEXISTENT_PAGES are stripped from type before the plain
read/write type is examined. The file node that contains block_offset is
located in the space's chain, prepared for i/o (opened if necessary), and
the request is handed to os_aio() (or to os_file_read()/os_file_write() in
the UNIV_HOTBACKUP build). A request outside the tablespace bounds is
reported and treated as a fatal error, unless
BUF_READ_IGNORE_NONEXISTENT_PAGES was given and no node contains the block.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist (or whose .ibd data file cannot
be opened), or DB_ERROR if BUF_READ_IGNORE_NONEXISTENT_PAGES was set and
no file node of the space contains the requested block */
UNIV_INTERN
dberr_t
fil_io(
/*===*/
	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
				ORed to OS_FILE_LOG, if a log i/o
				and ORed to OS_AIO_SIMULATED_WAKE_LATER
				if simulated aio and we want to post a
				batch of i/os; NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! May also be ORed with
				BUF_READ_IGNORE_NONEXISTENT_PAGES */
	bool	sync,		/*!< in: true if synchronous aio is desired */
	ulint	space_id,	/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes;
				0 for uncompressed pages */
	ulint	block_offset,	/*!< in: offset in number of blocks */
	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
				aio this must be divisible by the OS block
				size */
	ulint	len,		/*!< in: how many bytes to read or write; this
				must not cross a file boundary; in aio this
				must be a block size multiple */
	void*	buf,		/*!< in/out: buffer where to store read data
				or from where to write; in aio this must be
				appropriately aligned */
	void*	message)	/*!< in: message for aio handler if non-sync
				aio used, else ignored */
{
	ulint		mode;
	fil_space_t*	space;
	fil_node_t*	node;
	ibool		ret;
	ulint		is_log;
	ulint		wake_later;
	os_offset_t	offset;
	ibool		ignore_nonexistent_pages;

	/* Split the modifier bits off type; after this block type holds
	only the plain i/o type. */
	is_log = type & OS_FILE_LOG;
	type = type & ~OS_FILE_LOG;

	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;

	ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES;
	type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;

	ut_ad(byte_offset < UNIV_PAGE_SIZE);
	ut_ad(!zip_size || !byte_offset);
	ut_ad(ut_is_2pow(zip_size));
	ut_ad(buf);
	ut_ad(len > 0);
	ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
#endif
#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
#endif
	ut_ad(fil_validate_skip());
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
	/* ibuf bitmap pages must be read in the sync aio mode: */
	ut_ad(recv_no_ibuf_operations
	      || type == OS_FILE_WRITE
	      || !ibuf_bitmap_page(zip_size, block_offset)
	      || sync
	      || is_log);
# endif /* UNIV_LOG_DEBUG */
	/* Choose the aio mode: sync takes precedence, then log i/o, then
	reads of insert buffer pages; everything else is normal aio. */
	if (sync) {
		mode = OS_AIO_SYNC;
	} else if (is_log) {
		mode = OS_AIO_LOG;
	} else if (type == OS_FILE_READ
		   && !recv_no_ibuf_operations
		   && ibuf_page(space_id, zip_size, block_offset, NULL)) {
		mode = OS_AIO_IBUF;
	} else {
		mode = OS_AIO_NORMAL;
	}
#else /* !UNIV_HOTBACKUP */
	ut_a(sync);
	mode = OS_AIO_SYNC;
#endif /* !UNIV_HOTBACKUP */

	/* Account the requested length in the server i/o statistics. */
	if (type == OS_FILE_READ) {
		srv_stats.data_read.add(len);
	} else if (type == OS_FILE_WRITE) {
		ut_ad(!srv_read_only_mode);
		srv_stats.data_written.add(len);
	}

	/* Reserve the fil_system mutex and make sure that we can open at
	least one file while holding it, if the file is not already open */

	fil_mutex_enter_and_prepare_for_io(space_id);

	space = fil_space_get_by_id(space_id);

	/* If we are deleting a tablespace we don't allow async read operations
	on that. However, we do allow write and sync read operations */
	if (space == 0
	    || (type == OS_FILE_READ && !sync && space->stop_new_ops)) {
		mutex_exit(&fil_system->mutex);

		ib_logf(IB_LOG_LEVEL_ERROR,
			"Trying to do i/o to a tablespace which does "
			"not exist. i/o type %lu, space id %lu, "
			"page no. %lu, i/o length %lu bytes",
			(ulong) type, (ulong) space_id, (ulong) block_offset,
			(ulong) len);

		return(DB_TABLESPACE_DELETED);
	}

	ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);

	node = UT_LIST_GET_FIRST(space->chain);

	/* Walk the chain of file nodes to find the one that contains
	block_offset. On the way block_offset is converted from a
	space-relative to a node-relative offset by subtracting the size of
	every node that is skipped. */
	for (;;) {
		if (node == NULL) {
			/* Ran off the end of the chain: the block is beyond
			the last file of the space. */
			if (ignore_nonexistent_pages) {
				mutex_exit(&fil_system->mutex);
				return(DB_ERROR);
			}

			fil_report_invalid_page_access(
				block_offset, space_id, space->name,
				byte_offset, len, type);

			ut_error;

		} else if (fil_is_user_tablespace_id(space->id)
			   && node->size == 0) {

			/* We do not know the size of a single-table tablespace
			before we open the file */
			break;
		} else if (node->size > block_offset) {
			/* Found! */
			break;
		} else {
			block_offset -= node->size;
			node = UT_LIST_GET_NEXT(chain, node);
		}
	}

	/* Open file if closed */
	if (!fil_node_prepare_for_io(node, fil_system, space)) {
		if (space->purpose == FIL_TABLESPACE
		    && fil_is_user_tablespace_id(space->id)) {
			mutex_exit(&fil_system->mutex);

			ib_logf(IB_LOG_LEVEL_ERROR,
				"Trying to do i/o to a tablespace which "
				"exists without .ibd data file. "
				"i/o type %lu, space id %lu, page no %lu, "
				"i/o length %lu bytes",
				(ulong) type, (ulong) space_id,
				(ulong) block_offset, (ulong) len);

			return(DB_TABLESPACE_DELETED);
		}

		/* The tablespace is for log. Currently, we just assert here
		to prevent handling errors along the way fil_io returns.
		Also, if the log files are missing, it would be hard to
		promise the server can continue running. */
		ut_a(0);
	}

	/* Check that at least the start offset is within the bounds of a
	single-table tablespace, including rollback tablespaces. */
	if (UNIV_UNLIKELY(node->size <= block_offset)
	    && space->id != 0 && space->purpose == FIL_TABLESPACE) {

		fil_report_invalid_page_access(
			block_offset, space_id, space->name, byte_offset,
			len, type);

		ut_error;
	}

	/* Now we have made the changes in the data structures of fil_system */
	mutex_exit(&fil_system->mutex);

	/* Calculate the byte offset of the request within the node's file
	and assert that the whole request fits inside the node. */

	if (!zip_size) {
		offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT)
			+ byte_offset;

		ut_a(node->size - block_offset
		     >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
			 / UNIV_PAGE_SIZE));
	} else {
		/* Compressed pages: map the zip size to its log2 so that
		the block offset can be shifted; any other size is fatal. */
		ulint	zip_size_shift;
		switch (zip_size) {
		case 1024: zip_size_shift = 10; break;
		case 2048: zip_size_shift = 11; break;
		case 4096: zip_size_shift = 12; break;
		case 8192: zip_size_shift = 13; break;
		case 16384: zip_size_shift = 14; break;
		default: ut_error;
		}
		offset = ((os_offset_t) block_offset << zip_size_shift)
			+ byte_offset;
		ut_a(node->size - block_offset
		     >= (len + (zip_size - 1)) / zip_size);
	}

	/* Do aio */

	/* Both the in-page offset and the length must be multiples of
	OS_FILE_LOG_BLOCK_SIZE. */
	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);

#ifdef UNIV_HOTBACKUP
	/* In mysqlbackup do normal i/o, not aio */
	if (type == OS_FILE_READ) {
		ret = os_file_read(node->handle, buf, offset, len);
	} else {
		ut_ad(!srv_read_only_mode);
		ret = os_file_write(node->name, node->handle, buf,
				    offset, len);
	}
#else
	/* Queue the aio request */
	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
		     offset, len, node, message);
#endif /* UNIV_HOTBACKUP */
	/* A failed request is treated as fatal. */
	ut_a(ret);

	if (mode == OS_AIO_SYNC) {
		/* The i/o operation is already completed when we return from
		os_aio: */

		mutex_enter(&fil_system->mutex);

		fil_node_complete_io(node, fil_system, type);

		mutex_exit(&fil_system->mutex);

		ut_ad(fil_validate_skip());
	}

	return(DB_SUCCESS);
}
5791 
5792 #ifndef UNIV_HOTBACKUP
5793 /**********************************************************************//**
5794 Waits for an aio operation to complete. This function is used to write the
5795 handler for completed requests. The aio array of pending requests is divided
5796 into segments (see os0file.cc for more info). The thread specifies which
5797 segment it wants to wait for. */
5798 UNIV_INTERN
5799 void
fil_aio_wait(ulint segment)5800 fil_aio_wait(
5801 /*=========*/
5802 	ulint	segment)	/*!< in: the number of the segment in the aio
5803 				array to wait for */
5804 {
5805 	ibool		ret;
5806 	fil_node_t*	fil_node;
5807 	void*		message;
5808 	ulint		type;
5809 
5810 	ut_ad(fil_validate_skip());
5811 
5812 	if (srv_use_native_aio) {
5813 		srv_set_io_thread_op_info(segment, "native aio handle");
5814 #ifdef WIN_ASYNC_IO
5815 		ret = os_aio_windows_handle(
5816 			segment, 0, &fil_node, &message, &type);
5817 #elif defined(LINUX_NATIVE_AIO)
5818 		ret = os_aio_linux_handle(
5819 			segment, &fil_node, &message, &type);
5820 #else
5821 		ut_error;
5822 		ret = 0; /* Eliminate compiler warning */
5823 #endif /* WIN_ASYNC_IO */
5824 	} else {
5825 		srv_set_io_thread_op_info(segment, "simulated aio handle");
5826 
5827 		ret = os_aio_simulated_handle(
5828 			segment, &fil_node, &message, &type);
5829 	}
5830 
5831 	ut_a(ret);
5832 	if (fil_node == NULL) {
5833 		ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
5834 		return;
5835 	}
5836 
5837 	srv_set_io_thread_op_info(segment, "complete io for fil node");
5838 
5839 	mutex_enter(&fil_system->mutex);
5840 
5841 	fil_node_complete_io(fil_node, fil_system, type);
5842 
5843 	mutex_exit(&fil_system->mutex);
5844 
5845 	ut_ad(fil_validate_skip());
5846 
5847 	/* Do the i/o handling */
5848 	/* IMPORTANT: since i/o handling for reads will read also the insert
5849 	buffer in tablespace 0, you have to be very careful not to introduce
5850 	deadlocks in the i/o system. We keep tablespace 0 data files always
5851 	open, and use a special i/o thread to serve insert buffer requests. */
5852 
5853 	if (fil_node->space->purpose == FIL_TABLESPACE) {
5854 		srv_set_io_thread_op_info(segment, "complete io for buf page");
5855 		buf_page_io_complete(static_cast<buf_page_t*>(message));
5856 	} else {
5857 		srv_set_io_thread_op_info(segment, "complete io for log");
5858 		log_io_complete(static_cast<log_group_t*>(message));
5859 	}
5860 }
5861 #endif /* UNIV_HOTBACKUP */
5862 
5863 /**********************************************************************//**
5864 Flushes to disk possible writes cached by the OS. If the space does not exist
5865 or is being dropped, does not do anything. */
5866 UNIV_INTERN
5867 void
fil_flush(ulint space_id)5868 fil_flush(
5869 /*======*/
5870 	ulint	space_id)	/*!< in: file space id (this can be a group of
5871 				log files or a tablespace of the database) */
5872 {
5873 	fil_space_t*	space;
5874 	fil_node_t*	node;
5875 	pfs_os_file_t	file;
5876 
5877 
5878 	mutex_enter(&fil_system->mutex);
5879 
5880 	space = fil_space_get_by_id(space_id);
5881 
5882 	if (!space || space->stop_new_ops) {
5883 		mutex_exit(&fil_system->mutex);
5884 
5885 		return;
5886 	}
5887 
5888 	if (fil_buffering_disabled(space)) {
5889 
5890 		/* No need to flush. User has explicitly disabled
5891 		buffering. */
5892 		ut_ad(!space->is_in_unflushed_spaces);
5893 		ut_ad(fil_space_is_flushed(space));
5894 		ut_ad(space->n_pending_flushes == 0);
5895 
5896 #ifdef UNIV_DEBUG
5897 		for (node = UT_LIST_GET_FIRST(space->chain);
5898 		     node != NULL;
5899 		     node = UT_LIST_GET_NEXT(chain, node)) {
5900 			ut_ad(node->modification_counter
5901 			      == node->flush_counter);
5902 			ut_ad(node->n_pending_flushes == 0);
5903 		}
5904 #endif /* UNIV_DEBUG */
5905 
5906 		mutex_exit(&fil_system->mutex);
5907 		return;
5908 	}
5909 
5910 	space->n_pending_flushes++;	/*!< prevent dropping of the space while
5911 					we are flushing */
5912 	for (node = UT_LIST_GET_FIRST(space->chain);
5913 	     node != NULL;
5914 	     node = UT_LIST_GET_NEXT(chain, node)) {
5915 
5916 		ib_int64_t old_mod_counter = node->modification_counter;;
5917 
5918 		if (old_mod_counter <= node->flush_counter) {
5919 			continue;
5920 		}
5921 
5922 		ut_a(node->open);
5923 
5924 		if (space->purpose == FIL_TABLESPACE) {
5925 			fil_n_pending_tablespace_flushes++;
5926 		} else {
5927 			fil_n_pending_log_flushes++;
5928 			fil_n_log_flushes++;
5929 		}
5930 #ifdef __WIN__
5931 		if (node->is_raw_disk) {
5932 
5933 			goto skip_flush;
5934 		}
5935 #endif /* __WIN__ */
5936 retry:
5937 		if (node->n_pending_flushes > 0) {
5938 			/* We want to avoid calling os_file_flush() on
5939 			the file twice at the same time, because we do
5940 			not know what bugs OS's may contain in file
5941 			i/o */
5942 
5943 			ib_int64_t sig_count =
5944 				os_event_reset(node->sync_event);
5945 
5946 			mutex_exit(&fil_system->mutex);
5947 
5948 			os_event_wait_low(node->sync_event, sig_count);
5949 
5950 			mutex_enter(&fil_system->mutex);
5951 
5952 			if (node->flush_counter >= old_mod_counter) {
5953 
5954 				goto skip_flush;
5955 			}
5956 
5957 			goto retry;
5958 		}
5959 
5960 		ut_a(node->open);
5961 		file = node->handle;
5962 		node->n_pending_flushes++;
5963 
5964 		mutex_exit(&fil_system->mutex);
5965 
5966 		os_file_flush(file);
5967 
5968 		mutex_enter(&fil_system->mutex);
5969 
5970 		os_event_set(node->sync_event);
5971 
5972 		node->n_pending_flushes--;
5973 skip_flush:
5974 		if (node->flush_counter < old_mod_counter) {
5975 			node->flush_counter = old_mod_counter;
5976 
5977 			if (space->is_in_unflushed_spaces
5978 			    && fil_space_is_flushed(space)) {
5979 
5980 				space->is_in_unflushed_spaces = false;
5981 
5982 				UT_LIST_REMOVE(
5983 					unflushed_spaces,
5984 					fil_system->unflushed_spaces,
5985 					space);
5986 			}
5987 		}
5988 
5989 		if (space->purpose == FIL_TABLESPACE) {
5990 			fil_n_pending_tablespace_flushes--;
5991 		} else {
5992 			fil_n_pending_log_flushes--;
5993 		}
5994 	}
5995 
5996 	space->n_pending_flushes--;
5997 
5998 	mutex_exit(&fil_system->mutex);
5999 }
6000 
6001 /**********************************************************************//**
6002 Flushes to disk the writes in file spaces of the given type possibly cached by
6003 the OS. */
6004 UNIV_INTERN
6005 void
fil_flush_file_spaces(ulint purpose)6006 fil_flush_file_spaces(
6007 /*==================*/
6008 	ulint	purpose)	/*!< in: FIL_TABLESPACE, FIL_LOG */
6009 {
6010 	fil_space_t*	space;
6011 	ulint*		space_ids;
6012 	ulint		n_space_ids;
6013 	ulint		i;
6014 
6015 	mutex_enter(&fil_system->mutex);
6016 
6017 	n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
6018 	if (n_space_ids == 0) {
6019 
6020 		mutex_exit(&fil_system->mutex);
6021 		return;
6022 	}
6023 
6024 	/* Assemble a list of space ids to flush.  Previously, we
6025 	traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
6026 	on a space that was just removed from the list by fil_flush().
6027 	Thus, the space could be dropped and the memory overwritten. */
6028 	space_ids = static_cast<ulint*>(
6029 		mem_alloc(n_space_ids * sizeof *space_ids));
6030 
6031 	n_space_ids = 0;
6032 
6033 	for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
6034 	     space;
6035 	     space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
6036 
6037 		if (space->purpose == purpose && !space->stop_new_ops) {
6038 
6039 			space_ids[n_space_ids++] = space->id;
6040 		}
6041 	}
6042 
6043 	mutex_exit(&fil_system->mutex);
6044 
6045 	/* Flush the spaces.  It will not hurt to call fil_flush() on
6046 	a non-existing space id. */
6047 	for (i = 0; i < n_space_ids; i++) {
6048 
6049 		fil_flush(space_ids[i]);
6050 	}
6051 
6052 	mem_free(space_ids);
6053 }
6054 
6055 /** Functor to validate the space list. */
6056 struct	Check {
operator ()Check6057 	void	operator()(const fil_node_t* elem)
6058 	{
6059 		ut_a(elem->open || !elem->n_pending);
6060 	}
6061 };
6062 
6063 /******************************************************************//**
6064 Checks the consistency of the tablespace cache.
6065 @return	TRUE if ok */
6066 UNIV_INTERN
6067 ibool
fil_validate(void)6068 fil_validate(void)
6069 /*==============*/
6070 {
6071 	fil_space_t*	space;
6072 	fil_node_t*	fil_node;
6073 	ulint		n_open		= 0;
6074 	ulint		i;
6075 
6076 	mutex_enter(&fil_system->mutex);
6077 
6078 	/* Look for spaces in the hash table */
6079 
6080 	for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
6081 
6082 		for (space = static_cast<fil_space_t*>(
6083 				HASH_GET_FIRST(fil_system->spaces, i));
6084 		     space != 0;
6085 		     space = static_cast<fil_space_t*>(
6086 			     	HASH_GET_NEXT(hash, space))) {
6087 
6088 			UT_LIST_VALIDATE(
6089 				chain, fil_node_t, space->chain, Check());
6090 
6091 			for (fil_node = UT_LIST_GET_FIRST(space->chain);
6092 			     fil_node != 0;
6093 			     fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
6094 
6095 				if (fil_node->n_pending > 0) {
6096 					ut_a(fil_node->open);
6097 				}
6098 
6099 				if (fil_node->open) {
6100 					n_open++;
6101 				}
6102 			}
6103 		}
6104 	}
6105 
6106 	ut_a(fil_system->n_open == n_open);
6107 
6108 	UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU);
6109 
6110 	for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
6111 	     fil_node != 0;
6112 	     fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
6113 
6114 		ut_a(fil_node->n_pending == 0);
6115 		ut_a(!fil_node->being_extended);
6116 		ut_a(fil_node->open);
6117 		ut_a(fil_space_belongs_in_lru(fil_node->space));
6118 	}
6119 
6120 	mutex_exit(&fil_system->mutex);
6121 
6122 	return(TRUE);
6123 }
6124 
6125 /********************************************************************//**
6126 Returns TRUE if file address is undefined.
6127 @return	TRUE if undefined */
6128 UNIV_INTERN
6129 ibool
fil_addr_is_null(fil_addr_t addr)6130 fil_addr_is_null(
6131 /*=============*/
6132 	fil_addr_t	addr)	/*!< in: address */
6133 {
6134 	return(addr.page == FIL_NULL);
6135 }
6136 
6137 /********************************************************************//**
6138 Get the predecessor of a file page.
6139 @return	FIL_PAGE_PREV */
6140 UNIV_INTERN
6141 ulint
fil_page_get_prev(const byte * page)6142 fil_page_get_prev(
6143 /*==============*/
6144 	const byte*	page)	/*!< in: file page */
6145 {
6146 	return(mach_read_from_4(page + FIL_PAGE_PREV));
6147 }
6148 
6149 /********************************************************************//**
6150 Get the successor of a file page.
6151 @return	FIL_PAGE_NEXT */
6152 UNIV_INTERN
6153 ulint
fil_page_get_next(const byte * page)6154 fil_page_get_next(
6155 /*==============*/
6156 	const byte*	page)	/*!< in: file page */
6157 {
6158 	return(mach_read_from_4(page + FIL_PAGE_NEXT));
6159 }
6160 
6161 /*********************************************************************//**
6162 Sets the file page type. */
6163 UNIV_INTERN
6164 void
fil_page_set_type(byte * page,ulint type)6165 fil_page_set_type(
6166 /*==============*/
6167 	byte*	page,	/*!< in/out: file page */
6168 	ulint	type)	/*!< in: type */
6169 {
6170 	ut_ad(page);
6171 
6172 	mach_write_to_2(page + FIL_PAGE_TYPE, type);
6173 }
6174 
6175 /*********************************************************************//**
6176 Gets the file page type.
6177 @return type; NOTE that if the type has not been written to page, the
6178 return value not defined */
6179 UNIV_INTERN
6180 ulint
fil_page_get_type(const byte * page)6181 fil_page_get_type(
6182 /*==============*/
6183 	const byte*	page)	/*!< in: file page */
6184 {
6185 	ut_ad(page);
6186 
6187 	return(mach_read_from_2(page + FIL_PAGE_TYPE));
6188 }
6189 
6190 /****************************************************************//**
6191 Closes the tablespace memory cache. */
6192 UNIV_INTERN
6193 void
fil_close(void)6194 fil_close(void)
6195 /*===========*/
6196 {
6197 #ifndef UNIV_HOTBACKUP
6198 	/* The mutex should already have been freed. */
6199 	ut_ad(fil_system->mutex.magic_n == 0);
6200 #endif /* !UNIV_HOTBACKUP */
6201 
6202 	hash_table_free(fil_system->spaces);
6203 
6204 	hash_table_free(fil_system->name_hash);
6205 
6206 	ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
6207 	ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
6208 	ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
6209 
6210 	mem_free(fil_system);
6211 
6212 	fil_system = NULL;
6213 }
6214 
6215 /********************************************************************//**
6216 Initializes a buffer control block when the buf_pool is created. */
6217 static
6218 void
fil_buf_block_init(buf_block_t * block,byte * frame)6219 fil_buf_block_init(
6220 /*===============*/
6221 	buf_block_t*	block,		/*!< in: pointer to control block */
6222 	byte*		frame)		/*!< in: pointer to buffer frame */
6223 {
6224 	UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
6225 
6226 	block->frame = frame;
6227 
6228 	block->page.io_fix = BUF_IO_NONE;
6229 	/* There are assertions that check for this. */
6230 	block->page.buf_fix_count = 1;
6231 	block->page.state = BUF_BLOCK_READY_FOR_USE;
6232 
6233 	page_zip_des_init(&block->page.zip);
6234 }
6235 
6236 struct fil_iterator_t {
6237 	pfs_os_file_t	file;			/*!< File handle */
6238 	const char*	filepath;		/*!< File path name */
6239 	os_offset_t	start;			/*!< From where to start */
6240 	os_offset_t	end;			/*!< Where to stop */
6241 	os_offset_t	file_size;		/*!< File size in bytes */
6242 	ulint		page_size;		/*!< Page size */
6243 	ulint		n_io_buffers;		/*!< Number of pages to use
6244 						for IO */
6245 	byte*		io_buffer;		/*!< Buffer to use for IO */
6246 };
6247 
6248 /********************************************************************//**
6249 TODO: This can be made parallel trivially by chunking up the file and creating
6250 a callback per thread. . Main benefit will be to use multiple CPUs for
6251 checksums and compressed tables. We have to do compressed tables block by
6252 block right now. Secondly we need to decompress/compress and copy too much
6253 of data. These are CPU intensive.
6254 
6255 Iterate over all the pages in the tablespace.
6256 @param iter - Tablespace iterator
6257 @param block - block to use for IO
6258 @param callback - Callback to inspect and update page contents
6259 @retval DB_SUCCESS or error code */
6260 static
6261 dberr_t
fil_iterate(const fil_iterator_t & iter,buf_block_t * block,PageCallback & callback)6262 fil_iterate(
6263 /*========*/
6264 	const fil_iterator_t&	iter,
6265 	buf_block_t*		block,
6266 	PageCallback&		callback)
6267 {
6268 	os_offset_t		offset;
6269 	ulint			page_no = 0;
6270 	ulint			space_id = callback.get_space_id();
6271 	ulint			n_bytes = iter.n_io_buffers * iter.page_size;
6272 
6273 	ut_ad(!srv_read_only_mode);
6274 
6275 	/* TODO: For compressed tables we do a lot of useless
6276 	copying for non-index pages. Unfortunately, it is
6277 	required by buf_zip_decompress() */
6278 
6279 	for (offset = iter.start; offset < iter.end; offset += n_bytes) {
6280 
6281 		byte*		io_buffer = iter.io_buffer;
6282 
6283 		block->frame = io_buffer;
6284 
6285 		if (callback.get_zip_size() > 0) {
6286 			page_zip_des_init(&block->page.zip);
6287 			page_zip_set_size(&block->page.zip, iter.page_size);
6288 			block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
6289 			ut_d(block->page.zip.m_external = true);
6290 			ut_ad(iter.page_size == callback.get_zip_size());
6291 
6292 			/* Zip IO is done in the compressed page buffer. */
6293 			io_buffer = block->page.zip.data;
6294 		} else {
6295 			io_buffer = iter.io_buffer;
6296 		}
6297 
6298 		/* We have to read the exact number of bytes. Otherwise the
6299 		InnoDB IO functions croak on failed reads. */
6300 
6301 		n_bytes = static_cast<ulint>(
6302 			ut_min(static_cast<os_offset_t>(n_bytes),
6303 			       iter.end - offset));
6304 
6305 		ut_ad(n_bytes > 0);
6306 		ut_ad(!(n_bytes % iter.page_size));
6307 
6308 		if (!os_file_read(iter.file, io_buffer, offset,
6309 				  (ulint) n_bytes)) {
6310 
6311 			ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
6312 
6313 			return(DB_IO_ERROR);
6314 		}
6315 
6316 		bool		updated = false;
6317 		os_offset_t	page_off = offset;
6318 		ulint		n_pages_read = (ulint) n_bytes / iter.page_size;
6319 
6320 		for (ulint i = 0; i < n_pages_read; ++i) {
6321 
6322 			buf_block_set_file_page(block, space_id, page_no++);
6323 
6324 			dberr_t	err;
6325 
6326 			if ((err = callback(page_off, block)) != DB_SUCCESS) {
6327 
6328 				return(err);
6329 
6330 			} else if (!updated) {
6331 				updated = buf_block_get_state(block)
6332 					== BUF_BLOCK_FILE_PAGE;
6333 			}
6334 
6335 			buf_block_set_state(block, BUF_BLOCK_NOT_USED);
6336 			buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
6337 
6338 			page_off += iter.page_size;
6339 			block->frame += iter.page_size;
6340 		}
6341 
6342 		/* A page was updated in the set, write back to disk. */
6343 		if (updated
6344 		    && !os_file_write(
6345 				iter.filepath, iter.file, io_buffer,
6346 				offset, (ulint) n_bytes)) {
6347 
6348 			ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
6349 
6350 			return(DB_IO_ERROR);
6351 		}
6352 	}
6353 
6354 	return(DB_SUCCESS);
6355 }
6356 
6357 /********************************************************************//**
6358 Iterate over all the pages in the tablespace.
6359 @param table - the table definiton in the server
6360 @param n_io_buffers - number of blocks to read and write together
6361 @param callback - functor that will do the page updates
6362 @return	DB_SUCCESS or error code */
6363 UNIV_INTERN
6364 dberr_t
fil_tablespace_iterate(dict_table_t * table,ulint n_io_buffers,PageCallback & callback)6365 fil_tablespace_iterate(
6366 /*===================*/
6367 	dict_table_t*	table,
6368 	ulint		n_io_buffers,
6369 	PageCallback&	callback)
6370 {
6371 	dberr_t		err;
6372 	pfs_os_file_t	file;
6373 	char*		filepath;
6374 
6375 	ut_a(n_io_buffers > 0);
6376 	ut_ad(!srv_read_only_mode);
6377 
6378 	DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
6379 			return(DB_CORRUPTION););
6380 
6381 	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
6382 		dict_get_and_save_data_dir_path(table, false);
6383 		ut_a(table->data_dir_path);
6384 
6385 		filepath = os_file_make_remote_pathname(
6386 			table->data_dir_path, table->name, "ibd");
6387 	} else {
6388 		filepath = fil_make_ibd_name(table->name, false);
6389 	}
6390 
6391 	{
6392 		ibool	success;
6393 
6394 		file = os_file_create_simple_no_error_handling(
6395 			innodb_file_data_key, filepath,
6396 			OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
6397 
6398 		DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
6399 		{
6400 			static bool once;
6401 
6402 			if (!once || ut_rnd_interval(0, 10) == 5) {
6403 				once = true;
6404 				success = FALSE;
6405 				os_file_close(file);
6406 			}
6407 		});
6408 
6409 		if (!success) {
6410 			/* The following call prints an error message */
6411 			os_file_get_last_error(true);
6412 
6413 			ib_logf(IB_LOG_LEVEL_ERROR,
6414 				"Trying to import a tablespace, but could not "
6415 				"open the tablespace file %s", filepath);
6416 
6417 			mem_free(filepath);
6418 
6419 			return(DB_TABLESPACE_NOT_FOUND);
6420 
6421 		} else {
6422 			err = DB_SUCCESS;
6423 		}
6424 	}
6425 
6426 	callback.set_file(filepath, file);
6427 
6428 	os_offset_t	file_size = os_file_get_size(file);
6429 	ut_a(file_size != (os_offset_t) -1);
6430 
6431 	/* The block we will use for every physical page */
6432 	buf_block_t	block;
6433 
6434 	memset(&block, 0x0, sizeof(block));
6435 
6436 	/* Allocate a page to read in the tablespace header, so that we
6437 	can determine the page size and zip_size (if it is compressed).
6438 	We allocate an extra page in case it is a compressed table. One
6439 	page is to ensure alignement. */
6440 
6441 	void*	page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
6442 	byte*	page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
6443 
6444 	fil_buf_block_init(&block, page);
6445 
6446 	/* Read the first page and determine the page and zip size. */
6447 
6448 	if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
6449 
6450 		err = DB_IO_ERROR;
6451 
6452 	} else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
6453 		fil_iterator_t	iter;
6454 
6455 		iter.file = file;
6456 		iter.start = 0;
6457 		iter.end = file_size;
6458 		iter.filepath = filepath;
6459 		iter.file_size = file_size;
6460 		iter.n_io_buffers = n_io_buffers;
6461 		iter.page_size = callback.get_page_size();
6462 
6463 		/* Compressed pages can't be optimised for block IO for now.
6464 		We do the IMPORT page by page. */
6465 
6466 		if (callback.get_zip_size() > 0) {
6467 			iter.n_io_buffers = 1;
6468 			ut_a(iter.page_size == callback.get_zip_size());
6469 		}
6470 
6471 		/** Add an extra page for compressed page scratch area. */
6472 
6473 		void*	io_buffer = mem_alloc(
6474 			(2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
6475 
6476 		iter.io_buffer = static_cast<byte*>(
6477 			ut_align(io_buffer, UNIV_PAGE_SIZE));
6478 
6479 		err = fil_iterate(iter, &block, callback);
6480 
6481 		mem_free(io_buffer);
6482 	}
6483 
6484 	if (err == DB_SUCCESS) {
6485 
6486 		ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
6487 
6488 		if (!os_file_flush(file)) {
6489 			ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
6490 			err = DB_IO_ERROR;
6491 		} else {
6492 			ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
6493 		}
6494 	}
6495 
6496 	os_file_close(file);
6497 
6498 	mem_free(page_ptr);
6499 	mem_free(filepath);
6500 
6501 	return(err);
6502 }
6503 
6504 /**
6505 Set the tablespace compressed table size.
6506 @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
6507 dberr_t
set_zip_size(const buf_frame_t * page)6508 PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
6509 {
6510 	m_zip_size = fsp_header_get_zip_size(page);
6511 
6512 	if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
6513 		return(DB_CORRUPTION);
6514 	}
6515 
6516 	return(DB_SUCCESS);
6517 }
6518 
6519 /********************************************************************//**
6520 Delete the tablespace file and any related files like .cfg.
6521 This should not be called for temporary tables. */
6522 UNIV_INTERN
6523 void
fil_delete_file(const char * ibd_name)6524 fil_delete_file(
6525 /*============*/
6526 	const char*	ibd_name)	/*!< in: filepath of the ibd
6527 					tablespace */
6528 {
6529 	/* Force a delete of any stale .ibd files that are lying around. */
6530 
6531 	ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
6532 
6533 	os_file_delete_if_exists(innodb_file_data_key, ibd_name);
6534 
6535 	char*	cfg_name = fil_make_cfg_name(ibd_name);
6536 
6537 	os_file_delete_if_exists(innodb_file_data_key, cfg_name);
6538 
6539 	mem_free(cfg_name);
6540 }
6541 
6542 /**
6543 Iterate over all the spaces in the space list and fetch the
6544 tablespace names. It will return a copy of the name that must be
6545 freed by the caller using: delete[].
6546 @return DB_SUCCESS if all OK. */
6547 UNIV_INTERN
6548 dberr_t
fil_get_space_names(space_name_list_t & space_name_list)6549 fil_get_space_names(
6550 /*================*/
6551 	space_name_list_t&	space_name_list)
6552 				/*!< in/out: List to append to */
6553 {
6554 	fil_space_t*	space;
6555 	dberr_t		err = DB_SUCCESS;
6556 
6557 	mutex_enter(&fil_system->mutex);
6558 
6559 	for (space = UT_LIST_GET_FIRST(fil_system->space_list);
6560 	     space != NULL;
6561 	     space = UT_LIST_GET_NEXT(space_list, space)) {
6562 
6563 		if (space->purpose == FIL_TABLESPACE) {
6564 			ulint	len;
6565 			char*	name;
6566 
6567 			len = strlen(space->name);
6568 			name = new(std::nothrow) char[len + 1];
6569 
6570 			if (name == 0) {
6571 				/* Caller to free elements allocated so far. */
6572 				err = DB_OUT_OF_MEMORY;
6573 				break;
6574 			}
6575 
6576 			memcpy(name, space->name, len);
6577 			name[len] = 0;
6578 
6579 			space_name_list.push_back(name);
6580 		}
6581 	}
6582 
6583 	mutex_exit(&fil_system->mutex);
6584 
6585 	return(err);
6586 }
6587 
6588 /** Generate redo log for swapping two .ibd files
6589 @param[in]	old_table	old table
6590 @param[in]	new_table	new table
6591 @param[in]	tmp_name	temporary table name
6592 @param[in,out]	mtr		mini-transaction
6593 @return innodb error code */
6594 UNIV_INTERN
6595 dberr_t
fil_mtr_rename_log(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name,mtr_t * mtr)6596 fil_mtr_rename_log(
6597 	const dict_table_t*	old_table,
6598 	const dict_table_t*	new_table,
6599 	const char*		tmp_name,
6600 	mtr_t*			mtr)
6601 {
6602 	dberr_t	err = DB_SUCCESS;
6603 	char*	old_path;
6604 
6605 	/* If neither table is file-per-table,
6606 	there will be no renaming of files. */
6607 	if (old_table->space == TRX_SYS_SPACE
6608 	    && new_table->space == TRX_SYS_SPACE) {
6609 		return(DB_SUCCESS);
6610 	}
6611 
6612 	if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6613 		old_path = os_file_make_remote_pathname(
6614 			old_table->data_dir_path, old_table->name, "ibd");
6615 	} else {
6616 		old_path = fil_make_ibd_name(old_table->name, false);
6617 	}
6618 	if (old_path == NULL) {
6619 		return(DB_OUT_OF_MEMORY);
6620 	}
6621 
6622 	if (old_table->space != TRX_SYS_SPACE) {
6623 		char*	tmp_path;
6624 
6625 		if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6626 			tmp_path = os_file_make_remote_pathname(
6627 				old_table->data_dir_path, tmp_name, "ibd");
6628 		}
6629 		else {
6630 			tmp_path = fil_make_ibd_name(tmp_name, false);
6631 		}
6632 
6633 		if (tmp_path == NULL) {
6634 			mem_free(old_path);
6635 			return(DB_OUT_OF_MEMORY);
6636 		}
6637 
6638 		/* Temp filepath must not exist. */
6639 		err = fil_rename_tablespace_check(
6640 			old_table->space, old_path, tmp_path,
6641 			dict_table_is_discarded(old_table));
6642 		mem_free(tmp_path);
6643 		if (err != DB_SUCCESS) {
6644 			mem_free(old_path);
6645 			return(err);
6646 		}
6647 
6648 		fil_op_write_log(MLOG_FILE_RENAME, old_table->space,
6649 				 0, 0, old_table->name, tmp_name, mtr);
6650 	}
6651 
6652 	if (new_table->space != TRX_SYS_SPACE) {
6653 
6654 		/* Destination filepath must not exist unless this ALTER
6655 		TABLE starts and ends with a file_per-table tablespace. */
6656 		if (old_table->space == TRX_SYS_SPACE) {
6657 			char*	new_path = NULL;
6658 
6659 			if (DICT_TF_HAS_DATA_DIR(new_table->flags)) {
6660 				new_path = os_file_make_remote_pathname(
6661 					new_table->data_dir_path,
6662 					new_table->name, "ibd");
6663 			}
6664 			else {
6665 				new_path = fil_make_ibd_name(
6666 					new_table->name, false);
6667 			}
6668 
6669 			if (new_path == NULL) {
6670 				mem_free(old_path);
6671 				return(DB_OUT_OF_MEMORY);
6672 			}
6673 
6674 			err = fil_rename_tablespace_check(
6675 				new_table->space, new_path, old_path,
6676 				dict_table_is_discarded(new_table));
6677 			mem_free(new_path);
6678 			if (err != DB_SUCCESS) {
6679 				mem_free(old_path);
6680 				return(err);
6681 			}
6682 		}
6683 
6684 		fil_op_write_log(MLOG_FILE_RENAME, new_table->space,
6685 				 0, 0, new_table->name, old_table->name, mtr);
6686 
6687 	}
6688 
6689 	mem_free(old_path);
6690 
6691 	return(err);
6692 }
6693