1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fil/fil0fil.cc
29 The tablespace memory cache
30
31 Created 10/25/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "fil0fil.h"
35
36 #include <debug_sync.h>
37 #include <my_dbug.h>
38
39 #include "mem0mem.h"
40 #include "hash0hash.h"
41 #include "os0file.h"
42 #include "mach0data.h"
43 #include "buf0buf.h"
44 #include "buf0flu.h"
45 #include "log0recv.h"
46 #include "fsp0fsp.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "mtr0mtr.h"
50 #include "mtr0log.h"
51 #include "dict0dict.h"
52 #include "page0page.h"
53 #include "page0zip.h"
54 #include "trx0sys.h"
55 #include "row0mysql.h"
56 #ifndef UNIV_HOTBACKUP
57 # include "buf0lru.h"
58 # include "ibuf0ibuf.h"
59 # include "sync0sync.h"
60 # include "os0sync.h"
61 #else /* !UNIV_HOTBACKUP */
62 # include "srv0srv.h"
63 static ulint srv_data_read, srv_data_written;
64 #endif /* !UNIV_HOTBACKUP */
65
66 /*
67 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
68 =============================================
69
70 The tablespace cache is responsible for providing fast read/write access to
71 tablespaces and logs of the database. File creation and deletion is done
72 in other modules which know more of the logic of the operation, however.
73
74 A tablespace consists of a chain of files. The size of the files does not
75 have to be divisible by the database block size, because we may just leave
76 the last incomplete block unused. When a new file is appended to the
77 tablespace, the maximum size of the file is also specified. At the moment,
78 we think that it is best to extend the file to its maximum size already at
79 the creation of the file, because then we can avoid dynamically extending
80 the file when more space is needed for the tablespace.
81
82 A block's position in the tablespace is specified with a 32-bit unsigned
83 integer. The files in the chain are thought to be catenated, and the block
84 corresponding to an address n is the nth block in the catenated file (where
85 the first block is named the 0th block, and the incomplete block fragments
86 at the end of files are not taken into account). A tablespace can be extended
87 by appending a new file at the end of the chain.
88
89 Our tablespace concept is similar to the one of Oracle.
90
91 To acquire more speed in disk transfers, a technique called disk striping is
92 sometimes used. This means that logical block addresses are divided in a
93 round-robin fashion across several disks. Windows NT supports disk striping,
94 so there we do not need to support it in the database. Disk striping is
95 implemented in hardware in RAID disks. We conclude that it is not necessary
96 to implement it in the database. Oracle 7 does not support disk striping,
97 either.
98
99 Another trick used at some database sites is replacing tablespace files by
100 raw disks, that is, the whole physical disk drive, or a partition of it, is
101 opened as a single file, and it is accessed through byte offsets calculated
102 from the start of the disk or the partition. This is recommended in some
103 books on database tuning to achieve more speed in i/o. Using raw disk
104 certainly prevents the OS from fragmenting disk space, but it is not clear
105 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
106 system + EIDE Conner disk only a negligible difference in speed when reading
107 from a file, versus reading from a raw disk.
108
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
112
113 Some operating systems do not support many open files at the same time,
114 though NT seems to tolerate at least 900 open files. Therefore, we put the
115 open files in an LRU-list. If we need to open another file, we may close the
116 file at the end of the LRU-list. When an i/o-operation is pending on a file,
117 the file cannot be closed. We take the file nodes with pending i/o-operations
118 out of the LRU-list and keep a count of pending operations. When an operation
119 completes, we decrement the count and return the file node to the LRU-list if
120 the count drops to zero. */
121
/** When mysqld is run, the default directory "." is the mysqld datadir,
but in the MySQL Embedded Server Library and mysqlbackup it is not the default
directory, and we must set the base file path explicitly */
UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";

/** The number of fsyncs done to the log */
UNIV_INTERN ulint fil_n_log_flushes = 0;

/** Number of pending redo log flushes */
UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
/** Number of pending tablespace flushes */
UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;

/** Number of files currently open; incremented in fil_node_open_file()
and decremented in fil_node_close_file() */
UNIV_INTERN ulint fil_n_file_opened = 0;

/** The null file address: page number FIL_NULL, byte offset 0 */
UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};

#ifdef UNIV_PFS_MUTEX
/* Key to register fil_system_mutex with performance schema */
UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
#endif /* UNIV_PFS_MUTEX */

#ifdef UNIV_PFS_RWLOCK
/* Key to register file space latch with performance schema */
UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
#endif /* UNIV_PFS_RWLOCK */
150
/** File node of a tablespace or the log data space. One node describes
one file in the chain of files that makes up a space (fil_space_t::chain). */
struct fil_node_t {
	fil_space_t*	space;	/*!< backpointer to the space where this node
				belongs */
	char*		name;	/*!< path to the file; a private copy made
				with mem_strdup() in fil_node_create() */
	ibool		open;	/*!< TRUE if file open */
	pfs_os_file_t	handle;	/*!< OS handle to the file, if file open */
	os_event_t	sync_event;/*!< Condition event to group and
				serialize calls to fsync; created in
				fil_node_create() */
	ibool		is_raw_disk;/*!< TRUE if the 'file' is actually a raw
				device or a raw disk partition */
	ulint		size;	/*!< size of the file in database pages, 0 if
				not known yet; the possible last incomplete
				megabyte may be ignored if space == 0 */
	ulint		n_pending;
				/*!< count of pending i/o's on this file;
				closing of the file is not allowed if
				this is > 0 */
	ulint		n_pending_flushes;
				/*!< count of pending flushes on this file;
				closing of the file is not allowed if
				this is > 0 */
	ibool		being_extended;
				/*!< TRUE if the node is currently
				being extended; closing of the file is not
				allowed while this is set */
	ib_int64_t	modification_counter;/*!< when we write to the file we
				increment this by one */
	ib_int64_t	flush_counter;/*!< up to what
				modification_counter value we have
				flushed the modifications to disk; the node
				has unflushed writes while
				modification_counter > flush_counter */
	UT_LIST_NODE_T(fil_node_t) chain;
				/*!< link field for the file chain
				(fil_space_t::chain) */
	UT_LIST_NODE_T(fil_node_t) LRU;
				/*!< link field for the LRU list
				(fil_system_t::LRU) */
	ulint		magic_n;/*!< FIL_NODE_MAGIC_N */
};

/** Value of fil_node_t::magic_n */
#define	FIL_NODE_MAGIC_N	89389
190
/** Tablespace or log data space: let us call them by a common name space */
struct fil_space_t {
	char*		name;	/*!< space name = the path to the first file in
				it */
	ulint		id;	/*!< space id */
	ib_int64_t	tablespace_version;
				/*!< in DISCARD/IMPORT this timestamp
				is used to check if we should ignore
				an insert buffer merge request for a
				page because it actually was for the
				previous incarnation of the space */
	ibool		mark;	/*!< this is set to TRUE at database startup if
				the space corresponds to a table in the InnoDB
				data dictionary; so we can print a warning of
				orphaned tablespaces */
	ibool		stop_ios;/*!< TRUE if we want to rename the
				.ibd file of tablespace and want to
				stop temporarily posting of new i/o
				requests on the file */
	ibool		stop_new_ops;
				/*!< we set this TRUE when we start
				deleting a single-table tablespace.
				When this is set following new ops
				are not allowed:
				* read IO request
				* ibuf merge
				* file flush
				Note that we can still possibly have
				new write operations because we don't
				check this flag when doing flush
				batches. */
	ulint		purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
				FIL_ARCH_LOG */
	UT_LIST_BASE_NODE_T(fil_node_t) chain;
				/*!< base node for the file chain; nodes are
				appended at the end by fil_node_create() */
	ulint		size;	/*!< space size in pages; 0 if a single-table
				tablespace whose size we do not know yet;
				last incomplete megabytes in data files may be
				ignored if space == 0 */
	ulint		flags;	/*!< tablespace flags; see
				fsp_flags_is_valid(),
				fsp_flags_get_zip_size() */
	ulint		n_reserved_extents;
				/*!< number of reserved free extents for
				ongoing operations like B-tree page split */
	ulint		n_pending_flushes; /*!< this is positive when flushing
				the tablespace to disk; dropping of the
				tablespace is forbidden if this is positive */
	ulint		n_pending_ops;/*!< this is positive when we
				have pending operations against this
				tablespace. The pending operations can
				be ibuf merges or lock validation code
				trying to read a block.
				Dropping of the tablespace is forbidden
				if this is positive */
	hash_node_t	hash;	/*!< hash chain node in fil_system->spaces
				(hashed on the space id) */
	hash_node_t	name_hash;/*!< hash chain node in
				fil_system->name_hash (hashed on the fold of
				the space name) */
#ifndef UNIV_HOTBACKUP
	prio_rw_lock_t	latch;	/*!< latch protecting the file space storage
				allocation */
#endif /* !UNIV_HOTBACKUP */
	UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
				/*!< list of spaces with at least one unflushed
				file we have written to */
	bool		is_in_unflushed_spaces;
				/*!< true if this space is currently in
				unflushed_spaces */
	ibool		is_corrupt;
				/*!< NOTE(review): not documented in the
				original; presumably TRUE once the tablespace
				has been found to be corrupted - confirm
				against the code that sets and reads it */
	UT_LIST_NODE_T(fil_space_t) space_list;
				/*!< list of all spaces */
	ulint		magic_n;/*!< FIL_SPACE_MAGIC_N */
};

/** Value of fil_space_t::magic_n */
#define	FIL_SPACE_MAGIC_N	89472
266
/** The tablespace memory cache; also the totality of logs (the log
data space) is stored here; below we talk about tablespaces, but also
the ib_logfiles form a 'space' and it is handled here */
struct fil_system_t {
#ifndef UNIV_HOTBACKUP
	ib_mutex_t	mutex;		/*!< The mutex protecting the cache */
#endif /* !UNIV_HOTBACKUP */
	hash_table_t*	spaces;		/*!< The hash table of spaces in the
					system; they are hashed on the space
					id */
	hash_table_t*	name_hash;	/*!< hash table based on the space
					name */
	UT_LIST_BASE_NODE_T(fil_node_t) LRU;
					/*!< base node for the LRU list of the
					most recently used open files with no
					pending i/o's; if we start an i/o on
					the file, we first remove it from this
					list, and return it to the start of
					the list when the i/o ends;
					log files and the system tablespace are
					not put to this list: they are opened
					after the startup, and kept open until
					shutdown; see
					fil_space_belongs_in_lru() for the
					exact membership test */
	UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
					/*!< base node for the list of those
					tablespaces whose files contain
					unflushed writes; those spaces have
					at least one file node where
					modification_counter > flush_counter */
	ulint		n_open;		/*!< number of files currently open;
					maintained by fil_node_open_file()
					and fil_node_close_file() */
	ulint		max_n_open;	/*!< n_open is not allowed to exceed
					this */
	ib_int64_t	modification_counter;/*!< when we write to a file we
					increment this by one */
	ulint		max_assigned_id;/*!< maximum space id in the existing
					tables, or assigned during the time
					mysqld has been up; at an InnoDB
					startup we scan the data dictionary
					and set here the maximum of the
					space id's of the tables there;
					fil_node_create() also raises it for
					ids below SRV_LOG_SPACE_FIRST_ID */
	ib_int64_t	tablespace_version;
					/*!< a counter which is incremented for
					every space object memory creation;
					every space mem object gets a
					'timestamp' from this; in DISCARD/
					IMPORT this is used to check if we
					should ignore an insert buffer merge
					request */
	UT_LIST_BASE_NODE_T(fil_space_t) space_list;
					/*!< list of all file spaces */
	ibool		space_id_reuse_warned;
					/*!< TRUE if fil_space_create()
					has issued a warning about
					potential space_id reuse */
};

/** The tablespace memory cache. This variable is NULL before the module is
initialized. */
static fil_system_t*	fil_system	= NULL;
326
/** Determine if (i) is a user tablespace id or not: it is one when it is
neither 0 (the system tablespace) nor an undo tablespace id.
The argument is parenthesized in the expansion so that an expression such
as (a & b) is tested as a whole. It is still evaluated twice, so it must
not have side effects. */
# define fil_is_user_tablespace_id(i)	((i) != 0 \
					 && !srv_is_undo_tablespace(i))
330
/** Determine if user has explicitly disabled fsync() for the files of
space (s). On non-Windows builds this holds for a FIL_TABLESPACE space when
srv_unix_file_flush_method is SRV_UNIX_O_DIRECT_NO_FSYNC, and for a FIL_LOG
space when it is SRV_UNIX_ALL_O_DIRECT. On Windows builds the macro always
evaluates to 0. */
#ifndef __WIN__
# define fil_buffering_disabled(s)	\
	(((s)->purpose == FIL_TABLESPACE	\
	    && srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)\
	|| ((s)->purpose == FIL_LOG	\
	    && srv_unix_file_flush_method == SRV_UNIX_ALL_O_DIRECT))

#else /* __WIN__ */
# define fil_buffering_disabled(s)	(0)
#endif /* __WIN__ */
342
#ifdef UNIV_DEBUG
/** Run the real fil_validate() only on every FIL_VALIDATE_SKIP-th call */
# define FIL_VALIDATE_SKIP	17

/******************************************************************//**
Debug-build throttle for fil_validate(): most calls return immediately,
and only every FIL_VALIDATE_SKIP-th call performs the costly check.
@return TRUE if the check was skipped, otherwise the result of
fil_validate() */
static
ibool
fil_validate_skip(void)
/*===================*/
{
	/** Calls remaining until the next real validation. The counter is
	deliberately signed and unsynchronized: concurrent callers may race
	on it, which only perturbs how often the check runs. */
	static int	calls_left = FIL_VALIDATE_SKIP;

	--calls_left;

	if (calls_left <= 0) {
		/* Time for a real check; start a new countdown first. */
		calls_left = FIL_VALIDATE_SKIP;

		return(fil_validate());
	}

	return(TRUE);
}
#endif /* UNIV_DEBUG */
371
372 /********************************************************************//**
373 Determines if a file node belongs to the least-recently-used list.
374 @return TRUE if the file belongs to fil_system->LRU mutex. */
375 UNIV_INLINE
376 ibool
fil_space_belongs_in_lru(const fil_space_t * space)377 fil_space_belongs_in_lru(
378 /*=====================*/
379 const fil_space_t* space) /*!< in: file space */
380 {
381 return(space->purpose == FIL_TABLESPACE
382 && fil_is_user_tablespace_id(space->id));
383 }
384
/********************************************************************//**
NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!

Prepares a file node for i/o. Opens the file if it is closed. Updates the
pending i/o's field in the node and the system appropriately. Takes the node
off the LRU list if it is in the LRU list. The caller must hold the
fil_system mutex.
@return false if the file can't be opened, otherwise true */
static
bool
fil_node_prepare_for_io(
/*====================*/
	fil_node_t*	node,	/*!< in: file node */
	fil_system_t*	system,	/*!< in: tablespace memory cache */
	fil_space_t*	space);	/*!< in: space */
/********************************************************************//**
Updates the data structures when an i/o operation finishes. Updates the
pending i/o's field in the node appropriately. */
static
void
fil_node_complete_io(
/*=================*/
	fil_node_t*	node,	/*!< in: file node */
	fil_system_t*	system,	/*!< in: tablespace memory cache */
	ulint		type);	/*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
				the node as modified if
				type == OS_FILE_WRITE */
/*******************************************************************//**
Frees a space object from the tablespace memory cache. Closes the files in
the chain but does not delete them. There must not be any pending i/o's or
flushes on the files.
@return TRUE on success */
static
ibool
fil_space_free(
/*===========*/
	ulint		id,		/*!< in: space id */
	ibool		x_latched);	/*!< in: TRUE if caller has
					space->latch in X mode */
424 /********************************************************************//**
425 Reads data from a space to a buffer. Remember that the possible incomplete
426 blocks at the end of file are ignored: they are not taken into account when
427 calculating the byte offset within a space.
428 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
429 i/o on a tablespace which does not exist */
430 UNIV_INLINE
431 dberr_t
fil_read(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)432 fil_read(
433 /*=====*/
434 bool sync, /*!< in: true if synchronous aio is desired */
435 ulint space_id, /*!< in: space id */
436 ulint zip_size, /*!< in: compressed page size in bytes;
437 0 for uncompressed pages */
438 ulint block_offset, /*!< in: offset in number of blocks */
439 ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
440 this must be divisible by the OS block size */
441 ulint len, /*!< in: how many bytes to read; this must not
442 cross a file boundary; in aio this must be a
443 block size multiple */
444 void* buf, /*!< in/out: buffer where to store data read;
445 in aio this must be appropriately aligned */
446 void* message) /*!< in: message for aio handler if non-sync
447 aio used, else ignored */
448 {
449 return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
450 byte_offset, len, buf, message));
451 }
452
453 /********************************************************************//**
454 Writes data to a space from a buffer. Remember that the possible incomplete
455 blocks at the end of file are ignored: they are not taken into account when
456 calculating the byte offset within a space.
457 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
458 i/o on a tablespace which does not exist */
459 UNIV_INLINE
460 dberr_t
fil_write(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)461 fil_write(
462 /*======*/
463 bool sync, /*!< in: true if synchronous aio is desired */
464 ulint space_id, /*!< in: space id */
465 ulint zip_size, /*!< in: compressed page size in bytes;
466 0 for uncompressed pages */
467 ulint block_offset, /*!< in: offset in number of blocks */
468 ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
469 this must be divisible by the OS block size */
470 ulint len, /*!< in: how many bytes to write; this must
471 not cross a file boundary; in aio this must
472 be a block size multiple */
473 void* buf, /*!< in: buffer from which to write; in aio
474 this must be appropriately aligned */
475 void* message) /*!< in: message for aio handler if non-sync
476 aio used, else ignored */
477 {
478 ut_ad(!srv_read_only_mode);
479
480 return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
481 byte_offset, len, buf, message));
482 }
483
484 /*******************************************************************//**
485 Returns the table space by a given id, NULL if not found. */
486 UNIV_INLINE
487 fil_space_t*
fil_space_get_by_id(ulint id)488 fil_space_get_by_id(
489 /*================*/
490 ulint id) /*!< in: space id */
491 {
492 fil_space_t* space;
493
494 ut_ad(mutex_own(&fil_system->mutex));
495
496 HASH_SEARCH(hash, fil_system->spaces, id,
497 fil_space_t*, space,
498 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
499 space->id == id);
500
501 /* The system tablespace must always be found */
502 ut_ad(space || id != 0 || srv_is_being_started);
503 return(space);
504 }
505
506 /*******************************************************************//**
507 Returns the table space by a given name, NULL if not found. */
508 UNIV_INLINE
509 fil_space_t*
fil_space_get_by_name(const char * name)510 fil_space_get_by_name(
511 /*==================*/
512 const char* name) /*!< in: space name */
513 {
514 fil_space_t* space;
515 ulint fold;
516
517 ut_ad(mutex_own(&fil_system->mutex));
518
519 fold = ut_fold_string(name);
520
521 HASH_SEARCH(name_hash, fil_system->name_hash, fold,
522 fil_space_t*, space,
523 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
524 !strcmp(name, space->name));
525
526 return(space);
527 }
528
529 #ifndef UNIV_HOTBACKUP
530 /*******************************************************************//**
531 Returns the version number of a tablespace, -1 if not found.
532 @return version number, -1 if the tablespace does not exist in the
533 memory cache */
534 UNIV_INTERN
535 ib_int64_t
fil_space_get_version(ulint id)536 fil_space_get_version(
537 /*==================*/
538 ulint id) /*!< in: space id */
539 {
540 fil_space_t* space;
541 ib_int64_t version = -1;
542
543 ut_ad(fil_system);
544
545 mutex_enter(&fil_system->mutex);
546
547 space = fil_space_get_by_id(id);
548
549 if (space) {
550 version = space->tablespace_version;
551 }
552
553 mutex_exit(&fil_system->mutex);
554
555 return(version);
556 }
557
558 /*******************************************************************//**
559 Returns the latch of a file space.
560 @return latch protecting storage allocation */
561 UNIV_INTERN
562 prio_rw_lock_t*
fil_space_get_latch(ulint id,ulint * flags)563 fil_space_get_latch(
564 /*================*/
565 ulint id, /*!< in: space id */
566 ulint* flags) /*!< out: tablespace flags */
567 {
568 fil_space_t* space;
569
570 ut_ad(fil_system);
571
572 mutex_enter(&fil_system->mutex);
573
574 space = fil_space_get_by_id(id);
575
576 ut_a(space);
577
578 if (flags) {
579 *flags = space->flags;
580 }
581
582 mutex_exit(&fil_system->mutex);
583
584 return(&(space->latch));
585 }
586
587 /*******************************************************************//**
588 Returns the type of a file space.
589 @return FIL_TABLESPACE or FIL_LOG */
590 UNIV_INTERN
591 ulint
fil_space_get_type(ulint id)592 fil_space_get_type(
593 /*===============*/
594 ulint id) /*!< in: space id */
595 {
596 fil_space_t* space;
597
598 ut_ad(fil_system);
599
600 mutex_enter(&fil_system->mutex);
601
602 space = fil_space_get_by_id(id);
603
604 ut_a(space);
605
606 mutex_exit(&fil_system->mutex);
607
608 return(space->purpose);
609 }
610 #endif /* !UNIV_HOTBACKUP */
611
612 /**********************************************************************//**
613 Checks if all the file nodes in a space are flushed. The caller must hold
614 the fil_system mutex.
615 @return true if all are flushed */
616 static
617 bool
fil_space_is_flushed(fil_space_t * space)618 fil_space_is_flushed(
619 /*=================*/
620 fil_space_t* space) /*!< in: space */
621 {
622 fil_node_t* node;
623
624 ut_ad(mutex_own(&fil_system->mutex));
625
626 node = UT_LIST_GET_FIRST(space->chain);
627
628 while (node) {
629 if (node->modification_counter > node->flush_counter) {
630
631 ut_ad(!fil_buffering_disabled(space));
632 return(false);
633 }
634
635 node = UT_LIST_GET_NEXT(chain, node);
636 }
637
638 return(true);
639 }
640
641 /*******************************************************************//**
642 Appends a new file to the chain of files of a space. File must be closed.
643 @return pointer to the file name, or NULL on error */
644 UNIV_INTERN
645 char*
fil_node_create(const char * name,ulint size,ulint id,ibool is_raw)646 fil_node_create(
647 /*============*/
648 const char* name, /*!< in: file name (file must be closed) */
649 ulint size, /*!< in: file size in database blocks, rounded
650 downwards to an integer */
651 ulint id, /*!< in: space id where to append */
652 ibool is_raw) /*!< in: TRUE if a raw device or
653 a raw disk partition */
654 {
655 fil_node_t* node;
656 fil_space_t* space;
657
658 ut_a(fil_system);
659 ut_a(name);
660
661 mutex_enter(&fil_system->mutex);
662
663 node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t)));
664
665 node->name = mem_strdup(name);
666
667 ut_a(!is_raw || srv_start_raw_disk_in_use);
668
669 node->sync_event = os_event_create();
670 node->is_raw_disk = is_raw;
671 node->size = size;
672 node->magic_n = FIL_NODE_MAGIC_N;
673
674 space = fil_space_get_by_id(id);
675
676 if (!space) {
677 ut_print_timestamp(stderr);
678 fprintf(stderr,
679 " InnoDB: Error: Could not find tablespace %lu for\n"
680 "InnoDB: file ", (ulong) id);
681 ut_print_filename(stderr, name);
682 fputs(" in the tablespace memory cache.\n", stderr);
683 mem_free(node->name);
684
685 mem_free(node);
686
687 mutex_exit(&fil_system->mutex);
688
689 return(NULL);
690 }
691
692 space->size += size;
693
694 node->space = space;
695
696 UT_LIST_ADD_LAST(chain, space->chain, node);
697
698 if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
699
700 fil_system->max_assigned_id = id;
701 }
702
703 mutex_exit(&fil_system->mutex);
704
705 return(node->name);
706 }
707
708 /********************************************************************//**
709 Opens a file of a node of a tablespace. The caller must own the fil_system
710 mutex.
711 @return false if the file can't be opened, otherwise true */
712 static
713 bool
fil_node_open_file(fil_node_t * node,fil_system_t * system,fil_space_t * space)714 fil_node_open_file(
715 /*===============*/
716 fil_node_t* node, /*!< in: file node */
717 fil_system_t* system, /*!< in: tablespace memory cache */
718 fil_space_t* space) /*!< in: space */
719 {
720 os_offset_t size_bytes;
721 ibool ret;
722 ibool success;
723 byte* buf2;
724 byte* page;
725 ulint space_id;
726 ulint flags;
727 ulint page_size;
728
729 ut_ad(mutex_own(&(system->mutex)));
730 ut_a(node->n_pending == 0);
731 ut_a(node->open == FALSE);
732
733 if (node->size == 0) {
734 /* It must be a single-table tablespace and we do not know the
735 size of the file yet. First we open the file in the normal
736 mode, no async I/O here, for simplicity. Then do some checks,
737 and close the file again.
738 NOTE that we could not use the simple file read function
739 os_file_read() in Windows to read from a file opened for
740 async I/O! */
741
742 node->handle = os_file_create_simple_no_error_handling(
743 innodb_file_data_key, node->name, OS_FILE_OPEN,
744 OS_FILE_READ_ONLY, &success);
745 if (!success) {
746 /* The following call prints an error message */
747 os_file_get_last_error(true);
748
749 ut_print_timestamp(stderr);
750
751 ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
752 "open %s\n. InnoDB: Have you deleted .ibd "
753 "files under a running mysqld server?\n",
754 node->name);
755
756 return(false);
757 }
758
759 size_bytes = os_file_get_size(node->handle);
760 ut_a(size_bytes != (os_offset_t) -1);
761 #ifdef UNIV_HOTBACKUP
762 if (space->id == 0) {
763 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
764 os_file_close(node->handle);
765 goto add_size;
766 }
767 #endif /* UNIV_HOTBACKUP */
768 ut_a(space->purpose != FIL_LOG);
769 ut_a(fil_is_user_tablespace_id(space->id));
770
771 if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
772 fprintf(stderr,
773 "InnoDB: Error: the size of single-table"
774 " tablespace file %s\n"
775 "InnoDB: is only " UINT64PF ","
776 " should be at least %lu!\n",
777 node->name,
778 size_bytes,
779 (ulong) (FIL_IBD_FILE_INITIAL_SIZE
780 * UNIV_PAGE_SIZE));
781
782 ut_a(0);
783 }
784
785 /* Read the first page of the tablespace */
786
787 buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
788 /* Align the memory for file i/o if we might have O_DIRECT
789 set */
790 page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
791
792 success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
793 space_id = fsp_header_get_space_id(page);
794 flags = fsp_header_get_flags(page);
795 page_size = fsp_flags_get_page_size(flags);
796
797 ut_free(buf2);
798
799 /* Close the file now that we have read the space id from it */
800
801 os_file_close(node->handle);
802
803 if (UNIV_UNLIKELY(space_id != space->id)) {
804 fprintf(stderr,
805 "InnoDB: Error: tablespace id is %lu"
806 " in the data dictionary\n"
807 "InnoDB: but in file %s it is %lu!\n",
808 space->id, node->name, space_id);
809
810 ut_error;
811 }
812
813 if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
814 || space_id == 0)) {
815 fprintf(stderr,
816 "InnoDB: Error: tablespace id %lu"
817 " in file %s is not sensible\n",
818 (ulong) space_id, node->name);
819
820 ut_error;
821 }
822
823 if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags)
824 != page_size)) {
825 fprintf(stderr,
826 "InnoDB: Error: tablespace file %s"
827 " has page size 0x%lx\n"
828 "InnoDB: but the data dictionary"
829 " expects page size 0x%lx!\n",
830 node->name, flags,
831 fsp_flags_get_page_size(space->flags));
832
833 ut_error;
834 }
835
836 if (UNIV_UNLIKELY(space->flags != flags)) {
837 fprintf(stderr,
838 "InnoDB: Error: table flags are 0x%lx"
839 " in the data dictionary\n"
840 "InnoDB: but the flags in file %s are 0x%lx!\n",
841 space->flags, node->name, flags);
842
843 ut_error;
844 }
845
846 if (size_bytes >= 1024 * 1024) {
847 /* Truncate the size to whole megabytes. */
848 size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
849 }
850
851 if (!fsp_flags_is_compressed(flags)) {
852 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
853 } else {
854 node->size = (ulint)
855 (size_bytes
856 / fsp_flags_get_zip_size(flags));
857 }
858
859 #ifdef UNIV_HOTBACKUP
860 add_size:
861 #endif /* UNIV_HOTBACKUP */
862 space->size += node->size;
863 }
864
865 /* printf("Opening file %s\n", node->name); */
866
867 /* Open the file for reading and writing, in Windows normally in the
868 unbuffered async I/O mode, though global variables may make
869 os_file_create() to fall back to the normal file I/O mode. */
870
871 if (space->purpose == FIL_LOG) {
872 node->handle = os_file_create(innodb_file_log_key,
873 node->name, OS_FILE_OPEN,
874 OS_FILE_AIO, OS_LOG_FILE,
875 &ret);
876 } else if (node->is_raw_disk) {
877 node->handle = os_file_create(innodb_file_data_key,
878 node->name,
879 OS_FILE_OPEN_RAW,
880 OS_FILE_AIO, OS_DATA_FILE,
881 &ret);
882 } else {
883 node->handle = os_file_create(innodb_file_data_key,
884 node->name, OS_FILE_OPEN,
885 OS_FILE_AIO, OS_DATA_FILE,
886 &ret);
887 }
888
889 ut_a(ret);
890
891 node->open = TRUE;
892
893 system->n_open++;
894 fil_n_file_opened++;
895
896 if (fil_space_belongs_in_lru(space)) {
897
898 /* Put the node to the LRU list */
899 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
900 }
901
902 return(true);
903 }
904
905 /**********************************************************************//**
906 Closes a file. */
907 static
908 void
fil_node_close_file(fil_node_t * node,fil_system_t * system)909 fil_node_close_file(
910 /*================*/
911 fil_node_t* node, /*!< in: file node */
912 fil_system_t* system) /*!< in: tablespace memory cache */
913 {
914 ibool ret;
915
916 ut_ad(node && system);
917 ut_ad(mutex_own(&(system->mutex)));
918 ut_a(node->open);
919 ut_a(node->n_pending == 0);
920 ut_a(node->n_pending_flushes == 0);
921 ut_a(!node->being_extended);
922 #ifndef UNIV_HOTBACKUP
923 ut_a(node->modification_counter == node->flush_counter
924 || srv_fast_shutdown == 2);
925 #endif /* !UNIV_HOTBACKUP */
926
927 ret = os_file_close(node->handle);
928 ut_a(ret);
929
930 /* printf("Closing file %s\n", node->name); */
931
932 node->open = FALSE;
933 ut_a(system->n_open > 0);
934 system->n_open--;
935 fil_n_file_opened--;
936
937 if (fil_space_belongs_in_lru(node->space)) {
938
939 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
940
941 /* The node is in the LRU list, remove it */
942 UT_LIST_REMOVE(LRU, system->LRU, node);
943 }
944 }
945
946 /********************************************************************//**
947 Tries to close a file in the LRU list. The caller must hold the fil_sys
948 mutex.
949 @return TRUE if success, FALSE if should retry later; since i/o's
950 generally complete in < 100 ms, and as InnoDB writes at most 128 pages
951 from the buffer pool in a batch, and then immediately flushes the
952 files, there is a good chance that the next time we find a suitable
953 node from the LRU list */
954 static
955 ibool
fil_try_to_close_file_in_LRU(ibool print_info)956 fil_try_to_close_file_in_LRU(
957 /*=========================*/
958 ibool print_info) /*!< in: if TRUE, prints information why it
959 cannot close a file */
960 {
961 fil_node_t* node;
962
963 ut_ad(mutex_own(&fil_system->mutex));
964
965 if (print_info) {
966 fprintf(stderr,
967 "InnoDB: fil_sys open file LRU len %lu\n",
968 (ulong) UT_LIST_GET_LEN(fil_system->LRU));
969 }
970
971 for (node = UT_LIST_GET_LAST(fil_system->LRU);
972 node != NULL;
973 node = UT_LIST_GET_PREV(LRU, node)) {
974
975 if (node->modification_counter == node->flush_counter
976 && node->n_pending_flushes == 0
977 && !node->being_extended) {
978
979 fil_node_close_file(node, fil_system);
980
981 return(TRUE);
982 }
983
984 if (!print_info) {
985 continue;
986 }
987
988 if (node->n_pending_flushes > 0) {
989 fputs("InnoDB: cannot close file ", stderr);
990 ut_print_filename(stderr, node->name);
991 fprintf(stderr, ", because n_pending_flushes %lu\n",
992 (ulong) node->n_pending_flushes);
993 }
994
995 if (node->modification_counter != node->flush_counter) {
996 fputs("InnoDB: cannot close file ", stderr);
997 ut_print_filename(stderr, node->name);
998 fprintf(stderr,
999 ", because mod_count %ld != fl_count %ld\n",
1000 (long) node->modification_counter,
1001 (long) node->flush_counter);
1002
1003 }
1004
1005 if (node->being_extended) {
1006 fputs("InnoDB: cannot close file ", stderr);
1007 ut_print_filename(stderr, node->name);
1008 fprintf(stderr, ", because it is being extended\n");
1009 }
1010 }
1011
1012 return(FALSE);
1013 }
1014
/*******************************************************************//**
Reserves the fil_system mutex and tries to make sure we can open at least one
file while holding it. This should be called before calling
fil_node_prepare_for_io(), because that function may need to open a file.

Every return path of this function leaves fil_system->mutex held. Whenever
the function has to wait (because the space has stop_ios set, or because no
file in the LRU list could be closed) it releases the mutex, flushes the
tablespaces and restarts the whole check from the 'retry' label. If the
number of open files is still at or above the limit when 'count' has
reached 2, a warning is printed and the function returns anyway. */
static
void
fil_mutex_enter_and_prepare_for_io(
/*===============================*/
	ulint	space_id)	/*!< in: space id */
{
	fil_space_t*	space;
	ibool		success;
	ibool		print_info	= FALSE;
	/* Number of retries made because too many files were open */
	ulint		count		= 0;
	/* Number of retries made because space->stop_ios was set */
	ulint		count2		= 0;

retry:
	mutex_enter(&fil_system->mutex);

	if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
		/* We keep log files and system tablespace files always open;
		this is important in preventing deadlocks in this module, as
		a page read completion often performs another read from the
		insert buffer. The insert buffer is in tablespace 0, and we
		cannot end up waiting in this function. */

		return;
	}

	space = fil_space_get_by_id(space_id);

	if (space != NULL && space->stop_ios) {
		/* We are going to do a rename file and want to stop new i/o's
		for a while */

		/* Once the threshold is passed, this warning is printed on
		every further iteration */
		if (count2 > 20000) {
			fputs("InnoDB: Warning: tablespace ", stderr);
			ut_print_filename(stderr, space->name);
			fprintf(stderr,
				" has i/o ops stopped for a long time %lu\n",
				(ulong) count2);
		}

		mutex_exit(&fil_system->mutex);

#ifndef UNIV_HOTBACKUP

		/* Wake the i/o-handler threads to make sure pending
		i/o's are performed */
		os_aio_simulated_wake_handler_threads();

		/* The sleep here is just to give IO helper threads a
		bit of time to do some work. It is not required that
		all IO related to the tablespace being renamed must
		be flushed here as we do fil_flush() in
		fil_rename_tablespace() as well. */
		os_thread_sleep(20000);

#endif /* !UNIV_HOTBACKUP */

		/* Flush tablespaces so that we can close modified
		files in the LRU list */
		fil_flush_file_spaces(FIL_TABLESPACE);

		os_thread_sleep(20000);

		count2++;

		/* The mutex was released above; 'retry' re-acquires it and
		looks the space up again */
		goto retry;
	}

	if (fil_system->n_open < fil_system->max_n_open) {
		/* There is room to open one more file */

		return;
	}

	/* If the file is already open, no need to do anything; if the space
	does not exist, we handle the situation in the function which called
	this function */

	if (!space || UT_LIST_GET_FIRST(space->chain)->open) {

		return;
	}

	/* From the third attempt on, report why files cannot be closed */
	if (count > 1) {
		print_info = TRUE;
	}

	/* Too many files are open, try to close some */
close_more:
	success = fil_try_to_close_file_in_LRU(print_info);

	/* Keep closing for as long as closing succeeds and the limit is
	still reached */
	if (success && fil_system->n_open >= fil_system->max_n_open) {

		goto close_more;
	}

	if (fil_system->n_open < fil_system->max_n_open) {
		/* Ok */

		return;
	}

	/* Give up after two failed rounds: warn and return with the mutex
	held even though the limit is still reached */
	if (count >= 2) {
		ut_print_timestamp(stderr);
		fprintf(stderr,
			"  InnoDB: Warning: too many (%lu) files stay open"
			" while the maximum\n"
			"InnoDB: allowed value would be %lu.\n"
			"InnoDB: You may need to raise the value of"
			" innodb_open_files in\n"
			"InnoDB: my.cnf.\n",
			(ulong) fil_system->n_open,
			(ulong) fil_system->max_n_open);

		return;
	}

	mutex_exit(&fil_system->mutex);

#ifndef UNIV_HOTBACKUP
	/* Wake the i/o-handler threads to make sure pending i/o's are
	performed */
	os_aio_simulated_wake_handler_threads();

	os_thread_sleep(20000);
#endif /* !UNIV_HOTBACKUP */
	/* Flush tablespaces so that we can close modified files in the LRU
	list */

	fil_flush_file_spaces(FIL_TABLESPACE);

	count++;

	goto retry;
}
1152
1153 /*******************************************************************//**
1154 Frees a file node object from a tablespace memory cache. */
1155 static
1156 void
fil_node_free(fil_node_t * node,fil_system_t * system,fil_space_t * space)1157 fil_node_free(
1158 /*==========*/
1159 fil_node_t* node, /*!< in, own: file node */
1160 fil_system_t* system, /*!< in: tablespace memory cache */
1161 fil_space_t* space) /*!< in: space where the file node is chained */
1162 {
1163 ut_ad(node && system && space);
1164 ut_ad(mutex_own(&(system->mutex)));
1165 ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1166 ut_a(node->n_pending == 0);
1167 ut_a(!node->being_extended);
1168
1169 if (node->open) {
1170 /* We fool the assertion in fil_node_close_file() to think
1171 there are no unflushed modifications in the file */
1172
1173 node->modification_counter = node->flush_counter;
1174 os_event_set(node->sync_event);
1175
1176 if (fil_buffering_disabled(space)) {
1177
1178 ut_ad(!space->is_in_unflushed_spaces);
1179 ut_ad(fil_space_is_flushed(space));
1180
1181 } else if (space->is_in_unflushed_spaces
1182 && fil_space_is_flushed(space)) {
1183
1184 space->is_in_unflushed_spaces = false;
1185
1186 UT_LIST_REMOVE(unflushed_spaces,
1187 system->unflushed_spaces,
1188 space);
1189 }
1190
1191 fil_node_close_file(node, system);
1192 }
1193
1194 space->size -= node->size;
1195
1196 UT_LIST_REMOVE(chain, space->chain, node);
1197
1198 os_event_free(node->sync_event);
1199 mem_free(node->name);
1200 mem_free(node);
1201 }
1202
1203 #ifdef UNIV_LOG_ARCHIVE
1204 /****************************************************************//**
1205 Drops files from the start of a file space, so that its size is cut by
1206 the amount given. */
1207 UNIV_INTERN
1208 void
fil_space_truncate_start(ulint id,ulint trunc_len)1209 fil_space_truncate_start(
1210 /*=====================*/
1211 ulint id, /*!< in: space id */
1212 ulint trunc_len) /*!< in: truncate by this much; it is an error
1213 if this does not equal to the combined size of
1214 some initial files in the space */
1215 {
1216 fil_node_t* node;
1217 fil_space_t* space;
1218
1219 mutex_enter(&fil_system->mutex);
1220
1221 space = fil_space_get_by_id(id);
1222
1223 ut_a(space);
1224
1225 while (trunc_len > 0) {
1226 node = UT_LIST_GET_FIRST(space->chain);
1227
1228 ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);
1229
1230 trunc_len -= node->size * UNIV_PAGE_SIZE;
1231
1232 fil_node_free(node, fil_system, space);
1233 }
1234
1235 mutex_exit(&fil_system->mutex);
1236 }
1237
1238 /****************************************************************//**
1239 Check is there node in file space with given name. */
1240 UNIV_INTERN
1241 ibool
fil_space_contains_node(ulint id,char * node_name)1242 fil_space_contains_node(
1243 /*====================*/
1244 ulint id, /*!< in: space id */
1245 char* node_name) /*!< in: node name */
1246 {
1247 fil_node_t* node;
1248 fil_space_t* space;
1249
1250 mutex_enter(&fil_system->mutex);
1251
1252 space = fil_space_get_by_id(id);
1253
1254 ut_a(space);
1255
1256 for (node = UT_LIST_GET_FIRST(space->chain); node != NULL;
1257 node = UT_LIST_GET_NEXT(chain, node)) {
1258
1259 if (ut_strcmp(node->name, node_name) == 0) {
1260 mutex_exit(&fil_system->mutex);
1261 return(TRUE);
1262 }
1263
1264 }
1265
1266 mutex_exit(&fil_system->mutex);
1267 return(FALSE);
1268 }
1269
1270 #endif /* UNIV_LOG_ARCHIVE */
1271
1272 /*******************************************************************//**
1273 Creates a space memory object and puts it to the 'fil system' hash table.
1274 If there is an error, prints an error message to the .err log.
1275 @return TRUE if success */
1276 UNIV_INTERN
1277 ibool
fil_space_create(const char * name,ulint id,ulint flags,ulint purpose)1278 fil_space_create(
1279 /*=============*/
1280 const char* name, /*!< in: space name */
1281 ulint id, /*!< in: space id */
1282 ulint flags, /*!< in: tablespace flags */
1283 ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
1284 {
1285 fil_space_t* space;
1286
1287 DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
1288
1289 ut_a(fil_system);
1290 ut_a(fsp_flags_is_valid(flags));
1291
1292 /* Look for a matching tablespace and if found free it. */
1293 do {
1294 mutex_enter(&fil_system->mutex);
1295
1296 space = fil_space_get_by_name(name);
1297
1298 if (space != 0) {
1299 ib_logf(IB_LOG_LEVEL_WARN,
1300 "Tablespace '%s' exists in the cache "
1301 "with id %lu != %lu",
1302 name, (ulong) space->id, (ulong) id);
1303
1304 if (id == 0 || purpose != FIL_TABLESPACE) {
1305
1306 mutex_exit(&fil_system->mutex);
1307
1308 return(FALSE);
1309 }
1310
1311 ib_logf(IB_LOG_LEVEL_WARN,
1312 "Freeing existing tablespace '%s' entry "
1313 "from the cache with id %lu",
1314 name, (ulong) id);
1315
1316 ibool success = fil_space_free(space->id, FALSE);
1317 ut_a(success);
1318
1319 mutex_exit(&fil_system->mutex);
1320 }
1321
1322 } while (space != 0);
1323
1324 space = fil_space_get_by_id(id);
1325
1326 if (space != 0) {
1327 ib_logf(IB_LOG_LEVEL_ERROR,
1328 "Trying to add tablespace '%s' with id %lu "
1329 "to the tablespace memory cache, but tablespace '%s' "
1330 "with id %lu already exists in the cache!",
1331 name, (ulong) id, space->name, (ulong) space->id);
1332
1333 mutex_exit(&fil_system->mutex);
1334
1335 return(FALSE);
1336 }
1337
1338 space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space)));
1339
1340 space->name = mem_strdup(name);
1341 space->id = id;
1342
1343 fil_system->tablespace_version++;
1344 space->tablespace_version = fil_system->tablespace_version;
1345 space->mark = FALSE;
1346
1347 if (purpose == FIL_TABLESPACE && !recv_recovery_on
1348 && id > fil_system->max_assigned_id) {
1349
1350 if (!fil_system->space_id_reuse_warned) {
1351 fil_system->space_id_reuse_warned = TRUE;
1352
1353 ib_logf(IB_LOG_LEVEL_WARN,
1354 "Allocated tablespace %lu, old maximum "
1355 "was %lu",
1356 (ulong) id,
1357 (ulong) fil_system->max_assigned_id);
1358 }
1359
1360 fil_system->max_assigned_id = id;
1361 }
1362
1363 space->purpose = purpose;
1364 space->flags = flags;
1365
1366 space->magic_n = FIL_SPACE_MAGIC_N;
1367
1368 rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1369
1370 HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1371
1372 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1373 ut_fold_string(name), space);
1374 space->is_in_unflushed_spaces = false;
1375
1376 space->is_corrupt = FALSE;
1377
1378 UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1379
1380 mutex_exit(&fil_system->mutex);
1381
1382 return(TRUE);
1383 }
1384
1385 /*******************************************************************//**
1386 Assigns a new space id for a new single-table tablespace. This works simply by
1387 incrementing the global counter. If 4 billion id's is not enough, we may need
1388 to recycle id's.
1389 @return TRUE if assigned, FALSE if not */
1390 UNIV_INTERN
1391 ibool
fil_assign_new_space_id(ulint * space_id)1392 fil_assign_new_space_id(
1393 /*====================*/
1394 ulint* space_id) /*!< in/out: space id */
1395 {
1396 ulint id;
1397 ibool success;
1398
1399 mutex_enter(&fil_system->mutex);
1400
1401 id = *space_id;
1402
1403 if (id < fil_system->max_assigned_id) {
1404 id = fil_system->max_assigned_id;
1405 }
1406
1407 id++;
1408
1409 if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1410 ut_print_timestamp(stderr);
1411 fprintf(stderr,
1412 "InnoDB: Warning: you are running out of new"
1413 " single-table tablespace id's.\n"
1414 "InnoDB: Current counter is %lu and it"
1415 " must not exceed %lu!\n"
1416 "InnoDB: To reset the counter to zero"
1417 " you have to dump all your tables and\n"
1418 "InnoDB: recreate the whole InnoDB installation.\n",
1419 (ulong) id,
1420 (ulong) SRV_LOG_SPACE_FIRST_ID);
1421 }
1422
1423 success = (id < SRV_LOG_SPACE_FIRST_ID);
1424
1425 if (success) {
1426 *space_id = fil_system->max_assigned_id = id;
1427 } else {
1428 ut_print_timestamp(stderr);
1429 fprintf(stderr,
1430 "InnoDB: You have run out of single-table"
1431 " tablespace id's!\n"
1432 "InnoDB: Current counter is %lu.\n"
1433 "InnoDB: To reset the counter to zero you"
1434 " have to dump all your tables and\n"
1435 "InnoDB: recreate the whole InnoDB installation.\n",
1436 (ulong) id);
1437 *space_id = ULINT_UNDEFINED;
1438 }
1439
1440 mutex_exit(&fil_system->mutex);
1441
1442 return(success);
1443 }
1444
1445 /*******************************************************************//**
1446 Frees a space object from the tablespace memory cache. Closes the files in
1447 the chain but does not delete them. There must not be any pending i/o's or
1448 flushes on the files.
1449 @return TRUE if success */
1450 static
1451 ibool
fil_space_free(ulint id,ibool x_latched)1452 fil_space_free(
1453 /*===========*/
1454 /* out: TRUE if success */
1455 ulint id, /* in: space id */
1456 ibool x_latched) /* in: TRUE if caller has space->latch
1457 in X mode */
1458 {
1459 fil_space_t* space;
1460 fil_space_t* fnamespace;
1461
1462 ut_ad(mutex_own(&fil_system->mutex));
1463
1464 space = fil_space_get_by_id(id);
1465
1466 if (!space) {
1467 ut_print_timestamp(stderr);
1468 fprintf(stderr,
1469 " InnoDB: Error: trying to remove tablespace %lu"
1470 " from the cache but\n"
1471 "InnoDB: it is not there.\n", (ulong) id);
1472
1473 return(FALSE);
1474 }
1475
1476 HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1477
1478 fnamespace = fil_space_get_by_name(space->name);
1479 ut_a(fnamespace);
1480 ut_a(space == fnamespace);
1481
1482 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1483 ut_fold_string(space->name), space);
1484
1485 if (space->is_in_unflushed_spaces) {
1486
1487 ut_ad(!fil_buffering_disabled(space));
1488 space->is_in_unflushed_spaces = false;
1489
1490 UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1491 space);
1492 }
1493
1494 UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1495
1496 ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1497 ut_a(0 == space->n_pending_flushes);
1498
1499 for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
1500 fil_node != NULL;
1501 fil_node = UT_LIST_GET_FIRST(space->chain)) {
1502
1503 fil_node_free(fil_node, fil_system, space);
1504 }
1505
1506 ut_a(0 == UT_LIST_GET_LEN(space->chain));
1507
1508 if (x_latched) {
1509 rw_lock_x_unlock(&space->latch);
1510 }
1511
1512 rw_lock_free(&(space->latch));
1513
1514 mem_free(space->name);
1515 mem_free(space);
1516
1517 return(TRUE);
1518 }
1519
1520 /*******************************************************************//**
1521 Returns a pointer to the file_space_t that is in the memory cache
1522 associated with a space id. The caller must lock fil_system->mutex.
1523 @return file_space_t pointer, NULL if space not found */
1524 UNIV_INLINE
1525 fil_space_t*
fil_space_get_space(ulint id)1526 fil_space_get_space(
1527 /*================*/
1528 ulint id) /*!< in: space id */
1529 {
1530 fil_space_t* space;
1531 fil_node_t* node;
1532
1533 ut_ad(fil_system);
1534
1535 space = fil_space_get_by_id(id);
1536 if (space == NULL) {
1537 return(NULL);
1538 }
1539
1540 if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1541 ut_a(id != 0);
1542
1543 mutex_exit(&fil_system->mutex);
1544
1545 /* It is possible that the space gets evicted at this point
1546 before the fil_mutex_enter_and_prepare_for_io() acquires
1547 the fil_system->mutex. Check for this after completing the
1548 call to fil_mutex_enter_and_prepare_for_io(). */
1549 fil_mutex_enter_and_prepare_for_io(id);
1550
1551 /* We are still holding the fil_system->mutex. Check if
1552 the space is still in memory cache. */
1553 space = fil_space_get_by_id(id);
1554 if (space == NULL) {
1555 return(NULL);
1556 }
1557
1558 /* The following code must change when InnoDB supports
1559 multiple datafiles per tablespace. */
1560 ut_a(1 == UT_LIST_GET_LEN(space->chain));
1561
1562 node = UT_LIST_GET_FIRST(space->chain);
1563
1564 /* It must be a single-table tablespace and we have not opened
1565 the file yet; the following calls will open it and update the
1566 size fields */
1567
1568 if (!fil_node_prepare_for_io(node, fil_system, space)) {
1569 /* The single-table tablespace can't be opened,
1570 because the ibd file is missing. */
1571 return(NULL);
1572 }
1573 fil_node_complete_io(node, fil_system, OS_FILE_READ);
1574 }
1575
1576 return(space);
1577 }
1578
1579 /*******************************************************************//**
1580 Returns the path from the first fil_node_t found for the space ID sent.
1581 The caller is responsible for freeing the memory allocated here for the
1582 value returned.
1583 @return own: A copy of fil_node_t::path, NULL if space ID is zero
1584 or not found. */
1585 UNIV_INTERN
1586 char*
fil_space_get_first_path(ulint id)1587 fil_space_get_first_path(
1588 /*=====================*/
1589 ulint id) /*!< in: space id */
1590 {
1591 fil_space_t* space;
1592 fil_node_t* node;
1593 char* path;
1594
1595 ut_ad(fil_system);
1596 ut_a(id);
1597
1598 fil_mutex_enter_and_prepare_for_io(id);
1599
1600 space = fil_space_get_space(id);
1601
1602 if (space == NULL) {
1603 mutex_exit(&fil_system->mutex);
1604
1605 return(NULL);
1606 }
1607
1608 ut_ad(mutex_own(&fil_system->mutex));
1609
1610 node = UT_LIST_GET_FIRST(space->chain);
1611
1612 path = mem_strdup(node->name);
1613
1614 mutex_exit(&fil_system->mutex);
1615
1616 return(path);
1617 }
1618
1619 /*******************************************************************//**
1620 Returns the size of the space in pages. The tablespace must be cached in the
1621 memory cache.
1622 @return space size, 0 if space not found */
1623 UNIV_INTERN
1624 ulint
fil_space_get_size(ulint id)1625 fil_space_get_size(
1626 /*===============*/
1627 ulint id) /*!< in: space id */
1628 {
1629 fil_space_t* space;
1630 ulint size;
1631
1632 ut_ad(fil_system);
1633 mutex_enter(&fil_system->mutex);
1634
1635 space = fil_space_get_space(id);
1636
1637 size = space ? space->size : 0;
1638
1639 mutex_exit(&fil_system->mutex);
1640
1641 return(size);
1642 }
1643
1644 /*******************************************************************//**
1645 Returns the flags of the space. The tablespace must be cached
1646 in the memory cache.
1647 @return flags, ULINT_UNDEFINED if space not found */
1648 UNIV_INTERN
1649 ulint
fil_space_get_flags(ulint id)1650 fil_space_get_flags(
1651 /*================*/
1652 ulint id) /*!< in: space id */
1653 {
1654 fil_space_t* space;
1655 ulint flags;
1656
1657 ut_ad(fil_system);
1658
1659 if (!id) {
1660 return(0);
1661 }
1662
1663 mutex_enter(&fil_system->mutex);
1664
1665 space = fil_space_get_space(id);
1666
1667 if (space == NULL) {
1668 mutex_exit(&fil_system->mutex);
1669
1670 return(ULINT_UNDEFINED);
1671 }
1672
1673 flags = space->flags;
1674
1675 mutex_exit(&fil_system->mutex);
1676
1677 return(flags);
1678 }
1679
1680 /*******************************************************************//**
1681 Returns the compressed page size of the space, or 0 if the space
1682 is not compressed. The tablespace must be cached in the memory cache.
1683 @return compressed page size, ULINT_UNDEFINED if space not found */
1684 UNIV_INTERN
1685 ulint
fil_space_get_zip_size(ulint id)1686 fil_space_get_zip_size(
1687 /*===================*/
1688 ulint id) /*!< in: space id */
1689 {
1690 ulint flags;
1691
1692 flags = fil_space_get_flags(id);
1693
1694 if (flags && flags != ULINT_UNDEFINED) {
1695
1696 return(fsp_flags_get_zip_size(flags));
1697 }
1698
1699 return(flags);
1700 }
1701
1702 /*******************************************************************//**
1703 Checks if the pair space, page_no refers to an existing page in a tablespace
1704 file space. The tablespace must be cached in the memory cache.
1705 @return TRUE if the address is meaningful */
1706 UNIV_INTERN
1707 ibool
fil_check_adress_in_tablespace(ulint id,ulint page_no)1708 fil_check_adress_in_tablespace(
1709 /*===========================*/
1710 ulint id, /*!< in: space id */
1711 ulint page_no)/*!< in: page number */
1712 {
1713 if (fil_space_get_size(id) > page_no) {
1714
1715 return(TRUE);
1716 }
1717
1718 return(FALSE);
1719 }
1720
1721 /****************************************************************//**
1722 Initializes the tablespace memory cache. */
1723 UNIV_INTERN
1724 void
fil_init(ulint hash_size,ulint max_n_open)1725 fil_init(
1726 /*=====*/
1727 ulint hash_size, /*!< in: hash table size */
1728 ulint max_n_open) /*!< in: max number of open files */
1729 {
1730 ut_a(fil_system == NULL);
1731
1732 ut_a(hash_size > 0);
1733 ut_a(max_n_open > 0);
1734
1735 fil_system = static_cast<fil_system_t*>(
1736 mem_zalloc(sizeof(fil_system_t)));
1737
1738 mutex_create(fil_system_mutex_key,
1739 &fil_system->mutex, SYNC_ANY_LATCH);
1740
1741 fil_system->spaces = hash_create(hash_size);
1742 fil_system->name_hash = hash_create(hash_size);
1743
1744 UT_LIST_INIT(fil_system->LRU);
1745
1746 fil_system->max_n_open = max_n_open;
1747 }
1748
1749 /*******************************************************************//**
1750 Opens all log files and system tablespace data files. They stay open until the
1751 database server shutdown. This should be called at a server startup after the
1752 space objects for the log and the system tablespace have been created. The
1753 purpose of this operation is to make sure we never run out of file descriptors
1754 if we need to read from the insert buffer or to write to the log. */
1755 UNIV_INTERN
1756 void
fil_open_log_and_system_tablespace_files(void)1757 fil_open_log_and_system_tablespace_files(void)
1758 /*==========================================*/
1759 {
1760 fil_space_t* space;
1761
1762 mutex_enter(&fil_system->mutex);
1763
1764 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1765 space != NULL;
1766 space = UT_LIST_GET_NEXT(space_list, space)) {
1767
1768 fil_node_t* node;
1769
1770 if (fil_space_belongs_in_lru(space)) {
1771
1772 continue;
1773 }
1774
1775 for (node = UT_LIST_GET_FIRST(space->chain);
1776 node != NULL;
1777 node = UT_LIST_GET_NEXT(chain, node)) {
1778
1779 if (!node->open) {
1780 if (!fil_node_open_file(node, fil_system,
1781 space)) {
1782 /* This func is called during server's
1783 startup. If some file of log or system
1784 tablespace is missing, the server
1785 can't start successfully. So we should
1786 assert for it. */
1787 ut_a(0);
1788 }
1789 }
1790
1791 if (fil_system->max_n_open < 10 + fil_system->n_open) {
1792
1793 fprintf(stderr,
1794 "InnoDB: Warning: you must"
1795 " raise the value of"
1796 " innodb_open_files in\n"
1797 "InnoDB: my.cnf! Remember that"
1798 " InnoDB keeps all log files"
1799 " and all system\n"
1800 "InnoDB: tablespace files open"
1801 " for the whole time mysqld is"
1802 " running, and\n"
1803 "InnoDB: needs to open also"
1804 " some .ibd files if the"
1805 " file-per-table storage\n"
1806 "InnoDB: model is used."
1807 " Current open files %lu,"
1808 " max allowed"
1809 " open files %lu.\n",
1810 (ulong) fil_system->n_open,
1811 (ulong) fil_system->max_n_open);
1812 }
1813 }
1814 }
1815
1816 mutex_exit(&fil_system->mutex);
1817 }
1818
1819 /*******************************************************************//**
1820 Closes all open files. There must not be any pending i/o's or not flushed
1821 modifications in the files. */
1822 UNIV_INTERN
1823 void
fil_close_all_files(void)1824 fil_close_all_files(void)
1825 /*=====================*/
1826 {
1827 fil_space_t* space;
1828
1829 // Must check both flags as it's possible for this to be called during
1830 // server startup with srv_track_changed_pages == true but
1831 // srv_redo_log_thread_started == false
1832 if (srv_track_changed_pages && srv_redo_log_thread_started)
1833 os_event_wait(srv_redo_log_tracked_event);
1834
1835 mutex_enter(&fil_system->mutex);
1836
1837 space = UT_LIST_GET_FIRST(fil_system->space_list);
1838
1839 while (space != NULL) {
1840 fil_node_t* node;
1841 fil_space_t* prev_space = space;
1842
1843 for (node = UT_LIST_GET_FIRST(space->chain);
1844 node != NULL;
1845 node = UT_LIST_GET_NEXT(chain, node)) {
1846
1847 if (node->open) {
1848 fil_node_close_file(node, fil_system);
1849 }
1850 }
1851
1852 space = UT_LIST_GET_NEXT(space_list, space);
1853
1854 fil_space_free(prev_space->id, FALSE);
1855 }
1856
1857 mutex_exit(&fil_system->mutex);
1858 }
1859
1860 /*******************************************************************//**
1861 Closes the redo log files. There must not be any pending i/o's or not
1862 flushed modifications in the files. */
1863 UNIV_INTERN
1864 void
fil_close_log_files(bool free)1865 fil_close_log_files(
1866 /*================*/
1867 bool free) /*!< in: whether to free the memory object */
1868 {
1869 fil_space_t* space;
1870
1871 // Must check both flags as it's possible for this to be called during
1872 // server startup with srv_track_changed_pages == true but
1873 // srv_redo_log_thread_started == false
1874 if (srv_track_changed_pages && srv_redo_log_thread_started)
1875 os_event_wait(srv_redo_log_tracked_event);
1876
1877 mutex_enter(&fil_system->mutex);
1878
1879 space = UT_LIST_GET_FIRST(fil_system->space_list);
1880
1881 while (space != NULL) {
1882 fil_node_t* node;
1883 fil_space_t* prev_space = space;
1884
1885 if (space->purpose != FIL_LOG) {
1886 space = UT_LIST_GET_NEXT(space_list, space);
1887 continue;
1888 }
1889
1890 for (node = UT_LIST_GET_FIRST(space->chain);
1891 node != NULL;
1892 node = UT_LIST_GET_NEXT(chain, node)) {
1893
1894 if (node->open) {
1895 fil_node_close_file(node, fil_system);
1896 }
1897 }
1898
1899 space = UT_LIST_GET_NEXT(space_list, space);
1900
1901 if (free) {
1902 fil_space_free(prev_space->id, FALSE);
1903 }
1904 }
1905
1906 mutex_exit(&fil_system->mutex);
1907 }
1908
1909 /*******************************************************************//**
1910 Sets the max tablespace id counter if the given number is bigger than the
1911 previous value. */
1912 UNIV_INTERN
1913 void
fil_set_max_space_id_if_bigger(ulint max_id)1914 fil_set_max_space_id_if_bigger(
1915 /*===========================*/
1916 ulint max_id) /*!< in: maximum known id */
1917 {
1918 if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1919 fprintf(stderr,
1920 "InnoDB: Fatal error: max tablespace id"
1921 " is too high, %lu\n", (ulong) max_id);
1922 ut_error;
1923 }
1924
1925 mutex_enter(&fil_system->mutex);
1926
1927 if (fil_system->max_assigned_id < max_id) {
1928
1929 fil_system->max_assigned_id = max_id;
1930 }
1931
1932 mutex_exit(&fil_system->mutex);
1933 }
1934
1935 /****************************************************************//**
1936 Writes the flushed lsn and the latest archived log number to the page header
1937 of the first page of a data file of the system tablespace (space 0),
1938 which is uncompressed. */
1939 static MY_ATTRIBUTE((warn_unused_result))
1940 dberr_t
fil_write_lsn_and_arch_no_to_file(ulint space,ulint sum_of_sizes,lsn_t lsn,ulint arch_log_no MY_ATTRIBUTE ((unused)))1941 fil_write_lsn_and_arch_no_to_file(
1942 /*==============================*/
1943 ulint space, /*!< in: space to write to */
1944 ulint sum_of_sizes, /*!< in: combined size of previous files
1945 in space, in database pages */
1946 lsn_t lsn, /*!< in: lsn to write */
1947 ulint arch_log_no MY_ATTRIBUTE((unused)))
1948 /*!< in: archived log number to write */
1949 {
1950 byte* buf1;
1951 byte* buf;
1952 dberr_t err;
1953
1954 buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE));
1955 buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
1956
1957 err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
1958 UNIV_PAGE_SIZE, buf, NULL);
1959 if (err == DB_SUCCESS) {
1960 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1961
1962 err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
1963 UNIV_PAGE_SIZE, buf, NULL);
1964 }
1965
1966 mem_free(buf1);
1967
1968 return(err);
1969 }
1970
1971 /****************************************************************//**
1972 Writes the flushed lsn and the latest archived log number to the page
1973 header of the first page of each data file in the system tablespace.
1974 @return DB_SUCCESS or error number */
1975 UNIV_INTERN
1976 dberr_t
fil_write_flushed_lsn_to_data_files(lsn_t lsn,ulint arch_log_no)1977 fil_write_flushed_lsn_to_data_files(
1978 /*================================*/
1979 lsn_t lsn, /*!< in: lsn to write */
1980 ulint arch_log_no) /*!< in: latest archived log file number */
1981 {
1982 fil_space_t* space;
1983 fil_node_t* node;
1984 dberr_t err;
1985
1986 mutex_enter(&fil_system->mutex);
1987
1988 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1989 space != NULL;
1990 space = UT_LIST_GET_NEXT(space_list, space)) {
1991
1992 /* We only write the lsn to all existing data files which have
1993 been open during the lifetime of the mysqld process; they are
1994 represented by the space objects in the tablespace memory
1995 cache. Note that all data files in the system tablespace 0
1996 and the UNDO log tablespaces (if separate) are always open. */
1997
1998 if (space->purpose == FIL_TABLESPACE
1999 && !fil_is_user_tablespace_id(space->id)) {
2000 ulint sum_of_sizes = 0;
2001
2002 for (node = UT_LIST_GET_FIRST(space->chain);
2003 node != NULL;
2004 node = UT_LIST_GET_NEXT(chain, node)) {
2005
2006 mutex_exit(&fil_system->mutex);
2007
2008 err = fil_write_lsn_and_arch_no_to_file(
2009 space->id, sum_of_sizes, lsn,
2010 arch_log_no);
2011
2012 if (err != DB_SUCCESS) {
2013
2014 return(err);
2015 }
2016
2017 mutex_enter(&fil_system->mutex);
2018
2019 sum_of_sizes += node->size;
2020 }
2021 }
2022 }
2023
2024 mutex_exit(&fil_system->mutex);
2025
2026 return(DB_SUCCESS);
2027 }
2028
2029 /*******************************************************************//**
2030 Checks the consistency of the first data page of a tablespace
2031 at database startup.
2032 @retval NULL on success, or if innodb_force_recovery is set
2033 @return pointer to an error message string */
2034 static MY_ATTRIBUTE((warn_unused_result))
2035 const char*
fil_check_first_page(const page_t * page)2036 fil_check_first_page(
2037 /*=================*/
2038 const page_t* page) /*!< in: data page */
2039 {
2040 ulint space_id;
2041 ulint flags;
2042
2043 if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
2044 return(NULL);
2045 }
2046
2047 space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
2048 flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
2049
2050 if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
2051 return("innodb-page-size mismatch");
2052 }
2053
2054 if (!space_id && !flags) {
2055 ulint nonzero_bytes = UNIV_PAGE_SIZE;
2056 const byte* b = page;
2057
2058 while (!*b && --nonzero_bytes) {
2059 b++;
2060 }
2061
2062 if (!nonzero_bytes) {
2063 return("space header page consists of zero bytes");
2064 }
2065 }
2066
2067 if (buf_page_is_corrupted(
2068 false, page, fsp_flags_get_zip_size(flags))) {
2069 return("checksum mismatch");
2070 }
2071
2072 if (page_get_space_id(page) == space_id
2073 && page_get_page_no(page) == 0) {
2074 return(NULL);
2075 }
2076
2077 return("inconsistent data in space header");
2078 }
2079
2080 /*******************************************************************//**
2081 Reads the flushed lsn, arch no, space_id and tablespace flag fields from
2082 the first page of a data file at database startup.
2083 @retval NULL on success, or if innodb_force_recovery is set
2084 @return pointer to an error message string */
2085 UNIV_INTERN
2086 const char*
fil_read_first_page(pfs_os_file_t data_file,ibool one_read_already,ulint * flags,ulint * space_id,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn)2087 fil_read_first_page(
2088 /*================*/
2089 pfs_os_file_t data_file, /*!< in: open data file */
2090 ibool one_read_already, /*!< in: TRUE if min and max
2091 parameters below already
2092 contain sensible data */
2093 ulint* flags, /*!< out: tablespace flags */
2094 ulint* space_id, /*!< out: tablespace ID */
2095 lsn_t* min_flushed_lsn, /*!< out: min of flushed
2096 lsn values in data files */
2097 lsn_t* max_flushed_lsn) /*!< out: max of flushed
2098 lsn values in data files */
2099 {
2100 byte* buf;
2101 byte* page;
2102 lsn_t flushed_lsn;
2103 const char* check_msg = NULL;
2104
2105 buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
2106
2107 /* Align the memory for a possible read from a raw device */
2108
2109 page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
2110
2111 os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
2112
2113 /* The FSP_HEADER on page 0 is only valid for the first file
2114 in a tablespace. So if this is not the first datafile, leave
2115 *flags and *space_id as they were read from the first file and
2116 do not validate the first page. */
2117 if (!one_read_already) {
2118 *flags = fsp_header_get_flags(page);
2119 *space_id = fsp_header_get_space_id(page);
2120
2121 check_msg = fil_check_first_page(page);
2122 }
2123
2124 flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2125
2126 ut_free(buf);
2127
2128 if (check_msg) {
2129 return(check_msg);
2130 }
2131
2132 if (!one_read_already) {
2133 *min_flushed_lsn = flushed_lsn;
2134 *max_flushed_lsn = flushed_lsn;
2135
2136 return(NULL);
2137 }
2138
2139 if (*min_flushed_lsn > flushed_lsn) {
2140 *min_flushed_lsn = flushed_lsn;
2141 }
2142 if (*max_flushed_lsn < flushed_lsn) {
2143 *max_flushed_lsn = flushed_lsn;
2144 }
2145
2146 return(NULL);
2147 }
2148
2149 /*================ SINGLE-TABLE TABLESPACES ==========================*/
2150
2151 #ifndef UNIV_HOTBACKUP
2152 /*******************************************************************//**
2153 Increments the count of pending operation, if space is not being deleted.
2154 @return TRUE if being deleted, and operation should be skipped */
2155 UNIV_INTERN
2156 ibool
fil_inc_pending_ops(ulint id,ibool print_err)2157 fil_inc_pending_ops(
2158 /*================*/
2159 ulint id, /*!< in: space id */
2160 ibool print_err) /*!< in: need to print error or not */
2161 {
2162 fil_space_t* space;
2163
2164 mutex_enter(&fil_system->mutex);
2165
2166 space = fil_space_get_by_id(id);
2167
2168 if (space == NULL) {
2169 if (print_err) {
2170 fprintf(stderr,
2171 "InnoDB: Error: trying to do an operation on a"
2172 " dropped tablespace %lu\n",
2173 (ulong) id);
2174 }
2175 }
2176
2177 if (space == NULL || space->stop_new_ops) {
2178 mutex_exit(&fil_system->mutex);
2179
2180 return(TRUE);
2181 }
2182
2183 space->n_pending_ops++;
2184
2185 mutex_exit(&fil_system->mutex);
2186
2187 return(FALSE);
2188 }
2189
2190 /*******************************************************************//**
2191 Decrements the count of pending operations. */
2192 UNIV_INTERN
2193 void
fil_decr_pending_ops(ulint id)2194 fil_decr_pending_ops(
2195 /*=================*/
2196 ulint id) /*!< in: space id */
2197 {
2198 fil_space_t* space;
2199
2200 mutex_enter(&fil_system->mutex);
2201
2202 space = fil_space_get_by_id(id);
2203
2204 if (space == NULL) {
2205 fprintf(stderr,
2206 "InnoDB: Error: decrementing pending operation"
2207 " of a dropped tablespace %lu\n",
2208 (ulong) id);
2209 }
2210
2211 if (space != NULL) {
2212 space->n_pending_ops--;
2213 }
2214
2215 mutex_exit(&fil_system->mutex);
2216 }
2217 #endif /* !UNIV_HOTBACKUP */
2218
2219 /********************************************************//**
2220 Creates the database directory for a table if it does not exist yet. */
2221 static
2222 void
fil_create_directory_for_tablename(const char * name)2223 fil_create_directory_for_tablename(
2224 /*===============================*/
2225 const char* name) /*!< in: name in the standard
2226 'databasename/tablename' format */
2227 {
2228 const char* namend;
2229 char* path;
2230 ulint len;
2231
2232 len = strlen(fil_path_to_mysql_datadir);
2233 namend = strchr(name, '/');
2234 ut_a(namend);
2235 path = static_cast<char*>(mem_alloc(len + (namend - name) + 2));
2236
2237 memcpy(path, fil_path_to_mysql_datadir, len);
2238 path[len] = '/';
2239 memcpy(path + len + 1, name, namend - name);
2240 path[len + (namend - name) + 1] = 0;
2241
2242 srv_normalize_path_for_win(path);
2243
2244 ut_a(os_file_create_directory(path, FALSE));
2245 mem_free(path);
2246 }
2247
2248 #ifndef UNIV_HOTBACKUP
2249 /********************************************************//**
2250 Writes a log record about an .ibd file create/rename/delete. */
2251 static
2252 void
fil_op_write_log(ulint type,ulint space_id,ulint log_flags,ulint flags,const char * name,const char * new_name,mtr_t * mtr)2253 fil_op_write_log(
2254 /*=============*/
2255 ulint type, /*!< in: MLOG_FILE_CREATE,
2256 MLOG_FILE_CREATE2,
2257 MLOG_FILE_DELETE, or
2258 MLOG_FILE_RENAME */
2259 ulint space_id, /*!< in: space id */
2260 ulint log_flags, /*!< in: redo log flags (stored
2261 in the page number field) */
2262 ulint flags, /*!< in: compressed page size
2263 and file format
2264 if type==MLOG_FILE_CREATE2, or 0 */
2265 const char* name, /*!< in: table name in the familiar
2266 'databasename/tablename' format, or
2267 the file path in the case of
2268 MLOG_FILE_DELETE */
2269 const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
2270 the new table name in the
2271 'databasename/tablename' format */
2272 mtr_t* mtr) /*!< in: mini-transaction handle */
2273 {
2274 byte* log_ptr;
2275 ulint len;
2276
2277 log_ptr = mlog_open(mtr, 11 + 2 + 1);
2278
2279 if (!log_ptr) {
2280 /* Logging in mtr is switched off during crash recovery:
2281 in that case mlog_open returns NULL */
2282 return;
2283 }
2284
2285 log_ptr = mlog_write_initial_log_record_for_file_op(
2286 type, space_id, log_flags, log_ptr, mtr);
2287 if (type == MLOG_FILE_CREATE2) {
2288 mach_write_to_4(log_ptr, flags);
2289 log_ptr += 4;
2290 }
2291 /* Let us store the strings as null-terminated for easier readability
2292 and handling */
2293
2294 len = strlen(name) + 1;
2295
2296 mach_write_to_2(log_ptr, len);
2297 log_ptr += 2;
2298 mlog_close(mtr, log_ptr);
2299
2300 mlog_catenate_string(mtr, (byte*) name, len);
2301
2302 if (type == MLOG_FILE_RENAME) {
2303 len = strlen(new_name) + 1;
2304 log_ptr = mlog_open(mtr, 2 + len);
2305 ut_a(log_ptr);
2306 mach_write_to_2(log_ptr, len);
2307 log_ptr += 2;
2308 mlog_close(mtr, log_ptr);
2309
2310 mlog_catenate_string(mtr, (byte*) new_name, len);
2311 }
2312 }
2313 #endif
2314
2315 /*******************************************************************//**
2316 Parses the body of a log record written about an .ibd file operation. That is,
2317 the log record part after the standard (type, space id, page no) header of the
2318 log record.
2319
2320 If desired, also replays the delete or rename operation if the .ibd file
2321 exists and the space id in it matches. Replays the create operation if a file
2322 at that path does not exist yet. If the database directory for the file to be
2323 created does not exist, then we create the directory, too.
2324
2325 Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
2326 the datadir that we should use in replaying the file operations.
2327
2328 InnoDB recovery does not replay these fully since it always sets the space id
2329 to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are
2330 used, mysqlbackup will only create tables in the default directory since
2331 MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path.
2332
2333 @return end of log record, or NULL if the record was not completely
2334 contained between ptr and end_ptr */
2335 UNIV_INTERN
2336 byte*
fil_op_log_parse_or_replay(byte * ptr,byte * end_ptr,ulint type,ulint space_id,ulint log_flags)2337 fil_op_log_parse_or_replay(
2338 /*=======================*/
2339 byte* ptr, /*!< in: buffer containing the log record body,
2340 or an initial segment of it, if the record does
2341 not fir completely between ptr and end_ptr */
2342 byte* end_ptr, /*!< in: buffer end */
2343 ulint type, /*!< in: the type of this log record */
2344 ulint space_id, /*!< in: the space id of the tablespace in
2345 question, or 0 if the log record should
2346 only be parsed but not replayed */
2347 ulint log_flags) /*!< in: redo log flags
2348 (stored in the page number parameter) */
2349 {
2350 ulint name_len;
2351 ulint new_name_len;
2352 const char* name;
2353 const char* new_name = NULL;
2354 ulint flags = 0;
2355
2356 if (type == MLOG_FILE_CREATE2) {
2357 if (end_ptr < ptr + 4) {
2358
2359 return(NULL);
2360 }
2361
2362 flags = mach_read_from_4(ptr);
2363 ptr += 4;
2364 }
2365
2366 if (end_ptr < ptr + 2) {
2367
2368 return(NULL);
2369 }
2370
2371 name_len = mach_read_from_2(ptr);
2372
2373 ptr += 2;
2374
2375 if (end_ptr < ptr + name_len) {
2376
2377 return(NULL);
2378 }
2379
2380 name = (const char*) ptr;
2381
2382 ptr += name_len;
2383
2384 if (type == MLOG_FILE_RENAME) {
2385 if (end_ptr < ptr + 2) {
2386
2387 return(NULL);
2388 }
2389
2390 new_name_len = mach_read_from_2(ptr);
2391
2392 ptr += 2;
2393
2394 if (end_ptr < ptr + new_name_len) {
2395
2396 return(NULL);
2397 }
2398
2399 new_name = (const char*) ptr;
2400
2401 ptr += new_name_len;
2402 }
2403
2404 /* We managed to parse a full log record body */
2405 /*
2406 printf("Parsed log rec of type %lu space %lu\n"
2407 "name %s\n", type, space_id, name);
2408
2409 if (type == MLOG_FILE_RENAME) {
2410 printf("new name %s\n", new_name);
2411 }
2412 */
2413 if (!space_id) {
2414 return(ptr);
2415 } else {
2416 /* Only replay file ops during recovery. This is a
2417 release-build assert to minimize any data loss risk by a
2418 misapplied file operation. */
2419 ut_a(recv_recovery_is_on());
2420 }
2421
2422 /* Let us try to perform the file operation, if sensible. Note that
2423 mysqlbackup has at this stage already read in all space id info to the
2424 fil0fil.cc data structures.
2425
2426 NOTE that our algorithm is not guaranteed to work correctly if there
2427 were renames of tables during the backup. See mysqlbackup code for more
2428 on the problem. */
2429
2430 switch (type) {
2431 case MLOG_FILE_DELETE:
2432 if (fil_tablespace_exists_in_mem(space_id)) {
2433 dberr_t err = fil_delete_tablespace(
2434 space_id, BUF_REMOVE_FLUSH_NO_WRITE);
2435 ut_a(err == DB_SUCCESS);
2436 }
2437
2438 break;
2439
2440 case MLOG_FILE_RENAME:
2441 /* In order to replay the rename, the following must hold:
2442 * The new name is not already used.
2443 * A tablespace is open in memory with the old name.
2444 * The space ID for that tablepace matches this log entry.
2445 This will prevent unintended renames during recovery. */
2446
2447 if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED
2448 && space_id == fil_get_space_id_for_table(name)) {
2449 /* Create the database directory for the new name, if
2450 it does not exist yet */
2451 fil_create_directory_for_tablename(new_name);
2452
2453 if (!fil_rename_tablespace(name, space_id,
2454 new_name, NULL)) {
2455 ut_error;
2456 }
2457 }
2458
2459 break;
2460
2461 case MLOG_FILE_CREATE:
2462 case MLOG_FILE_CREATE2:
2463 if (fil_tablespace_exists_in_mem(space_id)) {
2464 /* Do nothing */
2465 } else if (fil_get_space_id_for_table(name)
2466 != ULINT_UNDEFINED) {
2467 /* Do nothing */
2468 } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2469 /* Temporary table, do nothing */
2470 } else {
2471 const char* path = NULL;
2472
2473 /* Create the database directory for name, if it does
2474 not exist yet */
2475 fil_create_directory_for_tablename(name);
2476
2477 if (fil_create_new_single_table_tablespace(
2478 space_id, name, path, flags,
2479 DICT_TF2_USE_TABLESPACE,
2480 FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2481 ut_error;
2482 }
2483 }
2484
2485 break;
2486
2487 default:
2488 ut_error;
2489 }
2490
2491 return(ptr);
2492 }
2493
2494 /*******************************************************************//**
2495 Allocates a file name for the EXPORT/IMPORT config file name. The
2496 string must be freed by caller with mem_free().
2497 @return own: file name */
2498 static
2499 char*
fil_make_cfg_name(const char * filepath)2500 fil_make_cfg_name(
2501 /*==============*/
2502 const char* filepath) /*!< in: .ibd file name */
2503 {
2504 char* cfg_name;
2505
2506 /* Create a temporary file path by replacing the .ibd suffix
2507 with .cfg. */
2508
2509 ut_ad(strlen(filepath) > 4);
2510
2511 cfg_name = mem_strdup(filepath);
2512 ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
2513 return(cfg_name);
2514 }
2515
2516 /*******************************************************************//**
2517 Check for change buffer merges.
2518 @return 0 if no merges else count + 1. */
2519 static
2520 ulint
fil_ibuf_check_pending_ops(fil_space_t * space,ulint count)2521 fil_ibuf_check_pending_ops(
2522 /*=======================*/
2523 fil_space_t* space, /*!< in/out: Tablespace to check */
2524 ulint count) /*!< in: number of attempts so far */
2525 {
2526 ut_ad(mutex_own(&fil_system->mutex));
2527
2528 if (space != 0 && space->n_pending_ops != 0) {
2529
2530 if (count > 5000) {
2531 ib_logf(IB_LOG_LEVEL_WARN,
2532 "Trying to close/delete tablespace "
2533 "'%s' but there are %lu pending change "
2534 "buffer merges on it.",
2535 space->name,
2536 (ulong) space->n_pending_ops);
2537 }
2538
2539 return(count + 1);
2540 }
2541
2542 return(0);
2543 }
2544
2545 /*******************************************************************//**
2546 Check for pending IO.
2547 @return 0 if no pending else count + 1. */
2548 static
2549 ulint
fil_check_pending_io(fil_space_t * space,fil_node_t ** node,ulint count)2550 fil_check_pending_io(
2551 /*=================*/
2552 fil_space_t* space, /*!< in/out: Tablespace to check */
2553 fil_node_t** node, /*!< out: Node in space list */
2554 ulint count) /*!< in: number of attempts so far */
2555 {
2556 ut_ad(mutex_own(&fil_system->mutex));
2557 ut_a(space->n_pending_ops == 0);
2558
2559 /* The following code must change when InnoDB supports
2560 multiple datafiles per tablespace. */
2561 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2562
2563 *node = UT_LIST_GET_FIRST(space->chain);
2564
2565 if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
2566
2567 ut_a(!(*node)->being_extended);
2568
2569 if (count > 1000) {
2570 ib_logf(IB_LOG_LEVEL_WARN,
2571 "Trying to close/delete tablespace '%s' "
2572 "but there are %lu flushes "
2573 " and %lu pending i/o's on it.",
2574 space->name,
2575 (ulong) space->n_pending_flushes,
2576 (ulong) (*node)->n_pending);
2577 }
2578
2579 return(count + 1);
2580 }
2581
2582 return(0);
2583 }
2584
2585 /*******************************************************************//**
2586 Check pending operations on a tablespace.
2587 @return DB_SUCCESS or error failure. */
2588 static
2589 dberr_t
fil_check_pending_operations(ulint id,fil_space_t ** space,char ** path)2590 fil_check_pending_operations(
2591 /*=========================*/
2592 ulint id, /*!< in: space id */
2593 fil_space_t** space, /*!< out: tablespace instance in memory */
2594 char** path) /*!< out/own: tablespace path */
2595 {
2596 ulint count = 0;
2597
2598 ut_a(id != TRX_SYS_SPACE);
2599 ut_ad(space);
2600
2601 *space = 0;
2602
2603 mutex_enter(&fil_system->mutex);
2604 fil_space_t* sp = fil_space_get_by_id(id);
2605 if (sp) {
2606 sp->stop_new_ops = TRUE;
2607 }
2608 mutex_exit(&fil_system->mutex);
2609
2610 /* Check for pending change buffer merges. */
2611
2612 do {
2613 mutex_enter(&fil_system->mutex);
2614
2615 sp = fil_space_get_by_id(id);
2616
2617 count = fil_ibuf_check_pending_ops(sp, count);
2618
2619 mutex_exit(&fil_system->mutex);
2620
2621 if (count > 0) {
2622 os_thread_sleep(20000);
2623 }
2624
2625 } while (count > 0);
2626
2627 /* Check for pending IO. */
2628
2629 *path = 0;
2630
2631 do {
2632 mutex_enter(&fil_system->mutex);
2633
2634 sp = fil_space_get_by_id(id);
2635
2636 if (sp == NULL) {
2637 mutex_exit(&fil_system->mutex);
2638 return(DB_TABLESPACE_NOT_FOUND);
2639 }
2640
2641 fil_node_t* node;
2642
2643 count = fil_check_pending_io(sp, &node, count);
2644
2645 if (count == 0) {
2646 *path = mem_strdup(node->name);
2647 }
2648
2649 mutex_exit(&fil_system->mutex);
2650
2651 if (count > 0) {
2652 os_thread_sleep(20000);
2653 }
2654
2655 } while (count > 0);
2656
2657 ut_ad(sp);
2658
2659 *space = sp;
2660 return(DB_SUCCESS);
2661 }
2662
2663 /*******************************************************************//**
2664 Closes a single-table tablespace. The tablespace must be cached in the
2665 memory cache. Free all pages used by the tablespace.
2666 @return DB_SUCCESS or error */
2667 UNIV_INTERN
2668 dberr_t
fil_close_tablespace(trx_t * trx,ulint id)2669 fil_close_tablespace(
2670 /*=================*/
2671 trx_t* trx, /*!< in/out: Transaction covering the close */
2672 ulint id) /*!< in: space id */
2673 {
2674 char* path = 0;
2675 fil_space_t* space = 0;
2676
2677 ut_a(id != TRX_SYS_SPACE);
2678
2679 dberr_t err = fil_check_pending_operations(id, &space, &path);
2680
2681 if (err != DB_SUCCESS) {
2682 return(err);
2683 }
2684
2685 ut_a(space);
2686 ut_a(path != 0);
2687
2688 rw_lock_x_lock(&space->latch);
2689
2690 #ifndef UNIV_HOTBACKUP
2691 /* Invalidate in the buffer pool all pages belonging to the
2692 tablespace. Since we have set space->stop_new_ops = TRUE, readahead
2693 or ibuf merge can no longer read more pages of this tablespace to the
2694 buffer pool. Thus we can clean the tablespace out of the buffer pool
2695 completely and permanently. The flag stop_new_ops also prevents
2696 fil_flush() from being applied to this tablespace. */
2697
2698 buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
2699 #endif
2700 mutex_enter(&fil_system->mutex);
2701
2702 /* If the free is successful, the X lock will be released before
2703 the space memory data structure is freed. */
2704
2705 if (!fil_space_free(id, TRUE)) {
2706 rw_lock_x_unlock(&space->latch);
2707 err = DB_TABLESPACE_NOT_FOUND;
2708 } else {
2709 err = DB_SUCCESS;
2710 }
2711
2712 mutex_exit(&fil_system->mutex);
2713
2714 /* If it is a delete then also delete any generated files, otherwise
2715 when we drop the database the remove directory will fail. */
2716
2717 char* cfg_name = fil_make_cfg_name(path);
2718
2719 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2720
2721 mem_free(path);
2722 mem_free(cfg_name);
2723
2724 return(err);
2725 }
2726
2727 /*******************************************************************//**
2728 Deletes a single-table tablespace. The tablespace must be cached in the
2729 memory cache.
2730 @return DB_SUCCESS or error */
2731 UNIV_INTERN
2732 dberr_t
fil_delete_tablespace(ulint id,buf_remove_t buf_remove)2733 fil_delete_tablespace(
2734 /*==================*/
2735 ulint id, /*!< in: space id */
2736 buf_remove_t buf_remove) /*!< in: specify the action to take
2737 on the tables pages in the buffer
2738 pool */
2739 {
2740 char* path = 0;
2741 fil_space_t* space = 0;
2742
2743 ut_a(id != TRX_SYS_SPACE);
2744
2745 dberr_t err = fil_check_pending_operations(id, &space, &path);
2746
2747 if (err != DB_SUCCESS) {
2748
2749 ib_logf(IB_LOG_LEVEL_ERROR,
2750 "Cannot delete tablespace %lu because it is not "
2751 "found in the tablespace memory cache.",
2752 (ulong) id);
2753
2754 return(err);
2755 }
2756
2757 ut_a(space);
2758 ut_a(path != 0);
2759
2760 /* Important: We rely on the data dictionary mutex to ensure
2761 that a race is not possible here. It should serialize the tablespace
2762 drop/free. We acquire an X latch only to avoid a race condition
2763 when accessing the tablespace instance via:
2764
2765 fsp_get_available_space_in_free_extents().
2766
2767 There our main motivation is to reduce the contention on the
2768 dictionary mutex. */
2769
2770 rw_lock_x_lock(&space->latch);
2771
2772 #ifndef UNIV_HOTBACKUP
2773 /* IMPORTANT: Because we have set space::stop_new_ops there
2774 can't be any new ibuf merges, reads or flushes. We are here
2775 because node::n_pending was zero above. However, it is still
2776 possible to have pending read and write requests:
2777
2778 A read request can happen because the reader thread has
2779 gone through the ::stop_new_ops check in buf_page_init_for_read()
2780 before the flag was set and has not yet incremented ::n_pending
2781 when we checked it above.
2782
2783 A write request can be issued any time because we don't check
2784 the ::stop_new_ops flag when queueing a block for write.
2785
2786 We deal with pending write requests in the following function
2787 where we'd minimally evict all dirty pages belonging to this
2788 space from the flush_list. Not that if a block is IO-fixed
2789 we'll wait for IO to complete.
2790
2791 To deal with potential read requests by checking the
2792 ::stop_new_ops flag in fil_io() */
2793
2794 buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
2795
2796 #endif /* !UNIV_HOTBACKUP */
2797
2798 /* If it is a delete then also delete any generated files, otherwise
2799 when we drop the database the remove directory will fail. */
2800 {
2801 char* cfg_name = fil_make_cfg_name(path);
2802 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2803 mem_free(cfg_name);
2804 }
2805
2806 /* Delete the link file pointing to the ibd file we are deleting. */
2807 if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
2808 fil_delete_link_file(space->name);
2809 }
2810
2811 mutex_enter(&fil_system->mutex);
2812
2813 /* Double check the sanity of pending ops after reacquiring
2814 the fil_system::mutex. */
2815 if (fil_space_get_by_id(id)) {
2816 ut_a(space->n_pending_ops == 0);
2817 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2818 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2819 ut_a(node->n_pending == 0);
2820 }
2821
2822 if (!fil_space_free(id, TRUE)) {
2823 err = DB_TABLESPACE_NOT_FOUND;
2824 }
2825
2826 mutex_exit(&fil_system->mutex);
2827
2828 if (err != DB_SUCCESS) {
2829 rw_lock_x_unlock(&space->latch);
2830 } else if (!os_file_delete(innodb_file_data_key, path)
2831 && !os_file_delete_if_exists(innodb_file_data_key, path)) {
2832
2833 /* Note: This is because we have removed the
2834 tablespace instance from the cache. */
2835
2836 err = DB_IO_ERROR;
2837 }
2838
2839 if (err == DB_SUCCESS) {
2840 #ifndef UNIV_HOTBACKUP
2841 /* Write a log record about the deletion of the .ibd
2842 file, so that mysqlbackup can replay it in the
2843 --apply-log phase. We use a dummy mtr and the familiar
2844 log write mechanism. */
2845 mtr_t mtr;
2846
2847 /* When replaying the operation in mysqlbackup, do not try
2848 to write any log record */
2849 mtr_start(&mtr);
2850
2851 fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2852 mtr_commit(&mtr);
2853 #endif
2854 err = DB_SUCCESS;
2855 }
2856
2857 mem_free(path);
2858
2859 return(err);
2860 }
2861
2862 /*******************************************************************//**
2863 Returns TRUE if a single-table tablespace is being deleted.
2864 @return TRUE if being deleted */
2865 UNIV_INTERN
2866 ibool
fil_tablespace_is_being_deleted(ulint id)2867 fil_tablespace_is_being_deleted(
2868 /*============================*/
2869 ulint id) /*!< in: space id */
2870 {
2871 fil_space_t* space;
2872 ibool is_being_deleted;
2873
2874 mutex_enter(&fil_system->mutex);
2875
2876 space = fil_space_get_by_id(id);
2877
2878 ut_a(space != NULL);
2879
2880 is_being_deleted = space->stop_new_ops;
2881
2882 mutex_exit(&fil_system->mutex);
2883
2884 return(is_being_deleted);
2885 }
2886
2887 #ifndef UNIV_HOTBACKUP
2888 /*******************************************************************//**
2889 Discards a single-table tablespace. The tablespace must be cached in the
2890 memory cache. Discarding is like deleting a tablespace, but
2891
2892 1. We do not drop the table from the data dictionary;
2893
2894 2. We remove all insert buffer entries for the tablespace immediately;
2895 in DROP TABLE they are only removed gradually in the background;
2896
2897 3. Free all the pages in use by the tablespace.
2898 @return DB_SUCCESS or error */
2899 UNIV_INTERN
2900 dberr_t
fil_discard_tablespace(ulint id)2901 fil_discard_tablespace(
2902 /*===================*/
2903 ulint id) /*!< in: space id */
2904 {
2905 dberr_t err;
2906
2907 switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
2908 case DB_SUCCESS:
2909 break;
2910
2911 case DB_IO_ERROR:
2912 ib_logf(IB_LOG_LEVEL_WARN,
2913 "While deleting tablespace %lu in DISCARD TABLESPACE."
2914 " File rename/delete failed: %s",
2915 (ulong) id, ut_strerr(err));
2916 break;
2917
2918 case DB_TABLESPACE_NOT_FOUND:
2919 ib_logf(IB_LOG_LEVEL_WARN,
2920 "Cannot delete tablespace %lu in DISCARD "
2921 "TABLESPACE. %s",
2922 (ulong) id, ut_strerr(err));
2923 break;
2924
2925 default:
2926 ut_error;
2927 }
2928
2929 /* Remove all insert buffer entries for the tablespace */
2930
2931 ibuf_delete_for_discarded_space(id);
2932
2933 return(err);
2934 }
2935 #endif /* !UNIV_HOTBACKUP */
2936
2937 /*******************************************************************//**
2938 Renames the memory cache structures of a single-table tablespace.
2939 @return TRUE if success */
2940 static
2941 ibool
fil_rename_tablespace_in_mem(fil_space_t * space,fil_node_t * node,const char * new_name,const char * new_path)2942 fil_rename_tablespace_in_mem(
2943 /*=========================*/
2944 fil_space_t* space, /*!< in: tablespace memory object */
2945 fil_node_t* node, /*!< in: file node of that tablespace */
2946 const char* new_name, /*!< in: new name */
2947 const char* new_path) /*!< in: new file path */
2948 {
2949 fil_space_t* space2;
2950 const char* old_name = space->name;
2951
2952 ut_ad(mutex_own(&fil_system->mutex));
2953
2954 space2 = fil_space_get_by_name(old_name);
2955 if (space != space2) {
2956 fputs("InnoDB: Error: cannot find ", stderr);
2957 ut_print_filename(stderr, old_name);
2958 fputs(" in tablespace memory cache\n", stderr);
2959
2960 return(FALSE);
2961 }
2962
2963 space2 = fil_space_get_by_name(new_name);
2964 if (space2 != NULL) {
2965 fputs("InnoDB: Error: ", stderr);
2966 ut_print_filename(stderr, new_name);
2967 fputs(" is already in tablespace memory cache\n", stderr);
2968
2969 return(FALSE);
2970 }
2971
2972 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2973 ut_fold_string(space->name), space);
2974 mem_free(space->name);
2975 mem_free(node->name);
2976
2977 space->name = mem_strdup(new_name);
2978 node->name = mem_strdup(new_path);
2979
2980 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2981 ut_fold_string(new_name), space);
2982 return(TRUE);
2983 }
2984
2985 /*******************************************************************//**
2986 Allocates a file name for a single-table tablespace. The string must be freed
2987 by caller with mem_free().
2988 @return own: file name */
2989 UNIV_INTERN
2990 char*
fil_make_ibd_name(const char * name,bool is_full_path)2991 fil_make_ibd_name(
2992 /*==============*/
2993 const char* name, /*!< in: table name or a dir path */
2994 bool is_full_path) /*!< in: TRUE if it is a dir path */
2995 {
2996 char* filename;
2997 ulint namelen = strlen(name);
2998 ulint dirlen = strlen(fil_path_to_mysql_datadir);
2999 ulint pathlen = dirlen + namelen + sizeof "/.ibd";
3000
3001 filename = static_cast<char*>(mem_alloc(pathlen));
3002
3003 if (is_full_path) {
3004 memcpy(filename, name, namelen);
3005 memcpy(filename + namelen, ".ibd", sizeof ".ibd");
3006 } else {
3007 ut_snprintf(filename, pathlen, "%s/%s.ibd",
3008 fil_path_to_mysql_datadir, name);
3009
3010 }
3011
3012 srv_normalize_path_for_win(filename);
3013
3014 return(filename);
3015 }
3016
3017 /*******************************************************************//**
3018 Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
3019 The string must be freed by caller with mem_free().
3020 @return own: file name */
3021 UNIV_INTERN
3022 char*
fil_make_isl_name(const char * name)3023 fil_make_isl_name(
3024 /*==============*/
3025 const char* name) /*!< in: table name */
3026 {
3027 char* filename;
3028 ulint namelen = strlen(name);
3029 ulint dirlen = strlen(fil_path_to_mysql_datadir);
3030 ulint pathlen = dirlen + namelen + sizeof "/.isl";
3031
3032 filename = static_cast<char*>(mem_alloc(pathlen));
3033
3034 ut_snprintf(filename, pathlen, "%s/%s.isl",
3035 fil_path_to_mysql_datadir, name);
3036
3037 srv_normalize_path_for_win(filename);
3038
3039 return(filename);
3040 }
3041
3042 /** Test if a tablespace file can be renamed to a new filepath by checking
3043 if that the old filepath exists and the new filepath does not exist.
3044 @param[in] space_id tablespace id
3045 @param[in] old_path old filepath
3046 @param[in] new_path new filepath
3047 @param[in] is_discarded whether the tablespace is discarded
3048 @return innodb error code */
3049 dberr_t
fil_rename_tablespace_check(ulint space_id,const char * old_path,const char * new_path,bool is_discarded)3050 fil_rename_tablespace_check(
3051 ulint space_id,
3052 const char* old_path,
3053 const char* new_path,
3054 bool is_discarded)
3055 {
3056 ulint exists = false;
3057 os_file_type_t ftype;
3058
3059 if (!is_discarded
3060 && os_file_status(old_path, &exists, &ftype)
3061 && !exists) {
3062 ib_logf(IB_LOG_LEVEL_ERROR,
3063 "Cannot rename '%s' to '%s' for space ID %lu"
3064 " because the source file does not exist.",
3065 old_path, new_path, space_id);
3066
3067 return(DB_TABLESPACE_NOT_FOUND);
3068 }
3069
3070 exists = false;
3071 if (!os_file_status(new_path, &exists, &ftype) || exists) {
3072 ib_logf(IB_LOG_LEVEL_ERROR,
3073 "Cannot rename '%s' to '%s' for space ID %lu"
3074 " because the target file exists."
3075 " Remove the target file and try again.",
3076 old_path, new_path, space_id);
3077
3078 return(DB_TABLESPACE_EXISTS);
3079 }
3080
3081 return(DB_SUCCESS);
3082 }
3083
3084 /*******************************************************************//**
3085 Renames a single-table tablespace. The tablespace must be cached in the
3086 tablespace memory cache.
3087 @return TRUE if success */
3088 UNIV_INTERN
3089 ibool
fil_rename_tablespace(const char * old_name_in,ulint id,const char * new_name,const char * new_path_in)3090 fil_rename_tablespace(
3091 /*==================*/
3092 const char* old_name_in, /*!< in: old table name in the
3093 standard databasename/tablename
3094 format of InnoDB, or NULL if we
3095 do the rename based on the space
3096 id only */
3097 ulint id, /*!< in: space id */
3098 const char* new_name, /*!< in: new table name in the
3099 standard databasename/tablename
3100 format of InnoDB */
3101 const char* new_path_in) /*!< in: new full datafile path
3102 if the tablespace is remotely
3103 located, or NULL if it is located
3104 in the normal data directory. */
3105 {
3106 ibool success;
3107 fil_space_t* space;
3108 fil_node_t* node;
3109 ulint count = 0;
3110 char* new_path;
3111 char* old_name;
3112 char* old_path;
3113 const char* not_given = "(name not specified)";
3114
3115 ut_a(id != 0);
3116
3117 retry:
3118 count++;
3119
3120 if (!(count % 1000)) {
3121 ut_print_timestamp(stderr);
3122 fputs(" InnoDB: Warning: problems renaming ", stderr);
3123 ut_print_filename(stderr,
3124 old_name_in ? old_name_in : not_given);
3125 fputs(" to ", stderr);
3126 ut_print_filename(stderr, new_name);
3127 fprintf(stderr, ", %lu iterations\n", (ulong) count);
3128 }
3129
3130 mutex_enter(&fil_system->mutex);
3131
3132 space = fil_space_get_by_id(id);
3133
3134 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
3135
3136 if (space == NULL) {
3137 ib_logf(IB_LOG_LEVEL_ERROR,
3138 "Cannot find space id %lu in the tablespace "
3139 "memory cache, though the table '%s' in a "
3140 "rename operation should have that id.",
3141 (ulong) id, old_name_in ? old_name_in : not_given);
3142 mutex_exit(&fil_system->mutex);
3143
3144 return(FALSE);
3145 }
3146
3147 if (count > 25000) {
3148 space->stop_ios = FALSE;
3149 mutex_exit(&fil_system->mutex);
3150
3151 return(FALSE);
3152 }
3153
3154 /* We temporarily close the .ibd file because we do not trust that
3155 operating systems can rename an open file. For the closing we have to
3156 wait until there are no pending i/o's or flushes on the file. */
3157
3158 space->stop_ios = TRUE;
3159
3160 /* The following code must change when InnoDB supports
3161 multiple datafiles per tablespace. */
3162 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
3163 node = UT_LIST_GET_FIRST(space->chain);
3164
3165 if (node->n_pending > 0
3166 || node->n_pending_flushes > 0
3167 || node->being_extended) {
3168 /* There are pending i/o's or flushes or the file is
3169 currently being extended, sleep for a while and
3170 retry */
3171
3172 mutex_exit(&fil_system->mutex);
3173
3174 os_thread_sleep(20000);
3175
3176 goto retry;
3177
3178 } else if (node->modification_counter > node->flush_counter) {
3179 /* Flush the space */
3180
3181 mutex_exit(&fil_system->mutex);
3182
3183 os_thread_sleep(20000);
3184
3185 fil_flush(id);
3186
3187 goto retry;
3188
3189 } else if (node->open) {
3190 /* Close the file */
3191
3192 fil_node_close_file(node, fil_system);
3193 }
3194
3195 /* Check that the old name in the space is right */
3196
3197 if (old_name_in) {
3198 old_name = mem_strdup(old_name_in);
3199 ut_a(strcmp(space->name, old_name) == 0);
3200 } else {
3201 old_name = mem_strdup(space->name);
3202 }
3203 old_path = mem_strdup(node->name);
3204
3205 /* Rename the tablespace and the node in the memory cache */
3206 new_path = new_path_in ? mem_strdup(new_path_in)
3207 : fil_make_ibd_name(new_name, false);
3208
3209 success = fil_rename_tablespace_in_mem(
3210 space, node, new_name, new_path);
3211
3212 if (success) {
3213
3214 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3215 goto skip_second_rename; );
3216
3217 success = os_file_rename(
3218 innodb_file_data_key, old_path, new_path);
3219
3220 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3221 skip_second_rename:
3222 success = FALSE; );
3223
3224 if (!success) {
3225 /* We have to revert the changes we made
3226 to the tablespace memory cache */
3227
3228 ut_a(fil_rename_tablespace_in_mem(
3229 space, node, old_name, old_path));
3230 }
3231 }
3232
3233 space->stop_ios = FALSE;
3234
3235 mutex_exit(&fil_system->mutex);
3236
3237 #ifndef UNIV_HOTBACKUP
3238 if (success && !recv_recovery_on) {
3239 mtr_t mtr;
3240
3241 mtr_start(&mtr);
3242
3243 fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
3244 &mtr);
3245 mtr_commit(&mtr);
3246 }
3247 #endif /* !UNIV_HOTBACKUP */
3248
3249 mem_free(new_path);
3250 mem_free(old_path);
3251 mem_free(old_name);
3252
3253 return(success);
3254 }
3255
3256 /*******************************************************************//**
3257 Creates a new InnoDB Symbolic Link (ISL) file. It is always created
3258 under the 'datadir' of MySQL. The datadir is the directory of a
3259 running mysqld program. We can refer to it by simply using the path '.'.
3260 @return DB_SUCCESS or error code */
3261 UNIV_INTERN
3262 dberr_t
fil_create_link_file(const char * tablename,const char * filepath)3263 fil_create_link_file(
3264 /*=================*/
3265 const char* tablename, /*!< in: tablename */
3266 const char* filepath) /*!< in: pathname of tablespace */
3267 {
3268 dberr_t err = DB_SUCCESS;
3269 char* link_filepath;
3270 char* prev_filepath = fil_read_link_file(tablename);
3271
3272 ut_ad(!srv_read_only_mode);
3273
3274 if (prev_filepath) {
3275 /* Truncate will call this with an existing
3276 link file which contains the same filepath. */
3277 if (0 == strcmp(prev_filepath, filepath)) {
3278 mem_free(prev_filepath);
3279 return(DB_SUCCESS);
3280 }
3281 mem_free(prev_filepath);
3282 }
3283
3284 link_filepath = fil_make_isl_name(tablename);
3285
3286 /** Check if the file already exists. */
3287 FILE* file = NULL;
3288 ibool exists;
3289 os_file_type_t ftype;
3290
3291 bool success = os_file_status(link_filepath, &exists, &ftype);
3292
3293 ulint error = 0;
3294 if (success && !exists) {
3295 file = fopen(link_filepath, "w");
3296 if (file == NULL) {
3297 /* This call will print its own error message */
3298 error = os_file_get_last_error(true);
3299 }
3300 } else {
3301 error = OS_FILE_ALREADY_EXISTS;
3302 }
3303 if (error != 0) {
3304
3305 ut_print_timestamp(stderr);
3306 fputs(" InnoDB: Cannot create file ", stderr);
3307 ut_print_filename(stderr, link_filepath);
3308 fputs(".\n", stderr);
3309
3310 if (error == OS_FILE_ALREADY_EXISTS) {
3311 fputs("InnoDB: The link file: ", stderr);
3312 ut_print_filename(stderr, filepath);
3313 fputs(" already exists.\n", stderr);
3314 err = DB_TABLESPACE_EXISTS;
3315
3316 } else if (error == OS_FILE_DISK_FULL) {
3317 err = DB_OUT_OF_FILE_SPACE;
3318
3319 } else {
3320 err = DB_ERROR;
3321 }
3322
3323 /* file is not open, no need to close it. */
3324 mem_free(link_filepath);
3325 return(err);
3326 }
3327
3328 ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
3329 if (rbytes != strlen(filepath)) {
3330 os_file_get_last_error(true);
3331 ib_logf(IB_LOG_LEVEL_ERROR,
3332 "cannot write link file "
3333 "%s",filepath);
3334 err = DB_ERROR;
3335 }
3336
3337 /* Close the file, we only need it at startup */
3338 fclose(file);
3339
3340 mem_free(link_filepath);
3341
3342 return(err);
3343 }
3344
3345 /*******************************************************************//**
3346 Deletes an InnoDB Symbolic Link (ISL) file. */
3347 UNIV_INTERN
3348 void
fil_delete_link_file(const char * tablename)3349 fil_delete_link_file(
3350 /*=================*/
3351 const char* tablename) /*!< in: name of table */
3352 {
3353 char* link_filepath = fil_make_isl_name(tablename);
3354
3355 os_file_delete_if_exists(innodb_file_data_key, link_filepath);
3356
3357 mem_free(link_filepath);
3358 }
3359
3360 /*******************************************************************//**
3361 Reads an InnoDB Symbolic Link (ISL) file.
3362 It is always created under the 'datadir' of MySQL. The name is of the
3363 form {databasename}/{tablename}. and the isl file is expected to be in a
3364 '{databasename}' directory called '{tablename}.isl'. The caller must free
3365 the memory of the null-terminated path returned if it is not null.
3366 @return own: filepath found in link file, NULL if not found. */
3367 UNIV_INTERN
3368 char*
fil_read_link_file(const char * name)3369 fil_read_link_file(
3370 /*===============*/
3371 const char* name) /*!< in: tablespace name */
3372 {
3373 char* filepath = NULL;
3374 char* link_filepath;
3375 FILE* file = NULL;
3376
3377 /* The .isl file is in the 'normal' tablespace location. */
3378 link_filepath = fil_make_isl_name(name);
3379
3380 file = fopen(link_filepath, "r+b");
3381
3382 mem_free(link_filepath);
3383
3384 if (file) {
3385 filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
3386
3387 os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
3388 fclose(file);
3389
3390 if (strlen(filepath)) {
3391 /* Trim whitespace from end of filepath */
3392 ulint lastch = strlen(filepath) - 1;
3393 while (lastch > 4 && filepath[lastch] <= 0x20) {
3394 filepath[lastch--] = 0x00;
3395 }
3396 srv_normalize_path_for_win(filepath);
3397 }
3398 }
3399
3400 return(filepath);
3401 }
3402
3403 /*******************************************************************//**
3404 Opens a handle to the file linked to in an InnoDB Symbolic Link file.
3405 @return TRUE if remote linked tablespace file is found and opened. */
3406 UNIV_INTERN
3407 ibool
fil_open_linked_file(const char * tablename,char ** remote_filepath,pfs_os_file_t * remote_file)3408 fil_open_linked_file(
3409 /*===============*/
3410 const char* tablename, /*!< in: database/tablename */
3411 char** remote_filepath,/*!< out: remote filepath */
3412 pfs_os_file_t* remote_file) /*!< out: remote file handle */
3413
3414 {
3415 ibool success;
3416
3417 *remote_filepath = fil_read_link_file(tablename);
3418 if (*remote_filepath == NULL) {
3419 return(FALSE);
3420 }
3421
3422 /* The filepath provided is different from what was
3423 found in the link file. */
3424 *remote_file = os_file_create_simple_no_error_handling(
3425 innodb_file_data_key, *remote_filepath,
3426 OS_FILE_OPEN, OS_FILE_READ_ONLY,
3427 &success);
3428
3429 if (!success) {
3430 char* link_filepath = fil_make_isl_name(tablename);
3431
3432 /* The following call prints an error message */
3433 os_file_get_last_error(true);
3434
3435 ib_logf(IB_LOG_LEVEL_ERROR,
3436 "A link file was found named '%s' "
3437 "but the linked tablespace '%s' "
3438 "could not be opened.",
3439 link_filepath, *remote_filepath);
3440
3441 mem_free(link_filepath);
3442 mem_free(*remote_filepath);
3443 *remote_filepath = NULL;
3444 }
3445
3446 return(success);
3447 }
3448
3449 /*******************************************************************//**
3450 Creates a new single-table tablespace to a database directory of MySQL.
3451 Database directories are under the 'datadir' of MySQL. The datadir is the
3452 directory of a running mysqld program. We can refer to it by simply the
3453 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
3454 dir of the mysqld server.
3455
3456 @return DB_SUCCESS or error code */
3457 UNIV_INTERN
3458 dberr_t
fil_create_new_single_table_tablespace(ulint space_id,const char * tablename,const char * dir_path,ulint flags,ulint flags2,ulint size)3459 fil_create_new_single_table_tablespace(
3460 /*===================================*/
3461 ulint space_id, /*!< in: space id */
3462 const char* tablename, /*!< in: the table name in the usual
3463 databasename/tablename format
3464 of InnoDB */
3465 const char* dir_path, /*!< in: NULL or a dir path */
3466 ulint flags, /*!< in: tablespace flags */
3467 ulint flags2, /*!< in: table flags2 */
3468 ulint size) /*!< in: the initial size of the
3469 tablespace file in pages,
3470 must be >= FIL_IBD_FILE_INITIAL_SIZE */
3471 {
3472 pfs_os_file_t file;
3473
3474 ibool ret;
3475 dberr_t err;
3476 byte* buf2;
3477 byte* page;
3478 char* path;
3479 ibool success;
3480 /* TRUE if a table is created with CREATE TEMPORARY TABLE */
3481 bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
3482 bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
3483
3484 ut_a(space_id > 0);
3485 ut_ad(!srv_read_only_mode);
3486 ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
3487 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
3488 ut_a(fsp_flags_is_valid(flags));
3489
3490 if (is_temp) {
3491 /* Temporary table filepath */
3492 ut_ad(dir_path);
3493 path = fil_make_ibd_name(dir_path, true);
3494 } else if (has_data_dir) {
3495 ut_ad(dir_path);
3496 path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
3497
3498 /* Since this tablespace file will be created in a
3499 remote directory, let's create the subdirectories
3500 in the path, if they are not there already. */
3501 success = os_file_create_subdirs_if_needed(path);
3502 if (!success) {
3503 err = DB_ERROR;
3504 goto error_exit_3;
3505 }
3506 } else {
3507 path = fil_make_ibd_name(tablename, false);
3508 }
3509
3510 file = os_file_create(
3511 innodb_file_data_key, path,
3512 OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
3513 OS_FILE_NORMAL,
3514 OS_DATA_FILE,
3515 &ret);
3516
3517 if (ret == FALSE) {
3518 /* The following call will print an error message */
3519 ulint error = os_file_get_last_error(true);
3520
3521 ib_logf(IB_LOG_LEVEL_ERROR,
3522 "Cannot create file '%s'\n", path);
3523
3524 if (error == OS_FILE_ALREADY_EXISTS) {
3525 ib_logf(IB_LOG_LEVEL_ERROR,
3526 "The file '%s' already exists though the "
3527 "corresponding table did not exist "
3528 "in the InnoDB data dictionary. "
3529 "Have you moved InnoDB .ibd files "
3530 "around without using the SQL commands "
3531 "DISCARD TABLESPACE and IMPORT TABLESPACE, "
3532 "or did mysqld crash in the middle of "
3533 "CREATE TABLE? "
3534 "You can resolve the problem by removing "
3535 "the file '%s' under the 'datadir' of MySQL.",
3536 path, path);
3537
3538 err = DB_TABLESPACE_EXISTS;
3539 goto error_exit_3;
3540 }
3541
3542 if (error == OS_FILE_DISK_FULL) {
3543 err = DB_OUT_OF_FILE_SPACE;
3544 goto error_exit_3;
3545 }
3546
3547 err = DB_ERROR;
3548 goto error_exit_3;
3549 }
3550
3551 ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
3552
3553 if (!ret) {
3554 err = DB_OUT_OF_FILE_SPACE;
3555 goto error_exit_2;
3556 }
3557
3558 /* printf("Creating tablespace %s id %lu\n", path, space_id); */
3559
3560 /* We have to write the space id to the file immediately and flush the
3561 file to disk. This is because in crash recovery we must be aware what
3562 tablespaces exist and what are their space id's, so that we can apply
3563 the log records to the right file. It may take quite a while until
3564 buffer pool flush algorithms write anything to the file and flush it to
3565 disk. If we would not write here anything, the file would be filled
3566 with zeros from the call of os_file_set_size(), until a buffer pool
3567 flush would write to it. */
3568
3569 buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
3570 /* Align the memory for file i/o if we might have O_DIRECT set */
3571 page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
3572
3573 memset(page, '\0', UNIV_PAGE_SIZE);
3574
3575 /* Add the UNIV_PAGE_SIZE to the table flags and write them to the
3576 tablespace header. */
3577 flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
3578 fsp_header_init_fields(page, space_id, flags);
3579 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
3580
3581 if (!(fsp_flags_is_compressed(flags))) {
3582 buf_flush_init_for_writing(page, NULL, 0);
3583 ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
3584 } else {
3585 page_zip_des_t page_zip;
3586 ulint zip_size;
3587
3588 zip_size = fsp_flags_get_zip_size(flags);
3589
3590 page_zip_set_size(&page_zip, zip_size);
3591 page_zip.data = page + UNIV_PAGE_SIZE;
3592 #ifdef UNIV_DEBUG
3593 page_zip.m_start =
3594 #endif /* UNIV_DEBUG */
3595 page_zip.m_end = page_zip.m_nonempty =
3596 page_zip.n_blobs = 0;
3597 buf_flush_init_for_writing(page, &page_zip, 0);
3598 ret = os_file_write(path, file, page_zip.data, 0, zip_size);
3599 }
3600
3601 ut_free(buf2);
3602
3603 if (!ret) {
3604 ib_logf(IB_LOG_LEVEL_ERROR,
3605 "Could not write the first page to tablespace "
3606 "'%s'", path);
3607
3608 err = DB_ERROR;
3609 goto error_exit_2;
3610 }
3611
3612 ret = os_file_flush(file);
3613
3614 if (!ret) {
3615 ib_logf(IB_LOG_LEVEL_ERROR,
3616 "File flush of tablespace '%s' failed", path);
3617 err = DB_ERROR;
3618 goto error_exit_2;
3619 }
3620
3621 if (has_data_dir) {
3622 /* Now that the IBD file is created, make the ISL file. */
3623 err = fil_create_link_file(tablename, path);
3624 if (err != DB_SUCCESS) {
3625 goto error_exit_2;
3626 }
3627 }
3628
3629 success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
3630 if (!success || !fil_node_create(path, size, space_id, FALSE)) {
3631 err = DB_ERROR;
3632 goto error_exit_1;
3633 }
3634
3635 #ifndef UNIV_HOTBACKUP
3636 {
3637 mtr_t mtr;
3638 ulint mlog_file_flag = 0;
3639
3640 if (is_temp) {
3641 mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
3642 }
3643
3644 mtr_start(&mtr);
3645
3646 fil_op_write_log(flags
3647 ? MLOG_FILE_CREATE2
3648 : MLOG_FILE_CREATE,
3649 space_id, mlog_file_flag, flags,
3650 tablename, NULL, &mtr);
3651
3652 mtr_commit(&mtr);
3653 }
3654 #endif
3655 err = DB_SUCCESS;
3656
3657 /* Error code is set. Cleanup the various variables used.
3658 These labels reflect the order in which variables are assigned or
3659 actions are done. */
3660 error_exit_1:
3661 if (has_data_dir && err != DB_SUCCESS) {
3662 fil_delete_link_file(tablename);
3663 }
3664 error_exit_2:
3665 os_file_close(file);
3666 if (err != DB_SUCCESS) {
3667 os_file_delete(innodb_file_data_key, path);
3668 }
3669 error_exit_3:
3670 mem_free(path);
3671
3672 return(err);
3673 }
3674
3675 #ifndef UNIV_HOTBACKUP
3676 /********************************************************************//**
3677 Report information about a bad tablespace. */
3678 static
3679 void
fil_report_bad_tablespace(const char * filepath,const char * check_msg,ulint found_id,ulint found_flags,ulint expected_id,ulint expected_flags)3680 fil_report_bad_tablespace(
3681 /*======================*/
3682 const char* filepath, /*!< in: filepath */
3683 const char* check_msg, /*!< in: fil_check_first_page() */
3684 ulint found_id, /*!< in: found space ID */
3685 ulint found_flags, /*!< in: found flags */
3686 ulint expected_id, /*!< in: expected space id */
3687 ulint expected_flags) /*!< in: expected flags */
3688 {
3689 if (check_msg) {
3690 ib_logf(IB_LOG_LEVEL_ERROR,
3691 "Error %s in file '%s',"
3692 "tablespace id=%lu, flags=%lu. "
3693 "Please refer to "
3694 REFMAN "innodb-troubleshooting-datadict.html "
3695 "for how to resolve the issue.",
3696 check_msg, filepath,
3697 (ulong) expected_id, (ulong) expected_flags);
3698 return;
3699 }
3700
3701 ib_logf(IB_LOG_LEVEL_ERROR,
3702 "In file '%s', tablespace id and flags are %lu and %lu, "
3703 "but in the InnoDB data dictionary they are %lu and %lu. "
3704 "Have you moved InnoDB .ibd files around without using the "
3705 "commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
3706 "Please refer to "
3707 REFMAN "innodb-troubleshooting-datadict.html "
3708 "for how to resolve the issue.",
3709 filepath, (ulong) found_id, (ulong) found_flags,
3710 (ulong) expected_id, (ulong) expected_flags);
3711 }
3712
3713 /********************************************************************//**
3714 Tries to open a single-table tablespace and optionally checks that the
3715 space id in it is correct. If this does not succeed, print an error message
3716 to the .err log. This function is used to open a tablespace when we start
3717 mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
3718
3719 NOTE that we assume this operation is used either at the database startup
3720 or under the protection of the dictionary mutex, so that two users cannot
3721 race here. This operation does not leave the file associated with the
3722 tablespace open, but closes it after we have looked at the space id in it.
3723
3724 If the validate boolean is set, we read the first page of the file and
3725 check that the space id in the file is what we expect. We assume that
3726 this function runs much faster if no check is made, since accessing the
3727 file inode probably is much faster (the OS caches them) than accessing
3728 the first page of the file. This boolean may be initially FALSE, but if
3729 a remote tablespace is found it will be changed to true.
3730
3731 If the fix_dict boolean is set, then it is safe to use an internal SQL
3732 statement to update the dictionary tables if they are incorrect.
3733
3734 @return DB_SUCCESS or error code */
3735 UNIV_INTERN
3736 dberr_t
fil_open_single_table_tablespace(bool validate,bool fix_dict,ulint id,ulint flags,const char * tablename,const char * path_in)3737 fil_open_single_table_tablespace(
3738 /*=============================*/
3739 bool validate, /*!< in: Do we validate tablespace? */
3740 bool fix_dict, /*!< in: Can we fix the dictionary? */
3741 ulint id, /*!< in: space id */
3742 ulint flags, /*!< in: tablespace flags */
3743 const char* tablename, /*!< in: table name in the
3744 databasename/tablename format */
3745 const char* path_in) /*!< in: tablespace filepath */
3746 {
3747 dberr_t err = DB_SUCCESS;
3748 bool dict_filepath_same_as_default = false;
3749 bool link_file_found = false;
3750 bool link_file_is_bad = false;
3751 fsp_open_info def;
3752 fsp_open_info dict;
3753 fsp_open_info remote;
3754 ulint tablespaces_found = 0;
3755 ulint valid_tablespaces_found = 0;
3756
3757 #ifdef UNIV_SYNC_DEBUG
3758 ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3759 #endif /* UNIV_SYNC_DEBUG */
3760 ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
3761
3762 if (!fsp_flags_is_valid(flags)) {
3763 return(DB_CORRUPTION);
3764 }
3765
3766 /* If the tablespace was relocated, we do not
3767 compare the DATA_DIR flag */
3768 ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
3769
3770 memset(&def, 0, sizeof(def));
3771 memset(&dict, 0, sizeof(dict));
3772 memset(&remote, 0, sizeof(remote));
3773
3774 /* Discover the correct filepath. We will always look for an ibd
3775 in the default location. If it is remote, it should not be here. */
3776 def.filepath = fil_make_ibd_name(tablename, false);
3777
3778 /* The path_in was read from SYS_DATAFILES. */
3779 if (path_in) {
3780 if (strcmp(def.filepath, path_in)) {
3781 dict.filepath = mem_strdup(path_in);
3782 /* possibility of multiple files. */
3783 validate = true;
3784 } else {
3785 dict_filepath_same_as_default = true;
3786 }
3787 }
3788
3789 link_file_found = fil_open_linked_file(
3790 tablename, &remote.filepath, &remote.file);
3791 remote.success = link_file_found;
3792 if (remote.success) {
3793 /* possibility of multiple files. */
3794 validate = true;
3795 tablespaces_found++;
3796
3797 /* A link file was found. MySQL does not allow a DATA
3798 DIRECTORY to be be the same as the default filepath. */
3799 ut_a(strcmp(def.filepath, remote.filepath));
3800
3801 /* If there was a filepath found in SYS_DATAFILES,
3802 we hope it was the same as this remote.filepath found
3803 in the ISL file. */
3804 if (dict.filepath
3805 && (0 == strcmp(dict.filepath, remote.filepath))) {
3806 remote.success = FALSE;
3807 os_file_close(remote.file);
3808 mem_free(remote.filepath);
3809 remote.filepath = NULL;
3810 tablespaces_found--;
3811 }
3812 }
3813
3814 /* Attempt to open the tablespace at other possible filepaths. */
3815 if (dict.filepath) {
3816 dict.file = os_file_create_simple_no_error_handling(
3817 innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
3818 OS_FILE_READ_ONLY, &dict.success);
3819 if (dict.success) {
3820 /* possibility of multiple files. */
3821 validate = true;
3822 tablespaces_found++;
3823 }
3824 }
3825
3826 /* Always look for a file at the default location. */
3827 ut_a(def.filepath);
3828 def.file = os_file_create_simple_no_error_handling(
3829 innodb_file_data_key, def.filepath, OS_FILE_OPEN,
3830 OS_FILE_READ_ONLY, &def.success);
3831 if (def.success) {
3832 tablespaces_found++;
3833 }
3834
3835 /* We have now checked all possible tablespace locations and
3836 have a count of how many we found. If things are normal, we
3837 only found 1. */
3838 if (!validate && tablespaces_found == 1) {
3839 goto skip_validate;
3840 }
3841
3842 /* Read the first page of the datadir tablespace, if found. */
3843 if (def.success) {
3844 def.check_msg = fil_read_first_page(
3845 def.file, FALSE, &def.flags, &def.id,
3846 &def.lsn, &def.lsn);
3847 def.valid = !def.check_msg;
3848
3849 /* Validate this single-table-tablespace with SYS_TABLES,
3850 but do not compare the DATA_DIR flag, in case the
3851 tablespace was relocated. */
3852 if (def.valid && def.id == id
3853 && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3854 valid_tablespaces_found++;
3855 } else {
3856 def.valid = false;
3857 /* Do not use this tablespace. */
3858 fil_report_bad_tablespace(
3859 def.filepath, def.check_msg, def.id,
3860 def.flags, id, flags);
3861 }
3862 }
3863
3864 /* Read the first page of the remote tablespace */
3865 if (remote.success) {
3866 remote.check_msg = fil_read_first_page(
3867 remote.file, FALSE, &remote.flags, &remote.id,
3868 &remote.lsn, &remote.lsn);
3869 remote.valid = !remote.check_msg;
3870
3871 /* Validate this single-table-tablespace with SYS_TABLES,
3872 but do not compare the DATA_DIR flag, in case the
3873 tablespace was relocated. */
3874 if (remote.valid && remote.id == id
3875 && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3876 valid_tablespaces_found++;
3877 } else {
3878 remote.valid = false;
3879 /* Do not use this linked tablespace. */
3880 fil_report_bad_tablespace(
3881 remote.filepath, remote.check_msg, remote.id,
3882 remote.flags, id, flags);
3883 link_file_is_bad = true;
3884 }
3885 }
3886
3887 /* Read the first page of the datadir tablespace, if found. */
3888 if (dict.success) {
3889 dict.check_msg = fil_read_first_page(
3890 dict.file, FALSE, &dict.flags, &dict.id,
3891 &dict.lsn, &dict.lsn);
3892 dict.valid = !dict.check_msg;
3893
3894 /* Validate this single-table-tablespace with SYS_TABLES,
3895 but do not compare the DATA_DIR flag, in case the
3896 tablespace was relocated. */
3897 if (dict.valid && dict.id == id
3898 && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3899 valid_tablespaces_found++;
3900 } else {
3901 dict.valid = false;
3902 /* Do not use this tablespace. */
3903 fil_report_bad_tablespace(
3904 dict.filepath, dict.check_msg, dict.id,
3905 dict.flags, id, flags);
3906 }
3907 }
3908
3909 /* Make sense of these three possible locations.
3910 First, bail out if no tablespace files were found. */
3911 if (valid_tablespaces_found == 0) {
3912 /* The following call prints an error message */
3913 os_file_get_last_error(true);
3914
3915 ib_logf(IB_LOG_LEVEL_ERROR,
3916 "Could not find a valid tablespace file for '%s'. "
3917 "See " REFMAN "innodb-troubleshooting-datadict.html "
3918 "for how to resolve the issue.",
3919 tablename);
3920
3921 err = DB_CORRUPTION;
3922
3923 goto cleanup_and_exit;
3924 }
3925
3926 /* Do not open any tablespaces if more than one tablespace with
3927 the correct space ID and flags were found. */
3928 if (tablespaces_found > 1) {
3929 ib_logf(IB_LOG_LEVEL_ERROR,
3930 "A tablespace for %s has been found in "
3931 "multiple places;", tablename);
3932 if (def.success) {
3933 ib_logf(IB_LOG_LEVEL_ERROR,
3934 "Default location; %s, LSN=" LSN_PF
3935 ", Space ID=%lu, Flags=%lu",
3936 def.filepath, def.lsn,
3937 (ulong) def.id, (ulong) def.flags);
3938 }
3939 if (remote.success) {
3940 ib_logf(IB_LOG_LEVEL_ERROR,
3941 "Remote location; %s, LSN=" LSN_PF
3942 ", Space ID=%lu, Flags=%lu",
3943 remote.filepath, remote.lsn,
3944 (ulong) remote.id, (ulong) remote.flags);
3945 }
3946 if (dict.success) {
3947 ib_logf(IB_LOG_LEVEL_ERROR,
3948 "Dictionary location; %s, LSN=" LSN_PF
3949 ", Space ID=%lu, Flags=%lu",
3950 dict.filepath, dict.lsn,
3951 (ulong) dict.id, (ulong) dict.flags);
3952 }
3953
3954 /* Force-recovery will allow some tablespaces to be
3955 skipped by REDO if there was more than one file found.
3956 Unlike during the REDO phase of recovery, we now know
3957 if the tablespace is valid according to the dictionary,
3958 which was not available then. So if we did not force
3959 recovery and there is only one good tablespace, ignore
3960 any bad tablespaces. */
3961 if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
3962 ib_logf(IB_LOG_LEVEL_ERROR,
3963 "Will not open the tablespace for '%s'",
3964 tablename);
3965
3966 if (def.success != def.valid
3967 || dict.success != dict.valid
3968 || remote.success != remote.valid) {
3969 err = DB_CORRUPTION;
3970 } else {
3971 err = DB_ERROR;
3972 }
3973 goto cleanup_and_exit;
3974 }
3975
3976 /* There is only one valid tablespace found and we did
3977 not use srv_force_recovery during REDO. Use this one
3978 tablespace and clean up invalid tablespace pointers */
3979 if (def.success && !def.valid) {
3980 def.success = false;
3981 os_file_close(def.file);
3982 tablespaces_found--;
3983 }
3984 if (dict.success && !dict.valid) {
3985 dict.success = false;
3986 os_file_close(dict.file);
3987 /* Leave dict.filepath so that SYS_DATAFILES
3988 can be corrected below. */
3989 tablespaces_found--;
3990 }
3991 if (remote.success && !remote.valid) {
3992 remote.success = false;
3993 os_file_close(remote.file);
3994 mem_free(remote.filepath);
3995 remote.filepath = NULL;
3996 tablespaces_found--;
3997 }
3998 }
3999
4000 /* At this point, there should be only one filepath. */
4001 ut_a(tablespaces_found == 1);
4002 ut_a(valid_tablespaces_found == 1);
4003
4004 /* Only fix the dictionary at startup when there is only one thread.
4005 Calls to dict_load_table() can be done while holding other latches. */
4006 if (!fix_dict) {
4007 goto skip_validate;
4008 }
4009
4010 /* We may need to change what is stored in SYS_DATAFILES or
4011 SYS_TABLESPACES or adjust the link file.
4012 Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
4013 not prevent opening and using the single_table_tablespace either
4014 this time or the next, we do not check the return code or fail
4015 to open the tablespace. But dict_update_filepath() will issue a
4016 warning to the log. */
4017 if (dict.filepath) {
4018 if (remote.success) {
4019 dict_update_filepath(id, remote.filepath);
4020 } else if (def.success) {
4021 dict_update_filepath(id, def.filepath);
4022 if (link_file_is_bad) {
4023 fil_delete_link_file(tablename);
4024 }
4025 } else if (!link_file_found || link_file_is_bad) {
4026 ut_ad(dict.success);
4027 /* Fix the link file if we got our filepath
4028 from the dictionary but a link file did not
4029 exist or it did not point to a valid file. */
4030 fil_delete_link_file(tablename);
4031 fil_create_link_file(tablename, dict.filepath);
4032 }
4033
4034 } else if (remote.success && dict_filepath_same_as_default) {
4035 dict_update_filepath(id, remote.filepath);
4036
4037 } else if (remote.success && path_in == NULL) {
4038 /* SYS_DATAFILES record for this space ID was not found. */
4039 dict_insert_tablespace_and_filepath(
4040 id, tablename, remote.filepath, flags);
4041 }
4042
4043 skip_validate:
4044 if (err != DB_SUCCESS) {
4045 ; // Don't load the tablespace into the cache
4046 } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
4047 err = DB_ERROR;
4048 } else {
4049 /* We do not measure the size of the file, that is why
4050 we pass the 0 below */
4051
4052 if (!fil_node_create(remote.success ? remote.filepath :
4053 dict.success ? dict.filepath :
4054 def.filepath, 0, id, FALSE)) {
4055 err = DB_ERROR;
4056 }
4057 }
4058
4059 cleanup_and_exit:
4060 if (remote.success) {
4061 os_file_close(remote.file);
4062 }
4063 if (remote.filepath) {
4064 mem_free(remote.filepath);
4065 }
4066 if (dict.success) {
4067 os_file_close(dict.file);
4068 }
4069 if (dict.filepath) {
4070 mem_free(dict.filepath);
4071 }
4072 if (def.success) {
4073 os_file_close(def.file);
4074 }
4075 mem_free(def.filepath);
4076
4077 return(err);
4078 }
4079 #endif /* !UNIV_HOTBACKUP */
4080
4081 #ifdef UNIV_HOTBACKUP
4082 /*******************************************************************//**
4083 Allocates a file name for an old version of a single-table tablespace.
4084 The string must be freed by caller with mem_free()!
4085 @return own: file name */
4086 static
4087 char*
fil_make_ibbackup_old_name(const char * name)4088 fil_make_ibbackup_old_name(
4089 /*=======================*/
4090 const char* name) /*!< in: original file name */
4091 {
4092 static const char suffix[] = "_ibbackup_old_vers_";
4093 char* path;
4094 ulint len = strlen(name);
4095
4096 path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix)));
4097
4098 memcpy(path, name, len);
4099 memcpy(path + len, suffix, (sizeof suffix) - 1);
4100 ut_sprintf_timestamp_without_extra_chars(
4101 path + len + ((sizeof suffix) - 1));
4102 return(path);
4103 }
4104 #endif /* UNIV_HOTBACKUP */
4105
4106
4107 /*******************************************************************//**
4108 Determine the space id of the given file descriptor by reading a few
4109 pages from the beginning of the .ibd file.
4110 @return true if space id was successfully identified, or false. */
4111 static
4112 bool
fil_user_tablespace_find_space_id(fsp_open_info * fsp)4113 fil_user_tablespace_find_space_id(
4114 /*==============================*/
4115 fsp_open_info* fsp) /* in/out: contains file descriptor, which is
4116 used as input. contains space_id, which is
4117 the output */
4118 {
4119 bool st;
4120 os_offset_t file_size;
4121
4122 file_size = os_file_get_size(fsp->file);
4123
4124 if (file_size == (os_offset_t) -1) {
4125 ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s",
4126 fsp->filepath);
4127 return(false);
4128 }
4129
4130 /* Assuming a page size, read the space_id from each page and store it
4131 in a map. Find out which space_id is agreed on by majority of the
4132 pages. Choose that space_id. */
4133 for (ulint page_size = UNIV_ZIP_SIZE_MIN;
4134 page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) {
4135
4136 /* map[space_id] = count of pages */
4137 std::map<ulint, ulint> verify;
4138
4139 ulint page_count = 64;
4140 ulint valid_pages = 0;
4141
4142 /* Adjust the number of pages to analyze based on file size */
4143 while ((page_count * page_size) > file_size) {
4144 --page_count;
4145 }
4146
4147 ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:"
4148 "%lu", page_size, page_count);
4149
4150 byte* buf = static_cast<byte*>(ut_malloc(2*page_size));
4151 byte* page = static_cast<byte*>(ut_align(buf, page_size));
4152
4153 for (ulint j = 0; j < page_count; ++j) {
4154
4155 st = os_file_read(fsp->file, page, (j* page_size), page_size);
4156
4157 if (!st) {
4158 ib_logf(IB_LOG_LEVEL_INFO,
4159 "READ FAIL: page_no:%lu", j);
4160 continue;
4161 }
4162
4163 bool uncompressed_ok = false;
4164
4165 /* For uncompressed pages, the page size must be equal
4166 to UNIV_PAGE_SIZE. */
4167 if (page_size == UNIV_PAGE_SIZE) {
4168 uncompressed_ok = !buf_page_is_corrupted(
4169 false, page, 0);
4170 }
4171
4172 bool compressed_ok = !buf_page_is_corrupted(
4173 false, page, page_size);
4174
4175 if (uncompressed_ok || compressed_ok) {
4176
4177 ulint space_id = mach_read_from_4(page
4178 + FIL_PAGE_SPACE_ID);
4179
4180 if (space_id > 0) {
4181 ib_logf(IB_LOG_LEVEL_INFO,
4182 "VALID: space:%lu "
4183 "page_no:%lu page_size:%lu",
4184 space_id, j, page_size);
4185 verify[space_id]++;
4186 ++valid_pages;
4187 }
4188 }
4189 }
4190
4191 ut_free(buf);
4192
4193 ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id "
4194 "count:%lu", page_size, (ulint) verify.size());
4195
4196 const ulint pages_corrupted = 3;
4197 for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
4198
4199 for (std::map<ulint, ulint>::iterator
4200 m = verify.begin(); m != verify.end(); ++m ) {
4201
4202 ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, "
4203 "Number of pages matched: %lu/%lu "
4204 "(%lu)", m->first, m->second,
4205 valid_pages, page_size);
4206
4207 if (m->second == (valid_pages - missed)) {
4208
4209 ib_logf(IB_LOG_LEVEL_INFO,
4210 "Chosen space:%lu\n", m->first);
4211
4212 fsp->id = m->first;
4213 return(true);
4214 }
4215 }
4216
4217 }
4218 }
4219
4220 return(false);
4221 }
4222
4223 /*******************************************************************//**
4224 Finds the given page_no of the given space id from the double write buffer,
4225 and copies it to the corresponding .ibd file.
4226 @return true if copy was successful, or false. */
4227 bool
fil_user_tablespace_restore_page(fsp_open_info * fsp,ulint page_no)4228 fil_user_tablespace_restore_page(
4229 /*==============================*/
4230 fsp_open_info* fsp, /* in: contains space id and .ibd
4231 file information */
4232 ulint page_no) /* in: page_no to obtain from double
4233 write buffer */
4234 {
4235 bool err;
4236 ulint flags;
4237 ulint zip_size;
4238 ulint page_size;
4239 ulint buflen;
4240 byte* page;
4241
4242 ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu",
4243 page_no, fsp->id);
4244
4245 // find if double write buffer has page_no of given space id
4246 page = recv_sys->dblwr.find_page(fsp->id, page_no);
4247
4248 if (!page) {
4249 ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have "
4250 "page_no=%lu of space: %lu", page_no, fsp->id);
4251 err = false;
4252 goto out;
4253 }
4254
4255 flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
4256 zip_size = fsp_flags_get_zip_size(flags);
4257 page_size = fsp_flags_get_page_size(flags);
4258
4259 ut_ad(page_no == page_get_page_no(page));
4260
4261 buflen = zip_size ? zip_size: page_size;
4262
4263 ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s",
4264 buflen, fsp->filepath);
4265
4266 err = os_file_write(fsp->filepath, fsp->file, page,
4267 (zip_size ? zip_size : page_size) * page_no,
4268 buflen);
4269
4270 os_file_flush(fsp->file);
4271 out:
4272 return(err);
4273 }
4274
4275 /********************************************************************//**
4276 Opens an .ibd file and adds the associated single-table tablespace to the
4277 InnoDB fil0fil.cc data structures.
4278 Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
4279 static
4280 void
fil_validate_single_table_tablespace(const char * tablename,fsp_open_info * fsp)4281 fil_validate_single_table_tablespace(
4282 /*=================================*/
4283 const char* tablename, /*!< in: database/tablename */
4284 fsp_open_info* fsp) /*!< in/out: tablespace info */
4285 {
4286 bool restore_attempted = false;
4287
4288 check_first_page:
4289 fsp->success = TRUE;
4290 if (const char* check_msg = fil_read_first_page(
4291 fsp->file, FALSE, &fsp->flags, &fsp->id,
4292 &fsp->lsn, &fsp->lsn)) {
4293 ib_logf(IB_LOG_LEVEL_ERROR,
4294 "%s in tablespace %s (table %s)",
4295 check_msg, fsp->filepath, tablename);
4296 fsp->success = FALSE;
4297 }
4298
4299 if (!fsp->success) {
4300 if (!restore_attempted) {
4301 if (!fil_user_tablespace_find_space_id(fsp)) {
4302 return;
4303 }
4304 restore_attempted = true;
4305
4306 if (fsp->id > 0
4307 && !fil_user_tablespace_restore_page(fsp, 0)) {
4308 return;
4309 }
4310 goto check_first_page;
4311 }
4312 return;
4313 }
4314
4315 if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4316 ib_logf(IB_LOG_LEVEL_ERROR,
4317 "Tablespace is not sensible;"
4318 " Table: %s Space ID: %lu Filepath: %s\n",
4319 tablename, (ulong) fsp->id, fsp->filepath);
4320 fsp->success = FALSE;
4321 return;
4322 }
4323
4324 mutex_enter(&fil_system->mutex);
4325 fil_space_t* space = fil_space_get_by_id(fsp->id);
4326 mutex_exit(&fil_system->mutex);
4327 if (space != NULL) {
4328 char* prev_filepath = fil_space_get_first_path(fsp->id);
4329
4330 ib_logf(IB_LOG_LEVEL_ERROR,
4331 "Attempted to open a previously opened tablespace. "
4332 "Previous tablespace %s uses space ID: %lu at "
4333 "filepath: %s. Cannot open tablespace %s which uses "
4334 "space ID: %lu at filepath: %s",
4335 space->name, (ulong) space->id, prev_filepath,
4336 tablename, (ulong) fsp->id, fsp->filepath);
4337
4338 mem_free(prev_filepath);
4339 fsp->success = FALSE;
4340 return;
4341 }
4342
4343 fsp->success = TRUE;
4344 }
4345
4346
4347 /********************************************************************//**
4348 Opens an .ibd file and adds the associated single-table tablespace to the
4349 InnoDB fil0fil.cc data structures. */
4350 static
4351 void
fil_load_single_table_tablespace(const char * dbname,const char * filename)4352 fil_load_single_table_tablespace(
4353 /*=============================*/
4354 const char* dbname, /*!< in: database name */
4355 const char* filename) /*!< in: file name (not a path),
4356 including the .ibd or .isl extension */
4357 {
4358 char* tablename;
4359 ulint tablename_len;
4360 ulint dbname_len = strlen(dbname);
4361 ulint filename_len = strlen(filename);
4362 fsp_open_info def;
4363 fsp_open_info remote;
4364 os_offset_t size;
4365 #ifdef UNIV_HOTBACKUP
4366 fil_space_t* space;
4367 #endif
4368
4369 memset(&def, 0, sizeof(def));
4370 memset(&remote, 0, sizeof(remote));
4371
4372 /* The caller assured that the extension is ".ibd" or ".isl". */
4373 ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
4374 || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
4375
4376 /* Build up the tablename in the standard form database/table. */
4377 tablename = static_cast<char*>(
4378 mem_alloc(dbname_len + filename_len + 2));
4379
4380 /* When lower_case_table_names = 2 it is possible that the
4381 dbname is in upper case ,but while storing it in fil_space_t
4382 we must convert it into lower case */
4383 sprintf(tablename, "%s" , dbname);
4384 tablename[dbname_len] = '\0';
4385
4386 if (lower_case_file_system) {
4387 dict_casedn_str(tablename);
4388 }
4389
4390 sprintf(tablename+dbname_len,"/%s",filename);
4391 tablename_len = strlen(tablename) - strlen(".ibd");
4392 tablename[tablename_len] = '\0';
4393
4394 /* There may be both .ibd and .isl file in the directory.
4395 And it is possible that the .isl file refers to a different
4396 .ibd file. If so, we open and compare them the first time
4397 one of them is sent to this function. So if this table has
4398 already been loaded, there is nothing to do.*/
4399 mutex_enter(&fil_system->mutex);
4400 if (fil_space_get_by_name(tablename)) {
4401 mem_free(tablename);
4402 mutex_exit(&fil_system->mutex);
4403 return;
4404 }
4405 mutex_exit(&fil_system->mutex);
4406
4407 /* Build up the filepath of the .ibd tablespace in the datadir.
4408 This must be freed independent of def.success. */
4409 def.filepath = fil_make_ibd_name(tablename, false);
4410
4411 #ifdef __WIN__
4412 # ifndef UNIV_HOTBACKUP
4413 /* If lower_case_table_names is 0 or 2, then MySQL allows database
4414 directory names with upper case letters. On Windows, all table and
4415 database names in InnoDB are internally always in lower case. Put the
4416 file path to lower case, so that we are consistent with InnoDB's
4417 internal data dictionary. */
4418
4419 dict_casedn_str(def.filepath);
4420 # endif /* !UNIV_HOTBACKUP */
4421 #endif
4422
4423 /* Check for a link file which locates a remote tablespace. */
4424 remote.success = fil_open_linked_file(
4425 tablename, &remote.filepath, &remote.file);
4426
4427 /* Read the first page of the remote tablespace */
4428 if (remote.success) {
4429 fil_validate_single_table_tablespace(tablename, &remote);
4430 if (!remote.success) {
4431 os_file_close(remote.file);
4432 mem_free(remote.filepath);
4433 }
4434 }
4435
4436
4437 /* Try to open the tablespace in the datadir. */
4438 def.file = os_file_create_simple_no_error_handling(
4439 innodb_file_data_key, def.filepath, OS_FILE_OPEN,
4440 OS_FILE_READ_WRITE, &def.success);
4441
4442 /* Read the first page of the remote tablespace */
4443 if (def.success) {
4444 fil_validate_single_table_tablespace(tablename, &def);
4445 if (!def.success) {
4446 os_file_close(def.file);
4447 }
4448 }
4449
4450 if (!def.success && !remote.success) {
4451 /* The following call prints an error message */
4452 os_file_get_last_error(true);
4453 fprintf(stderr,
4454 "InnoDB: Error: could not open single-table"
4455 " tablespace file %s\n", def.filepath);
4456
4457 if (!strncmp(filename,
4458 tmp_file_prefix, tmp_file_prefix_length)) {
4459 /* Ignore errors for #sql tablespaces. */
4460 mem_free(tablename);
4461 if (remote.filepath) {
4462 mem_free(remote.filepath);
4463 }
4464 if (def.filepath) {
4465 mem_free(def.filepath);
4466 }
4467 return;
4468 }
4469 no_good_file:
4470 fprintf(stderr,
4471 "InnoDB: We do not continue the crash recovery,"
4472 " because the table may become\n"
4473 "InnoDB: corrupt if we cannot apply the log"
4474 " records in the InnoDB log to it.\n"
4475 "InnoDB: To fix the problem and start mysqld:\n"
4476 "InnoDB: 1) If there is a permission problem"
4477 " in the file and mysqld cannot\n"
4478 "InnoDB: open the file, you should"
4479 " modify the permissions.\n"
4480 "InnoDB: 2) If the table is not needed, or you"
4481 " can restore it from a backup,\n"
4482 "InnoDB: then you can remove the .ibd file,"
4483 " and InnoDB will do a normal\n"
4484 "InnoDB: crash recovery and ignore that table.\n"
4485 "InnoDB: 3) If the file system or the"
4486 " disk is broken, and you cannot remove\n"
4487 "InnoDB: the .ibd file, you can set"
4488 " innodb_force_recovery > 0 in my.cnf\n"
4489 "InnoDB: and force InnoDB to continue crash"
4490 " recovery here.\n");
4491 will_not_choose:
4492 mem_free(tablename);
4493 if (remote.filepath) {
4494 mem_free(remote.filepath);
4495 }
4496 if (def.filepath) {
4497 mem_free(def.filepath);
4498 }
4499
4500 if (srv_force_recovery > 0) {
4501 ib_logf(IB_LOG_LEVEL_INFO,
4502 "innodb_force_recovery was set to %lu. "
4503 "Continuing crash recovery even though we "
4504 "cannot access the .ibd file of this table.",
4505 srv_force_recovery);
4506 return;
4507 }
4508
4509 exit(1);
4510 }
4511
4512 if (def.success && remote.success) {
4513 ib_logf(IB_LOG_LEVEL_ERROR,
4514 "Tablespaces for %s have been found in two places;\n"
4515 "Location 1: SpaceID: %lu LSN: %lu File: %s\n"
4516 "Location 2: SpaceID: %lu LSN: %lu File: %s\n"
4517 "You must delete one of them.",
4518 tablename, (ulong) def.id, (ulong) def.lsn,
4519 def.filepath, (ulong) remote.id, (ulong) remote.lsn,
4520 remote.filepath);
4521
4522 def.success = FALSE;
4523 os_file_close(def.file);
4524 os_file_close(remote.file);
4525 goto will_not_choose;
4526 }
4527
4528 /* At this point, only one tablespace is open */
4529 ut_a(def.success == !remote.success);
4530
4531 fsp_open_info* fsp = def.success ? &def : &remote;
4532
4533 /* Get and test the file size. */
4534 size = os_file_get_size(fsp->file);
4535
4536 if (size == (os_offset_t) -1) {
4537 /* The following call prints an error message */
4538 os_file_get_last_error(true);
4539
4540 ib_logf(IB_LOG_LEVEL_ERROR,
4541 "could not measure the size of single-table "
4542 "tablespace file %s", fsp->filepath);
4543
4544 os_file_close(fsp->file);
4545 goto no_good_file;
4546 }
4547
4548 /* Every .ibd file is created >= 4 pages in size. Smaller files
4549 cannot be ok. */
4550 ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
4551 if (size < minimum_size) {
4552 #ifndef UNIV_HOTBACKUP
4553 ib_logf(IB_LOG_LEVEL_ERROR,
4554 "The size of single-table tablespace file %s "
4555 "is only " UINT64PF ", should be at least %lu!",
4556 fsp->filepath, size, minimum_size);
4557 os_file_close(fsp->file);
4558 goto no_good_file;
4559 #else
4560 fsp->id = ULINT_UNDEFINED;
4561 fsp->flags = 0;
4562 #endif /* !UNIV_HOTBACKUP */
4563 }
4564
4565 #ifdef UNIV_HOTBACKUP
4566 if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4567 char* new_path;
4568
4569 fprintf(stderr,
4570 "InnoDB: Renaming tablespace %s of id %lu,\n"
4571 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4572 "InnoDB: because its size %" PRId64 " is too small"
4573 " (< 4 pages 16 kB each),\n"
4574 "InnoDB: or the space id in the file header"
4575 " is not sensible.\n"
4576 "InnoDB: This can happen in an mysqlbackup run,"
4577 " and is not dangerous.\n",
4578 fsp->filepath, fsp->id, fsp->filepath, size);
4579 os_file_close(fsp->file);
4580
4581 new_path = fil_make_ibbackup_old_name(fsp->filepath);
4582
4583 bool success = os_file_rename(
4584 innodb_file_data_key, fsp->filepath, new_path);
4585
4586 ut_a(success);
4587
4588 mem_free(new_path);
4589
4590 goto func_exit_after_close;
4591 }
4592
4593 /* A backup may contain the same space several times, if the space got
4594 renamed at a sensitive time. Since it is enough to have one version of
4595 the space, we rename the file if a space with the same space id
4596 already exists in the tablespace memory cache. We rather rename the
4597 file than delete it, because if there is a bug, we do not want to
4598 destroy valuable data. */
4599
4600 mutex_enter(&fil_system->mutex);
4601
4602 space = fil_space_get_by_id(fsp->id);
4603
4604 if (space) {
4605 char* new_path;
4606
4607 fprintf(stderr,
4608 "InnoDB: Renaming tablespace %s of id %lu,\n"
4609 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4610 "InnoDB: because space %s with the same id\n"
4611 "InnoDB: was scanned earlier. This can happen"
4612 " if you have renamed tables\n"
4613 "InnoDB: during an mysqlbackup run.\n",
4614 fsp->filepath, fsp->id, fsp->filepath,
4615 space->name);
4616 os_file_close(fsp->file);
4617
4618 new_path = fil_make_ibbackup_old_name(fsp->filepath);
4619
4620 mutex_exit(&fil_system->mutex);
4621
4622 bool success = os_file_rename(
4623 innodb_file_data_key, fsp->filepath, new_path);
4624
4625 ut_a(success);
4626
4627 mem_free(new_path);
4628
4629 goto func_exit_after_close;
4630 }
4631 mutex_exit(&fil_system->mutex);
4632 #endif /* UNIV_HOTBACKUP */
4633 ibool file_space_create_success = fil_space_create(
4634 tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
4635
4636 if (!file_space_create_success) {
4637 if (srv_force_recovery > 0) {
4638 fprintf(stderr,
4639 "InnoDB: innodb_force_recovery was set"
4640 " to %lu. Continuing crash recovery\n"
4641 "InnoDB: even though the tablespace"
4642 " creation of this table failed.\n",
4643 srv_force_recovery);
4644 goto func_exit;
4645 }
4646
4647 /* Exit here with a core dump, stack, etc. */
4648 ut_a(file_space_create_success);
4649 }
4650
4651 /* We do not use the size information we have about the file, because
4652 the rounding formula for extents and pages is somewhat complex; we
4653 let fil_node_open() do that task. */
4654
4655 if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
4656 ut_error;
4657 }
4658
4659 func_exit:
4660 os_file_close(fsp->file);
4661
4662 #ifdef UNIV_HOTBACKUP
4663 func_exit_after_close:
4664 #else
4665 ut_ad(!mutex_own(&fil_system->mutex));
4666 #endif
4667 mem_free(tablename);
4668 if (remote.success) {
4669 mem_free(remote.filepath);
4670 }
4671 mem_free(def.filepath);
4672 }
4673
4674 /***********************************************************************//**
4675 A fault-tolerant function that tries to read the next file name in the
4676 directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
4677 idea is to read as much good data as we can and jump over bad data.
4678 @return 0 if ok, -1 if error even after the retries, 1 if at the end
4679 of the directory */
4680 static
4681 int
fil_file_readdir_next_file(dberr_t * err,const char * dirname,os_file_dir_t dir,os_file_stat_t * info)4682 fil_file_readdir_next_file(
4683 /*=======================*/
4684 dberr_t* err, /*!< out: this is set to DB_ERROR if an error
4685 was encountered, otherwise not changed */
4686 const char* dirname,/*!< in: directory name or path */
4687 os_file_dir_t dir, /*!< in: directory stream */
4688 os_file_stat_t* info) /*!< in/out: buffer where the
4689 info is returned */
4690 {
4691 for (ulint i = 0; i < 100; i++) {
4692 int ret = os_file_readdir_next_file(dirname, dir, info);
4693
4694 if (ret != -1) {
4695
4696 return(ret);
4697 }
4698
4699 ib_logf(IB_LOG_LEVEL_ERROR,
4700 "os_file_readdir_next_file() returned -1 in "
4701 "directory %s, crash recovery may have failed "
4702 "for some .ibd files!", dirname);
4703
4704 *err = DB_ERROR;
4705 }
4706
4707 return(-1);
4708 }
4709
4710 /********************************************************************//**
4711 At the server startup, if we need crash recovery, scans the database
4712 directories under the MySQL datadir, looking for .ibd files. Those files are
4713 single-table tablespaces. We need to know the space id in each of them so that
4714 we know into which file we should look to check the contents of a page stored
4715 in the doublewrite buffer, also to know where to apply log records where the
4716 space id is != 0.
4717 @return DB_SUCCESS or error number */
4718 UNIV_INTERN
4719 dberr_t
fil_load_single_table_tablespaces(void)4720 fil_load_single_table_tablespaces(void)
4721 /*===================================*/
4722 {
4723 int ret;
4724 char* dbpath = NULL;
4725 ulint dbpath_len = 100;
4726 os_file_dir_t dir;
4727 os_file_dir_t dbdir;
4728 os_file_stat_t dbinfo;
4729 os_file_stat_t fileinfo;
4730 dberr_t err = DB_SUCCESS;
4731
4732 /* The datadir of MySQL is always the default directory of mysqld */
4733
4734 dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
4735
4736 if (dir == NULL) {
4737
4738 return(DB_ERROR);
4739 }
4740
4741 dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4742
4743 /* Scan all directories under the datadir. They are the database
4744 directories of MySQL. */
4745
4746 ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
4747 &dbinfo);
4748 while (ret == 0) {
4749 ulint len;
4750 /* printf("Looking at %s in datadir\n", dbinfo.name); */
4751
4752 if (dbinfo.type == OS_FILE_TYPE_FILE
4753 || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
4754
4755 goto next_datadir_item;
4756 }
4757
4758 /* We found a symlink or a directory; try opening it to see
4759 if a symlink is a directory */
4760
4761 len = strlen(fil_path_to_mysql_datadir)
4762 + strlen (dbinfo.name) + 2;
4763 if (len > dbpath_len) {
4764 dbpath_len = len;
4765
4766 if (dbpath) {
4767 mem_free(dbpath);
4768 }
4769
4770 dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4771 }
4772 ut_snprintf(dbpath, dbpath_len,
4773 "%s/%s", fil_path_to_mysql_datadir, dbinfo.name);
4774 srv_normalize_path_for_win(dbpath);
4775
4776 dbdir = os_file_opendir(dbpath, FALSE);
4777
4778 if (dbdir != NULL) {
4779
4780 /* We found a database directory; loop through it,
4781 looking for possible .ibd files in it */
4782
4783 ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
4784 &fileinfo);
4785 while (ret == 0) {
4786
4787 if (fileinfo.type == OS_FILE_TYPE_DIR) {
4788
4789 goto next_file_item;
4790 }
4791
4792 /* We found a symlink or a file */
4793 if (strlen(fileinfo.name) > 4
4794 && (0 == strcmp(fileinfo.name
4795 + strlen(fileinfo.name) - 4,
4796 ".ibd")
4797 || 0 == strcmp(fileinfo.name
4798 + strlen(fileinfo.name) - 4,
4799 ".isl"))) {
4800 /* The name ends in .ibd or .isl;
4801 try opening the file */
4802 fil_load_single_table_tablespace(
4803 dbinfo.name, fileinfo.name);
4804 }
4805 next_file_item:
4806 ret = fil_file_readdir_next_file(&err,
4807 dbpath, dbdir,
4808 &fileinfo);
4809 }
4810
4811 if (0 != os_file_closedir(dbdir)) {
4812 fputs("InnoDB: Warning: could not"
4813 " close database directory ", stderr);
4814 ut_print_filename(stderr, dbpath);
4815 putc('\n', stderr);
4816
4817 err = DB_ERROR;
4818 }
4819 }
4820
4821 next_datadir_item:
4822 ret = fil_file_readdir_next_file(&err,
4823 fil_path_to_mysql_datadir,
4824 dir, &dbinfo);
4825 }
4826
4827 mem_free(dbpath);
4828
4829 if (0 != os_file_closedir(dir)) {
4830 fprintf(stderr,
4831 "InnoDB: Error: could not close MySQL datadir\n");
4832
4833 return(DB_ERROR);
4834 }
4835
4836 return(err);
4837 }
4838
4839 /*******************************************************************//**
4840 Returns TRUE if a single-table tablespace does not exist in the memory cache,
4841 or is being deleted there.
4842 @return TRUE if does not exist or is being deleted */
4843 UNIV_INTERN
4844 ibool
fil_tablespace_deleted_or_being_deleted_in_mem(ulint id,ib_int64_t version)4845 fil_tablespace_deleted_or_being_deleted_in_mem(
4846 /*===========================================*/
4847 ulint id, /*!< in: space id */
4848 ib_int64_t version)/*!< in: tablespace_version should be this; if
4849 you pass -1 as the value of this, then this
4850 parameter is ignored */
4851 {
4852 fil_space_t* space;
4853
4854 ut_ad(fil_system);
4855
4856 mutex_enter(&fil_system->mutex);
4857
4858 space = fil_space_get_by_id(id);
4859
4860 if (space == NULL || space->stop_new_ops) {
4861 mutex_exit(&fil_system->mutex);
4862
4863 return(TRUE);
4864 }
4865
4866 if (version != ((ib_int64_t)-1)
4867 && space->tablespace_version != version) {
4868 mutex_exit(&fil_system->mutex);
4869
4870 return(TRUE);
4871 }
4872
4873 mutex_exit(&fil_system->mutex);
4874
4875 return(FALSE);
4876 }
4877
4878 /*******************************************************************//**
4879 Returns TRUE if a single-table tablespace exists in the memory cache.
4880 @return TRUE if exists */
4881 UNIV_INTERN
4882 ibool
fil_tablespace_exists_in_mem(ulint id)4883 fil_tablespace_exists_in_mem(
4884 /*=========================*/
4885 ulint id) /*!< in: space id */
4886 {
4887 fil_space_t* space;
4888
4889 ut_ad(fil_system);
4890
4891 mutex_enter(&fil_system->mutex);
4892
4893 space = fil_space_get_by_id(id);
4894
4895 mutex_exit(&fil_system->mutex);
4896
4897 return(space != NULL);
4898 }
4899
4900 /*******************************************************************//**
4901 Report that a tablespace for a table was not found. */
4902 static
4903 void
fil_report_missing_tablespace(const char * name,ulint space_id)4904 fil_report_missing_tablespace(
4905 /*===========================*/
4906 const char* name, /*!< in: table name */
4907 ulint space_id) /*!< in: table's space id */
4908 {
4909 char index_name[MAX_FULL_NAME_LEN + 1];
4910
4911 innobase_format_name(index_name, sizeof(index_name), name, TRUE);
4912
4913 ib_logf(IB_LOG_LEVEL_ERROR,
4914 "Table %s in the InnoDB data dictionary has tablespace id %lu, "
4915 "but tablespace with that id or name does not exist. Have "
4916 "you deleted or moved .ibd files? This may also be a table "
4917 "created with CREATE TEMPORARY TABLE whose .ibd and .frm "
4918 "files MySQL automatically removed, but the table still "
4919 "exists in the InnoDB internal data dictionary.",
4920 name, space_id);
4921 }
4922
4923 /*******************************************************************//**
4924 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
4925 cache. Note that if we have not done a crash recovery at the database startup,
4926 there may be many tablespaces which are not yet in the memory cache.
4927 @return TRUE if a matching tablespace exists in the memory cache */
4928 UNIV_INTERN
4929 ibool
fil_space_for_table_exists_in_mem(ulint id,const char * name,ibool mark_space,ibool print_error_if_does_not_exist,bool adjust_space,mem_heap_t * heap,table_id_t table_id)4930 fil_space_for_table_exists_in_mem(
4931 /*==============================*/
4932 ulint id, /*!< in: space id */
4933 const char* name, /*!< in: table name used in
4934 fil_space_create(). Either the
4935 standard 'dbname/tablename' format
4936 or table->dir_path_of_temp_table */
4937 ibool mark_space, /*!< in: in crash recovery, at database
4938 startup we mark all spaces which have
4939 an associated table in the InnoDB
4940 data dictionary, so that
4941 we can print a warning about orphaned
4942 tablespaces */
4943 ibool print_error_if_does_not_exist,
4944 /*!< in: print detailed error
4945 information to the .err log if a
4946 matching tablespace is not found from
4947 memory */
4948 bool adjust_space, /*!< in: whether to adjust space id
4949 when find table space mismatch */
4950 mem_heap_t* heap, /*!< in: heap memory */
4951 table_id_t table_id) /*!< in: table id */
4952 {
4953 fil_space_t* fnamespace;
4954 fil_space_t* space;
4955
4956 ut_ad(fil_system);
4957
4958 mutex_enter(&fil_system->mutex);
4959
4960 /* Look if there is a space with the same id */
4961
4962 space = fil_space_get_by_id(id);
4963
4964 /* Look if there is a space with the same name; the name is the
4965 directory path from the datadir to the file */
4966
4967 fnamespace = fil_space_get_by_name(name);
4968 if (space && space == fnamespace) {
4969 /* Found */
4970
4971 if (mark_space) {
4972 space->mark = TRUE;
4973 }
4974
4975 mutex_exit(&fil_system->mutex);
4976
4977 return(TRUE);
4978 }
4979
4980 /* Info from "fnamespace" comes from the ibd file itself, it can
4981 be different from data obtained from System tables since it is
4982 not transactional. If adjust_space is set, and the mismatching
4983 space are between a user table and its temp table, we shall
4984 adjust the ibd file name according to system table info */
4985 if (adjust_space
4986 && space != NULL
4987 && row_is_mysql_tmp_table_name(space->name)
4988 && !row_is_mysql_tmp_table_name(name)) {
4989
4990 mutex_exit(&fil_system->mutex);
4991
4992 DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
4993 DBUG_SUICIDE(););
4994
4995 if (fnamespace) {
4996 char* tmp_name;
4997
4998 tmp_name = dict_mem_create_temporary_tablename(
4999 heap, name, table_id);
5000
5001 fil_rename_tablespace(fnamespace->name, fnamespace->id,
5002 tmp_name, NULL);
5003 }
5004
5005 DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
5006 DBUG_SUICIDE(););
5007
5008 fil_rename_tablespace(space->name, id, name, NULL);
5009
5010 DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
5011 DBUG_SUICIDE(););
5012
5013 mutex_enter(&fil_system->mutex);
5014 fnamespace = fil_space_get_by_name(name);
5015 ut_ad(space == fnamespace);
5016 mutex_exit(&fil_system->mutex);
5017
5018 return(TRUE);
5019 }
5020
5021 if (!print_error_if_does_not_exist) {
5022
5023 mutex_exit(&fil_system->mutex);
5024
5025 return(FALSE);
5026 }
5027
5028 if (space == NULL) {
5029 if (fnamespace == NULL) {
5030 if (print_error_if_does_not_exist) {
5031 fil_report_missing_tablespace(name, id);
5032 }
5033 } else {
5034 ut_print_timestamp(stderr);
5035 fputs(" InnoDB: Error: table ", stderr);
5036 ut_print_filename(stderr, name);
5037 fprintf(stderr, "\n"
5038 "InnoDB: in InnoDB data dictionary has"
5039 " tablespace id %lu,\n"
5040 "InnoDB: but a tablespace with that id"
5041 " does not exist. There is\n"
5042 "InnoDB: a tablespace of name %s and id %lu,"
5043 " though. Have\n"
5044 "InnoDB: you deleted or moved .ibd files?\n",
5045 (ulong) id, fnamespace->name,
5046 (ulong) fnamespace->id);
5047 }
5048 error_exit:
5049 fputs("InnoDB: Please refer to\n"
5050 "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
5051 "InnoDB: for how to resolve the issue.\n", stderr);
5052
5053 mutex_exit(&fil_system->mutex);
5054
5055 return(FALSE);
5056 }
5057
5058 if (0 != strcmp(space->name, name)) {
5059 ut_print_timestamp(stderr);
5060 fputs(" InnoDB: Error: table ", stderr);
5061 ut_print_filename(stderr, name);
5062 fprintf(stderr, "\n"
5063 "InnoDB: in InnoDB data dictionary has"
5064 " tablespace id %lu,\n"
5065 "InnoDB: but the tablespace with that id"
5066 " has name %s.\n"
5067 "InnoDB: Have you deleted or moved .ibd files?\n",
5068 (ulong) id, space->name);
5069
5070 if (fnamespace != NULL) {
5071 fputs("InnoDB: There is a tablespace"
5072 " with the right name\n"
5073 "InnoDB: ", stderr);
5074 ut_print_filename(stderr, fnamespace->name);
5075 fprintf(stderr, ", but its id is %lu.\n",
5076 (ulong) fnamespace->id);
5077 }
5078
5079 goto error_exit;
5080 }
5081
5082 mutex_exit(&fil_system->mutex);
5083
5084 return(FALSE);
5085 }
5086
5087 /*******************************************************************//**
5088 Checks if a single-table tablespace for a given table name exists in the
5089 tablespace memory cache.
5090 @return space id, ULINT_UNDEFINED if not found */
5091 UNIV_INTERN
5092 ulint
fil_get_space_id_for_table(const char * tablename)5093 fil_get_space_id_for_table(
5094 /*=======================*/
5095 const char* tablename) /*!< in: table name in the standard
5096 'databasename/tablename' format */
5097 {
5098 fil_space_t* fnamespace;
5099 ulint id = ULINT_UNDEFINED;
5100
5101 ut_ad(fil_system);
5102
5103 mutex_enter(&fil_system->mutex);
5104
5105 /* Look if there is a space with the same name. */
5106
5107 fnamespace = fil_space_get_by_name(tablename);
5108
5109 if (fnamespace) {
5110 id = fnamespace->id;
5111 }
5112
5113 mutex_exit(&fil_system->mutex);
5114
5115 return(id);
5116 }
5117
5118 /**********************************************************************//**
5119 Tries to extend a data file so that it would accommodate the number of pages
5120 given. The tablespace must be cached in the memory cache. If the space is big
5121 enough already, does nothing.
5122 @return TRUE if success */
5123 UNIV_INTERN
5124 ibool
fil_extend_space_to_desired_size(ulint * actual_size,ulint space_id,ulint size_after_extend)5125 fil_extend_space_to_desired_size(
5126 /*=============================*/
5127 ulint* actual_size, /*!< out: size of the space after extension;
5128 if we ran out of disk space this may be lower
5129 than the desired size */
5130 ulint space_id, /*!< in: space id */
5131 ulint size_after_extend)/*!< in: desired size in pages after the
5132 extension; if the current space size is bigger
5133 than this already, the function does nothing */
5134 {
5135 fil_node_t* node;
5136 fil_space_t* space;
5137 byte* buf2;
5138 byte* buf;
5139 ulint buf_size;
5140 ulint start_page_no;
5141 ulint file_start_page_no;
5142 ulint page_size;
5143 ulint pages_added;
5144 ibool success;
5145
5146 ut_ad(!srv_read_only_mode);
5147
5148 retry:
5149 pages_added = 0;
5150 success = TRUE;
5151
5152 fil_mutex_enter_and_prepare_for_io(space_id);
5153
5154 space = fil_space_get_by_id(space_id);
5155 ut_a(space);
5156
5157 if (space->size >= size_after_extend) {
5158 /* Space already big enough */
5159
5160 *actual_size = space->size;
5161
5162 mutex_exit(&fil_system->mutex);
5163
5164 return(TRUE);
5165 }
5166
5167 page_size = fsp_flags_get_zip_size(space->flags);
5168 if (!page_size) {
5169 page_size = UNIV_PAGE_SIZE;
5170 }
5171
5172 node = UT_LIST_GET_LAST(space->chain);
5173
5174 if (!node->being_extended) {
5175 /* Mark this node as undergoing extension. This flag
5176 is used by other threads to wait for the extension
5177 opereation to finish. */
5178 node->being_extended = TRUE;
5179 } else {
5180 /* Another thread is currently extending the file. Wait
5181 for it to finish.
5182 It'd have been better to use event driven mechanism but
5183 the entire module is peppered with polling stuff. */
5184 mutex_exit(&fil_system->mutex);
5185 os_thread_sleep(100000);
5186 goto retry;
5187 }
5188
5189 if (!fil_node_prepare_for_io(node, fil_system, space)) {
5190 /* The tablespace data file, such as .ibd file, is missing */
5191 node->being_extended = false;
5192 mutex_exit(&fil_system->mutex);
5193
5194 return(false);
5195 }
5196
5197 /* At this point it is safe to release fil_system mutex. No
5198 other thread can rename, delete or close the file because
5199 we have set the node->being_extended flag. */
5200 mutex_exit(&fil_system->mutex);
5201
5202 start_page_no = space->size;
5203 file_start_page_no = space->size - node->size;
5204
5205 #ifdef HAVE_POSIX_FALLOCATE
5206 if (srv_use_posix_fallocate) {
5207
5208 os_offset_t start_offset = file_start_page_no * page_size;
5209 os_offset_t end_offset
5210 = (size_after_extend - file_start_page_no) * page_size;
5211
5212 success = (os_file_allocate(node->handle, start_offset,
5213 end_offset) == 0);
5214 if (!success)
5215 {
5216 ib_logf(IB_LOG_LEVEL_ERROR,
5217 "preallocating file space for file \'%s\' "
5218 "failed. Current size " INT64PF
5219 ", len " INT64PF ", desired size " INT64PF,
5220 node->name, start_offset, end_offset,
5221 start_offset + end_offset);
5222 }
5223 mutex_enter(&fil_system->mutex);
5224 if (success) {
5225 node->size += (size_after_extend - start_page_no);
5226 space->size += (size_after_extend - start_page_no);
5227 os_has_said_disk_full = FALSE;
5228 }
5229 node->being_extended = FALSE;
5230 fil_node_complete_io(node, fil_system, OS_FILE_READ);
5231 goto complete_io;
5232 }
5233 #endif
5234
5235 /* Extend at most 64 pages at a time */
5236 buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
5237 buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
5238 buf = static_cast<byte*>(ut_align(buf2, page_size));
5239
5240 memset(buf, 0, buf_size);
5241
5242 while (start_page_no < size_after_extend) {
5243 ulint n_pages
5244 = ut_min(buf_size / page_size,
5245 size_after_extend - start_page_no);
5246
5247 os_offset_t offset
5248 = ((os_offset_t) (start_page_no - file_start_page_no))
5249 * page_size;
5250 #ifdef UNIV_HOTBACKUP
5251 success = os_file_write(node->name, node->handle, buf,
5252 offset, page_size * n_pages);
5253 #else
5254 success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
5255 node->name, node->handle, buf,
5256 offset, page_size * n_pages,
5257 NULL, NULL, space_id, NULL, false);
5258 #endif /* UNIV_HOTBACKUP */
5259 if (success) {
5260 os_has_said_disk_full = FALSE;
5261 } else {
5262 /* Let us measure the size of the file to determine
5263 how much we were able to extend it */
5264 os_offset_t size;
5265
5266 size = os_file_get_size(node->handle);
5267 ut_a(size != (os_offset_t) -1);
5268
5269 n_pages = ((ulint) (size / page_size))
5270 - node->size - pages_added;
5271
5272 pages_added += n_pages;
5273 break;
5274 }
5275
5276 start_page_no += n_pages;
5277 pages_added += n_pages;
5278 }
5279
5280 mem_free(buf2);
5281
5282 mutex_enter(&fil_system->mutex);
5283
5284 ut_a(node->being_extended);
5285
5286 space->size += pages_added;
5287 node->size += pages_added;
5288 node->being_extended = FALSE;
5289
5290 fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
5291
5292 #ifdef HAVE_POSIX_FALLOCATE
5293 complete_io:
5294 #endif
5295
5296 *actual_size = space->size;
5297
5298 #ifndef UNIV_HOTBACKUP
5299 if (space_id == 0) {
5300 ulint pages_per_mb = (1024 * 1024) / page_size;
5301
5302 /* Keep the last data file size info up to date, rounded to
5303 full megabytes */
5304
5305 srv_data_file_sizes[srv_n_data_files - 1]
5306 = (node->size / pages_per_mb) * pages_per_mb;
5307 }
5308 #endif /* !UNIV_HOTBACKUP */
5309
5310 /*
5311 printf("Extended %s to %lu, actual size %lu pages\n", space->name,
5312 size_after_extend, *actual_size); */
5313 mutex_exit(&fil_system->mutex);
5314
5315 fil_flush(space_id);
5316
5317 return(success);
5318 }
5319
#ifdef UNIV_HOTBACKUP
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
mysqlbackup --apply-log phase we extended the spaces on-demand so that log
records could be applied, but that may have left spaces still too small
compared to the size stored in the space header.

For every space in fil_system->space_list the first page is read, the size
recorded in it is obtained with fsp_get_size_low(), and the space is extended
to that size. A failed extension is reported on stderr and then trips an
assertion. */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
	fil_space_t*	space;
	byte*		buf;
	ulint		actual_size;
	ulint		size_in_header;
	dberr_t		error;
	ibool		success;

	/* mem_alloc() yields an untyped pointer; this is a C++ translation
	unit, so the conversion to byte* has to be explicit. */
	buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));

	mutex_enter(&fil_system->mutex);

	space = UT_LIST_GET_FIRST(fil_system->space_list);

	while (space) {
		ut_a(space->purpose == FIL_TABLESPACE);

		mutex_exit(&fil_system->mutex); /* no need to protect with a
						mutex, because this is a
						single-threaded operation */
		error = fil_read(TRUE, space->id,
				 fsp_flags_get_zip_size(space->flags),
				 0, 0, UNIV_PAGE_SIZE, buf, NULL);
		ut_a(error == DB_SUCCESS);

		size_in_header = fsp_get_size_low(buf);

		/* Give the out parameter a defined value in case the callee
		returns without writing it; it is printed below on failure. */
		actual_size = 0;

		success = fil_extend_space_to_desired_size(
			&actual_size, space->id, size_in_header);
		if (!success) {
			/* ulint is cast to ulong to match the %lu
			conversions, as the other messages in this file do. */
			fprintf(stderr,
				"InnoDB: Error: could not extend the"
				" tablespace of %s\n"
				"InnoDB: to the size stored in header,"
				" %lu pages;\n"
				"InnoDB: size after extension %lu pages\n"
				"InnoDB: Check that you have free disk space"
				" and retry!\n",
				space->name, (ulong) size_in_header,
				(ulong) actual_size);
			ut_a(success);
		}

		mutex_enter(&fil_system->mutex);

		space = UT_LIST_GET_NEXT(space_list, space);
	}

	mutex_exit(&fil_system->mutex);

	mem_free(buf);
}
#endif /* UNIV_HOTBACKUP */
5382
5383 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
5384
5385 /*******************************************************************//**
5386 Tries to reserve free extents in a file space.
5387 @return TRUE if succeed */
5388 UNIV_INTERN
5389 ibool
fil_space_reserve_free_extents(ulint id,ulint n_free_now,ulint n_to_reserve)5390 fil_space_reserve_free_extents(
5391 /*===========================*/
5392 ulint id, /*!< in: space id */
5393 ulint n_free_now, /*!< in: number of free extents now */
5394 ulint n_to_reserve) /*!< in: how many one wants to reserve */
5395 {
5396 fil_space_t* space;
5397 ibool success;
5398
5399 ut_ad(fil_system);
5400
5401 mutex_enter(&fil_system->mutex);
5402
5403 space = fil_space_get_by_id(id);
5404
5405 ut_a(space);
5406
5407 if (space->n_reserved_extents + n_to_reserve > n_free_now) {
5408 success = FALSE;
5409 } else {
5410 space->n_reserved_extents += n_to_reserve;
5411 success = TRUE;
5412 }
5413
5414 mutex_exit(&fil_system->mutex);
5415
5416 return(success);
5417 }
5418
5419 /*******************************************************************//**
5420 Releases free extents in a file space. */
5421 UNIV_INTERN
5422 void
fil_space_release_free_extents(ulint id,ulint n_reserved)5423 fil_space_release_free_extents(
5424 /*===========================*/
5425 ulint id, /*!< in: space id */
5426 ulint n_reserved) /*!< in: how many one reserved */
5427 {
5428 fil_space_t* space;
5429
5430 ut_ad(fil_system);
5431
5432 mutex_enter(&fil_system->mutex);
5433
5434 space = fil_space_get_by_id(id);
5435
5436 ut_a(space);
5437 ut_a(space->n_reserved_extents >= n_reserved);
5438
5439 space->n_reserved_extents -= n_reserved;
5440
5441 mutex_exit(&fil_system->mutex);
5442 }
5443
5444 /*******************************************************************//**
5445 Gets the number of reserved extents. If the database is silent, this number
5446 should be zero. */
5447 UNIV_INTERN
5448 ulint
fil_space_get_n_reserved_extents(ulint id)5449 fil_space_get_n_reserved_extents(
5450 /*=============================*/
5451 ulint id) /*!< in: space id */
5452 {
5453 fil_space_t* space;
5454 ulint n;
5455
5456 ut_ad(fil_system);
5457
5458 mutex_enter(&fil_system->mutex);
5459
5460 space = fil_space_get_by_id(id);
5461
5462 ut_a(space);
5463
5464 n = space->n_reserved_extents;
5465
5466 mutex_exit(&fil_system->mutex);
5467
5468 return(n);
5469 }
5470
5471 /*============================ FILE I/O ================================*/
5472
5473 /********************************************************************//**
5474 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
5475
5476 Prepares a file node for i/o. Opens the file if it is closed. Updates the
5477 pending i/o's field in the node and the system appropriately. Takes the node
5478 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
5479 mutex.
5480 @return false if the file can't be opened, otherwise true */
5481 static
5482 bool
fil_node_prepare_for_io(fil_node_t * node,fil_system_t * system,fil_space_t * space)5483 fil_node_prepare_for_io(
5484 /*====================*/
5485 fil_node_t* node, /*!< in: file node */
5486 fil_system_t* system, /*!< in: tablespace memory cache */
5487 fil_space_t* space) /*!< in: space */
5488 {
5489 ut_ad(node && system && space);
5490 ut_ad(mutex_own(&(system->mutex)));
5491
5492 if (system->n_open > system->max_n_open + 5) {
5493 ut_print_timestamp(stderr);
5494 fprintf(stderr,
5495 " InnoDB: Warning: open files %lu"
5496 " exceeds the limit %lu\n",
5497 (ulong) system->n_open,
5498 (ulong) system->max_n_open);
5499 }
5500
5501 if (node->open == FALSE) {
5502 /* File is closed: open it */
5503 ut_a(node->n_pending == 0);
5504
5505 if (!fil_node_open_file(node, system, space)) {
5506 return(false);
5507 }
5508 }
5509
5510 if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
5511 /* The node is in the LRU list, remove it */
5512
5513 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
5514
5515 UT_LIST_REMOVE(LRU, system->LRU, node);
5516 }
5517
5518 node->n_pending++;
5519
5520 return(true);
5521 }
5522
5523 /********************************************************************//**
5524 Updates the data structures when an i/o operation finishes. Updates the
5525 pending i/o's field in the node appropriately. */
5526 static
5527 void
fil_node_complete_io(fil_node_t * node,fil_system_t * system,ulint type)5528 fil_node_complete_io(
5529 /*=================*/
5530 fil_node_t* node, /*!< in: file node */
5531 fil_system_t* system, /*!< in: tablespace memory cache */
5532 ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
5533 the node as modified if
5534 type == OS_FILE_WRITE */
5535 {
5536 ut_ad(node);
5537 ut_ad(system);
5538 ut_ad(mutex_own(&(system->mutex)));
5539
5540 ut_a(node->n_pending > 0);
5541
5542 node->n_pending--;
5543
5544 if (type == OS_FILE_WRITE) {
5545 ut_ad(!srv_read_only_mode);
5546 system->modification_counter++;
5547 node->modification_counter = system->modification_counter;
5548
5549 if (fil_buffering_disabled(node->space)) {
5550
5551 /* We don't need to keep track of unflushed
5552 changes as user has explicitly disabled
5553 buffering. */
5554 ut_ad(!node->space->is_in_unflushed_spaces);
5555 node->flush_counter = node->modification_counter;
5556
5557 } else if (!node->space->is_in_unflushed_spaces) {
5558
5559 node->space->is_in_unflushed_spaces = true;
5560 UT_LIST_ADD_FIRST(unflushed_spaces,
5561 system->unflushed_spaces,
5562 node->space);
5563 }
5564 }
5565
5566 if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
5567
5568 /* The node must be put back to the LRU list */
5569 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
5570 }
5571 }
5572
5573 /********************************************************************//**
5574 Report information about an invalid page access. */
5575 static
5576 void
fil_report_invalid_page_access(ulint block_offset,ulint space_id,const char * space_name,ulint byte_offset,ulint len,ulint type)5577 fil_report_invalid_page_access(
5578 /*===========================*/
5579 ulint block_offset, /*!< in: block offset */
5580 ulint space_id, /*!< in: space id */
5581 const char* space_name, /*!< in: space name */
5582 ulint byte_offset, /*!< in: byte offset */
5583 ulint len, /*!< in: I/O length */
5584 ulint type) /*!< in: I/O type */
5585 {
5586 fprintf(stderr,
5587 "InnoDB: Error: trying to access page number %lu"
5588 " in space %lu,\n"
5589 "InnoDB: space name %s,\n"
5590 "InnoDB: which is outside the tablespace bounds.\n"
5591 "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
5592 "InnoDB: If you get this error at mysqld startup,"
5593 " please check that\n"
5594 "InnoDB: your my.cnf matches the ibdata files"
5595 " that you have in the\n"
5596 "InnoDB: MySQL server.\n",
5597 (ulong) block_offset, (ulong) space_id, space_name,
5598 (ulong) byte_offset, (ulong) len, (ulong) type);
5599 }
5600
/********************************************************************//**
Reads or writes data. This operation is asynchronous (aio), unless sync is
set, in which case OS_AIO_SYNC mode is used and the i/o is completed before
the function returns.

The function looks up the space, walks its node chain to find the file that
contains block_offset, prepares that node for i/o, computes the byte offset
within the file and then hands the request to os_aio() (or, in the
UNIV_HOTBACKUP build, to os_file_read()/os_file_write()).
@return DB_SUCCESS on success; DB_TABLESPACE_DELETED if the space does not
exist, if an asynchronous read is attempted on a space with stop_new_ops
set, if the data file of a user tablespace cannot be opened, or if a read is
skipped because the space is marked corrupt and srv_pass_corrupt_table == 1;
DB_ERROR if BUF_READ_IGNORE_NONEXISTENT_PAGES was ORed to type and the
block lies beyond the last file of the space */
UNIV_INTERN
dberr_t
_fil_io(
/*===*/
	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
				ORed to OS_FILE_LOG, if a log i/o
				and ORed to OS_AIO_SIMULATED_WAKE_LATER
				if simulated aio and we want to post a
				batch of i/os; NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! BUF_READ_IGNORE_NONEXISTENT_PAGES
				may also be ORed in; see the return value */
	bool	sync,		/*!< in: true if synchronous aio is desired */
	ulint	space_id,	/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes;
				0 for uncompressed pages */
	ulint	block_offset,	/*!< in: offset in number of blocks */
	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
				aio this must be divisible by the OS block
				size */
	ulint	len,		/*!< in: how many bytes to read or write; this
				must not cross a file boundary; in aio this
				must be a block size multiple */
	void*	buf,		/*!< in/out: buffer where to store read data
				or from where to write; in aio this must be
				appropriately aligned */
	void*	message,	/*!< in: message for aio handler if non-sync
				aio used, else ignored */
	trx_t*	trx,		/*!< in: handed on unchanged to os_aio();
				not otherwise used in this function */
	bool	should_buffer)	/*!< in: whether to buffer an aio request.
				AIO read ahead uses this. If you plan to
				use this parameter, make sure you remember
				to call os_aio_dispatch_read_array_submit()
				when you're ready to commit all your requests.*/
{
	ulint		mode;
	fil_space_t*	space;
	fil_node_t*	node;
	ibool		ret;
	ulint		is_log;
	ulint		wake_later;
	os_offset_t	offset;
	ibool		ignore_nonexistent_pages;

	/* Split the modifier flags off type, so that afterwards type can
	be compared directly against OS_FILE_READ / OS_FILE_WRITE. */
	is_log = type & OS_FILE_LOG;
	type = type & ~OS_FILE_LOG;

	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;

	ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES;
	type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;

	ut_ad(byte_offset < UNIV_PAGE_SIZE);
	ut_ad(!zip_size || !byte_offset);
	ut_ad(ut_is_2pow(zip_size));
	ut_ad(buf);
	ut_ad(len > 0);
	ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
#endif
#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
#endif
	ut_ad(fil_validate_skip());
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
	/* ibuf bitmap pages must be read in the sync aio mode: */
	ut_ad(recv_no_ibuf_operations
	      || type == OS_FILE_WRITE
	      || !ibuf_bitmap_page(zip_size, block_offset)
	      || sync
	      || is_log);
# endif /* UNIV_LOG_DEBUG */
	/* Choose the aio mode: sync takes precedence, then log i/o, then
	reads of ibuf pages; everything else is normal aio. */
	if (sync) {
		mode = OS_AIO_SYNC;
	} else if (is_log) {
		mode = OS_AIO_LOG;
	} else if (type == OS_FILE_READ
		   && !recv_no_ibuf_operations
		   && ibuf_page(space_id, zip_size, block_offset, NULL)) {
		mode = OS_AIO_IBUF;
	} else {
		mode = OS_AIO_NORMAL;
	}
#else /* !UNIV_HOTBACKUP */
	ut_a(sync);
	mode = OS_AIO_SYNC;
#endif /* !UNIV_HOTBACKUP */

	/* Account the request in the server statistics. */
	if (type == OS_FILE_READ) {
		srv_stats.data_read.add(len);
	} else if (type == OS_FILE_WRITE) {
		ut_ad(!srv_read_only_mode);
		srv_stats.data_written.add(len);
	}

	/* Reserve the fil_system mutex and make sure that we can open at
	least one file while holding it, if the file is not already open */

	fil_mutex_enter_and_prepare_for_io(space_id);

	space = fil_space_get_by_id(space_id);

	/* If we are deleting a tablespace we don't allow async read operations
	on that. However, we do allow write and sync read operations */
	if (space == 0
	    || (type == OS_FILE_READ && !sync && space->stop_new_ops)) {
		mutex_exit(&fil_system->mutex);

		ib_logf(IB_LOG_LEVEL_ERROR,
			"Trying to do i/o to a tablespace which does "
			"not exist. i/o type %lu, space id %lu, "
			"page no. %lu, i/o length %lu bytes",
			(ulong) type, (ulong) space_id, (ulong) block_offset,
			(ulong) len);

		return(DB_TABLESPACE_DELETED);
	}

	ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);

	node = UT_LIST_GET_FIRST(space->chain);

	/* Walk the chain of files until the one containing block_offset
	is found. block_offset is reduced by the size of each skipped file,
	so that on exit it is relative to the start of the chosen file. */
	for (;;) {
		if (node == NULL) {
			/* The block lies beyond the last file. */
			if (ignore_nonexistent_pages) {
				mutex_exit(&fil_system->mutex);
				return(DB_ERROR);
			}

			fil_report_invalid_page_access(
				block_offset, space_id, space->name,
				byte_offset, len, type);

			ut_error;

		} else if (fil_is_user_tablespace_id(space->id)
			   && node->size == 0) {

			/* We do not know the size of a single-table tablespace
			before we open the file */
			break;
		} else if (node->size > block_offset) {
			/* Found! */
			break;
		} else {
			block_offset -= node->size;
			node = UT_LIST_GET_NEXT(chain, node);
		}
	}

	/* Open file if closed */
	if (!fil_node_prepare_for_io(node, fil_system, space)) {
		if (space->purpose == FIL_TABLESPACE
		    && fil_is_user_tablespace_id(space->id)) {
			mutex_exit(&fil_system->mutex);

			ib_logf(IB_LOG_LEVEL_ERROR,
				"Trying to do i/o to a tablespace which "
				"exists without .ibd data file. "
				"i/o type %lu, space id %lu, page no %lu, "
				"i/o length %lu bytes",
				(ulong) type, (ulong) space_id,
				(ulong) block_offset, (ulong) len);

			return(DB_TABLESPACE_DELETED);
		}

		/* The tablespace is for log. Currently, we just assert here
		to prevent handling errors along the way fil_io returns.
		Also, if the log files are missing, it would be hard to
		promise the server can continue running. */
		ut_a(0);
	}

	/* Check that at least the start offset is within the bounds of a
	single-table tablespace, including rollback tablespaces. */
	if (UNIV_UNLIKELY(node->size <= block_offset)
	    && space->id != 0 && space->purpose == FIL_TABLESPACE) {

		fil_report_invalid_page_access(
			block_offset, space_id, space->name, byte_offset,
			len, type);

		ut_error;
	}

	/* Now we have made the changes in the data structures of fil_system */
	mutex_exit(&fil_system->mutex);

	/* Calculate the byte offset of the request within the file, and
	assert that the whole request fits into the file */

	if (!zip_size) {
		offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT)
			+ byte_offset;

		ut_a(node->size - block_offset
		     >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
			 / UNIV_PAGE_SIZE));
	} else {
		/* Translate the compressed page size into a shift count;
		any other size is a fatal error. */
		ulint	zip_size_shift;
		switch (zip_size) {
		case 1024: zip_size_shift = 10; break;
		case 2048: zip_size_shift = 11; break;
		case 4096: zip_size_shift = 12; break;
		case 8192: zip_size_shift = 13; break;
		case 16384: zip_size_shift = 14; break;
		default: ut_error;
		}
		offset = ((os_offset_t) block_offset << zip_size_shift)
			+ byte_offset;
		ut_a(node->size - block_offset
		     >= (len + (zip_size - 1)) / zip_size);
	}

	/* Do aio */

	ut_a(byte_offset % OS_MIN_LOG_BLOCK_SIZE == 0);
	ut_a((len % OS_MIN_LOG_BLOCK_SIZE) == 0);

#ifndef UNIV_HOTBACKUP
	if (UNIV_UNLIKELY(space->is_corrupt && srv_pass_corrupt_table)) {

		/* should ignore i/o for the crashed space */
		if (srv_pass_corrupt_table == 1 ||
		    type == OS_FILE_WRITE) {

			/* No i/o will be issued: undo the pending count
			taken by fil_node_prepare_for_io(), and for normal
			aio run the page completion that the aio handler
			would otherwise have run. */
			mutex_enter(&fil_system->mutex);
			fil_node_complete_io(node, fil_system, type);
			mutex_exit(&fil_system->mutex);
			if (mode == OS_AIO_NORMAL) {
				ut_a(space->purpose == FIL_TABLESPACE);
				buf_page_io_complete(static_cast<buf_page_t *>
					(message));
			}
		}

		if (srv_pass_corrupt_table == 1 && type == OS_FILE_READ) {

			return(DB_TABLESPACE_DELETED);

		} else if (type == OS_FILE_WRITE) {

			return(DB_SUCCESS);
		}

		/* Otherwise (a read with srv_pass_corrupt_table != 1) the
		request falls through and is issued as usual. */
	}

	/* Queue the aio request */
	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
		     offset, len, node, message, space_id, trx, should_buffer);

#else
	/* In mysqlbackup do normal i/o, not aio */
	if (type == OS_FILE_READ) {
		ret = os_file_read(node->handle, buf, offset, len);
	} else {
		ut_ad(!srv_read_only_mode);
		ret = os_file_write(node->name, node->handle, buf,
				    offset, len);
	}
#endif /* !UNIV_HOTBACKUP */
	ut_a(ret);

	if (mode == OS_AIO_SYNC) {
		/* The i/o operation is already completed when we return from
		os_aio: */

		mutex_enter(&fil_system->mutex);

		fil_node_complete_io(node, fil_system, type);

		mutex_exit(&fil_system->mutex);

		ut_ad(fil_validate_skip());
	}

	return(DB_SUCCESS);
}
5886
5887 #ifndef UNIV_HOTBACKUP
5888 /**********************************************************************//**
5889 Waits for an aio operation to complete. This function is used to write the
5890 handler for completed requests. The aio array of pending requests is divided
5891 into segments (see os0file.cc for more info). The thread specifies which
5892 segment it wants to wait for. */
5893 UNIV_INTERN
5894 void
fil_aio_wait(ulint segment)5895 fil_aio_wait(
5896 /*=========*/
5897 ulint segment) /*!< in: the number of the segment in the aio
5898 array to wait for */
5899 {
5900 ibool ret;
5901 fil_node_t* fil_node;
5902 void* message;
5903 ulint type;
5904 ulint space_id = 0;
5905
5906 ut_ad(fil_validate_skip());
5907
5908 if (srv_use_native_aio) {
5909 srv_set_io_thread_op_info(segment, "native aio handle");
5910 #ifdef WIN_ASYNC_IO
5911 ret = os_aio_windows_handle(
5912 segment, 0, &fil_node, &message, &type, &space_id);
5913 #elif defined(LINUX_NATIVE_AIO)
5914 ret = os_aio_linux_handle(
5915 segment, &fil_node, &message, &type, &space_id);
5916 #else
5917 ut_error;
5918 ret = 0; /* Eliminate compiler warning */
5919 #endif /* WIN_ASYNC_IO */
5920 } else {
5921 srv_set_io_thread_op_info(segment, "simulated aio handle");
5922
5923 ret = os_aio_simulated_handle(
5924 segment, &fil_node, &message, &type, &space_id);
5925 }
5926
5927 ut_a(ret);
5928 if (fil_node == NULL) {
5929 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
5930 return;
5931 }
5932
5933 srv_set_io_thread_op_info(segment, "complete io for fil node");
5934
5935 mutex_enter(&fil_system->mutex);
5936
5937 fil_node_complete_io(fil_node, fil_system, type);
5938
5939 mutex_exit(&fil_system->mutex);
5940
5941 ut_ad(fil_validate_skip());
5942
5943 /* Do the i/o handling */
5944 /* IMPORTANT: since i/o handling for reads will read also the insert
5945 buffer in tablespace 0, you have to be very careful not to introduce
5946 deadlocks in the i/o system. We keep tablespace 0 data files always
5947 open, and use a special i/o thread to serve insert buffer requests. */
5948
5949 if (fil_node->space->purpose == FIL_TABLESPACE) {
5950 srv_set_io_thread_op_info(segment, "complete io for buf page");
5951 buf_page_io_complete(static_cast<buf_page_t*>(message));
5952 } else {
5953 srv_set_io_thread_op_info(segment, "complete io for log");
5954 log_io_complete(static_cast<log_group_t*>(message));
5955 }
5956 }
5957 #endif /* UNIV_HOTBACKUP */
5958
/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything.

Every file node of the space whose modification counter is ahead of its
flush counter is flushed with os_file_flush(). fil_system->mutex is released
around the flush call itself and around waits for a flush of the same file
that another thread has in progress. When buffering is disabled for the
space, nothing is flushed. */
UNIV_INTERN
void
fil_flush(
/*======*/
	ulint	space_id)	/*!< in: file space id (this can be a group of
				log files or a tablespace of the database) */
{
	fil_space_t*	space;
	fil_node_t*	node;
	pfs_os_file_t	file;


	mutex_enter(&fil_system->mutex);

	space = fil_space_get_by_id(space_id);

	if (!space || space->stop_new_ops) {
		mutex_exit(&fil_system->mutex);

		return;
	}

	if (fil_buffering_disabled(space)) {

		/* No need to flush. User has explicitly disabled
		buffering. */
		ut_ad(!space->is_in_unflushed_spaces);
		ut_ad(fil_space_is_flushed(space));
		ut_ad(space->n_pending_flushes == 0);

#ifdef UNIV_DEBUG
		/* Debug builds verify that every node is already counted
		as flushed and has no flush in progress. */
		for (node = UT_LIST_GET_FIRST(space->chain);
		     node != NULL;
		     node = UT_LIST_GET_NEXT(chain, node)) {
			ut_ad(node->modification_counter
			      == node->flush_counter);
			ut_ad(node->n_pending_flushes == 0);
		}
#endif /* UNIV_DEBUG */

		mutex_exit(&fil_system->mutex);
		return;
	}

	space->n_pending_flushes++;	/*!< prevent dropping of the space while
					we are flushing */
	for (node = UT_LIST_GET_FIRST(space->chain);
	     node != NULL;
	     node = UT_LIST_GET_NEXT(chain, node)) {

		/* Remember how far the node had been modified when we
		looked at it; the flush below is credited up to this value
		only. */
		ib_int64_t	old_mod_counter = node->modification_counter;;

		if (old_mod_counter <= node->flush_counter) {
			/* Nothing new to flush in this file. */
			continue;
		}

		ut_a(node->open);

		if (space->purpose == FIL_TABLESPACE) {
			fil_n_pending_tablespace_flushes++;
		} else {
			fil_n_pending_log_flushes++;
			fil_n_log_flushes++;
		}
#ifdef __WIN__
		if (node->is_raw_disk) {

			/* Raw disks are not flushed in this build; go
			straight to the bookkeeping. */
			goto skip_flush;
		}
#endif /* __WIN__ */
retry:
		if (node->n_pending_flushes > 0) {
			/* We want to avoid calling os_file_flush() on
			the file twice at the same time, because we do
			not know what bugs OS's may contain in file
			i/o */

			ib_int64_t sig_count =
				os_event_reset(node->sync_event);

			mutex_exit(&fil_system->mutex);

			os_event_wait_low(node->sync_event, sig_count);

			mutex_enter(&fil_system->mutex);

			if (node->flush_counter >= old_mod_counter) {

				/* The other thread's flush already covered
				the modifications we were after. */
				goto skip_flush;
			}

			goto retry;
		}

		ut_a(node->open);
		/* Copy the handle and mark the flush as pending while the
		mutex is still held; the flush itself runs without it. */
		file = node->handle;
		node->n_pending_flushes++;

		mutex_exit(&fil_system->mutex);

		os_file_flush(file);

		mutex_enter(&fil_system->mutex);

		/* Signal the event that threads in the retry branch above
		wait on. */
		os_event_set(node->sync_event);

		node->n_pending_flushes--;
skip_flush:
		if (node->flush_counter < old_mod_counter) {
			node->flush_counter = old_mod_counter;

			/* Once the whole space counts as flushed, take it
			off the list of unflushed spaces. */
			if (space->is_in_unflushed_spaces
			    && fil_space_is_flushed(space)) {

				space->is_in_unflushed_spaces = false;

				UT_LIST_REMOVE(
					unflushed_spaces,
					fil_system->unflushed_spaces,
					space);
			}
		}

		/* Undo the pending-flush statistic incremented above;
		fil_n_log_flushes is a running total and is not reset. */
		if (space->purpose == FIL_TABLESPACE) {
			fil_n_pending_tablespace_flushes--;
		} else {
			fil_n_pending_log_flushes--;
		}
	}

	space->n_pending_flushes--;

	mutex_exit(&fil_system->mutex);
}
6096
6097 /**********************************************************************//**
6098 Flushes to disk the writes in file spaces of the given type possibly cached by
6099 the OS. */
6100 UNIV_INTERN
6101 void
fil_flush_file_spaces(ulint purpose)6102 fil_flush_file_spaces(
6103 /*==================*/
6104 ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */
6105 {
6106 fil_space_t* space;
6107 ulint* space_ids;
6108 ulint n_space_ids;
6109 ulint i;
6110
6111 mutex_enter(&fil_system->mutex);
6112
6113 n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
6114 if (n_space_ids == 0) {
6115
6116 mutex_exit(&fil_system->mutex);
6117 return;
6118 }
6119
6120 /* Assemble a list of space ids to flush. Previously, we
6121 traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
6122 on a space that was just removed from the list by fil_flush().
6123 Thus, the space could be dropped and the memory overwritten. */
6124 space_ids = static_cast<ulint*>(
6125 mem_alloc(n_space_ids * sizeof *space_ids));
6126
6127 n_space_ids = 0;
6128
6129 for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
6130 space;
6131 space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
6132
6133 if (space->purpose == purpose && !space->stop_new_ops) {
6134
6135 space_ids[n_space_ids++] = space->id;
6136 }
6137 }
6138
6139 mutex_exit(&fil_system->mutex);
6140
6141 /* Flush the spaces. It will not hurt to call fil_flush() on
6142 a non-existing space id. */
6143 for (i = 0; i < n_space_ids; i++) {
6144
6145 fil_flush(space_ids[i]);
6146 }
6147
6148 mem_free(space_ids);
6149 }
6150
6151 /** Functor to validate the space list. */
6152 struct Check {
operator ()Check6153 void operator()(const fil_node_t* elem)
6154 {
6155 ut_a(elem->open || !elem->n_pending);
6156 }
6157 };
6158
6159 /******************************************************************//**
6160 Checks the consistency of the tablespace cache.
6161 @return TRUE if ok */
6162 UNIV_INTERN
6163 ibool
fil_validate(void)6164 fil_validate(void)
6165 /*==============*/
6166 {
6167 fil_space_t* space;
6168 fil_node_t* fil_node;
6169 ulint n_open = 0;
6170 ulint i;
6171
6172 mutex_enter(&fil_system->mutex);
6173
6174 /* Look for spaces in the hash table */
6175
6176 for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
6177
6178 for (space = static_cast<fil_space_t*>(
6179 HASH_GET_FIRST(fil_system->spaces, i));
6180 space != 0;
6181 space = static_cast<fil_space_t*>(
6182 HASH_GET_NEXT(hash, space))) {
6183
6184 UT_LIST_VALIDATE(
6185 chain, fil_node_t, space->chain, Check());
6186
6187 for (fil_node = UT_LIST_GET_FIRST(space->chain);
6188 fil_node != 0;
6189 fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
6190
6191 if (fil_node->n_pending > 0) {
6192 ut_a(fil_node->open);
6193 }
6194
6195 if (fil_node->open) {
6196 n_open++;
6197 }
6198 }
6199 }
6200 }
6201
6202 ut_a(fil_system->n_open == n_open);
6203
6204 UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU);
6205
6206 for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
6207 fil_node != 0;
6208 fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
6209
6210 ut_a(fil_node->n_pending == 0);
6211 ut_a(!fil_node->being_extended);
6212 ut_a(fil_node->open);
6213 ut_a(fil_space_belongs_in_lru(fil_node->space));
6214 }
6215
6216 mutex_exit(&fil_system->mutex);
6217
6218 return(TRUE);
6219 }
6220
6221 /********************************************************************//**
6222 Returns TRUE if file address is undefined.
6223 @return TRUE if undefined */
6224 UNIV_INTERN
6225 ibool
fil_addr_is_null(fil_addr_t addr)6226 fil_addr_is_null(
6227 /*=============*/
6228 fil_addr_t addr) /*!< in: address */
6229 {
6230 return(addr.page == FIL_NULL);
6231 }
6232
6233 /********************************************************************//**
6234 Get the predecessor of a file page.
6235 @return FIL_PAGE_PREV */
6236 UNIV_INTERN
6237 ulint
fil_page_get_prev(const byte * page)6238 fil_page_get_prev(
6239 /*==============*/
6240 const byte* page) /*!< in: file page */
6241 {
6242 return(mach_read_from_4(page + FIL_PAGE_PREV));
6243 }
6244
6245 /********************************************************************//**
6246 Get the successor of a file page.
6247 @return FIL_PAGE_NEXT */
6248 UNIV_INTERN
6249 ulint
fil_page_get_next(const byte * page)6250 fil_page_get_next(
6251 /*==============*/
6252 const byte* page) /*!< in: file page */
6253 {
6254 return(mach_read_from_4(page + FIL_PAGE_NEXT));
6255 }
6256
6257 /*********************************************************************//**
6258 Sets the file page type. */
6259 UNIV_INTERN
6260 void
fil_page_set_type(byte * page,ulint type)6261 fil_page_set_type(
6262 /*==============*/
6263 byte* page, /*!< in/out: file page */
6264 ulint type) /*!< in: type */
6265 {
6266 ut_ad(page);
6267
6268 mach_write_to_2(page + FIL_PAGE_TYPE, type);
6269 }
6270
6271 /*********************************************************************//**
6272 Gets the file page type.
6273 @return type; NOTE that if the type has not been written to page, the
6274 return value not defined */
6275 UNIV_INTERN
6276 ulint
fil_page_get_type(const byte * page)6277 fil_page_get_type(
6278 /*==============*/
6279 const byte* page) /*!< in: file page */
6280 {
6281 ut_ad(page);
6282
6283 return(mach_read_from_2(page + FIL_PAGE_TYPE));
6284 }
6285
6286 /****************************************************************//**
6287 Closes the tablespace memory cache. */
6288 UNIV_INTERN
6289 void
fil_close(void)6290 fil_close(void)
6291 /*===========*/
6292 {
6293 mutex_free(&fil_system->mutex);
6294
6295 hash_table_free(fil_system->spaces);
6296
6297 hash_table_free(fil_system->name_hash);
6298
6299 ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
6300 ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
6301 ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
6302
6303 mem_free(fil_system);
6304
6305 fil_system = NULL;
6306 }
6307
6308 /********************************************************************//**
6309 Initializes a buffer control block when the buf_pool is created. */
6310 static
6311 void
fil_buf_block_init(buf_block_t * block,byte * frame)6312 fil_buf_block_init(
6313 /*===============*/
6314 buf_block_t* block, /*!< in: pointer to control block */
6315 byte* frame) /*!< in: pointer to buffer frame */
6316 {
6317 UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
6318
6319 block->frame = frame;
6320
6321 block->page.io_fix = BUF_IO_NONE;
6322 /* There are assertions that check for this. */
6323 block->page.buf_fix_count = 1;
6324 block->page.state = BUF_BLOCK_READY_FOR_USE;
6325
6326 page_zip_des_init(&block->page.zip);
6327 }
6328
6329 struct fil_iterator_t {
6330 pfs_os_file_t file; /*!< File handle */
6331 const char* filepath; /*!< File path name */
6332 os_offset_t start; /*!< From where to start */
6333 os_offset_t end; /*!< Where to stop */
6334 os_offset_t file_size; /*!< File size in bytes */
6335 ulint page_size; /*!< Page size */
6336 ulint n_io_buffers; /*!< Number of pages to use
6337 for IO */
6338 byte* io_buffer; /*!< Buffer to use for IO */
6339 };
6340
6341 /********************************************************************//**
6342 TODO: This can be made parallel trivially by chunking up the file and creating
6343 a callback per thread. . Main benefit will be to use multiple CPUs for
6344 checksums and compressed tables. We have to do compressed tables block by
6345 block right now. Secondly we need to decompress/compress and copy too much
6346 of data. These are CPU intensive.
6347
6348 Iterate over all the pages in the tablespace.
6349 @param iter - Tablespace iterator
6350 @param block - block to use for IO
6351 @param callback - Callback to inspect and update page contents
6352 @retval DB_SUCCESS or error code */
6353 static
6354 dberr_t
fil_iterate(const fil_iterator_t & iter,buf_block_t * block,PageCallback & callback)6355 fil_iterate(
6356 /*========*/
6357 const fil_iterator_t& iter,
6358 buf_block_t* block,
6359 PageCallback& callback)
6360 {
6361 os_offset_t offset;
6362 ulint page_no = 0;
6363 ulint space_id = callback.get_space_id();
6364 ulint n_bytes = iter.n_io_buffers * iter.page_size;
6365
6366 ut_ad(!srv_read_only_mode);
6367
6368 /* TODO: For compressed tables we do a lot of useless
6369 copying for non-index pages. Unfortunately, it is
6370 required by buf_zip_decompress() */
6371
6372 for (offset = iter.start; offset < iter.end; offset += n_bytes) {
6373
6374 byte* io_buffer = iter.io_buffer;
6375
6376 block->frame = io_buffer;
6377
6378 if (callback.get_zip_size() > 0) {
6379 page_zip_des_init(&block->page.zip);
6380 page_zip_set_size(&block->page.zip, iter.page_size);
6381 block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
6382 ut_d(block->page.zip.m_external = true);
6383 ut_ad(iter.page_size == callback.get_zip_size());
6384
6385 /* Zip IO is done in the compressed page buffer. */
6386 io_buffer = block->page.zip.data;
6387 } else {
6388 io_buffer = iter.io_buffer;
6389 }
6390
6391 /* We have to read the exact number of bytes. Otherwise the
6392 InnoDB IO functions croak on failed reads. */
6393
6394 n_bytes = static_cast<ulint>(
6395 ut_min(static_cast<os_offset_t>(n_bytes),
6396 iter.end - offset));
6397
6398 ut_ad(n_bytes > 0);
6399 ut_ad(!(n_bytes % iter.page_size));
6400
6401 if (!os_file_read(iter.file, io_buffer, offset,
6402 (ulint) n_bytes)) {
6403
6404 ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
6405
6406 return(DB_IO_ERROR);
6407 }
6408
6409 bool updated = false;
6410 os_offset_t page_off = offset;
6411 ulint n_pages_read = (ulint) n_bytes / iter.page_size;
6412
6413 for (ulint i = 0; i < n_pages_read; ++i) {
6414
6415 buf_block_set_file_page(block, space_id, page_no++);
6416
6417 dberr_t err;
6418
6419 if ((err = callback(page_off, block)) != DB_SUCCESS) {
6420
6421 return(err);
6422
6423 } else if (!updated) {
6424 updated = buf_block_get_state(block)
6425 == BUF_BLOCK_FILE_PAGE;
6426 }
6427
6428 buf_block_set_state(block, BUF_BLOCK_NOT_USED);
6429 buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
6430
6431 page_off += iter.page_size;
6432 block->frame += iter.page_size;
6433 }
6434
6435 /* A page was updated in the set, write back to disk. */
6436 if (updated
6437 && !os_file_write(
6438 iter.filepath, iter.file, io_buffer,
6439 offset, (ulint) n_bytes)) {
6440
6441 ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
6442
6443 return(DB_IO_ERROR);
6444 }
6445 }
6446
6447 return(DB_SUCCESS);
6448 }
6449
6450 /********************************************************************//**
6451 Iterate over all the pages in the tablespace.
6452 @param table - the table definiton in the server
6453 @param n_io_buffers - number of blocks to read and write together
6454 @param callback - functor that will do the page updates
6455 @return DB_SUCCESS or error code */
6456 UNIV_INTERN
6457 dberr_t
fil_tablespace_iterate(dict_table_t * table,ulint n_io_buffers,PageCallback & callback)6458 fil_tablespace_iterate(
6459 /*===================*/
6460 dict_table_t* table,
6461 ulint n_io_buffers,
6462 PageCallback& callback)
6463 {
6464 dberr_t err;
6465 pfs_os_file_t file;
6466 char* filepath;
6467
6468 ut_a(n_io_buffers > 0);
6469 ut_ad(!srv_read_only_mode);
6470
6471 DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
6472 return(DB_CORRUPTION););
6473
6474 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
6475 dict_get_and_save_data_dir_path(table, false);
6476 ut_a(table->data_dir_path);
6477
6478 filepath = os_file_make_remote_pathname(
6479 table->data_dir_path, table->name, "ibd");
6480 } else {
6481 filepath = fil_make_ibd_name(table->name, false);
6482 }
6483
6484 {
6485 ibool success;
6486
6487 file = os_file_create_simple_no_error_handling(
6488 innodb_file_data_key, filepath,
6489 OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
6490
6491 DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
6492 {
6493 static bool once;
6494
6495 if (!once || ut_rnd_interval(0, 10) == 5) {
6496 once = true;
6497 success = FALSE;
6498 os_file_close(file);
6499 }
6500 });
6501
6502 if (!success) {
6503 /* The following call prints an error message */
6504 os_file_get_last_error(true);
6505
6506 ib_logf(IB_LOG_LEVEL_ERROR,
6507 "Trying to import a tablespace, but could not "
6508 "open the tablespace file %s", filepath);
6509
6510 mem_free(filepath);
6511
6512 return(DB_TABLESPACE_NOT_FOUND);
6513
6514 } else {
6515 err = DB_SUCCESS;
6516 }
6517 }
6518
6519 callback.set_file(filepath, file);
6520
6521 os_offset_t file_size = os_file_get_size(file);
6522 ut_a(file_size != (os_offset_t) -1);
6523
6524 /* The block we will use for every physical page */
6525 buf_block_t block;
6526
6527 memset(&block, 0x0, sizeof(block));
6528
6529 /* Allocate a page to read in the tablespace header, so that we
6530 can determine the page size and zip_size (if it is compressed).
6531 We allocate an extra page in case it is a compressed table. One
6532 page is to ensure alignement. */
6533
6534 void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
6535 byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
6536
6537 fil_buf_block_init(&block, page);
6538
6539 /* Read the first page and determine the page and zip size. */
6540
6541 if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
6542
6543 err = DB_IO_ERROR;
6544
6545 } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
6546 fil_iterator_t iter;
6547
6548 iter.file = file;
6549 iter.start = 0;
6550 iter.end = file_size;
6551 iter.filepath = filepath;
6552 iter.file_size = file_size;
6553 iter.n_io_buffers = n_io_buffers;
6554 iter.page_size = callback.get_page_size();
6555
6556 /* Compressed pages can't be optimised for block IO for now.
6557 We do the IMPORT page by page. */
6558
6559 if (callback.get_zip_size() > 0) {
6560 iter.n_io_buffers = 1;
6561 ut_a(iter.page_size == callback.get_zip_size());
6562 }
6563
6564 /** Add an extra page for compressed page scratch area. */
6565
6566 void* io_buffer = mem_alloc(
6567 (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
6568
6569 iter.io_buffer = static_cast<byte*>(
6570 ut_align(io_buffer, UNIV_PAGE_SIZE));
6571
6572 err = fil_iterate(iter, &block, callback);
6573
6574 mem_free(io_buffer);
6575 }
6576
6577 if (err == DB_SUCCESS) {
6578
6579 ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
6580
6581 if (!os_file_flush(file)) {
6582 ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
6583 err = DB_IO_ERROR;
6584 } else {
6585 ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
6586 }
6587 }
6588
6589 os_file_close(file);
6590
6591 mem_free(page_ptr);
6592 mem_free(filepath);
6593
6594 return(err);
6595 }
6596
6597 /**
6598 Set the tablespace compressed table size.
6599 @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
6600 dberr_t
set_zip_size(const buf_frame_t * page)6601 PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
6602 {
6603 m_zip_size = fsp_header_get_zip_size(page);
6604
6605 if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
6606 return(DB_CORRUPTION);
6607 }
6608
6609 return(DB_SUCCESS);
6610 }
6611
6612 /********************************************************************//**
6613 Delete the tablespace file and any related files like .cfg.
6614 This should not be called for temporary tables. */
6615 UNIV_INTERN
6616 void
fil_delete_file(const char * ibd_name)6617 fil_delete_file(
6618 /*============*/
6619 const char* ibd_name) /*!< in: filepath of the ibd
6620 tablespace */
6621 {
6622 /* Force a delete of any stale .ibd files that are lying around. */
6623
6624 ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
6625
6626 os_file_delete_if_exists(innodb_file_data_key, ibd_name);
6627
6628 char* cfg_name = fil_make_cfg_name(ibd_name);
6629
6630 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
6631
6632 mem_free(cfg_name);
6633 }
6634
6635 /*************************************************************************
6636 Return local hash table informations. */
6637
6638 ulint
fil_system_hash_cells(void)6639 fil_system_hash_cells(void)
6640 /*=======================*/
6641 {
6642 if (fil_system) {
6643 return (fil_system->spaces->n_cells
6644 + fil_system->name_hash->n_cells);
6645 } else {
6646 return 0;
6647 }
6648 }
6649
6650 ulint
fil_system_hash_nodes(void)6651 fil_system_hash_nodes(void)
6652 /*=======================*/
6653 {
6654 if (fil_system) {
6655 return (UT_LIST_GET_LEN(fil_system->space_list)
6656 * (sizeof(fil_space_t) + MEM_BLOCK_HEADER_SIZE));
6657 } else {
6658 return 0;
6659 }
6660 }
6661
6662 /**
6663 Iterate over all the spaces in the space list and fetch the
6664 tablespace names. It will return a copy of the name that must be
6665 freed by the caller using: delete[].
6666 @return DB_SUCCESS if all OK. */
6667 UNIV_INTERN
6668 dberr_t
fil_get_space_names(space_name_list_t & space_name_list)6669 fil_get_space_names(
6670 /*================*/
6671 space_name_list_t& space_name_list)
6672 /*!< in/out: List to append to */
6673 {
6674 fil_space_t* space;
6675 dberr_t err = DB_SUCCESS;
6676
6677 mutex_enter(&fil_system->mutex);
6678
6679 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
6680 space != NULL;
6681 space = UT_LIST_GET_NEXT(space_list, space)) {
6682
6683 if (space->purpose == FIL_TABLESPACE) {
6684 ulint len;
6685 char* name;
6686
6687 len = strlen(space->name);
6688 name = new(std::nothrow) char[len + 1];
6689
6690 if (name == 0) {
6691 /* Caller to free elements allocated so far. */
6692 err = DB_OUT_OF_MEMORY;
6693 break;
6694 }
6695
6696 memcpy(name, space->name, len);
6697 name[len] = 0;
6698
6699 space_name_list.push_back(name);
6700 }
6701 }
6702
6703 mutex_exit(&fil_system->mutex);
6704
6705 return(err);
6706 }
6707
6708 /** Generate redo log for swapping two .ibd files
6709 @param[in] old_table old table
6710 @param[in] new_table new table
6711 @param[in] tmp_name temporary table name
6712 @param[in,out] mtr mini-transaction
6713 @return innodb error code */
6714 UNIV_INTERN
6715 dberr_t
fil_mtr_rename_log(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name,mtr_t * mtr)6716 fil_mtr_rename_log(
6717 const dict_table_t* old_table,
6718 const dict_table_t* new_table,
6719 const char* tmp_name,
6720 mtr_t* mtr)
6721 {
6722 dberr_t err = DB_SUCCESS;
6723 char* old_path;
6724
6725 /* If neither table is file-per-table,
6726 there will be no renaming of files. */
6727 if (old_table->space == TRX_SYS_SPACE
6728 && new_table->space == TRX_SYS_SPACE) {
6729 return(DB_SUCCESS);
6730 }
6731
6732 if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6733 old_path = os_file_make_remote_pathname(
6734 old_table->data_dir_path, old_table->name, "ibd");
6735 } else {
6736 old_path = fil_make_ibd_name(old_table->name, false);
6737 }
6738 if (old_path == NULL) {
6739 return(DB_OUT_OF_MEMORY);
6740 }
6741
6742 if (old_table->space != TRX_SYS_SPACE) {
6743 char* tmp_path;
6744
6745 if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6746 tmp_path = os_file_make_remote_pathname(
6747 old_table->data_dir_path, tmp_name, "ibd");
6748 }
6749 else {
6750 tmp_path = fil_make_ibd_name(tmp_name, false);
6751 }
6752
6753 if (tmp_path == NULL) {
6754 mem_free(old_path);
6755 return(DB_OUT_OF_MEMORY);
6756 }
6757
6758 /* Temp filepath must not exist. */
6759 err = fil_rename_tablespace_check(
6760 old_table->space, old_path, tmp_path,
6761 dict_table_is_discarded(old_table));
6762 mem_free(tmp_path);
6763 if (err != DB_SUCCESS) {
6764 mem_free(old_path);
6765 return(err);
6766 }
6767
6768 fil_op_write_log(MLOG_FILE_RENAME, old_table->space,
6769 0, 0, old_table->name, tmp_name, mtr);
6770 }
6771
6772 if (new_table->space != TRX_SYS_SPACE) {
6773
6774 /* Destination filepath must not exist unless this ALTER
6775 TABLE starts and ends with a file_per-table tablespace. */
6776 if (old_table->space == TRX_SYS_SPACE) {
6777 char* new_path = NULL;
6778
6779 if (DICT_TF_HAS_DATA_DIR(new_table->flags)) {
6780 new_path = os_file_make_remote_pathname(
6781 new_table->data_dir_path,
6782 new_table->name, "ibd");
6783 }
6784 else {
6785 new_path = fil_make_ibd_name(
6786 new_table->name, false);
6787 }
6788
6789 if (new_path == NULL) {
6790 mem_free(old_path);
6791 return(DB_OUT_OF_MEMORY);
6792 }
6793
6794 err = fil_rename_tablespace_check(
6795 new_table->space, new_path, old_path,
6796 dict_table_is_discarded(new_table));
6797 mem_free(new_path);
6798 if (err != DB_SUCCESS) {
6799 mem_free(old_path);
6800 return(err);
6801 }
6802 }
6803
6804 fil_op_write_log(MLOG_FILE_RENAME, new_table->space,
6805 0, 0, new_table->name, old_table->name, mtr);
6806
6807 }
6808
6809 mem_free(old_path);
6810
6811 return(err);
6812 }
6813
6814 /*************************************************************************
6815 functions to access is_corrupt flag of fil_space_t*/
6816
6817 void
fil_space_set_corrupt(ulint space_id)6818 fil_space_set_corrupt(
6819 /*==================*/
6820 ulint space_id)
6821 {
6822 fil_space_t* space;
6823
6824 mutex_enter(&fil_system->mutex);
6825
6826 space = fil_space_get_by_id(space_id);
6827
6828 if (space) {
6829 space->is_corrupt = TRUE;
6830 }
6831
6832 mutex_exit(&fil_system->mutex);
6833 }
6834