1 /*****************************************************************************
2
3 Copyright (c) 1995, 2017, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fil/fil0fil.cc
29 The tablespace memory cache
30
31 Created 10/25/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "fil0fil.h"
35
36 #include <debug_sync.h>
37 #include <my_dbug.h>
38
39 #include "mem0mem.h"
40 #include "hash0hash.h"
41 #include "os0file.h"
42 #include "mach0data.h"
43 #include "buf0buf.h"
44 #include "buf0flu.h"
45 #include "log0recv.h"
46 #include "fsp0fsp.h"
47 #include "srv0srv.h"
48 #include "srv0start.h"
49 #include "mtr0mtr.h"
50 #include "mtr0log.h"
51 #include "dict0dict.h"
52 #include "page0page.h"
53 #include "page0zip.h"
54 #include "trx0sys.h"
55 #include "row0mysql.h"
56 #ifndef UNIV_HOTBACKUP
57 # include "buf0lru.h"
58 # include "ibuf0ibuf.h"
59 # include "sync0sync.h"
60 # include "os0sync.h"
61 #else /* !UNIV_HOTBACKUP */
62 # include "srv0srv.h"
63 static ulint srv_data_read, srv_data_written;
64 #endif /* !UNIV_HOTBACKUP */
65
66 /*
67 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
68 =============================================
69
70 The tablespace cache is responsible for providing fast read/write access to
71 tablespaces and logs of the database. File creation and deletion is done
72 in other modules which know more of the logic of the operation, however.
73
74 A tablespace consists of a chain of files. The size of the files does not
75 have to be divisible by the database block size, because we may just leave
76 the last incomplete block unused. When a new file is appended to the
77 tablespace, the maximum size of the file is also specified. At the moment,
78 we think that it is best to extend the file to its maximum size already at
79 the creation of the file, because then we can avoid dynamically extending
80 the file when more space is needed for the tablespace.
81
82 A block's position in the tablespace is specified with a 32-bit unsigned
83 integer. The files in the chain are thought to be catenated, and the block
84 corresponding to an address n is the nth block in the catenated file (where
85 the first block is named the 0th block, and the incomplete block fragments
86 at the end of files are not taken into account). A tablespace can be extended
87 by appending a new file at the end of the chain.
88
89 Our tablespace concept is similar to the one of Oracle.
90
91 To acquire more speed in disk transfers, a technique called disk striping is
92 sometimes used. This means that logical block addresses are divided in a
93 round-robin fashion across several disks. Windows NT supports disk striping,
94 so there we do not need to support it in the database. Disk striping is
95 implemented in hardware in RAID disks. We conclude that it is not necessary
96 to implement it in the database. Oracle 7 does not support disk striping,
97 either.
98
99 Another trick used at some database sites is replacing tablespace files by
100 raw disks, that is, the whole physical disk drive, or a partition of it, is
101 opened as a single file, and it is accessed through byte offsets calculated
102 from the start of the disk or the partition. This is recommended in some
103 books on database tuning to achieve more speed in i/o. Using raw disk
104 certainly prevents the OS from fragmenting disk space, but it is not clear
105 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
106 system + EIDE Conner disk only a negligible difference in speed when reading
107 from a file, versus reading from a raw disk.
108
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
112
113 Some operating systems do not support many open files at the same time,
114 though NT seems to tolerate at least 900 open files. Therefore, we put the
115 open files in an LRU-list. If we need to open another file, we may close the
116 file at the end of the LRU-list. When an i/o-operation is pending on a file,
117 the file cannot be closed. We take the file nodes with pending i/o-operations
118 out of the LRU-list and keep a count of pending operations. When an operation
119 completes, we decrement the count and return the file node to the LRU-list if
120 the count drops to zero. */
121
122 /** When mysqld is run, the default directory "." is the mysqld datadir,
123 but in the MySQL Embedded Server Library and mysqlbackup it is not the default
124 directory, and we must set the base file path explicitly */
125 UNIV_INTERN const char* fil_path_to_mysql_datadir = ".";
126
127 /** The number of fsyncs done to the log */
128 UNIV_INTERN ulint fil_n_log_flushes = 0;
129
130 /** Number of pending redo log flushes */
131 UNIV_INTERN ulint fil_n_pending_log_flushes = 0;
132 /** Number of pending tablespace flushes */
133 UNIV_INTERN ulint fil_n_pending_tablespace_flushes = 0;
134
135 /** Number of files currently open */
136 UNIV_INTERN ulint fil_n_file_opened = 0;
137
138 /** The null file address */
139 UNIV_INTERN fil_addr_t fil_addr_null = {FIL_NULL, 0};
140
141 #ifdef UNIV_PFS_MUTEX
142 /* Key to register fil_system_mutex with performance schema */
143 UNIV_INTERN mysql_pfs_key_t fil_system_mutex_key;
144 #endif /* UNIV_PFS_MUTEX */
145
146 #ifdef UNIV_PFS_RWLOCK
147 /* Key to register file space latch with performance schema */
148 UNIV_INTERN mysql_pfs_key_t fil_space_latch_key;
149 #endif /* UNIV_PFS_RWLOCK */
150
151 /** File node of a tablespace or the log data space */
152 struct fil_node_t {
153 fil_space_t* space; /*!< backpointer to the space where this node
154 belongs */
155 char* name; /*!< path to the file */
156 ibool open; /*!< TRUE if file open */
157 pfs_os_file_t handle; /*!< OS handle to the file, if file open */
158 os_event_t sync_event;/*!< Condition event to group and
159 serialize calls to fsync */
160 ibool is_raw_disk;/*!< TRUE if the 'file' is actually a raw
161 device or a raw disk partition */
162 ulint size; /*!< size of the file in database pages, 0 if
163 not known yet; the possible last incomplete
164 megabyte may be ignored if space == 0 */
165 ulint n_pending;
166 /*!< count of pending i/o's on this file;
167 closing of the file is not allowed if
168 this is > 0 */
169 ulint n_pending_flushes;
170 /*!< count of pending flushes on this file;
171 closing of the file is not allowed if
172 this is > 0 */
173 ibool being_extended;
174 /*!< TRUE if the node is currently
175 being extended. */
176 ib_int64_t modification_counter;/*!< when we write to the file we
177 increment this by one */
178 ib_int64_t flush_counter;/*!< up to what
179 modification_counter value we have
180 flushed the modifications to disk */
181 UT_LIST_NODE_T(fil_node_t) chain;
182 /*!< link field for the file chain */
183 UT_LIST_NODE_T(fil_node_t) LRU;
184 /*!< link field for the LRU list */
185 ulint magic_n;/*!< FIL_NODE_MAGIC_N */
186 };
187
188 /** Value of fil_node_t::magic_n */
189 #define FIL_NODE_MAGIC_N 89389
190
191 /** Tablespace or log data space: let us call them by a common name space */
192 struct fil_space_t {
193 char* name; /*!< space name = the path to the first file in
194 it */
195 ulint id; /*!< space id */
196 ib_int64_t tablespace_version;
197 /*!< in DISCARD/IMPORT this timestamp
198 is used to check if we should ignore
199 an insert buffer merge request for a
200 page because it actually was for the
201 previous incarnation of the space */
202 ibool mark; /*!< this is set to TRUE at database startup if
203 the space corresponds to a table in the InnoDB
204 data dictionary; so we can print a warning of
205 orphaned tablespaces */
206 ibool stop_ios;/*!< TRUE if we want to rename the
207 .ibd file of tablespace and want to
208 stop temporarily posting of new i/o
209 requests on the file */
210 ibool stop_new_ops;
211 /*!< we set this TRUE when we start
212 deleting a single-table tablespace.
213 When this is set following new ops
214 are not allowed:
215 * read IO request
216 * ibuf merge
217 * file flush
218 Note that we can still possibly have
219 new write operations because we don't
220 check this flag when doing flush
221 batches. */
222 ulint purpose;/*!< FIL_TABLESPACE, FIL_LOG, or
223 FIL_ARCH_LOG */
224 UT_LIST_BASE_NODE_T(fil_node_t) chain;
225 /*!< base node for the file chain */
226 ulint size; /*!< space size in pages; 0 if a single-table
227 tablespace whose size we do not know yet;
228 last incomplete megabytes in data files may be
229 ignored if space == 0 */
230 ulint flags; /*!< tablespace flags; see
231 fsp_flags_is_valid(),
232 fsp_flags_get_zip_size() */
233 ulint n_reserved_extents;
234 /*!< number of reserved free extents for
235 ongoing operations like B-tree page split */
236 ulint n_pending_flushes; /*!< this is positive when flushing
237 the tablespace to disk; dropping of the
238 tablespace is forbidden if this is positive */
239 ulint n_pending_ops;/*!< this is positive when we
240 have pending operations against this
241 tablespace. The pending operations can
242 be ibuf merges or lock validation code
243 trying to read a block.
244 Dropping of the tablespace is forbidden
245 if this is positive */
246 hash_node_t hash; /*!< hash chain node */
247 hash_node_t name_hash;/*!< hash chain the name_hash table */
248 #ifndef UNIV_HOTBACKUP
249 rw_lock_t latch; /*!< latch protecting the file space storage
250 allocation */
251 #endif /* !UNIV_HOTBACKUP */
252 UT_LIST_NODE_T(fil_space_t) unflushed_spaces;
253 /*!< list of spaces with at least one unflushed
254 file we have written to */
255 bool is_in_unflushed_spaces;
256 /*!< true if this space is currently in
257 unflushed_spaces */
258 UT_LIST_NODE_T(fil_space_t) space_list;
259 /*!< list of all spaces */
260 ulint magic_n;/*!< FIL_SPACE_MAGIC_N */
261 };
262
263 /** Value of fil_space_t::magic_n */
264 #define FIL_SPACE_MAGIC_N 89472
265
266 /** The tablespace memory cache; also the totality of logs (the log
267 data space) is stored here; below we talk about tablespaces, but also
268 the ib_logfiles form a 'space' and it is handled here */
269 struct fil_system_t {
270 #ifndef UNIV_HOTBACKUP
271 ib_mutex_t mutex; /*!< The mutex protecting the cache */
272 #endif /* !UNIV_HOTBACKUP */
273 hash_table_t* spaces; /*!< The hash table of spaces in the
274 system; they are hashed on the space
275 id */
276 hash_table_t* name_hash; /*!< hash table based on the space
277 name */
278 UT_LIST_BASE_NODE_T(fil_node_t) LRU;
279 /*!< base node for the LRU list of the
280 most recently used open files with no
281 pending i/o's; if we start an i/o on
282 the file, we first remove it from this
283 list, and return it to the start of
284 the list when the i/o ends;
285 log files and the system tablespace are
286 not put to this list: they are opened
287 after the startup, and kept open until
288 shutdown */
289 UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
290 /*!< base node for the list of those
291 tablespaces whose files contain
292 unflushed writes; those spaces have
293 at least one file node where
294 modification_counter > flush_counter */
295 ulint n_open; /*!< number of files currently open */
296 ulint max_n_open; /*!< n_open is not allowed to exceed
297 this */
298 ib_int64_t modification_counter;/*!< when we write to a file we
299 increment this by one */
300 ulint max_assigned_id;/*!< maximum space id in the existing
301 tables, or assigned during the time
302 mysqld has been up; at an InnoDB
303 startup we scan the data dictionary
304 and set here the maximum of the
305 space id's of the tables there */
306 ib_int64_t tablespace_version;
307 /*!< a counter which is incremented for
308 every space object memory creation;
309 every space mem object gets a
310 'timestamp' from this; in DISCARD/
311 IMPORT this is used to check if we
312 should ignore an insert buffer merge
313 request */
314 UT_LIST_BASE_NODE_T(fil_space_t) space_list;
315 /*!< list of all file spaces */
316 ibool space_id_reuse_warned;
317 /* !< TRUE if fil_space_create()
318 has issued a warning about
319 potential space_id reuse */
320 };
321
322 /** The tablespace memory cache. This variable is NULL before the module is
323 initialized. */
324 static fil_system_t* fil_system = NULL;
325
/** Determine if (i) is a user tablespace id or not: it is one when it is
nonzero and srv_is_undo_tablespace() does not claim it.
The argument is parenthesized so that an expression argument (for example
a conditional or a bitwise expression) is compared against 0 as a whole.
It is still evaluated twice, so it must not have side effects. */
#define fil_is_user_tablespace_id(i)			\
	((i) != 0 && !srv_is_undo_tablespace(i))
329
/** Determine if the user has explicitly disabled fsync() for the space (s).
On Windows this is always 0. Elsewhere it holds only for a FIL_TABLESPACE
space while srv_unix_file_flush_method is SRV_UNIX_O_DIRECT_NO_FSYNC. */
#ifdef __WIN__
# define fil_buffering_disabled(s)	(0)
#else /* __WIN__ */
# define fil_buffering_disabled(s)			\
	((s)->purpose == FIL_TABLESPACE			\
	 && srv_unix_file_flush_method			\
	 == SRV_UNIX_O_DIRECT_NO_FSYNC)
#endif /* __WIN__ */
339
#ifdef UNIV_DEBUG
/** Run fil_validate() only on every this many calls */
# define FIL_VALIDATE_SKIP	17

/******************************************************************//**
Checks the consistency of the tablespace cache, but only on some of the
calls, to keep the costly fil_validate() check cheap in debug builds.
@return	TRUE if ok or the check was skipped */
static
ibool
fil_validate_skip(void)
/*===================*/
{
	/** Number of calls left until the next fil_validate() call.
	The type is signed because the unsynchronized decrement below
	can race. */
	static int	calls_until_check = FIL_VALIDATE_SKIP;

	/* The counter is updated without any synchronization. The race
	does not matter, because the counter only serves as a heuristic
	for reducing how often fil_validate() gets called. */
	if (--calls_until_check <= 0) {
		calls_until_check = FIL_VALIDATE_SKIP;

		return(fil_validate());
	}

	return(TRUE);
}
#endif /* UNIV_DEBUG */
368
369 /********************************************************************//**
370 Determines if a file node belongs to the least-recently-used list.
371 @return TRUE if the file belongs to fil_system->LRU mutex. */
372 UNIV_INLINE
373 ibool
fil_space_belongs_in_lru(const fil_space_t * space)374 fil_space_belongs_in_lru(
375 /*=====================*/
376 const fil_space_t* space) /*!< in: file space */
377 {
378 return(space->purpose == FIL_TABLESPACE
379 && fil_is_user_tablespace_id(space->id));
380 }
381
382 /********************************************************************//**
383 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
384
385 Prepares a file node for i/o. Opens the file if it is closed. Updates the
386 pending i/o's field in the node and the system appropriately. Takes the node
387 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
388 mutex.
389 @return false if the file can't be opened, otherwise true */
390 static
391 bool
392 fil_node_prepare_for_io(
393 /*====================*/
394 fil_node_t* node, /*!< in: file node */
395 fil_system_t* system, /*!< in: tablespace memory cache */
396 fil_space_t* space); /*!< in: space */
397 /********************************************************************//**
398 Updates the data structures when an i/o operation finishes. Updates the
399 pending i/o's field in the node appropriately. */
400 static
401 void
402 fil_node_complete_io(
403 /*=================*/
404 fil_node_t* node, /*!< in: file node */
405 fil_system_t* system, /*!< in: tablespace memory cache */
406 ulint type); /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
407 the node as modified if
408 type == OS_FILE_WRITE */
409 /*******************************************************************//**
410 Frees a space object from the tablespace memory cache. Closes the files in
411 the chain but does not delete them. There must not be any pending i/o's or
412 flushes on the files.
413 @return TRUE on success */
414 static
415 ibool
416 fil_space_free(
417 /*===========*/
418 ulint id, /* in: space id */
419 ibool x_latched); /* in: TRUE if caller has space->latch
420 in X mode */
421 /********************************************************************//**
422 Reads data from a space to a buffer. Remember that the possible incomplete
423 blocks at the end of file are ignored: they are not taken into account when
424 calculating the byte offset within a space.
425 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
426 i/o on a tablespace which does not exist */
427 UNIV_INLINE
428 dberr_t
fil_read(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)429 fil_read(
430 /*=====*/
431 bool sync, /*!< in: true if synchronous aio is desired */
432 ulint space_id, /*!< in: space id */
433 ulint zip_size, /*!< in: compressed page size in bytes;
434 0 for uncompressed pages */
435 ulint block_offset, /*!< in: offset in number of blocks */
436 ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
437 this must be divisible by the OS block size */
438 ulint len, /*!< in: how many bytes to read; this must not
439 cross a file boundary; in aio this must be a
440 block size multiple */
441 void* buf, /*!< in/out: buffer where to store data read;
442 in aio this must be appropriately aligned */
443 void* message) /*!< in: message for aio handler if non-sync
444 aio used, else ignored */
445 {
446 return(fil_io(OS_FILE_READ, sync, space_id, zip_size, block_offset,
447 byte_offset, len, buf, message));
448 }
449
450 /********************************************************************//**
451 Writes data to a space from a buffer. Remember that the possible incomplete
452 blocks at the end of file are ignored: they are not taken into account when
453 calculating the byte offset within a space.
454 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
455 i/o on a tablespace which does not exist */
456 UNIV_INLINE
457 dberr_t
fil_write(bool sync,ulint space_id,ulint zip_size,ulint block_offset,ulint byte_offset,ulint len,void * buf,void * message)458 fil_write(
459 /*======*/
460 bool sync, /*!< in: true if synchronous aio is desired */
461 ulint space_id, /*!< in: space id */
462 ulint zip_size, /*!< in: compressed page size in bytes;
463 0 for uncompressed pages */
464 ulint block_offset, /*!< in: offset in number of blocks */
465 ulint byte_offset, /*!< in: remainder of offset in bytes; in aio
466 this must be divisible by the OS block size */
467 ulint len, /*!< in: how many bytes to write; this must
468 not cross a file boundary; in aio this must
469 be a block size multiple */
470 void* buf, /*!< in: buffer from which to write; in aio
471 this must be appropriately aligned */
472 void* message) /*!< in: message for aio handler if non-sync
473 aio used, else ignored */
474 {
475 ut_ad(!srv_read_only_mode);
476
477 return(fil_io(OS_FILE_WRITE, sync, space_id, zip_size, block_offset,
478 byte_offset, len, buf, message));
479 }
480
481 /*******************************************************************//**
482 Returns the table space by a given id, NULL if not found. */
483 UNIV_INLINE
484 fil_space_t*
fil_space_get_by_id(ulint id)485 fil_space_get_by_id(
486 /*================*/
487 ulint id) /*!< in: space id */
488 {
489 fil_space_t* space;
490
491 ut_ad(mutex_own(&fil_system->mutex));
492
493 HASH_SEARCH(hash, fil_system->spaces, id,
494 fil_space_t*, space,
495 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
496 space->id == id);
497
498 return(space);
499 }
500
501 /*******************************************************************//**
502 Returns the table space by a given name, NULL if not found. */
503 UNIV_INLINE
504 fil_space_t*
fil_space_get_by_name(const char * name)505 fil_space_get_by_name(
506 /*==================*/
507 const char* name) /*!< in: space name */
508 {
509 fil_space_t* space;
510 ulint fold;
511
512 ut_ad(mutex_own(&fil_system->mutex));
513
514 fold = ut_fold_string(name);
515
516 HASH_SEARCH(name_hash, fil_system->name_hash, fold,
517 fil_space_t*, space,
518 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
519 !strcmp(name, space->name));
520
521 return(space);
522 }
523
524 #ifndef UNIV_HOTBACKUP
525 /*******************************************************************//**
526 Returns the version number of a tablespace, -1 if not found.
527 @return version number, -1 if the tablespace does not exist in the
528 memory cache */
529 UNIV_INTERN
530 ib_int64_t
fil_space_get_version(ulint id)531 fil_space_get_version(
532 /*==================*/
533 ulint id) /*!< in: space id */
534 {
535 fil_space_t* space;
536 ib_int64_t version = -1;
537
538 ut_ad(fil_system);
539
540 mutex_enter(&fil_system->mutex);
541
542 space = fil_space_get_by_id(id);
543
544 if (space) {
545 version = space->tablespace_version;
546 }
547
548 mutex_exit(&fil_system->mutex);
549
550 return(version);
551 }
552
553 /*******************************************************************//**
554 Returns the latch of a file space.
555 @return latch protecting storage allocation */
556 UNIV_INTERN
557 rw_lock_t*
fil_space_get_latch(ulint id,ulint * flags)558 fil_space_get_latch(
559 /*================*/
560 ulint id, /*!< in: space id */
561 ulint* flags) /*!< out: tablespace flags */
562 {
563 fil_space_t* space;
564
565 ut_ad(fil_system);
566
567 mutex_enter(&fil_system->mutex);
568
569 space = fil_space_get_by_id(id);
570
571 ut_a(space);
572
573 if (flags) {
574 *flags = space->flags;
575 }
576
577 mutex_exit(&fil_system->mutex);
578
579 return(&(space->latch));
580 }
581
582 /*******************************************************************//**
583 Returns the type of a file space.
584 @return FIL_TABLESPACE or FIL_LOG */
585 UNIV_INTERN
586 ulint
fil_space_get_type(ulint id)587 fil_space_get_type(
588 /*===============*/
589 ulint id) /*!< in: space id */
590 {
591 fil_space_t* space;
592
593 ut_ad(fil_system);
594
595 mutex_enter(&fil_system->mutex);
596
597 space = fil_space_get_by_id(id);
598
599 ut_a(space);
600
601 mutex_exit(&fil_system->mutex);
602
603 return(space->purpose);
604 }
605 #endif /* !UNIV_HOTBACKUP */
606
607 /**********************************************************************//**
608 Checks if all the file nodes in a space are flushed. The caller must hold
609 the fil_system mutex.
610 @return true if all are flushed */
611 static
612 bool
fil_space_is_flushed(fil_space_t * space)613 fil_space_is_flushed(
614 /*=================*/
615 fil_space_t* space) /*!< in: space */
616 {
617 fil_node_t* node;
618
619 ut_ad(mutex_own(&fil_system->mutex));
620
621 node = UT_LIST_GET_FIRST(space->chain);
622
623 while (node) {
624 if (node->modification_counter > node->flush_counter) {
625
626 ut_ad(!fil_buffering_disabled(space));
627 return(false);
628 }
629
630 node = UT_LIST_GET_NEXT(chain, node);
631 }
632
633 return(true);
634 }
635
636 /*******************************************************************//**
637 Appends a new file to the chain of files of a space. File must be closed.
638 @return pointer to the file name, or NULL on error */
639 UNIV_INTERN
640 char*
fil_node_create(const char * name,ulint size,ulint id,ibool is_raw)641 fil_node_create(
642 /*============*/
643 const char* name, /*!< in: file name (file must be closed) */
644 ulint size, /*!< in: file size in database blocks, rounded
645 downwards to an integer */
646 ulint id, /*!< in: space id where to append */
647 ibool is_raw) /*!< in: TRUE if a raw device or
648 a raw disk partition */
649 {
650 fil_node_t* node;
651 fil_space_t* space;
652
653 ut_a(fil_system);
654 ut_a(name);
655
656 mutex_enter(&fil_system->mutex);
657
658 node = static_cast<fil_node_t*>(mem_zalloc(sizeof(fil_node_t)));
659
660 node->name = mem_strdup(name);
661
662 ut_a(!is_raw || srv_start_raw_disk_in_use);
663
664 node->sync_event = os_event_create();
665 node->is_raw_disk = is_raw;
666 node->size = size;
667 node->magic_n = FIL_NODE_MAGIC_N;
668
669 space = fil_space_get_by_id(id);
670
671 if (!space) {
672 ut_print_timestamp(stderr);
673 fprintf(stderr,
674 " InnoDB: Error: Could not find tablespace %lu for\n"
675 "InnoDB: file ", (ulong) id);
676 ut_print_filename(stderr, name);
677 fputs(" in the tablespace memory cache.\n", stderr);
678 mem_free(node->name);
679
680 mem_free(node);
681
682 mutex_exit(&fil_system->mutex);
683
684 return(NULL);
685 }
686
687 space->size += size;
688
689 node->space = space;
690
691 UT_LIST_ADD_LAST(chain, space->chain, node);
692
693 if (id < SRV_LOG_SPACE_FIRST_ID && fil_system->max_assigned_id < id) {
694
695 fil_system->max_assigned_id = id;
696 }
697
698 mutex_exit(&fil_system->mutex);
699
700 return(node->name);
701 }
702
703 /********************************************************************//**
704 Opens a file of a node of a tablespace. The caller must own the fil_system
705 mutex.
706 @return false if the file can't be opened, otherwise true */
707 static
708 bool
fil_node_open_file(fil_node_t * node,fil_system_t * system,fil_space_t * space)709 fil_node_open_file(
710 /*===============*/
711 fil_node_t* node, /*!< in: file node */
712 fil_system_t* system, /*!< in: tablespace memory cache */
713 fil_space_t* space) /*!< in: space */
714 {
715 os_offset_t size_bytes;
716 ibool ret;
717 ibool success;
718 byte* buf2;
719 byte* page;
720 ulint space_id;
721 ulint flags;
722 ulint page_size;
723
724 ut_ad(mutex_own(&(system->mutex)));
725 ut_a(node->n_pending == 0);
726 ut_a(node->open == FALSE);
727
728 if (node->size == 0) {
729 /* It must be a single-table tablespace and we do not know the
730 size of the file yet. First we open the file in the normal
731 mode, no async I/O here, for simplicity. Then do some checks,
732 and close the file again.
733 NOTE that we could not use the simple file read function
734 os_file_read() in Windows to read from a file opened for
735 async I/O! */
736
737 node->handle = os_file_create_simple_no_error_handling(
738 innodb_file_data_key, node->name, OS_FILE_OPEN,
739 OS_FILE_READ_ONLY, &success);
740 if (!success) {
741 /* The following call prints an error message */
742 os_file_get_last_error(true);
743
744 ut_print_timestamp(stderr);
745
746 ib_logf(IB_LOG_LEVEL_WARN, "InnoDB: Error: cannot "
747 "open %s\n. InnoDB: Have you deleted .ibd "
748 "files under a running mysqld server?\n",
749 node->name);
750
751 return(false);
752 }
753
754 size_bytes = os_file_get_size(node->handle);
755 ut_a(size_bytes != (os_offset_t) -1);
756 #ifdef UNIV_HOTBACKUP
757 if (space->id == 0) {
758 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
759 os_file_close(node->handle);
760 goto add_size;
761 }
762 #endif /* UNIV_HOTBACKUP */
763 ut_a(space->purpose != FIL_LOG);
764 ut_a(fil_is_user_tablespace_id(space->id));
765
766 if (size_bytes < FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE) {
767 fprintf(stderr,
768 "InnoDB: Error: the size of single-table"
769 " tablespace file %s\n"
770 "InnoDB: is only " UINT64PF ","
771 " should be at least %lu!\n",
772 node->name,
773 size_bytes,
774 (ulong) (FIL_IBD_FILE_INITIAL_SIZE
775 * UNIV_PAGE_SIZE));
776
777 ut_a(0);
778 }
779
780 /* Read the first page of the tablespace */
781
782 buf2 = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
783 /* Align the memory for file i/o if we might have O_DIRECT
784 set */
785 page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
786
787 success = os_file_read(node->handle, page, 0, UNIV_PAGE_SIZE);
788 space_id = fsp_header_get_space_id(page);
789 flags = fsp_header_get_flags(page);
790 page_size = fsp_flags_get_page_size(flags);
791
792 ut_free(buf2);
793
794 /* Close the file now that we have read the space id from it */
795
796 os_file_close(node->handle);
797
798 if (UNIV_UNLIKELY(space_id != space->id)) {
799 fprintf(stderr,
800 "InnoDB: Error: tablespace id is %lu"
801 " in the data dictionary\n"
802 "InnoDB: but in file %s it is %lu!\n",
803 space->id, node->name, space_id);
804
805 ut_error;
806 }
807
808 if (UNIV_UNLIKELY(space_id == ULINT_UNDEFINED
809 || space_id == 0)) {
810 fprintf(stderr,
811 "InnoDB: Error: tablespace id %lu"
812 " in file %s is not sensible\n",
813 (ulong) space_id, node->name);
814
815 ut_error;
816 }
817
818 if (UNIV_UNLIKELY(fsp_flags_get_page_size(space->flags)
819 != page_size)) {
820 fprintf(stderr,
821 "InnoDB: Error: tablespace file %s"
822 " has page size 0x%lx\n"
823 "InnoDB: but the data dictionary"
824 " expects page size 0x%lx!\n",
825 node->name, flags,
826 fsp_flags_get_page_size(space->flags));
827
828 ut_error;
829 }
830
831 if (UNIV_UNLIKELY(space->flags != flags)) {
832 fprintf(stderr,
833 "InnoDB: Error: table flags are 0x%lx"
834 " in the data dictionary\n"
835 "InnoDB: but the flags in file %s are 0x%lx!\n",
836 space->flags, node->name, flags);
837
838 ut_error;
839 }
840
841 if (size_bytes >= 1024 * 1024) {
842 /* Truncate the size to whole megabytes. */
843 size_bytes = ut_2pow_round(size_bytes, 1024 * 1024);
844 }
845
846 if (!fsp_flags_is_compressed(flags)) {
847 node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
848 } else {
849 node->size = (ulint)
850 (size_bytes
851 / fsp_flags_get_zip_size(flags));
852 }
853
854 #ifdef UNIV_HOTBACKUP
855 add_size:
856 #endif /* UNIV_HOTBACKUP */
857 space->size += node->size;
858 }
859
860 /* printf("Opening file %s\n", node->name); */
861
862 /* Open the file for reading and writing, in Windows normally in the
863 unbuffered async I/O mode, though global variables may make
864 os_file_create() to fall back to the normal file I/O mode. */
865
866 if (space->purpose == FIL_LOG) {
867 node->handle = os_file_create(innodb_file_log_key,
868 node->name, OS_FILE_OPEN,
869 OS_FILE_AIO, OS_LOG_FILE,
870 &ret);
871 } else if (node->is_raw_disk) {
872 node->handle = os_file_create(innodb_file_data_key,
873 node->name,
874 OS_FILE_OPEN_RAW,
875 OS_FILE_AIO, OS_DATA_FILE,
876 &ret);
877 } else {
878 node->handle = os_file_create(innodb_file_data_key,
879 node->name, OS_FILE_OPEN,
880 OS_FILE_AIO, OS_DATA_FILE,
881 &ret);
882 }
883
884 ut_a(ret);
885
886 node->open = TRUE;
887
888 system->n_open++;
889 fil_n_file_opened++;
890
891 if (fil_space_belongs_in_lru(space)) {
892
893 /* Put the node to the LRU list */
894 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
895 }
896
897 return(true);
898 }
899
900 /**********************************************************************//**
901 Closes a file. */
902 static
903 void
fil_node_close_file(fil_node_t * node,fil_system_t * system)904 fil_node_close_file(
905 /*================*/
906 fil_node_t* node, /*!< in: file node */
907 fil_system_t* system) /*!< in: tablespace memory cache */
908 {
909 ibool ret;
910
911 ut_ad(node && system);
912 ut_ad(mutex_own(&(system->mutex)));
913 ut_a(node->open);
914 ut_a(node->n_pending == 0);
915 ut_a(node->n_pending_flushes == 0);
916 ut_a(!node->being_extended);
917 #ifndef UNIV_HOTBACKUP
918 ut_a(node->modification_counter == node->flush_counter
919 || srv_fast_shutdown == 2);
920 #endif /* !UNIV_HOTBACKUP */
921
922 ret = os_file_close(node->handle);
923 ut_a(ret);
924
925 /* printf("Closing file %s\n", node->name); */
926
927 node->open = FALSE;
928 ut_a(system->n_open > 0);
929 system->n_open--;
930 fil_n_file_opened--;
931
932 if (fil_space_belongs_in_lru(node->space)) {
933
934 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
935
936 /* The node is in the LRU list, remove it */
937 UT_LIST_REMOVE(LRU, system->LRU, node);
938 }
939 }
940
941 /********************************************************************//**
942 Tries to close a file in the LRU list. The caller must hold the fil_sys
943 mutex.
944 @return TRUE if success, FALSE if should retry later; since i/o's
945 generally complete in < 100 ms, and as InnoDB writes at most 128 pages
946 from the buffer pool in a batch, and then immediately flushes the
947 files, there is a good chance that the next time we find a suitable
948 node from the LRU list */
949 static
950 ibool
fil_try_to_close_file_in_LRU(ibool print_info)951 fil_try_to_close_file_in_LRU(
952 /*=========================*/
953 ibool print_info) /*!< in: if TRUE, prints information why it
954 cannot close a file */
955 {
956 fil_node_t* node;
957
958 ut_ad(mutex_own(&fil_system->mutex));
959
960 if (print_info) {
961 fprintf(stderr,
962 "InnoDB: fil_sys open file LRU len %lu\n",
963 (ulong) UT_LIST_GET_LEN(fil_system->LRU));
964 }
965
966 for (node = UT_LIST_GET_LAST(fil_system->LRU);
967 node != NULL;
968 node = UT_LIST_GET_PREV(LRU, node)) {
969
970 if (node->modification_counter == node->flush_counter
971 && node->n_pending_flushes == 0
972 && !node->being_extended) {
973
974 fil_node_close_file(node, fil_system);
975
976 return(TRUE);
977 }
978
979 if (!print_info) {
980 continue;
981 }
982
983 if (node->n_pending_flushes > 0) {
984 fputs("InnoDB: cannot close file ", stderr);
985 ut_print_filename(stderr, node->name);
986 fprintf(stderr, ", because n_pending_flushes %lu\n",
987 (ulong) node->n_pending_flushes);
988 }
989
990 if (node->modification_counter != node->flush_counter) {
991 fputs("InnoDB: cannot close file ", stderr);
992 ut_print_filename(stderr, node->name);
993 fprintf(stderr,
994 ", because mod_count %ld != fl_count %ld\n",
995 (long) node->modification_counter,
996 (long) node->flush_counter);
997
998 }
999
1000 if (node->being_extended) {
1001 fputs("InnoDB: cannot close file ", stderr);
1002 ut_print_filename(stderr, node->name);
1003 fprintf(stderr, ", because it is being extended\n");
1004 }
1005 }
1006
1007 return(FALSE);
1008 }
1009
1010 /*******************************************************************//**
1011 Reserves the fil_system mutex and tries to make sure we can open at least one
1012 file while holding it. This should be called before calling
1013 fil_node_prepare_for_io(), because that function may need to open a file. */
1014 static
1015 void
fil_mutex_enter_and_prepare_for_io(ulint space_id)1016 fil_mutex_enter_and_prepare_for_io(
1017 /*===============================*/
1018 ulint space_id) /*!< in: space id */
1019 {
1020 fil_space_t* space;
1021 ibool success;
1022 ibool print_info = FALSE;
1023 ulint count = 0;
1024 ulint count2 = 0;
1025
1026 retry:
1027 mutex_enter(&fil_system->mutex);
1028
1029 if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
1030 /* We keep log files and system tablespace files always open;
1031 this is important in preventing deadlocks in this module, as
1032 a page read completion often performs another read from the
1033 insert buffer. The insert buffer is in tablespace 0, and we
1034 cannot end up waiting in this function. */
1035
1036 return;
1037 }
1038
1039 space = fil_space_get_by_id(space_id);
1040
1041 if (space != NULL && space->stop_ios) {
1042 /* We are going to do a rename file and want to stop new i/o's
1043 for a while */
1044
1045 if (count2 > 20000) {
1046 fputs("InnoDB: Warning: tablespace ", stderr);
1047 ut_print_filename(stderr, space->name);
1048 fprintf(stderr,
1049 " has i/o ops stopped for a long time %lu\n",
1050 (ulong) count2);
1051 }
1052
1053 mutex_exit(&fil_system->mutex);
1054
1055 #ifndef UNIV_HOTBACKUP
1056
1057 /* Wake the i/o-handler threads to make sure pending
1058 i/o's are performed */
1059 os_aio_simulated_wake_handler_threads();
1060
1061 /* The sleep here is just to give IO helper threads a
1062 bit of time to do some work. It is not required that
1063 all IO related to the tablespace being renamed must
1064 be flushed here as we do fil_flush() in
1065 fil_rename_tablespace() as well. */
1066 os_thread_sleep(20000);
1067
1068 #endif /* UNIV_HOTBACKUP */
1069
1070 /* Flush tablespaces so that we can close modified
1071 files in the LRU list */
1072 fil_flush_file_spaces(FIL_TABLESPACE);
1073
1074 os_thread_sleep(20000);
1075
1076 count2++;
1077
1078 goto retry;
1079 }
1080
1081 if (fil_system->n_open < fil_system->max_n_open) {
1082
1083 return;
1084 }
1085
1086 /* If the file is already open, no need to do anything; if the space
1087 does not exist, we handle the situation in the function which called
1088 this function */
1089
1090 if (!space || UT_LIST_GET_FIRST(space->chain)->open) {
1091
1092 return;
1093 }
1094
1095 if (count > 1) {
1096 print_info = TRUE;
1097 }
1098
1099 /* Too many files are open, try to close some */
1100 close_more:
1101 success = fil_try_to_close_file_in_LRU(print_info);
1102
1103 if (success && fil_system->n_open >= fil_system->max_n_open) {
1104
1105 goto close_more;
1106 }
1107
1108 if (fil_system->n_open < fil_system->max_n_open) {
1109 /* Ok */
1110
1111 return;
1112 }
1113
1114 if (count >= 2) {
1115 ut_print_timestamp(stderr);
1116 fprintf(stderr,
1117 " InnoDB: Warning: too many (%lu) files stay open"
1118 " while the maximum\n"
1119 "InnoDB: allowed value would be %lu.\n"
1120 "InnoDB: You may need to raise the value of"
1121 " innodb_open_files in\n"
1122 "InnoDB: my.cnf.\n",
1123 (ulong) fil_system->n_open,
1124 (ulong) fil_system->max_n_open);
1125
1126 return;
1127 }
1128
1129 mutex_exit(&fil_system->mutex);
1130
1131 #ifndef UNIV_HOTBACKUP
1132 /* Wake the i/o-handler threads to make sure pending i/o's are
1133 performed */
1134 os_aio_simulated_wake_handler_threads();
1135
1136 os_thread_sleep(20000);
1137 #endif
1138 /* Flush tablespaces so that we can close modified files in the LRU
1139 list */
1140
1141 fil_flush_file_spaces(FIL_TABLESPACE);
1142
1143 count++;
1144
1145 goto retry;
1146 }
1147
1148 /*******************************************************************//**
1149 Frees a file node object from a tablespace memory cache. */
1150 static
1151 void
fil_node_free(fil_node_t * node,fil_system_t * system,fil_space_t * space)1152 fil_node_free(
1153 /*==========*/
1154 fil_node_t* node, /*!< in, own: file node */
1155 fil_system_t* system, /*!< in: tablespace memory cache */
1156 fil_space_t* space) /*!< in: space where the file node is chained */
1157 {
1158 ut_ad(node && system && space);
1159 ut_ad(mutex_own(&(system->mutex)));
1160 ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1161 ut_a(node->n_pending == 0);
1162 ut_a(!node->being_extended);
1163
1164 if (node->open) {
1165 /* We fool the assertion in fil_node_close_file() to think
1166 there are no unflushed modifications in the file */
1167
1168 node->modification_counter = node->flush_counter;
1169 os_event_set(node->sync_event);
1170
1171 if (fil_buffering_disabled(space)) {
1172
1173 ut_ad(!space->is_in_unflushed_spaces);
1174 ut_ad(fil_space_is_flushed(space));
1175
1176 } else if (space->is_in_unflushed_spaces
1177 && fil_space_is_flushed(space)) {
1178
1179 space->is_in_unflushed_spaces = false;
1180
1181 UT_LIST_REMOVE(unflushed_spaces,
1182 system->unflushed_spaces,
1183 space);
1184 }
1185
1186 fil_node_close_file(node, system);
1187 }
1188
1189 space->size -= node->size;
1190
1191 UT_LIST_REMOVE(chain, space->chain, node);
1192
1193 os_event_free(node->sync_event);
1194 mem_free(node->name);
1195 mem_free(node);
1196 }
1197
#ifdef UNIV_LOG_ARCHIVE
/** Drops files from the start of a file space, so that its size is cut by
the amount given. Whole file nodes are freed from the head of the space's
chain until trunc_len bytes have been removed. The fil_system mutex is
acquired and released by this function.
@param[in]	id		space id; the space must exist (asserted)
@param[in]	trunc_len	truncate by this much, in bytes; it is an
				error (assertion failure) if this does not
				equal the combined size of some initial
				files in the space */
UNIV_INTERN
void
fil_space_truncate_start(
	ulint	id,
	ulint	trunc_len)
{
	mutex_enter(&fil_system->mutex);

	fil_space_t*	space = fil_space_get_by_id(id);

	ut_a(space);

	while (trunc_len > 0) {
		fil_node_t*	node = UT_LIST_GET_FIRST(space->chain);

		/* If trunc_len is larger than the combined size of all the
		files in the chain, the chain runs empty before trunc_len
		reaches zero. Fail with an assertion instead of
		dereferencing a NULL node. */
		ut_a(node != NULL);

		/* Only whole files can be dropped. */
		ut_a(node->size * UNIV_PAGE_SIZE <= trunc_len);

		trunc_len -= node->size * UNIV_PAGE_SIZE;

		fil_node_free(node, fil_system, space);
	}

	mutex_exit(&fil_system->mutex);
}
#endif /* UNIV_LOG_ARCHIVE */
1233
1234 /*******************************************************************//**
1235 Creates a space memory object and puts it to the 'fil system' hash table.
1236 If there is an error, prints an error message to the .err log.
1237 @return TRUE if success */
1238 UNIV_INTERN
1239 ibool
fil_space_create(const char * name,ulint id,ulint flags,ulint purpose)1240 fil_space_create(
1241 /*=============*/
1242 const char* name, /*!< in: space name */
1243 ulint id, /*!< in: space id */
1244 ulint flags, /*!< in: tablespace flags */
1245 ulint purpose)/*!< in: FIL_TABLESPACE, or FIL_LOG if log */
1246 {
1247 fil_space_t* space;
1248
1249 DBUG_EXECUTE_IF("fil_space_create_failure", return(false););
1250
1251 ut_a(fil_system);
1252 ut_a(fsp_flags_is_valid(flags));
1253
1254 /* Look for a matching tablespace and if found free it. */
1255 do {
1256 mutex_enter(&fil_system->mutex);
1257
1258 space = fil_space_get_by_name(name);
1259
1260 if (space != 0) {
1261 ib_logf(IB_LOG_LEVEL_WARN,
1262 "Tablespace '%s' exists in the cache "
1263 "with id %lu != %lu",
1264 name, (ulong) space->id, (ulong) id);
1265
1266 if (id == 0 || purpose != FIL_TABLESPACE) {
1267
1268 mutex_exit(&fil_system->mutex);
1269
1270 return(FALSE);
1271 }
1272
1273 ib_logf(IB_LOG_LEVEL_WARN,
1274 "Freeing existing tablespace '%s' entry "
1275 "from the cache with id %lu",
1276 name, (ulong) id);
1277
1278 ibool success = fil_space_free(space->id, FALSE);
1279 ut_a(success);
1280
1281 mutex_exit(&fil_system->mutex);
1282 }
1283
1284 } while (space != 0);
1285
1286 space = fil_space_get_by_id(id);
1287
1288 if (space != 0) {
1289 ib_logf(IB_LOG_LEVEL_ERROR,
1290 "Trying to add tablespace '%s' with id %lu "
1291 "to the tablespace memory cache, but tablespace '%s' "
1292 "with id %lu already exists in the cache!",
1293 name, (ulong) id, space->name, (ulong) space->id);
1294
1295 mutex_exit(&fil_system->mutex);
1296
1297 return(FALSE);
1298 }
1299
1300 space = static_cast<fil_space_t*>(mem_zalloc(sizeof(*space)));
1301
1302 space->name = mem_strdup(name);
1303 space->id = id;
1304
1305 fil_system->tablespace_version++;
1306 space->tablespace_version = fil_system->tablespace_version;
1307 space->mark = FALSE;
1308
1309 if (purpose == FIL_TABLESPACE && !recv_recovery_on
1310 && id > fil_system->max_assigned_id) {
1311
1312 if (!fil_system->space_id_reuse_warned) {
1313 fil_system->space_id_reuse_warned = TRUE;
1314
1315 ib_logf(IB_LOG_LEVEL_WARN,
1316 "Allocated tablespace %lu, old maximum "
1317 "was %lu",
1318 (ulong) id,
1319 (ulong) fil_system->max_assigned_id);
1320 }
1321
1322 fil_system->max_assigned_id = id;
1323 }
1324
1325 space->purpose = purpose;
1326 space->flags = flags;
1327
1328 space->magic_n = FIL_SPACE_MAGIC_N;
1329
1330 rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1331
1332 HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1333
1334 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1335 ut_fold_string(name), space);
1336 space->is_in_unflushed_spaces = false;
1337
1338 UT_LIST_ADD_LAST(space_list, fil_system->space_list, space);
1339
1340 mutex_exit(&fil_system->mutex);
1341
1342 return(TRUE);
1343 }
1344
1345 /*******************************************************************//**
1346 Assigns a new space id for a new single-table tablespace. This works simply by
1347 incrementing the global counter. If 4 billion id's is not enough, we may need
1348 to recycle id's.
1349 @return TRUE if assigned, FALSE if not */
1350 UNIV_INTERN
1351 ibool
fil_assign_new_space_id(ulint * space_id)1352 fil_assign_new_space_id(
1353 /*====================*/
1354 ulint* space_id) /*!< in/out: space id */
1355 {
1356 ulint id;
1357 ibool success;
1358
1359 mutex_enter(&fil_system->mutex);
1360
1361 id = *space_id;
1362
1363 if (id < fil_system->max_assigned_id) {
1364 id = fil_system->max_assigned_id;
1365 }
1366
1367 id++;
1368
1369 if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1370 ut_print_timestamp(stderr);
1371 fprintf(stderr,
1372 "InnoDB: Warning: you are running out of new"
1373 " single-table tablespace id's.\n"
1374 "InnoDB: Current counter is %lu and it"
1375 " must not exceed %lu!\n"
1376 "InnoDB: To reset the counter to zero"
1377 " you have to dump all your tables and\n"
1378 "InnoDB: recreate the whole InnoDB installation.\n",
1379 (ulong) id,
1380 (ulong) SRV_LOG_SPACE_FIRST_ID);
1381 }
1382
1383 success = (id < SRV_LOG_SPACE_FIRST_ID);
1384
1385 if (success) {
1386 *space_id = fil_system->max_assigned_id = id;
1387 } else {
1388 ut_print_timestamp(stderr);
1389 fprintf(stderr,
1390 "InnoDB: You have run out of single-table"
1391 " tablespace id's!\n"
1392 "InnoDB: Current counter is %lu.\n"
1393 "InnoDB: To reset the counter to zero you"
1394 " have to dump all your tables and\n"
1395 "InnoDB: recreate the whole InnoDB installation.\n",
1396 (ulong) id);
1397 *space_id = ULINT_UNDEFINED;
1398 }
1399
1400 mutex_exit(&fil_system->mutex);
1401
1402 return(success);
1403 }
1404
1405 /*******************************************************************//**
1406 Frees a space object from the tablespace memory cache. Closes the files in
1407 the chain but does not delete them. There must not be any pending i/o's or
1408 flushes on the files.
1409 @return TRUE if success */
1410 static
1411 ibool
fil_space_free(ulint id,ibool x_latched)1412 fil_space_free(
1413 /*===========*/
1414 /* out: TRUE if success */
1415 ulint id, /* in: space id */
1416 ibool x_latched) /* in: TRUE if caller has space->latch
1417 in X mode */
1418 {
1419 fil_space_t* space;
1420 fil_space_t* fnamespace;
1421
1422 ut_ad(mutex_own(&fil_system->mutex));
1423
1424 space = fil_space_get_by_id(id);
1425
1426 if (!space) {
1427 ut_print_timestamp(stderr);
1428 fprintf(stderr,
1429 " InnoDB: Error: trying to remove tablespace %lu"
1430 " from the cache but\n"
1431 "InnoDB: it is not there.\n", (ulong) id);
1432
1433 return(FALSE);
1434 }
1435
1436 HASH_DELETE(fil_space_t, hash, fil_system->spaces, id, space);
1437
1438 fnamespace = fil_space_get_by_name(space->name);
1439 ut_a(fnamespace);
1440 ut_a(space == fnamespace);
1441
1442 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1443 ut_fold_string(space->name), space);
1444
1445 if (space->is_in_unflushed_spaces) {
1446
1447 ut_ad(!fil_buffering_disabled(space));
1448 space->is_in_unflushed_spaces = false;
1449
1450 UT_LIST_REMOVE(unflushed_spaces, fil_system->unflushed_spaces,
1451 space);
1452 }
1453
1454 UT_LIST_REMOVE(space_list, fil_system->space_list, space);
1455
1456 ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1457 ut_a(0 == space->n_pending_flushes);
1458
1459 for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
1460 fil_node != NULL;
1461 fil_node = UT_LIST_GET_FIRST(space->chain)) {
1462
1463 fil_node_free(fil_node, fil_system, space);
1464 }
1465
1466 ut_a(0 == UT_LIST_GET_LEN(space->chain));
1467
1468 if (x_latched) {
1469 rw_lock_x_unlock(&space->latch);
1470 }
1471
1472 rw_lock_free(&(space->latch));
1473
1474 mem_free(space->name);
1475 mem_free(space);
1476
1477 return(TRUE);
1478 }
1479
1480 /*******************************************************************//**
1481 Returns a pointer to the file_space_t that is in the memory cache
1482 associated with a space id. The caller must lock fil_system->mutex.
1483 @return file_space_t pointer, NULL if space not found */
1484 UNIV_INLINE
1485 fil_space_t*
fil_space_get_space(ulint id)1486 fil_space_get_space(
1487 /*================*/
1488 ulint id) /*!< in: space id */
1489 {
1490 fil_space_t* space;
1491 fil_node_t* node;
1492
1493 ut_ad(fil_system);
1494
1495 space = fil_space_get_by_id(id);
1496 if (space == NULL) {
1497 return(NULL);
1498 }
1499
1500 if (space->size == 0 && space->purpose == FIL_TABLESPACE) {
1501 ut_a(id != 0);
1502
1503 mutex_exit(&fil_system->mutex);
1504
1505 /* It is possible that the space gets evicted at this point
1506 before the fil_mutex_enter_and_prepare_for_io() acquires
1507 the fil_system->mutex. Check for this after completing the
1508 call to fil_mutex_enter_and_prepare_for_io(). */
1509 fil_mutex_enter_and_prepare_for_io(id);
1510
1511 /* We are still holding the fil_system->mutex. Check if
1512 the space is still in memory cache. */
1513 space = fil_space_get_by_id(id);
1514 if (space == NULL) {
1515 return(NULL);
1516 }
1517
1518 /* The following code must change when InnoDB supports
1519 multiple datafiles per tablespace. */
1520 ut_a(1 == UT_LIST_GET_LEN(space->chain));
1521
1522 node = UT_LIST_GET_FIRST(space->chain);
1523
1524 /* It must be a single-table tablespace and we have not opened
1525 the file yet; the following calls will open it and update the
1526 size fields */
1527
1528 if (!fil_node_prepare_for_io(node, fil_system, space)) {
1529 /* The single-table tablespace can't be opened,
1530 because the ibd file is missing. */
1531 return(NULL);
1532 }
1533 fil_node_complete_io(node, fil_system, OS_FILE_READ);
1534 }
1535
1536 return(space);
1537 }
1538
1539 /*******************************************************************//**
1540 Returns the path from the first fil_node_t found for the space ID sent.
1541 The caller is responsible for freeing the memory allocated here for the
1542 value returned.
1543 @return own: A copy of fil_node_t::path, NULL if space ID is zero
1544 or not found. */
1545 UNIV_INTERN
1546 char*
fil_space_get_first_path(ulint id)1547 fil_space_get_first_path(
1548 /*=====================*/
1549 ulint id) /*!< in: space id */
1550 {
1551 fil_space_t* space;
1552 fil_node_t* node;
1553 char* path;
1554
1555 ut_ad(fil_system);
1556 ut_a(id);
1557
1558 fil_mutex_enter_and_prepare_for_io(id);
1559
1560 space = fil_space_get_space(id);
1561
1562 if (space == NULL) {
1563 mutex_exit(&fil_system->mutex);
1564
1565 return(NULL);
1566 }
1567
1568 ut_ad(mutex_own(&fil_system->mutex));
1569
1570 node = UT_LIST_GET_FIRST(space->chain);
1571
1572 path = mem_strdup(node->name);
1573
1574 mutex_exit(&fil_system->mutex);
1575
1576 return(path);
1577 }
1578
1579 /*******************************************************************//**
1580 Returns the size of the space in pages. The tablespace must be cached in the
1581 memory cache.
1582 @return space size, 0 if space not found */
1583 UNIV_INTERN
1584 ulint
fil_space_get_size(ulint id)1585 fil_space_get_size(
1586 /*===============*/
1587 ulint id) /*!< in: space id */
1588 {
1589 fil_space_t* space;
1590 ulint size;
1591
1592 ut_ad(fil_system);
1593 mutex_enter(&fil_system->mutex);
1594
1595 space = fil_space_get_space(id);
1596
1597 size = space ? space->size : 0;
1598
1599 mutex_exit(&fil_system->mutex);
1600
1601 return(size);
1602 }
1603
1604 /*******************************************************************//**
1605 Returns the flags of the space. The tablespace must be cached
1606 in the memory cache.
1607 @return flags, ULINT_UNDEFINED if space not found */
1608 UNIV_INTERN
1609 ulint
fil_space_get_flags(ulint id)1610 fil_space_get_flags(
1611 /*================*/
1612 ulint id) /*!< in: space id */
1613 {
1614 fil_space_t* space;
1615 ulint flags;
1616
1617 ut_ad(fil_system);
1618
1619 if (!id) {
1620 return(0);
1621 }
1622
1623 mutex_enter(&fil_system->mutex);
1624
1625 space = fil_space_get_space(id);
1626
1627 if (space == NULL) {
1628 mutex_exit(&fil_system->mutex);
1629
1630 return(ULINT_UNDEFINED);
1631 }
1632
1633 flags = space->flags;
1634
1635 mutex_exit(&fil_system->mutex);
1636
1637 return(flags);
1638 }
1639
1640 /*******************************************************************//**
1641 Returns the compressed page size of the space, or 0 if the space
1642 is not compressed. The tablespace must be cached in the memory cache.
1643 @return compressed page size, ULINT_UNDEFINED if space not found */
1644 UNIV_INTERN
1645 ulint
fil_space_get_zip_size(ulint id)1646 fil_space_get_zip_size(
1647 /*===================*/
1648 ulint id) /*!< in: space id */
1649 {
1650 ulint flags;
1651
1652 flags = fil_space_get_flags(id);
1653
1654 if (flags && flags != ULINT_UNDEFINED) {
1655
1656 return(fsp_flags_get_zip_size(flags));
1657 }
1658
1659 return(flags);
1660 }
1661
1662 /*******************************************************************//**
1663 Checks if the pair space, page_no refers to an existing page in a tablespace
1664 file space. The tablespace must be cached in the memory cache.
1665 @return TRUE if the address is meaningful */
1666 UNIV_INTERN
1667 ibool
fil_check_adress_in_tablespace(ulint id,ulint page_no)1668 fil_check_adress_in_tablespace(
1669 /*===========================*/
1670 ulint id, /*!< in: space id */
1671 ulint page_no)/*!< in: page number */
1672 {
1673 if (fil_space_get_size(id) > page_no) {
1674
1675 return(TRUE);
1676 }
1677
1678 return(FALSE);
1679 }
1680
1681 /****************************************************************//**
1682 Initializes the tablespace memory cache. */
1683 UNIV_INTERN
1684 void
fil_init(ulint hash_size,ulint max_n_open)1685 fil_init(
1686 /*=====*/
1687 ulint hash_size, /*!< in: hash table size */
1688 ulint max_n_open) /*!< in: max number of open files */
1689 {
1690 ut_a(fil_system == NULL);
1691
1692 ut_a(hash_size > 0);
1693 ut_a(max_n_open > 0);
1694
1695 fil_system = static_cast<fil_system_t*>(
1696 mem_zalloc(sizeof(fil_system_t)));
1697
1698 mutex_create(fil_system_mutex_key,
1699 &fil_system->mutex, SYNC_ANY_LATCH);
1700
1701 fil_system->spaces = hash_create(hash_size);
1702 fil_system->name_hash = hash_create(hash_size);
1703
1704 UT_LIST_INIT(fil_system->LRU);
1705
1706 fil_system->max_n_open = max_n_open;
1707 }
1708
1709 /*******************************************************************//**
1710 Opens all log files and system tablespace data files. They stay open until the
1711 database server shutdown. This should be called at a server startup after the
1712 space objects for the log and the system tablespace have been created. The
1713 purpose of this operation is to make sure we never run out of file descriptors
1714 if we need to read from the insert buffer or to write to the log. */
1715 UNIV_INTERN
1716 void
fil_open_log_and_system_tablespace_files(void)1717 fil_open_log_and_system_tablespace_files(void)
1718 /*==========================================*/
1719 {
1720 fil_space_t* space;
1721
1722 mutex_enter(&fil_system->mutex);
1723
1724 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1725 space != NULL;
1726 space = UT_LIST_GET_NEXT(space_list, space)) {
1727
1728 fil_node_t* node;
1729
1730 if (fil_space_belongs_in_lru(space)) {
1731
1732 continue;
1733 }
1734
1735 for (node = UT_LIST_GET_FIRST(space->chain);
1736 node != NULL;
1737 node = UT_LIST_GET_NEXT(chain, node)) {
1738
1739 if (!node->open) {
1740 if (!fil_node_open_file(node, fil_system,
1741 space)) {
1742 /* This func is called during server's
1743 startup. If some file of log or system
1744 tablespace is missing, the server
1745 can't start successfully. So we should
1746 assert for it. */
1747 ut_a(0);
1748 }
1749 }
1750
1751 if (fil_system->max_n_open < 10 + fil_system->n_open) {
1752
1753 fprintf(stderr,
1754 "InnoDB: Warning: you must"
1755 " raise the value of"
1756 " innodb_open_files in\n"
1757 "InnoDB: my.cnf! Remember that"
1758 " InnoDB keeps all log files"
1759 " and all system\n"
1760 "InnoDB: tablespace files open"
1761 " for the whole time mysqld is"
1762 " running, and\n"
1763 "InnoDB: needs to open also"
1764 " some .ibd files if the"
1765 " file-per-table storage\n"
1766 "InnoDB: model is used."
1767 " Current open files %lu,"
1768 " max allowed"
1769 " open files %lu.\n",
1770 (ulong) fil_system->n_open,
1771 (ulong) fil_system->max_n_open);
1772 }
1773 }
1774 }
1775
1776 mutex_exit(&fil_system->mutex);
1777 }
1778
1779 /*******************************************************************//**
1780 Closes all open files. There must not be any pending i/o's or not flushed
1781 modifications in the files. */
1782 UNIV_INTERN
1783 void
fil_close_all_files(void)1784 fil_close_all_files(void)
1785 /*=====================*/
1786 {
1787 fil_space_t* space;
1788
1789 mutex_enter(&fil_system->mutex);
1790
1791 space = UT_LIST_GET_FIRST(fil_system->space_list);
1792
1793 while (space != NULL) {
1794 fil_node_t* node;
1795 fil_space_t* prev_space = space;
1796
1797 for (node = UT_LIST_GET_FIRST(space->chain);
1798 node != NULL;
1799 node = UT_LIST_GET_NEXT(chain, node)) {
1800
1801 if (node->open) {
1802 fil_node_close_file(node, fil_system);
1803 }
1804 }
1805
1806 space = UT_LIST_GET_NEXT(space_list, space);
1807
1808 fil_space_free(prev_space->id, FALSE);
1809 }
1810
1811 mutex_exit(&fil_system->mutex);
1812 }
1813
1814 /*******************************************************************//**
1815 Closes the redo log files. There must not be any pending i/o's or not
1816 flushed modifications in the files. */
1817 UNIV_INTERN
1818 void
fil_close_log_files(bool free)1819 fil_close_log_files(
1820 /*================*/
1821 bool free) /*!< in: whether to free the memory object */
1822 {
1823 fil_space_t* space;
1824
1825 mutex_enter(&fil_system->mutex);
1826
1827 space = UT_LIST_GET_FIRST(fil_system->space_list);
1828
1829 while (space != NULL) {
1830 fil_node_t* node;
1831 fil_space_t* prev_space = space;
1832
1833 if (space->purpose != FIL_LOG) {
1834 space = UT_LIST_GET_NEXT(space_list, space);
1835 continue;
1836 }
1837
1838 for (node = UT_LIST_GET_FIRST(space->chain);
1839 node != NULL;
1840 node = UT_LIST_GET_NEXT(chain, node)) {
1841
1842 if (node->open) {
1843 fil_node_close_file(node, fil_system);
1844 }
1845 }
1846
1847 space = UT_LIST_GET_NEXT(space_list, space);
1848
1849 if (free) {
1850 fil_space_free(prev_space->id, FALSE);
1851 }
1852 }
1853
1854 mutex_exit(&fil_system->mutex);
1855 }
1856
1857 /*******************************************************************//**
1858 Sets the max tablespace id counter if the given number is bigger than the
1859 previous value. */
1860 UNIV_INTERN
1861 void
fil_set_max_space_id_if_bigger(ulint max_id)1862 fil_set_max_space_id_if_bigger(
1863 /*===========================*/
1864 ulint max_id) /*!< in: maximum known id */
1865 {
1866 if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1867 fprintf(stderr,
1868 "InnoDB: Fatal error: max tablespace id"
1869 " is too high, %lu\n", (ulong) max_id);
1870 ut_error;
1871 }
1872
1873 mutex_enter(&fil_system->mutex);
1874
1875 if (fil_system->max_assigned_id < max_id) {
1876
1877 fil_system->max_assigned_id = max_id;
1878 }
1879
1880 mutex_exit(&fil_system->mutex);
1881 }
1882
1883 /****************************************************************//**
1884 Writes the flushed lsn and the latest archived log number to the page header
1885 of the first page of a data file of the system tablespace (space 0),
1886 which is uncompressed. */
1887 static MY_ATTRIBUTE((warn_unused_result))
1888 dberr_t
fil_write_lsn_and_arch_no_to_file(ulint space,ulint sum_of_sizes,lsn_t lsn,ulint arch_log_no MY_ATTRIBUTE ((unused)))1889 fil_write_lsn_and_arch_no_to_file(
1890 /*==============================*/
1891 ulint space, /*!< in: space to write to */
1892 ulint sum_of_sizes, /*!< in: combined size of previous files
1893 in space, in database pages */
1894 lsn_t lsn, /*!< in: lsn to write */
1895 ulint arch_log_no MY_ATTRIBUTE((unused)))
1896 /*!< in: archived log number to write */
1897 {
1898 byte* buf1;
1899 byte* buf;
1900 dberr_t err;
1901
1902 buf1 = static_cast<byte*>(mem_alloc(2 * UNIV_PAGE_SIZE));
1903 buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
1904
1905 err = fil_read(TRUE, space, 0, sum_of_sizes, 0,
1906 UNIV_PAGE_SIZE, buf, NULL);
1907 if (err == DB_SUCCESS) {
1908 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1909
1910 err = fil_write(TRUE, space, 0, sum_of_sizes, 0,
1911 UNIV_PAGE_SIZE, buf, NULL);
1912 }
1913
1914 mem_free(buf1);
1915
1916 return(err);
1917 }
1918
1919 /****************************************************************//**
1920 Writes the flushed lsn and the latest archived log number to the page
1921 header of the first page of each data file in the system tablespace.
1922 @return DB_SUCCESS or error number */
1923 UNIV_INTERN
1924 dberr_t
fil_write_flushed_lsn_to_data_files(lsn_t lsn,ulint arch_log_no)1925 fil_write_flushed_lsn_to_data_files(
1926 /*================================*/
1927 lsn_t lsn, /*!< in: lsn to write */
1928 ulint arch_log_no) /*!< in: latest archived log file number */
1929 {
1930 fil_space_t* space;
1931 fil_node_t* node;
1932 dberr_t err;
1933
1934 mutex_enter(&fil_system->mutex);
1935
1936 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1937 space != NULL;
1938 space = UT_LIST_GET_NEXT(space_list, space)) {
1939
1940 /* We only write the lsn to all existing data files which have
1941 been open during the lifetime of the mysqld process; they are
1942 represented by the space objects in the tablespace memory
1943 cache. Note that all data files in the system tablespace 0
1944 and the UNDO log tablespaces (if separate) are always open. */
1945
1946 if (space->purpose == FIL_TABLESPACE
1947 && !fil_is_user_tablespace_id(space->id)) {
1948 ulint sum_of_sizes = 0;
1949
1950 for (node = UT_LIST_GET_FIRST(space->chain);
1951 node != NULL;
1952 node = UT_LIST_GET_NEXT(chain, node)) {
1953
1954 mutex_exit(&fil_system->mutex);
1955
1956 err = fil_write_lsn_and_arch_no_to_file(
1957 space->id, sum_of_sizes, lsn,
1958 arch_log_no);
1959
1960 if (err != DB_SUCCESS) {
1961
1962 return(err);
1963 }
1964
1965 mutex_enter(&fil_system->mutex);
1966
1967 sum_of_sizes += node->size;
1968 }
1969 }
1970 }
1971
1972 mutex_exit(&fil_system->mutex);
1973
1974 return(DB_SUCCESS);
1975 }
1976
1977 /*******************************************************************//**
1978 Checks the consistency of the first data page of a tablespace
1979 at database startup.
1980 @retval NULL on success, or if innodb_force_recovery is set
1981 @return pointer to an error message string */
1982 static MY_ATTRIBUTE((warn_unused_result))
1983 const char*
fil_check_first_page(const page_t * page)1984 fil_check_first_page(
1985 /*=================*/
1986 const page_t* page) /*!< in: data page */
1987 {
1988 ulint space_id;
1989 ulint flags;
1990
1991 if (srv_force_recovery >= SRV_FORCE_IGNORE_CORRUPT) {
1992 return(NULL);
1993 }
1994
1995 space_id = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_ID + page);
1996 flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
1997
1998 if (UNIV_PAGE_SIZE != fsp_flags_get_page_size(flags)) {
1999 return("innodb-page-size mismatch");
2000 }
2001
2002 if (!space_id && !flags) {
2003 ulint nonzero_bytes = UNIV_PAGE_SIZE;
2004 const byte* b = page;
2005
2006 while (!*b && --nonzero_bytes) {
2007 b++;
2008 }
2009
2010 if (!nonzero_bytes) {
2011 return("space header page consists of zero bytes");
2012 }
2013 }
2014
2015 if (buf_page_is_corrupted(
2016 false, page, fsp_flags_get_zip_size(flags))) {
2017 return("checksum mismatch");
2018 }
2019
2020 if (page_get_space_id(page) == space_id
2021 && page_get_page_no(page) == 0) {
2022 return(NULL);
2023 }
2024
2025 return("inconsistent data in space header");
2026 }
2027
2028 /*******************************************************************//**
2029 Reads the flushed lsn, arch no, space_id and tablespace flag fields from
2030 the first page of a data file at database startup.
2031 @retval NULL on success, or if innodb_force_recovery is set
2032 @return pointer to an error message string */
2033 UNIV_INTERN
2034 const char*
fil_read_first_page(pfs_os_file_t data_file,ibool one_read_already,ulint * flags,ulint * space_id,ulint * min_arch_log_no,ulint * max_arch_log_no,lsn_t * min_flushed_lsn,lsn_t * max_flushed_lsn)2035 fil_read_first_page(
2036 /*================*/
2037 pfs_os_file_t data_file, /*!< in: open data file */
2038 ibool one_read_already, /*!< in: TRUE if min and max
2039 parameters below already
2040 contain sensible data */
2041 ulint* flags, /*!< out: tablespace flags */
2042 ulint* space_id, /*!< out: tablespace ID */
2043 #ifdef UNIV_LOG_ARCHIVE
2044 ulint* min_arch_log_no, /*!< out: min of archived
2045 log numbers in data files */
2046 ulint* max_arch_log_no, /*!< out: max of archived
2047 log numbers in data files */
2048 #endif /* UNIV_LOG_ARCHIVE */
2049 lsn_t* min_flushed_lsn, /*!< out: min of flushed
2050 lsn values in data files */
2051 lsn_t* max_flushed_lsn) /*!< out: max of flushed
2052 lsn values in data files */
2053 {
2054 byte* buf;
2055 byte* page;
2056 lsn_t flushed_lsn;
2057 const char* check_msg = NULL;
2058
2059 buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
2060
2061 /* Align the memory for a possible read from a raw device */
2062
2063 page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
2064
2065 os_file_read(data_file, page, 0, UNIV_PAGE_SIZE);
2066
2067 /* The FSP_HEADER on page 0 is only valid for the first file
2068 in a tablespace. So if this is not the first datafile, leave
2069 *flags and *space_id as they were read from the first file and
2070 do not validate the first page. */
2071 if (!one_read_already) {
2072 *flags = fsp_header_get_flags(page);
2073 *space_id = fsp_header_get_space_id(page);
2074
2075 check_msg = fil_check_first_page(page);
2076 }
2077
2078 flushed_lsn = mach_read_from_8(page + FIL_PAGE_FILE_FLUSH_LSN);
2079
2080 ut_free(buf);
2081
2082 if (check_msg) {
2083 return(check_msg);
2084 }
2085
2086 if (!one_read_already) {
2087 *min_flushed_lsn = flushed_lsn;
2088 *max_flushed_lsn = flushed_lsn;
2089 #ifdef UNIV_LOG_ARCHIVE
2090 *min_arch_log_no = arch_log_no;
2091 *max_arch_log_no = arch_log_no;
2092 #endif /* UNIV_LOG_ARCHIVE */
2093 return(NULL);
2094 }
2095
2096 if (*min_flushed_lsn > flushed_lsn) {
2097 *min_flushed_lsn = flushed_lsn;
2098 }
2099 if (*max_flushed_lsn < flushed_lsn) {
2100 *max_flushed_lsn = flushed_lsn;
2101 }
2102 #ifdef UNIV_LOG_ARCHIVE
2103 if (*min_arch_log_no > arch_log_no) {
2104 *min_arch_log_no = arch_log_no;
2105 }
2106 if (*max_arch_log_no < arch_log_no) {
2107 *max_arch_log_no = arch_log_no;
2108 }
2109 #endif /* UNIV_LOG_ARCHIVE */
2110
2111 return(NULL);
2112 }
2113
2114 /*================ SINGLE-TABLE TABLESPACES ==========================*/
2115
2116 #ifndef UNIV_HOTBACKUP
2117 /*******************************************************************//**
2118 Increments the count of pending operation, if space is not being deleted.
2119 @return TRUE if being deleted, and operation should be skipped */
2120 UNIV_INTERN
2121 ibool
fil_inc_pending_ops(ulint id,ibool print_err)2122 fil_inc_pending_ops(
2123 /*================*/
2124 ulint id, /*!< in: space id */
2125 ibool print_err) /*!< in: need to print error or not */
2126 {
2127 fil_space_t* space;
2128
2129 mutex_enter(&fil_system->mutex);
2130
2131 space = fil_space_get_by_id(id);
2132
2133 if (space == NULL) {
2134 if (print_err) {
2135 fprintf(stderr,
2136 "InnoDB: Error: trying to do an operation on a"
2137 " dropped tablespace %lu\n",
2138 (ulong) id);
2139 }
2140 }
2141
2142 if (space == NULL || space->stop_new_ops) {
2143 mutex_exit(&fil_system->mutex);
2144
2145 return(TRUE);
2146 }
2147
2148 space->n_pending_ops++;
2149
2150 mutex_exit(&fil_system->mutex);
2151
2152 return(FALSE);
2153 }
2154
2155 /*******************************************************************//**
2156 Decrements the count of pending operations. */
2157 UNIV_INTERN
2158 void
fil_decr_pending_ops(ulint id)2159 fil_decr_pending_ops(
2160 /*=================*/
2161 ulint id) /*!< in: space id */
2162 {
2163 fil_space_t* space;
2164
2165 mutex_enter(&fil_system->mutex);
2166
2167 space = fil_space_get_by_id(id);
2168
2169 if (space == NULL) {
2170 fprintf(stderr,
2171 "InnoDB: Error: decrementing pending operation"
2172 " of a dropped tablespace %lu\n",
2173 (ulong) id);
2174 }
2175
2176 if (space != NULL) {
2177 space->n_pending_ops--;
2178 }
2179
2180 mutex_exit(&fil_system->mutex);
2181 }
2182 #endif /* !UNIV_HOTBACKUP */
2183
2184 /********************************************************//**
2185 Creates the database directory for a table if it does not exist yet. */
2186 static
2187 void
fil_create_directory_for_tablename(const char * name)2188 fil_create_directory_for_tablename(
2189 /*===============================*/
2190 const char* name) /*!< in: name in the standard
2191 'databasename/tablename' format */
2192 {
2193 const char* namend;
2194 char* path;
2195 ulint len;
2196
2197 len = strlen(fil_path_to_mysql_datadir);
2198 namend = strchr(name, '/');
2199 ut_a(namend);
2200 path = static_cast<char*>(mem_alloc(len + (namend - name) + 2));
2201
2202 memcpy(path, fil_path_to_mysql_datadir, len);
2203 path[len] = '/';
2204 memcpy(path + len + 1, name, namend - name);
2205 path[len + (namend - name) + 1] = 0;
2206
2207 srv_normalize_path_for_win(path);
2208
2209 ut_a(os_file_create_directory(path, FALSE));
2210 mem_free(path);
2211 }
2212
2213 #ifndef UNIV_HOTBACKUP
2214 /********************************************************//**
2215 Writes a log record about an .ibd file create/rename/delete. */
2216 static
2217 void
fil_op_write_log(ulint type,ulint space_id,ulint log_flags,ulint flags,const char * name,const char * new_name,mtr_t * mtr)2218 fil_op_write_log(
2219 /*=============*/
2220 ulint type, /*!< in: MLOG_FILE_CREATE,
2221 MLOG_FILE_CREATE2,
2222 MLOG_FILE_DELETE, or
2223 MLOG_FILE_RENAME */
2224 ulint space_id, /*!< in: space id */
2225 ulint log_flags, /*!< in: redo log flags (stored
2226 in the page number field) */
2227 ulint flags, /*!< in: compressed page size
2228 and file format
2229 if type==MLOG_FILE_CREATE2, or 0 */
2230 const char* name, /*!< in: table name in the familiar
2231 'databasename/tablename' format, or
2232 the file path in the case of
2233 MLOG_FILE_DELETE */
2234 const char* new_name, /*!< in: if type is MLOG_FILE_RENAME,
2235 the new table name in the
2236 'databasename/tablename' format */
2237 mtr_t* mtr) /*!< in: mini-transaction handle */
2238 {
2239 byte* log_ptr;
2240 ulint len;
2241
2242 log_ptr = mlog_open(mtr, 11 + 2 + 1);
2243
2244 if (!log_ptr) {
2245 /* Logging in mtr is switched off during crash recovery:
2246 in that case mlog_open returns NULL */
2247 return;
2248 }
2249
2250 log_ptr = mlog_write_initial_log_record_for_file_op(
2251 type, space_id, log_flags, log_ptr, mtr);
2252 if (type == MLOG_FILE_CREATE2) {
2253 mach_write_to_4(log_ptr, flags);
2254 log_ptr += 4;
2255 }
2256 /* Let us store the strings as null-terminated for easier readability
2257 and handling */
2258
2259 len = strlen(name) + 1;
2260
2261 mach_write_to_2(log_ptr, len);
2262 log_ptr += 2;
2263 mlog_close(mtr, log_ptr);
2264
2265 mlog_catenate_string(mtr, (byte*) name, len);
2266
2267 if (type == MLOG_FILE_RENAME) {
2268 len = strlen(new_name) + 1;
2269 log_ptr = mlog_open(mtr, 2 + len);
2270 ut_a(log_ptr);
2271 mach_write_to_2(log_ptr, len);
2272 log_ptr += 2;
2273 mlog_close(mtr, log_ptr);
2274
2275 mlog_catenate_string(mtr, (byte*) new_name, len);
2276 }
2277 }
2278 #endif
2279
2280 /*******************************************************************//**
2281 Parses the body of a log record written about an .ibd file operation. That is,
2282 the log record part after the standard (type, space id, page no) header of the
2283 log record.
2284
2285 If desired, also replays the delete or rename operation if the .ibd file
2286 exists and the space id in it matches. Replays the create operation if a file
2287 at that path does not exist yet. If the database directory for the file to be
2288 created does not exist, then we create the directory, too.
2289
2290 Note that mysqlbackup --apply-log sets fil_path_to_mysql_datadir to point to
2291 the datadir that we should use in replaying the file operations.
2292
2293 InnoDB recovery does not replay these fully since it always sets the space id
2294 to zero. But mysqlbackup does replay them. TODO: If remote tablespaces are
2295 used, mysqlbackup will only create tables in the default directory since
2296 MLOG_FILE_CREATE and MLOG_FILE_CREATE2 only know the tablename, not the path.
2297
2298 @return end of log record, or NULL if the record was not completely
2299 contained between ptr and end_ptr */
2300 UNIV_INTERN
2301 byte*
fil_op_log_parse_or_replay(byte * ptr,byte * end_ptr,ulint type,ulint space_id,ulint log_flags)2302 fil_op_log_parse_or_replay(
2303 /*=======================*/
2304 byte* ptr, /*!< in: buffer containing the log record body,
2305 or an initial segment of it, if the record does
2306 not fir completely between ptr and end_ptr */
2307 byte* end_ptr, /*!< in: buffer end */
2308 ulint type, /*!< in: the type of this log record */
2309 ulint space_id, /*!< in: the space id of the tablespace in
2310 question, or 0 if the log record should
2311 only be parsed but not replayed */
2312 ulint log_flags) /*!< in: redo log flags
2313 (stored in the page number parameter) */
2314 {
2315 ulint name_len;
2316 ulint new_name_len;
2317 const char* name;
2318 const char* new_name = NULL;
2319 ulint flags = 0;
2320
2321 if (type == MLOG_FILE_CREATE2) {
2322 if (end_ptr < ptr + 4) {
2323
2324 return(NULL);
2325 }
2326
2327 flags = mach_read_from_4(ptr);
2328 ptr += 4;
2329 }
2330
2331 if (end_ptr < ptr + 2) {
2332
2333 return(NULL);
2334 }
2335
2336 name_len = mach_read_from_2(ptr);
2337
2338 ptr += 2;
2339
2340 if (end_ptr < ptr + name_len) {
2341
2342 return(NULL);
2343 }
2344
2345 name = (const char*) ptr;
2346
2347 ptr += name_len;
2348
2349 if (type == MLOG_FILE_RENAME) {
2350 if (end_ptr < ptr + 2) {
2351
2352 return(NULL);
2353 }
2354
2355 new_name_len = mach_read_from_2(ptr);
2356
2357 ptr += 2;
2358
2359 if (end_ptr < ptr + new_name_len) {
2360
2361 return(NULL);
2362 }
2363
2364 new_name = (const char*) ptr;
2365
2366 ptr += new_name_len;
2367 }
2368
2369 /* We managed to parse a full log record body */
2370 /*
2371 printf("Parsed log rec of type %lu space %lu\n"
2372 "name %s\n", type, space_id, name);
2373
2374 if (type == MLOG_FILE_RENAME) {
2375 printf("new name %s\n", new_name);
2376 }
2377 */
2378 if (!space_id) {
2379 return(ptr);
2380 }
2381
2382 /* Let us try to perform the file operation, if sensible. Note that
2383 mysqlbackup has at this stage already read in all space id info to the
2384 fil0fil.cc data structures.
2385
2386 NOTE that our algorithm is not guaranteed to work correctly if there
2387 were renames of tables during the backup. See mysqlbackup code for more
2388 on the problem. */
2389
2390 switch (type) {
2391 case MLOG_FILE_DELETE:
2392 if (fil_tablespace_exists_in_mem(space_id)) {
2393 dberr_t err = fil_delete_tablespace(
2394 space_id, BUF_REMOVE_FLUSH_NO_WRITE);
2395 ut_a(err == DB_SUCCESS);
2396 }
2397
2398 break;
2399
2400 case MLOG_FILE_RENAME:
2401 /* In order to replay the rename, the following must hold:
2402 * The new name is not already used.
2403 * A tablespace is open in memory with the old name.
2404 * The space ID for that tablepace matches this log entry.
2405 This will prevent unintended renames during recovery. */
2406
2407 if (fil_get_space_id_for_table(new_name) == ULINT_UNDEFINED
2408 && space_id == fil_get_space_id_for_table(name)) {
2409 /* Create the database directory for the new name, if
2410 it does not exist yet */
2411 fil_create_directory_for_tablename(new_name);
2412
2413 if (!fil_rename_tablespace(name, space_id,
2414 new_name, NULL)) {
2415 ut_error;
2416 }
2417 }
2418
2419 break;
2420
2421 case MLOG_FILE_CREATE:
2422 case MLOG_FILE_CREATE2:
2423 if (fil_tablespace_exists_in_mem(space_id)) {
2424 /* Do nothing */
2425 } else if (fil_get_space_id_for_table(name)
2426 != ULINT_UNDEFINED) {
2427 /* Do nothing */
2428 } else if (log_flags & MLOG_FILE_FLAG_TEMP) {
2429 /* Temporary table, do nothing */
2430 } else {
2431 const char* path = NULL;
2432
2433 /* Create the database directory for name, if it does
2434 not exist yet */
2435 fil_create_directory_for_tablename(name);
2436
2437 if (fil_create_new_single_table_tablespace(
2438 space_id, name, path, flags,
2439 DICT_TF2_USE_TABLESPACE,
2440 FIL_IBD_FILE_INITIAL_SIZE) != DB_SUCCESS) {
2441 ut_error;
2442 }
2443 }
2444
2445 break;
2446
2447 default:
2448 ut_error;
2449 }
2450
2451 return(ptr);
2452 }
2453
2454 /*******************************************************************//**
2455 Allocates a file name for the EXPORT/IMPORT config file name. The
2456 string must be freed by caller with mem_free().
2457 @return own: file name */
2458 static
2459 char*
fil_make_cfg_name(const char * filepath)2460 fil_make_cfg_name(
2461 /*==============*/
2462 const char* filepath) /*!< in: .ibd file name */
2463 {
2464 char* cfg_name;
2465
2466 /* Create a temporary file path by replacing the .ibd suffix
2467 with .cfg. */
2468
2469 ut_ad(strlen(filepath) > 4);
2470
2471 cfg_name = mem_strdup(filepath);
2472 ut_snprintf(cfg_name + strlen(cfg_name) - 3, 4, "cfg");
2473 return(cfg_name);
2474 }
2475
2476 /*******************************************************************//**
2477 Check for change buffer merges.
2478 @return 0 if no merges else count + 1. */
2479 static
2480 ulint
fil_ibuf_check_pending_ops(fil_space_t * space,ulint count)2481 fil_ibuf_check_pending_ops(
2482 /*=======================*/
2483 fil_space_t* space, /*!< in/out: Tablespace to check */
2484 ulint count) /*!< in: number of attempts so far */
2485 {
2486 ut_ad(mutex_own(&fil_system->mutex));
2487
2488 if (space != 0 && space->n_pending_ops != 0) {
2489
2490 if (count > 5000) {
2491 ib_logf(IB_LOG_LEVEL_WARN,
2492 "Trying to close/delete tablespace "
2493 "'%s' but there are %lu pending change "
2494 "buffer merges on it.",
2495 space->name,
2496 (ulong) space->n_pending_ops);
2497 }
2498
2499 return(count + 1);
2500 }
2501
2502 return(0);
2503 }
2504
2505 /*******************************************************************//**
2506 Check for pending IO.
2507 @return 0 if no pending else count + 1. */
2508 static
2509 ulint
fil_check_pending_io(fil_space_t * space,fil_node_t ** node,ulint count)2510 fil_check_pending_io(
2511 /*=================*/
2512 fil_space_t* space, /*!< in/out: Tablespace to check */
2513 fil_node_t** node, /*!< out: Node in space list */
2514 ulint count) /*!< in: number of attempts so far */
2515 {
2516 ut_ad(mutex_own(&fil_system->mutex));
2517 ut_a(space->n_pending_ops == 0);
2518
2519 /* The following code must change when InnoDB supports
2520 multiple datafiles per tablespace. */
2521 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2522
2523 *node = UT_LIST_GET_FIRST(space->chain);
2524
2525 if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
2526
2527 ut_a(!(*node)->being_extended);
2528
2529 if (count > 1000) {
2530 ib_logf(IB_LOG_LEVEL_WARN,
2531 "Trying to close/delete tablespace '%s' "
2532 "but there are %lu flushes "
2533 " and %lu pending i/o's on it.",
2534 space->name,
2535 (ulong) space->n_pending_flushes,
2536 (ulong) (*node)->n_pending);
2537 }
2538
2539 return(count + 1);
2540 }
2541
2542 return(0);
2543 }
2544
2545 /*******************************************************************//**
2546 Check pending operations on a tablespace.
2547 @return DB_SUCCESS or error failure. */
2548 static
2549 dberr_t
fil_check_pending_operations(ulint id,fil_space_t ** space,char ** path)2550 fil_check_pending_operations(
2551 /*=========================*/
2552 ulint id, /*!< in: space id */
2553 fil_space_t** space, /*!< out: tablespace instance in memory */
2554 char** path) /*!< out/own: tablespace path */
2555 {
2556 ulint count = 0;
2557
2558 ut_a(id != TRX_SYS_SPACE);
2559 ut_ad(space);
2560
2561 *space = 0;
2562
2563 mutex_enter(&fil_system->mutex);
2564 fil_space_t* sp = fil_space_get_by_id(id);
2565 if (sp) {
2566 sp->stop_new_ops = TRUE;
2567 }
2568 mutex_exit(&fil_system->mutex);
2569
2570 /* Check for pending change buffer merges. */
2571
2572 do {
2573 mutex_enter(&fil_system->mutex);
2574
2575 sp = fil_space_get_by_id(id);
2576
2577 count = fil_ibuf_check_pending_ops(sp, count);
2578
2579 mutex_exit(&fil_system->mutex);
2580
2581 if (count > 0) {
2582 os_thread_sleep(20000);
2583 }
2584
2585 } while (count > 0);
2586
2587 /* Check for pending IO. */
2588
2589 *path = 0;
2590
2591 do {
2592 mutex_enter(&fil_system->mutex);
2593
2594 sp = fil_space_get_by_id(id);
2595
2596 if (sp == NULL) {
2597 mutex_exit(&fil_system->mutex);
2598 return(DB_TABLESPACE_NOT_FOUND);
2599 }
2600
2601 fil_node_t* node;
2602
2603 count = fil_check_pending_io(sp, &node, count);
2604
2605 if (count == 0) {
2606 *path = mem_strdup(node->name);
2607 }
2608
2609 mutex_exit(&fil_system->mutex);
2610
2611 if (count > 0) {
2612 os_thread_sleep(20000);
2613 }
2614
2615 } while (count > 0);
2616
2617 ut_ad(sp);
2618
2619 *space = sp;
2620 return(DB_SUCCESS);
2621 }
2622
2623 /*******************************************************************//**
2624 Closes a single-table tablespace. The tablespace must be cached in the
2625 memory cache. Free all pages used by the tablespace.
2626 @return DB_SUCCESS or error */
2627 UNIV_INTERN
2628 dberr_t
fil_close_tablespace(trx_t * trx,ulint id)2629 fil_close_tablespace(
2630 /*=================*/
2631 trx_t* trx, /*!< in/out: Transaction covering the close */
2632 ulint id) /*!< in: space id */
2633 {
2634 char* path = 0;
2635 fil_space_t* space = 0;
2636
2637 ut_a(id != TRX_SYS_SPACE);
2638
2639 dberr_t err = fil_check_pending_operations(id, &space, &path);
2640
2641 if (err != DB_SUCCESS) {
2642 return(err);
2643 }
2644
2645 ut_a(space);
2646 ut_a(path != 0);
2647
2648 rw_lock_x_lock(&space->latch);
2649
2650 #ifndef UNIV_HOTBACKUP
2651 /* Invalidate in the buffer pool all pages belonging to the
2652 tablespace. Since we have set space->stop_new_ops = TRUE, readahead
2653 or ibuf merge can no longer read more pages of this tablespace to the
2654 buffer pool. Thus we can clean the tablespace out of the buffer pool
2655 completely and permanently. The flag stop_new_ops also prevents
2656 fil_flush() from being applied to this tablespace. */
2657
2658 buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
2659 #endif
2660 mutex_enter(&fil_system->mutex);
2661
2662 /* If the free is successful, the X lock will be released before
2663 the space memory data structure is freed. */
2664
2665 if (!fil_space_free(id, TRUE)) {
2666 rw_lock_x_unlock(&space->latch);
2667 err = DB_TABLESPACE_NOT_FOUND;
2668 } else {
2669 err = DB_SUCCESS;
2670 }
2671
2672 mutex_exit(&fil_system->mutex);
2673
2674 /* If it is a delete then also delete any generated files, otherwise
2675 when we drop the database the remove directory will fail. */
2676
2677 char* cfg_name = fil_make_cfg_name(path);
2678
2679 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2680
2681 mem_free(path);
2682 mem_free(cfg_name);
2683
2684 return(err);
2685 }
2686
2687 /*******************************************************************//**
2688 Deletes a single-table tablespace. The tablespace must be cached in the
2689 memory cache.
2690 @return DB_SUCCESS or error */
2691 UNIV_INTERN
2692 dberr_t
fil_delete_tablespace(ulint id,buf_remove_t buf_remove)2693 fil_delete_tablespace(
2694 /*==================*/
2695 ulint id, /*!< in: space id */
2696 buf_remove_t buf_remove) /*!< in: specify the action to take
2697 on the tables pages in the buffer
2698 pool */
2699 {
2700 char* path = 0;
2701 fil_space_t* space = 0;
2702
2703 ut_a(id != TRX_SYS_SPACE);
2704
2705 dberr_t err = fil_check_pending_operations(id, &space, &path);
2706
2707 if (err != DB_SUCCESS) {
2708
2709 ib_logf(IB_LOG_LEVEL_ERROR,
2710 "Cannot delete tablespace %lu because it is not "
2711 "found in the tablespace memory cache.",
2712 (ulong) id);
2713
2714 return(err);
2715 }
2716
2717 ut_a(space);
2718 ut_a(path != 0);
2719
2720 /* Important: We rely on the data dictionary mutex to ensure
2721 that a race is not possible here. It should serialize the tablespace
2722 drop/free. We acquire an X latch only to avoid a race condition
2723 when accessing the tablespace instance via:
2724
2725 fsp_get_available_space_in_free_extents().
2726
2727 There our main motivation is to reduce the contention on the
2728 dictionary mutex. */
2729
2730 rw_lock_x_lock(&space->latch);
2731
2732 #ifndef UNIV_HOTBACKUP
2733 /* IMPORTANT: Because we have set space::stop_new_ops there
2734 can't be any new ibuf merges, reads or flushes. We are here
2735 because node::n_pending was zero above. However, it is still
2736 possible to have pending read and write requests:
2737
2738 A read request can happen because the reader thread has
2739 gone through the ::stop_new_ops check in buf_page_init_for_read()
2740 before the flag was set and has not yet incremented ::n_pending
2741 when we checked it above.
2742
2743 A write request can be issued any time because we don't check
2744 the ::stop_new_ops flag when queueing a block for write.
2745
2746 We deal with pending write requests in the following function
2747 where we'd minimally evict all dirty pages belonging to this
2748 space from the flush_list. Not that if a block is IO-fixed
2749 we'll wait for IO to complete.
2750
2751 To deal with potential read requests by checking the
2752 ::stop_new_ops flag in fil_io() */
2753
2754 buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
2755
2756 #endif /* !UNIV_HOTBACKUP */
2757
2758 /* If it is a delete then also delete any generated files, otherwise
2759 when we drop the database the remove directory will fail. */
2760 {
2761 char* cfg_name = fil_make_cfg_name(path);
2762 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
2763 mem_free(cfg_name);
2764 }
2765
2766 /* Delete the link file pointing to the ibd file we are deleting. */
2767 if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
2768 fil_delete_link_file(space->name);
2769 }
2770
2771 mutex_enter(&fil_system->mutex);
2772
2773 /* Double check the sanity of pending ops after reacquiring
2774 the fil_system::mutex. */
2775 if (fil_space_get_by_id(id)) {
2776 ut_a(space->n_pending_ops == 0);
2777 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2778 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2779 ut_a(node->n_pending == 0);
2780 }
2781
2782 if (!fil_space_free(id, TRUE)) {
2783 err = DB_TABLESPACE_NOT_FOUND;
2784 }
2785
2786 mutex_exit(&fil_system->mutex);
2787
2788 if (err != DB_SUCCESS) {
2789 rw_lock_x_unlock(&space->latch);
2790 } else if (!os_file_delete(innodb_file_data_key, path)
2791 && !os_file_delete_if_exists(innodb_file_data_key, path)) {
2792
2793 /* Note: This is because we have removed the
2794 tablespace instance from the cache. */
2795
2796 err = DB_IO_ERROR;
2797 }
2798
2799 if (err == DB_SUCCESS) {
2800 #ifndef UNIV_HOTBACKUP
2801 /* Write a log record about the deletion of the .ibd
2802 file, so that mysqlbackup can replay it in the
2803 --apply-log phase. We use a dummy mtr and the familiar
2804 log write mechanism. */
2805 mtr_t mtr;
2806
2807 /* When replaying the operation in mysqlbackup, do not try
2808 to write any log record */
2809 mtr_start(&mtr);
2810
2811 fil_op_write_log(MLOG_FILE_DELETE, id, 0, 0, path, NULL, &mtr);
2812 mtr_commit(&mtr);
2813 #endif
2814 err = DB_SUCCESS;
2815 }
2816
2817 mem_free(path);
2818
2819 return(err);
2820 }
2821
2822 /*******************************************************************//**
2823 Returns TRUE if a single-table tablespace is being deleted.
2824 @return TRUE if being deleted */
2825 UNIV_INTERN
2826 ibool
fil_tablespace_is_being_deleted(ulint id)2827 fil_tablespace_is_being_deleted(
2828 /*============================*/
2829 ulint id) /*!< in: space id */
2830 {
2831 fil_space_t* space;
2832 ibool is_being_deleted;
2833
2834 mutex_enter(&fil_system->mutex);
2835
2836 space = fil_space_get_by_id(id);
2837
2838 ut_a(space != NULL);
2839
2840 is_being_deleted = space->stop_new_ops;
2841
2842 mutex_exit(&fil_system->mutex);
2843
2844 return(is_being_deleted);
2845 }
2846
2847 #ifndef UNIV_HOTBACKUP
2848 /*******************************************************************//**
2849 Discards a single-table tablespace. The tablespace must be cached in the
2850 memory cache. Discarding is like deleting a tablespace, but
2851
2852 1. We do not drop the table from the data dictionary;
2853
2854 2. We remove all insert buffer entries for the tablespace immediately;
2855 in DROP TABLE they are only removed gradually in the background;
2856
2857 3. Free all the pages in use by the tablespace.
2858 @return DB_SUCCESS or error */
2859 UNIV_INTERN
2860 dberr_t
fil_discard_tablespace(ulint id)2861 fil_discard_tablespace(
2862 /*===================*/
2863 ulint id) /*!< in: space id */
2864 {
2865 dberr_t err;
2866
2867 switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
2868 case DB_SUCCESS:
2869 break;
2870
2871 case DB_IO_ERROR:
2872 ib_logf(IB_LOG_LEVEL_WARN,
2873 "While deleting tablespace %lu in DISCARD TABLESPACE."
2874 " File rename/delete failed: %s",
2875 (ulong) id, ut_strerr(err));
2876 break;
2877
2878 case DB_TABLESPACE_NOT_FOUND:
2879 ib_logf(IB_LOG_LEVEL_WARN,
2880 "Cannot delete tablespace %lu in DISCARD "
2881 "TABLESPACE. %s",
2882 (ulong) id, ut_strerr(err));
2883 break;
2884
2885 default:
2886 ut_error;
2887 }
2888
2889 /* Remove all insert buffer entries for the tablespace */
2890
2891 ibuf_delete_for_discarded_space(id);
2892
2893 return(err);
2894 }
2895 #endif /* !UNIV_HOTBACKUP */
2896
2897 /*******************************************************************//**
2898 Renames the memory cache structures of a single-table tablespace.
2899 @return TRUE if success */
2900 static
2901 ibool
fil_rename_tablespace_in_mem(fil_space_t * space,fil_node_t * node,const char * new_name,const char * new_path)2902 fil_rename_tablespace_in_mem(
2903 /*=========================*/
2904 fil_space_t* space, /*!< in: tablespace memory object */
2905 fil_node_t* node, /*!< in: file node of that tablespace */
2906 const char* new_name, /*!< in: new name */
2907 const char* new_path) /*!< in: new file path */
2908 {
2909 fil_space_t* space2;
2910 const char* old_name = space->name;
2911
2912 ut_ad(mutex_own(&fil_system->mutex));
2913
2914 space2 = fil_space_get_by_name(old_name);
2915 if (space != space2) {
2916 fputs("InnoDB: Error: cannot find ", stderr);
2917 ut_print_filename(stderr, old_name);
2918 fputs(" in tablespace memory cache\n", stderr);
2919
2920 return(FALSE);
2921 }
2922
2923 space2 = fil_space_get_by_name(new_name);
2924 if (space2 != NULL) {
2925 fputs("InnoDB: Error: ", stderr);
2926 ut_print_filename(stderr, new_name);
2927 fputs(" is already in tablespace memory cache\n", stderr);
2928
2929 return(FALSE);
2930 }
2931
2932 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
2933 ut_fold_string(space->name), space);
2934 mem_free(space->name);
2935 mem_free(node->name);
2936
2937 space->name = mem_strdup(new_name);
2938 node->name = mem_strdup(new_path);
2939
2940 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
2941 ut_fold_string(new_name), space);
2942 return(TRUE);
2943 }
2944
2945 /*******************************************************************//**
2946 Allocates a file name for a single-table tablespace. The string must be freed
2947 by caller with mem_free().
2948 @return own: file name */
2949 UNIV_INTERN
2950 char*
fil_make_ibd_name(const char * name,bool is_full_path)2951 fil_make_ibd_name(
2952 /*==============*/
2953 const char* name, /*!< in: table name or a dir path */
2954 bool is_full_path) /*!< in: TRUE if it is a dir path */
2955 {
2956 char* filename;
2957 ulint namelen = strlen(name);
2958 ulint dirlen = strlen(fil_path_to_mysql_datadir);
2959 ulint pathlen = dirlen + namelen + sizeof "/.ibd";
2960
2961 filename = static_cast<char*>(mem_alloc(pathlen));
2962
2963 if (is_full_path) {
2964 memcpy(filename, name, namelen);
2965 memcpy(filename + namelen, ".ibd", sizeof ".ibd");
2966 } else {
2967 ut_snprintf(filename, pathlen, "%s/%s.ibd",
2968 fil_path_to_mysql_datadir, name);
2969
2970 }
2971
2972 srv_normalize_path_for_win(filename);
2973
2974 return(filename);
2975 }
2976
2977 /*******************************************************************//**
2978 Allocates a file name for a tablespace ISL file (InnoDB Symbolic Link).
2979 The string must be freed by caller with mem_free().
2980 @return own: file name */
2981 UNIV_INTERN
2982 char*
fil_make_isl_name(const char * name)2983 fil_make_isl_name(
2984 /*==============*/
2985 const char* name) /*!< in: table name */
2986 {
2987 char* filename;
2988 ulint namelen = strlen(name);
2989 ulint dirlen = strlen(fil_path_to_mysql_datadir);
2990 ulint pathlen = dirlen + namelen + sizeof "/.isl";
2991
2992 filename = static_cast<char*>(mem_alloc(pathlen));
2993
2994 ut_snprintf(filename, pathlen, "%s/%s.isl",
2995 fil_path_to_mysql_datadir, name);
2996
2997 srv_normalize_path_for_win(filename);
2998
2999 return(filename);
3000 }
3001
3002 /** Test if a tablespace file can be renamed to a new filepath by checking
3003 if that the old filepath exists and the new filepath does not exist.
3004 @param[in] space_id tablespace id
3005 @param[in] old_path old filepath
3006 @param[in] new_path new filepath
3007 @param[in] is_discarded whether the tablespace is discarded
3008 @return innodb error code */
3009 dberr_t
fil_rename_tablespace_check(ulint space_id,const char * old_path,const char * new_path,bool is_discarded)3010 fil_rename_tablespace_check(
3011 ulint space_id,
3012 const char* old_path,
3013 const char* new_path,
3014 bool is_discarded)
3015 {
3016 ulint exists = false;
3017 os_file_type_t ftype;
3018
3019 if (!is_discarded
3020 && os_file_status(old_path, &exists, &ftype)
3021 && !exists) {
3022 ib_logf(IB_LOG_LEVEL_ERROR,
3023 "Cannot rename '%s' to '%s' for space ID %lu"
3024 " because the source file does not exist.",
3025 old_path, new_path, space_id);
3026
3027 return(DB_TABLESPACE_NOT_FOUND);
3028 }
3029
3030 exists = false;
3031 if (!os_file_status(new_path, &exists, &ftype) || exists) {
3032 ib_logf(IB_LOG_LEVEL_ERROR,
3033 "Cannot rename '%s' to '%s' for space ID %lu"
3034 " because the target file exists."
3035 " Remove the target file and try again.",
3036 old_path, new_path, space_id);
3037
3038 return(DB_TABLESPACE_EXISTS);
3039 }
3040
3041 return(DB_SUCCESS);
3042 }
3043
3044 /*******************************************************************//**
3045 Renames a single-table tablespace. The tablespace must be cached in the
3046 tablespace memory cache.
3047 @return TRUE if success */
3048 UNIV_INTERN
3049 ibool
fil_rename_tablespace(const char * old_name_in,ulint id,const char * new_name,const char * new_path_in)3050 fil_rename_tablespace(
3051 /*==================*/
3052 const char* old_name_in, /*!< in: old table name in the
3053 standard databasename/tablename
3054 format of InnoDB, or NULL if we
3055 do the rename based on the space
3056 id only */
3057 ulint id, /*!< in: space id */
3058 const char* new_name, /*!< in: new table name in the
3059 standard databasename/tablename
3060 format of InnoDB */
3061 const char* new_path_in) /*!< in: new full datafile path
3062 if the tablespace is remotely
3063 located, or NULL if it is located
3064 in the normal data directory. */
3065 {
3066 ibool success;
3067 fil_space_t* space;
3068 fil_node_t* node;
3069 ulint count = 0;
3070 char* new_path;
3071 char* old_name;
3072 char* old_path;
3073 const char* not_given = "(name not specified)";
3074
3075 ut_a(id != 0);
3076
3077 retry:
3078 count++;
3079
3080 if (!(count % 1000)) {
3081 ut_print_timestamp(stderr);
3082 fputs(" InnoDB: Warning: problems renaming ", stderr);
3083 ut_print_filename(stderr,
3084 old_name_in ? old_name_in : not_given);
3085 fputs(" to ", stderr);
3086 ut_print_filename(stderr, new_name);
3087 fprintf(stderr, ", %lu iterations\n", (ulong) count);
3088 }
3089
3090 mutex_enter(&fil_system->mutex);
3091
3092 space = fil_space_get_by_id(id);
3093
3094 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
3095
3096 if (space == NULL) {
3097 ib_logf(IB_LOG_LEVEL_ERROR,
3098 "Cannot find space id %lu in the tablespace "
3099 "memory cache, though the table '%s' in a "
3100 "rename operation should have that id.",
3101 (ulong) id, old_name_in ? old_name_in : not_given);
3102 mutex_exit(&fil_system->mutex);
3103
3104 return(FALSE);
3105 }
3106
3107 if (count > 25000) {
3108 space->stop_ios = FALSE;
3109 mutex_exit(&fil_system->mutex);
3110
3111 return(FALSE);
3112 }
3113
3114 /* We temporarily close the .ibd file because we do not trust that
3115 operating systems can rename an open file. For the closing we have to
3116 wait until there are no pending i/o's or flushes on the file. */
3117
3118 space->stop_ios = TRUE;
3119
3120 /* The following code must change when InnoDB supports
3121 multiple datafiles per tablespace. */
3122 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
3123 node = UT_LIST_GET_FIRST(space->chain);
3124
3125 if (node->n_pending > 0
3126 || node->n_pending_flushes > 0
3127 || node->being_extended) {
3128 /* There are pending i/o's or flushes or the file is
3129 currently being extended, sleep for a while and
3130 retry */
3131
3132 mutex_exit(&fil_system->mutex);
3133
3134 os_thread_sleep(20000);
3135
3136 goto retry;
3137
3138 } else if (node->modification_counter > node->flush_counter) {
3139 /* Flush the space */
3140
3141 mutex_exit(&fil_system->mutex);
3142
3143 os_thread_sleep(20000);
3144
3145 fil_flush(id);
3146
3147 goto retry;
3148
3149 } else if (node->open) {
3150 /* Close the file */
3151
3152 fil_node_close_file(node, fil_system);
3153 }
3154
3155 /* Check that the old name in the space is right */
3156
3157 if (old_name_in) {
3158 old_name = mem_strdup(old_name_in);
3159 ut_a(strcmp(space->name, old_name) == 0);
3160 } else {
3161 old_name = mem_strdup(space->name);
3162 }
3163 old_path = mem_strdup(node->name);
3164
3165 /* Rename the tablespace and the node in the memory cache */
3166 new_path = new_path_in ? mem_strdup(new_path_in)
3167 : fil_make_ibd_name(new_name, false);
3168
3169 success = fil_rename_tablespace_in_mem(
3170 space, node, new_name, new_path);
3171
3172 if (success) {
3173
3174 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3175 goto skip_second_rename; );
3176
3177 success = os_file_rename(
3178 innodb_file_data_key, old_path, new_path);
3179
3180 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3181 skip_second_rename:
3182 success = FALSE; );
3183
3184 if (!success) {
3185 /* We have to revert the changes we made
3186 to the tablespace memory cache */
3187
3188 ut_a(fil_rename_tablespace_in_mem(
3189 space, node, old_name, old_path));
3190 }
3191 }
3192
3193 space->stop_ios = FALSE;
3194
3195 mutex_exit(&fil_system->mutex);
3196
3197 #ifndef UNIV_HOTBACKUP
3198 if (success && !recv_recovery_on) {
3199 mtr_t mtr;
3200
3201 mtr_start(&mtr);
3202
3203 fil_op_write_log(MLOG_FILE_RENAME, id, 0, 0, old_name, new_name,
3204 &mtr);
3205 mtr_commit(&mtr);
3206 }
3207 #endif /* !UNIV_HOTBACKUP */
3208
3209 mem_free(new_path);
3210 mem_free(old_path);
3211 mem_free(old_name);
3212
3213 return(success);
3214 }
3215
3216 /*******************************************************************//**
3217 Creates a new InnoDB Symbolic Link (ISL) file. It is always created
3218 under the 'datadir' of MySQL. The datadir is the directory of a
3219 running mysqld program. We can refer to it by simply using the path '.'.
3220 @return DB_SUCCESS or error code */
3221 UNIV_INTERN
3222 dberr_t
fil_create_link_file(const char * tablename,const char * filepath)3223 fil_create_link_file(
3224 /*=================*/
3225 const char* tablename, /*!< in: tablename */
3226 const char* filepath) /*!< in: pathname of tablespace */
3227 {
3228 dberr_t err = DB_SUCCESS;
3229 char* link_filepath;
3230 char* prev_filepath = fil_read_link_file(tablename);
3231
3232 ut_ad(!srv_read_only_mode);
3233
3234 if (prev_filepath) {
3235 /* Truncate will call this with an existing
3236 link file which contains the same filepath. */
3237 if (0 == strcmp(prev_filepath, filepath)) {
3238 mem_free(prev_filepath);
3239 return(DB_SUCCESS);
3240 }
3241 mem_free(prev_filepath);
3242 }
3243
3244 link_filepath = fil_make_isl_name(tablename);
3245
3246 /** Check if the file already exists. */
3247 FILE* file = NULL;
3248 ibool exists;
3249 os_file_type_t ftype;
3250
3251 bool success = os_file_status(link_filepath, &exists, &ftype);
3252
3253 ulint error = 0;
3254 if (success && !exists) {
3255 file = fopen(link_filepath, "w");
3256 if (file == NULL) {
3257 /* This call will print its own error message */
3258 error = os_file_get_last_error(true);
3259 }
3260 } else {
3261 error = OS_FILE_ALREADY_EXISTS;
3262 }
3263 if (error != 0) {
3264
3265 ut_print_timestamp(stderr);
3266 fputs(" InnoDB: Cannot create file ", stderr);
3267 ut_print_filename(stderr, link_filepath);
3268 fputs(".\n", stderr);
3269
3270 if (error == OS_FILE_ALREADY_EXISTS) {
3271 fputs("InnoDB: The link file: ", stderr);
3272 ut_print_filename(stderr, filepath);
3273 fputs(" already exists.\n", stderr);
3274 err = DB_TABLESPACE_EXISTS;
3275
3276 } else if (error == OS_FILE_DISK_FULL) {
3277 err = DB_OUT_OF_FILE_SPACE;
3278
3279 } else {
3280 err = DB_ERROR;
3281 }
3282
3283 /* file is not open, no need to close it. */
3284 mem_free(link_filepath);
3285 return(err);
3286 }
3287
3288 ulint rbytes = fwrite(filepath, 1, strlen(filepath), file);
3289 if (rbytes != strlen(filepath)) {
3290 os_file_get_last_error(true);
3291 ib_logf(IB_LOG_LEVEL_ERROR,
3292 "cannot write link file "
3293 "%s",filepath);
3294 err = DB_ERROR;
3295 }
3296
3297 /* Close the file, we only need it at startup */
3298 fclose(file);
3299
3300 mem_free(link_filepath);
3301
3302 return(err);
3303 }
3304
3305 /*******************************************************************//**
3306 Deletes an InnoDB Symbolic Link (ISL) file. */
3307 UNIV_INTERN
3308 void
fil_delete_link_file(const char * tablename)3309 fil_delete_link_file(
3310 /*=================*/
3311 const char* tablename) /*!< in: name of table */
3312 {
3313 char* link_filepath = fil_make_isl_name(tablename);
3314
3315 os_file_delete_if_exists(innodb_file_data_key, link_filepath);
3316
3317 mem_free(link_filepath);
3318 }
3319
3320 /*******************************************************************//**
3321 Reads an InnoDB Symbolic Link (ISL) file.
3322 It is always created under the 'datadir' of MySQL. The name is of the
3323 form {databasename}/{tablename}. and the isl file is expected to be in a
3324 '{databasename}' directory called '{tablename}.isl'. The caller must free
3325 the memory of the null-terminated path returned if it is not null.
3326 @return own: filepath found in link file, NULL if not found. */
3327 UNIV_INTERN
3328 char*
fil_read_link_file(const char * name)3329 fil_read_link_file(
3330 /*===============*/
3331 const char* name) /*!< in: tablespace name */
3332 {
3333 char* filepath = NULL;
3334 char* link_filepath;
3335 FILE* file = NULL;
3336
3337 /* The .isl file is in the 'normal' tablespace location. */
3338 link_filepath = fil_make_isl_name(name);
3339
3340 file = fopen(link_filepath, "r+b");
3341
3342 mem_free(link_filepath);
3343
3344 if (file) {
3345 filepath = static_cast<char*>(mem_alloc(OS_FILE_MAX_PATH));
3346
3347 os_file_read_string(file, filepath, OS_FILE_MAX_PATH);
3348 fclose(file);
3349
3350 if (strlen(filepath)) {
3351 /* Trim whitespace from end of filepath */
3352 ulint lastch = strlen(filepath) - 1;
3353 while (lastch > 4 && filepath[lastch] <= 0x20) {
3354 filepath[lastch--] = 0x00;
3355 }
3356 srv_normalize_path_for_win(filepath);
3357 }
3358 }
3359
3360 return(filepath);
3361 }
3362
3363 /*******************************************************************//**
3364 Opens a handle to the file linked to in an InnoDB Symbolic Link file.
3365 @return TRUE if remote linked tablespace file is found and opened. */
3366 UNIV_INTERN
3367 ibool
fil_open_linked_file(const char * tablename,char ** remote_filepath,pfs_os_file_t * remote_file)3368 fil_open_linked_file(
3369 /*===============*/
3370 const char* tablename, /*!< in: database/tablename */
3371 char** remote_filepath,/*!< out: remote filepath */
3372 pfs_os_file_t* remote_file) /*!< out: remote file handle */
3373
3374 {
3375 ibool success;
3376
3377 *remote_filepath = fil_read_link_file(tablename);
3378 if (*remote_filepath == NULL) {
3379 return(FALSE);
3380 }
3381
3382 /* The filepath provided is different from what was
3383 found in the link file. */
3384 *remote_file = os_file_create_simple_no_error_handling(
3385 innodb_file_data_key, *remote_filepath,
3386 OS_FILE_OPEN, OS_FILE_READ_ONLY,
3387 &success);
3388
3389 if (!success) {
3390 char* link_filepath = fil_make_isl_name(tablename);
3391
3392 /* The following call prints an error message */
3393 os_file_get_last_error(true);
3394
3395 ib_logf(IB_LOG_LEVEL_ERROR,
3396 "A link file was found named '%s' "
3397 "but the linked tablespace '%s' "
3398 "could not be opened.",
3399 link_filepath, *remote_filepath);
3400
3401 mem_free(link_filepath);
3402 mem_free(*remote_filepath);
3403 *remote_filepath = NULL;
3404 }
3405
3406 return(success);
3407 }
3408
3409 /*******************************************************************//**
3410 Creates a new single-table tablespace to a database directory of MySQL.
3411 Database directories are under the 'datadir' of MySQL. The datadir is the
3412 directory of a running mysqld program. We can refer to it by simply the
3413 path '.'. Tables created with CREATE TEMPORARY TABLE we place in the temp
3414 dir of the mysqld server.
3415
3416 @return DB_SUCCESS or error code */
3417 UNIV_INTERN
3418 dberr_t
fil_create_new_single_table_tablespace(ulint space_id,const char * tablename,const char * dir_path,ulint flags,ulint flags2,ulint size)3419 fil_create_new_single_table_tablespace(
3420 /*===================================*/
3421 ulint space_id, /*!< in: space id */
3422 const char* tablename, /*!< in: the table name in the usual
3423 databasename/tablename format
3424 of InnoDB */
3425 const char* dir_path, /*!< in: NULL or a dir path */
3426 ulint flags, /*!< in: tablespace flags */
3427 ulint flags2, /*!< in: table flags2 */
3428 ulint size) /*!< in: the initial size of the
3429 tablespace file in pages,
3430 must be >= FIL_IBD_FILE_INITIAL_SIZE */
3431 {
3432 pfs_os_file_t file;
3433
3434 ibool ret;
3435 dberr_t err;
3436 byte* buf2;
3437 byte* page;
3438 char* path;
3439 ibool success;
3440 /* TRUE if a table is created with CREATE TEMPORARY TABLE */
3441 bool is_temp = !!(flags2 & DICT_TF2_TEMPORARY);
3442 bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
3443
3444 ut_a(space_id > 0);
3445 ut_ad(!srv_read_only_mode);
3446 ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
3447 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
3448 ut_a(fsp_flags_is_valid(flags));
3449
3450 if (is_temp) {
3451 /* Temporary table filepath */
3452 ut_ad(dir_path);
3453 path = fil_make_ibd_name(dir_path, true);
3454 } else if (has_data_dir) {
3455 ut_ad(dir_path);
3456 path = os_file_make_remote_pathname(dir_path, tablename, "ibd");
3457
3458 /* Since this tablespace file will be created in a
3459 remote directory, let's create the subdirectories
3460 in the path, if they are not there already. */
3461 success = os_file_create_subdirs_if_needed(path);
3462 if (!success) {
3463 err = DB_ERROR;
3464 goto error_exit_3;
3465 }
3466 } else {
3467 path = fil_make_ibd_name(tablename, false);
3468 }
3469
3470 file = os_file_create(
3471 innodb_file_data_key, path,
3472 OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
3473 OS_FILE_NORMAL,
3474 OS_DATA_FILE,
3475 &ret);
3476
3477 if (ret == FALSE) {
3478 /* The following call will print an error message */
3479 ulint error = os_file_get_last_error(true);
3480
3481 ib_logf(IB_LOG_LEVEL_ERROR,
3482 "Cannot create file '%s'\n", path);
3483
3484 if (error == OS_FILE_ALREADY_EXISTS) {
3485 ib_logf(IB_LOG_LEVEL_ERROR,
3486 "The file '%s' already exists though the "
3487 "corresponding table did not exist "
3488 "in the InnoDB data dictionary. "
3489 "Have you moved InnoDB .ibd files "
3490 "around without using the SQL commands "
3491 "DISCARD TABLESPACE and IMPORT TABLESPACE, "
3492 "or did mysqld crash in the middle of "
3493 "CREATE TABLE? "
3494 "You can resolve the problem by removing "
3495 "the file '%s' under the 'datadir' of MySQL.",
3496 path, path);
3497
3498 err = DB_TABLESPACE_EXISTS;
3499 goto error_exit_3;
3500 }
3501
3502 if (error == OS_FILE_DISK_FULL) {
3503 err = DB_OUT_OF_FILE_SPACE;
3504 goto error_exit_3;
3505 }
3506
3507 err = DB_ERROR;
3508 goto error_exit_3;
3509 }
3510
3511 ret = os_file_set_size(path, file, size * UNIV_PAGE_SIZE);
3512
3513 if (!ret) {
3514 err = DB_OUT_OF_FILE_SPACE;
3515 goto error_exit_2;
3516 }
3517
3518 /* printf("Creating tablespace %s id %lu\n", path, space_id); */
3519
3520 /* We have to write the space id to the file immediately and flush the
3521 file to disk. This is because in crash recovery we must be aware what
3522 tablespaces exist and what are their space id's, so that we can apply
3523 the log records to the right file. It may take quite a while until
3524 buffer pool flush algorithms write anything to the file and flush it to
3525 disk. If we would not write here anything, the file would be filled
3526 with zeros from the call of os_file_set_size(), until a buffer pool
3527 flush would write to it. */
3528
3529 buf2 = static_cast<byte*>(ut_malloc(3 * UNIV_PAGE_SIZE));
3530 /* Align the memory for file i/o if we might have O_DIRECT set */
3531 page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
3532
3533 memset(page, '\0', UNIV_PAGE_SIZE);
3534
3535 /* Add the UNIV_PAGE_SIZE to the table flags and write them to the
3536 tablespace header. */
3537 flags = fsp_flags_set_page_size(flags, UNIV_PAGE_SIZE);
3538 fsp_header_init_fields(page, space_id, flags);
3539 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
3540
3541 if (!(fsp_flags_is_compressed(flags))) {
3542 buf_flush_init_for_writing(page, NULL, 0);
3543 ret = os_file_write(path, file, page, 0, UNIV_PAGE_SIZE);
3544 } else {
3545 page_zip_des_t page_zip;
3546 ulint zip_size;
3547
3548 zip_size = fsp_flags_get_zip_size(flags);
3549
3550 page_zip_set_size(&page_zip, zip_size);
3551 page_zip.data = page + UNIV_PAGE_SIZE;
3552 #ifdef UNIV_DEBUG
3553 page_zip.m_start =
3554 #endif /* UNIV_DEBUG */
3555 page_zip.m_end = page_zip.m_nonempty =
3556 page_zip.n_blobs = 0;
3557 buf_flush_init_for_writing(page, &page_zip, 0);
3558 ret = os_file_write(path, file, page_zip.data, 0, zip_size);
3559 }
3560
3561 ut_free(buf2);
3562
3563 if (!ret) {
3564 ib_logf(IB_LOG_LEVEL_ERROR,
3565 "Could not write the first page to tablespace "
3566 "'%s'", path);
3567
3568 err = DB_ERROR;
3569 goto error_exit_2;
3570 }
3571
3572 ret = os_file_flush(file);
3573
3574 if (!ret) {
3575 ib_logf(IB_LOG_LEVEL_ERROR,
3576 "File flush of tablespace '%s' failed", path);
3577 err = DB_ERROR;
3578 goto error_exit_2;
3579 }
3580
3581 if (has_data_dir) {
3582 /* Now that the IBD file is created, make the ISL file. */
3583 err = fil_create_link_file(tablename, path);
3584 if (err != DB_SUCCESS) {
3585 goto error_exit_2;
3586 }
3587 }
3588
3589 success = fil_space_create(tablename, space_id, flags, FIL_TABLESPACE);
3590 if (!success || !fil_node_create(path, size, space_id, FALSE)) {
3591 err = DB_ERROR;
3592 goto error_exit_1;
3593 }
3594
3595 #ifndef UNIV_HOTBACKUP
3596 {
3597 mtr_t mtr;
3598 ulint mlog_file_flag = 0;
3599
3600 if (is_temp) {
3601 mlog_file_flag |= MLOG_FILE_FLAG_TEMP;
3602 }
3603
3604 mtr_start(&mtr);
3605
3606 fil_op_write_log(flags
3607 ? MLOG_FILE_CREATE2
3608 : MLOG_FILE_CREATE,
3609 space_id, mlog_file_flag, flags,
3610 tablename, NULL, &mtr);
3611
3612 mtr_commit(&mtr);
3613 }
3614 #endif
3615 err = DB_SUCCESS;
3616
3617 /* Error code is set. Cleanup the various variables used.
3618 These labels reflect the order in which variables are assigned or
3619 actions are done. */
3620 error_exit_1:
3621 if (has_data_dir && err != DB_SUCCESS) {
3622 fil_delete_link_file(tablename);
3623 }
3624 error_exit_2:
3625 os_file_close(file);
3626 if (err != DB_SUCCESS) {
3627 os_file_delete(innodb_file_data_key, path);
3628 }
3629 error_exit_3:
3630 mem_free(path);
3631
3632 return(err);
3633 }
3634
3635 #ifndef UNIV_HOTBACKUP
3636 /********************************************************************//**
3637 Report information about a bad tablespace. */
3638 static
3639 void
fil_report_bad_tablespace(const char * filepath,const char * check_msg,ulint found_id,ulint found_flags,ulint expected_id,ulint expected_flags)3640 fil_report_bad_tablespace(
3641 /*======================*/
3642 const char* filepath, /*!< in: filepath */
3643 const char* check_msg, /*!< in: fil_check_first_page() */
3644 ulint found_id, /*!< in: found space ID */
3645 ulint found_flags, /*!< in: found flags */
3646 ulint expected_id, /*!< in: expected space id */
3647 ulint expected_flags) /*!< in: expected flags */
3648 {
3649 if (check_msg) {
3650 ib_logf(IB_LOG_LEVEL_ERROR,
3651 "Error %s in file '%s',"
3652 "tablespace id=%lu, flags=%lu. "
3653 "Please refer to "
3654 REFMAN "innodb-troubleshooting-datadict.html "
3655 "for how to resolve the issue.",
3656 check_msg, filepath,
3657 (ulong) expected_id, (ulong) expected_flags);
3658 return;
3659 }
3660
3661 ib_logf(IB_LOG_LEVEL_ERROR,
3662 "In file '%s', tablespace id and flags are %lu and %lu, "
3663 "but in the InnoDB data dictionary they are %lu and %lu. "
3664 "Have you moved InnoDB .ibd files around without using the "
3665 "commands DISCARD TABLESPACE and IMPORT TABLESPACE? "
3666 "Please refer to "
3667 REFMAN "innodb-troubleshooting-datadict.html "
3668 "for how to resolve the issue.",
3669 filepath, (ulong) found_id, (ulong) found_flags,
3670 (ulong) expected_id, (ulong) expected_flags);
3671 }
3672
3673 /********************************************************************//**
3674 Tries to open a single-table tablespace and optionally checks that the
3675 space id in it is correct. If this does not succeed, print an error message
3676 to the .err log. This function is used to open a tablespace when we start
3677 mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
3678
3679 NOTE that we assume this operation is used either at the database startup
3680 or under the protection of the dictionary mutex, so that two users cannot
3681 race here. This operation does not leave the file associated with the
3682 tablespace open, but closes it after we have looked at the space id in it.
3683
3684 If the validate boolean is set, we read the first page of the file and
3685 check that the space id in the file is what we expect. We assume that
3686 this function runs much faster if no check is made, since accessing the
3687 file inode probably is much faster (the OS caches them) than accessing
3688 the first page of the file. This boolean may be initially FALSE, but if
3689 a remote tablespace is found it will be changed to true.
3690
3691 If the fix_dict boolean is set, then it is safe to use an internal SQL
3692 statement to update the dictionary tables if they are incorrect.
3693
3694 @return DB_SUCCESS or error code */
3695 UNIV_INTERN
3696 dberr_t
fil_open_single_table_tablespace(bool validate,bool fix_dict,ulint id,ulint flags,const char * tablename,const char * path_in)3697 fil_open_single_table_tablespace(
3698 /*=============================*/
3699 bool validate, /*!< in: Do we validate tablespace? */
3700 bool fix_dict, /*!< in: Can we fix the dictionary? */
3701 ulint id, /*!< in: space id */
3702 ulint flags, /*!< in: tablespace flags */
3703 const char* tablename, /*!< in: table name in the
3704 databasename/tablename format */
3705 const char* path_in) /*!< in: tablespace filepath */
3706 {
3707 dberr_t err = DB_SUCCESS;
3708 bool dict_filepath_same_as_default = false;
3709 bool link_file_found = false;
3710 bool link_file_is_bad = false;
3711 fsp_open_info def;
3712 fsp_open_info dict;
3713 fsp_open_info remote;
3714 ulint tablespaces_found = 0;
3715 ulint valid_tablespaces_found = 0;
3716
3717 #ifdef UNIV_SYNC_DEBUG
3718 ut_ad(!fix_dict || rw_lock_own(&dict_operation_lock, RW_LOCK_EX));
3719 #endif /* UNIV_SYNC_DEBUG */
3720 ut_ad(!fix_dict || mutex_own(&(dict_sys->mutex)));
3721
3722 if (!fsp_flags_is_valid(flags)) {
3723 return(DB_CORRUPTION);
3724 }
3725
3726 /* If the tablespace was relocated, we do not
3727 compare the DATA_DIR flag */
3728 ulint mod_flags = flags & ~FSP_FLAGS_MASK_DATA_DIR;
3729
3730 memset(&def, 0, sizeof(def));
3731 memset(&dict, 0, sizeof(dict));
3732 memset(&remote, 0, sizeof(remote));
3733
3734 /* Discover the correct filepath. We will always look for an ibd
3735 in the default location. If it is remote, it should not be here. */
3736 def.filepath = fil_make_ibd_name(tablename, false);
3737
3738 /* The path_in was read from SYS_DATAFILES. */
3739 if (path_in) {
3740 if (strcmp(def.filepath, path_in)) {
3741 dict.filepath = mem_strdup(path_in);
3742 /* possibility of multiple files. */
3743 validate = true;
3744 } else {
3745 dict_filepath_same_as_default = true;
3746 }
3747 }
3748
3749 link_file_found = fil_open_linked_file(
3750 tablename, &remote.filepath, &remote.file);
3751 remote.success = link_file_found;
3752 if (remote.success) {
3753 /* possibility of multiple files. */
3754 validate = true;
3755 tablespaces_found++;
3756
3757 /* A link file was found. MySQL does not allow a DATA
3758 DIRECTORY to be be the same as the default filepath. */
3759 ut_a(strcmp(def.filepath, remote.filepath));
3760
3761 /* If there was a filepath found in SYS_DATAFILES,
3762 we hope it was the same as this remote.filepath found
3763 in the ISL file. */
3764 if (dict.filepath
3765 && (0 == strcmp(dict.filepath, remote.filepath))) {
3766 remote.success = FALSE;
3767 os_file_close(remote.file);
3768 mem_free(remote.filepath);
3769 remote.filepath = NULL;
3770 tablespaces_found--;
3771 }
3772 }
3773
3774 /* Attempt to open the tablespace at other possible filepaths. */
3775 if (dict.filepath) {
3776 dict.file = os_file_create_simple_no_error_handling(
3777 innodb_file_data_key, dict.filepath, OS_FILE_OPEN,
3778 OS_FILE_READ_ONLY, &dict.success);
3779 if (dict.success) {
3780 /* possibility of multiple files. */
3781 validate = true;
3782 tablespaces_found++;
3783 }
3784 }
3785
3786 /* Always look for a file at the default location. */
3787 ut_a(def.filepath);
3788 def.file = os_file_create_simple_no_error_handling(
3789 innodb_file_data_key, def.filepath, OS_FILE_OPEN,
3790 OS_FILE_READ_ONLY, &def.success);
3791 if (def.success) {
3792 tablespaces_found++;
3793 }
3794
3795 /* We have now checked all possible tablespace locations and
3796 have a count of how many we found. If things are normal, we
3797 only found 1. */
3798 if (!validate && tablespaces_found == 1) {
3799 goto skip_validate;
3800 }
3801
3802 /* Read the first page of the datadir tablespace, if found. */
3803 if (def.success) {
3804 def.check_msg = fil_read_first_page(
3805 def.file, FALSE, &def.flags, &def.id,
3806 #ifdef UNIV_LOG_ARCHIVE
3807 &space_arch_log_no, &space_arch_log_no,
3808 #endif /* UNIV_LOG_ARCHIVE */
3809 &def.lsn, &def.lsn);
3810 def.valid = !def.check_msg;
3811
3812 /* Validate this single-table-tablespace with SYS_TABLES,
3813 but do not compare the DATA_DIR flag, in case the
3814 tablespace was relocated. */
3815 if (def.valid && def.id == id
3816 && (def.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3817 valid_tablespaces_found++;
3818 } else {
3819 def.valid = false;
3820 /* Do not use this tablespace. */
3821 fil_report_bad_tablespace(
3822 def.filepath, def.check_msg, def.id,
3823 def.flags, id, flags);
3824 }
3825 }
3826
3827 /* Read the first page of the remote tablespace */
3828 if (remote.success) {
3829 remote.check_msg = fil_read_first_page(
3830 remote.file, FALSE, &remote.flags, &remote.id,
3831 #ifdef UNIV_LOG_ARCHIVE
3832 &remote.arch_log_no, &remote.arch_log_no,
3833 #endif /* UNIV_LOG_ARCHIVE */
3834 &remote.lsn, &remote.lsn);
3835 remote.valid = !remote.check_msg;
3836
3837 /* Validate this single-table-tablespace with SYS_TABLES,
3838 but do not compare the DATA_DIR flag, in case the
3839 tablespace was relocated. */
3840 if (remote.valid && remote.id == id
3841 && (remote.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3842 valid_tablespaces_found++;
3843 } else {
3844 remote.valid = false;
3845 /* Do not use this linked tablespace. */
3846 fil_report_bad_tablespace(
3847 remote.filepath, remote.check_msg, remote.id,
3848 remote.flags, id, flags);
3849 link_file_is_bad = true;
3850 }
3851 }
3852
3853 /* Read the first page of the datadir tablespace, if found. */
3854 if (dict.success) {
3855 dict.check_msg = fil_read_first_page(
3856 dict.file, FALSE, &dict.flags, &dict.id,
3857 #ifdef UNIV_LOG_ARCHIVE
3858 &dict.arch_log_no, &dict.arch_log_no,
3859 #endif /* UNIV_LOG_ARCHIVE */
3860 &dict.lsn, &dict.lsn);
3861 dict.valid = !dict.check_msg;
3862
3863 /* Validate this single-table-tablespace with SYS_TABLES,
3864 but do not compare the DATA_DIR flag, in case the
3865 tablespace was relocated. */
3866 if (dict.valid && dict.id == id
3867 && (dict.flags & ~FSP_FLAGS_MASK_DATA_DIR) == mod_flags) {
3868 valid_tablespaces_found++;
3869 } else {
3870 dict.valid = false;
3871 /* Do not use this tablespace. */
3872 fil_report_bad_tablespace(
3873 dict.filepath, dict.check_msg, dict.id,
3874 dict.flags, id, flags);
3875 }
3876 }
3877
3878 /* Make sense of these three possible locations.
3879 First, bail out if no tablespace files were found. */
3880 if (valid_tablespaces_found == 0) {
3881 /* The following call prints an error message */
3882 os_file_get_last_error(true);
3883
3884 ib_logf(IB_LOG_LEVEL_ERROR,
3885 "Could not find a valid tablespace file for '%s'. "
3886 "See " REFMAN "innodb-troubleshooting-datadict.html "
3887 "for how to resolve the issue.",
3888 tablename);
3889
3890 err = DB_CORRUPTION;
3891
3892 goto cleanup_and_exit;
3893 }
3894
3895 /* Do not open any tablespaces if more than one tablespace with
3896 the correct space ID and flags were found. */
3897 if (tablespaces_found > 1) {
3898 ib_logf(IB_LOG_LEVEL_ERROR,
3899 "A tablespace for %s has been found in "
3900 "multiple places;", tablename);
3901 if (def.success) {
3902 ib_logf(IB_LOG_LEVEL_ERROR,
3903 "Default location; %s, LSN=" LSN_PF
3904 ", Space ID=%lu, Flags=%lu",
3905 def.filepath, def.lsn,
3906 (ulong) def.id, (ulong) def.flags);
3907 }
3908 if (remote.success) {
3909 ib_logf(IB_LOG_LEVEL_ERROR,
3910 "Remote location; %s, LSN=" LSN_PF
3911 ", Space ID=%lu, Flags=%lu",
3912 remote.filepath, remote.lsn,
3913 (ulong) remote.id, (ulong) remote.flags);
3914 }
3915 if (dict.success) {
3916 ib_logf(IB_LOG_LEVEL_ERROR,
3917 "Dictionary location; %s, LSN=" LSN_PF
3918 ", Space ID=%lu, Flags=%lu",
3919 dict.filepath, dict.lsn,
3920 (ulong) dict.id, (ulong) dict.flags);
3921 }
3922
3923 /* Force-recovery will allow some tablespaces to be
3924 skipped by REDO if there was more than one file found.
3925 Unlike during the REDO phase of recovery, we now know
3926 if the tablespace is valid according to the dictionary,
3927 which was not available then. So if we did not force
3928 recovery and there is only one good tablespace, ignore
3929 any bad tablespaces. */
3930 if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
3931 ib_logf(IB_LOG_LEVEL_ERROR,
3932 "Will not open the tablespace for '%s'",
3933 tablename);
3934
3935 if (def.success != def.valid
3936 || dict.success != dict.valid
3937 || remote.success != remote.valid) {
3938 err = DB_CORRUPTION;
3939 } else {
3940 err = DB_ERROR;
3941 }
3942 goto cleanup_and_exit;
3943 }
3944
3945 /* There is only one valid tablespace found and we did
3946 not use srv_force_recovery during REDO. Use this one
3947 tablespace and clean up invalid tablespace pointers */
3948 if (def.success && !def.valid) {
3949 def.success = false;
3950 os_file_close(def.file);
3951 tablespaces_found--;
3952 }
3953 if (dict.success && !dict.valid) {
3954 dict.success = false;
3955 os_file_close(dict.file);
3956 /* Leave dict.filepath so that SYS_DATAFILES
3957 can be corrected below. */
3958 tablespaces_found--;
3959 }
3960 if (remote.success && !remote.valid) {
3961 remote.success = false;
3962 os_file_close(remote.file);
3963 mem_free(remote.filepath);
3964 remote.filepath = NULL;
3965 tablespaces_found--;
3966 }
3967 }
3968
3969 /* At this point, there should be only one filepath. */
3970 ut_a(tablespaces_found == 1);
3971 ut_a(valid_tablespaces_found == 1);
3972
3973 /* Only fix the dictionary at startup when there is only one thread.
3974 Calls to dict_load_table() can be done while holding other latches. */
3975 if (!fix_dict) {
3976 goto skip_validate;
3977 }
3978
3979 /* We may need to change what is stored in SYS_DATAFILES or
3980 SYS_TABLESPACES or adjust the link file.
3981 Since a failure to update SYS_TABLESPACES or SYS_DATAFILES does
3982 not prevent opening and using the single_table_tablespace either
3983 this time or the next, we do not check the return code or fail
3984 to open the tablespace. But dict_update_filepath() will issue a
3985 warning to the log. */
3986 if (dict.filepath) {
3987 if (remote.success) {
3988 dict_update_filepath(id, remote.filepath);
3989 } else if (def.success) {
3990 dict_update_filepath(id, def.filepath);
3991 if (link_file_is_bad) {
3992 fil_delete_link_file(tablename);
3993 }
3994 } else if (!link_file_found || link_file_is_bad) {
3995 ut_ad(dict.success);
3996 /* Fix the link file if we got our filepath
3997 from the dictionary but a link file did not
3998 exist or it did not point to a valid file. */
3999 fil_delete_link_file(tablename);
4000 fil_create_link_file(tablename, dict.filepath);
4001 }
4002
4003 } else if (remote.success && dict_filepath_same_as_default) {
4004 dict_update_filepath(id, remote.filepath);
4005
4006 } else if (remote.success && path_in == NULL) {
4007 /* SYS_DATAFILES record for this space ID was not found. */
4008 dict_insert_tablespace_and_filepath(
4009 id, tablename, remote.filepath, flags);
4010 }
4011
4012 skip_validate:
4013 if (err != DB_SUCCESS) {
4014 ; // Don't load the tablespace into the cache
4015 } else if (!fil_space_create(tablename, id, flags, FIL_TABLESPACE)) {
4016 err = DB_ERROR;
4017 } else {
4018 /* We do not measure the size of the file, that is why
4019 we pass the 0 below */
4020
4021 if (!fil_node_create(remote.success ? remote.filepath :
4022 dict.success ? dict.filepath :
4023 def.filepath, 0, id, FALSE)) {
4024 err = DB_ERROR;
4025 }
4026 }
4027
4028 cleanup_and_exit:
4029 if (remote.success) {
4030 os_file_close(remote.file);
4031 }
4032 if (remote.filepath) {
4033 mem_free(remote.filepath);
4034 }
4035 if (dict.success) {
4036 os_file_close(dict.file);
4037 }
4038 if (dict.filepath) {
4039 mem_free(dict.filepath);
4040 }
4041 if (def.success) {
4042 os_file_close(def.file);
4043 }
4044 mem_free(def.filepath);
4045
4046 return(err);
4047 }
4048 #endif /* !UNIV_HOTBACKUP */
4049
4050 #ifdef UNIV_HOTBACKUP
4051 /*******************************************************************//**
4052 Allocates a file name for an old version of a single-table tablespace.
4053 The string must be freed by caller with mem_free()!
4054 @return own: file name */
4055 static
4056 char*
fil_make_ibbackup_old_name(const char * name)4057 fil_make_ibbackup_old_name(
4058 /*=======================*/
4059 const char* name) /*!< in: original file name */
4060 {
4061 static const char suffix[] = "_ibbackup_old_vers_";
4062 char* path;
4063 ulint len = strlen(name);
4064
4065 path = static_cast<char*>(mem_alloc(len + (15 + sizeof suffix)));
4066
4067 memcpy(path, name, len);
4068 memcpy(path + len, suffix, (sizeof suffix) - 1);
4069 ut_sprintf_timestamp_without_extra_chars(
4070 path + len + ((sizeof suffix) - 1));
4071 return(path);
4072 }
4073 #endif /* UNIV_HOTBACKUP */
4074
4075
4076 /*******************************************************************//**
4077 Determine the space id of the given file descriptor by reading a few
4078 pages from the beginning of the .ibd file.
4079 @return true if space id was successfully identified, or false. */
4080 static
4081 bool
fil_user_tablespace_find_space_id(fsp_open_info * fsp)4082 fil_user_tablespace_find_space_id(
4083 /*==============================*/
4084 fsp_open_info* fsp) /* in/out: contains file descriptor, which is
4085 used as input. contains space_id, which is
4086 the output */
4087 {
4088 bool st;
4089 os_offset_t file_size;
4090
4091 file_size = os_file_get_size(fsp->file);
4092
4093 if (file_size == (os_offset_t) -1) {
4094 ib_logf(IB_LOG_LEVEL_ERROR, "Could not get file size: %s",
4095 fsp->filepath);
4096 return(false);
4097 }
4098
4099 /* Assuming a page size, read the space_id from each page and store it
4100 in a map. Find out which space_id is agreed on by majority of the
4101 pages. Choose that space_id. */
4102 for (ulint page_size = UNIV_ZIP_SIZE_MIN;
4103 page_size <= UNIV_PAGE_SIZE_MAX; page_size <<= 1) {
4104
4105 /* map[space_id] = count of pages */
4106 std::map<ulint, ulint> verify;
4107
4108 ulint page_count = 64;
4109 ulint valid_pages = 0;
4110
4111 /* Adjust the number of pages to analyze based on file size */
4112 while ((page_count * page_size) > file_size) {
4113 --page_count;
4114 }
4115
4116 ib_logf(IB_LOG_LEVEL_INFO, "Page size:%lu Pages to analyze:"
4117 "%lu", page_size, page_count);
4118
4119 byte* buf = static_cast<byte*>(ut_malloc(2*page_size));
4120 byte* page = static_cast<byte*>(ut_align(buf, page_size));
4121
4122 for (ulint j = 0; j < page_count; ++j) {
4123
4124 st = os_file_read(fsp->file, page, (j* page_size), page_size);
4125
4126 if (!st) {
4127 ib_logf(IB_LOG_LEVEL_INFO,
4128 "READ FAIL: page_no:%lu", j);
4129 continue;
4130 }
4131
4132 bool uncompressed_ok = false;
4133
4134 /* For uncompressed pages, the page size must be equal
4135 to UNIV_PAGE_SIZE. */
4136 if (page_size == UNIV_PAGE_SIZE) {
4137 uncompressed_ok = !buf_page_is_corrupted(
4138 false, page, 0);
4139 }
4140
4141 bool compressed_ok = !buf_page_is_corrupted(
4142 false, page, page_size);
4143
4144 if (uncompressed_ok || compressed_ok) {
4145
4146 ulint space_id = mach_read_from_4(page
4147 + FIL_PAGE_SPACE_ID);
4148
4149 if (space_id > 0) {
4150 ib_logf(IB_LOG_LEVEL_INFO,
4151 "VALID: space:%lu "
4152 "page_no:%lu page_size:%lu",
4153 space_id, j, page_size);
4154 verify[space_id]++;
4155 ++valid_pages;
4156 }
4157 }
4158 }
4159
4160 ut_free(buf);
4161
4162 ib_logf(IB_LOG_LEVEL_INFO, "Page size: %lu, Possible space_id "
4163 "count:%lu", page_size, (ulint) verify.size());
4164
4165 const ulint pages_corrupted = 3;
4166 for (ulint missed = 0; missed <= pages_corrupted; ++missed) {
4167
4168 for (std::map<ulint, ulint>::iterator
4169 m = verify.begin(); m != verify.end(); ++m ) {
4170
4171 ib_logf(IB_LOG_LEVEL_INFO, "space_id:%lu, "
4172 "Number of pages matched: %lu/%lu "
4173 "(%lu)", m->first, m->second,
4174 valid_pages, page_size);
4175
4176 if (m->second == (valid_pages - missed)) {
4177
4178 ib_logf(IB_LOG_LEVEL_INFO,
4179 "Chosen space:%lu\n", m->first);
4180
4181 fsp->id = m->first;
4182 return(true);
4183 }
4184 }
4185
4186 }
4187 }
4188
4189 return(false);
4190 }
4191
4192 /*******************************************************************//**
4193 Finds the given page_no of the given space id from the double write buffer,
4194 and copies it to the corresponding .ibd file.
4195 @return true if copy was successful, or false. */
4196 bool
fil_user_tablespace_restore_page(fsp_open_info * fsp,ulint page_no)4197 fil_user_tablespace_restore_page(
4198 /*==============================*/
4199 fsp_open_info* fsp, /* in: contains space id and .ibd
4200 file information */
4201 ulint page_no) /* in: page_no to obtain from double
4202 write buffer */
4203 {
4204 bool err;
4205 ulint flags;
4206 ulint zip_size;
4207 ulint page_size;
4208 ulint buflen;
4209 byte* page;
4210
4211 ib_logf(IB_LOG_LEVEL_INFO, "Restoring page %lu of tablespace %lu",
4212 page_no, fsp->id);
4213
4214 // find if double write buffer has page_no of given space id
4215 page = recv_sys->dblwr.find_page(fsp->id, page_no);
4216
4217 if (!page) {
4218 ib_logf(IB_LOG_LEVEL_WARN, "Doublewrite does not have "
4219 "page_no=%lu of space: %lu", page_no, fsp->id);
4220 err = false;
4221 goto out;
4222 }
4223
4224 flags = mach_read_from_4(FSP_HEADER_OFFSET + FSP_SPACE_FLAGS + page);
4225 zip_size = fsp_flags_get_zip_size(flags);
4226 page_size = fsp_flags_get_page_size(flags);
4227
4228 ut_ad(page_no == page_get_page_no(page));
4229
4230 buflen = zip_size ? zip_size: page_size;
4231
4232 ib_logf(IB_LOG_LEVEL_INFO, "Writing %lu bytes into file: %s",
4233 buflen, fsp->filepath);
4234
4235 err = os_file_write(fsp->filepath, fsp->file, page,
4236 (zip_size ? zip_size : page_size) * page_no,
4237 buflen);
4238
4239 os_file_flush(fsp->file);
4240 out:
4241 return(err);
4242 }
4243
4244 /********************************************************************//**
4245 Opens an .ibd file and adds the associated single-table tablespace to the
4246 InnoDB fil0fil.cc data structures.
4247 Set fsp->success to TRUE if tablespace is valid, FALSE if not. */
4248 static
4249 void
fil_validate_single_table_tablespace(const char * tablename,fsp_open_info * fsp)4250 fil_validate_single_table_tablespace(
4251 /*=================================*/
4252 const char* tablename, /*!< in: database/tablename */
4253 fsp_open_info* fsp) /*!< in/out: tablespace info */
4254 {
4255 bool restore_attempted = false;
4256
4257 check_first_page:
4258 fsp->success = TRUE;
4259 if (const char* check_msg = fil_read_first_page(
4260 fsp->file, FALSE, &fsp->flags, &fsp->id,
4261 #ifdef UNIV_LOG_ARCHIVE
4262 &fsp->arch_log_no, &fsp->arch_log_no,
4263 #endif /* UNIV_LOG_ARCHIVE */
4264 &fsp->lsn, &fsp->lsn)) {
4265 ib_logf(IB_LOG_LEVEL_ERROR,
4266 "%s in tablespace %s (table %s)",
4267 check_msg, fsp->filepath, tablename);
4268 fsp->success = FALSE;
4269 }
4270
4271 if (!fsp->success) {
4272 if (!restore_attempted) {
4273 if (!fil_user_tablespace_find_space_id(fsp)) {
4274 return;
4275 }
4276 restore_attempted = true;
4277
4278 if (fsp->id > 0
4279 && !fil_user_tablespace_restore_page(fsp, 0)) {
4280 return;
4281 }
4282 goto check_first_page;
4283 }
4284 return;
4285 }
4286
4287 if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4288 ib_logf(IB_LOG_LEVEL_ERROR,
4289 "Tablespace is not sensible;"
4290 " Table: %s Space ID: %lu Filepath: %s\n",
4291 tablename, (ulong) fsp->id, fsp->filepath);
4292 fsp->success = FALSE;
4293 return;
4294 }
4295
4296 mutex_enter(&fil_system->mutex);
4297 fil_space_t* space = fil_space_get_by_id(fsp->id);
4298 mutex_exit(&fil_system->mutex);
4299 if (space != NULL) {
4300 char* prev_filepath = fil_space_get_first_path(fsp->id);
4301
4302 ib_logf(IB_LOG_LEVEL_ERROR,
4303 "Attempted to open a previously opened tablespace. "
4304 "Previous tablespace %s uses space ID: %lu at "
4305 "filepath: %s. Cannot open tablespace %s which uses "
4306 "space ID: %lu at filepath: %s",
4307 space->name, (ulong) space->id, prev_filepath,
4308 tablename, (ulong) fsp->id, fsp->filepath);
4309
4310 mem_free(prev_filepath);
4311 fsp->success = FALSE;
4312 return;
4313 }
4314
4315 fsp->success = TRUE;
4316 }
4317
4318
4319 /********************************************************************//**
4320 Opens an .ibd file and adds the associated single-table tablespace to the
4321 InnoDB fil0fil.cc data structures. */
4322 static
4323 void
fil_load_single_table_tablespace(const char * dbname,const char * filename)4324 fil_load_single_table_tablespace(
4325 /*=============================*/
4326 const char* dbname, /*!< in: database name */
4327 const char* filename) /*!< in: file name (not a path),
4328 including the .ibd or .isl extension */
4329 {
4330 char* tablename;
4331 ulint tablename_len;
4332 ulint dbname_len = strlen(dbname);
4333 ulint filename_len = strlen(filename);
4334 fsp_open_info def;
4335 fsp_open_info remote;
4336 os_offset_t size;
4337 #ifdef UNIV_HOTBACKUP
4338 fil_space_t* space;
4339 #endif
4340
4341 memset(&def, 0, sizeof(def));
4342 memset(&remote, 0, sizeof(remote));
4343
4344 /* The caller assured that the extension is ".ibd" or ".isl". */
4345 ut_ad(0 == memcmp(filename + filename_len - 4, ".ibd", 4)
4346 || 0 == memcmp(filename + filename_len - 4, ".isl", 4));
4347
4348 /* Build up the tablename in the standard form database/table. */
4349 tablename = static_cast<char*>(
4350 mem_alloc(dbname_len + filename_len + 2));
4351
4352 /* When lower_case_table_names = 2 it is possible that the
4353 dbname is in upper case ,but while storing it in fil_space_t
4354 we must convert it into lower case */
4355 sprintf(tablename, "%s" , dbname);
4356 tablename[dbname_len] = '\0';
4357
4358 if (lower_case_file_system) {
4359 dict_casedn_str(tablename);
4360 }
4361
4362 sprintf(tablename+dbname_len,"/%s",filename);
4363 tablename_len = strlen(tablename) - strlen(".ibd");
4364 tablename[tablename_len] = '\0';
4365
4366 /* There may be both .ibd and .isl file in the directory.
4367 And it is possible that the .isl file refers to a different
4368 .ibd file. If so, we open and compare them the first time
4369 one of them is sent to this function. So if this table has
4370 already been loaded, there is nothing to do.*/
4371 mutex_enter(&fil_system->mutex);
4372 if (fil_space_get_by_name(tablename)) {
4373 mem_free(tablename);
4374 mutex_exit(&fil_system->mutex);
4375 return;
4376 }
4377 mutex_exit(&fil_system->mutex);
4378
4379 /* Build up the filepath of the .ibd tablespace in the datadir.
4380 This must be freed independent of def.success. */
4381 def.filepath = fil_make_ibd_name(tablename, false);
4382
4383 #ifdef __WIN__
4384 # ifndef UNIV_HOTBACKUP
4385 /* If lower_case_table_names is 0 or 2, then MySQL allows database
4386 directory names with upper case letters. On Windows, all table and
4387 database names in InnoDB are internally always in lower case. Put the
4388 file path to lower case, so that we are consistent with InnoDB's
4389 internal data dictionary. */
4390
4391 dict_casedn_str(def.filepath);
4392 # endif /* !UNIV_HOTBACKUP */
4393 #endif
4394
4395 /* Check for a link file which locates a remote tablespace. */
4396 remote.success = fil_open_linked_file(
4397 tablename, &remote.filepath, &remote.file);
4398
4399 /* Read the first page of the remote tablespace */
4400 if (remote.success) {
4401 fil_validate_single_table_tablespace(tablename, &remote);
4402 if (!remote.success) {
4403 os_file_close(remote.file);
4404 mem_free(remote.filepath);
4405 }
4406 }
4407
4408
4409 /* Try to open the tablespace in the datadir. */
4410 def.file = os_file_create_simple_no_error_handling(
4411 innodb_file_data_key, def.filepath, OS_FILE_OPEN,
4412 OS_FILE_READ_WRITE, &def.success);
4413
4414 /* Read the first page of the remote tablespace */
4415 if (def.success) {
4416 fil_validate_single_table_tablespace(tablename, &def);
4417 if (!def.success) {
4418 os_file_close(def.file);
4419 }
4420 }
4421
4422 if (!def.success && !remote.success) {
4423 /* The following call prints an error message */
4424 os_file_get_last_error(true);
4425 fprintf(stderr,
4426 "InnoDB: Error: could not open single-table"
4427 " tablespace file %s\n", def.filepath);
4428
4429 if (!strncmp(filename,
4430 tmp_file_prefix, tmp_file_prefix_length)) {
4431 /* Ignore errors for #sql tablespaces. */
4432 mem_free(tablename);
4433 if (remote.filepath) {
4434 mem_free(remote.filepath);
4435 }
4436 if (def.filepath) {
4437 mem_free(def.filepath);
4438 }
4439 return;
4440 }
4441 no_good_file:
4442 fprintf(stderr,
4443 "InnoDB: We do not continue the crash recovery,"
4444 " because the table may become\n"
4445 "InnoDB: corrupt if we cannot apply the log"
4446 " records in the InnoDB log to it.\n"
4447 "InnoDB: To fix the problem and start mysqld:\n"
4448 "InnoDB: 1) If there is a permission problem"
4449 " in the file and mysqld cannot\n"
4450 "InnoDB: open the file, you should"
4451 " modify the permissions.\n"
4452 "InnoDB: 2) If the table is not needed, or you"
4453 " can restore it from a backup,\n"
4454 "InnoDB: then you can remove the .ibd file,"
4455 " and InnoDB will do a normal\n"
4456 "InnoDB: crash recovery and ignore that table.\n"
4457 "InnoDB: 3) If the file system or the"
4458 " disk is broken, and you cannot remove\n"
4459 "InnoDB: the .ibd file, you can set"
4460 " innodb_force_recovery > 0 in my.cnf\n"
4461 "InnoDB: and force InnoDB to continue crash"
4462 " recovery here.\n");
4463 will_not_choose:
4464 mem_free(tablename);
4465 if (remote.filepath) {
4466 mem_free(remote.filepath);
4467 }
4468 if (def.filepath) {
4469 mem_free(def.filepath);
4470 }
4471
4472 if (srv_force_recovery > 0) {
4473 ib_logf(IB_LOG_LEVEL_INFO,
4474 "innodb_force_recovery was set to %lu. "
4475 "Continuing crash recovery even though we "
4476 "cannot access the .ibd file of this table.",
4477 srv_force_recovery);
4478 return;
4479 }
4480
4481 exit(1);
4482 }
4483
4484 if (def.success && remote.success) {
4485 ib_logf(IB_LOG_LEVEL_ERROR,
4486 "Tablespaces for %s have been found in two places;\n"
4487 "Location 1: SpaceID: %lu LSN: %lu File: %s\n"
4488 "Location 2: SpaceID: %lu LSN: %lu File: %s\n"
4489 "You must delete one of them.",
4490 tablename, (ulong) def.id, (ulong) def.lsn,
4491 def.filepath, (ulong) remote.id, (ulong) remote.lsn,
4492 remote.filepath);
4493
4494 def.success = FALSE;
4495 os_file_close(def.file);
4496 os_file_close(remote.file);
4497 goto will_not_choose;
4498 }
4499
4500 /* At this point, only one tablespace is open */
4501 ut_a(def.success == !remote.success);
4502
4503 fsp_open_info* fsp = def.success ? &def : &remote;
4504
4505 /* Get and test the file size. */
4506 size = os_file_get_size(fsp->file);
4507
4508 if (size == (os_offset_t) -1) {
4509 /* The following call prints an error message */
4510 os_file_get_last_error(true);
4511
4512 ib_logf(IB_LOG_LEVEL_ERROR,
4513 "could not measure the size of single-table "
4514 "tablespace file %s", fsp->filepath);
4515
4516 os_file_close(fsp->file);
4517 goto no_good_file;
4518 }
4519
4520 /* Every .ibd file is created >= 4 pages in size. Smaller files
4521 cannot be ok. */
4522 ulong minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
4523 if (size < minimum_size) {
4524 #ifndef UNIV_HOTBACKUP
4525 ib_logf(IB_LOG_LEVEL_ERROR,
4526 "The size of single-table tablespace file %s "
4527 "is only " UINT64PF ", should be at least %lu!",
4528 fsp->filepath, size, minimum_size);
4529 os_file_close(fsp->file);
4530 goto no_good_file;
4531 #else
4532 fsp->id = ULINT_UNDEFINED;
4533 fsp->flags = 0;
4534 #endif /* !UNIV_HOTBACKUP */
4535 }
4536
4537 #ifdef UNIV_HOTBACKUP
4538 if (fsp->id == ULINT_UNDEFINED || fsp->id == 0) {
4539 char* new_path;
4540
4541 fprintf(stderr,
4542 "InnoDB: Renaming tablespace %s of id %lu,\n"
4543 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4544 "InnoDB: because its size %" PRId64 " is too small"
4545 " (< 4 pages 16 kB each),\n"
4546 "InnoDB: or the space id in the file header"
4547 " is not sensible.\n"
4548 "InnoDB: This can happen in an mysqlbackup run,"
4549 " and is not dangerous.\n",
4550 fsp->filepath, fsp->id, fsp->filepath, size);
4551 os_file_close(fsp->file);
4552
4553 new_path = fil_make_ibbackup_old_name(fsp->filepath);
4554
4555 bool success = os_file_rename(
4556 innodb_file_data_key, fsp->filepath, new_path);
4557
4558 ut_a(success);
4559
4560 mem_free(new_path);
4561
4562 goto func_exit_after_close;
4563 }
4564
4565 /* A backup may contain the same space several times, if the space got
4566 renamed at a sensitive time. Since it is enough to have one version of
4567 the space, we rename the file if a space with the same space id
4568 already exists in the tablespace memory cache. We rather rename the
4569 file than delete it, because if there is a bug, we do not want to
4570 destroy valuable data. */
4571
4572 mutex_enter(&fil_system->mutex);
4573
4574 space = fil_space_get_by_id(fsp->id);
4575
4576 if (space) {
4577 char* new_path;
4578
4579 fprintf(stderr,
4580 "InnoDB: Renaming tablespace %s of id %lu,\n"
4581 "InnoDB: to %s_ibbackup_old_vers_<timestamp>\n"
4582 "InnoDB: because space %s with the same id\n"
4583 "InnoDB: was scanned earlier. This can happen"
4584 " if you have renamed tables\n"
4585 "InnoDB: during an mysqlbackup run.\n",
4586 fsp->filepath, fsp->id, fsp->filepath,
4587 space->name);
4588 os_file_close(fsp->file);
4589
4590 new_path = fil_make_ibbackup_old_name(fsp->filepath);
4591
4592 mutex_exit(&fil_system->mutex);
4593
4594 bool success = os_file_rename(
4595 innodb_file_data_key, fsp->filepath, new_path);
4596
4597 ut_a(success);
4598
4599 mem_free(new_path);
4600
4601 goto func_exit_after_close;
4602 }
4603 mutex_exit(&fil_system->mutex);
4604 #endif /* UNIV_HOTBACKUP */
4605 ibool file_space_create_success = fil_space_create(
4606 tablename, fsp->id, fsp->flags, FIL_TABLESPACE);
4607
4608 if (!file_space_create_success) {
4609 if (srv_force_recovery > 0) {
4610 fprintf(stderr,
4611 "InnoDB: innodb_force_recovery was set"
4612 " to %lu. Continuing crash recovery\n"
4613 "InnoDB: even though the tablespace"
4614 " creation of this table failed.\n",
4615 srv_force_recovery);
4616 goto func_exit;
4617 }
4618
4619 /* Exit here with a core dump, stack, etc. */
4620 ut_a(file_space_create_success);
4621 }
4622
4623 /* We do not use the size information we have about the file, because
4624 the rounding formula for extents and pages is somewhat complex; we
4625 let fil_node_open() do that task. */
4626
4627 if (!fil_node_create(fsp->filepath, 0, fsp->id, FALSE)) {
4628 ut_error;
4629 }
4630
4631 func_exit:
4632 os_file_close(fsp->file);
4633
4634 #ifdef UNIV_HOTBACKUP
4635 func_exit_after_close:
4636 #else
4637 ut_ad(!mutex_own(&fil_system->mutex));
4638 #endif
4639 mem_free(tablename);
4640 if (remote.success) {
4641 mem_free(remote.filepath);
4642 }
4643 mem_free(def.filepath);
4644 }
4645
4646 /***********************************************************************//**
4647 A fault-tolerant function that tries to read the next file name in the
4648 directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
4649 idea is to read as much good data as we can and jump over bad data.
4650 @return 0 if ok, -1 if error even after the retries, 1 if at the end
4651 of the directory */
4652 static
4653 int
fil_file_readdir_next_file(dberr_t * err,const char * dirname,os_file_dir_t dir,os_file_stat_t * info)4654 fil_file_readdir_next_file(
4655 /*=======================*/
4656 dberr_t* err, /*!< out: this is set to DB_ERROR if an error
4657 was encountered, otherwise not changed */
4658 const char* dirname,/*!< in: directory name or path */
4659 os_file_dir_t dir, /*!< in: directory stream */
4660 os_file_stat_t* info) /*!< in/out: buffer where the
4661 info is returned */
4662 {
4663 for (ulint i = 0; i < 100; i++) {
4664 int ret = os_file_readdir_next_file(dirname, dir, info);
4665
4666 if (ret != -1) {
4667
4668 return(ret);
4669 }
4670
4671 ib_logf(IB_LOG_LEVEL_ERROR,
4672 "os_file_readdir_next_file() returned -1 in "
4673 "directory %s, crash recovery may have failed "
4674 "for some .ibd files!", dirname);
4675
4676 *err = DB_ERROR;
4677 }
4678
4679 return(-1);
4680 }
4681
4682 /********************************************************************//**
4683 At the server startup, if we need crash recovery, scans the database
4684 directories under the MySQL datadir, looking for .ibd files. Those files are
4685 single-table tablespaces. We need to know the space id in each of them so that
4686 we know into which file we should look to check the contents of a page stored
4687 in the doublewrite buffer, also to know where to apply log records where the
4688 space id is != 0.
4689 @return DB_SUCCESS or error number */
4690 UNIV_INTERN
4691 dberr_t
fil_load_single_table_tablespaces(void)4692 fil_load_single_table_tablespaces(void)
4693 /*===================================*/
4694 {
4695 int ret;
4696 char* dbpath = NULL;
4697 ulint dbpath_len = 100;
4698 os_file_dir_t dir;
4699 os_file_dir_t dbdir;
4700 os_file_stat_t dbinfo;
4701 os_file_stat_t fileinfo;
4702 dberr_t err = DB_SUCCESS;
4703
4704 /* The datadir of MySQL is always the default directory of mysqld */
4705
4706 dir = os_file_opendir(fil_path_to_mysql_datadir, TRUE);
4707
4708 if (dir == NULL) {
4709
4710 return(DB_ERROR);
4711 }
4712
4713 dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4714
4715 /* Scan all directories under the datadir. They are the database
4716 directories of MySQL. */
4717
4718 ret = fil_file_readdir_next_file(&err, fil_path_to_mysql_datadir, dir,
4719 &dbinfo);
4720 while (ret == 0) {
4721 ulint len;
4722 /* printf("Looking at %s in datadir\n", dbinfo.name); */
4723
4724 if (dbinfo.type == OS_FILE_TYPE_FILE
4725 || dbinfo.type == OS_FILE_TYPE_UNKNOWN) {
4726
4727 goto next_datadir_item;
4728 }
4729
4730 /* We found a symlink or a directory; try opening it to see
4731 if a symlink is a directory */
4732
4733 len = strlen(fil_path_to_mysql_datadir)
4734 + strlen (dbinfo.name) + 2;
4735 if (len > dbpath_len) {
4736 dbpath_len = len;
4737
4738 if (dbpath) {
4739 mem_free(dbpath);
4740 }
4741
4742 dbpath = static_cast<char*>(mem_alloc(dbpath_len));
4743 }
4744 ut_snprintf(dbpath, dbpath_len,
4745 "%s/%s", fil_path_to_mysql_datadir, dbinfo.name);
4746 srv_normalize_path_for_win(dbpath);
4747
4748 dbdir = os_file_opendir(dbpath, FALSE);
4749
4750 if (dbdir != NULL) {
4751
4752 /* We found a database directory; loop through it,
4753 looking for possible .ibd files in it */
4754
4755 ret = fil_file_readdir_next_file(&err, dbpath, dbdir,
4756 &fileinfo);
4757 while (ret == 0) {
4758
4759 if (fileinfo.type == OS_FILE_TYPE_DIR) {
4760
4761 goto next_file_item;
4762 }
4763
4764 /* We found a symlink or a file */
4765 if (strlen(fileinfo.name) > 4
4766 && (0 == strcmp(fileinfo.name
4767 + strlen(fileinfo.name) - 4,
4768 ".ibd")
4769 || 0 == strcmp(fileinfo.name
4770 + strlen(fileinfo.name) - 4,
4771 ".isl"))) {
4772 /* The name ends in .ibd or .isl;
4773 try opening the file */
4774 fil_load_single_table_tablespace(
4775 dbinfo.name, fileinfo.name);
4776 }
4777 next_file_item:
4778 ret = fil_file_readdir_next_file(&err,
4779 dbpath, dbdir,
4780 &fileinfo);
4781 }
4782
4783 if (0 != os_file_closedir(dbdir)) {
4784 fputs("InnoDB: Warning: could not"
4785 " close database directory ", stderr);
4786 ut_print_filename(stderr, dbpath);
4787 putc('\n', stderr);
4788
4789 err = DB_ERROR;
4790 }
4791 }
4792
4793 next_datadir_item:
4794 ret = fil_file_readdir_next_file(&err,
4795 fil_path_to_mysql_datadir,
4796 dir, &dbinfo);
4797 }
4798
4799 mem_free(dbpath);
4800
4801 if (0 != os_file_closedir(dir)) {
4802 fprintf(stderr,
4803 "InnoDB: Error: could not close MySQL datadir\n");
4804
4805 return(DB_ERROR);
4806 }
4807
4808 return(err);
4809 }
4810
4811 /*******************************************************************//**
4812 Returns TRUE if a single-table tablespace does not exist in the memory cache,
4813 or is being deleted there.
4814 @return TRUE if does not exist or is being deleted */
4815 UNIV_INTERN
4816 ibool
fil_tablespace_deleted_or_being_deleted_in_mem(ulint id,ib_int64_t version)4817 fil_tablespace_deleted_or_being_deleted_in_mem(
4818 /*===========================================*/
4819 ulint id, /*!< in: space id */
4820 ib_int64_t version)/*!< in: tablespace_version should be this; if
4821 you pass -1 as the value of this, then this
4822 parameter is ignored */
4823 {
4824 fil_space_t* space;
4825
4826 ut_ad(fil_system);
4827
4828 mutex_enter(&fil_system->mutex);
4829
4830 space = fil_space_get_by_id(id);
4831
4832 if (space == NULL || space->stop_new_ops) {
4833 mutex_exit(&fil_system->mutex);
4834
4835 return(TRUE);
4836 }
4837
4838 if (version != ((ib_int64_t)-1)
4839 && space->tablespace_version != version) {
4840 mutex_exit(&fil_system->mutex);
4841
4842 return(TRUE);
4843 }
4844
4845 mutex_exit(&fil_system->mutex);
4846
4847 return(FALSE);
4848 }
4849
4850 /*******************************************************************//**
4851 Returns TRUE if a single-table tablespace exists in the memory cache.
4852 @return TRUE if exists */
4853 UNIV_INTERN
4854 ibool
fil_tablespace_exists_in_mem(ulint id)4855 fil_tablespace_exists_in_mem(
4856 /*=========================*/
4857 ulint id) /*!< in: space id */
4858 {
4859 fil_space_t* space;
4860
4861 ut_ad(fil_system);
4862
4863 mutex_enter(&fil_system->mutex);
4864
4865 space = fil_space_get_by_id(id);
4866
4867 mutex_exit(&fil_system->mutex);
4868
4869 return(space != NULL);
4870 }
4871
4872 /*******************************************************************//**
4873 Report that a tablespace for a table was not found. */
4874 static
4875 void
fil_report_missing_tablespace(const char * name,ulint space_id)4876 fil_report_missing_tablespace(
4877 /*===========================*/
4878 const char* name, /*!< in: table name */
4879 ulint space_id) /*!< in: table's space id */
4880 {
4881 char index_name[MAX_FULL_NAME_LEN + 1];
4882
4883 innobase_format_name(index_name, sizeof(index_name), name, TRUE);
4884
4885 ib_logf(IB_LOG_LEVEL_ERROR,
4886 "Table %s in the InnoDB data dictionary has tablespace id %lu, "
4887 "but tablespace with that id or name does not exist. Have "
4888 "you deleted or moved .ibd files? This may also be a table "
4889 "created with CREATE TEMPORARY TABLE whose .ibd and .frm "
4890 "files MySQL automatically removed, but the table still "
4891 "exists in the InnoDB internal data dictionary.",
4892 name, space_id);
4893 }
4894
4895 /*******************************************************************//**
4896 Returns TRUE if a matching tablespace exists in the InnoDB tablespace memory
4897 cache. Note that if we have not done a crash recovery at the database startup,
4898 there may be many tablespaces which are not yet in the memory cache.
4899 @return TRUE if a matching tablespace exists in the memory cache */
4900 UNIV_INTERN
4901 ibool
fil_space_for_table_exists_in_mem(ulint id,const char * name,ibool mark_space,ibool print_error_if_does_not_exist,bool adjust_space,mem_heap_t * heap,table_id_t table_id)4902 fil_space_for_table_exists_in_mem(
4903 /*==============================*/
4904 ulint id, /*!< in: space id */
4905 const char* name, /*!< in: table name used in
4906 fil_space_create(). Either the
4907 standard 'dbname/tablename' format
4908 or table->dir_path_of_temp_table */
4909 ibool mark_space, /*!< in: in crash recovery, at database
4910 startup we mark all spaces which have
4911 an associated table in the InnoDB
4912 data dictionary, so that
4913 we can print a warning about orphaned
4914 tablespaces */
4915 ibool print_error_if_does_not_exist,
4916 /*!< in: print detailed error
4917 information to the .err log if a
4918 matching tablespace is not found from
4919 memory */
4920 bool adjust_space, /*!< in: whether to adjust space id
4921 when find table space mismatch */
4922 mem_heap_t* heap, /*!< in: heap memory */
4923 table_id_t table_id) /*!< in: table id */
4924 {
4925 fil_space_t* fnamespace;
4926 fil_space_t* space;
4927
4928 ut_ad(fil_system);
4929
4930 mutex_enter(&fil_system->mutex);
4931
4932 /* Look if there is a space with the same id */
4933
4934 space = fil_space_get_by_id(id);
4935
4936 /* Look if there is a space with the same name; the name is the
4937 directory path from the datadir to the file */
4938
4939 fnamespace = fil_space_get_by_name(name);
4940 if (space && space == fnamespace) {
4941 /* Found */
4942
4943 if (mark_space) {
4944 space->mark = TRUE;
4945 }
4946
4947 mutex_exit(&fil_system->mutex);
4948
4949 return(TRUE);
4950 }
4951
4952 /* Info from "fnamespace" comes from the ibd file itself, it can
4953 be different from data obtained from System tables since it is
4954 not transactional. If adjust_space is set, and the mismatching
4955 space are between a user table and its temp table, we shall
4956 adjust the ibd file name according to system table info */
4957 if (adjust_space
4958 && space != NULL
4959 && row_is_mysql_tmp_table_name(space->name)
4960 && !row_is_mysql_tmp_table_name(name)) {
4961
4962 mutex_exit(&fil_system->mutex);
4963
4964 DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
4965 DBUG_SUICIDE(););
4966
4967 if (fnamespace) {
4968 char* tmp_name;
4969
4970 tmp_name = dict_mem_create_temporary_tablename(
4971 heap, name, table_id);
4972
4973 fil_rename_tablespace(fnamespace->name, fnamespace->id,
4974 tmp_name, NULL);
4975 }
4976
4977 DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
4978 DBUG_SUICIDE(););
4979
4980 fil_rename_tablespace(space->name, id, name, NULL);
4981
4982 DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
4983 DBUG_SUICIDE(););
4984
4985 mutex_enter(&fil_system->mutex);
4986 fnamespace = fil_space_get_by_name(name);
4987 ut_ad(space == fnamespace);
4988 mutex_exit(&fil_system->mutex);
4989
4990 return(TRUE);
4991 }
4992
4993 if (!print_error_if_does_not_exist) {
4994
4995 mutex_exit(&fil_system->mutex);
4996
4997 return(FALSE);
4998 }
4999
5000 if (space == NULL) {
5001 if (fnamespace == NULL) {
5002 if (print_error_if_does_not_exist) {
5003 fil_report_missing_tablespace(name, id);
5004 }
5005 } else {
5006 ut_print_timestamp(stderr);
5007 fputs(" InnoDB: Error: table ", stderr);
5008 ut_print_filename(stderr, name);
5009 fprintf(stderr, "\n"
5010 "InnoDB: in InnoDB data dictionary has"
5011 " tablespace id %lu,\n"
5012 "InnoDB: but a tablespace with that id"
5013 " does not exist. There is\n"
5014 "InnoDB: a tablespace of name %s and id %lu,"
5015 " though. Have\n"
5016 "InnoDB: you deleted or moved .ibd files?\n",
5017 (ulong) id, fnamespace->name,
5018 (ulong) fnamespace->id);
5019 }
5020 error_exit:
5021 fputs("InnoDB: Please refer to\n"
5022 "InnoDB: " REFMAN "innodb-troubleshooting-datadict.html\n"
5023 "InnoDB: for how to resolve the issue.\n", stderr);
5024
5025 mutex_exit(&fil_system->mutex);
5026
5027 return(FALSE);
5028 }
5029
5030 if (0 != strcmp(space->name, name)) {
5031 ut_print_timestamp(stderr);
5032 fputs(" InnoDB: Error: table ", stderr);
5033 ut_print_filename(stderr, name);
5034 fprintf(stderr, "\n"
5035 "InnoDB: in InnoDB data dictionary has"
5036 " tablespace id %lu,\n"
5037 "InnoDB: but the tablespace with that id"
5038 " has name %s.\n"
5039 "InnoDB: Have you deleted or moved .ibd files?\n",
5040 (ulong) id, space->name);
5041
5042 if (fnamespace != NULL) {
5043 fputs("InnoDB: There is a tablespace"
5044 " with the right name\n"
5045 "InnoDB: ", stderr);
5046 ut_print_filename(stderr, fnamespace->name);
5047 fprintf(stderr, ", but its id is %lu.\n",
5048 (ulong) fnamespace->id);
5049 }
5050
5051 goto error_exit;
5052 }
5053
5054 mutex_exit(&fil_system->mutex);
5055
5056 return(FALSE);
5057 }
5058
5059 /*******************************************************************//**
5060 Checks if a single-table tablespace for a given table name exists in the
5061 tablespace memory cache.
5062 @return space id, ULINT_UNDEFINED if not found */
5063 UNIV_INTERN
5064 ulint
fil_get_space_id_for_table(const char * tablename)5065 fil_get_space_id_for_table(
5066 /*=======================*/
5067 const char* tablename) /*!< in: table name in the standard
5068 'databasename/tablename' format */
5069 {
5070 fil_space_t* fnamespace;
5071 ulint id = ULINT_UNDEFINED;
5072
5073 ut_ad(fil_system);
5074
5075 mutex_enter(&fil_system->mutex);
5076
5077 /* Look if there is a space with the same name. */
5078
5079 fnamespace = fil_space_get_by_name(tablename);
5080
5081 if (fnamespace) {
5082 id = fnamespace->id;
5083 }
5084
5085 mutex_exit(&fil_system->mutex);
5086
5087 return(id);
5088 }
5089
5090 /**********************************************************************//**
5091 Tries to extend a data file so that it would accommodate the number of pages
5092 given. The tablespace must be cached in the memory cache. If the space is big
5093 enough already, does nothing.
5094 @return TRUE if success */
5095 UNIV_INTERN
5096 ibool
fil_extend_space_to_desired_size(ulint * actual_size,ulint space_id,ulint size_after_extend)5097 fil_extend_space_to_desired_size(
5098 /*=============================*/
5099 ulint* actual_size, /*!< out: size of the space after extension;
5100 if we ran out of disk space this may be lower
5101 than the desired size */
5102 ulint space_id, /*!< in: space id */
5103 ulint size_after_extend)/*!< in: desired size in pages after the
5104 extension; if the current space size is bigger
5105 than this already, the function does nothing */
5106 {
5107 fil_node_t* node;
5108 fil_space_t* space;
5109 byte* buf2;
5110 byte* buf;
5111 ulint buf_size;
5112 ulint start_page_no;
5113 ulint file_start_page_no;
5114 ulint page_size;
5115 ulint pages_added;
5116 ibool success;
5117
5118 ut_ad(!srv_read_only_mode);
5119
5120 retry:
5121 pages_added = 0;
5122 success = TRUE;
5123
5124 fil_mutex_enter_and_prepare_for_io(space_id);
5125
5126 space = fil_space_get_by_id(space_id);
5127 ut_a(space);
5128
5129 if (space->size >= size_after_extend) {
5130 /* Space already big enough */
5131
5132 *actual_size = space->size;
5133
5134 mutex_exit(&fil_system->mutex);
5135
5136 return(TRUE);
5137 }
5138
5139 page_size = fsp_flags_get_zip_size(space->flags);
5140 if (!page_size) {
5141 page_size = UNIV_PAGE_SIZE;
5142 }
5143
5144 node = UT_LIST_GET_LAST(space->chain);
5145
5146 if (!node->being_extended) {
5147 /* Mark this node as undergoing extension. This flag
5148 is used by other threads to wait for the extension
5149 opereation to finish. */
5150 node->being_extended = TRUE;
5151 } else {
5152 /* Another thread is currently extending the file. Wait
5153 for it to finish.
5154 It'd have been better to use event driven mechanism but
5155 the entire module is peppered with polling stuff. */
5156 mutex_exit(&fil_system->mutex);
5157 os_thread_sleep(100000);
5158 goto retry;
5159 }
5160
5161 if (!fil_node_prepare_for_io(node, fil_system, space)) {
5162 /* The tablespace data file, such as .ibd file, is missing */
5163 node->being_extended = false;
5164 mutex_exit(&fil_system->mutex);
5165
5166 return(false);
5167 }
5168
5169 /* At this point it is safe to release fil_system mutex. No
5170 other thread can rename, delete or close the file because
5171 we have set the node->being_extended flag. */
5172 mutex_exit(&fil_system->mutex);
5173
5174 start_page_no = space->size;
5175 file_start_page_no = space->size - node->size;
5176
5177 /* Extend at most 64 pages at a time */
5178 buf_size = ut_min(64, size_after_extend - start_page_no) * page_size;
5179 buf2 = static_cast<byte*>(mem_alloc(buf_size + page_size));
5180 buf = static_cast<byte*>(ut_align(buf2, page_size));
5181
5182 memset(buf, 0, buf_size);
5183
5184 while (start_page_no < size_after_extend) {
5185 ulint n_pages
5186 = ut_min(buf_size / page_size,
5187 size_after_extend - start_page_no);
5188
5189 os_offset_t offset
5190 = ((os_offset_t) (start_page_no - file_start_page_no))
5191 * page_size;
5192 #ifdef UNIV_HOTBACKUP
5193 success = os_file_write(node->name, node->handle, buf,
5194 offset, page_size * n_pages);
5195 #else
5196 success = os_aio(OS_FILE_WRITE, OS_AIO_SYNC,
5197 node->name, node->handle, buf,
5198 offset, page_size * n_pages,
5199 NULL, NULL);
5200 #endif /* UNIV_HOTBACKUP */
5201 if (success) {
5202 os_has_said_disk_full = FALSE;
5203 } else {
5204 /* Let us measure the size of the file to determine
5205 how much we were able to extend it */
5206 os_offset_t size;
5207
5208 size = os_file_get_size(node->handle);
5209 ut_a(size != (os_offset_t) -1);
5210
5211 n_pages = ((ulint) (size / page_size))
5212 - node->size - pages_added;
5213
5214 pages_added += n_pages;
5215 break;
5216 }
5217
5218 start_page_no += n_pages;
5219 pages_added += n_pages;
5220 }
5221
5222 mem_free(buf2);
5223
5224 mutex_enter(&fil_system->mutex);
5225
5226 ut_a(node->being_extended);
5227
5228 space->size += pages_added;
5229 node->size += pages_added;
5230 node->being_extended = FALSE;
5231
5232 fil_node_complete_io(node, fil_system, OS_FILE_WRITE);
5233
5234 *actual_size = space->size;
5235
5236 #ifndef UNIV_HOTBACKUP
5237 if (space_id == 0) {
5238 ulint pages_per_mb = (1024 * 1024) / page_size;
5239
5240 /* Keep the last data file size info up to date, rounded to
5241 full megabytes */
5242
5243 srv_data_file_sizes[srv_n_data_files - 1]
5244 = (node->size / pages_per_mb) * pages_per_mb;
5245 }
5246 #endif /* !UNIV_HOTBACKUP */
5247
5248 /*
5249 printf("Extended %s to %lu, actual size %lu pages\n", space->name,
5250 size_after_extend, *actual_size); */
5251 mutex_exit(&fil_system->mutex);
5252
5253 fil_flush(space_id);
5254
5255 return(success);
5256 }
5257
#ifdef UNIV_HOTBACKUP
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
mysqlbackup --apply-log phase we extended the spaces on-demand so that log
records could be applied, but that may have left spaces still too small
compared to the size stored in the space header.

For every space in fil_system->space_list the first page is read, the size
recorded in it is extracted with fsp_get_size_low(), and the space is
extended to that size. A failure to extend is reported on stderr and then
trips an assertion. */
UNIV_INTERN
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
	fil_space_t*	space;
	byte*		buf;
	ulint		actual_size;
	ulint		size_in_header;
	dberr_t		error;
	ibool		success;

	/* mem_alloc() yields an untyped pointer; C++ needs the explicit
	cast, as done elsewhere in this file. */
	buf = static_cast<byte*>(mem_alloc(UNIV_PAGE_SIZE));

	mutex_enter(&fil_system->mutex);

	space = UT_LIST_GET_FIRST(fil_system->space_list);

	while (space) {
		ut_a(space->purpose == FIL_TABLESPACE);

		mutex_exit(&fil_system->mutex); /* no need to protect with a
						mutex, because this is a
						single-threaded operation */
		error = fil_read(TRUE, space->id,
				 fsp_flags_get_zip_size(space->flags),
				 0, 0, UNIV_PAGE_SIZE, buf, NULL);
		ut_a(error == DB_SUCCESS);

		size_in_header = fsp_get_size_low(buf);

		/* Give the output parameter a defined value so that the
		error message below never prints an indeterminate one. */
		actual_size = 0;

		success = fil_extend_space_to_desired_size(
			&actual_size, space->id, size_in_header);
		if (!success) {
			/* The values are cast to match the %lu conversions,
			as the other messages in this file do. */
			fprintf(stderr,
				"InnoDB: Error: could not extend the"
				" tablespace of %s\n"
				"InnoDB: to the size stored in header,"
				" %lu pages;\n"
				"InnoDB: size after extension %lu pages\n"
				"InnoDB: Check that you have free disk space"
				" and retry!\n",
				space->name, (ulong) size_in_header,
				(ulong) actual_size);
			ut_a(success);
		}

		mutex_enter(&fil_system->mutex);

		space = UT_LIST_GET_NEXT(space_list, space);
	}

	mutex_exit(&fil_system->mutex);

	mem_free(buf);
}
#endif /* UNIV_HOTBACKUP */
5320
5321 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
5322
5323 /*******************************************************************//**
5324 Tries to reserve free extents in a file space.
5325 @return TRUE if succeed */
5326 UNIV_INTERN
5327 ibool
fil_space_reserve_free_extents(ulint id,ulint n_free_now,ulint n_to_reserve)5328 fil_space_reserve_free_extents(
5329 /*===========================*/
5330 ulint id, /*!< in: space id */
5331 ulint n_free_now, /*!< in: number of free extents now */
5332 ulint n_to_reserve) /*!< in: how many one wants to reserve */
5333 {
5334 fil_space_t* space;
5335 ibool success;
5336
5337 ut_ad(fil_system);
5338
5339 mutex_enter(&fil_system->mutex);
5340
5341 space = fil_space_get_by_id(id);
5342
5343 ut_a(space);
5344
5345 if (space->n_reserved_extents + n_to_reserve > n_free_now) {
5346 success = FALSE;
5347 } else {
5348 space->n_reserved_extents += n_to_reserve;
5349 success = TRUE;
5350 }
5351
5352 mutex_exit(&fil_system->mutex);
5353
5354 return(success);
5355 }
5356
5357 /*******************************************************************//**
5358 Releases free extents in a file space. */
5359 UNIV_INTERN
5360 void
fil_space_release_free_extents(ulint id,ulint n_reserved)5361 fil_space_release_free_extents(
5362 /*===========================*/
5363 ulint id, /*!< in: space id */
5364 ulint n_reserved) /*!< in: how many one reserved */
5365 {
5366 fil_space_t* space;
5367
5368 ut_ad(fil_system);
5369
5370 mutex_enter(&fil_system->mutex);
5371
5372 space = fil_space_get_by_id(id);
5373
5374 ut_a(space);
5375 ut_a(space->n_reserved_extents >= n_reserved);
5376
5377 space->n_reserved_extents -= n_reserved;
5378
5379 mutex_exit(&fil_system->mutex);
5380 }
5381
5382 /*******************************************************************//**
5383 Gets the number of reserved extents. If the database is silent, this number
5384 should be zero. */
5385 UNIV_INTERN
5386 ulint
fil_space_get_n_reserved_extents(ulint id)5387 fil_space_get_n_reserved_extents(
5388 /*=============================*/
5389 ulint id) /*!< in: space id */
5390 {
5391 fil_space_t* space;
5392 ulint n;
5393
5394 ut_ad(fil_system);
5395
5396 mutex_enter(&fil_system->mutex);
5397
5398 space = fil_space_get_by_id(id);
5399
5400 ut_a(space);
5401
5402 n = space->n_reserved_extents;
5403
5404 mutex_exit(&fil_system->mutex);
5405
5406 return(n);
5407 }
5408
5409 /*============================ FILE I/O ================================*/
5410
5411 /********************************************************************//**
5412 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
5413
5414 Prepares a file node for i/o. Opens the file if it is closed. Updates the
5415 pending i/o's field in the node and the system appropriately. Takes the node
5416 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
5417 mutex.
5418 @return false if the file can't be opened, otherwise true */
5419 static
5420 bool
fil_node_prepare_for_io(fil_node_t * node,fil_system_t * system,fil_space_t * space)5421 fil_node_prepare_for_io(
5422 /*====================*/
5423 fil_node_t* node, /*!< in: file node */
5424 fil_system_t* system, /*!< in: tablespace memory cache */
5425 fil_space_t* space) /*!< in: space */
5426 {
5427 ut_ad(node && system && space);
5428 ut_ad(mutex_own(&(system->mutex)));
5429
5430 if (system->n_open > system->max_n_open + 5) {
5431 ut_print_timestamp(stderr);
5432 fprintf(stderr,
5433 " InnoDB: Warning: open files %lu"
5434 " exceeds the limit %lu\n",
5435 (ulong) system->n_open,
5436 (ulong) system->max_n_open);
5437 }
5438
5439 if (node->open == FALSE) {
5440 /* File is closed: open it */
5441 ut_a(node->n_pending == 0);
5442
5443 if (!fil_node_open_file(node, system, space)) {
5444 return(false);
5445 }
5446 }
5447
5448 if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
5449 /* The node is in the LRU list, remove it */
5450
5451 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
5452
5453 UT_LIST_REMOVE(LRU, system->LRU, node);
5454 }
5455
5456 node->n_pending++;
5457
5458 return(true);
5459 }
5460
5461 /********************************************************************//**
5462 Updates the data structures when an i/o operation finishes. Updates the
5463 pending i/o's field in the node appropriately. */
5464 static
5465 void
fil_node_complete_io(fil_node_t * node,fil_system_t * system,ulint type)5466 fil_node_complete_io(
5467 /*=================*/
5468 fil_node_t* node, /*!< in: file node */
5469 fil_system_t* system, /*!< in: tablespace memory cache */
5470 ulint type) /*!< in: OS_FILE_WRITE or OS_FILE_READ; marks
5471 the node as modified if
5472 type == OS_FILE_WRITE */
5473 {
5474 ut_ad(node);
5475 ut_ad(system);
5476 ut_ad(mutex_own(&(system->mutex)));
5477
5478 ut_a(node->n_pending > 0);
5479
5480 node->n_pending--;
5481
5482 if (type == OS_FILE_WRITE) {
5483 ut_ad(!srv_read_only_mode);
5484 system->modification_counter++;
5485 node->modification_counter = system->modification_counter;
5486
5487 if (fil_buffering_disabled(node->space)) {
5488
5489 /* We don't need to keep track of unflushed
5490 changes as user has explicitly disabled
5491 buffering. */
5492 ut_ad(!node->space->is_in_unflushed_spaces);
5493 node->flush_counter = node->modification_counter;
5494
5495 } else if (!node->space->is_in_unflushed_spaces) {
5496
5497 node->space->is_in_unflushed_spaces = true;
5498 UT_LIST_ADD_FIRST(unflushed_spaces,
5499 system->unflushed_spaces,
5500 node->space);
5501 }
5502 }
5503
5504 if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
5505
5506 /* The node must be put back to the LRU list */
5507 UT_LIST_ADD_FIRST(LRU, system->LRU, node);
5508 }
5509 }
5510
5511 /********************************************************************//**
5512 Report information about an invalid page access. */
5513 static
5514 void
fil_report_invalid_page_access(ulint block_offset,ulint space_id,const char * space_name,ulint byte_offset,ulint len,ulint type)5515 fil_report_invalid_page_access(
5516 /*===========================*/
5517 ulint block_offset, /*!< in: block offset */
5518 ulint space_id, /*!< in: space id */
5519 const char* space_name, /*!< in: space name */
5520 ulint byte_offset, /*!< in: byte offset */
5521 ulint len, /*!< in: I/O length */
5522 ulint type) /*!< in: I/O type */
5523 {
5524 fprintf(stderr,
5525 "InnoDB: Error: trying to access page number %lu"
5526 " in space %lu,\n"
5527 "InnoDB: space name %s,\n"
5528 "InnoDB: which is outside the tablespace bounds.\n"
5529 "InnoDB: Byte offset %lu, len %lu, i/o type %lu.\n"
5530 "InnoDB: If you get this error at mysqld startup,"
5531 " please check that\n"
5532 "InnoDB: your my.cnf matches the ibdata files"
5533 " that you have in the\n"
5534 "InnoDB: MySQL server.\n",
5535 (ulong) block_offset, (ulong) space_id, space_name,
5536 (ulong) byte_offset, (ulong) len, (ulong) type);
5537 }
5538
/********************************************************************//**
Reads or writes data. This operation is asynchronous (aio).

Outline: the flag bits are stripped from type, the aio mode is chosen, the
i/o statistics are updated, the file node of the space that contains
block_offset is located and prepared for i/o under the fil_system mutex,
the byte offset within that file is computed, and the request is handed to
the OS layer. For OS_AIO_SYNC mode the completion bookkeeping
(fil_node_complete_io()) is done here before returning.
@return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
i/o on a tablespace which does not exist (or whose data file cannot be
opened), or DB_ERROR if the page is outside all files of the space and
BUF_READ_IGNORE_NONEXISTENT_PAGES was set in type */
UNIV_INTERN
dberr_t
fil_io(
/*===*/
	ulint	type,		/*!< in: OS_FILE_READ or OS_FILE_WRITE,
				ORed to OS_FILE_LOG, if a log i/o
				and ORed to OS_AIO_SIMULATED_WAKE_LATER
				if simulated aio and we want to post a
				batch of i/os; NOTE that a simulated batch
				may introduce hidden chances of deadlocks,
				because i/os are not actually handled until
				all have been posted: use with great
				caution! May also be ORed with
				BUF_READ_IGNORE_NONEXISTENT_PAGES */
	bool	sync,		/*!< in: true if synchronous aio is desired */
	ulint	space_id,	/*!< in: space id */
	ulint	zip_size,	/*!< in: compressed page size in bytes;
				0 for uncompressed pages */
	ulint	block_offset,	/*!< in: offset in number of blocks */
	ulint	byte_offset,	/*!< in: remainder of offset in bytes; in
				aio this must be divisible by the OS block
				size */
	ulint	len,		/*!< in: how many bytes to read or write; this
				must not cross a file boundary; in aio this
				must be a block size multiple */
	void*	buf,		/*!< in/out: buffer where to store read data
				or from where to write; in aio this must be
				appropriately aligned */
	void*	message)	/*!< in: message for aio handler if non-sync
				aio used, else ignored */
{
	ulint		mode;
	fil_space_t*	space;
	fil_node_t*	node;
	ibool		ret;
	ulint		is_log;
	ulint		wake_later;
	os_offset_t	offset;
	ibool		ignore_nonexistent_pages;

	/* Split the modifier bits off type, so that below type is plain
	OS_FILE_READ or OS_FILE_WRITE */
	is_log = type & OS_FILE_LOG;
	type = type & ~OS_FILE_LOG;

	wake_later = type & OS_AIO_SIMULATED_WAKE_LATER;
	type = type & ~OS_AIO_SIMULATED_WAKE_LATER;

	ignore_nonexistent_pages = type & BUF_READ_IGNORE_NONEXISTENT_PAGES;
	type &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;

	ut_ad(byte_offset < UNIV_PAGE_SIZE);
	ut_ad(!zip_size || !byte_offset);
	ut_ad(ut_is_2pow(zip_size));
	ut_ad(buf);
	ut_ad(len > 0);
	ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
#if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
#endif
#if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
# error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
#endif
	ut_ad(fil_validate_skip());
#ifndef UNIV_HOTBACKUP
# ifndef UNIV_LOG_DEBUG
	/* ibuf bitmap pages must be read in the sync aio mode: */
	ut_ad(recv_no_ibuf_operations
	      || type == OS_FILE_WRITE
	      || !ibuf_bitmap_page(zip_size, block_offset)
	      || sync
	      || is_log);
# endif /* UNIV_LOG_DEBUG */
	/* Choose the aio mode: sync takes precedence, then log i/o, then
	reads of insert buffer pages; everything else is normal aio */
	if (sync) {
		mode = OS_AIO_SYNC;
	} else if (is_log) {
		mode = OS_AIO_LOG;
	} else if (type == OS_FILE_READ
		   && !recv_no_ibuf_operations
		   && ibuf_page(space_id, zip_size, block_offset, NULL)) {
		mode = OS_AIO_IBUF;
	} else {
		mode = OS_AIO_NORMAL;
	}
#else /* !UNIV_HOTBACKUP */
	ut_a(sync);
	mode = OS_AIO_SYNC;
#endif /* !UNIV_HOTBACKUP */

	/* Account the requested length in the server i/o statistics */
	if (type == OS_FILE_READ) {
		srv_stats.data_read.add(len);
	} else if (type == OS_FILE_WRITE) {
		ut_ad(!srv_read_only_mode);
		srv_stats.data_written.add(len);
	}

	/* Reserve the fil_system mutex and make sure that we can open at
	least one file while holding it, if the file is not already open */

	fil_mutex_enter_and_prepare_for_io(space_id);

	space = fil_space_get_by_id(space_id);

	/* If we are deleting a tablespace we don't allow async read operations
	on that. However, we do allow write and sync read operations */
	if (space == 0
	    || (type == OS_FILE_READ && !sync && space->stop_new_ops)) {
		mutex_exit(&fil_system->mutex);

		ib_logf(IB_LOG_LEVEL_ERROR,
			"Trying to do i/o to a tablespace which does "
			"not exist. i/o type %lu, space id %lu, "
			"page no. %lu, i/o length %lu bytes",
			(ulong) type, (ulong) space_id, (ulong) block_offset,
			(ulong) len);

		return(DB_TABLESPACE_DELETED);
	}

	ut_ad(mode != OS_AIO_IBUF || space->purpose == FIL_TABLESPACE);

	node = UT_LIST_GET_FIRST(space->chain);

	/* Walk the file chain of the space. block_offset starts as an offset
	within the space; the size of every file we pass is subtracted, so
	that at loop exit it is an offset within the file 'node'. */
	for (;;) {
		if (node == NULL) {
			/* The offset is beyond the last file of the space */
			if (ignore_nonexistent_pages) {
				mutex_exit(&fil_system->mutex);
				return(DB_ERROR);
			}

			fil_report_invalid_page_access(
				block_offset, space_id, space->name,
				byte_offset, len, type);

			ut_error;

		} else if (fil_is_user_tablespace_id(space->id)
			   && node->size == 0) {

			/* We do not know the size of a single-table tablespace
			before we open the file */
			break;
		} else if (node->size > block_offset) {
			/* Found! */
			break;
		} else {
			block_offset -= node->size;
			node = UT_LIST_GET_NEXT(chain, node);
		}
	}

	/* Open file if closed */
	if (!fil_node_prepare_for_io(node, fil_system, space)) {
		if (space->purpose == FIL_TABLESPACE
		    && fil_is_user_tablespace_id(space->id)) {
			mutex_exit(&fil_system->mutex);

			ib_logf(IB_LOG_LEVEL_ERROR,
				"Trying to do i/o to a tablespace which "
				"exists without .ibd data file. "
				"i/o type %lu, space id %lu, page no %lu, "
				"i/o length %lu bytes",
				(ulong) type, (ulong) space_id,
				(ulong) block_offset, (ulong) len);

			return(DB_TABLESPACE_DELETED);
		}

		/* The tablespace is for log. Currently, we just assert here
		to prevent handling errors along the way fil_io returns.
		Also, if the log files are missing, it would be hard to
		promise the server can continue running. */
		ut_a(0);
	}

	/* Check that at least the start offset is within the bounds of a
	single-table tablespace, including rollback tablespaces. */
	if (UNIV_UNLIKELY(node->size <= block_offset)
	    && space->id != 0 && space->purpose == FIL_TABLESPACE) {

		fil_report_invalid_page_access(
			block_offset, space_id, space->name, byte_offset,
			len, type);

		ut_error;
	}

	/* Now we have made the changes in the data structures of fil_system */
	mutex_exit(&fil_system->mutex);

	/* Calculate the byte offset of the request within the file, and
	assert that the whole request fits in the remaining pages of the
	file */

	if (!zip_size) {
		offset = ((os_offset_t) block_offset << UNIV_PAGE_SIZE_SHIFT)
			+ byte_offset;

		ut_a(node->size - block_offset
		     >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
			 / UNIV_PAGE_SIZE));
	} else {
		/* Compressed pages: the shift is log2 of zip_size; any other
		zip_size than the ones listed is a fatal error */
		ulint	zip_size_shift;
		switch (zip_size) {
		case 1024: zip_size_shift = 10; break;
		case 2048: zip_size_shift = 11; break;
		case 4096: zip_size_shift = 12; break;
		case 8192: zip_size_shift = 13; break;
		case 16384: zip_size_shift = 14; break;
		default: ut_error;
		}
		offset = ((os_offset_t) block_offset << zip_size_shift)
			+ byte_offset;
		ut_a(node->size - block_offset
		     >= (len + (zip_size - 1)) / zip_size);
	}

	/* Do aio */

	ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
	ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);

#ifdef UNIV_HOTBACKUP
	/* In mysqlbackup do normal i/o, not aio */
	if (type == OS_FILE_READ) {
		ret = os_file_read(node->handle, buf, offset, len);
	} else {
		ut_ad(!srv_read_only_mode);
		ret = os_file_write(node->name, node->handle, buf,
				    offset, len);
	}
#else
	/* Queue the aio request */
	ret = os_aio(type, mode | wake_later, node->name, node->handle, buf,
		     offset, len, node, message);
#endif /* UNIV_HOTBACKUP */
	ut_a(ret);

	if (mode == OS_AIO_SYNC) {
		/* The i/o operation is already completed when we return from
		os_aio: */

		mutex_enter(&fil_system->mutex);

		fil_node_complete_io(node, fil_system, type);

		mutex_exit(&fil_system->mutex);

		ut_ad(fil_validate_skip());
	}

	return(DB_SUCCESS);
}
5791
5792 #ifndef UNIV_HOTBACKUP
5793 /**********************************************************************//**
5794 Waits for an aio operation to complete. This function is used to write the
5795 handler for completed requests. The aio array of pending requests is divided
5796 into segments (see os0file.cc for more info). The thread specifies which
5797 segment it wants to wait for. */
5798 UNIV_INTERN
5799 void
fil_aio_wait(ulint segment)5800 fil_aio_wait(
5801 /*=========*/
5802 ulint segment) /*!< in: the number of the segment in the aio
5803 array to wait for */
5804 {
5805 ibool ret;
5806 fil_node_t* fil_node;
5807 void* message;
5808 ulint type;
5809
5810 ut_ad(fil_validate_skip());
5811
5812 if (srv_use_native_aio) {
5813 srv_set_io_thread_op_info(segment, "native aio handle");
5814 #ifdef WIN_ASYNC_IO
5815 ret = os_aio_windows_handle(
5816 segment, 0, &fil_node, &message, &type);
5817 #elif defined(LINUX_NATIVE_AIO)
5818 ret = os_aio_linux_handle(
5819 segment, &fil_node, &message, &type);
5820 #else
5821 ut_error;
5822 ret = 0; /* Eliminate compiler warning */
5823 #endif /* WIN_ASYNC_IO */
5824 } else {
5825 srv_set_io_thread_op_info(segment, "simulated aio handle");
5826
5827 ret = os_aio_simulated_handle(
5828 segment, &fil_node, &message, &type);
5829 }
5830
5831 ut_a(ret);
5832 if (fil_node == NULL) {
5833 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
5834 return;
5835 }
5836
5837 srv_set_io_thread_op_info(segment, "complete io for fil node");
5838
5839 mutex_enter(&fil_system->mutex);
5840
5841 fil_node_complete_io(fil_node, fil_system, type);
5842
5843 mutex_exit(&fil_system->mutex);
5844
5845 ut_ad(fil_validate_skip());
5846
5847 /* Do the i/o handling */
5848 /* IMPORTANT: since i/o handling for reads will read also the insert
5849 buffer in tablespace 0, you have to be very careful not to introduce
5850 deadlocks in the i/o system. We keep tablespace 0 data files always
5851 open, and use a special i/o thread to serve insert buffer requests. */
5852
5853 if (fil_node->space->purpose == FIL_TABLESPACE) {
5854 srv_set_io_thread_op_info(segment, "complete io for buf page");
5855 buf_page_io_complete(static_cast<buf_page_t*>(message));
5856 } else {
5857 srv_set_io_thread_op_info(segment, "complete io for log");
5858 log_io_complete(static_cast<log_group_t*>(message));
5859 }
5860 }
5861 #endif /* UNIV_HOTBACKUP */
5862
/**********************************************************************//**
Flushes to disk possible writes cached by the OS. If the space does not exist
or is being dropped, does not do anything.

Every file node of the space whose modification counter is ahead of its
flush counter is flushed with os_file_flush(). The fil_system mutex is
released around the flush call and around the wait for a concurrent flush
of the same node. Once all nodes of the space are flushed, the space is
removed from fil_system->unflushed_spaces. */
UNIV_INTERN
void
fil_flush(
/*======*/
	ulint	space_id)	/*!< in: file space id (this can be a group of
				log files or a tablespace of the database) */
{
	fil_space_t*	space;
	fil_node_t*	node;
	pfs_os_file_t	file;


	mutex_enter(&fil_system->mutex);

	space = fil_space_get_by_id(space_id);

	if (!space || space->stop_new_ops) {
		mutex_exit(&fil_system->mutex);

		return;
	}

	if (fil_buffering_disabled(space)) {

		/* No need to flush. User has explicitly disabled
		buffering. */
		ut_ad(!space->is_in_unflushed_spaces);
		ut_ad(fil_space_is_flushed(space));
		ut_ad(space->n_pending_flushes == 0);

#ifdef UNIV_DEBUG
		/* Debug builds verify that every node is already marked
		as flushed and has no flush in progress */
		for (node = UT_LIST_GET_FIRST(space->chain);
		     node != NULL;
		     node = UT_LIST_GET_NEXT(chain, node)) {
			ut_ad(node->modification_counter
			      == node->flush_counter);
			ut_ad(node->n_pending_flushes == 0);
		}
#endif /* UNIV_DEBUG */

		mutex_exit(&fil_system->mutex);
		return;
	}

	space->n_pending_flushes++;	/*!< prevent dropping of the space while
					we are flushing */
	for (node = UT_LIST_GET_FIRST(space->chain);
	     node != NULL;
	     node = UT_LIST_GET_NEXT(chain, node)) {

		/* Snapshot of the modification counter: the flush below is
		credited only up to this value */
		ib_int64_t old_mod_counter = node->modification_counter;;

		if (old_mod_counter <= node->flush_counter) {
			/* Nothing unflushed in this node */
			continue;
		}

		ut_a(node->open);

		if (space->purpose == FIL_TABLESPACE) {
			fil_n_pending_tablespace_flushes++;
		} else {
			fil_n_pending_log_flushes++;
			fil_n_log_flushes++;
		}
#ifdef __WIN__
		if (node->is_raw_disk) {

			/* Raw disk nodes skip the flush call on Windows */
			goto skip_flush;
		}
#endif /* __WIN__ */
retry:
		if (node->n_pending_flushes > 0) {
			/* We want to avoid calling os_file_flush() on
			the file twice at the same time, because we do
			not know what bugs OS's may contain in file
			i/o */

			ib_int64_t sig_count =
				os_event_reset(node->sync_event);

			mutex_exit(&fil_system->mutex);

			os_event_wait_low(node->sync_event, sig_count);

			mutex_enter(&fil_system->mutex);

			if (node->flush_counter >= old_mod_counter) {

				/* The flush we waited for already covers
				our snapshot */
				goto skip_flush;
			}

			goto retry;
		}

		ut_a(node->open);
		/* Copy the handle while holding the mutex; the flush itself
		runs without the mutex */
		file = node->handle;
		node->n_pending_flushes++;

		mutex_exit(&fil_system->mutex);

		os_file_flush(file);

		mutex_enter(&fil_system->mutex);

		/* Signal the event that the retry branch above waits on */
		os_event_set(node->sync_event);

		node->n_pending_flushes--;
skip_flush:
		if (node->flush_counter < old_mod_counter) {
			node->flush_counter = old_mod_counter;

			if (space->is_in_unflushed_spaces
			    && fil_space_is_flushed(space)) {

				space->is_in_unflushed_spaces = false;

				UT_LIST_REMOVE(
					unflushed_spaces,
					fil_system->unflushed_spaces,
					space);
			}
		}

		/* Undo the pending-flush count taken before the flush.
		fil_n_log_flushes is not decremented. */
		if (space->purpose == FIL_TABLESPACE) {
			fil_n_pending_tablespace_flushes--;
		} else {
			fil_n_pending_log_flushes--;
		}
	}

	space->n_pending_flushes--;

	mutex_exit(&fil_system->mutex);
}
6000
6001 /**********************************************************************//**
6002 Flushes to disk the writes in file spaces of the given type possibly cached by
6003 the OS. */
6004 UNIV_INTERN
6005 void
fil_flush_file_spaces(ulint purpose)6006 fil_flush_file_spaces(
6007 /*==================*/
6008 ulint purpose) /*!< in: FIL_TABLESPACE, FIL_LOG */
6009 {
6010 fil_space_t* space;
6011 ulint* space_ids;
6012 ulint n_space_ids;
6013 ulint i;
6014
6015 mutex_enter(&fil_system->mutex);
6016
6017 n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
6018 if (n_space_ids == 0) {
6019
6020 mutex_exit(&fil_system->mutex);
6021 return;
6022 }
6023
6024 /* Assemble a list of space ids to flush. Previously, we
6025 traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
6026 on a space that was just removed from the list by fil_flush().
6027 Thus, the space could be dropped and the memory overwritten. */
6028 space_ids = static_cast<ulint*>(
6029 mem_alloc(n_space_ids * sizeof *space_ids));
6030
6031 n_space_ids = 0;
6032
6033 for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
6034 space;
6035 space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
6036
6037 if (space->purpose == purpose && !space->stop_new_ops) {
6038
6039 space_ids[n_space_ids++] = space->id;
6040 }
6041 }
6042
6043 mutex_exit(&fil_system->mutex);
6044
6045 /* Flush the spaces. It will not hurt to call fil_flush() on
6046 a non-existing space id. */
6047 for (i = 0; i < n_space_ids; i++) {
6048
6049 fil_flush(space_ids[i]);
6050 }
6051
6052 mem_free(space_ids);
6053 }
6054
/** Functor to validate the space list. fil_validate() passes it to
UT_LIST_VALIDATE for the file node chain of each space, so it is applied
to individual file nodes. */
struct Check {
	/** Asserts that a file node with pending i/o is open.
	@param[in]	elem	file node to check */
	void	operator()(const fil_node_t* elem)
	{
		/* A closed file must not have any pending i/o */
		ut_a(elem->open || !elem->n_pending);
	}
};
6062
6063 /******************************************************************//**
6064 Checks the consistency of the tablespace cache.
6065 @return TRUE if ok */
6066 UNIV_INTERN
6067 ibool
fil_validate(void)6068 fil_validate(void)
6069 /*==============*/
6070 {
6071 fil_space_t* space;
6072 fil_node_t* fil_node;
6073 ulint n_open = 0;
6074 ulint i;
6075
6076 mutex_enter(&fil_system->mutex);
6077
6078 /* Look for spaces in the hash table */
6079
6080 for (i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
6081
6082 for (space = static_cast<fil_space_t*>(
6083 HASH_GET_FIRST(fil_system->spaces, i));
6084 space != 0;
6085 space = static_cast<fil_space_t*>(
6086 HASH_GET_NEXT(hash, space))) {
6087
6088 UT_LIST_VALIDATE(
6089 chain, fil_node_t, space->chain, Check());
6090
6091 for (fil_node = UT_LIST_GET_FIRST(space->chain);
6092 fil_node != 0;
6093 fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
6094
6095 if (fil_node->n_pending > 0) {
6096 ut_a(fil_node->open);
6097 }
6098
6099 if (fil_node->open) {
6100 n_open++;
6101 }
6102 }
6103 }
6104 }
6105
6106 ut_a(fil_system->n_open == n_open);
6107
6108 UT_LIST_CHECK(LRU, fil_node_t, fil_system->LRU);
6109
6110 for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
6111 fil_node != 0;
6112 fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
6113
6114 ut_a(fil_node->n_pending == 0);
6115 ut_a(!fil_node->being_extended);
6116 ut_a(fil_node->open);
6117 ut_a(fil_space_belongs_in_lru(fil_node->space));
6118 }
6119
6120 mutex_exit(&fil_system->mutex);
6121
6122 return(TRUE);
6123 }
6124
6125 /********************************************************************//**
6126 Returns TRUE if file address is undefined.
6127 @return TRUE if undefined */
6128 UNIV_INTERN
6129 ibool
fil_addr_is_null(fil_addr_t addr)6130 fil_addr_is_null(
6131 /*=============*/
6132 fil_addr_t addr) /*!< in: address */
6133 {
6134 return(addr.page == FIL_NULL);
6135 }
6136
6137 /********************************************************************//**
6138 Get the predecessor of a file page.
6139 @return FIL_PAGE_PREV */
6140 UNIV_INTERN
6141 ulint
fil_page_get_prev(const byte * page)6142 fil_page_get_prev(
6143 /*==============*/
6144 const byte* page) /*!< in: file page */
6145 {
6146 return(mach_read_from_4(page + FIL_PAGE_PREV));
6147 }
6148
6149 /********************************************************************//**
6150 Get the successor of a file page.
6151 @return FIL_PAGE_NEXT */
6152 UNIV_INTERN
6153 ulint
fil_page_get_next(const byte * page)6154 fil_page_get_next(
6155 /*==============*/
6156 const byte* page) /*!< in: file page */
6157 {
6158 return(mach_read_from_4(page + FIL_PAGE_NEXT));
6159 }
6160
6161 /*********************************************************************//**
6162 Sets the file page type. */
6163 UNIV_INTERN
6164 void
fil_page_set_type(byte * page,ulint type)6165 fil_page_set_type(
6166 /*==============*/
6167 byte* page, /*!< in/out: file page */
6168 ulint type) /*!< in: type */
6169 {
6170 ut_ad(page);
6171
6172 mach_write_to_2(page + FIL_PAGE_TYPE, type);
6173 }
6174
6175 /*********************************************************************//**
6176 Gets the file page type.
6177 @return type; NOTE that if the type has not been written to page, the
6178 return value not defined */
6179 UNIV_INTERN
6180 ulint
fil_page_get_type(const byte * page)6181 fil_page_get_type(
6182 /*==============*/
6183 const byte* page) /*!< in: file page */
6184 {
6185 ut_ad(page);
6186
6187 return(mach_read_from_2(page + FIL_PAGE_TYPE));
6188 }
6189
6190 /****************************************************************//**
6191 Closes the tablespace memory cache. */
6192 UNIV_INTERN
6193 void
fil_close(void)6194 fil_close(void)
6195 /*===========*/
6196 {
6197 #ifndef UNIV_HOTBACKUP
6198 /* The mutex should already have been freed. */
6199 ut_ad(fil_system->mutex.magic_n == 0);
6200 #endif /* !UNIV_HOTBACKUP */
6201
6202 hash_table_free(fil_system->spaces);
6203
6204 hash_table_free(fil_system->name_hash);
6205
6206 ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
6207 ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
6208 ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);
6209
6210 mem_free(fil_system);
6211
6212 fil_system = NULL;
6213 }
6214
6215 /********************************************************************//**
6216 Initializes a buffer control block when the buf_pool is created. */
6217 static
6218 void
fil_buf_block_init(buf_block_t * block,byte * frame)6219 fil_buf_block_init(
6220 /*===============*/
6221 buf_block_t* block, /*!< in: pointer to control block */
6222 byte* frame) /*!< in: pointer to buffer frame */
6223 {
6224 UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
6225
6226 block->frame = frame;
6227
6228 block->page.io_fix = BUF_IO_NONE;
6229 /* There are assertions that check for this. */
6230 block->page.buf_fix_count = 1;
6231 block->page.state = BUF_BLOCK_READY_FOR_USE;
6232
6233 page_zip_des_init(&block->page.zip);
6234 }
6235
6236 struct fil_iterator_t {
6237 pfs_os_file_t file; /*!< File handle */
6238 const char* filepath; /*!< File path name */
6239 os_offset_t start; /*!< From where to start */
6240 os_offset_t end; /*!< Where to stop */
6241 os_offset_t file_size; /*!< File size in bytes */
6242 ulint page_size; /*!< Page size */
6243 ulint n_io_buffers; /*!< Number of pages to use
6244 for IO */
6245 byte* io_buffer; /*!< Buffer to use for IO */
6246 };
6247
6248 /********************************************************************//**
6249 TODO: This can be made parallel trivially by chunking up the file and creating
6250 a callback per thread. . Main benefit will be to use multiple CPUs for
6251 checksums and compressed tables. We have to do compressed tables block by
6252 block right now. Secondly we need to decompress/compress and copy too much
6253 of data. These are CPU intensive.
6254
6255 Iterate over all the pages in the tablespace.
6256 @param iter - Tablespace iterator
6257 @param block - block to use for IO
6258 @param callback - Callback to inspect and update page contents
6259 @retval DB_SUCCESS or error code */
6260 static
6261 dberr_t
fil_iterate(const fil_iterator_t & iter,buf_block_t * block,PageCallback & callback)6262 fil_iterate(
6263 /*========*/
6264 const fil_iterator_t& iter,
6265 buf_block_t* block,
6266 PageCallback& callback)
6267 {
6268 os_offset_t offset;
6269 ulint page_no = 0;
6270 ulint space_id = callback.get_space_id();
6271 ulint n_bytes = iter.n_io_buffers * iter.page_size;
6272
6273 ut_ad(!srv_read_only_mode);
6274
6275 /* TODO: For compressed tables we do a lot of useless
6276 copying for non-index pages. Unfortunately, it is
6277 required by buf_zip_decompress() */
6278
6279 for (offset = iter.start; offset < iter.end; offset += n_bytes) {
6280
6281 byte* io_buffer = iter.io_buffer;
6282
6283 block->frame = io_buffer;
6284
6285 if (callback.get_zip_size() > 0) {
6286 page_zip_des_init(&block->page.zip);
6287 page_zip_set_size(&block->page.zip, iter.page_size);
6288 block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
6289 ut_d(block->page.zip.m_external = true);
6290 ut_ad(iter.page_size == callback.get_zip_size());
6291
6292 /* Zip IO is done in the compressed page buffer. */
6293 io_buffer = block->page.zip.data;
6294 } else {
6295 io_buffer = iter.io_buffer;
6296 }
6297
6298 /* We have to read the exact number of bytes. Otherwise the
6299 InnoDB IO functions croak on failed reads. */
6300
6301 n_bytes = static_cast<ulint>(
6302 ut_min(static_cast<os_offset_t>(n_bytes),
6303 iter.end - offset));
6304
6305 ut_ad(n_bytes > 0);
6306 ut_ad(!(n_bytes % iter.page_size));
6307
6308 if (!os_file_read(iter.file, io_buffer, offset,
6309 (ulint) n_bytes)) {
6310
6311 ib_logf(IB_LOG_LEVEL_ERROR, "os_file_read() failed");
6312
6313 return(DB_IO_ERROR);
6314 }
6315
6316 bool updated = false;
6317 os_offset_t page_off = offset;
6318 ulint n_pages_read = (ulint) n_bytes / iter.page_size;
6319
6320 for (ulint i = 0; i < n_pages_read; ++i) {
6321
6322 buf_block_set_file_page(block, space_id, page_no++);
6323
6324 dberr_t err;
6325
6326 if ((err = callback(page_off, block)) != DB_SUCCESS) {
6327
6328 return(err);
6329
6330 } else if (!updated) {
6331 updated = buf_block_get_state(block)
6332 == BUF_BLOCK_FILE_PAGE;
6333 }
6334
6335 buf_block_set_state(block, BUF_BLOCK_NOT_USED);
6336 buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);
6337
6338 page_off += iter.page_size;
6339 block->frame += iter.page_size;
6340 }
6341
6342 /* A page was updated in the set, write back to disk. */
6343 if (updated
6344 && !os_file_write(
6345 iter.filepath, iter.file, io_buffer,
6346 offset, (ulint) n_bytes)) {
6347
6348 ib_logf(IB_LOG_LEVEL_ERROR, "os_file_write() failed");
6349
6350 return(DB_IO_ERROR);
6351 }
6352 }
6353
6354 return(DB_SUCCESS);
6355 }
6356
6357 /********************************************************************//**
6358 Iterate over all the pages in the tablespace.
6359 @param table - the table definiton in the server
6360 @param n_io_buffers - number of blocks to read and write together
6361 @param callback - functor that will do the page updates
6362 @return DB_SUCCESS or error code */
6363 UNIV_INTERN
6364 dberr_t
fil_tablespace_iterate(dict_table_t * table,ulint n_io_buffers,PageCallback & callback)6365 fil_tablespace_iterate(
6366 /*===================*/
6367 dict_table_t* table,
6368 ulint n_io_buffers,
6369 PageCallback& callback)
6370 {
6371 dberr_t err;
6372 pfs_os_file_t file;
6373 char* filepath;
6374
6375 ut_a(n_io_buffers > 0);
6376 ut_ad(!srv_read_only_mode);
6377
6378 DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
6379 return(DB_CORRUPTION););
6380
6381 if (DICT_TF_HAS_DATA_DIR(table->flags)) {
6382 dict_get_and_save_data_dir_path(table, false);
6383 ut_a(table->data_dir_path);
6384
6385 filepath = os_file_make_remote_pathname(
6386 table->data_dir_path, table->name, "ibd");
6387 } else {
6388 filepath = fil_make_ibd_name(table->name, false);
6389 }
6390
6391 {
6392 ibool success;
6393
6394 file = os_file_create_simple_no_error_handling(
6395 innodb_file_data_key, filepath,
6396 OS_FILE_OPEN, OS_FILE_READ_WRITE, &success);
6397
6398 DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
6399 {
6400 static bool once;
6401
6402 if (!once || ut_rnd_interval(0, 10) == 5) {
6403 once = true;
6404 success = FALSE;
6405 os_file_close(file);
6406 }
6407 });
6408
6409 if (!success) {
6410 /* The following call prints an error message */
6411 os_file_get_last_error(true);
6412
6413 ib_logf(IB_LOG_LEVEL_ERROR,
6414 "Trying to import a tablespace, but could not "
6415 "open the tablespace file %s", filepath);
6416
6417 mem_free(filepath);
6418
6419 return(DB_TABLESPACE_NOT_FOUND);
6420
6421 } else {
6422 err = DB_SUCCESS;
6423 }
6424 }
6425
6426 callback.set_file(filepath, file);
6427
6428 os_offset_t file_size = os_file_get_size(file);
6429 ut_a(file_size != (os_offset_t) -1);
6430
6431 /* The block we will use for every physical page */
6432 buf_block_t block;
6433
6434 memset(&block, 0x0, sizeof(block));
6435
6436 /* Allocate a page to read in the tablespace header, so that we
6437 can determine the page size and zip_size (if it is compressed).
6438 We allocate an extra page in case it is a compressed table. One
6439 page is to ensure alignement. */
6440
6441 void* page_ptr = mem_alloc(3 * UNIV_PAGE_SIZE);
6442 byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));
6443
6444 fil_buf_block_init(&block, page);
6445
6446 /* Read the first page and determine the page and zip size. */
6447
6448 if (!os_file_read(file, page, 0, UNIV_PAGE_SIZE)) {
6449
6450 err = DB_IO_ERROR;
6451
6452 } else if ((err = callback.init(file_size, &block)) == DB_SUCCESS) {
6453 fil_iterator_t iter;
6454
6455 iter.file = file;
6456 iter.start = 0;
6457 iter.end = file_size;
6458 iter.filepath = filepath;
6459 iter.file_size = file_size;
6460 iter.n_io_buffers = n_io_buffers;
6461 iter.page_size = callback.get_page_size();
6462
6463 /* Compressed pages can't be optimised for block IO for now.
6464 We do the IMPORT page by page. */
6465
6466 if (callback.get_zip_size() > 0) {
6467 iter.n_io_buffers = 1;
6468 ut_a(iter.page_size == callback.get_zip_size());
6469 }
6470
6471 /** Add an extra page for compressed page scratch area. */
6472
6473 void* io_buffer = mem_alloc(
6474 (2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);
6475
6476 iter.io_buffer = static_cast<byte*>(
6477 ut_align(io_buffer, UNIV_PAGE_SIZE));
6478
6479 err = fil_iterate(iter, &block, callback);
6480
6481 mem_free(io_buffer);
6482 }
6483
6484 if (err == DB_SUCCESS) {
6485
6486 ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk");
6487
6488 if (!os_file_flush(file)) {
6489 ib_logf(IB_LOG_LEVEL_INFO, "os_file_flush() failed!");
6490 err = DB_IO_ERROR;
6491 } else {
6492 ib_logf(IB_LOG_LEVEL_INFO, "Sync to disk - done!");
6493 }
6494 }
6495
6496 os_file_close(file);
6497
6498 mem_free(page_ptr);
6499 mem_free(filepath);
6500
6501 return(err);
6502 }
6503
6504 /**
6505 Set the tablespace compressed table size.
6506 @return DB_SUCCESS if it is valie or DB_CORRUPTION if not */
6507 dberr_t
set_zip_size(const buf_frame_t * page)6508 PageCallback::set_zip_size(const buf_frame_t* page) UNIV_NOTHROW
6509 {
6510 m_zip_size = fsp_header_get_zip_size(page);
6511
6512 if (!ut_is_2pow(m_zip_size) || m_zip_size > UNIV_ZIP_SIZE_MAX) {
6513 return(DB_CORRUPTION);
6514 }
6515
6516 return(DB_SUCCESS);
6517 }
6518
6519 /********************************************************************//**
6520 Delete the tablespace file and any related files like .cfg.
6521 This should not be called for temporary tables. */
6522 UNIV_INTERN
6523 void
fil_delete_file(const char * ibd_name)6524 fil_delete_file(
6525 /*============*/
6526 const char* ibd_name) /*!< in: filepath of the ibd
6527 tablespace */
6528 {
6529 /* Force a delete of any stale .ibd files that are lying around. */
6530
6531 ib_logf(IB_LOG_LEVEL_INFO, "Deleting %s", ibd_name);
6532
6533 os_file_delete_if_exists(innodb_file_data_key, ibd_name);
6534
6535 char* cfg_name = fil_make_cfg_name(ibd_name);
6536
6537 os_file_delete_if_exists(innodb_file_data_key, cfg_name);
6538
6539 mem_free(cfg_name);
6540 }
6541
6542 /**
6543 Iterate over all the spaces in the space list and fetch the
6544 tablespace names. It will return a copy of the name that must be
6545 freed by the caller using: delete[].
6546 @return DB_SUCCESS if all OK. */
6547 UNIV_INTERN
6548 dberr_t
fil_get_space_names(space_name_list_t & space_name_list)6549 fil_get_space_names(
6550 /*================*/
6551 space_name_list_t& space_name_list)
6552 /*!< in/out: List to append to */
6553 {
6554 fil_space_t* space;
6555 dberr_t err = DB_SUCCESS;
6556
6557 mutex_enter(&fil_system->mutex);
6558
6559 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
6560 space != NULL;
6561 space = UT_LIST_GET_NEXT(space_list, space)) {
6562
6563 if (space->purpose == FIL_TABLESPACE) {
6564 ulint len;
6565 char* name;
6566
6567 len = strlen(space->name);
6568 name = new(std::nothrow) char[len + 1];
6569
6570 if (name == 0) {
6571 /* Caller to free elements allocated so far. */
6572 err = DB_OUT_OF_MEMORY;
6573 break;
6574 }
6575
6576 memcpy(name, space->name, len);
6577 name[len] = 0;
6578
6579 space_name_list.push_back(name);
6580 }
6581 }
6582
6583 mutex_exit(&fil_system->mutex);
6584
6585 return(err);
6586 }
6587
6588 /** Generate redo log for swapping two .ibd files
6589 @param[in] old_table old table
6590 @param[in] new_table new table
6591 @param[in] tmp_name temporary table name
6592 @param[in,out] mtr mini-transaction
6593 @return innodb error code */
6594 UNIV_INTERN
6595 dberr_t
fil_mtr_rename_log(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name,mtr_t * mtr)6596 fil_mtr_rename_log(
6597 const dict_table_t* old_table,
6598 const dict_table_t* new_table,
6599 const char* tmp_name,
6600 mtr_t* mtr)
6601 {
6602 dberr_t err = DB_SUCCESS;
6603 char* old_path;
6604
6605 /* If neither table is file-per-table,
6606 there will be no renaming of files. */
6607 if (old_table->space == TRX_SYS_SPACE
6608 && new_table->space == TRX_SYS_SPACE) {
6609 return(DB_SUCCESS);
6610 }
6611
6612 if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6613 old_path = os_file_make_remote_pathname(
6614 old_table->data_dir_path, old_table->name, "ibd");
6615 } else {
6616 old_path = fil_make_ibd_name(old_table->name, false);
6617 }
6618 if (old_path == NULL) {
6619 return(DB_OUT_OF_MEMORY);
6620 }
6621
6622 if (old_table->space != TRX_SYS_SPACE) {
6623 char* tmp_path;
6624
6625 if (DICT_TF_HAS_DATA_DIR(old_table->flags)) {
6626 tmp_path = os_file_make_remote_pathname(
6627 old_table->data_dir_path, tmp_name, "ibd");
6628 }
6629 else {
6630 tmp_path = fil_make_ibd_name(tmp_name, false);
6631 }
6632
6633 if (tmp_path == NULL) {
6634 mem_free(old_path);
6635 return(DB_OUT_OF_MEMORY);
6636 }
6637
6638 /* Temp filepath must not exist. */
6639 err = fil_rename_tablespace_check(
6640 old_table->space, old_path, tmp_path,
6641 dict_table_is_discarded(old_table));
6642 mem_free(tmp_path);
6643 if (err != DB_SUCCESS) {
6644 mem_free(old_path);
6645 return(err);
6646 }
6647
6648 fil_op_write_log(MLOG_FILE_RENAME, old_table->space,
6649 0, 0, old_table->name, tmp_name, mtr);
6650 }
6651
6652 if (new_table->space != TRX_SYS_SPACE) {
6653
6654 /* Destination filepath must not exist unless this ALTER
6655 TABLE starts and ends with a file_per-table tablespace. */
6656 if (old_table->space == TRX_SYS_SPACE) {
6657 char* new_path = NULL;
6658
6659 if (DICT_TF_HAS_DATA_DIR(new_table->flags)) {
6660 new_path = os_file_make_remote_pathname(
6661 new_table->data_dir_path,
6662 new_table->name, "ibd");
6663 }
6664 else {
6665 new_path = fil_make_ibd_name(
6666 new_table->name, false);
6667 }
6668
6669 if (new_path == NULL) {
6670 mem_free(old_path);
6671 return(DB_OUT_OF_MEMORY);
6672 }
6673
6674 err = fil_rename_tablespace_check(
6675 new_table->space, new_path, old_path,
6676 dict_table_is_discarded(new_table));
6677 mem_free(new_path);
6678 if (err != DB_SUCCESS) {
6679 mem_free(old_path);
6680 return(err);
6681 }
6682 }
6683
6684 fil_op_write_log(MLOG_FILE_RENAME, new_table->space,
6685 0, 0, new_table->name, old_table->name, mtr);
6686
6687 }
6688
6689 mem_free(old_path);
6690
6691 return(err);
6692 }
6693