1 /*****************************************************************************
2
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
24
25 *****************************************************************************/
26
27 /**************************************************//**
28 @file fil/fil0fil.cc
29 The tablespace memory cache
30
31 Created 10/25/1995 Heikki Tuuri
32 *******************************************************/
33
34 #include "ha_prototypes.h"
35
36 #ifndef UNIV_HOTBACKUP
37 #include "btr0btr.h"
38 #include "buf0buf.h"
39 #include "dict0boot.h"
40 #include "dict0dict.h"
41 #include "fsp0file.h"
42 #include "fsp0fsp.h"
43 #include "fsp0space.h"
44 #include "fsp0sysspace.h"
45 #include "hash0hash.h"
46 #include "log0recv.h"
47 #include "mach0data.h"
48 #include "mem0mem.h"
49 #include "mtr0log.h"
50 #include "os0file.h"
51 #include "page0zip.h"
52 #include "row0mysql.h"
53 #include "row0trunc.h"
54 # include "buf0lru.h"
55 # include "ibuf0ibuf.h"
56 # include "os0event.h"
57 # include "sync0sync.h"
58 #endif /* !UNIV_HOTBACKUP */
59 #include "buf0flu.h"
60 #include "srv0start.h"
61 #include "trx0purge.h"
62 #include "ut0new.h"
63 #include "btr0sea.h"
64 #include "log0log.h"
65
/** Attempts to close one file that is on the LRU list.
The caller must hold the fil_sys mutex.
@param[in]	print_info	if true, prints information why it cannot
				close a file
@return true on success, false if the caller should retry later; since
i/o's generally complete in < 100 ms, and as InnoDB writes at most 128
pages from the buffer pool in a batch and then immediately flushes the
files, there is a good chance that a suitable node is found in the LRU
list on the next attempt. */
static bool fil_try_to_close_file_in_LRU(bool print_info);
78
79 /*
80 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
81 =============================================
82
83 The tablespace cache is responsible for providing fast read/write access to
84 tablespaces and logs of the database. File creation and deletion is done
85 in other modules which know more of the logic of the operation, however.
86
87 A tablespace consists of a chain of files. The size of the files does not
88 have to be divisible by the database block size, because we may just leave
89 the last incomplete block unused. When a new file is appended to the
90 tablespace, the maximum size of the file is also specified. At the moment,
91 we think that it is best to extend the file to its maximum size already at
92 the creation of the file, because then we can avoid dynamically extending
93 the file when more space is needed for the tablespace.
94
95 A block's position in the tablespace is specified with a 32-bit unsigned
96 integer. The files in the chain are thought to be catenated, and the block
97 corresponding to an address n is the nth block in the catenated file (where
98 the first block is named the 0th block, and the incomplete block fragments
99 at the end of files are not taken into account). A tablespace can be extended
100 by appending a new file at the end of the chain.
101
102 Our tablespace concept is similar to the one of Oracle.
103
104 To acquire more speed in disk transfers, a technique called disk striping is
105 sometimes used. This means that logical block addresses are divided in a
106 round-robin fashion across several disks. Windows NT supports disk striping,
107 so there we do not need to support it in the database. Disk striping is
108 implemented in hardware in RAID disks. We conclude that it is not necessary
109 to implement it in the database. Oracle 7 does not support disk striping,
110 either.
111
112 Another trick used at some database sites is replacing tablespace files by
113 raw disks, that is, the whole physical disk drive, or a partition of it, is
114 opened as a single file, and it is accessed through byte offsets calculated
115 from the start of the disk or the partition. This is recommended in some
116 books on database tuning to achieve more speed in i/o. Using raw disk
117 certainly prevents the OS from fragmenting disk space, but it is not clear
118 if it really adds speed. We measured on the Pentium 100 MHz + NT + NTFS file
119 system + EIDE Conner disk only a negligible difference in speed when reading
120 from a file, versus reading from a raw disk.
121
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier.
125
126 Some operating systems do not support many open files at the same time,
127 though NT seems to tolerate at least 900 open files. Therefore, we put the
128 open files in an LRU-list. If we need to open another file, we may close the
129 file at the end of the LRU-list. When an i/o-operation is pending on a file,
130 the file cannot be closed. We take the file nodes with pending i/o-operations
131 out of the LRU-list and keep a count of pending operations. When an operation
132 completes, we decrement the count and return the file node to the LRU-list if
133 the count drops to zero. */
134
135 /** This tablespace name is used internally during recovery to open a
136 general tablespace before the data dictionary are recovered and available. */
137 const char general_space_name[] = "innodb_general";
138
139 /** Reference to the server data directory. Usually it is the
140 current working directory ".", but in the MySQL Embedded Server Library
141 it is an absolute path. */
142 const char* fil_path_to_mysql_datadir;
143 Folder folder_mysql_datadir;
144
145 /** Common InnoDB file extentions */
146 const char* dot_ext[] = { "", ".ibd", ".isl", ".cfg", ".cfp" };
147
148 /** The number of fsyncs done to the log */
149 ulint fil_n_log_flushes = 0;
150
151 /** Number of pending redo log flushes */
152 ulint fil_n_pending_log_flushes = 0;
153 /** Number of pending tablespace flushes */
154 ulint fil_n_pending_tablespace_flushes = 0;
155
156 /** Number of files currently open */
157 ulint fil_n_file_opened = 0;
158
159 /** The null file address */
160 fil_addr_t fil_addr_null = {FIL_NULL, 0};
161
162 /** The tablespace memory cache; also the totality of logs (the log
163 data space) is stored here; below we talk about tablespaces, but also
164 the ib_logfiles form a 'space' and it is handled here */
165 struct fil_system_t {
166 #ifndef UNIV_HOTBACKUP
167 ib_mutex_t mutex; /*!< The mutex protecting the cache */
168 #endif /* !UNIV_HOTBACKUP */
169 hash_table_t* spaces; /*!< The hash table of spaces in the
170 system; they are hashed on the space
171 id */
172 hash_table_t* name_hash; /*!< hash table based on the space
173 name */
174 UT_LIST_BASE_NODE_T(fil_node_t) LRU;
175 /*!< base node for the LRU list of the
176 most recently used open files with no
177 pending i/o's; if we start an i/o on
178 the file, we first remove it from this
179 list, and return it to the start of
180 the list when the i/o ends;
181 log files and the system tablespace are
182 not put to this list: they are opened
183 after the startup, and kept open until
184 shutdown */
185 UT_LIST_BASE_NODE_T(fil_space_t) unflushed_spaces;
186 /*!< base node for the list of those
187 tablespaces whose files contain
188 unflushed writes; those spaces have
189 at least one file node where
190 modification_counter > flush_counter */
191 ulint n_open; /*!< number of files currently open */
192 ulint max_n_open; /*!< n_open is not allowed to exceed
193 this */
194 int64_t modification_counter;/*!< when we write to a file we
195 increment this by one */
196 ulint max_assigned_id;/*!< maximum space id in the existing
197 tables, or assigned during the time
198 mysqld has been up; at an InnoDB
199 startup we scan the data dictionary
200 and set here the maximum of the
201 space id's of the tables there */
202 UT_LIST_BASE_NODE_T(fil_space_t) space_list;
203 /*!< list of all file spaces */
204 UT_LIST_BASE_NODE_T(fil_space_t) named_spaces;
205 /*!< list of all file spaces
206 for which a MLOG_FILE_NAME
207 record has been written since
208 the latest redo log checkpoint.
209 Protected only by log_sys->mutex. */
210 bool space_id_reuse_warned;
211 /* !< true if fil_space_create()
212 has issued a warning about
213 potential space_id reuse */
214 };
215
216 /** The tablespace memory cache. This variable is NULL before the module is
217 initialized. */
218 static fil_system_t* fil_system = NULL;
219
220 #ifdef UNIV_HOTBACKUP
221 static ulint srv_data_read;
222 static ulint srv_data_written;
223 #endif /* UNIV_HOTBACKUP */
224
/** Determine if user has explicitly disabled fsync().
On Windows the macro always evaluates to 0; elsewhere it is true when the
purpose of the space is FIL_TYPE_TABLESPACE and the configured flush method
is SRV_UNIX_O_DIRECT_NO_FSYNC.
@param s	tablespace whose purpose is examined */
#ifdef _WIN32
# define fil_buffering_disabled(s)	(0)
#else /* _WIN32 */
# define fil_buffering_disabled(s)		\
	((s)->purpose == FIL_TYPE_TABLESPACE	\
	 && srv_unix_file_flush_method		\
	 == SRV_UNIX_O_DIRECT_NO_FSYNC)
#endif /* _WIN32 */
234
235 /** Determine if the space id is a user tablespace id or not.
236 @param[in] space_id Space ID to check
237 @return true if it is a user tablespace ID */
238 UNIV_INLINE
239 bool
fil_is_user_tablespace_id(ulint space_id)240 fil_is_user_tablespace_id(
241 ulint space_id)
242 {
243 return(!srv_is_undo_tablespace(space_id)
244 && space_id != srv_tmp_space.space_id());
245 }
246
#ifdef UNIV_DEBUG
/** Try fil_validate() every this many times */
# define FIL_VALIDATE_SKIP	17

/** Runs fil_validate() only on every FIL_VALIDATE_SKIP-th call and skips
the check on the calls in between.
@return true if ok or the check was skipped */
static
bool
fil_validate_skip(void)
{
	/** Countdown to the next real fil_validate() call. A signed type
	is used because of the race condition below. */
	static int	calls_left = FIL_VALIDATE_SKIP;

	/* The counter is updated without synchronization. That race does
	not matter, because the counter only serves a heuristic purpose:
	reducing how often the costly fil_validate() check runs in debug
	builds. */
	if (--calls_left <= 0) {
		calls_left = FIL_VALIDATE_SKIP;
		return(fil_validate());
	}

	return(true);
}
#endif /* UNIV_DEBUG */
275
276 /********************************************************************//**
277 Determines if a file node belongs to the least-recently-used list.
278 @return true if the file belongs to fil_system->LRU mutex. */
279 UNIV_INLINE
280 bool
fil_space_belongs_in_lru(const fil_space_t * space)281 fil_space_belongs_in_lru(
282 /*=====================*/
283 const fil_space_t* space) /*!< in: file space */
284 {
285 switch (space->purpose) {
286 case FIL_TYPE_LOG:
287 return(false);
288 case FIL_TYPE_TABLESPACE:
289 case FIL_TYPE_TEMPORARY:
290 case FIL_TYPE_IMPORT:
291 return(fil_is_user_tablespace_id(space->id));
292 }
293
294 ut_ad(0);
295 return(false);
296 }
297
298 /********************************************************************//**
299 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
300
301 Prepares a file node for i/o. Opens the file if it is closed. Updates the
302 pending i/o's field in the node and the system appropriately. Takes the node
303 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
304 mutex.
305 @return false if the file can't be opened, otherwise true */
306 static
307 bool
308 fil_node_prepare_for_io(
309 /*====================*/
310 fil_node_t* node, /*!< in: file node */
311 fil_system_t* system, /*!< in: tablespace memory cache */
312 fil_space_t* space); /*!< in: space */
313
314 /**
315 Updates the data structures when an i/o operation finishes. Updates the
316 pending i/o's field in the node appropriately.
317 @param[in,out] node file node
318 @param[in,out] system tablespace instance
319 @param[in] type IO context */
320 static
321 void
322 fil_node_complete_io(
323 fil_node_t* node,
324 fil_system_t* system,
325 const IORequest& type);
326
327 /** Reads data from a space to a buffer. Remember that the possible incomplete
328 blocks at the end of file are ignored: they are not taken into account when
329 calculating the byte offset within a space.
330 @param[in] page_id page id
331 @param[in] page_size page size
332 @param[in] byte_offset remainder of offset in bytes; in aio this
333 must be divisible by the OS block size
334 @param[in] len how many bytes to read; this must not cross a
335 file boundary; in aio this must be a block size multiple
336 @param[in,out] buf buffer where to store data read; in aio this
337 must be appropriately aligned
338 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
339 i/o on a tablespace which does not exist */
340 UNIV_INLINE
341 dberr_t
fil_read(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)342 fil_read(
343 const page_id_t& page_id,
344 const page_size_t& page_size,
345 ulint byte_offset,
346 ulint len,
347 void* buf)
348 {
349 return(fil_io(IORequestRead, true, page_id, page_size,
350 byte_offset, len, buf, NULL));
351 }
352
353 /** Writes data to a space from a buffer. Remember that the possible incomplete
354 blocks at the end of file are ignored: they are not taken into account when
355 calculating the byte offset within a space.
356 @param[in] page_id page id
357 @param[in] page_size page size
358 @param[in] byte_offset remainder of offset in bytes; in aio this
359 must be divisible by the OS block size
360 @param[in] len how many bytes to write; this must not cross
361 a file boundary; in aio this must be a block size multiple
362 @param[in] buf buffer from which to write; in aio this must
363 be appropriately aligned
364 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
365 i/o on a tablespace which does not exist */
366 UNIV_INLINE
367 dberr_t
fil_write(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)368 fil_write(
369 const page_id_t& page_id,
370 const page_size_t& page_size,
371 ulint byte_offset,
372 ulint len,
373 void* buf)
374 {
375 ut_ad(!srv_read_only_mode);
376
377 return(fil_io(IORequestWrite, true, page_id, page_size,
378 byte_offset, len, buf, NULL));
379 }
380
381 /*******************************************************************//**
382 Returns the table space by a given id, NULL if not found. */
383 UNIV_INLINE
384 fil_space_t*
fil_space_get_by_id(ulint id)385 fil_space_get_by_id(
386 /*================*/
387 ulint id) /*!< in: space id */
388 {
389 fil_space_t* space;
390
391 ut_ad(mutex_own(&fil_system->mutex));
392
393 HASH_SEARCH(hash, fil_system->spaces, id,
394 fil_space_t*, space,
395 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
396 space->id == id);
397
398 return(space);
399 }
400
401 /*******************************************************************//**
402 Returns the table space by a given name, NULL if not found. */
403 UNIV_INLINE
404 fil_space_t*
fil_space_get_by_name(const char * name)405 fil_space_get_by_name(
406 /*==================*/
407 const char* name) /*!< in: space name */
408 {
409 fil_space_t* space;
410 ulint fold;
411
412 ut_ad(mutex_own(&fil_system->mutex));
413
414 fold = ut_fold_string(name);
415
416 HASH_SEARCH(name_hash, fil_system->name_hash, fold,
417 fil_space_t*, space,
418 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N),
419 !strcmp(name, space->name));
420
421 return(space);
422 }
423
424 /** Look up a tablespace.
425 The caller should hold an InnoDB table lock or a MDL that prevents
426 the tablespace from being dropped during the operation,
427 or the caller should be in single-threaded crash recovery mode
428 (no user connections that could drop tablespaces).
429 If this is not the case, fil_space_acquire() and fil_space_release()
430 should be used instead.
431 @param[in] id tablespace ID
432 @return tablespace, or NULL if not found */
433 fil_space_t*
fil_space_get(ulint id)434 fil_space_get(
435 ulint id)
436 {
437 mutex_enter(&fil_system->mutex);
438 fil_space_t* space = fil_space_get_by_id(id);
439 mutex_exit(&fil_system->mutex);
440 ut_ad(space == NULL || space->purpose != FIL_TYPE_LOG);
441 return(space);
442 }
443 #ifndef UNIV_HOTBACKUP
444 /** Returns the latch of a file space.
445 @param[in] id space id
446 @param[out] flags tablespace flags
447 @return latch protecting storage allocation */
448 rw_lock_t*
fil_space_get_latch(ulint id,ulint * flags)449 fil_space_get_latch(
450 ulint id,
451 ulint* flags)
452 {
453 fil_space_t* space;
454
455 ut_ad(fil_system);
456
457 mutex_enter(&fil_system->mutex);
458
459 space = fil_space_get_by_id(id);
460
461 ut_a(space);
462
463 if (flags) {
464 *flags = space->flags;
465 }
466
467 mutex_exit(&fil_system->mutex);
468
469 return(&(space->latch));
470 }
471
#ifdef UNIV_DEBUG
/** Gets the type of a file space. The space must exist: a failed lookup
trips an assertion.
@param[in]	id	tablespace identifier
@return file type */
fil_type_t
fil_space_get_type(
	ulint	id)
{
	ut_ad(fil_system);

	mutex_enter(&fil_system->mutex);

	fil_space_t*	space = fil_space_get_by_id(id);

	ut_a(space);

	/* Copy the purpose while the mutex is still held:
	fil_space_set_imported() modifies space->purpose under this same
	mutex, so reading it after mutex_exit() would be unsynchronized. */
	const fil_type_t	purpose = space->purpose;

	mutex_exit(&fil_system->mutex);

	return(purpose);
}
#endif /* UNIV_DEBUG */
495
496 /** Note that a tablespace has been imported.
497 It is initially marked as FIL_TYPE_IMPORT so that no logging is
498 done during the import process when the space ID is stamped to each page.
499 Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
500 NOTE: temporary tablespaces are never imported.
501 @param[in] id tablespace identifier */
502 void
fil_space_set_imported(ulint id)503 fil_space_set_imported(
504 ulint id)
505 {
506 ut_ad(fil_system != NULL);
507
508 mutex_enter(&fil_system->mutex);
509
510 fil_space_t* space = fil_space_get_by_id(id);
511
512 ut_ad(space->purpose == FIL_TYPE_IMPORT);
513 space->purpose = FIL_TYPE_TABLESPACE;
514
515 mutex_exit(&fil_system->mutex);
516 }
517 #endif /* !UNIV_HOTBACKUP */
518
519 /**********************************************************************//**
520 Checks if all the file nodes in a space are flushed. The caller must hold
521 the fil_system mutex.
522 @return true if all are flushed */
523 static
524 bool
fil_space_is_flushed(fil_space_t * space)525 fil_space_is_flushed(
526 /*=================*/
527 fil_space_t* space) /*!< in: space */
528 {
529 ut_ad(mutex_own(&fil_system->mutex));
530
531 for (const fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
532 node != NULL;
533 node = UT_LIST_GET_NEXT(chain, node)) {
534
535 if (node->modification_counter > node->flush_counter) {
536
537 ut_ad(!fil_buffering_disabled(space));
538 return(false);
539 }
540 }
541
542 return(true);
543 }
544
#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)

#include <sys/ioctl.h>
/** FusionIO atomic write control info */
#define DFS_IOCTL_ATOMIC_WRITE_SET	_IOW(0x95, 2, uint)

/** Try and enable FusionIO atomic writes. The ioctl is only attempted
when the flush method is SRV_UNIX_O_DIRECT.
@param[in]	file	OS file handle
@return true if successful */
bool
fil_fusionio_enable_atomic_write(pfs_os_file_t file)
{
	if (srv_unix_file_flush_method != SRV_UNIX_O_DIRECT) {
		return(false);
	}

	uint	enable = 1;

	ut_a(file.m_file != -1);

	const int	rc = ioctl(
		file.m_file, DFS_IOCTL_ATOMIC_WRITE_SET, &enable);

	return(rc != -1);
}
#endif /* !NO_FALLOCATE && UNIV_LINUX */
571
572 /** Append a file to the chain of files of a space.
573 @param[in] name file name of a file that is not open
574 @param[in] size file size in entire database blocks
575 @param[in,out] space tablespace from fil_space_create()
576 @param[in] is_raw whether this is a raw device or partition
577 @param[in] punch_hole true if supported for this node
578 @param[in] atomic_write true if the file has atomic write enabled
579 @param[in] max_pages maximum number of pages in file,
580 ULINT_MAX means the file size is unlimited.
581 @return pointer to the file name
582 @retval NULL if error */
583 static
584 fil_node_t*
fil_node_create_low(const char * name,ulint size,fil_space_t * space,bool is_raw,bool punch_hole,bool atomic_write,ulint max_pages=ULINT_MAX)585 fil_node_create_low(
586 const char* name,
587 ulint size,
588 fil_space_t* space,
589 bool is_raw,
590 bool punch_hole,
591 bool atomic_write,
592 ulint max_pages = ULINT_MAX)
593 {
594 fil_node_t* node;
595
596 ut_ad(name != NULL);
597 ut_ad(fil_system != NULL);
598
599 if (space == NULL) {
600 return(NULL);
601 }
602
603 node = reinterpret_cast<fil_node_t*>(ut_zalloc_nokey(sizeof(*node)));
604
605 node->name = mem_strdup(name);
606
607 ut_a(!is_raw || srv_start_raw_disk_in_use);
608
609 node->sync_event = os_event_create("fsync_event");
610
611 node->is_raw_disk = is_raw;
612
613 node->size = size;
614
615 node->flush_size = size;
616
617 node->magic_n = FIL_NODE_MAGIC_N;
618
619 node->init_size = size;
620 node->max_size = max_pages;
621
622 mutex_enter(&fil_system->mutex);
623
624 space->size += size;
625
626 node->space = space;
627
628 os_file_stat_t stat_info;
629
630 #ifdef UNIV_DEBUG
631 dberr_t err =
632 #endif /* UNIV_DEBUG */
633
634 os_file_get_status(
635 node->name, &stat_info, false,
636 fsp_is_system_temporary(space->id) ? true : srv_read_only_mode);
637
638 ut_ad(err == DB_SUCCESS);
639
640 node->block_size = stat_info.block_size;
641
642 /* In this debugging mode, we can overcome the limitation of some
643 OSes like Windows that support Punch Hole but have a hole size
644 effectively too large. By setting the block size to be half the
645 page size, we can bypass one of the checks that would normally
646 turn Page Compression off. This execution mode allows compression
647 to be tested even when full punch hole support is not available. */
648 DBUG_EXECUTE_IF("ignore_punch_hole",
649 node->block_size = ut_min(stat_info.block_size,
650 static_cast<size_t>(UNIV_PAGE_SIZE / 2));
651 );
652
653 if (!IORequest::is_punch_hole_supported()
654 || !punch_hole
655 || node->block_size >= srv_page_size) {
656
657 fil_no_punch_hole(node);
658 } else {
659 node->punch_hole = punch_hole;
660 }
661
662 node->atomic_write = atomic_write;
663
664 UT_LIST_ADD_LAST(space->chain, node);
665 mutex_exit(&fil_system->mutex);
666
667 return(node);
668 }
669
670 /** Appends a new file to the chain of files of a space. File must be closed.
671 @param[in] name file name (file must be closed)
672 @param[in] size file size in database blocks, rounded downwards to
673 an integer
674 @param[in,out] space space where to append
675 @param[in] is_raw true if a raw device or a raw disk partition
676 @param[in] atomic_write true if the file has atomic write enabled
677 @param[in] max_pages maximum number of pages in file,
678 ULINT_MAX means the file size is unlimited.
679 @return pointer to the file name
680 @retval NULL if error */
681 char*
fil_node_create(const char * name,ulint size,fil_space_t * space,bool is_raw,bool atomic_write,ulint max_pages)682 fil_node_create(
683 const char* name,
684 ulint size,
685 fil_space_t* space,
686 bool is_raw,
687 bool atomic_write,
688 ulint max_pages)
689 {
690 fil_node_t* node;
691
692 node = fil_node_create_low(
693 name, size, space, is_raw, IORequest::is_punch_hole_supported(),
694 atomic_write, max_pages);
695
696 return(node == NULL ? NULL : node->name);
697 }
698
/** Open a file node of a tablespace.
If the size of the node is not known yet (node->size == 0), or the node is
the first file of a FIL_TYPE_TABLESPACE space that was not truncated and
startup is still before the trx rollback phase, the file is first opened
read-only without error handling, its first page is read, and the space id,
flags and page size found there are checked against the in-memory
tablespace. After that the file is opened for regular (AIO) use, counted as
open, and put on the LRU list if the space belongs there.
The caller must own the fil_system mutex.
@param[in,out]	node	File node; must not be open and must have no
			pending i/o
@return false if the file can't be opened, otherwise true */
static
bool
fil_node_open_file(
	fil_node_t*	node)
{
	os_offset_t	size_bytes;
	bool		success;
	byte*		buf2;
	byte*		page;
	ulint		space_id;
	ulint		flags;
	ulint		min_size;
	bool		read_only_mode;
	fil_space_t*	space = node->space;

	ut_ad(mutex_own(&fil_system->mutex));
	ut_a(node->n_pending == 0);
	ut_a(!node->is_open);

	/* The system temporary tablespace is never opened read-only. */
	read_only_mode = !fsp_is_system_temporary(space->id)
		&& srv_read_only_mode;

	if (node->size == 0
	    || (space->purpose == FIL_TYPE_TABLESPACE
		&& node == UT_LIST_GET_FIRST(space->chain)
		&& !undo::Truncate::was_tablespace_truncated(space->id)
		&& srv_startup_is_before_trx_rollback_phase)) {
		/* We do not know the size of the file yet. First we
		open the file in the normal mode, no async I/O here,
		for simplicity. Then do some checks, and close the
		file again. NOTE that we could not use the simple
		file read function os_file_read() in Windows to read
		from a file opened for async I/O! */

retry:
		node->handle = os_file_create_simple_no_error_handling(
			innodb_data_file_key, node->name, OS_FILE_OPEN,
			OS_FILE_READ_ONLY, read_only_mode, &success);

		if (!success) {
			/* The following call prints an error message */
			ulint err = os_file_get_last_error(true);
			/* NOTE(review): EMFILE + 100 presumably matches how
			os_file_get_last_error() encodes "too many open
			files" - confirm against os0file. In that case,
			close one file from the LRU list and try again. */
			if (err == EMFILE + 100) {
				if (fil_try_to_close_file_in_LRU(true))
					goto retry;
			}

			ib::warn() << "Cannot open '" << node->name << "'."
				" Have you deleted .ibd files under a"
				" running mysqld server?";

			return(false);
		}

		size_bytes = os_file_get_size(node->handle);
		ut_a(size_bytes != (os_offset_t) -1);

#ifdef UNIV_HOTBACKUP
		/* For space 0 only the size is taken from the file; the
		header checks below are skipped. */
		if (space->id == 0) {
			node->size = (ulint) (size_bytes / UNIV_PAGE_SIZE);
			os_file_close(node->handle);
			goto add_size;
		}
#endif /* UNIV_HOTBACKUP */
		ut_a(space->purpose != FIL_TYPE_LOG);

		/* Read the first page of the tablespace */

		/* During recovery two pages are read, because the page
		following the header page is inspected further below. */
		const ulint	buf2_size = recv_recovery_is_on()
			? (2 * UNIV_PAGE_SIZE) : UNIV_PAGE_SIZE;
		buf2 = static_cast<byte*>(
			ut_malloc_nokey(buf2_size + UNIV_PAGE_SIZE));

		/* Align the memory for file i/o if we might have O_DIRECT
		set */
		page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
		ut_ad(page == page_align(page));

		IORequest	request(IORequest::READ);

		/* NOTE(review): the result stored in 'success' is not
		examined before the page contents are used below. */
		success = os_file_read(
			request, node->handle, page, 0, buf2_size);

		space_id = fsp_header_get_space_id(page);
		flags = fsp_header_get_flags(page);

		/* Close the file now that we have read the space id from it */

		os_file_close(node->handle);

		const page_size_t	page_size(flags);

		min_size = FIL_IBD_FILE_INITIAL_SIZE * page_size.physical();

		if (size_bytes < min_size) {

			ib::error() << "The size of tablespace file "
				<< node->name << " is only " << size_bytes
				<< ", should be at least " << min_size << "!";

			ut_error;
		}

		/* The header of the file must agree with what the
		in-memory tablespace says: space id, page size and flags. */
		if (space_id != space->id) {
			ib::fatal() << "Tablespace id is " << space->id
				<< " in the data dictionary but in file "
				<< node->name << " it is " << space_id << "!";
		}

		const page_size_t	space_page_size(space->flags);

		if (!page_size.equals_to(space_page_size)) {
			ib::fatal() << "Tablespace file " << node->name
				<< " has page size " << page_size
				<< " (flags=" << ib::hex(flags) << ") but the"
				" data dictionary expects page size "
				<< space_page_size << " (flags="
				<< ib::hex(space->flags) << ")!";
		}

		if (space->flags != flags) {

			ib::fatal()
				<< "Table flags are "
				<< ib::hex(space->flags) << " in the data"
				" dictionary but the flags in file "
				<< node->name << " are " << ib::hex(flags)
				<< "!";
		}

		{
			/* Cache the space header fields FSP_SIZE,
			FSP_FREE_LIMIT and the length of the FSP_FREE list
			in the in-memory tablespace. */
			ulint	size		= fsp_header_get_field(
				page, FSP_SIZE);
			ulint	free_limit	= fsp_header_get_field(
				page, FSP_FREE_LIMIT);
			ulint	free_len	= flst_get_len(
				FSP_HEADER_OFFSET + FSP_FREE + page);
			ut_ad(space->free_limit == 0
			      || space->free_limit == free_limit);
			ut_ad(space->free_len == 0
			      || space->free_len == free_len);
			space->size_in_header = size;
			space->free_limit = free_limit;
			space->free_len = free_len;

			/* Set estimated value for space->compression_type
			during recovery process. */
			if (recv_recovery_is_on()
			    && (Compression::is_compressed_page(
					page + page_size.physical())
				|| Compression::is_compressed_encrypted_page(
					page + page_size.physical()))) {
				ut_ad(buf2_size >= (2 * UNIV_PAGE_SIZE));
				Compression::meta_t	header;
				Compression::deserialize_header(
					page + page_size.physical(), &header);
				space->compression_type = header.m_algorithm;
			}
		}

		ut_free(buf2);

		/* For an encrypted tablespace, we need to check that the
		encryption key and iv (initial vector) have been read. */
		if (FSP_FLAGS_GET_ENCRYPTION(flags)
		    && !recv_recovery_is_on()) {
			if (space->encryption_type != Encryption::AES) {
				ib::error()
					<< "Can't read encryption"
					<< " key from file "
					<< node->name << "!";
				return(false);
			}
		}

		if (node->size == 0) {
			ulint	extent_size;

			extent_size = page_size.physical() * FSP_EXTENT_SIZE;

			/* After apply-incremental, tablespaces are not extended
			to a whole megabyte. Do not cut off valid data. */
#ifndef UNIV_HOTBACKUP
			/* Truncate the size to a multiple of extent size. */
			if (size_bytes >= extent_size) {
				size_bytes = ut_2pow_round(size_bytes,
							   extent_size);
			}
#endif /* !UNIV_HOTBACKUP */
			node->size = (ulint)
				(size_bytes / page_size.physical());

#ifdef UNIV_HOTBACKUP
add_size:
#endif /* UNIV_HOTBACKUP */
			/* The size of the node was unknown until now, so it
			is added to the size of the space here. */
			space->size += node->size;
		}
	}

	/* printf("Opening file %s\n", node->name); */

	/* Open the file for reading and writing, in Windows normally in the
	unbuffered async I/O mode, though global variables may make
	os_file_create() to fall back to the normal file I/O mode. */

	if (space->purpose == FIL_TYPE_LOG) {
		node->handle = os_file_create(
			innodb_log_file_key, node->name, OS_FILE_OPEN,
			OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success);
	} else if (node->is_raw_disk) {
		node->handle = os_file_create(
			innodb_data_file_key, node->name, OS_FILE_OPEN_RAW,
			OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
	} else {
		node->handle = os_file_create(
			innodb_data_file_key, node->name, OS_FILE_OPEN,
			OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
	}

	/* A failure of this second open is treated as fatal. */
	ut_a(success);

	node->is_open = true;

	fil_system->n_open++;
	fil_n_file_opened++;

	if (fil_space_belongs_in_lru(space)) {

		/* Put the node to the LRU list */
		UT_LIST_ADD_FIRST(fil_system->LRU, node);
	}

	return(true);
}
937
938 /** Close a file node.
939 @param[in,out] node File node */
940 static
941 void
fil_node_close_file(fil_node_t * node)942 fil_node_close_file(
943 fil_node_t* node)
944 {
945 bool ret;
946
947 ut_ad(mutex_own(&(fil_system->mutex)));
948 ut_a(node->is_open);
949 ut_a(node->n_pending == 0);
950 ut_a(node->n_pending_flushes == 0);
951 ut_a(!node->being_extended);
952 #ifndef UNIV_HOTBACKUP
953 ut_a(node->modification_counter == node->flush_counter
954 || node->space->purpose == FIL_TYPE_TEMPORARY
955 || srv_fast_shutdown == 2);
956 #endif /* !UNIV_HOTBACKUP */
957
958 ret = os_file_close(node->handle);
959 ut_a(ret);
960
961 /* printf("Closing file %s\n", node->name); */
962
963 node->is_open = false;
964 ut_a(fil_system->n_open > 0);
965 fil_system->n_open--;
966 fil_n_file_opened--;
967
968 if (fil_space_belongs_in_lru(node->space)) {
969
970 ut_a(UT_LIST_GET_LEN(fil_system->LRU) > 0);
971
972 /* The node is in the LRU list, remove it */
973 UT_LIST_REMOVE(fil_system->LRU, node);
974 }
975 }
976
977 /** Tries to close a file in the LRU list. The caller must hold the fil_sys
978 mutex.
979 @return true if success, false if should retry later; since i/o's
980 generally complete in < 100 ms, and as InnoDB writes at most 128 pages
981 from the buffer pool in a batch, and then immediately flushes the
982 files, there is a good chance that the next time we find a suitable
983 node from the LRU list.
984 @param[in] print_info if true, prints information why it
985 cannot close a file*/
986 static
987 bool
fil_try_to_close_file_in_LRU(bool print_info)988 fil_try_to_close_file_in_LRU(
989
990 bool print_info)
991 {
992 fil_node_t* node;
993
994 ut_ad(mutex_own(&fil_system->mutex));
995
996 if (print_info) {
997 ib::info() << "fil_sys open file LRU len "
998 << UT_LIST_GET_LEN(fil_system->LRU);
999 }
1000
1001 for (node = UT_LIST_GET_LAST(fil_system->LRU);
1002 node != NULL;
1003 node = UT_LIST_GET_PREV(LRU, node)) {
1004
1005 if (node->modification_counter == node->flush_counter
1006 && node->n_pending_flushes == 0
1007 && !node->being_extended) {
1008
1009 fil_node_close_file(node);
1010
1011 return(true);
1012 }
1013
1014 if (!print_info) {
1015 continue;
1016 }
1017
1018 if (node->n_pending_flushes > 0) {
1019
1020 ib::info() << "Cannot close file " << node->name
1021 << ", because n_pending_flushes "
1022 << node->n_pending_flushes;
1023 }
1024
1025 if (node->modification_counter != node->flush_counter) {
1026 ib::warn() << "Cannot close file " << node->name
1027 << ", because modification count "
1028 << node->modification_counter <<
1029 " != flush count " << node->flush_counter;
1030 }
1031
1032 if (node->being_extended) {
1033 ib::info() << "Cannot close file " << node->name
1034 << ", because it is being extended";
1035 }
1036 }
1037
1038 return(false);
1039 }
1040
1041 /*******************************************************************//**
1042 Reserves the fil_system mutex and tries to make sure we can open at least one
1043 file while holding it. This should be called before calling
1044 fil_node_prepare_for_io(), because that function may need to open a file. */
1045 static
1046 void
fil_mutex_enter_and_prepare_for_io(ulint space_id)1047 fil_mutex_enter_and_prepare_for_io(
1048 /*===============================*/
1049 ulint space_id) /*!< in: space id */
1050 {
1051 fil_space_t* space;
1052 bool success;
1053 bool print_info = false;
1054 ulint count = 0;
1055 ulint count2 = 0;
1056
1057 for (;;) {
1058 mutex_enter(&fil_system->mutex);
1059
1060 if (space_id == 0 || space_id >= SRV_LOG_SPACE_FIRST_ID) {
1061 /* We keep log files and system tablespace files always
1062 open; this is important in preventing deadlocks in this
1063 module, as a page read completion often performs
1064 another read from the insert buffer. The insert buffer
1065 is in tablespace 0, and we cannot end up waiting in
1066 this function. */
1067 return;
1068 }
1069
1070 space = fil_space_get_by_id(space_id);
1071
1072 if (space != NULL && space->stop_ios) {
1073 /* We are going to do a rename file and want to stop
1074 new i/o's for a while. */
1075
1076 if (count2 > 20000) {
1077 ib::warn() << "Tablespace " << space->name
1078 << " has i/o ops stopped for a long"
1079 " time " << count2;
1080 }
1081
1082 mutex_exit(&fil_system->mutex);
1083
1084 #ifndef UNIV_HOTBACKUP
1085
1086 /* Wake the i/o-handler threads to make sure pending
1087 i/o's are performed */
1088 os_aio_simulated_wake_handler_threads();
1089
1090 /* The sleep here is just to give IO helper threads a
1091 bit of time to do some work. It is not required that
1092 all IO related to the tablespace being renamed must
1093 be flushed here as we do fil_flush() in
1094 fil_rename_tablespace() as well. */
1095 os_thread_sleep(20000);
1096
1097 #endif /* UNIV_HOTBACKUP */
1098
1099 /* Flush tablespaces so that we can close modified
1100 files in the LRU list */
1101 fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
1102
1103 os_thread_sleep(20000);
1104
1105 count2++;
1106
1107 continue;
1108 }
1109
1110 if (fil_system->n_open < fil_system->max_n_open) {
1111
1112 return;
1113 }
1114
1115 /* If the file is already open, no need to do anything; if the
1116 space does not exist, we handle the situation in the function
1117 which called this function. */
1118
1119 if (space == NULL || UT_LIST_GET_FIRST(space->chain)->is_open) {
1120
1121 return;
1122 }
1123
1124 if (count > 1) {
1125 print_info = true;
1126 }
1127
1128 /* Too many files are open, try to close some */
1129 do {
1130 success = fil_try_to_close_file_in_LRU(print_info);
1131
1132 } while (success
1133 && fil_system->n_open >= fil_system->max_n_open);
1134
1135 if (fil_system->n_open < fil_system->max_n_open) {
1136 /* Ok */
1137 return;
1138 }
1139
1140 if (count >= 2) {
1141 ib::warn() << "Too many (" << fil_system->n_open
1142 << ") files stay open while the maximum"
1143 " allowed value would be "
1144 << fil_system->max_n_open << ". You may need"
1145 " to raise the value of innodb_open_files in"
1146 " my.cnf.";
1147
1148 return;
1149 }
1150
1151 mutex_exit(&fil_system->mutex);
1152
1153 #ifndef UNIV_HOTBACKUP
1154 /* Wake the i/o-handler threads to make sure pending i/o's are
1155 performed */
1156 os_aio_simulated_wake_handler_threads();
1157
1158 os_thread_sleep(20000);
1159 #endif /* !UNIV_HOTBACKUP */
1160 /* Flush tablespaces so that we can close modified files in
1161 the LRU list. */
1162
1163 fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
1164
1165 count++;
1166 }
1167 }
1168
1169 /** Prepare to free a file node object from a tablespace memory cache.
1170 @param[in,out] node file node
1171 @param[in] space tablespace */
1172 static
1173 void
fil_node_close_to_free(fil_node_t * node,fil_space_t * space)1174 fil_node_close_to_free(
1175 fil_node_t* node,
1176 fil_space_t* space)
1177 {
1178 ut_ad(mutex_own(&fil_system->mutex));
1179 ut_a(node->magic_n == FIL_NODE_MAGIC_N);
1180 ut_a(node->n_pending == 0);
1181 ut_a(!node->being_extended);
1182
1183 if (node->is_open) {
1184 /* We fool the assertion in fil_node_close_file() to think
1185 there are no unflushed modifications in the file */
1186
1187 node->modification_counter = node->flush_counter;
1188 os_event_set(node->sync_event);
1189
1190 if (fil_buffering_disabled(space)) {
1191
1192 ut_ad(!space->is_in_unflushed_spaces);
1193 ut_ad(fil_space_is_flushed(space));
1194
1195 } else if (space->is_in_unflushed_spaces
1196 && fil_space_is_flushed(space)) {
1197
1198 space->is_in_unflushed_spaces = false;
1199
1200 UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
1201 }
1202
1203 fil_node_close_file(node);
1204 }
1205 }
1206
1207 /** Detach a space object from the tablespace memory cache.
1208 Closes the files in the chain but does not delete them.
1209 There must not be any pending i/o's or flushes on the files.
1210 @param[in,out] space tablespace */
1211 static
1212 void
fil_space_detach(fil_space_t * space)1213 fil_space_detach(
1214 fil_space_t* space)
1215 {
1216 ut_ad(mutex_own(&fil_system->mutex));
1217
1218 HASH_DELETE(fil_space_t, hash, fil_system->spaces, space->id, space);
1219
1220 fil_space_t* fnamespace = fil_space_get_by_name(space->name);
1221
1222 ut_a(space == fnamespace);
1223
1224 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
1225 ut_fold_string(space->name), space);
1226
1227 if (space->is_in_unflushed_spaces) {
1228
1229 ut_ad(!fil_buffering_disabled(space));
1230 space->is_in_unflushed_spaces = false;
1231
1232 UT_LIST_REMOVE(fil_system->unflushed_spaces, space);
1233 }
1234
1235 UT_LIST_REMOVE(fil_system->space_list, space);
1236
1237 ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
1238 ut_a(space->n_pending_flushes == 0);
1239
1240 for (fil_node_t* fil_node = UT_LIST_GET_FIRST(space->chain);
1241 fil_node != NULL;
1242 fil_node = UT_LIST_GET_NEXT(chain, fil_node)) {
1243
1244 fil_node_close_to_free(fil_node, space);
1245 }
1246 }
1247
1248 /** Free a tablespace object on which fil_space_detach() was invoked.
1249 There must not be any pending i/o's or flushes on the files.
1250 @param[in,out] space tablespace */
1251 static
1252 void
fil_space_free_low(fil_space_t * space)1253 fil_space_free_low(
1254 fil_space_t* space)
1255 {
1256 /* The tablespace must not be in fil_system->named_spaces. */
1257 ut_ad(srv_fast_shutdown == 2 || space->max_lsn == 0);
1258
1259 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
1260 node != NULL; ) {
1261 ut_d(space->size -= node->size);
1262 os_event_destroy(node->sync_event);
1263 ut_free(node->name);
1264 fil_node_t* old_node = node;
1265 node = UT_LIST_GET_NEXT(chain, node);
1266 ut_free(old_node);
1267 }
1268
1269 ut_ad(space->size == 0);
1270
1271 rw_lock_free(&space->latch);
1272
1273 ut_free(space->name);
1274 ut_free(space);
1275 }
1276
1277 /** Frees a space object from the tablespace memory cache.
1278 Closes the files in the chain but does not delete them.
1279 There must not be any pending i/o's or flushes on the files.
1280 @param[in] id tablespace identifier
1281 @param[in] x_latched whether the caller holds X-mode space->latch
1282 @return true if success */
1283 bool
fil_space_free(ulint id,bool x_latched)1284 fil_space_free(
1285 ulint id,
1286 bool x_latched)
1287 {
1288 ut_ad(id != TRX_SYS_SPACE);
1289
1290 mutex_enter(&fil_system->mutex);
1291 fil_space_t* space = fil_space_get_by_id(id);
1292
1293 if (space != NULL) {
1294 fil_space_detach(space);
1295 }
1296
1297 mutex_exit(&fil_system->mutex);
1298
1299 if (space != NULL) {
1300 if (x_latched) {
1301 rw_lock_x_unlock(&space->latch);
1302 }
1303
1304 bool need_mutex = !recv_recovery_on;
1305
1306 if (need_mutex) {
1307 log_mutex_enter();
1308 }
1309
1310 ut_ad(log_mutex_own());
1311
1312 if (space->max_lsn != 0) {
1313 ut_d(space->max_lsn = 0);
1314 UT_LIST_REMOVE(fil_system->named_spaces, space);
1315 }
1316
1317 if (need_mutex) {
1318 log_mutex_exit();
1319 }
1320
1321 fil_space_free_low(space);
1322 }
1323
1324 return(space != NULL);
1325 }
1326
1327 /** Create a space memory object and put it to the fil_system hash table.
1328 The tablespace name is independent from the tablespace file-name.
1329 Error messages are issued to the server log.
1330 @param[in] name Tablespace name
1331 @param[in] id Tablespace identifier
1332 @param[in] flags Tablespace flags
1333 @param[in] purpose Tablespace purpose
1334 @return pointer to created tablespace, to be filled in with fil_node_create()
1335 @retval NULL on failure (such as when the same tablespace exists) */
1336 fil_space_t*
fil_space_create(const char * name,ulint id,ulint flags,fil_type_t purpose)1337 fil_space_create(
1338 const char* name,
1339 ulint id,
1340 ulint flags,
1341 fil_type_t purpose)
1342 {
1343 fil_space_t* space;
1344
1345 ut_ad(fil_system);
1346 ut_ad(fsp_flags_is_valid(flags));
1347 ut_ad(srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
1348
1349 DBUG_EXECUTE_IF("fil_space_create_failure", return(NULL););
1350
1351 mutex_enter(&fil_system->mutex);
1352
1353 /* Look for a matching tablespace. */
1354 space = fil_space_get_by_name(name);
1355
1356 if (space != NULL) {
1357 mutex_exit(&fil_system->mutex);
1358
1359 ib::warn() << "Tablespace '" << name << "' exists in the"
1360 " cache with id " << space->id << " != " << id;
1361
1362 return(NULL);
1363 }
1364
1365 space = fil_space_get_by_id(id);
1366
1367 if (space != NULL) {
1368 ib::error() << "Trying to add tablespace '" << name
1369 << "' with id " << id
1370 << " to the tablespace memory cache, but tablespace '"
1371 << space->name << "' already exists in the cache!";
1372 mutex_exit(&fil_system->mutex);
1373 return(NULL);
1374 }
1375
1376 space = static_cast<fil_space_t*>(ut_zalloc_nokey(sizeof(*space)));
1377
1378 space->id = id;
1379 space->name = mem_strdup(name);
1380
1381 UT_LIST_INIT(space->chain, &fil_node_t::chain);
1382
1383 /* This warning is not applicable while MEB scanning the redo logs */
1384 #ifndef UNIV_HOTBACKUP
1385 if (fil_type_is_data(purpose)
1386 && !recv_recovery_on
1387 && id > fil_system->max_assigned_id) {
1388
1389 if (!fil_system->space_id_reuse_warned) {
1390 fil_system->space_id_reuse_warned = true;
1391
1392 ib::warn() << "Allocated tablespace ID " << id
1393 << " for " << name << ", old maximum was "
1394 << fil_system->max_assigned_id;
1395 }
1396
1397 fil_system->max_assigned_id = id;
1398 }
1399 #endif /* !UNIV_HOTBACKUP */
1400 space->purpose = purpose;
1401 space->flags = flags;
1402
1403 space->magic_n = FIL_SPACE_MAGIC_N;
1404
1405 space->encryption_type = Encryption::NONE;
1406
1407 rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
1408
1409 if (space->purpose == FIL_TYPE_TEMPORARY) {
1410 #ifndef UNIV_HOTBACKUP
1411 ut_d(space->latch.set_temp_fsp());
1412 #endif /* !UNIV_HOTBACKUP */
1413 }
1414
1415 HASH_INSERT(fil_space_t, hash, fil_system->spaces, id, space);
1416
1417 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
1418 ut_fold_string(name), space);
1419
1420 UT_LIST_ADD_LAST(fil_system->space_list, space);
1421
1422 if (id < SRV_LOG_SPACE_FIRST_ID && id > fil_system->max_assigned_id) {
1423
1424 fil_system->max_assigned_id = id;
1425 }
1426
1427 mutex_exit(&fil_system->mutex);
1428
1429 return(space);
1430 }
1431
1432 /*******************************************************************//**
1433 Assigns a new space id for a new single-table tablespace. This works simply by
1434 incrementing the global counter. If 4 billion id's is not enough, we may need
1435 to recycle id's.
1436 @return true if assigned, false if not */
1437 bool
fil_assign_new_space_id(ulint * space_id)1438 fil_assign_new_space_id(
1439 /*====================*/
1440 ulint* space_id) /*!< in/out: space id */
1441 {
1442 ulint id;
1443 bool success;
1444
1445 mutex_enter(&fil_system->mutex);
1446
1447 id = *space_id;
1448
1449 if (id < fil_system->max_assigned_id) {
1450 id = fil_system->max_assigned_id;
1451 }
1452
1453 id++;
1454
1455 if (id > (SRV_LOG_SPACE_FIRST_ID / 2) && (id % 1000000UL == 0)) {
1456 ib::warn() << "You are running out of new single-table"
1457 " tablespace id's. Current counter is " << id
1458 << " and it must not exceed" << SRV_LOG_SPACE_FIRST_ID
1459 << "! To reset the counter to zero you have to dump"
1460 " all your tables and recreate the whole InnoDB"
1461 " installation.";
1462 }
1463
1464 success = (id < SRV_LOG_SPACE_FIRST_ID);
1465
1466 if (success) {
1467 *space_id = fil_system->max_assigned_id = id;
1468 } else {
1469 ib::warn() << "You have run out of single-table tablespace"
1470 " id's! Current counter is " << id
1471 << ". To reset the counter to zero"
1472 " you have to dump all your tables and"
1473 " recreate the whole InnoDB installation.";
1474 *space_id = ULINT_UNDEFINED;
1475 }
1476
1477 mutex_exit(&fil_system->mutex);
1478
1479 return(success);
1480 }
1481
1482 /*******************************************************************//**
1483 Returns a pointer to the fil_space_t that is in the memory cache
1484 associated with a space id. The caller must lock fil_system->mutex.
1485 @return file_space_t pointer, NULL if space not found */
1486 UNIV_INLINE
1487 fil_space_t*
fil_space_get_space(ulint id)1488 fil_space_get_space(
1489 /*================*/
1490 ulint id) /*!< in: space id */
1491 {
1492 fil_space_t* space;
1493 fil_node_t* node;
1494
1495 ut_ad(fil_system);
1496
1497 space = fil_space_get_by_id(id);
1498 if (space == NULL || space->size != 0) {
1499 return(space);
1500 }
1501
1502 switch (space->purpose) {
1503 case FIL_TYPE_LOG:
1504 break;
1505 case FIL_TYPE_TEMPORARY:
1506 case FIL_TYPE_TABLESPACE:
1507 case FIL_TYPE_IMPORT:
1508 ut_a(id != 0);
1509
1510 mutex_exit(&fil_system->mutex);
1511
1512 /* It is possible that the space gets evicted at this point
1513 before the fil_mutex_enter_and_prepare_for_io() acquires
1514 the fil_system->mutex. Check for this after completing the
1515 call to fil_mutex_enter_and_prepare_for_io(). */
1516 fil_mutex_enter_and_prepare_for_io(id);
1517
1518 /* We are still holding the fil_system->mutex. Check if
1519 the space is still in memory cache. */
1520 space = fil_space_get_by_id(id);
1521 if (space == NULL) {
1522 return(NULL);
1523 }
1524
1525 /* The following code must change when InnoDB supports
1526 multiple datafiles per tablespace. */
1527 ut_a(1 == UT_LIST_GET_LEN(space->chain));
1528
1529 node = UT_LIST_GET_FIRST(space->chain);
1530
1531 /* It must be a single-table tablespace and we have not opened
1532 the file yet; the following calls will open it and update the
1533 size fields */
1534
1535 if (!fil_node_prepare_for_io(node, fil_system, space)) {
1536 /* The single-table tablespace can't be opened,
1537 because the ibd file is missing. */
1538 return(NULL);
1539 }
1540
1541 fil_node_complete_io(node, fil_system, IORequestRead);
1542 }
1543
1544 return(space);
1545 }
1546
1547 /** Returns the path from the first fil_node_t found with this space ID.
1548 The caller is responsible for freeing the memory allocated here for the
1549 value returned.
1550 @param[in] id Tablespace ID
1551 @return own: A copy of fil_node_t::path, NULL if space ID is zero
1552 or not found. */
1553 char*
fil_space_get_first_path(ulint id)1554 fil_space_get_first_path(
1555 ulint id)
1556 {
1557 fil_space_t* space;
1558 fil_node_t* node;
1559 char* path;
1560
1561 ut_ad(fil_system);
1562 ut_a(id);
1563
1564 fil_mutex_enter_and_prepare_for_io(id);
1565
1566 space = fil_space_get_space(id);
1567
1568 if (space == NULL) {
1569 mutex_exit(&fil_system->mutex);
1570
1571 return(NULL);
1572 }
1573
1574 ut_ad(mutex_own(&fil_system->mutex));
1575
1576 node = UT_LIST_GET_FIRST(space->chain);
1577
1578 path = mem_strdup(node->name);
1579
1580 mutex_exit(&fil_system->mutex);
1581
1582 return(path);
1583 }
1584
1585 /*******************************************************************//**
1586 Returns the size of the space in pages. The tablespace must be cached in the
1587 memory cache.
1588 @return space size, 0 if space not found */
1589 ulint
fil_space_get_size(ulint id)1590 fil_space_get_size(
1591 /*===============*/
1592 ulint id) /*!< in: space id */
1593 {
1594 fil_space_t* space;
1595 ulint size;
1596
1597 ut_ad(fil_system);
1598 mutex_enter(&fil_system->mutex);
1599
1600 space = fil_space_get_space(id);
1601
1602 size = space ? space->size : 0;
1603
1604 mutex_exit(&fil_system->mutex);
1605
1606 return(size);
1607 }
1608
1609 /*******************************************************************//**
1610 Returns the flags of the space. The tablespace must be cached
1611 in the memory cache.
1612 @return flags, ULINT_UNDEFINED if space not found */
1613 ulint
fil_space_get_flags(ulint id)1614 fil_space_get_flags(
1615 /*================*/
1616 ulint id) /*!< in: space id */
1617 {
1618 fil_space_t* space;
1619 ulint flags;
1620
1621 ut_ad(fil_system);
1622
1623 mutex_enter(&fil_system->mutex);
1624
1625 space = fil_space_get_space(id);
1626
1627 if (space == NULL) {
1628 mutex_exit(&fil_system->mutex);
1629
1630 return(ULINT_UNDEFINED);
1631 }
1632
1633 flags = space->flags;
1634
1635 mutex_exit(&fil_system->mutex);
1636
1637 return(flags);
1638 }
1639
1640 /** Check if table is mark for truncate.
1641 @param[in] id space id
1642 @return true if tablespace is marked for truncate. */
1643 bool
fil_space_is_being_truncated(ulint id)1644 fil_space_is_being_truncated(
1645 ulint id)
1646 {
1647 bool mark_for_truncate;
1648 mutex_enter(&fil_system->mutex);
1649 mark_for_truncate = fil_space_get_by_id(id)->is_being_truncated;
1650 mutex_exit(&fil_system->mutex);
1651 return(mark_for_truncate);
1652 }
1653
1654 /** Open each fil_node_t of a named fil_space_t if not already open.
1655 @param[in] name Tablespace name
1656 @return true if all nodes are open */
1657 bool
fil_space_open(const char * name)1658 fil_space_open(
1659 const char* name)
1660 {
1661 ut_ad(fil_system != NULL);
1662
1663 mutex_enter(&fil_system->mutex);
1664
1665 fil_space_t* space = fil_space_get_by_name(name);
1666 fil_node_t* node;
1667
1668 for (node = UT_LIST_GET_FIRST(space->chain);
1669 node != NULL;
1670 node = UT_LIST_GET_NEXT(chain, node)) {
1671
1672 if (!node->is_open
1673 && !fil_node_open_file(node)) {
1674 mutex_exit(&fil_system->mutex);
1675 return(false);
1676 }
1677 }
1678
1679 mutex_exit(&fil_system->mutex);
1680
1681 return(true);
1682 }
1683
1684 /** Close each fil_node_t of a named fil_space_t if open.
1685 @param[in] name Tablespace name */
1686 void
fil_space_close(const char * name)1687 fil_space_close(
1688 const char* name)
1689 {
1690 if (fil_system == NULL) {
1691 return;
1692 }
1693
1694 mutex_enter(&fil_system->mutex);
1695
1696 fil_space_t* space = fil_space_get_by_name(name);
1697 if (space == NULL) {
1698 mutex_exit(&fil_system->mutex);
1699 return;
1700 }
1701
1702 for (fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
1703 node != NULL;
1704 node = UT_LIST_GET_NEXT(chain, node)) {
1705
1706 if (node->is_open) {
1707 fil_node_close_file(node);
1708 }
1709 }
1710
1711 mutex_exit(&fil_system->mutex);
1712 }
1713
1714 /** Returns the page size of the space and whether it is compressed or not.
1715 The tablespace must be cached in the memory cache.
1716 @param[in] id space id
1717 @param[out] found true if tablespace was found
1718 @return page size */
1719 const page_size_t
fil_space_get_page_size(ulint id,bool * found)1720 fil_space_get_page_size(
1721 ulint id,
1722 bool* found)
1723 {
1724 const ulint flags = fil_space_get_flags(id);
1725
1726 if (flags == ULINT_UNDEFINED) {
1727 *found = false;
1728 return(univ_page_size);
1729 }
1730
1731 *found = true;
1732
1733 return(page_size_t(flags));
1734 }
1735
1736 /****************************************************************//**
1737 Initializes the tablespace memory cache. */
1738 void
fil_init(ulint hash_size,ulint max_n_open)1739 fil_init(
1740 /*=====*/
1741 ulint hash_size, /*!< in: hash table size */
1742 ulint max_n_open) /*!< in: max number of open files */
1743 {
1744 ut_a(fil_system == NULL);
1745
1746 ut_a(hash_size > 0);
1747 ut_a(max_n_open > 0);
1748
1749 fil_system = static_cast<fil_system_t*>(
1750 ut_zalloc_nokey(sizeof(*fil_system)));
1751
1752 mutex_create(LATCH_ID_FIL_SYSTEM, &fil_system->mutex);
1753
1754 fil_system->spaces = hash_create(hash_size);
1755 fil_system->name_hash = hash_create(hash_size);
1756
1757 UT_LIST_INIT(fil_system->LRU, &fil_node_t::LRU);
1758 UT_LIST_INIT(fil_system->space_list, &fil_space_t::space_list);
1759 UT_LIST_INIT(fil_system->unflushed_spaces,
1760 &fil_space_t::unflushed_spaces);
1761 UT_LIST_INIT(fil_system->named_spaces, &fil_space_t::named_spaces);
1762
1763 fil_system->max_n_open = max_n_open;
1764 }
1765
1766 /*******************************************************************//**
1767 Opens all log files and system tablespace data files. They stay open until the
1768 database server shutdown. This should be called at a server startup after the
1769 space objects for the log and the system tablespace have been created. The
1770 purpose of this operation is to make sure we never run out of file descriptors
1771 if we need to read from the insert buffer or to write to the log. */
1772 void
fil_open_log_and_system_tablespace_files(void)1773 fil_open_log_and_system_tablespace_files(void)
1774 /*==========================================*/
1775 {
1776 fil_space_t* space;
1777
1778 mutex_enter(&fil_system->mutex);
1779
1780 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1781 space != NULL;
1782 space = UT_LIST_GET_NEXT(space_list, space)) {
1783
1784 fil_node_t* node;
1785
1786 if (fil_space_belongs_in_lru(space)) {
1787
1788 continue;
1789 }
1790
1791 for (node = UT_LIST_GET_FIRST(space->chain);
1792 node != NULL;
1793 node = UT_LIST_GET_NEXT(chain, node)) {
1794
1795 if (!node->is_open) {
1796 if (!fil_node_open_file(node)) {
1797 /* This func is called during server's
1798 startup. If some file of log or system
1799 tablespace is missing, the server
1800 can't start successfully. So we should
1801 assert for it. */
1802 ut_a(0);
1803 }
1804 }
1805
1806 if (fil_system->max_n_open < 10 + fil_system->n_open) {
1807
1808 ib::warn() << "You must raise the value of"
1809 " innodb_open_files in my.cnf!"
1810 " Remember that InnoDB keeps all"
1811 " log files and all system"
1812 " tablespace files open"
1813 " for the whole time mysqld is"
1814 " running, and needs to open also"
1815 " some .ibd files if the"
1816 " file-per-table storage model is used."
1817 " Current open files "
1818 << fil_system->n_open
1819 << ", max allowed open files "
1820 << fil_system->max_n_open
1821 << ".";
1822 }
1823 }
1824 }
1825
1826 mutex_exit(&fil_system->mutex);
1827 }
1828
1829 /*******************************************************************//**
1830 Closes all open files. There must not be any pending i/o's or not flushed
1831 modifications in the files. */
1832 void
fil_close_all_files(void)1833 fil_close_all_files(void)
1834 /*=====================*/
1835 {
1836 fil_space_t* space;
1837
1838 /* At shutdown, we should not have any files in this list. */
1839 ut_ad(srv_fast_shutdown == 2
1840 || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
1841
1842 mutex_enter(&fil_system->mutex);
1843
1844 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
1845 space != NULL; ) {
1846 fil_node_t* node;
1847 fil_space_t* prev_space = space;
1848
1849 for (node = UT_LIST_GET_FIRST(space->chain);
1850 node != NULL;
1851 node = UT_LIST_GET_NEXT(chain, node)) {
1852
1853 if (node->is_open) {
1854 fil_node_close_file(node);
1855 }
1856 }
1857
1858 space = UT_LIST_GET_NEXT(space_list, space);
1859 fil_space_detach(prev_space);
1860 fil_space_free_low(prev_space);
1861 }
1862
1863 mutex_exit(&fil_system->mutex);
1864
1865 ut_ad(srv_fast_shutdown == 2
1866 || UT_LIST_GET_LEN(fil_system->named_spaces) == 0);
1867 }
1868
1869 /*******************************************************************//**
1870 Closes the redo log files. There must not be any pending i/o's or not
1871 flushed modifications in the files. */
1872 void
fil_close_log_files(bool free)1873 fil_close_log_files(
1874 /*================*/
1875 bool free) /*!< in: whether to free the memory object */
1876 {
1877 fil_space_t* space;
1878
1879 mutex_enter(&fil_system->mutex);
1880
1881 space = UT_LIST_GET_FIRST(fil_system->space_list);
1882
1883 while (space != NULL) {
1884 fil_node_t* node;
1885 fil_space_t* prev_space = space;
1886
1887 if (space->purpose != FIL_TYPE_LOG) {
1888 space = UT_LIST_GET_NEXT(space_list, space);
1889 continue;
1890 }
1891
1892 /* Log files are not in the fil_system->named_spaces list. */
1893 ut_ad(space->max_lsn == 0);
1894
1895 for (node = UT_LIST_GET_FIRST(space->chain);
1896 node != NULL;
1897 node = UT_LIST_GET_NEXT(chain, node)) {
1898
1899 if (node->is_open) {
1900 fil_node_close_file(node);
1901 }
1902 }
1903
1904 space = UT_LIST_GET_NEXT(space_list, space);
1905
1906 if (free) {
1907 fil_space_detach(prev_space);
1908 fil_space_free_low(prev_space);
1909 }
1910 }
1911
1912 mutex_exit(&fil_system->mutex);
1913 }
1914
1915 /*******************************************************************//**
1916 Sets the max tablespace id counter if the given number is bigger than the
1917 previous value. */
1918 void
fil_set_max_space_id_if_bigger(ulint max_id)1919 fil_set_max_space_id_if_bigger(
1920 /*===========================*/
1921 ulint max_id) /*!< in: maximum known id */
1922 {
1923 if (max_id >= SRV_LOG_SPACE_FIRST_ID) {
1924 ib::fatal() << "Max tablespace id is too high, " << max_id;
1925 }
1926
1927 mutex_enter(&fil_system->mutex);
1928
1929 if (fil_system->max_assigned_id < max_id) {
1930
1931 fil_system->max_assigned_id = max_id;
1932 }
1933
1934 mutex_exit(&fil_system->mutex);
1935 }
1936
1937 /** Write the flushed LSN to the page header of the first page in the
1938 system tablespace.
1939 @param[in] lsn flushed LSN
1940 @return DB_SUCCESS or error number */
1941 dberr_t
fil_write_flushed_lsn(lsn_t lsn)1942 fil_write_flushed_lsn(
1943 lsn_t lsn)
1944 {
1945 byte* buf1;
1946 byte* buf;
1947 dberr_t err;
1948
1949 buf1 = static_cast<byte*>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
1950 buf = static_cast<byte*>(ut_align(buf1, UNIV_PAGE_SIZE));
1951
1952 const page_id_t page_id(TRX_SYS_SPACE, 0);
1953
1954 err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(),
1955 buf);
1956
1957 if (err == DB_SUCCESS) {
1958 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
1959
1960 err = fil_write(page_id, univ_page_size, 0,
1961 univ_page_size.physical(), buf);
1962
1963 fil_flush_file_spaces(FIL_TYPE_TABLESPACE);
1964 }
1965
1966 ut_free(buf1);
1967
1968 return(err);
1969 }
1970 #ifndef UNIV_HOTBACKUP
1971 /** Acquire a tablespace when it could be dropped concurrently.
1972 Used by background threads that do not necessarily hold proper locks
1973 for concurrency control.
1974 @param[in] id tablespace ID
1975 @param[in] silent whether to silently ignore missing tablespaces
1976 @return the tablespace, or NULL if missing or being deleted */
1977 inline
1978 fil_space_t*
fil_space_acquire_low(ulint id,bool silent)1979 fil_space_acquire_low(
1980 ulint id,
1981 bool silent)
1982 {
1983 fil_space_t* space;
1984
1985 mutex_enter(&fil_system->mutex);
1986
1987 space = fil_space_get_by_id(id);
1988
1989 if (space == NULL) {
1990 if (!silent) {
1991 ib::warn() << "Trying to access missing"
1992 " tablespace " << id;
1993 }
1994 } else if (space->stop_new_ops || space->is_being_truncated) {
1995 space = NULL;
1996 } else {
1997 space->n_pending_ops++;
1998 }
1999
2000 mutex_exit(&fil_system->mutex);
2001
2002 return(space);
2003 }
2004
2005 /** Acquire a tablespace when it could be dropped concurrently.
2006 Used by background threads that do not necessarily hold proper locks
2007 for concurrency control.
2008 @param[in] id tablespace ID
2009 @return the tablespace, or NULL if missing or being deleted */
2010 fil_space_t*
fil_space_acquire(ulint id)2011 fil_space_acquire(
2012 ulint id)
2013 {
2014 return(fil_space_acquire_low(id, false));
2015 }
2016
2017 /** Acquire a tablespace that may not exist.
2018 Used by background threads that do not necessarily hold proper locks
2019 for concurrency control.
2020 @param[in] id tablespace ID
2021 @return the tablespace, or NULL if missing or being deleted */
2022 fil_space_t*
fil_space_acquire_silent(ulint id)2023 fil_space_acquire_silent(
2024 ulint id)
2025 {
2026 return(fil_space_acquire_low(id, true));
2027 }
2028
2029 /** Release a tablespace acquired with fil_space_acquire().
2030 @param[in,out] space tablespace to release */
2031 void
fil_space_release(fil_space_t * space)2032 fil_space_release(
2033 fil_space_t* space)
2034 {
2035 mutex_enter(&fil_system->mutex);
2036 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
2037 ut_ad(space->n_pending_ops > 0);
2038 space->n_pending_ops--;
2039 mutex_exit(&fil_system->mutex);
2040 }
2041 #endif /* !UNIV_HOTBACKUP */
2042
2043 /********************************************************//**
2044 Creates the database directory for a table if it does not exist yet. */
2045 void
fil_create_directory_for_tablename(const char * name)2046 fil_create_directory_for_tablename(
2047 /*===============================*/
2048 const char* name) /*!< in: name in the standard
2049 'databasename/tablename' format */
2050 {
2051 const char* namend;
2052 char* path;
2053 ulint len;
2054
2055 len = strlen(fil_path_to_mysql_datadir);
2056 namend = strchr(name, '/');
2057 ut_a(namend);
2058 path = static_cast<char*>(ut_malloc_nokey(len + (namend - name) + 2));
2059
2060 memcpy(path, fil_path_to_mysql_datadir, len);
2061 path[len] = '/';
2062 memcpy(path + len + 1, name, namend - name);
2063 path[len + (namend - name) + 1] = 0;
2064
2065 os_normalize_path(path);
2066
2067 bool success = os_file_create_directory(path, false);
2068 ut_a(success);
2069
2070 ut_free(path);
2071 }
2072
2073 /** Write a log record about an operation on a tablespace file.
2074 @param[in] type MLOG_FILE_NAME or MLOG_FILE_DELETE
2075 or MLOG_FILE_CREATE2 or MLOG_FILE_RENAME2
2076 @param[in] space_id tablespace identifier
2077 @param[in] first_page_no first page number in the file
2078 @param[in] path file path
2079 @param[in] new_path if type is MLOG_FILE_RENAME2, the new name
2080 @param[in] flags if type is MLOG_FILE_CREATE2, the space flags
2081 @param[in,out] mtr mini-transaction */
2082 static
2083 void
fil_op_write_log(mlog_id_t type,ulint space_id,ulint first_page_no,const char * path,const char * new_path,ulint flags,mtr_t * mtr)2084 fil_op_write_log(
2085 mlog_id_t type,
2086 ulint space_id,
2087 ulint first_page_no,
2088 const char* path,
2089 const char* new_path,
2090 ulint flags,
2091 mtr_t* mtr)
2092 {
2093 byte* log_ptr;
2094 ulint len;
2095
2096 ut_ad(first_page_no == 0);
2097
2098 /* fil_name_parse() requires that there be at least one path
2099 separator and that the file path end with ".ibd". */
2100 ut_ad(strchr(path, OS_PATH_SEPARATOR) != NULL);
2101 ut_ad(strcmp(&path[strlen(path) - strlen(DOT_IBD)], DOT_IBD) == 0);
2102
2103 log_ptr = mlog_open(mtr, 11 + 4 + 2 + 1);
2104
2105 if (log_ptr == NULL) {
2106 /* Logging in mtr is switched off during crash recovery:
2107 in that case mlog_open returns NULL */
2108 return;
2109 }
2110
2111 log_ptr = mlog_write_initial_log_record_low(
2112 type, space_id, first_page_no, log_ptr, mtr);
2113
2114 if (type == MLOG_FILE_CREATE2) {
2115 mach_write_to_4(log_ptr, flags);
2116 log_ptr += 4;
2117 }
2118
2119 /* Let us store the strings as null-terminated for easier readability
2120 and handling */
2121
2122 len = strlen(path) + 1;
2123
2124 mach_write_to_2(log_ptr, len);
2125 log_ptr += 2;
2126 mlog_close(mtr, log_ptr);
2127
2128 mlog_catenate_string(
2129 mtr, reinterpret_cast<const byte*>(path), len);
2130
2131 switch (type) {
2132 case MLOG_FILE_RENAME2:
2133 ut_ad(strchr(new_path, OS_PATH_SEPARATOR) != NULL);
2134 len = strlen(new_path) + 1;
2135 log_ptr = mlog_open(mtr, 2 + len);
2136 ut_a(log_ptr);
2137 mach_write_to_2(log_ptr, len);
2138 log_ptr += 2;
2139 mlog_close(mtr, log_ptr);
2140
2141 mlog_catenate_string(
2142 mtr, reinterpret_cast<const byte*>(new_path), len);
2143 break;
2144 case MLOG_FILE_NAME:
2145 case MLOG_FILE_DELETE:
2146 case MLOG_FILE_CREATE2:
2147 break;
2148 default:
2149 ut_ad(0);
2150 }
2151 }
2152 #ifndef UNIV_HOTBACKUP
2153 /** Write redo log for renaming a file.
2154 @param[in] space_id tablespace id
2155 @param[in] first_page_no first page number in the file
2156 @param[in] old_name tablespace file name
2157 @param[in] new_name tablespace file name after renaming
2158 @param[in,out] mtr mini-transaction */
2159 static
2160 void
fil_name_write_rename(ulint space_id,ulint first_page_no,const char * old_name,const char * new_name,mtr_t * mtr)2161 fil_name_write_rename(
2162 ulint space_id,
2163 ulint first_page_no,
2164 const char* old_name,
2165 const char* new_name,
2166 mtr_t* mtr)
2167 {
2168 ut_ad(!is_predefined_tablespace(space_id));
2169
2170 fil_op_write_log(
2171 MLOG_FILE_RENAME2,
2172 space_id, first_page_no, old_name, new_name, 0, mtr);
2173 }
2174 #endif /* !UNIV_HOTBACKUP */
2175 /** Write MLOG_FILE_NAME for a file.
2176 @param[in] space_id tablespace id
2177 @param[in] first_page_no first page number in the file
2178 @param[in] name tablespace file name
2179 @param[in,out] mtr mini-transaction */
2180 static
2181 void
fil_name_write(ulint space_id,ulint first_page_no,const char * name,mtr_t * mtr)2182 fil_name_write(
2183 ulint space_id,
2184 ulint first_page_no,
2185 const char* name,
2186 mtr_t* mtr)
2187 {
2188 fil_op_write_log(
2189 MLOG_FILE_NAME, space_id, first_page_no, name, NULL, 0, mtr);
2190 }
2191
2192 /** Write MLOG_FILE_NAME for a file.
2193 @param[in] space tablespace
2194 @param[in] first_page_no first page number in the file
2195 @param[in] file tablespace file
2196 @param[in,out] mtr mini-transaction */
2197 static
2198 void
fil_name_write(const fil_space_t * space,ulint first_page_no,const fil_node_t * file,mtr_t * mtr)2199 fil_name_write(
2200 const fil_space_t* space,
2201 ulint first_page_no,
2202 const fil_node_t* file,
2203 mtr_t* mtr)
2204 {
2205 fil_name_write(space->id, first_page_no, file->name, mtr);
2206 }
2207
2208 #ifndef UNIV_HOTBACKUP
2209 /********************************************************//**
2210 Recreates table indexes by applying
2211 TRUNCATE log record during recovery.
2212 @return DB_SUCCESS or error code */
2213 dberr_t
fil_recreate_table(ulint space_id,ulint format_flags,ulint flags,const char * name,truncate_t & truncate)2214 fil_recreate_table(
2215 /*===============*/
2216 ulint space_id, /*!< in: space id */
2217 ulint format_flags, /*!< in: page format */
2218 ulint flags, /*!< in: tablespace flags */
2219 const char* name, /*!< in: table name */
2220 truncate_t& truncate) /*!< in: The information of
2221 TRUNCATE log record */
2222 {
2223 dberr_t err = DB_SUCCESS;
2224 bool found;
2225 const page_size_t page_size(fil_space_get_page_size(space_id,
2226 &found));
2227
2228 if (!found) {
2229 ib::info() << "Missing .ibd file for table '" << name
2230 << "' with tablespace " << space_id;
2231 return(DB_ERROR);
2232 }
2233
2234 ut_ad(!truncate_t::s_fix_up_active);
2235 truncate_t::s_fix_up_active = true;
2236
2237 /* Step-1: Scan for active indexes from REDO logs and drop
2238 all the indexes using low level function that take root_page_no
2239 and space-id. */
2240 truncate.drop_indexes(space_id);
2241
2242 /* Step-2: Scan for active indexes and re-create them. */
2243 err = truncate.create_indexes(
2244 name, space_id, page_size, flags, format_flags);
2245 if (err != DB_SUCCESS) {
2246 ib::info() << "Failed to create indexes for the table '"
2247 << name << "' with tablespace " << space_id
2248 << " while fixing up truncate action";
2249 return(err);
2250 }
2251
2252 truncate_t::s_fix_up_active = false;
2253
2254 return(err);
2255 }
2256
2257 /********************************************************//**
2258 Recreates the tablespace and table indexes by applying
2259 TRUNCATE log record during recovery.
2260 @return DB_SUCCESS or error code */
2261 dberr_t
fil_recreate_tablespace(ulint space_id,ulint format_flags,ulint flags,const char * name,truncate_t & truncate,lsn_t recv_lsn)2262 fil_recreate_tablespace(
2263 /*====================*/
2264 ulint space_id, /*!< in: space id */
2265 ulint format_flags, /*!< in: page format */
2266 ulint flags, /*!< in: tablespace flags */
2267 const char* name, /*!< in: table name */
2268 truncate_t& truncate, /*!< in: The information of
2269 TRUNCATE log record */
2270 lsn_t recv_lsn) /*!< in: the end LSN of
2271 the log record */
2272 {
2273 dberr_t err = DB_SUCCESS;
2274 mtr_t mtr;
2275
2276 ut_ad(!truncate_t::s_fix_up_active);
2277 truncate_t::s_fix_up_active = true;
2278
2279 /* Step-1: Invalidate buffer pool pages belonging to the tablespace
2280 to re-create. */
2281 buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, 0);
2282
2283 /* Remove all insert buffer entries for the tablespace */
2284 ibuf_delete_for_discarded_space(space_id);
2285
2286 /* Step-2: truncate tablespace (reset the size back to original or
2287 default size) of tablespace. */
2288 err = truncate.truncate(
2289 space_id, truncate.get_dir_path(), name, flags, true);
2290
2291 if (err != DB_SUCCESS) {
2292
2293 ib::info() << "Cannot access .ibd file for table '"
2294 << name << "' with tablespace " << space_id
2295 << " while truncating";
2296 return(DB_ERROR);
2297 }
2298
2299 bool found;
2300 const page_size_t& page_size =
2301 fil_space_get_page_size(space_id, &found);
2302
2303 if (!found) {
2304 ib::info() << "Missing .ibd file for table '" << name
2305 << "' with tablespace " << space_id;
2306 return(DB_ERROR);
2307 }
2308
2309 /* Step-3: Initialize Header. */
2310 if (page_size.is_compressed()) {
2311 byte* buf;
2312 page_t* page;
2313
2314 buf = static_cast<byte*>(ut_zalloc_nokey(3 * UNIV_PAGE_SIZE));
2315
2316 /* Align the memory for file i/o */
2317 page = static_cast<byte*>(ut_align(buf, UNIV_PAGE_SIZE));
2318
2319 flags = fsp_flags_set_page_size(flags, univ_page_size);
2320
2321 fsp_header_init_fields(page, space_id, flags);
2322
2323 mach_write_to_4(
2324 page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
2325
2326 page_zip_des_t page_zip;
2327 page_zip_set_size(&page_zip, page_size.physical());
2328 page_zip.data = page + UNIV_PAGE_SIZE;
2329
2330 #ifdef UNIV_DEBUG
2331 page_zip.m_start =
2332 #endif /* UNIV_DEBUG */
2333 page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
2334 buf_flush_init_for_writing(
2335 NULL, page, &page_zip, 0,
2336 fsp_is_checksum_disabled(space_id));
2337
2338 err = fil_write(page_id_t(space_id, 0), page_size, 0,
2339 page_size.physical(), page_zip.data);
2340
2341 ut_free(buf);
2342
2343 if (err != DB_SUCCESS) {
2344 ib::info() << "Failed to clean header of the"
2345 " table '" << name << "' with tablespace "
2346 << space_id;
2347 return(err);
2348 }
2349 }
2350
2351 mtr_start(&mtr);
2352 /* Don't log the operation while fixing up table truncate operation
2353 as crash at this level can still be sustained with recovery restarting
2354 from last checkpoint. */
2355 mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2356
2357 /* Initialize the first extent descriptor page and
2358 the second bitmap page for the new tablespace. */
2359 fsp_header_init(space_id, FIL_IBD_FILE_INITIAL_SIZE, &mtr);
2360 mtr_commit(&mtr);
2361
2362 /* Step-4: Re-Create Indexes to newly re-created tablespace.
2363 This operation will restore tablespace back to what it was
2364 when it was created during CREATE TABLE. */
2365 err = truncate.create_indexes(
2366 name, space_id, page_size, flags, format_flags);
2367 if (err != DB_SUCCESS) {
2368 return(err);
2369 }
2370
2371 /* Step-5: Write new created pages into ibd file handle and
2372 flush it to disk for the tablespace, in case i/o-handler thread
2373 deletes the bitmap page from buffer. */
2374 mtr_start(&mtr);
2375
2376 mtr_set_log_mode(&mtr, MTR_LOG_NO_REDO);
2377
2378 mutex_enter(&fil_system->mutex);
2379
2380 fil_space_t* space = fil_space_get_by_id(space_id);
2381
2382 mutex_exit(&fil_system->mutex);
2383
2384 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2385
2386 for (ulint page_no = 0; page_no < node->size; ++page_no) {
2387
2388 const page_id_t cur_page_id(space_id, page_no);
2389
2390 buf_block_t* block = buf_page_get(cur_page_id, page_size,
2391 RW_X_LATCH, &mtr);
2392
2393 byte* page = buf_block_get_frame(block);
2394
2395 if (!fsp_flags_is_compressed(flags)) {
2396
2397 ut_ad(!page_size.is_compressed());
2398
2399 buf_flush_init_for_writing(
2400 block, page, NULL, recv_lsn,
2401 fsp_is_checksum_disabled(space_id));
2402
2403 err = fil_write(cur_page_id, page_size, 0,
2404 page_size.physical(), page);
2405 } else {
2406 ut_ad(page_size.is_compressed());
2407
2408 /* We don't want to rewrite empty pages. */
2409
2410 if (fil_page_get_type(page) != 0) {
2411 page_zip_des_t* page_zip =
2412 buf_block_get_page_zip(block);
2413
2414 buf_flush_init_for_writing(
2415 block, page, page_zip, recv_lsn,
2416 fsp_is_checksum_disabled(space_id));
2417
2418 err = fil_write(cur_page_id, page_size, 0,
2419 page_size.physical(),
2420 page_zip->data);
2421 } else {
2422 #ifdef UNIV_DEBUG
2423 const byte* data = block->page.zip.data;
2424
2425 /* Make sure that the page is really empty */
2426 for (ulint i = 0;
2427 i < page_size.physical();
2428 ++i) {
2429
2430 ut_a(data[i] == 0);
2431 }
2432 #endif /* UNIV_DEBUG */
2433 }
2434 }
2435
2436 if (err != DB_SUCCESS) {
2437 ib::info() << "Cannot write page " << page_no
2438 << " into a .ibd file for table '"
2439 << name << "' with tablespace " << space_id;
2440 }
2441 }
2442
2443 mtr_commit(&mtr);
2444
2445 truncate_t::s_fix_up_active = false;
2446
2447 return(err);
2448 }
#endif /* !UNIV_HOTBACKUP */
2450 /** Replay a file rename operation if possible.
2451 @param[in] space_id tablespace identifier
2452 @param[in] first_page_no first page number in the file
2453 @param[in] name old file name
2454 @param[in] new_name new file name
2455 @return whether the operation was successfully applied
2456 (the name did not exist, or new_name did not exist and
2457 name was successfully renamed to new_name) */
2458 bool
fil_op_replay_rename(ulint space_id,ulint first_page_no,const char * name,const char * new_name)2459 fil_op_replay_rename(
2460 ulint space_id,
2461 ulint first_page_no,
2462 const char* name,
2463 const char* new_name)
2464 {
2465 #ifdef UNIV_HOTBACKUP
2466 ut_ad(recv_replay_file_ops);
2467 #endif /* UNIV_HOTBACKUP */
2468 ut_ad(first_page_no == 0);
2469
2470 /* In order to replay the rename, the following must hold:
2471 * The new name is not already used.
2472 * A tablespace exists with the old name.
2473 * The space ID for that tablepace matches this log entry.
2474 This will prevent unintended renames during recovery. */
2475 fil_space_t* space = fil_space_get(space_id);
2476
2477 if (space == NULL) {
2478 return(true);
2479 }
2480
2481 const bool name_match
2482 = strcmp(name, UT_LIST_GET_FIRST(space->chain)->name) == 0;
2483
2484 if (!name_match) {
2485 return(true);
2486 }
2487
2488 /* Create the database directory for the new name, if
2489 it does not exist yet */
2490
2491 const char* namend = strrchr(new_name, OS_PATH_SEPARATOR);
2492 ut_a(namend != NULL);
2493
2494 char* dir = static_cast<char*>(
2495 ut_malloc_nokey(namend - new_name + 1));
2496
2497 memcpy(dir, new_name, namend - new_name);
2498 dir[namend - new_name] = '\0';
2499
2500 bool success = os_file_create_directory(dir, false);
2501 ut_a(success);
2502
2503 ulint dirlen = 0;
2504
2505 if (const char* dirend = strrchr(dir, OS_PATH_SEPARATOR)) {
2506 dirlen = dirend - dir + 1;
2507 }
2508
2509 ut_free(dir);
2510
2511 /* New path must not exist. */
2512 dberr_t err = fil_rename_tablespace_check(
2513 space_id, name, new_name, false);
2514 if (err != DB_SUCCESS) {
2515 ib::error() << " Cannot replay file rename."
2516 " Remove either file and try again.";
2517 return(false);
2518 }
2519
2520 char* new_table = mem_strdupl(
2521 new_name + dirlen,
2522 strlen(new_name + dirlen)
2523 - 4 /* remove ".ibd" */);
2524
2525 ut_ad(new_table[namend - new_name - dirlen]
2526 == OS_PATH_SEPARATOR);
2527 #if OS_PATH_SEPARATOR != '/'
2528 new_table[namend - new_name - dirlen] = '/';
2529 #endif
2530
2531 if (!fil_rename_tablespace(
2532 space_id, name, new_table, new_name)) {
2533 ut_error;
2534 }
2535
2536 ut_free(new_table);
2537 return(true);
2538 }
2539
/** Kinds of file operation that can be requested on a tablespace */
enum fil_operation_t {
	/** delete a single-table tablespace */
	FIL_OPERATION_DELETE = 0,
	/** close a single-table tablespace */
	FIL_OPERATION_CLOSE = 1,
	/** truncate a single-table tablespace */
	FIL_OPERATION_TRUNCATE = 2
};
2546
2547 /** Check for pending operations.
2548 @param[in] space tablespace
2549 @param[in] count number of attempts so far
2550 @return 0 if no operations else count + 1. */
2551 static
2552 ulint
fil_check_pending_ops(fil_space_t * space,ulint count)2553 fil_check_pending_ops(
2554 fil_space_t* space,
2555 ulint count)
2556 {
2557 ut_ad(mutex_own(&fil_system->mutex));
2558
2559 const ulint n_pending_ops = space ? space->n_pending_ops : 0;
2560
2561 if (n_pending_ops) {
2562
2563 if (count > 5000) {
2564 ib::warn() << "Trying to close/delete/truncate"
2565 " tablespace '" << space->name
2566 << "' but there are " << n_pending_ops
2567 << " pending operations on it.";
2568 }
2569
2570 return(count + 1);
2571 }
2572
2573 return(0);
2574 }
2575
2576 /*******************************************************************//**
2577 Check for pending IO.
2578 @return 0 if no pending else count + 1. */
2579 static
2580 ulint
fil_check_pending_io(fil_operation_t operation,fil_space_t * space,fil_node_t ** node,ulint count)2581 fil_check_pending_io(
2582 /*=================*/
2583 fil_operation_t operation, /*!< in: File operation */
2584 fil_space_t* space, /*!< in/out: Tablespace to check */
2585 fil_node_t** node, /*!< out: Node in space list */
2586 ulint count) /*!< in: number of attempts so far */
2587 {
2588 ut_ad(mutex_own(&fil_system->mutex));
2589 ut_a(space->n_pending_ops == 0);
2590
2591 switch (operation) {
2592 case FIL_OPERATION_DELETE:
2593 case FIL_OPERATION_CLOSE:
2594 break;
2595 case FIL_OPERATION_TRUNCATE:
2596 space->is_being_truncated = true;
2597 break;
2598 }
2599
2600 /* The following code must change when InnoDB supports
2601 multiple datafiles per tablespace. */
2602 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2603
2604 *node = UT_LIST_GET_FIRST(space->chain);
2605
2606 if (space->n_pending_flushes > 0 || (*node)->n_pending > 0) {
2607
2608 ut_a(!(*node)->being_extended);
2609
2610 if (count > 1000) {
2611 ib::warn() << "Trying to delete/close/truncate"
2612 " tablespace '" << space->name
2613 << "' but there are "
2614 << space->n_pending_flushes
2615 << " flushes and " << (*node)->n_pending
2616 << " pending i/o's on it.";
2617 }
2618
2619 return(count + 1);
2620 }
2621
2622 return(0);
2623 }
2624
2625 /*******************************************************************//**
2626 Check pending operations on a tablespace.
2627 @return DB_SUCCESS or error failure. */
2628 static
2629 dberr_t
fil_check_pending_operations(ulint id,fil_operation_t operation,fil_space_t ** space,char ** path)2630 fil_check_pending_operations(
2631 /*=========================*/
2632 ulint id, /*!< in: space id */
2633 fil_operation_t operation, /*!< in: File operation */
2634 fil_space_t** space, /*!< out: tablespace instance
2635 in memory */
2636 char** path) /*!< out/own: tablespace path */
2637 {
2638 ulint count = 0;
2639
2640 ut_a(!is_system_tablespace(id));
2641 ut_ad(space);
2642
2643 *space = 0;
2644
2645 mutex_enter(&fil_system->mutex);
2646 fil_space_t* sp = fil_space_get_by_id(id);
2647 if (sp) {
2648 sp->stop_new_ops = true;
2649 }
2650 mutex_exit(&fil_system->mutex);
2651
2652 /* Check for pending operations. */
2653
2654 do {
2655 mutex_enter(&fil_system->mutex);
2656
2657 sp = fil_space_get_by_id(id);
2658
2659 count = fil_check_pending_ops(sp, count);
2660
2661 mutex_exit(&fil_system->mutex);
2662
2663 if (count > 0) {
2664 os_thread_sleep(20000);
2665 }
2666
2667 } while (count > 0);
2668
2669 /* Check for pending IO. */
2670
2671 *path = 0;
2672
2673 do {
2674 mutex_enter(&fil_system->mutex);
2675
2676 sp = fil_space_get_by_id(id);
2677
2678 if (sp == NULL) {
2679 mutex_exit(&fil_system->mutex);
2680 return(DB_TABLESPACE_NOT_FOUND);
2681 }
2682
2683 fil_node_t* node;
2684
2685 count = fil_check_pending_io(operation, sp, &node, count);
2686
2687 if (count == 0) {
2688 *path = mem_strdup(node->name);
2689 }
2690
2691 mutex_exit(&fil_system->mutex);
2692
2693 if (count > 0) {
2694 os_thread_sleep(20000);
2695 }
2696
2697 } while (count > 0);
2698
2699 ut_ad(sp);
2700
2701 *space = sp;
2702 return(DB_SUCCESS);
2703 }
2704
2705 /*******************************************************************//**
2706 Closes a single-table tablespace. The tablespace must be cached in the
2707 memory cache. Free all pages used by the tablespace.
2708 @return DB_SUCCESS or error */
2709 dberr_t
fil_close_tablespace(trx_t * trx,ulint id)2710 fil_close_tablespace(
2711 /*=================*/
2712 trx_t* trx, /*!< in/out: Transaction covering the close */
2713 ulint id) /*!< in: space id */
2714 {
2715 char* path = 0;
2716 fil_space_t* space = 0;
2717 dberr_t err;
2718
2719 ut_a(!is_system_tablespace(id));
2720
2721 err = fil_check_pending_operations(id, FIL_OPERATION_CLOSE,
2722 &space, &path);
2723
2724 if (err != DB_SUCCESS) {
2725 return(err);
2726 }
2727
2728 ut_a(space);
2729 ut_a(path != 0);
2730
2731 rw_lock_x_lock(&space->latch);
2732
2733 /* Invalidate in the buffer pool all pages belonging to the
2734 tablespace. Since we have set space->stop_new_ops = true, readahead
2735 or ibuf merge can no longer read more pages of this tablespace to the
2736 buffer pool. Thus we can clean the tablespace out of the buffer pool
2737 completely and permanently. The flag stop_new_ops also prevents
2738 fil_flush() from being applied to this tablespace. */
2739
2740 buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_FLUSH_WRITE, trx);
2741
2742 /* If the free is successful, the X lock will be released before
2743 the space memory data structure is freed. */
2744
2745 if (!fil_space_free(id, true)) {
2746 rw_lock_x_unlock(&space->latch);
2747 err = DB_TABLESPACE_NOT_FOUND;
2748 } else {
2749 err = DB_SUCCESS;
2750 }
2751
2752 /* If it is a delete then also delete any generated files, otherwise
2753 when we drop the database the remove directory will fail. */
2754
2755 char* cfg_name = fil_make_filepath(path, NULL, CFG, false);
2756 if (cfg_name != NULL) {
2757 os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
2758 ut_free(cfg_name);
2759 }
2760
2761 char* cfp_name = fil_make_filepath(path, NULL, CFP, false);
2762 if (cfp_name != NULL) {
2763 os_file_delete_if_exists(innodb_data_file_key, cfp_name, NULL);
2764 ut_free(cfp_name);
2765 }
2766
2767 ut_free(path);
2768
2769 return(err);
2770 }
2771
2772 /** Deletes an IBD tablespace, either general or single-table.
2773 The tablespace must be cached in the memory cache. This will delete the
2774 datafile, fil_space_t & fil_node_t entries from the file_system_t cache.
2775 @param[in] space_id Tablespace id
2776 @param[in] buf_remove Specify the action to take on the pages
2777 for this table in the buffer pool.
2778 @return DB_SUCCESS or error */
2779 dberr_t
fil_delete_tablespace(ulint id,buf_remove_t buf_remove)2780 fil_delete_tablespace(
2781 ulint id,
2782 buf_remove_t buf_remove)
2783 {
2784 char* path = 0;
2785 fil_space_t* space = 0;
2786
2787 ut_a(!is_system_tablespace(id));
2788
2789 dberr_t err = fil_check_pending_operations(
2790 id, FIL_OPERATION_DELETE, &space, &path);
2791
2792 if (err != DB_SUCCESS) {
2793
2794 ib::error() << "Cannot delete tablespace " << id
2795 << " because it is not found in the tablespace"
2796 " memory cache.";
2797
2798 return(err);
2799 }
2800
2801 ut_a(space);
2802 ut_a(path != 0);
2803
2804 #ifndef UNIV_HOTBACKUP
2805 /* IMPORTANT: Because we have set space::stop_new_ops there
2806 can't be any new ibuf merges, reads or flushes. We are here
2807 because node::n_pending was zero above. However, it is still
2808 possible to have pending read and write requests:
2809
2810 A read request can happen because the reader thread has
2811 gone through the ::stop_new_ops check in buf_page_init_for_read()
2812 before the flag was set and has not yet incremented ::n_pending
2813 when we checked it above.
2814
2815 A write request can be issued any time because we don't check
2816 the ::stop_new_ops flag when queueing a block for write.
2817
2818 We deal with pending write requests in the following function
2819 where we'd minimally evict all dirty pages belonging to this
2820 space from the flush_list. Note that if a block is IO-fixed
2821 we'll wait for IO to complete.
2822
2823 To deal with potential read requests, we will check the
2824 ::stop_new_ops flag in fil_io(). */
2825
2826 buf_LRU_flush_or_remove_pages(id, buf_remove, 0);
2827
2828 #endif /* !UNIV_HOTBACKUP */
2829
2830 /* If it is a delete then also delete any generated files, otherwise
2831 when we drop the database the remove directory will fail. */
2832 {
2833 #ifdef UNIV_HOTBACKUP
2834 /* When replaying the operation in MySQL Enterprise
2835 Backup, we do not try to write any log record. */
2836 #else /* UNIV_HOTBACKUP */
2837 /* Before deleting the file, write a log record about
2838 it, so that InnoDB crash recovery will expect the file
2839 to be gone. */
2840 mtr_t mtr;
2841
2842 mtr_start(&mtr);
2843 fil_op_write_log(MLOG_FILE_DELETE, id, 0, path, NULL, 0, &mtr);
2844 mtr_commit(&mtr);
2845 /* Even if we got killed shortly after deleting the
2846 tablespace file, the record must have already been
2847 written to the redo log. */
2848 log_write_up_to(mtr.commit_lsn(), true);
2849 #endif /* UNIV_HOTBACKUP */
2850
2851 char* cfg_name = fil_make_filepath(path, NULL, CFG, false);
2852 if (cfg_name != NULL) {
2853 os_file_delete_if_exists(innodb_data_file_key, cfg_name, NULL);
2854 ut_free(cfg_name);
2855 }
2856
2857 char* cfp_name = fil_make_filepath(path, NULL, CFP, false);
2858 if (cfp_name != NULL) {
2859 os_file_delete_if_exists(innodb_data_file_key, cfp_name, NULL);
2860 ut_free(cfp_name);
2861 }
2862 }
2863
2864 /* Delete the link file pointing to the ibd file we are deleting. */
2865 if (FSP_FLAGS_HAS_DATA_DIR(space->flags)) {
2866
2867 RemoteDatafile::delete_link_file(space->name);
2868
2869 } else if (FSP_FLAGS_GET_SHARED(space->flags)) {
2870
2871 RemoteDatafile::delete_link_file(base_name(path));
2872
2873 }
2874
2875 mutex_enter(&fil_system->mutex);
2876
2877 /* Double check the sanity of pending ops after reacquiring
2878 the fil_system::mutex. */
2879 if (const fil_space_t* s = fil_space_get_by_id(id)) {
2880 ut_a(s == space);
2881 ut_a(space->n_pending_ops == 0);
2882 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2883 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2884 ut_a(node->n_pending == 0);
2885
2886 fil_space_detach(space);
2887 mutex_exit(&fil_system->mutex);
2888
2889 log_mutex_enter();
2890
2891 if (space->max_lsn != 0) {
2892 ut_d(space->max_lsn = 0);
2893 UT_LIST_REMOVE(fil_system->named_spaces, space);
2894 }
2895
2896 log_mutex_exit();
2897 fil_space_free_low(space);
2898
2899 if (!os_file_delete(innodb_data_file_key, path)
2900 && !os_file_delete_if_exists(
2901 innodb_data_file_key, path, NULL)) {
2902
2903 /* Note: This is because we have removed the
2904 tablespace instance from the cache. */
2905
2906 err = DB_IO_ERROR;
2907 }
2908 } else {
2909 mutex_exit(&fil_system->mutex);
2910 err = DB_TABLESPACE_NOT_FOUND;
2911 }
2912
2913 ut_free(path);
2914
2915 return(err);
2916 }
2917 #ifndef UNIV_HOTBACKUP
2918 /** Truncate the tablespace to needed size.
2919 @param[in] space_id id of tablespace to truncate
2920 @param[in] size_in_pages truncate size.
2921 @return true if truncate was successful. */
2922 bool
fil_truncate_tablespace(ulint space_id,ulint size_in_pages)2923 fil_truncate_tablespace(
2924 ulint space_id,
2925 ulint size_in_pages)
2926 {
2927 /* Step-1: Prepare tablespace for truncate. This involves
2928 stopping all the new operations + IO on that tablespace
2929 and ensuring that related pages are flushed to disk. */
2930 if (fil_prepare_for_truncate(space_id) != DB_SUCCESS) {
2931 return(false);
2932 }
2933
2934 /* Step-2: Invalidate buffer pool pages belonging to the tablespace
2935 to re-create. Remove all insert buffer entries for the tablespace */
2936 buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, 0);
2937
2938 /* Step-3: Truncate the tablespace and accordingly update
2939 the fil_space_t handler that is used to access this tablespace. */
2940 mutex_enter(&fil_system->mutex);
2941 fil_space_t* space = fil_space_get_by_id(space_id);
2942
2943 /* The following code must change when InnoDB supports
2944 multiple datafiles per tablespace. */
2945 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
2946
2947 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
2948
2949 ut_ad(node->is_open);
2950
2951 space->size = node->size = size_in_pages;
2952
2953 bool success = os_file_truncate(node->name, node->handle, 0);
2954 if (success) {
2955
2956 os_offset_t size = size_in_pages * UNIV_PAGE_SIZE;
2957
2958 success = os_file_set_size(
2959 node->name, node->handle, size, srv_read_only_mode);
2960
2961 if (success) {
2962 space->stop_new_ops = false;
2963 space->is_being_truncated = false;
2964 }
2965 }
2966
2967 mutex_exit(&fil_system->mutex);
2968
2969 return(success);
2970 }
2971
2972 /*******************************************************************//**
2973 Prepare for truncating a single-table tablespace.
2974 1) Check pending operations on a tablespace;
2975 2) Remove all insert buffer entries for the tablespace;
2976 @return DB_SUCCESS or error */
2977 dberr_t
fil_prepare_for_truncate(ulint id)2978 fil_prepare_for_truncate(
2979 /*=====================*/
2980 ulint id) /*!< in: space id */
2981 {
2982 char* path = 0;
2983 fil_space_t* space = 0;
2984
2985 ut_a(!is_system_tablespace(id));
2986
2987 dberr_t err = fil_check_pending_operations(
2988 id, FIL_OPERATION_TRUNCATE, &space, &path);
2989
2990 ut_free(path);
2991
2992 if (err == DB_TABLESPACE_NOT_FOUND) {
2993 ib::error() << "Cannot truncate tablespace " << id
2994 << " because it is not found in the tablespace"
2995 " memory cache.";
2996 }
2997
2998 return(err);
2999 }
3000
3001 /** Reinitialize the original tablespace header with the same space id
3002 for single tablespace
3003 @param[in] table table belongs to tablespace
3004 @param[in] size size in blocks
3005 @param[in] trx Transaction covering truncate */
3006 void
fil_reinit_space_header_for_table(dict_table_t * table,ulint size,trx_t * trx)3007 fil_reinit_space_header_for_table(
3008 dict_table_t* table,
3009 ulint size,
3010 trx_t* trx)
3011 {
3012 ulint id = table->space;
3013
3014 ut_a(!is_system_tablespace(id));
3015
3016 /* Invalidate in the buffer pool all pages belonging
3017 to the tablespace. The buffer pool scan may take long
3018 time to complete, therefore we release dict_sys->mutex
3019 and the dict operation lock during the scan and aquire
3020 it again after the buffer pool scan.*/
3021
3022 /* Release the lock on the indexes too. So that
3023 they won't violate the latch ordering. */
3024 dict_table_x_unlock_indexes(table);
3025 row_mysql_unlock_data_dictionary(trx);
3026 DEBUG_SYNC_C("trunc_table_index_dropped_release_dict_lock");
3027
3028 DEBUG_SYNC_C("simulate_buffer_pool_scan");
3029 buf_LRU_flush_or_remove_pages(id, BUF_REMOVE_ALL_NO_WRITE, 0);
3030
3031 row_mysql_lock_data_dictionary(trx);
3032
3033 dict_table_x_lock_indexes(table);
3034
3035 /* Remove all insert buffer entries for the tablespace */
3036 ibuf_delete_for_discarded_space(id);
3037
3038 mutex_enter(&fil_system->mutex);
3039
3040 fil_space_t* space = fil_space_get_by_id(id);
3041
3042 /* The following code must change when InnoDB supports
3043 multiple datafiles per tablespace. */
3044 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
3045
3046 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
3047
3048 space->size = node->size = size;
3049
3050 mutex_exit(&fil_system->mutex);
3051
3052 mtr_t mtr;
3053
3054 mtr_start(&mtr);
3055 mtr.set_named_space(id);
3056
3057 fsp_header_init(id, size, &mtr);
3058
3059 mtr_commit(&mtr);
3060 }
3061
3062 #ifdef UNIV_DEBUG
3063 /** Increase redo skipped count for a tablespace.
3064 @param[in] id space id */
3065 void
fil_space_inc_redo_skipped_count(ulint id)3066 fil_space_inc_redo_skipped_count(
3067 ulint id)
3068 {
3069 fil_space_t* space;
3070
3071 mutex_enter(&fil_system->mutex);
3072
3073 space = fil_space_get_by_id(id);
3074
3075 ut_a(space != NULL);
3076
3077 space->redo_skipped_count++;
3078
3079 mutex_exit(&fil_system->mutex);
3080 }
3081
3082 /** Decrease redo skipped count for a tablespace.
3083 @param[in] id space id */
3084 void
fil_space_dec_redo_skipped_count(ulint id)3085 fil_space_dec_redo_skipped_count(
3086 ulint id)
3087 {
3088 fil_space_t* space;
3089
3090 mutex_enter(&fil_system->mutex);
3091
3092 space = fil_space_get_by_id(id);
3093
3094 ut_a(space != NULL);
3095 ut_a(space->redo_skipped_count > 0);
3096
3097 space->redo_skipped_count--;
3098
3099 mutex_exit(&fil_system->mutex);
3100 }
3101
3102 /**
3103 Check whether a single-table tablespace is redo skipped.
3104 @param[in] id space id
3105 @return true if redo skipped */
3106 bool
fil_space_is_redo_skipped(ulint id)3107 fil_space_is_redo_skipped(
3108 ulint id)
3109 {
3110 fil_space_t* space;
3111 bool is_redo_skipped;
3112
3113 mutex_enter(&fil_system->mutex);
3114
3115 space = fil_space_get_by_id(id);
3116
3117 ut_a(space != NULL);
3118
3119 is_redo_skipped = space->redo_skipped_count > 0;
3120
3121 mutex_exit(&fil_system->mutex);
3122
3123 return(is_redo_skipped);
3124 }
3125 #endif
3126
3127 /*******************************************************************//**
3128 Discards a single-table tablespace. The tablespace must be cached in the
3129 memory cache. Discarding is like deleting a tablespace, but
3130
3131 1. We do not drop the table from the data dictionary;
3132
3133 2. We remove all insert buffer entries for the tablespace immediately;
3134 in DROP TABLE they are only removed gradually in the background;
3135
3136 3. Free all the pages in use by the tablespace.
3137 @return DB_SUCCESS or error */
3138 dberr_t
fil_discard_tablespace(ulint id)3139 fil_discard_tablespace(
3140 /*===================*/
3141 ulint id) /*!< in: space id */
3142 {
3143 dberr_t err;
3144
3145 switch (err = fil_delete_tablespace(id, BUF_REMOVE_ALL_NO_WRITE)) {
3146 case DB_SUCCESS:
3147 break;
3148
3149 case DB_IO_ERROR:
3150 ib::warn() << "While deleting tablespace " << id
3151 << " in DISCARD TABLESPACE. File rename/delete"
3152 " failed: " << ut_strerr(err);
3153 break;
3154
3155 case DB_TABLESPACE_NOT_FOUND:
3156 ib::warn() << "Cannot delete tablespace " << id
3157 << " in DISCARD TABLESPACE: " << ut_strerr(err);
3158 break;
3159
3160 default:
3161 ut_error;
3162 }
3163
3164 /* Remove all insert buffer entries for the tablespace */
3165
3166 ibuf_delete_for_discarded_space(id);
3167
3168 return(err);
3169 }
3170 #endif /* !UNIV_HOTBACKUP */
3171
3172 /*******************************************************************//**
3173 Allocates and builds a file name from a path, a table or tablespace name
3174 and a suffix. The string must be freed by caller with ut_free().
3175 @param[in] path NULL or the direcory path or the full path and filename.
3176 @param[in] name NULL if path is full, or Table/Tablespace name
3177 @param[in] suffix NULL or the file extention to use.
3178 @param[in] trim_name true if the last name on the path should be trimmed.
3179 @return own: file name */
3180 char*
fil_make_filepath(const char * path,const char * name,ib_extention ext,bool trim_name)3181 fil_make_filepath(
3182 const char* path,
3183 const char* name,
3184 ib_extention ext,
3185 bool trim_name)
3186 {
3187 /* The path may contain the basename of the file, if so we do not
3188 need the name. If the path is NULL, we can use the default path,
3189 but there needs to be a name. */
3190 ut_ad(path != NULL || name != NULL);
3191
3192 /* If we are going to strip a name off the path, there better be a
3193 path and a new name to put back on. */
3194 ut_ad(!trim_name || (path != NULL && name != NULL));
3195
3196 if (path == NULL) {
3197 path = fil_path_to_mysql_datadir;
3198 }
3199
3200 ulint len = 0; /* current length */
3201 ulint path_len = strlen(path);
3202 ulint name_len = (name ? strlen(name) : 0);
3203 const char* suffix = dot_ext[ext];
3204 ulint suffix_len = strlen(suffix);
3205 ulint full_len = path_len + 1 + name_len + suffix_len + 1;
3206
3207 char* full_name = static_cast<char*>(ut_malloc_nokey(full_len));
3208 if (full_name == NULL) {
3209 return NULL;
3210 }
3211
3212 /* If the name is a relative path, do not prepend "./". */
3213 if (path[0] == '.'
3214 && (path[1] == '\0' || path[1] == OS_PATH_SEPARATOR)
3215 && name != NULL && name[0] == '.') {
3216 path = NULL;
3217 path_len = 0;
3218 }
3219
3220 if (path != NULL) {
3221 memcpy(full_name, path, path_len);
3222 len = path_len;
3223 full_name[len] = '\0';
3224 os_normalize_path(full_name);
3225 }
3226
3227 if (trim_name) {
3228 /* Find the offset of the last DIR separator and set it to
3229 null in order to strip off the old basename from this path. */
3230 char* last_dir_sep = strrchr(full_name, OS_PATH_SEPARATOR);
3231 if (last_dir_sep) {
3232 last_dir_sep[0] = '\0';
3233 len = strlen(full_name);
3234 }
3235 }
3236
3237 if (name != NULL) {
3238 if (len && full_name[len - 1] != OS_PATH_SEPARATOR) {
3239 /* Add a DIR separator */
3240 full_name[len] = OS_PATH_SEPARATOR;
3241 full_name[++len] = '\0';
3242 }
3243
3244 char* ptr = &full_name[len];
3245 memcpy(ptr, name, name_len);
3246 len += name_len;
3247 full_name[len] = '\0';
3248 os_normalize_path(ptr);
3249 }
3250
3251 /* Make sure that the specified suffix is at the end of the filepath
3252 string provided. This assumes that the suffix starts with '.'.
3253 If the first char of the suffix is found in the filepath at the same
3254 length as the suffix from the end, then we will assume that there is
3255 a previous suffix that needs to be replaced. */
3256 if (suffix != NULL) {
3257 /* Need room for the trailing null byte. */
3258 ut_ad(len < full_len);
3259
3260 if ((len > suffix_len)
3261 && (full_name[len - suffix_len] == suffix[0])) {
3262 /* Another suffix exists, make it the one requested. */
3263 memcpy(&full_name[len - suffix_len], suffix, suffix_len);
3264
3265 } else {
3266 /* No previous suffix, add it. */
3267 ut_ad(len + suffix_len < full_len);
3268 memcpy(&full_name[len], suffix, suffix_len);
3269 full_name[len + suffix_len] = '\0';
3270 }
3271 }
3272
3273 return(full_name);
3274 }
3275
3276 /** Test if a tablespace file can be renamed to a new filepath by checking
3277 if that the old filepath exists and the new filepath does not exist.
3278 @param[in] space_id tablespace id
3279 @param[in] old_path old filepath
3280 @param[in] new_path new filepath
3281 @param[in] is_discarded whether the tablespace is discarded
3282 @return innodb error code */
3283 dberr_t
fil_rename_tablespace_check(ulint space_id,const char * old_path,const char * new_path,bool is_discarded)3284 fil_rename_tablespace_check(
3285 ulint space_id,
3286 const char* old_path,
3287 const char* new_path,
3288 bool is_discarded)
3289 {
3290 bool exists = false;
3291 os_file_type_t ftype;
3292
3293 if (!is_discarded
3294 && os_file_status(old_path, &exists, &ftype)
3295 && !exists) {
3296 ib::error() << "Cannot rename '" << old_path
3297 << "' to '" << new_path
3298 << "' for space ID " << space_id
3299 << " because the source file"
3300 << " does not exist.";
3301 return(DB_TABLESPACE_NOT_FOUND);
3302 }
3303
3304 exists = false;
3305 if (!os_file_status(new_path, &exists, &ftype) || exists) {
3306 ib::error() << "Cannot rename '" << old_path
3307 << "' to '" << new_path
3308 << "' for space ID " << space_id
3309 << " because the target file exists."
3310 " Remove the target file and try again.";
3311 return(DB_TABLESPACE_EXISTS);
3312 }
3313
3314 return(DB_SUCCESS);
3315 }
3316
3317 /** Rename a single-table tablespace.
3318 The tablespace must exist in the memory cache.
3319 @param[in] id tablespace identifier
3320 @param[in] old_path old file name
3321 @param[in] new_name new table name in the
3322 databasename/tablename format
3323 @param[in] new_path_in new file name,
3324 or NULL if it is located in the normal data directory
3325 @return true if success */
3326 bool
fil_rename_tablespace(ulint id,const char * old_path,const char * new_name,const char * new_path_in)3327 fil_rename_tablespace(
3328 ulint id,
3329 const char* old_path,
3330 const char* new_name,
3331 const char* new_path_in)
3332 {
3333 bool sleep = false;
3334 bool flush = false;
3335 fil_space_t* space;
3336 fil_node_t* node;
3337 ulint count = 0;
3338 ut_a(id != 0);
3339
3340 ut_ad(strchr(new_name, '/') != NULL);
3341 retry:
3342 count++;
3343
3344 if (!(count % 1000)) {
3345 ib::warn() << "Cannot rename file " << old_path
3346 << " (space id " << id << "), retried " << count
3347 << " times."
3348 " There are either pending IOs or flushes or"
3349 " the file is being extended.";
3350 }
3351
3352 mutex_enter(&fil_system->mutex);
3353
3354 space = fil_space_get_by_id(id);
3355
3356 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = NULL; );
3357
3358 if (space == NULL) {
3359 ib::error() << "Cannot find space id " << id
3360 << " in the tablespace memory cache, though the file '"
3361 << old_path
3362 << "' in a rename operation should have that id.";
3363 func_exit:
3364 mutex_exit(&fil_system->mutex);
3365 return(false);
3366 }
3367
3368 if (count > 25000) {
3369 space->stop_ios = false;
3370 goto func_exit;
3371 }
3372
3373 if (space != fil_space_get_by_name(space->name)) {
3374 ib::error() << "Cannot find " << space->name
3375 << " in tablespace memory cache";
3376 space->stop_ios = false;
3377 goto func_exit;
3378 }
3379
3380 if (fil_space_get_by_name(new_name)) {
3381 ib::error() << new_name
3382 << " is already in tablespace memory cache";
3383 space->stop_ios = false;
3384 goto func_exit;
3385 }
3386
3387 /* We temporarily close the .ibd file because we do not trust that
3388 operating systems can rename an open file. For the closing we have to
3389 wait until there are no pending i/o's or flushes on the file. */
3390
3391 space->stop_ios = true;
3392
3393 /* The following code must change when InnoDB supports
3394 multiple datafiles per tablespace. */
3395 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
3396 node = UT_LIST_GET_FIRST(space->chain);
3397
3398 if (node->n_pending > 0
3399 || node->n_pending_flushes > 0
3400 || node->being_extended) {
3401 /* There are pending i/o's or flushes or the file is
3402 currently being extended, sleep for a while and
3403 retry */
3404 sleep = true;
3405
3406 } else if (node->modification_counter > node->flush_counter) {
3407 /* Flush the space */
3408 sleep = flush = true;
3409
3410 } else if (node->is_open) {
3411 /* Close the file */
3412
3413 fil_node_close_file(node);
3414 }
3415
3416 mutex_exit(&fil_system->mutex);
3417
3418 if (sleep) {
3419 os_thread_sleep(20000);
3420
3421 if (flush) {
3422 fil_flush(id);
3423 }
3424
3425 sleep = flush = false;
3426 goto retry;
3427 }
3428
3429 ut_ad(space->stop_ios);
3430
3431 char* new_file_name = new_path_in == NULL
3432 ? fil_make_filepath(NULL, new_name, IBD, false)
3433 : mem_strdup(new_path_in);
3434 char* old_file_name = node->name;
3435 char* new_space_name = mem_strdup(new_name);
3436 char* old_space_name = space->name;
3437 ulint old_fold = ut_fold_string(old_space_name);
3438 ulint new_fold = ut_fold_string(new_space_name);
3439
3440 ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != NULL);
3441 ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != NULL);
3442
3443 #ifndef UNIV_HOTBACKUP
3444 if (!recv_recovery_on) {
3445 mtr_t mtr;
3446
3447 mtr.start();
3448 fil_name_write_rename(
3449 id, 0, old_file_name, new_file_name, &mtr);
3450 mtr.commit();
3451 log_mutex_enter();
3452 }
3453 #endif /* !UNIV_HOTBACKUP */
3454
3455 /* log_sys->mutex is above fil_system->mutex in the latching order */
3456 ut_ad(log_mutex_own());
3457 mutex_enter(&fil_system->mutex);
3458
3459 ut_ad(space->name == old_space_name);
3460 /* We already checked these. */
3461 ut_ad(space == fil_space_get_by_name(old_space_name));
3462 ut_ad(!fil_space_get_by_name(new_space_name));
3463 ut_ad(node->name == old_file_name);
3464
3465 bool success;
3466
3467 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3468 goto skip_rename; );
3469
3470 success = os_file_rename(
3471 innodb_data_file_key, old_file_name, new_file_name);
3472
3473 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2",
3474 skip_rename: success = false; );
3475
3476 ut_ad(node->name == old_file_name);
3477
3478 if (success) {
3479 node->name = new_file_name;
3480 }
3481
3482 #ifndef UNIV_HOTBACKUP
3483 if (!recv_recovery_on) {
3484 log_mutex_exit();
3485 }
3486 #endif /* !UNIV_HOTBACKUP */
3487
3488 ut_ad(space->name == old_space_name);
3489
3490 if (success) {
3491 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
3492 old_fold, space);
3493 space->name = new_space_name;
3494 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
3495 new_fold, space);
3496 } else {
3497 /* Because nothing was renamed, we must free the new
3498 names, not the old ones. */
3499 old_file_name = new_file_name;
3500 old_space_name = new_space_name;
3501 }
3502
3503 ut_ad(space->stop_ios);
3504 space->stop_ios = false;
3505 mutex_exit(&fil_system->mutex);
3506
3507 ut_free(old_file_name);
3508 ut_free(old_space_name);
3509
3510 return(success);
3511 }
3512
3513 /** Create a new General or Single-Table tablespace
3514 @param[in] space_id Tablespace ID
3515 @param[in] name Tablespace name in dbname/tablename format.
3516 For general tablespaces, the 'dbname/' part may be missing.
3517 @param[in] path Path and filename of the datafile to create.
3518 @param[in] flags Tablespace flags
3519 @param[in] size Initial size of the tablespace file in pages,
3520 must be >= FIL_IBD_FILE_INITIAL_SIZE
3521 @return DB_SUCCESS or error code */
3522 dberr_t
fil_ibd_create(ulint space_id,const char * name,const char * path,ulint flags,ulint size)3523 fil_ibd_create(
3524 ulint space_id,
3525 const char* name,
3526 const char* path,
3527 ulint flags,
3528 ulint size)
3529 {
3530 pfs_os_file_t file;
3531 dberr_t err;
3532 byte* buf2;
3533 byte* page;
3534 bool success;
3535 bool is_temp = FSP_FLAGS_GET_TEMPORARY(flags);
3536 bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
3537 bool has_shared_space = FSP_FLAGS_GET_SHARED(flags);
3538 fil_space_t* space = NULL;
3539
3540 ut_ad(!is_system_tablespace(space_id));
3541 ut_ad(!srv_read_only_mode);
3542 ut_a(space_id < SRV_LOG_SPACE_FIRST_ID);
3543 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
3544 ut_a(fsp_flags_is_valid(flags));
3545
3546 /* Create the subdirectories in the path, if they are
3547 not there already. */
3548 if (!has_shared_space) {
3549 err = os_file_create_subdirs_if_needed(path);
3550 if (err != DB_SUCCESS) {
3551 return(err);
3552 }
3553 }
3554
3555 file = os_file_create(
3556 innodb_data_file_key, path,
3557 OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT,
3558 OS_FILE_NORMAL,
3559 OS_DATA_FILE,
3560 srv_read_only_mode,
3561 &success);
3562
3563 if (!success) {
3564 /* The following call will print an error message */
3565 ulint error = os_file_get_last_error(true);
3566
3567 ib::error() << "Cannot create file '" << path << "'";
3568
3569 if (error == OS_FILE_ALREADY_EXISTS) {
3570 ib::error() << "The file '" << path << "'"
3571 " already exists though the"
3572 " corresponding table did not exist"
3573 " in the InnoDB data dictionary."
3574 " Have you moved InnoDB .ibd files"
3575 " around without using the SQL commands"
3576 " DISCARD TABLESPACE and IMPORT TABLESPACE,"
3577 " or did mysqld crash in the middle of"
3578 " CREATE TABLE?"
3579 " You can resolve the problem by removing"
3580 " the file '" << path
3581 << "' under the 'datadir' of MySQL.";
3582
3583 return(DB_TABLESPACE_EXISTS);
3584 }
3585
3586 if (error == OS_FILE_DISK_FULL) {
3587 return(DB_OUT_OF_FILE_SPACE);
3588 }
3589
3590 return(DB_ERROR);
3591 }
3592
3593 bool atomic_write;
3594
3595 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
3596 if (fil_fusionio_enable_atomic_write(file)) {
3597
3598 /* This is required by FusionIO HW/Firmware */
3599 int ret = posix_fallocate(file.m_file, 0, size * UNIV_PAGE_SIZE);
3600
3601 if (ret != 0) {
3602
3603 ib::error() <<
3604 "posix_fallocate(): Failed to preallocate"
3605 " data for file " << path
3606 << ", desired size "
3607 << size * UNIV_PAGE_SIZE
3608 << " Operating system error number " << ret
3609 << ". Check"
3610 " that the disk is not full or a disk quota"
3611 " exceeded. Make sure the file system supports"
3612 " this function. Some operating system error"
3613 " numbers are described at " REFMAN
3614 " operating-system-error-codes.html";
3615
3616 success = false;
3617 } else {
3618 success = true;
3619 }
3620
3621 atomic_write = true;
3622 } else {
3623 atomic_write = false;
3624
3625 success = os_file_set_size(
3626 path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode);
3627 }
3628 #else
3629 atomic_write = false;
3630
3631 success = os_file_set_size(
3632 path, file, size * UNIV_PAGE_SIZE, srv_read_only_mode);
3633
3634 #endif /* !NO_FALLOCATE && UNIV_LINUX */
3635
3636 if (!success) {
3637 os_file_close(file);
3638 os_file_delete(innodb_data_file_key, path);
3639 return(DB_OUT_OF_FILE_SPACE);
3640 }
3641
3642 /* Note: We are actually punching a hole, previous contents will
3643 be lost after this call, if it succeeds. In this case the file
3644 should be full of NULs. */
3645
3646 bool punch_hole = os_is_sparse_file_supported(path, file);
3647
3648 if (punch_hole) {
3649
3650 dberr_t punch_err;
3651 punch_err = os_file_punch_hole(file.m_file, 0, size * UNIV_PAGE_SIZE);
3652 if (punch_err != DB_SUCCESS) {
3653 punch_hole = false;
3654 }
3655 }
3656
3657 /* printf("Creating tablespace %s id %lu\n", path, space_id); */
3658
3659 /* We have to write the space id to the file immediately and flush the
3660 file to disk. This is because in crash recovery we must be aware what
3661 tablespaces exist and what are their space id's, so that we can apply
3662 the log records to the right file. It may take quite a while until
3663 buffer pool flush algorithms write anything to the file and flush it to
3664 disk. If we would not write here anything, the file would be filled
3665 with zeros from the call of os_file_set_size(), until a buffer pool
3666 flush would write to it. */
3667
3668 buf2 = static_cast<byte*>(ut_malloc_nokey(3 * UNIV_PAGE_SIZE));
3669 /* Align the memory for file i/o if we might have O_DIRECT set */
3670 page = static_cast<byte*>(ut_align(buf2, UNIV_PAGE_SIZE));
3671
3672 memset(page, '\0', UNIV_PAGE_SIZE);
3673 #ifndef UNIV_HOTBACKUP
3674 /* Add the UNIV_PAGE_SIZE to the table flags and write them to the
3675 tablespace header. */
3676 flags = fsp_flags_set_page_size(flags, univ_page_size);
3677 #endif /* !UNIV_HOTBACKUP */
3678 fsp_header_init_fields(page, space_id, flags);
3679 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
3680
3681 const page_size_t page_size(flags);
3682 IORequest request(IORequest::WRITE);
3683
3684 if (!page_size.is_compressed()) {
3685
3686 buf_flush_init_for_writing(
3687 NULL, page, NULL, 0,
3688 fsp_is_checksum_disabled(space_id));
3689
3690 err = os_file_write(
3691 request, path, file, page, 0, page_size.physical());
3692
3693 ut_ad(err != DB_IO_NO_PUNCH_HOLE);
3694
3695 } else {
3696 page_zip_des_t page_zip;
3697
3698 page_zip_set_size(&page_zip, page_size.physical());
3699 page_zip.data = page + UNIV_PAGE_SIZE;
3700 #ifdef UNIV_DEBUG
3701 page_zip.m_start =
3702 #endif /* UNIV_DEBUG */
3703 page_zip.m_end = page_zip.m_nonempty =
3704 page_zip.n_blobs = 0;
3705
3706 buf_flush_init_for_writing(
3707 NULL, page, &page_zip, 0,
3708 fsp_is_checksum_disabled(space_id));
3709
3710 err = os_file_write(
3711 request, path, file, page_zip.data, 0,
3712 page_size.physical());
3713
3714 ut_a(err != DB_IO_NO_PUNCH_HOLE);
3715
3716 punch_hole = false;
3717 }
3718
3719 ut_free(buf2);
3720
3721 if (err != DB_SUCCESS) {
3722
3723 ib::error()
3724 << "Could not write the first page to"
3725 << " tablespace '" << path << "'";
3726
3727 os_file_close(file);
3728 os_file_delete(innodb_data_file_key, path);
3729
3730 return(DB_ERROR);
3731 }
3732
3733 success = os_file_flush(file);
3734
3735 if (!success) {
3736 ib::error() << "File flush of tablespace '"
3737 << path << "' failed";
3738 os_file_close(file);
3739 os_file_delete(innodb_data_file_key, path);
3740 return(DB_ERROR);
3741 }
3742
3743 /* MEB creates isl files during copy-back, hence they
3744 should not be created during apply log operation. */
3745 #ifndef UNIV_HOTBACKUP
3746 if (has_data_dir || has_shared_space) {
3747 /* Make the ISL file if the IBD file is not
3748 in the default location. */
3749 err = RemoteDatafile::create_link_file(name, path,
3750 has_shared_space);
3751 if (err != DB_SUCCESS) {
3752 os_file_close(file);
3753 os_file_delete(innodb_data_file_key, path);
3754 return(err);
3755 }
3756 }
3757 #endif /* !UNIV_HOTBACKUP */
3758 space = fil_space_create(name, space_id, flags, is_temp
3759 ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
3760
3761 DEBUG_SYNC_C("fil_ibd_created_space");
3762
3763 if (!fil_node_create_low(
3764 path, size, space, false, punch_hole, atomic_write)) {
3765
3766 err = DB_ERROR;
3767 goto error_exit_1;
3768 }
3769
3770 /* For encryption tablespace, initial encryption information. */
3771 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
3772 err = fil_set_encryption(space->id,
3773 Encryption::AES,
3774 NULL,
3775 NULL);
3776 ut_ad(err == DB_SUCCESS);
3777 }
3778
3779 #ifndef UNIV_HOTBACKUP
3780 if (!is_temp) {
3781 mtr_t mtr;
3782 const fil_node_t* file = UT_LIST_GET_FIRST(space->chain);
3783
3784 mtr_start(&mtr);
3785 fil_op_write_log(
3786 MLOG_FILE_CREATE2, space_id, 0, file->name,
3787 NULL, space->flags, &mtr);
3788 fil_name_write(space, 0, file, &mtr);
3789 mtr_commit(&mtr);
3790 }
3791 #endif /* !UNIV_HOTBACKUP */
3792 err = DB_SUCCESS;
3793
3794 /* Error code is set. Cleanup the various variables used.
3795 These labels reflect the order in which variables are assigned or
3796 actions are done. */
3797 error_exit_1:
3798 if (err != DB_SUCCESS && (has_data_dir || has_shared_space)) {
3799 RemoteDatafile::delete_link_file(name);
3800 }
3801
3802 os_file_close(file);
3803 if (err != DB_SUCCESS) {
3804 os_file_delete(innodb_data_file_key, path);
3805 }
3806
3807 return(err);
3808 }
3809
3810 #ifndef UNIV_HOTBACKUP
3811 /** Try to open a single-table tablespace and optionally check that the
3812 space id in it is correct. If this does not succeed, print an error message
3813 to the .err log. This function is used to open a tablespace when we start
3814 mysqld after the dictionary has been booted, and also in IMPORT TABLESPACE.
3815
3816 NOTE that we assume this operation is used either at the database startup
3817 or under the protection of the dictionary mutex, so that two users cannot
3818 race here. This operation does not leave the file associated with the
3819 tablespace open, but closes it after we have looked at the space id in it.
3820
3821 If the validate boolean is set, we read the first page of the file and
3822 check that the space id in the file is what we expect. We assume that
3823 this function runs much faster if no check is made, since accessing the
3824 file inode probably is much faster (the OS caches them) than accessing
3825 the first page of the file. This boolean may be initially false, but if
3826 a remote tablespace is found it will be changed to true.
3827
3828 If the fix_dict boolean is set, then it is safe to use an internal SQL
3829 statement to update the dictionary tables if they are incorrect.
3830
3831 @param[in] validate true if we should validate the tablespace
3832 @param[in] fix_dict true if the dictionary is available to be fixed
3833 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
3834 @param[in] id tablespace ID
3835 @param[in] flags tablespace flags
3836 @param[in] space_name tablespace name of the datafile
3837 If file-per-table, it is the table name in the databasename/tablename format
3838 @param[in] path_in expected filepath, usually read from dictionary
3839 @return DB_SUCCESS or error code */
3840 dberr_t
fil_ibd_open(bool validate,bool fix_dict,fil_type_t purpose,ulint id,ulint flags,const char * space_name,const char * path_in)3841 fil_ibd_open(
3842 bool validate,
3843 bool fix_dict,
3844 fil_type_t purpose,
3845 ulint id,
3846 ulint flags,
3847 const char* space_name,
3848 const char* path_in)
3849 {
3850 dberr_t err = DB_SUCCESS;
3851 bool dict_filepath_same_as_default = false;
3852 bool link_file_found = false;
3853 bool link_file_is_bad = false;
3854 bool is_shared = FSP_FLAGS_GET_SHARED(flags);
3855 bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags);
3856 Datafile df_default; /* default location */
3857 Datafile df_dict; /* dictionary location */
3858 RemoteDatafile df_remote; /* remote location */
3859 ulint tablespaces_found = 0;
3860 ulint valid_tablespaces_found = 0;
3861 bool for_import = (purpose == FIL_TYPE_IMPORT);
3862
3863 ut_ad(!fix_dict || rw_lock_own(dict_operation_lock, RW_LOCK_X));
3864
3865 ut_ad(!fix_dict || mutex_own(&dict_sys->mutex));
3866 ut_ad(!fix_dict || !srv_read_only_mode);
3867 ut_ad(!fix_dict || srv_log_file_size != 0);
3868 ut_ad(fil_type_is_data(purpose));
3869
3870 if (!fsp_flags_is_valid(flags)) {
3871 return(DB_CORRUPTION);
3872 }
3873
3874 df_default.init(space_name, flags);
3875 df_dict.init(space_name, flags);
3876 df_remote.init(space_name, flags);
3877
3878 /* Discover the correct file by looking in three possible locations
3879 while avoiding unecessary effort. */
3880
3881 if (is_shared) {
3882 /* Shared tablespaces will have a path_in since the filename
3883 is not generated from the tablespace name. Use the basename
3884 from this path_in with the default datadir as a filepath to
3885 the default location */
3886 ut_a(path_in);
3887 const char* sep = strrchr(path_in, OS_PATH_SEPARATOR);
3888 const char* basename = (sep == NULL) ? path_in : &sep[1];
3889 df_default.make_filepath(NULL, basename, IBD);
3890
3891 /* Always validate shared tablespaces. */
3892 validate = true;
3893
3894 /* Set the ISL filepath in the default location. */
3895 df_remote.set_link_filepath(path_in);
3896 } else {
3897 /* We will always look for an ibd in the default location. */
3898 df_default.make_filepath(NULL, space_name, IBD);
3899 }
3900
3901 /* Look for a filepath embedded in an ISL where the default file
3902 would be. */
3903 if (df_remote.open_read_only(true) == DB_SUCCESS) {
3904 ut_ad(df_remote.is_open());
3905
3906 /* Always validate a file opened from an ISL pointer */
3907 validate = true;
3908 ++tablespaces_found;
3909 link_file_found = true;
3910 } else if (df_remote.filepath() != NULL) {
3911 /* An ISL file was found but contained a bad filepath in it.
3912 Better validate anything we do find. */
3913 validate = true;
3914 }
3915
3916 /* Attempt to open the tablespace at the dictionary filepath. */
3917 if (path_in) {
3918 if (df_default.same_filepath_as(path_in)) {
3919 dict_filepath_same_as_default = true;
3920 } else {
3921 /* Dict path is not the default path. Always validate
3922 remote files. If default is opened, it was moved. */
3923 validate = true;
3924
3925 df_dict.set_filepath(path_in);
3926 if (df_dict.open_read_only(true) == DB_SUCCESS) {
3927 ut_ad(df_dict.is_open());
3928 ++tablespaces_found;
3929 }
3930 }
3931 }
3932
3933 /* Always look for a file at the default location. But don't log
3934 an error if the tablespace is already open in remote or dict. */
3935 ut_a(df_default.filepath());
3936 const bool strict = (tablespaces_found == 0);
3937 if (df_default.open_read_only(strict) == DB_SUCCESS) {
3938 ut_ad(df_default.is_open());
3939 ++tablespaces_found;
3940 }
3941
3942 /* Check if multiple locations point to the same file. */
3943 if (tablespaces_found > 1 && df_default.same_as(df_remote)) {
3944 /* A link file was found with the default path in it.
3945 Use the default path and delete the link file. */
3946 --tablespaces_found;
3947 df_remote.delete_link_file();
3948 df_remote.close();
3949 }
3950 if (tablespaces_found > 1 && df_default.same_as(df_dict)) {
3951 --tablespaces_found;
3952 df_dict.close();
3953 }
3954 if (tablespaces_found > 1 && df_remote.same_as(df_dict)) {
3955 --tablespaces_found;
3956 df_dict.close();
3957 }
3958
3959 bool atomic_write;
3960
3961 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
3962 if (!srv_use_doublewrite_buf && df_default.is_open()) {
3963
3964 atomic_write = fil_fusionio_enable_atomic_write(
3965 df_default.handle());
3966
3967 } else {
3968 atomic_write = false;
3969 }
3970 #else
3971 atomic_write = false;
3972 #endif /* !NO_FALLOCATE && UNIV_LINUX */
3973
3974 /* We have now checked all possible tablespace locations and
3975 have a count of how many unique files we found. If things are
3976 normal, we only found 1. */
3977 /* For encrypted tablespace, we need to check the
3978 encryption in header of first page. */
3979 if (!validate && tablespaces_found == 1 && !is_encrypted) {
3980
3981 goto skip_validate;
3982 }
3983
3984 /* Read and validate the first page of these three tablespace
3985 locations, if found. */
3986 valid_tablespaces_found +=
3987 (df_remote.validate_to_dd(id, flags, for_import)
3988 == DB_SUCCESS) ? 1 : 0;
3989
3990 valid_tablespaces_found +=
3991 (df_default.validate_to_dd(id, flags, for_import)
3992 == DB_SUCCESS) ? 1 : 0;
3993
3994 valid_tablespaces_found +=
3995 (df_dict.validate_to_dd(id, flags, for_import)
3996 == DB_SUCCESS) ? 1 : 0;
3997
3998 /* Make sense of these three possible locations.
3999 First, bail out if no tablespace files were found. */
4000 if (valid_tablespaces_found == 0) {
4001 if (!is_encrypted) {
4002 /* The following call prints an error message.
4003 For encrypted tablespace we skip print, since it should
4004 be keyring plugin issues. */
4005 os_file_get_last_error(true);
4006 ib::error() << "Could not find a valid tablespace file for `"
4007 << space_name << "`. " << TROUBLESHOOT_DATADICT_MSG;
4008 }
4009
4010 return(DB_CORRUPTION);
4011 }
4012
4013 if (!validate && !is_encrypted) {
4014 return(DB_SUCCESS);
4015 }
4016
4017 if (validate && is_encrypted && fil_space_get(id)) {
4018 return(DB_SUCCESS);
4019 }
4020
4021 /* Do not open any tablespaces if more than one tablespace with
4022 the correct space ID and flags were found. */
4023 if (tablespaces_found > 1) {
4024 ib::error() << "A tablespace for `" << space_name
4025 << "` has been found in multiple places;";
4026
4027 if (df_default.is_open()) {
4028 ib::error() << "Default location: "
4029 << df_default.filepath()
4030 << ", Space ID=" << df_default.space_id()
4031 << ", Flags=" << df_default.flags();
4032 }
4033 if (df_remote.is_open()) {
4034 ib::error() << "Remote location: "
4035 << df_remote.filepath()
4036 << ", Space ID=" << df_remote.space_id()
4037 << ", Flags=" << df_remote.flags();
4038 }
4039 if (df_dict.is_open()) {
4040 ib::error() << "Dictionary location: "
4041 << df_dict.filepath()
4042 << ", Space ID=" << df_dict.space_id()
4043 << ", Flags=" << df_dict.flags();
4044 }
4045
4046 /* Force-recovery will allow some tablespaces to be
4047 skipped by REDO if there was more than one file found.
4048 Unlike during the REDO phase of recovery, we now know
4049 if the tablespace is valid according to the dictionary,
4050 which was not available then. So if we did not force
4051 recovery and there is only one good tablespace, ignore
4052 any bad tablespaces. */
4053 if (valid_tablespaces_found > 1 || srv_force_recovery > 0) {
4054 ib::error() << "Will not open tablespace `"
4055 << space_name << "`";
4056
4057 /* If the file is not open it cannot be valid. */
4058 ut_ad(df_default.is_open() || !df_default.is_valid());
4059 ut_ad(df_dict.is_open() || !df_dict.is_valid());
4060 ut_ad(df_remote.is_open() || !df_remote.is_valid());
4061
4062 /* Having established that, this is an easy way to
4063 look for corrupted data files. */
4064 if (df_default.is_open() != df_default.is_valid()
4065 || df_dict.is_open() != df_dict.is_valid()
4066 || df_remote.is_open() != df_remote.is_valid()) {
4067 return(DB_CORRUPTION);
4068 }
4069 return(DB_ERROR);
4070 }
4071
4072 /* There is only one valid tablespace found and we did
4073 not use srv_force_recovery during REDO. Use this one
4074 tablespace and clean up invalid tablespace pointers */
4075 if (df_default.is_open() && !df_default.is_valid()) {
4076 df_default.close();
4077 tablespaces_found--;
4078 }
4079 if (df_dict.is_open() && !df_dict.is_valid()) {
4080 df_dict.close();
4081 /* Leave dict.filepath so that SYS_DATAFILES
4082 can be corrected below. */
4083 tablespaces_found--;
4084 }
4085 if (df_remote.is_open() && !df_remote.is_valid()) {
4086 df_remote.close();
4087 tablespaces_found--;
4088 link_file_is_bad = true;
4089 }
4090 }
4091
4092 /* At this point, there should be only one filepath. */
4093 ut_a(tablespaces_found == 1);
4094 ut_a(valid_tablespaces_found == 1);
4095
4096 /* Only fix the dictionary at startup when there is only one thread.
4097 Calls to dict_load_table() can be done while holding other latches. */
4098 if (!fix_dict) {
4099 goto skip_validate;
4100 }
4101
4102 /* We may need to update what is stored in SYS_DATAFILES or
4103 SYS_TABLESPACES or adjust the link file. Since a failure to
4104 update SYS_TABLESPACES or SYS_DATAFILES does not prevent opening
4105 and using the tablespace either this time or the next, we do not
4106 check the return code or fail to open the tablespace. But if it
4107 fails, dict_update_filepath() will issue a warning to the log. */
4108 if (df_dict.filepath()) {
4109 ut_ad(path_in != NULL);
4110 ut_ad(df_dict.same_filepath_as(path_in));
4111
4112 if (df_remote.is_open()) {
4113 if (!df_remote.same_filepath_as(path_in)) {
4114 dict_update_filepath(id, df_remote.filepath());
4115 }
4116
4117 } else if (df_default.is_open()) {
4118 ut_ad(!dict_filepath_same_as_default);
4119 dict_update_filepath(id, df_default.filepath());
4120 if (link_file_is_bad) {
4121 RemoteDatafile::delete_link_file(space_name);
4122 }
4123
4124 } else if (!is_shared
4125 && (!link_file_found || link_file_is_bad)) {
4126 ut_ad(df_dict.is_open());
4127 /* Fix the link file if we got our filepath
4128 from the dictionary but a link file did not
4129 exist or it did not point to a valid file. */
4130 RemoteDatafile::delete_link_file(space_name);
4131 RemoteDatafile::create_link_file(
4132 space_name, df_dict.filepath());
4133 }
4134
4135 } else if (df_remote.is_open()) {
4136 if (dict_filepath_same_as_default) {
4137 dict_update_filepath(id, df_remote.filepath());
4138
4139 } else if (path_in == NULL) {
4140 /* SYS_DATAFILES record for this space ID
4141 was not found. */
4142 dict_replace_tablespace_and_filepath(
4143 id, space_name, df_remote.filepath(), flags);
4144 }
4145
4146 } else if (df_default.is_open()) {
4147 /* We opened the tablespace in the default location.
4148 SYS_DATAFILES.PATH needs to be updated if it is different
4149 from this default path or if the SYS_DATAFILES.PATH was not
4150 supplied and it should have been. Also update the dictionary
4151 if we found an ISL file (since !df_remote.is_open). Since
4152 path_in is not suppled for file-per-table, we must assume
4153 that it matched the ISL. */
4154 if ((path_in != NULL && !dict_filepath_same_as_default)
4155 || (path_in == NULL
4156 && (DICT_TF_HAS_DATA_DIR(flags)
4157 || DICT_TF_HAS_SHARED_SPACE(flags)))
4158 || df_remote.filepath() != NULL) {
4159 dict_replace_tablespace_and_filepath(
4160 id, space_name, df_default.filepath(), flags);
4161 }
4162 }
4163
4164 skip_validate:
4165 if (err == DB_SUCCESS) {
4166 fil_space_t* space = fil_space_create(
4167 space_name, id, flags, purpose);
4168
4169 /* We do not measure the size of the file, that is why
4170 we pass the 0 below */
4171
4172 if (fil_node_create_low(
4173 df_remote.is_open() ? df_remote.filepath() :
4174 df_dict.is_open() ? df_dict.filepath() :
4175 df_default.filepath(), 0, space, false,
4176 true, atomic_write) == NULL) {
4177
4178 err = DB_ERROR;
4179 }
4180
4181 /* For encryption tablespace, initialize encryption
4182 information.*/
4183 if (err == DB_SUCCESS && is_encrypted && !for_import) {
4184 Datafile& df_current = df_remote.is_open() ?
4185 df_remote: df_dict.is_open() ?
4186 df_dict : df_default;
4187
4188 byte* key = df_current.m_encryption_key;
4189 byte* iv = df_current.m_encryption_iv;
4190 ut_ad(key && iv);
4191
4192 err = fil_set_encryption(space->id, Encryption::AES,
4193 key, iv);
4194 ut_ad(err == DB_SUCCESS);
4195 }
4196 }
4197
4198 return(err);
4199 }
4200 #endif /* !UNIV_HOTBACKUP */
4201
4202 #ifdef UNIV_HOTBACKUP
4203 /*******************************************************************//**
4204 Allocates a file name for an old version of a single-table tablespace.
4205 The string must be freed by caller with ut_free()!
4206 @return own: file name */
4207 static
4208 char*
fil_make_ibbackup_old_name(const char * name)4209 fil_make_ibbackup_old_name(
4210 /*=======================*/
4211 const char* name) /*!< in: original file name */
4212 {
4213 static const char suffix[] = "_ibbackup_old_vers_";
4214 char* path;
4215 ulint len = strlen(name);
4216
4217 path = static_cast<char*>(ut_malloc_nokey(len + 15 + sizeof(suffix)));
4218
4219 memcpy(path, name, len);
4220 memcpy(path + len, suffix, sizeof(suffix) - 1);
4221 ut_sprintf_timestamp_without_extra_chars(
4222 path + len + sizeof(suffix) - 1);
4223 return(path);
4224 }
4225 #endif /* UNIV_HOTBACKUP */
4226
4227 /** Looks for a pre-existing fil_space_t with the given tablespace ID
4228 and, if found, returns the name and filepath in newly allocated buffers
4229 that the caller must free.
4230 @param[in] space_id The tablespace ID to search for.
4231 @param[out] name Name of the tablespace found.
4232 @param[out] filepath The filepath of the first datafile for the
4233 tablespace.
4234 @return true if tablespace is found, false if not. */
4235 bool
fil_space_read_name_and_filepath(ulint space_id,char ** name,char ** filepath)4236 fil_space_read_name_and_filepath(
4237 ulint space_id,
4238 char** name,
4239 char** filepath)
4240 {
4241 bool success = false;
4242 *name = NULL;
4243 *filepath = NULL;
4244
4245 mutex_enter(&fil_system->mutex);
4246
4247 fil_space_t* space = fil_space_get_by_id(space_id);
4248
4249 if (space != NULL) {
4250 *name = mem_strdup(space->name);
4251
4252 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
4253 *filepath = mem_strdup(node->name);
4254
4255 success = true;
4256 }
4257
4258 mutex_exit(&fil_system->mutex);
4259
4260 return(success);
4261 }
4262
4263 /** Convert a file name to a tablespace name.
4264 @param[in] filename directory/databasename/tablename.ibd
4265 @return database/tablename string, to be freed with ut_free() */
4266 char*
fil_path_to_space_name(const char * filename)4267 fil_path_to_space_name(
4268 const char* filename)
4269 {
4270 /* Strip the file name prefix and suffix, leaving
4271 only databasename/tablename. */
4272 ulint filename_len = strlen(filename);
4273 const char* end = filename + filename_len;
4274 #ifdef HAVE_MEMRCHR
4275 const char* tablename = 1 + static_cast<const char*>(
4276 memrchr(filename, OS_PATH_SEPARATOR,
4277 filename_len));
4278 const char* dbname = 1 + static_cast<const char*>(
4279 memrchr(filename, OS_PATH_SEPARATOR,
4280 tablename - filename - 1));
4281 #else /* HAVE_MEMRCHR */
4282 const char* tablename = filename;
4283 const char* dbname = NULL;
4284
4285 while (const char* t = static_cast<const char*>(
4286 memchr(tablename, OS_PATH_SEPARATOR,
4287 end - tablename))) {
4288 dbname = tablename;
4289 tablename = t + 1;
4290 }
4291 #endif /* HAVE_MEMRCHR */
4292
4293 ut_ad(dbname != NULL);
4294 ut_ad(tablename > dbname);
4295 ut_ad(tablename < end);
4296 ut_ad(end - tablename > 4);
4297 ut_ad(memcmp(end - 4, DOT_IBD, 4) == 0);
4298
4299 char* name = mem_strdupl(dbname, end - dbname - 4);
4300
4301 ut_ad(name[tablename - dbname - 1] == OS_PATH_SEPARATOR);
4302 #if OS_PATH_SEPARATOR != '/'
4303 /* space->name uses '/', not OS_PATH_SEPARATOR. */
4304 name[tablename - dbname - 1] = '/';
4305 #endif
4306
4307 return(name);
4308 }
4309
4310 /** Discover the correct IBD file to open given a remote or missing
4311 filepath from the REDO log. MEB and administrators can move a crashed
4312 database to another location on the same machine and try to recover it.
4313 Remote IBD files might be moved as well to the new location.
4314 The problem with this is that the REDO log contains the old location
4315 which may be still accessible. During recovery, if files are found in
4316 both locations, we can chose on based on these priorities;
4317 1. Default location
4318 2. ISL location
4319 3. REDO location
4320 @param[in] space_id tablespace ID
4321 @param[in] df Datafile object with path from redo
4322 @return true if a valid datafile was found, false if not */
4323 bool
fil_ibd_discover(ulint space_id,Datafile & df)4324 fil_ibd_discover(
4325 ulint space_id,
4326 Datafile& df)
4327 {
4328 Datafile df_def_gen; /* default general datafile */
4329 Datafile df_def_per; /* default file-per-table datafile */
4330 RemoteDatafile df_rem_gen; /* remote general datafile*/
4331 RemoteDatafile df_rem_per; /* remote file-per-table datafile */
4332
4333 /* Look for the datafile in the default location. If it is
4334 a general tablespace, it will be in the datadir. */
4335 const char* filename = df.filepath();
4336 const char* basename = base_name(filename);
4337 df_def_gen.init(basename, 0);
4338 df_def_gen.make_filepath(NULL, basename, IBD);
4339 if (df_def_gen.open_read_only(false) == DB_SUCCESS
4340 && df_def_gen.validate_for_recovery() == DB_SUCCESS
4341 && df_def_gen.space_id() == space_id) {
4342 df.set_filepath(df_def_gen.filepath());
4343 df.open_read_only(false);
4344 return(true);
4345 }
4346
4347 /* If this datafile is file-per-table it will have a schema dir. */
4348 ulint sep_found = 0;
4349 const char* db = basename;
4350 for (; db > filename && sep_found < 2; db--) {
4351 if (db[0] == OS_PATH_SEPARATOR) {
4352 sep_found++;
4353 }
4354 }
4355 if (sep_found == 2) {
4356 db += 2;
4357 df_def_per.init(db, 0);
4358 df_def_per.make_filepath(NULL, db, IBD);
4359 if (df_def_per.open_read_only(false) == DB_SUCCESS
4360 && df_def_per.validate_for_recovery() == DB_SUCCESS
4361 && df_def_per.space_id() == space_id) {
4362 df.set_filepath(df_def_per.filepath());
4363 df.open_read_only(false);
4364 return(true);
4365 }
4366 }
4367
4368 /* Did not find a general or file-per-table datafile in the
4369 default location. Look for a remote general tablespace. */
4370 df_rem_gen.set_name(basename);
4371 if (df_rem_gen.open_link_file() == DB_SUCCESS) {
4372
4373 /* An ISL file was found with contents. */
4374 if (df_rem_gen.open_read_only(false) != DB_SUCCESS
4375 || df_rem_gen.validate_for_recovery() != DB_SUCCESS) {
4376
4377 /* Assume that this ISL file is intended to be used.
4378 Do not continue looking for another if this file
4379 cannot be opened or is not a valid IBD file. */
4380 ib::error() << "ISL file '"
4381 << df_rem_gen.link_filepath()
4382 << "' was found but the linked file '"
4383 << df_rem_gen.filepath()
4384 << "' could not be opened or is not correct.";
4385 return(false);
4386 }
4387
4388 /* Use this file if it has the space_id from the MLOG
4389 record. */
4390 if (df_rem_gen.space_id() == space_id) {
4391 df.set_filepath(df_rem_gen.filepath());
4392 df.open_read_only(false);
4393 return(true);
4394 }
4395
4396 /* Since old MLOG records can use the same basename in
4397 multiple CREATE/DROP sequences, this ISL file could be
4398 pointing to a later version of this basename.ibd file
4399 which has a different space_id. Keep looking. */
4400 }
4401
4402 /* Look for a remote file-per-table tablespace. */
4403 if (sep_found == 2) {
4404 df_rem_per.set_name(db);
4405 if (df_rem_per.open_link_file() == DB_SUCCESS) {
4406
4407 /* An ISL file was found with contents. */
4408 if (df_rem_per.open_read_only(false) != DB_SUCCESS
4409 || df_rem_per.validate_for_recovery()
4410 != DB_SUCCESS) {
4411
4412 /* Assume that this ISL file is intended to
4413 be used. Do not continue looking for another
4414 if this file cannot be opened or is not
4415 a valid IBD file. */
4416 ib::error() << "ISL file '"
4417 << df_rem_per.link_filepath()
4418 << "' was found but the linked file '"
4419 << df_rem_per.filepath()
4420 << "' could not be opened or is"
4421 " not correct.";
4422 return(false);
4423 }
4424
4425 /* Use this file if it has the space_id from the
4426 MLOG record. */
4427 if (df_rem_per.space_id() == space_id) {
4428 df.set_filepath(df_rem_per.filepath());
4429 df.open_read_only(false);
4430 return(true);
4431 }
4432
4433 /* Since old MLOG records can use the same basename
4434 in multiple CREATE/DROP TABLE sequences, this ISL
4435 file could be pointing to a later version of this
4436 basename.ibd file which has a different space_id.
4437 Keep looking. */
4438 }
4439 }
4440
4441 /* No ISL files were found in the default location. Use the location
4442 given in the redo log. */
4443 if (df.open_read_only(false) == DB_SUCCESS
4444 && df.validate_for_recovery() == DB_SUCCESS
4445 && df.space_id() == space_id) {
4446 return(true);
4447 }
4448
4449 /* A datafile was not discovered for the filename given. */
4450 return(false);
4451 }
4452
4453 /** Open an ibd tablespace and add it to the InnoDB data structures.
4454 This is similar to fil_ibd_open() except that it is used while processing
4455 the REDO log, so the data dictionary is not available and very little
4456 validation is done. The tablespace name is extracred from the
4457 dbname/tablename.ibd portion of the filename, which assumes that the file
4458 is a file-per-table tablespace. Any name will do for now. General
4459 tablespace names will be read from the dictionary after it has been
4460 recovered. The tablespace flags are read at this time from the first page
4461 of the file in validate_for_recovery().
4462 @param[in] space_id tablespace ID
4463 @param[in] filename path/to/databasename/tablename.ibd
4464 @param[out] space the tablespace, or NULL on error
4465 @return status of the operation */
4466 enum fil_load_status
fil_ibd_load(ulint space_id,const char * filename,fil_space_t * & space)4467 fil_ibd_load(
4468 ulint space_id,
4469 const char* filename,
4470 fil_space_t*& space)
4471 {
4472 /* If the a space is already in the file system cache with this
4473 space ID, then there is nothing to do. */
4474 mutex_enter(&fil_system->mutex);
4475 space = fil_space_get_by_id(space_id);
4476 mutex_exit(&fil_system->mutex);
4477
4478 if (space != NULL) {
4479 /* Compare the filename we are trying to open with the
4480 filename from the first node of the tablespace we opened
4481 previously. Fail if it is different. */
4482 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
4483
4484 if (0 != strcmp(innobase_basename(filename),
4485 innobase_basename(node->name))) {
4486 #ifdef UNIV_HOTBACKUP
4487 ib::trace()
4488 #else
4489 ib::info()
4490 #endif /* UNIV_HOTBACKUP */
4491 << "Ignoring data file '" << filename
4492 << "' with space ID " << space->id
4493 << ". Another data file called " << node->name
4494 << " exists with the same space ID.";
4495
4496 space = NULL;
4497 return(FIL_LOAD_ID_CHANGED);
4498 }
4499 return(FIL_LOAD_OK);
4500 }
4501
4502 /* If the filepath in the redo log is a default location in or
4503 under the datadir, then just try to open it there. */
4504 Datafile file;
4505 file.set_filepath(filename);
4506
4507 Folder folder(filename, dirname_length(filename));
4508 if (folder_mysql_datadir >= folder) {
4509 file.open_read_only(false);
4510 }
4511
4512 if (!file.is_open()) {
4513 /* The file has been moved or it is a remote datafile. */
4514 if (!fil_ibd_discover(space_id, file)
4515 || !file.is_open()) {
4516 return(FIL_LOAD_NOT_FOUND);
4517 }
4518 }
4519
4520 os_offset_t size;
4521
4522 /* Read and validate the first page of the tablespace.
4523 Assign a tablespace name based on the tablespace type. */
4524 switch (file.validate_for_recovery()) {
4525 os_offset_t minimum_size;
4526 case DB_SUCCESS:
4527 if (file.space_id() != space_id) {
4528 #ifdef UNIV_HOTBACKUP
4529 ib::trace()
4530 #else /* !UNIV_HOTBACKUP */
4531 ib::info()
4532 #endif /* UNIV_HOTBACKUP */
4533 << "Ignoring data file '"
4534 << file.filepath()
4535 << "' with space ID " << file.space_id()
4536 << ", since the redo log references "
4537 << file.filepath() << " with space ID "
4538 << space_id << ".";
4539 return(FIL_LOAD_ID_CHANGED);
4540 }
4541
4542 /* Get and test the file size. */
4543 size = os_file_get_size(file.handle());
4544
4545 /* Every .ibd file is created >= 4 pages in size.
4546 Smaller files cannot be OK. */
4547 minimum_size = FIL_IBD_FILE_INITIAL_SIZE * UNIV_PAGE_SIZE;
4548
4549 if (size == static_cast<os_offset_t>(-1)) {
4550 /* The following call prints an error message */
4551 os_file_get_last_error(true);
4552
4553 ib::error() << "Could not measure the size of"
4554 " single-table tablespace file '"
4555 << file.filepath() << "'";
4556
4557 } else if (size < minimum_size) {
4558 #ifndef UNIV_HOTBACKUP
4559 ib::error() << "The size of tablespace file '"
4560 << file.filepath() << "' is only " << size
4561 << ", should be at least " << minimum_size
4562 << "!";
4563 #else
4564 /* In MEB, we work around this error. */
4565 file.set_space_id(ULINT_UNDEFINED);
4566 file.set_flags(0);
4567 break;
4568 #endif /* !UNIV_HOTBACKUP */
4569 } else {
4570 /* Everything is fine so far. */
4571 break;
4572 }
4573
4574 /* Fall through to error handling */
4575
4576 case DB_TABLESPACE_EXISTS:
4577 #ifdef UNIV_HOTBACKUP
4578 if (file.flags() == ~(ulint)0) {
4579 return FIL_LOAD_OK;
4580 }
4581 #endif /* UNIV_HOTBACKUP */
4582
4583 return(FIL_LOAD_INVALID);
4584
4585 default:
4586 return(FIL_LOAD_NOT_FOUND);
4587 }
4588
4589 ut_ad(space == NULL);
4590
4591 #ifdef UNIV_HOTBACKUP
4592 if (file.space_id() == ULINT_UNDEFINED || file.space_id() == 0) {
4593 char* new_path;
4594
4595 ib::info() << "Renaming tablespace file '" << file.filepath()
4596 << "' with space ID " << file.space_id() << " to "
4597 << file.name() << "_ibbackup_old_vers_<timestamp>"
4598 " because its size " << size() << " is too small"
4599 " (< 4 pages 16 kB each), or the space id in the"
4600 " file header is not sensible. This can happen in"
4601 " an mysqlbackup run, and is not dangerous.";
4602 file.close();
4603
4604 new_path = fil_make_ibbackup_old_name(file.filepath());
4605
4606 bool success = os_file_rename(
4607 innodb_data_file_key, file.filepath(), new_path);
4608
4609 ut_a(success);
4610
4611 ut_free(new_path);
4612
4613 return(FIL_LOAD_ID_CHANGED);
4614 }
4615
4616 /* A backup may contain the same space several times, if the space got
4617 renamed at a sensitive time. Since it is enough to have one version of
4618 the space, we rename the file if a space with the same space id
4619 already exists in the tablespace memory cache. We rather rename the
4620 file than delete it, because if there is a bug, we do not want to
4621 destroy valuable data. */
4622
4623 mutex_enter(&fil_system->mutex);
4624 space = fil_space_get_by_id(space_id);
4625 mutex_exit(&fil_system->mutex);
4626
4627 if (space != NULL) {
4628 ib::info() << "Renaming data file '" << file.filepath()
4629 << "' with space ID " << space_id << " to "
4630 << file.name()
4631 << "_ibbackup_old_vers_<timestamp> because space "
4632 << space->name << " with the same id was scanned"
4633 " earlier. This can happen if you have renamed tables"
4634 " during an mysqlbackup run.";
4635 file.close();
4636
4637 char* new_path = fil_make_ibbackup_old_name(file.filepath());
4638
4639 bool success = os_file_rename(
4640 innodb_data_file_key, file.filepath(), new_path);
4641
4642 ut_a(success);
4643
4644 ut_free(new_path);
4645 return(FIL_LOAD_OK);
4646 }
4647 #endif /* UNIV_HOTBACKUP */
4648
4649 bool is_temp = FSP_FLAGS_GET_TEMPORARY(file.flags());
4650 space = fil_space_create(
4651 file.name(), space_id, file.flags(),
4652 is_temp ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
4653
4654 if (space == NULL) {
4655 return(FIL_LOAD_INVALID);
4656 }
4657
4658 ut_ad(space->id == file.space_id());
4659 ut_ad(space->id == space_id);
4660
4661 /* We do not use the size information we have about the file, because
4662 the rounding formula for extents and pages is somewhat complex; we
4663 let fil_node_open() do that task. */
4664
4665 if (!fil_node_create_low(file.filepath(), 0, space,
4666 false, true, false)) {
4667 ut_error;
4668 }
4669
4670 /* For encryption tablespace, initial encryption information. */
4671 if (FSP_FLAGS_GET_ENCRYPTION(space->flags)
4672 && file.m_encryption_key != NULL) {
4673 dberr_t err = fil_set_encryption(space->id,
4674 Encryption::AES,
4675 file.m_encryption_key,
4676 file.m_encryption_iv);
4677 if (err != DB_SUCCESS) {
4678 ib::error() << "Can't set encryption information for"
4679 " tablespace " << space->name << "!";
4680 }
4681 }
4682
4683
4684 return(FIL_LOAD_OK);
4685 }
4686
4687 /***********************************************************************//**
4688 A fault-tolerant function that tries to read the next file name in the
4689 directory. We retry 100 times if os_file_readdir_next_file() returns -1. The
4690 idea is to read as much good data as we can and jump over bad data.
4691 @return 0 if ok, -1 if error even after the retries, 1 if at the end
4692 of the directory */
4693 int
fil_file_readdir_next_file(dberr_t * err,const char * dirname,os_file_dir_t dir,os_file_stat_t * info)4694 fil_file_readdir_next_file(
4695 /*=======================*/
4696 dberr_t* err, /*!< out: this is set to DB_ERROR if an error
4697 was encountered, otherwise not changed */
4698 const char* dirname,/*!< in: directory name or path */
4699 os_file_dir_t dir, /*!< in: directory stream */
4700 os_file_stat_t* info) /*!< in/out: buffer where the
4701 info is returned */
4702 {
4703 for (ulint i = 0; i < 100; i++) {
4704 int ret = os_file_readdir_next_file(dirname, dir, info);
4705
4706 if (ret != -1) {
4707
4708 return(ret);
4709 }
4710
4711 ib::error() << "os_file_readdir_next_file() returned -1 in"
4712 " directory " << dirname
4713 << ", crash recovery may have failed"
4714 " for some .ibd files!";
4715
4716 *err = DB_ERROR;
4717 }
4718
4719 return(-1);
4720 }
4721
4722 /*******************************************************************//**
4723 Report that a tablespace for a table was not found. */
4724 static
4725 void
fil_report_missing_tablespace(const char * name,ulint space_id)4726 fil_report_missing_tablespace(
4727 /*===========================*/
4728 const char* name, /*!< in: table name */
4729 ulint space_id) /*!< in: table's space id */
4730 {
4731 ib::error() << "Table " << name
4732 << " in the InnoDB data dictionary has tablespace id "
4733 << space_id << ","
4734 " but tablespace with that id or name does not exist. Have"
4735 " you deleted or moved .ibd files? This may also be a table"
4736 " created with CREATE TEMPORARY TABLE whose .ibd and .frm"
4737 " files MySQL automatically removed, but the table still"
4738 " exists in the InnoDB internal data dictionary.";
4739 }
4740
4741 #ifndef UNIV_HOTBACKUP
4742 /** Returns true if a matching tablespace exists in the InnoDB tablespace
4743 memory cache. Note that if we have not done a crash recovery at the database
4744 startup, there may be many tablespaces which are not yet in the memory cache.
4745 @param[in] id Tablespace ID
4746 @param[in] name Tablespace name used in fil_space_create().
4747 @param[in] print_error_if_does_not_exist
4748 Print detailed error information to the
4749 error log if a matching tablespace is not found from memory.
4750 @param[in] adjust_space Whether to adjust space id on mismatch
4751 @param[in] heap Heap memory
4752 @param[in] table_id table id
4753 @return true if a matching tablespace exists in the memory cache */
4754 bool
fil_space_for_table_exists_in_mem(ulint id,const char * name,bool print_error_if_does_not_exist,bool adjust_space,mem_heap_t * heap,table_id_t table_id)4755 fil_space_for_table_exists_in_mem(
4756 ulint id,
4757 const char* name,
4758 bool print_error_if_does_not_exist,
4759 bool adjust_space,
4760 mem_heap_t* heap,
4761 table_id_t table_id)
4762 {
4763 fil_space_t* fnamespace = NULL;
4764 fil_space_t* space;
4765
4766 ut_ad(fil_system);
4767
4768 mutex_enter(&fil_system->mutex);
4769
4770 /* Look if there is a space with the same id */
4771
4772 space = fil_space_get_by_id(id);
4773
4774 if (space != NULL
4775 && FSP_FLAGS_GET_SHARED(space->flags)
4776 && adjust_space
4777 && srv_sys_tablespaces_open
4778 && 0 == strncmp(space->name, general_space_name,
4779 strlen(general_space_name))) {
4780 /* This name was assigned during recovery in fil_ibd_load().
4781 This general tablespace was opened from an MLOG_FILE_NAME log
4782 entry where the tablespace name does not exist. Replace the
4783 temporary name with this name and return this space. */
4784 HASH_DELETE(fil_space_t, name_hash, fil_system->name_hash,
4785 ut_fold_string(space->name), space);
4786 ut_free(space->name);
4787 space->name = mem_strdup(name);
4788 HASH_INSERT(fil_space_t, name_hash, fil_system->name_hash,
4789 ut_fold_string(space->name), space);
4790
4791 mutex_exit(&fil_system->mutex);
4792
4793 return(true);
4794 }
4795
4796 if (space != NULL) {
4797 if (FSP_FLAGS_GET_SHARED(space->flags)
4798 && !srv_sys_tablespaces_open) {
4799
4800 /* No need to check the name */
4801 mutex_exit(&fil_system->mutex);
4802 return(true);
4803 }
4804
4805 /* If this space has the expected name, use it. */
4806 fnamespace = fil_space_get_by_name(name);
4807 if (space == fnamespace) {
4808 /* Found */
4809
4810 mutex_exit(&fil_system->mutex);
4811
4812 return(true);
4813 }
4814 }
4815
4816 /* Info from "fnamespace" comes from the ibd file itself, it can
4817 be different from data obtained from System tables since file
4818 operations are not transactional. If adjust_space is set, and the
4819 mismatching space are between a user table and its temp table, we
4820 shall adjust the ibd file name according to system table info */
4821 if (adjust_space
4822 && space != NULL
4823 && row_is_mysql_tmp_table_name(space->name)
4824 && !row_is_mysql_tmp_table_name(name)) {
4825
4826 mutex_exit(&fil_system->mutex);
4827
4828 DBUG_EXECUTE_IF("ib_crash_before_adjust_fil_space",
4829 DBUG_SUICIDE(););
4830
4831 if (fnamespace) {
4832 const char* tmp_name;
4833
4834 tmp_name = dict_mem_create_temporary_tablename(
4835 heap, name, table_id);
4836
4837 fil_rename_tablespace(
4838 fnamespace->id,
4839 UT_LIST_GET_FIRST(fnamespace->chain)->name,
4840 tmp_name, NULL);
4841 }
4842
4843 DBUG_EXECUTE_IF("ib_crash_after_adjust_one_fil_space",
4844 DBUG_SUICIDE(););
4845
4846 fil_rename_tablespace(
4847 id, UT_LIST_GET_FIRST(space->chain)->name,
4848 name, NULL);
4849
4850 DBUG_EXECUTE_IF("ib_crash_after_adjust_fil_space",
4851 DBUG_SUICIDE(););
4852
4853 mutex_enter(&fil_system->mutex);
4854 fnamespace = fil_space_get_by_name(name);
4855 ut_ad(space == fnamespace);
4856 mutex_exit(&fil_system->mutex);
4857
4858 return(true);
4859 }
4860
4861 if (!print_error_if_does_not_exist) {
4862
4863 mutex_exit(&fil_system->mutex);
4864
4865 return(false);
4866 }
4867
4868 if (space == NULL) {
4869 if (fnamespace == NULL) {
4870 if (print_error_if_does_not_exist) {
4871 fil_report_missing_tablespace(name, id);
4872 }
4873 } else {
4874 ib::error() << "Table " << name << " in InnoDB data"
4875 " dictionary has tablespace id " << id
4876 << ", but a tablespace with that id does not"
4877 " exist. There is a tablespace of name "
4878 << fnamespace->name << " and id "
4879 << fnamespace->id << ", though. Have you"
4880 " deleted or moved .ibd files?";
4881 }
4882 error_exit:
4883 ib::warn() << TROUBLESHOOT_DATADICT_MSG;
4884
4885 mutex_exit(&fil_system->mutex);
4886
4887 return(false);
4888 }
4889
4890 if (0 != strcmp(space->name, name)) {
4891
4892 ib::error() << "Table " << name << " in InnoDB data dictionary"
4893 " has tablespace id " << id << ", but the tablespace"
4894 " with that id has name " << space->name << "."
4895 " Have you deleted or moved .ibd files?";
4896
4897 if (fnamespace != NULL) {
4898 ib::error() << "There is a tablespace with the right"
4899 " name: " << fnamespace->name << ", but its id"
4900 " is " << fnamespace->id << ".";
4901 }
4902
4903 goto error_exit;
4904 }
4905
4906 mutex_exit(&fil_system->mutex);
4907
4908 return(false);
4909 }
4910 #endif /* !UNIV_HOTBACKUP */
4911 /** Return the space ID based on the tablespace name.
4912 The tablespace must be found in the tablespace memory cache.
4913 This call is made from external to this module, so the mutex is not owned.
4914 @param[in] tablespace Tablespace name
4915 @return space ID if tablespace found, ULINT_UNDEFINED if space not. */
4916 ulint
fil_space_get_id_by_name(const char * tablespace)4917 fil_space_get_id_by_name(
4918 const char* tablespace)
4919 {
4920 mutex_enter(&fil_system->mutex);
4921
4922 /* Search for a space with the same name. */
4923 fil_space_t* space = fil_space_get_by_name(tablespace);
4924 ulint id = (space == NULL) ? ULINT_UNDEFINED : space->id;
4925
4926 mutex_exit(&fil_system->mutex);
4927
4928 return(id);
4929 }
4930
4931 /**
4932 Fill the pages with NULs
4933 @param[in] node File node
4934 @param[in] page_size physical page size
4935 @param[in] start Offset from the start of the file in bytes
4936 @param[in] len Length in bytes
4937 @param[in] read_only_mode
4938 if true, then read only mode checks are enforced.
4939 @return DB_SUCCESS or error code */
4940 static
4941 dberr_t
fil_write_zeros(const fil_node_t * node,ulint page_size,os_offset_t start,ulint len,bool read_only_mode)4942 fil_write_zeros(
4943 const fil_node_t* node,
4944 ulint page_size,
4945 os_offset_t start,
4946 ulint len,
4947 bool read_only_mode)
4948 {
4949 ut_a(len > 0);
4950
4951 /* Extend at most 1M at a time */
4952 ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len);
4953 byte* ptr = reinterpret_cast<byte*>(ut_zalloc_nokey(n_bytes
4954 + page_size));
4955 byte* buf = reinterpret_cast<byte*>(ut_align(ptr, page_size));
4956
4957 os_offset_t offset = start;
4958 dberr_t err = DB_SUCCESS;
4959 const os_offset_t end = start + len;
4960 IORequest request(IORequest::WRITE);
4961
4962 while (offset < end) {
4963
4964 #ifdef UNIV_HOTBACKUP
4965 err = os_file_write(
4966 request, node->name, node->handle, buf, offset,
4967 n_bytes);
4968 #else
4969 err = os_aio_func(
4970 request, OS_AIO_SYNC, node->name,
4971 node->handle, buf, offset, n_bytes, read_only_mode,
4972 NULL, NULL);
4973 #endif /* UNIV_HOTBACKUP */
4974
4975 if (err != DB_SUCCESS) {
4976 break;
4977 }
4978
4979 offset += n_bytes;
4980
4981 n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset));
4982
4983 DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
4984 DBUG_SUICIDE(););
4985 }
4986
4987 ut_free(ptr);
4988
4989 return(err);
4990 }
4991
4992 /** Try to extend a tablespace if it is smaller than the specified size.
4993 @param[in,out] space tablespace
4994 @param[in] size desired size in pages
4995 @return whether the tablespace is at least as big as requested */
4996 bool
fil_space_extend(fil_space_t * space,ulint size)4997 fil_space_extend(
4998 fil_space_t* space,
4999 ulint size)
5000 {
5001 /* In read-only mode we allow write to shared temporary tablespace
5002 as intrinsic table created by Optimizer reside in this tablespace. */
5003 ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id));
5004
5005 retry:
5006
5007 #ifdef UNIV_HOTBACKUP
5008 page_size_t page_length(space->flags);
5009 ulint actual_size = space->size;
5010 ib::trace() << "space id : " << space->id << ", space name : "
5011 << space->name << ", space size : " << actual_size << " pages,"
5012 << " desired space size : " << size << " pages,"
5013 << " page size : " << page_length.physical();
5014 #endif /* UNIV_HOTBACKUP */
5015
5016 bool success = true;
5017
5018 fil_mutex_enter_and_prepare_for_io(space->id);
5019
5020 if (space->size >= size) {
5021 /* Space already big enough */
5022 mutex_exit(&fil_system->mutex);
5023 return(true);
5024 }
5025
5026 page_size_t pageSize(space->flags);
5027 const ulint page_size = pageSize.physical();
5028 fil_node_t* node = UT_LIST_GET_LAST(space->chain);
5029
5030 if (!node->being_extended) {
5031 /* Mark this node as undergoing extension. This flag
5032 is used by other threads to wait for the extension
5033 opereation to finish. */
5034 node->being_extended = true;
5035 } else {
5036 /* Another thread is currently extending the file. Wait
5037 for it to finish. It'd have been better to use an event
5038 driven mechanism but the entire module is peppered with
5039 polling code. */
5040
5041 mutex_exit(&fil_system->mutex);
5042 os_thread_sleep(100000);
5043 goto retry;
5044 }
5045
5046 if (!fil_node_prepare_for_io(node, fil_system, space)) {
5047 /* The tablespace data file, such as .ibd file, is missing */
5048 node->being_extended = false;
5049 mutex_exit(&fil_system->mutex);
5050
5051 return(false);
5052 }
5053
5054 /* At this point it is safe to release fil_system mutex. No
5055 other thread can rename, delete or close the file because
5056 we have set the node->being_extended flag. */
5057 mutex_exit(&fil_system->mutex);
5058
5059 ulint pages_added;
5060
5061 /* Note: This code is going to be executed independent of FusionIO HW
5062 if the OS supports posix_fallocate() */
5063
5064 ut_ad(size > space->size);
5065
5066 os_offset_t node_start = os_file_get_size(node->handle);
5067 ut_a(node_start != (os_offset_t) -1);
5068
5069 /* Node first page number */
5070 ulint node_first_page = space->size - node->size;
5071
5072 /* Number of physical pages in the node/file */
5073 ulint n_node_physical_pages
5074 = static_cast<ulint>(node_start) / page_size;
5075
5076 /* Number of pages to extend in the node/file */
5077 lint n_node_extend;
5078
5079 n_node_extend = size - (node_first_page + node->size);
5080
5081 /* If we already have enough physical pages to satisfy the
5082 extend request on the node then ignore it */
5083 if (node->size + n_node_extend > n_node_physical_pages) {
5084
5085 DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension",
5086 DBUG_SUICIDE(););
5087
5088 os_offset_t len;
5089 dberr_t err = DB_SUCCESS;
5090
5091 len = ((node->size + n_node_extend) * page_size) - node_start;
5092 ut_ad(len > 0);
5093
5094 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
5095 int ret = posix_fallocate(node->handle.m_file, node_start, len);
5096
5097 DBUG_EXECUTE_IF("ib_posix_fallocate_fail_eintr",
5098 ret = EINTR;);
5099
5100 DBUG_EXECUTE_IF("ib_posix_fallocate_fail_einval",
5101 ret = EINVAL;);
5102
5103 if (ret != 0) {
5104 /* We already pass the valid offset and len in,
5105 if EINVAL is returned, it could only mean that the
5106 file system doesn't support fallocate(), currently
5107 one known case is ext3 with O_DIRECT.
5108
5109 Also because above call could be interrupted,
5110 in this case, simply go to plan B by writing zeroes.
5111
5112 Both error messages for above two scenarios are
5113 skipped in case of flooding error messages, because
5114 they can be ignored by users. */
5115 if (ret != EINTR && ret != EINVAL) {
5116 ib::error()
5117 << "posix_fallocate(): Failed to"
5118 " preallocate data for file "
5119 << node->name << ", desired size "
5120 << len << " bytes."
5121 " Operating system error number "
5122 << ret << ". Check"
5123 " that the disk is not full or a disk"
5124 " quota exceeded. Make sure the file"
5125 " system supports this function."
5126 " Some operating system error"
5127 " numbers are described at " REFMAN
5128 "operating-system-error-codes.html";
5129 }
5130
5131 err = DB_IO_ERROR;
5132 }
5133 #endif /* NO_FALLOCATE || !UNIV_LINUX */
5134
5135 if (!node->atomic_write || err == DB_IO_ERROR) {
5136
5137 bool read_only_mode;
5138
5139 read_only_mode = (space->purpose != FIL_TYPE_TEMPORARY
5140 ? false : srv_read_only_mode);
5141
5142 err = fil_write_zeros(
5143 node, page_size, node_start,
5144 static_cast<ulint>(len), read_only_mode);
5145
5146 if (err != DB_SUCCESS) {
5147
5148 ib::warn()
5149 << "Error while writing " << len
5150 << " zeroes to " << node->name
5151 << " starting at offset " << node_start;
5152 }
5153 }
5154
5155 /* Check how many pages actually added */
5156 os_offset_t end = os_file_get_size(node->handle);
5157 ut_a(end != static_cast<os_offset_t>(-1) && end >= node_start);
5158
5159 os_has_said_disk_full = !(success = (end == node_start + len));
5160
5161 pages_added = static_cast<ulint>(end - node_start) / page_size;
5162
5163 } else {
5164 success = true;
5165 pages_added = n_node_extend;
5166 os_has_said_disk_full = FALSE;
5167 }
5168
5169 mutex_enter(&fil_system->mutex);
5170
5171 ut_a(node->being_extended);
5172
5173 node->size += pages_added;
5174 space->size += pages_added;
5175 node->being_extended = false;
5176
5177 fil_node_complete_io(node, fil_system, IORequestWrite);
5178
5179 #ifndef UNIV_HOTBACKUP
5180 /* Keep the last data file size info up to date, rounded to
5181 full megabytes */
5182 ulint pages_per_mb = (1024 * 1024) / page_size;
5183 ulint size_in_pages = ((node->size / pages_per_mb) * pages_per_mb);
5184
5185 if (space->id == srv_sys_space.space_id()) {
5186 srv_sys_space.set_last_file_size(size_in_pages);
5187 } else if (space->id == srv_tmp_space.space_id()) {
5188 srv_tmp_space.set_last_file_size(size_in_pages);
5189 }
5190 #else
5191 ib::trace() << "extended space : " << space->name << " from "
5192 << actual_size << " pages to " << space->size << " pages "
5193 << ", desired space size : " << size << " pages.";
5194 #endif /* !UNIV_HOTBACKUP */
5195
5196 mutex_exit(&fil_system->mutex);
5197
5198 fil_flush(space->id);
5199
5200 return(success);
5201 }
5202
#ifdef UNIV_HOTBACKUP
/********************************************************************//**
Extends all tablespaces to the size stored in the space header. During the
mysqlbackup --apply-log phase we extended the spaces on-demand so that log
records could be applied, but that may have left spaces still too small
compared to the size stored in the space header.

For every tablespace in fil_system->space_list, page 0 is read, the FSP_SIZE
field of the header is extracted and fil_space_extend() is asked to reach
that size. A failed extension is reported and then asserted on. */
void
fil_extend_tablespaces_to_stored_len(void)
/*======================================*/
{
	byte*	buf = static_cast<byte*>(ut_malloc_nokey(UNIV_PAGE_SIZE));

	mutex_enter(&fil_system->mutex);

	for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->space_list);
	     space != NULL;
	     space = UT_LIST_GET_NEXT(space_list, space)) {

		ut_a(space->purpose == FIL_TYPE_TABLESPACE);

		mutex_exit(&fil_system->mutex); /* no need to protect with a
						mutex, because this is a
						single-threaded operation */

		const dberr_t	error = fil_read(
			page_id_t(space->id, 0),
			page_size_t(space->flags),
			0, univ_page_size.physical(), buf);

		ut_a(error == DB_SUCCESS);

		const ulint	size_in_header
			= fsp_header_get_field(buf, FSP_SIZE);

		const bool	success
			= fil_space_extend(space, size_in_header);

		if (!success) {
			/* Report the size the space really has after the
			failed attempt. This used to print a local variable
			that was never assigned. Reading space->size without
			the mutex follows the single-threaded assumption
			stated above. */
			const ulint	actual_size = space->size;

			ib::error() << "Could not extend the tablespace of "
				<< space->name << " to the size stored in"
				" header, " << size_in_header << " pages;"
				" size after extension " << actual_size
				<< " pages. Check that you have free disk"
				" space and retry!";
			ut_a(success);
		}

		mutex_enter(&fil_system->mutex);
	}

	mutex_exit(&fil_system->mutex);

	ut_free(buf);
}
#endif
5260
5261 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
5262
5263 /*******************************************************************//**
5264 Tries to reserve free extents in a file space.
5265 @return true if succeed */
5266 bool
fil_space_reserve_free_extents(ulint id,ulint n_free_now,ulint n_to_reserve)5267 fil_space_reserve_free_extents(
5268 /*===========================*/
5269 ulint id, /*!< in: space id */
5270 ulint n_free_now, /*!< in: number of free extents now */
5271 ulint n_to_reserve) /*!< in: how many one wants to reserve */
5272 {
5273 fil_space_t* space;
5274 bool success;
5275
5276 ut_ad(fil_system);
5277
5278 mutex_enter(&fil_system->mutex);
5279
5280 space = fil_space_get_by_id(id);
5281
5282 ut_a(space);
5283
5284 if (space->n_reserved_extents + n_to_reserve > n_free_now) {
5285 success = false;
5286 } else {
5287 space->n_reserved_extents += n_to_reserve;
5288 success = true;
5289 }
5290
5291 mutex_exit(&fil_system->mutex);
5292
5293 return(success);
5294 }
5295
5296 /*******************************************************************//**
5297 Releases free extents in a file space. */
5298 void
fil_space_release_free_extents(ulint id,ulint n_reserved)5299 fil_space_release_free_extents(
5300 /*===========================*/
5301 ulint id, /*!< in: space id */
5302 ulint n_reserved) /*!< in: how many one reserved */
5303 {
5304 fil_space_t* space;
5305
5306 ut_ad(fil_system);
5307
5308 mutex_enter(&fil_system->mutex);
5309
5310 space = fil_space_get_by_id(id);
5311
5312 ut_a(space);
5313 ut_a(space->n_reserved_extents >= n_reserved);
5314
5315 space->n_reserved_extents -= n_reserved;
5316
5317 mutex_exit(&fil_system->mutex);
5318 }
5319
5320 /*******************************************************************//**
5321 Gets the number of reserved extents. If the database is silent, this number
5322 should be zero. */
5323 ulint
fil_space_get_n_reserved_extents(ulint id)5324 fil_space_get_n_reserved_extents(
5325 /*=============================*/
5326 ulint id) /*!< in: space id */
5327 {
5328 fil_space_t* space;
5329 ulint n;
5330
5331 ut_ad(fil_system);
5332
5333 mutex_enter(&fil_system->mutex);
5334
5335 space = fil_space_get_by_id(id);
5336
5337 ut_a(space);
5338
5339 n = space->n_reserved_extents;
5340
5341 mutex_exit(&fil_system->mutex);
5342
5343 return(n);
5344 }
5345
5346 /*============================ FILE I/O ================================*/
5347
5348 /********************************************************************//**
5349 NOTE: you must call fil_mutex_enter_and_prepare_for_io() first!
5350
5351 Prepares a file node for i/o. Opens the file if it is closed. Updates the
5352 pending i/o's field in the node and the system appropriately. Takes the node
5353 off the LRU list if it is in the LRU list. The caller must hold the fil_sys
5354 mutex.
5355 @return false if the file can't be opened, otherwise true */
5356 static
5357 bool
fil_node_prepare_for_io(fil_node_t * node,fil_system_t * system,fil_space_t * space)5358 fil_node_prepare_for_io(
5359 /*====================*/
5360 fil_node_t* node, /*!< in: file node */
5361 fil_system_t* system, /*!< in: tablespace memory cache */
5362 fil_space_t* space) /*!< in: space */
5363 {
5364 ut_ad(node && system && space);
5365 ut_ad(mutex_own(&(system->mutex)));
5366
5367 if (system->n_open > system->max_n_open + 5) {
5368 ib::warn() << "Open files " << system->n_open
5369 << " exceeds the limit " << system->max_n_open;
5370 }
5371
5372 if (!node->is_open) {
5373 /* File is closed: open it */
5374 ut_a(node->n_pending == 0);
5375
5376 if (!fil_node_open_file(node)) {
5377 return(false);
5378 }
5379 }
5380
5381 if (node->n_pending == 0 && fil_space_belongs_in_lru(space)) {
5382 /* The node is in the LRU list, remove it */
5383
5384 ut_a(UT_LIST_GET_LEN(system->LRU) > 0);
5385
5386 UT_LIST_REMOVE(system->LRU, node);
5387 }
5388
5389 node->n_pending++;
5390
5391 return(true);
5392 }
5393
5394 /********************************************************************//**
5395 Updates the data structures when an i/o operation finishes. Updates the
5396 pending i/o's field in the node appropriately. */
5397 static
5398 void
fil_node_complete_io(fil_node_t * node,fil_system_t * system,const IORequest & type)5399 fil_node_complete_io(
5400 /*=================*/
5401 fil_node_t* node, /*!< in: file node */
5402 fil_system_t* system, /*!< in: tablespace memory cache */
5403 const IORequest&type) /*!< in: IO_TYPE_*, marks the node as
5404 modified if TYPE_IS_WRITE() */
5405 {
5406 ut_ad(mutex_own(&system->mutex));
5407 ut_a(node->n_pending > 0);
5408
5409 --node->n_pending;
5410
5411 ut_ad(type.validate());
5412
5413 if (type.is_write()) {
5414
5415 ut_ad(!srv_read_only_mode
5416 || fsp_is_system_temporary(node->space->id));
5417
5418 ++system->modification_counter;
5419
5420 node->modification_counter = system->modification_counter;
5421
5422 if (fil_buffering_disabled(node->space)) {
5423
5424 /* We don't need to keep track of unflushed
5425 changes as user has explicitly disabled
5426 buffering. */
5427 ut_ad(!node->space->is_in_unflushed_spaces);
5428 node->flush_counter = node->modification_counter;
5429
5430 } else if (!node->space->is_in_unflushed_spaces) {
5431
5432 node->space->is_in_unflushed_spaces = true;
5433
5434 UT_LIST_ADD_FIRST(
5435 system->unflushed_spaces, node->space);
5436 }
5437 }
5438
5439 if (node->n_pending == 0 && fil_space_belongs_in_lru(node->space)) {
5440
5441 /* The node must be put back to the LRU list */
5442 UT_LIST_ADD_FIRST(system->LRU, node);
5443 }
5444 }
5445
5446 /** Report information about an invalid page access. */
5447 static
5448 void
fil_report_invalid_page_access(ulint block_offset,ulint space_id,const char * space_name,ulint byte_offset,ulint len,bool is_read)5449 fil_report_invalid_page_access(
5450 ulint block_offset, /*!< in: block offset */
5451 ulint space_id, /*!< in: space id */
5452 const char* space_name, /*!< in: space name */
5453 ulint byte_offset, /*!< in: byte offset */
5454 ulint len, /*!< in: I/O length */
5455 bool is_read) /*!< in: I/O type */
5456 {
5457 ib::error()
5458 << "Trying to access page number " << block_offset << " in"
5459 " space " << space_id << ", space name " << space_name << ","
5460 " which is outside the tablespace bounds. Byte offset "
5461 << byte_offset << ", len " << len << ", i/o type " <<
5462 (is_read ? "read" : "write")
5463 << ". If you get this error at mysqld startup, please check"
5464 " that your my.cnf matches the ibdata files that you have in"
5465 " the MySQL server.";
5466
5467 ib::error() << "Server exits"
5468 #ifdef UNIV_DEBUG
5469 << " at " << __FILE__ << "[" << __LINE__ << "]"
5470 #endif
5471 << ".";
5472
5473 _exit(1);
5474 }
5475
5476 /** Set encryption information for IORequest.
5477 @param[in,out] req_type IO request
5478 @param[in] page_id page id
5479 @param[in] space table space */
5480 inline
5481 void
fil_io_set_encryption(IORequest & req_type,const page_id_t & page_id,fil_space_t * space)5482 fil_io_set_encryption(
5483 IORequest& req_type,
5484 const page_id_t& page_id,
5485 fil_space_t* space)
5486 {
5487 /* Don't encrypt the log, page 0 of all tablespaces, all pages
5488 from the system tablespace. */
5489 if (!req_type.is_log() && page_id.page_no() > 0
5490 && space->encryption_type != Encryption::NONE)
5491 {
5492 req_type.encryption_key(space->encryption_key,
5493 space->encryption_klen,
5494 space->encryption_iv);
5495 req_type.encryption_algorithm(Encryption::AES);
5496 } else {
5497 req_type.clear_encrypted();
5498 }
5499 }
5500
5501 /** Reads or writes data. This operation could be asynchronous (aio).
5502
5503 @param[in,out] type IO context
5504 @param[in] sync true if synchronous aio is desired
5505 @param[in] page_id page id
5506 @param[in] page_size page size
5507 @param[in] byte_offset remainder of offset in bytes; in aio this
5508 must be divisible by the OS block size
5509 @param[in] len how many bytes to read or write; this must
5510 not cross a file boundary; in aio this must
5511 be a block size multiple
5512 @param[in,out] buf buffer where to store read data or from where
5513 to write; in aio this must be appropriately
5514 aligned
5515 @param[in] message message for aio handler if non-sync aio
5516 used, else ignored
5517
5518 @return DB_SUCCESS, DB_TABLESPACE_DELETED or DB_TABLESPACE_TRUNCATED
5519 if we are trying to do i/o on a tablespace which does not exist */
5520 dberr_t
fil_io(const IORequest & type,bool sync,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf,void * message)5521 fil_io(
5522 const IORequest& type,
5523 bool sync,
5524 const page_id_t& page_id,
5525 const page_size_t& page_size,
5526 ulint byte_offset,
5527 ulint len,
5528 void* buf,
5529 void* message)
5530 {
5531 os_offset_t offset;
5532 IORequest req_type(type);
5533
5534 ut_ad(req_type.validate());
5535
5536 ut_ad(len > 0);
5537 ut_ad(byte_offset < UNIV_PAGE_SIZE);
5538 ut_ad(!page_size.is_compressed() || byte_offset == 0);
5539 ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
5540 #if (1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX
5541 # error "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX"
5542 #endif
5543 #if (1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN
5544 # error "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN"
5545 #endif
5546 ut_ad(fil_validate_skip());
5547
5548 #ifndef UNIV_HOTBACKUP
5549
5550 /* ibuf bitmap pages must be read in the sync AIO mode: */
5551 ut_ad(recv_no_ibuf_operations
5552 || req_type.is_write()
5553 || !ibuf_bitmap_page(page_id, page_size)
5554 || sync
5555 || req_type.is_log());
5556
5557 ulint mode;
5558
5559 if (sync) {
5560
5561 mode = OS_AIO_SYNC;
5562
5563 } else if (req_type.is_log()) {
5564
5565 mode = OS_AIO_LOG;
5566
5567 } else if (req_type.is_read()
5568 && !recv_no_ibuf_operations
5569 && ibuf_page(page_id, page_size, NULL)) {
5570
5571 mode = OS_AIO_IBUF;
5572
5573 /* Reduce probability of deadlock bugs in connection with ibuf:
5574 do not let the ibuf i/o handler sleep */
5575
5576 req_type.clear_do_not_wake();
5577 } else {
5578 mode = OS_AIO_NORMAL;
5579 }
5580 #else /* !UNIV_HOTBACKUP */
5581 ut_a(sync);
5582 ulint mode = OS_AIO_SYNC;
5583 #endif /* !UNIV_HOTBACKUP */
5584
5585 #ifndef UNIV_HOTBACKUP
5586 if (req_type.is_read()) {
5587
5588 srv_stats.data_read.add(len);
5589
5590 } else if (req_type.is_write()) {
5591
5592 ut_ad(!srv_read_only_mode
5593 || fsp_is_system_temporary(page_id.space()));
5594
5595 srv_stats.data_written.add(len);
5596 }
5597 #endif /* !UNIV_HOTBACKUP */
5598
5599 /* Reserve the fil_system mutex and make sure that we can open at
5600 least one file while holding it, if the file is not already open */
5601
5602 fil_mutex_enter_and_prepare_for_io(page_id.space());
5603
5604 fil_space_t* space = fil_space_get_by_id(page_id.space());
5605
5606 /* If we are deleting a tablespace we don't allow async read operations
5607 on that. However, we do allow write operations and sync read operations. */
5608 if (space == NULL
5609 || (req_type.is_read()
5610 && !sync
5611 && space->stop_new_ops
5612 && !space->is_being_truncated)) {
5613
5614 mutex_exit(&fil_system->mutex);
5615
5616 if (!req_type.ignore_missing()) {
5617 ib::error()
5618 << "Trying to do I/O to a tablespace which"
5619 " does not exist. I/O type: "
5620 << (req_type.is_read() ? "read" : "write")
5621 << ", page: " << page_id
5622 << ", I/O length: " << len << " bytes";
5623 }
5624
5625 return(DB_TABLESPACE_DELETED);
5626 }
5627
5628 ut_ad(mode != OS_AIO_IBUF || fil_type_is_data(space->purpose));
5629
5630 ulint cur_page_no = page_id.page_no();
5631 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
5632
5633 for (;;) {
5634
5635 if (node == NULL) {
5636
5637 if (req_type.ignore_missing()) {
5638 mutex_exit(&fil_system->mutex);
5639 return(DB_ERROR);
5640 }
5641
5642 fil_report_invalid_page_access(
5643 page_id.page_no(), page_id.space(),
5644 space->name, byte_offset, len,
5645 req_type.is_read());
5646
5647 } else if (fil_is_user_tablespace_id(space->id)
5648 && node->size == 0) {
5649
5650 /* We do not know the size of a single-table tablespace
5651 before we open the file */
5652 break;
5653
5654 } else if (node->size > cur_page_no) {
5655 /* Found! */
5656 break;
5657
5658 } else {
5659 if (space->id != srv_sys_space.space_id()
5660 && UT_LIST_GET_LEN(space->chain) == 1
5661 && (srv_is_tablespace_truncated(space->id)
5662 || space->is_being_truncated
5663 || srv_was_tablespace_truncated(space))
5664 && req_type.is_read()) {
5665
5666 /* Handle page which is outside the truncated
5667 tablespace bounds when recovering from a crash
5668 happened during a truncation */
5669 mutex_exit(&fil_system->mutex);
5670 return(DB_TABLESPACE_TRUNCATED);
5671 }
5672
5673 cur_page_no -= node->size;
5674
5675 node = UT_LIST_GET_NEXT(chain, node);
5676 }
5677 }
5678
5679 /* Open file if closed */
5680 if (!fil_node_prepare_for_io(node, fil_system, space)) {
5681 if (fil_type_is_data(space->purpose)
5682 && fil_is_user_tablespace_id(space->id)) {
5683 mutex_exit(&fil_system->mutex);
5684
5685 if (!req_type.ignore_missing()) {
5686 ib::error()
5687 << "Trying to do I/O to a tablespace"
5688 " which exists without .ibd data file."
5689 " I/O type: "
5690 << (req_type.is_read()
5691 ? "read" : "write")
5692 << ", page: "
5693 << page_id_t(page_id.space(),
5694 cur_page_no)
5695 << ", I/O length: " << len << " bytes";
5696 }
5697
5698 return(DB_TABLESPACE_DELETED);
5699 }
5700
5701 /* The tablespace is for log. Currently, we just assert here
5702 to prevent handling errors along the way fil_io returns.
5703 Also, if the log files are missing, it would be hard to
5704 promise the server can continue running. */
5705 ut_a(0);
5706 }
5707
5708 /* Check that at least the start offset is within the bounds of a
5709 single-table tablespace, including rollback tablespaces. */
5710 if (node->size <= cur_page_no
5711 && space->id != srv_sys_space.space_id()
5712 && fil_type_is_data(space->purpose)) {
5713
5714 if (req_type.ignore_missing()) {
5715 /* If we can tolerate the non-existent pages, we
5716 should return with DB_ERROR and let caller decide
5717 what to do. */
5718 fil_node_complete_io(node, fil_system, req_type);
5719 mutex_exit(&fil_system->mutex);
5720 return(DB_ERROR);
5721 }
5722
5723 fil_report_invalid_page_access(
5724 page_id.page_no(), page_id.space(),
5725 space->name, byte_offset, len, req_type.is_read());
5726 }
5727
5728 /* Now we have made the changes in the data structures of fil_system */
5729 mutex_exit(&fil_system->mutex);
5730
5731 /* Calculate the low 32 bits and the high 32 bits of the file offset */
5732
5733 if (!page_size.is_compressed()) {
5734
5735 offset = ((os_offset_t) cur_page_no
5736 << UNIV_PAGE_SIZE_SHIFT) + byte_offset;
5737
5738 ut_a(node->size - cur_page_no
5739 >= ((byte_offset + len + (UNIV_PAGE_SIZE - 1))
5740 / UNIV_PAGE_SIZE));
5741 } else {
5742 ulint size_shift;
5743
5744 switch (page_size.physical()) {
5745 case 1024: size_shift = 10; break;
5746 case 2048: size_shift = 11; break;
5747 case 4096: size_shift = 12; break;
5748 case 8192: size_shift = 13; break;
5749 case 16384: size_shift = 14; break;
5750 case 32768: size_shift = 15; break;
5751 case 65536: size_shift = 16; break;
5752 default: ut_error;
5753 }
5754
5755 offset = ((os_offset_t) cur_page_no << size_shift)
5756 + byte_offset;
5757
5758 ut_a(node->size - cur_page_no
5759 >= (len + (page_size.physical() - 1))
5760 / page_size.physical());
5761 }
5762
5763 /* Do AIO */
5764
5765 ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
5766 ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
5767
5768 /* Don't compress the log, page 0 of all tablespaces, tables
5769 compresssed with the old scheme and all pages from the system
5770 tablespace. */
5771
5772 if (req_type.is_write()
5773 && !req_type.is_log()
5774 && !page_size.is_compressed()
5775 && page_id.page_no() > 0
5776 && IORequest::is_punch_hole_supported()
5777 && node->punch_hole) {
5778
5779 ut_ad(!req_type.is_log());
5780
5781 req_type.set_punch_hole();
5782
5783 req_type.compression_algorithm(space->compression_type);
5784
5785 } else {
5786 req_type.clear_compressed();
5787 }
5788
5789 /* Set encryption information. */
5790 fil_io_set_encryption(req_type, page_id, space);
5791
5792 req_type.block_size(node->block_size);
5793
5794 dberr_t err;
5795
5796 #ifdef UNIV_HOTBACKUP
5797 /* In mysqlbackup do normal i/o, not aio */
5798 if (req_type.is_read()) {
5799
5800 err = os_file_read(req_type, node->handle, buf, offset, len);
5801
5802 } else {
5803
5804 ut_ad(!srv_read_only_mode
5805 || fsp_is_system_temporary(page_id.space()));
5806
5807 err = os_file_write(
5808 req_type, node->name, node->handle, buf, offset, len);
5809 }
5810 #else /* UNIV_HOTBACKUP */
5811 /* Queue the aio request */
5812 err = os_aio(
5813 req_type,
5814 mode, node->name, node->handle, buf, offset, len,
5815 fsp_is_system_temporary(page_id.space())
5816 ? false : srv_read_only_mode,
5817 node, message);
5818
5819 #endif /* UNIV_HOTBACKUP */
5820
5821 if (err == DB_IO_NO_PUNCH_HOLE) {
5822
5823 err = DB_SUCCESS;
5824
5825 if (node->punch_hole) {
5826
5827 ib::warn()
5828 << "Punch hole failed for '"
5829 << node->name << "'";
5830 }
5831
5832 fil_no_punch_hole(node);
5833 }
5834
5835 /* We an try to recover the page from the double write buffer if
5836 the decompression fails or the page is corrupt. */
5837
5838 ut_a(req_type.is_dblwr_recover() || err == DB_SUCCESS);
5839
5840 if (sync) {
5841 /* The i/o operation is already completed when we return from
5842 os_aio: */
5843
5844 mutex_enter(&fil_system->mutex);
5845
5846 fil_node_complete_io(node, fil_system, req_type);
5847
5848 mutex_exit(&fil_system->mutex);
5849
5850 ut_ad(fil_validate_skip());
5851 }
5852
5853 return(err);
5854 }
5855
5856 #ifndef UNIV_HOTBACKUP
5857 /**********************************************************************//**
5858 Waits for an aio operation to complete. This function is used to write the
5859 handler for completed requests. The aio array of pending requests is divided
5860 into segments (see os0file.cc for more info). The thread specifies which
5861 segment it wants to wait for. */
5862 void
fil_aio_wait(ulint segment)5863 fil_aio_wait(
5864 /*=========*/
5865 ulint segment) /*!< in: the number of the segment in the aio
5866 array to wait for */
5867 {
5868 fil_node_t* node;
5869 IORequest type;
5870 void* message;
5871
5872 ut_ad(fil_validate_skip());
5873
5874 dberr_t err = os_aio_handler(segment, &node, &message, &type);
5875
5876 ut_a(err == DB_SUCCESS);
5877
5878 if (node == NULL) {
5879 ut_ad(srv_shutdown_state == SRV_SHUTDOWN_EXIT_THREADS);
5880 return;
5881 }
5882
5883 srv_set_io_thread_op_info(segment, "complete io for fil node");
5884
5885 mutex_enter(&fil_system->mutex);
5886
5887 fil_node_complete_io(node, fil_system, type);
5888
5889 mutex_exit(&fil_system->mutex);
5890
5891 ut_ad(fil_validate_skip());
5892
5893 /* Do the i/o handling */
5894 /* IMPORTANT: since i/o handling for reads will read also the insert
5895 buffer in tablespace 0, you have to be very careful not to introduce
5896 deadlocks in the i/o system. We keep tablespace 0 data files always
5897 open, and use a special i/o thread to serve insert buffer requests. */
5898
5899 switch (node->space->purpose) {
5900 case FIL_TYPE_TABLESPACE:
5901 case FIL_TYPE_TEMPORARY:
5902 case FIL_TYPE_IMPORT:
5903 srv_set_io_thread_op_info(segment, "complete io for buf page");
5904
5905 /* async single page writes from the dblwr buffer don't have
5906 access to the page */
5907 if (message != NULL) {
5908 buf_page_io_complete(static_cast<buf_page_t*>(message));
5909 }
5910 return;
5911 case FIL_TYPE_LOG:
5912 srv_set_io_thread_op_info(segment, "complete io for log");
5913 log_io_complete(static_cast<log_group_t*>(message));
5914 return;
5915 }
5916
5917 ut_ad(0);
5918 }
5919 #endif /* !UNIV_HOTBACKUP */
5920
5921 /**********************************************************************//**
5922 Flushes to disk possible writes cached by the OS. If the space does not exist
5923 or is being dropped, does not do anything. */
5924 void
fil_flush(ulint space_id)5925 fil_flush(
5926 /*======*/
5927 ulint space_id) /*!< in: file space id (this can be a group of
5928 log files or a tablespace of the database) */
5929 {
5930 fil_node_t* node;
5931 pfs_os_file_t file;
5932
5933 mutex_enter(&fil_system->mutex);
5934
5935 fil_space_t* space = fil_space_get_by_id(space_id);
5936
5937 if (space == NULL
5938 || space->purpose == FIL_TYPE_TEMPORARY
5939 || space->stop_new_ops
5940 || space->is_being_truncated) {
5941 mutex_exit(&fil_system->mutex);
5942
5943 return;
5944 }
5945
5946 bool fbd = fil_buffering_disabled(space);
5947 if (fbd) {
5948
5949 /* No need to flush. User has explicitly disabled
5950 buffering. However, flush should be called if the file
5951 size changes to keep OS metadata in sync. */
5952 ut_ad(!space->is_in_unflushed_spaces);
5953 ut_ad(fil_space_is_flushed(space));
5954
5955 /* Flush only if the file size changes */
5956 bool no_flush = true;
5957 for (node = UT_LIST_GET_FIRST(space->chain);
5958 node != NULL;
5959 node = UT_LIST_GET_NEXT(chain, node)) {
5960 #ifdef UNIV_DEBUG
5961 ut_ad(node->modification_counter
5962 == node->flush_counter);
5963 #endif /* UNIV_DEBUG */
5964 if (node->flush_size != node->size) {
5965 /* Found at least one file whose size has changed */
5966 no_flush = false;
5967 break;
5968 }
5969 }
5970
5971 if (no_flush) {
5972 mutex_exit(&fil_system->mutex);
5973 return;
5974 }
5975 }
5976
5977 space->n_pending_flushes++; /*!< prevent dropping of the space while
5978 we are flushing */
5979 for (node = UT_LIST_GET_FIRST(space->chain);
5980 node != NULL;
5981 node = UT_LIST_GET_NEXT(chain, node)) {
5982
5983 int64_t old_mod_counter = node->modification_counter;
5984
5985 if (!node->is_open) {
5986 continue;
5987 }
5988
5989 /* Skip flushing if the file size has not changed since
5990 last flush was done and the flush mode is O_DIRECT_NO_FSYNC */
5991 if (fbd && (node->flush_size == node->size)) {
5992 continue;
5993 }
5994
5995 /* If we are here and the flush mode is O_DIRECT_NO_FSYNC, then
5996 it means that the file size has changed and hence, it shold be
5997 flushed, irrespective of the mod_counter and flush counter values,
5998 which are always same in case of O_DIRECT_NO_FSYNC to avoid flush
5999 on every write operation.
6000 For other flush modes, if the flush_counter is same or ahead of
6001 the mode_counter, skip the flush. */
6002 if (!fbd && (old_mod_counter <= node->flush_counter)) {
6003 continue;
6004 }
6005
6006 switch (space->purpose) {
6007 case FIL_TYPE_TEMPORARY:
6008 ut_ad(0); // we already checked for this
6009 case FIL_TYPE_TABLESPACE:
6010 case FIL_TYPE_IMPORT:
6011 fil_n_pending_tablespace_flushes++;
6012 break;
6013 case FIL_TYPE_LOG:
6014 fil_n_pending_log_flushes++;
6015 fil_n_log_flushes++;
6016 break;
6017 }
6018 #ifdef _WIN32
6019 if (node->is_raw_disk) {
6020
6021 goto skip_flush;
6022 }
6023 #endif /* _WIN32 */
6024 retry:
6025 if (node->n_pending_flushes > 0) {
6026 /* We want to avoid calling os_file_flush() on
6027 the file twice at the same time, because we do
6028 not know what bugs OS's may contain in file
6029 i/o */
6030
6031 #ifndef UNIV_HOTBACKUP
6032 int64_t sig_count = os_event_reset(node->sync_event);
6033 #endif /* !UNIV_HOTBACKUP */
6034
6035 mutex_exit(&fil_system->mutex);
6036
6037 os_event_wait_low(node->sync_event, sig_count);
6038
6039 mutex_enter(&fil_system->mutex);
6040
6041 if (node->flush_counter >= old_mod_counter) {
6042
6043 goto skip_flush;
6044 }
6045
6046 goto retry;
6047 }
6048
6049 ut_a(node->is_open);
6050 file = node->handle;
6051 node->n_pending_flushes++;
6052
6053 mutex_exit(&fil_system->mutex);
6054
6055 os_file_flush(file);
6056
6057 node->flush_size = node->size;
6058
6059 mutex_enter(&fil_system->mutex);
6060
6061 os_event_set(node->sync_event);
6062
6063 node->n_pending_flushes--;
6064 skip_flush:
6065 if (node->flush_counter < old_mod_counter) {
6066 node->flush_counter = old_mod_counter;
6067
6068 if (space->is_in_unflushed_spaces
6069 && fil_space_is_flushed(space)) {
6070
6071 space->is_in_unflushed_spaces = false;
6072
6073 UT_LIST_REMOVE(
6074 fil_system->unflushed_spaces,
6075 space);
6076 }
6077 }
6078
6079 switch (space->purpose) {
6080 case FIL_TYPE_TEMPORARY:
6081 ut_ad(0); // we already checked for this
6082 case FIL_TYPE_TABLESPACE:
6083 case FIL_TYPE_IMPORT:
6084 fil_n_pending_tablespace_flushes--;
6085 continue;
6086 case FIL_TYPE_LOG:
6087 fil_n_pending_log_flushes--;
6088 continue;
6089 }
6090
6091 ut_ad(0);
6092 }
6093
6094 space->n_pending_flushes--;
6095
6096 mutex_exit(&fil_system->mutex);
6097 }
6098
6099 /** Flush to disk the writes in file spaces of the given type
6100 possibly cached by the OS.
6101 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG */
6102 void
fil_flush_file_spaces(fil_type_t purpose)6103 fil_flush_file_spaces(
6104 fil_type_t purpose)
6105 {
6106 fil_space_t* space;
6107 ulint* space_ids;
6108 ulint n_space_ids;
6109
6110 ut_ad(purpose == FIL_TYPE_TABLESPACE || purpose == FIL_TYPE_LOG);
6111
6112 mutex_enter(&fil_system->mutex);
6113
6114 n_space_ids = UT_LIST_GET_LEN(fil_system->unflushed_spaces);
6115 if (n_space_ids == 0) {
6116
6117 mutex_exit(&fil_system->mutex);
6118 return;
6119 }
6120
6121 /* Assemble a list of space ids to flush. Previously, we
6122 traversed fil_system->unflushed_spaces and called UT_LIST_GET_NEXT()
6123 on a space that was just removed from the list by fil_flush().
6124 Thus, the space could be dropped and the memory overwritten. */
6125 space_ids = static_cast<ulint*>(
6126 ut_malloc_nokey(n_space_ids * sizeof(*space_ids)));
6127
6128 n_space_ids = 0;
6129
6130 for (space = UT_LIST_GET_FIRST(fil_system->unflushed_spaces);
6131 space;
6132 space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
6133
6134 if (space->purpose == purpose
6135 && !space->stop_new_ops
6136 && !space->is_being_truncated) {
6137
6138 space_ids[n_space_ids++] = space->id;
6139 }
6140 }
6141
6142 mutex_exit(&fil_system->mutex);
6143
6144 /* Flush the spaces. It will not hurt to call fil_flush() on
6145 a non-existing space id. */
6146 for (ulint i = 0; i < n_space_ids; i++) {
6147
6148 fil_flush(space_ids[i]);
6149 }
6150
6151 ut_free(space_ids);
6152 }
6153
6154 /** Functor to validate the file node list of a tablespace. */
6155 struct Check {
6156 /** Total size of file nodes visited so far */
6157 ulint size;
6158 /** Total number of open files visited so far */
6159 ulint n_open;
6160
6161 /** Constructor */
CheckCheck6162 Check() : size(0), n_open(0) {}
6163
6164 /** Visit a file node
6165 @param[in] elem file node to visit */
operator ()Check6166 void operator()(const fil_node_t* elem)
6167 {
6168 ut_a(elem->is_open || !elem->n_pending);
6169 n_open += elem->is_open;
6170 size += elem->size;
6171 }
6172
6173 /** Validate a tablespace.
6174 @param[in] space tablespace to validate
6175 @return number of open file nodes */
validateCheck6176 static ulint validate(const fil_space_t* space)
6177 {
6178 ut_ad(mutex_own(&fil_system->mutex));
6179 Check check;
6180 ut_list_validate(space->chain, check);
6181 ut_a(space->size == check.size);
6182 return(check.n_open);
6183 }
6184 };
6185
6186 /******************************************************************//**
6187 Checks the consistency of the tablespace cache.
6188 @return true if ok */
6189 bool
fil_validate(void)6190 fil_validate(void)
6191 /*==============*/
6192 {
6193 fil_space_t* space;
6194 fil_node_t* fil_node;
6195 ulint n_open = 0;
6196
6197 mutex_enter(&fil_system->mutex);
6198
6199 /* Look for spaces in the hash table */
6200
6201 for (ulint i = 0; i < hash_get_n_cells(fil_system->spaces); i++) {
6202
6203 for (space = static_cast<fil_space_t*>(
6204 HASH_GET_FIRST(fil_system->spaces, i));
6205 space != 0;
6206 space = static_cast<fil_space_t*>(
6207 HASH_GET_NEXT(hash, space))) {
6208
6209 n_open += Check::validate(space);
6210 }
6211 }
6212
6213 ut_a(fil_system->n_open == n_open);
6214
6215 UT_LIST_CHECK(fil_system->LRU);
6216
6217 for (fil_node = UT_LIST_GET_FIRST(fil_system->LRU);
6218 fil_node != 0;
6219 fil_node = UT_LIST_GET_NEXT(LRU, fil_node)) {
6220
6221 ut_a(fil_node->n_pending == 0);
6222 ut_a(!fil_node->being_extended);
6223 ut_a(fil_node->is_open);
6224 ut_a(fil_space_belongs_in_lru(fil_node->space));
6225 }
6226
6227 mutex_exit(&fil_system->mutex);
6228
6229 return(true);
6230 }
6231
6232 /********************************************************************//**
6233 Returns true if file address is undefined.
6234 @return true if undefined */
6235 bool
fil_addr_is_null(fil_addr_t addr)6236 fil_addr_is_null(
6237 /*=============*/
6238 fil_addr_t addr) /*!< in: address */
6239 {
6240 return(addr.page == FIL_NULL);
6241 }
6242
6243 /********************************************************************//**
6244 Get the predecessor of a file page.
6245 @return FIL_PAGE_PREV */
6246 ulint
fil_page_get_prev(const byte * page)6247 fil_page_get_prev(
6248 /*==============*/
6249 const byte* page) /*!< in: file page */
6250 {
6251 return(mach_read_from_4(page + FIL_PAGE_PREV));
6252 }
6253
6254 /********************************************************************//**
6255 Get the successor of a file page.
6256 @return FIL_PAGE_NEXT */
6257 ulint
fil_page_get_next(const byte * page)6258 fil_page_get_next(
6259 /*==============*/
6260 const byte* page) /*!< in: file page */
6261 {
6262 return(mach_read_from_4(page + FIL_PAGE_NEXT));
6263 }
6264
6265 /*********************************************************************//**
6266 Sets the file page type. */
6267 void
fil_page_set_type(byte * page,ulint type)6268 fil_page_set_type(
6269 /*==============*/
6270 byte* page, /*!< in/out: file page */
6271 ulint type) /*!< in: type */
6272 {
6273 ut_ad(page);
6274
6275 mach_write_to_2(page + FIL_PAGE_TYPE, type);
6276 }
6277
6278 #ifndef UNIV_HOTBACKUP
6279 /** Reset the page type.
6280 Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE.
6281 In MySQL 3.23.53, only undo log pages and index pages were tagged.
6282 Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
6283 @param[in] page_id page number
6284 @param[in,out] page page with invalid FIL_PAGE_TYPE
6285 @param[in] type expected page type
6286 @param[in,out] mtr mini-transaction */
6287 void
fil_page_reset_type(const page_id_t & page_id,byte * page,ulint type,mtr_t * mtr)6288 fil_page_reset_type(
6289 const page_id_t& page_id,
6290 byte* page,
6291 ulint type,
6292 mtr_t* mtr)
6293 {
6294 ib::info()
6295 << "Resetting invalid page " << page_id << " type "
6296 << fil_page_get_type(page) << " to " << type << ".";
6297 mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr);
6298 }
6299 #endif /* !UNIV_HOTBACKUP */
6300
/****************************************************************//**
Closes the tablespace memory cache.
Frees the two hash tables (by space id and by name), asserts that the
LRU list, the unflushed-spaces list and the space list are all empty,
destroys fil_system->mutex, frees the fil_system object itself and
resets the global pointer to NULL. The emptiness assertions imply that
all tablespaces must already have been removed from the cache by the
time this is called. */
void
fil_close(void)
/*===========*/
{
	hash_table_free(fil_system->spaces);

	hash_table_free(fil_system->name_hash);

	/* No tablespace may still be registered in any of the lists. */
	ut_a(UT_LIST_GET_LEN(fil_system->LRU) == 0);
	ut_a(UT_LIST_GET_LEN(fil_system->unflushed_spaces) == 0);
	ut_a(UT_LIST_GET_LEN(fil_system->space_list) == 0);

	mutex_free(&fil_system->mutex);

	/* Clear the global so that a stale pointer is never reused. */
	ut_free(fil_system);
	fil_system = NULL;
}
6320
6321 #ifndef UNIV_HOTBACKUP
6322 /********************************************************************//**
6323 Initializes a buffer control block when the buf_pool is created. */
6324 static
6325 void
fil_buf_block_init(buf_block_t * block,byte * frame)6326 fil_buf_block_init(
6327 /*===============*/
6328 buf_block_t* block, /*!< in: pointer to control block */
6329 byte* frame) /*!< in: pointer to buffer frame */
6330 {
6331 UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
6332
6333 block->frame = frame;
6334
6335 block->page.io_fix = BUF_IO_NONE;
6336 /* There are assertions that check for this. */
6337 block->page.buf_fix_count = 1;
6338 block->page.state = BUF_BLOCK_READY_FOR_USE;
6339
6340 page_zip_des_init(&block->page.zip);
6341 }
6342
/** Parameters describing one pass over a tablespace file.
Filled in by fil_tablespace_iterate() and consumed by fil_iterate(). */
struct fil_iterator_t {
	pfs_os_file_t file; /*!< Handle of the open data file */
	const char* filepath; /*!< Path of the data file; passed to
				os_file_write() */
	os_offset_t start; /*!< Byte offset at which iteration
				starts */
	os_offset_t end; /*!< Byte offset at which iteration
				stops (exclusive) */
	os_offset_t file_size; /*!< File size in bytes */
	ulint page_size; /*!< Physical page size in bytes */
	ulint n_io_buffers; /*!< Number of pages to read and
				write in one IO request */
	byte* io_buffer; /*!< Page-aligned buffer used for IO */
	byte* encryption_key; /*!< Encryption key, or NULL if
				none is set */
	byte* encryption_iv; /*!< Encryption initialization
				vector */
	size_t block_size; /*!< File system block size */
};
6357
/********************************************************************//**
Iterate over all the pages in the tablespace file described by iter.

The file is processed in chunks of at most
iter.n_io_buffers * iter.page_size bytes. Each chunk is read into the
IO buffer, the callback is invoked once per page in the chunk, and if
at least one page left the block in state BUF_BLOCK_FILE_PAGE after its
callback, the whole chunk is written back at the same file offset.

For compressed tablespaces the IO is done in the compressed page buffer
located UNIV_PAGE_SIZE bytes after the start of the IO buffer.

When an encryption key is set, the encryption parameters are attached
to the read and write requests of every chunk except the one that
starts at file offset 0.

TODO: This can be made parallel trivially by chunking up the file and
creating a callback per thread. Main benefit will be to use multiple
CPUs for checksums and compressed tables. We have to do compressed
tables block by block right now. Secondly we need to decompress/compress
and copy too much of data. These are CPU intensive.

@param iter Tablespace iterator
@param block block to use for IO
@param callback Callback to inspect and update page contents
@retval DB_SUCCESS or error code (from os_file_read(), the callback,
or os_file_write()) */
static
dberr_t
fil_iterate(
/*========*/
	const fil_iterator_t& iter,
	buf_block_t* block,
	PageCallback& callback)
{
	os_offset_t offset;
	/* NOTE(review): page numbering always starts at 0, independently
	of iter.start; the only caller visible here passes start == 0. */
	ulint page_no = 0;
	ulint space_id = callback.get_space_id();
	/* Size of a full chunk; shrunk below for the final chunk. */
	ulint n_bytes = iter.n_io_buffers * iter.page_size;

	ut_ad(!srv_read_only_mode);

	/* For old style compressed tables we do a lot of useless copying
	for non-index pages. Unfortunately, it is required by
	buf_zip_decompress() */

	ulint read_type = IORequest::READ;
	/* May lose the PUNCH_HOLE bit below if the file system reports
	that hole punching is not supported. */
	ulint write_type = IORequest::WRITE;

	for (offset = iter.start; offset < iter.end; offset += n_bytes) {

		byte* io_buffer = iter.io_buffer;

		/* The per-page loop below advances block->frame, so it
		is reset to the start of the buffer for every chunk. */
		block->frame = io_buffer;

		if (callback.get_page_size().is_compressed()) {
			page_zip_des_init(&block->page.zip);
			page_zip_set_size(&block->page.zip, iter.page_size);

			block->page.size.copy_from(
				page_size_t(iter.page_size,
					univ_page_size.logical(),
					true));

			/* The compressed image lives one uncompressed
			page after the frame. */
			block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
			ut_d(block->page.zip.m_external = true);
			ut_ad(iter.page_size
				== callback.get_page_size().physical());

			/* Zip IO is done in the compressed page buffer. */
			io_buffer = block->page.zip.data;
		} else {
			io_buffer = iter.io_buffer;
		}

		/* We have to read the exact number of bytes. Otherwise the
		InnoDB IO functions croak on failed reads. */

		n_bytes = static_cast<ulint>(
			ut_min(static_cast<os_offset_t>(n_bytes),
				iter.end - offset));

		ut_ad(n_bytes > 0);
		ut_ad(!(n_bytes % iter.page_size));

		dberr_t err;
		IORequest read_request(read_type);
		read_request.block_size(iter.block_size);

		/* For encrypted table, set encryption information.
		The chunk at offset 0 is read without it. */
		if (iter.encryption_key != NULL && offset != 0) {
			read_request.encryption_key(iter.encryption_key,
				ENCRYPTION_KEY_LEN,
				iter.encryption_iv);
			read_request.encryption_algorithm(Encryption::AES);
		}

		err = os_file_read(
			read_request, iter.file, io_buffer, offset,
			(ulint) n_bytes);

		if (err != DB_SUCCESS) {

			ib::error() << "os_file_read() failed";

			return(err);
		}

		/* Becomes true once any page of this chunk is left in
		state BUF_BLOCK_FILE_PAGE by the callback. */
		bool updated = false;
		os_offset_t page_off = offset;
		ulint n_pages_read = (ulint) n_bytes / iter.page_size;

		for (ulint i = 0; i < n_pages_read; ++i) {

			buf_block_set_file_page(
				block, page_id_t(space_id, page_no++));

			if ((err = callback(page_off, block)) != DB_SUCCESS) {

				return(err);

			} else if (!updated) {
				updated = buf_block_get_state(block)
					== BUF_BLOCK_FILE_PAGE;
			}

			/* Return the block to the ready state so that it
			can be set up for the next page. */
			buf_block_set_state(block, BUF_BLOCK_NOT_USED);
			buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);

			page_off += iter.page_size;
			block->frame += iter.page_size;
		}

		IORequest write_request(write_type);
		write_request.block_size(iter.block_size);

		/* For encrypted table, set encryption information.
		The chunk at offset 0 is written without it. */
		if (iter.encryption_key != NULL && offset != 0) {
			write_request.encryption_key(iter.encryption_key,
				ENCRYPTION_KEY_LEN,
				iter.encryption_iv);
			write_request.encryption_algorithm(Encryption::AES);
		}

		/* A page was updated in the set, write back to disk.
		Note: We don't have the compression algorithm, we write
		out the imported file as uncompressed. */

		if (updated
			&& (err = os_file_write(
				write_request,
				iter.filepath, iter.file, io_buffer,
				offset, (ulint) n_bytes)) != DB_SUCCESS) {

			/* This is not a hard error */
			if (err == DB_IO_NO_PUNCH_HOLE) {

				/* Stop requesting hole punching for the
				remaining chunks. */
				err = DB_SUCCESS;
				write_type &= ~IORequest::PUNCH_HOLE;

			} else {
				ib::error() << "os_file_write() failed";

				return(err);
			}
		}
	}

	return(DB_SUCCESS);
}
6513
/********************************************************************//**
Iterate over all the pages in the .ibd file of a table, letting the
callback inspect and update each page, and finally flush the file.

The file path is derived from the table name (and from the table's data
directory when DICT_TF_HAS_DATA_DIR is set). The first page is read to
let callback.init() determine the page size, after which fil_iterate()
processes the whole file. If the tablespace flags indicate encryption,
the table must be encrypted and must carry an encryption key and iv.

@param table the table definition in the server
@param n_io_buffers number of blocks to read and write together
@param callback functor that will do the page updates
@return DB_SUCCESS or error code
@retval DB_OUT_OF_MEMORY if the file path could not be built
@retval DB_TABLESPACE_NOT_FOUND if the file could not be opened
@retval DB_IO_ERROR if reading the first page or the final flush failed
@retval DB_IO_NO_ENCRYPT_TABLESPACE if the file is encrypted but the
table is not */
dberr_t
fil_tablespace_iterate(
/*===================*/
	dict_table_t* table,
	ulint n_io_buffers,
	PageCallback& callback)
{
	dberr_t err;
	pfs_os_file_t file;
	char* filepath;
	bool success;

	ut_a(n_io_buffers > 0);
	ut_ad(!srv_read_only_mode);

	/* Debug-only fault injection. */
	DBUG_EXECUTE_IF("ib_import_trigger_corruption_1",
		return(DB_CORRUPTION););

	/* Make sure the data_dir_path is set. */
	dict_get_and_save_data_dir_path(table, false);

	if (DICT_TF_HAS_DATA_DIR(table->flags)) {
		ut_a(table->data_dir_path);

		filepath = fil_make_filepath(
			table->data_dir_path, table->name.m_name, IBD, true);
	} else {
		filepath = fil_make_filepath(
			NULL, table->name.m_name, IBD, false);
	}

	if (filepath == NULL) {
		return(DB_OUT_OF_MEMORY);
	}

	file = os_file_create_simple_no_error_handling(
		innodb_data_file_key, filepath,
		OS_FILE_OPEN, OS_FILE_READ_WRITE, srv_read_only_mode, &success);

	/* Debug-only fault injection: pretend that the open failed. */
	DBUG_EXECUTE_IF("fil_tablespace_iterate_failure",
	{
		static bool once;

		if (!once || ut_rnd_interval(0, 10) == 5) {
			once = true;
			success = false;
			os_file_close(file);
		}
	});

	if (!success) {
		/* The following call prints an error message */
		os_file_get_last_error(true);

		ib::error() << "Trying to import a tablespace, but could not"
			" open the tablespace file " << filepath;

		ut_free(filepath);

		return(DB_TABLESPACE_NOT_FOUND);

	} else {
		err = DB_SUCCESS;
	}

	/* Set File System Block Size */
	size_t block_size;
	{
		os_file_stat_t stat_info;

		/* NOTE(review): the result is only captured and checked
		in debug builds; in other builds a failure here goes
		unnoticed and stat_info.block_size is used as is. */
		ut_d(dberr_t err =) os_file_get_status(filepath, &stat_info, false, false);
		ut_ad(err == DB_SUCCESS);

		block_size = stat_info.block_size;
	}

	/* NOTE(review): filepath is freed at the end of this function;
	whether the callback keeps the pointer beyond that is not
	visible here. */
	callback.set_file(filepath, file);

	os_offset_t file_size = os_file_get_size(file);
	ut_a(file_size != (os_offset_t) -1);

	/* The block we will use for every physical page */
	buf_block_t* block;

	block = reinterpret_cast<buf_block_t*>(ut_zalloc_nokey(sizeof(*block)));

	mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);

	/* Allocate a page to read in the tablespace header, so that we
	can determine the page size and zip size (if it is compressed).
	We allocate an extra page in case it is a compressed table. One
	page is to ensure alignment. */

	void* page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
	byte* page = static_cast<byte*>(ut_align(page_ptr, UNIV_PAGE_SIZE));

	fil_buf_block_init(block, page);

	/* Read the first page and determine the page and zip size. */

	IORequest request(IORequest::READ);

	err = os_file_read(request, file, page, 0, UNIV_PAGE_SIZE);

	if (err != DB_SUCCESS) {

		/* Any read failure is reported as a generic IO error. */
		err = DB_IO_ERROR;

	} else if ((err = callback.init(file_size, block)) == DB_SUCCESS) {
		fil_iterator_t iter;

		/* iter.io_buffer is assigned further below, once the
		number of IO buffers is final. */
		iter.file = file;
		iter.start = 0;
		iter.end = file_size;
		iter.filepath = filepath;
		iter.file_size = file_size;
		iter.n_io_buffers = n_io_buffers;
		iter.page_size = callback.get_page_size().physical();
		iter.block_size = block_size;

		/* Set encryption info. */
		iter.encryption_key = table->encryption_key;
		iter.encryption_iv = table->encryption_iv;

		/* Check encryption is matched or not. */
		ulint space_flags = callback.get_space_flags();
		if (FSP_FLAGS_GET_ENCRYPTION(space_flags)) {
			if (!dict_table_is_encrypted(table)) {
				ib::error() << "Table is not in an encrypted"
					" tablespace, but the data file"
					" intended for import is an encrypted"
					" tablespace";
				err = DB_IO_NO_ENCRYPT_TABLESPACE;
			} else {
				/* encryption_key must have been populated
				while reading CFP file. */
				ut_ad(table->encryption_key != NULL &&
					table->encryption_iv != NULL);

				/* Fail gracefully in builds where the
				assertion above is compiled out. */
				if (table->encryption_key == NULL ||
					table->encryption_iv == NULL) {
					err = DB_ERROR;
				}
			}
		}

		if (err == DB_SUCCESS) {

			/* Compressed pages can't be optimised for block IO
			for now. We do the IMPORT page by page. */

			if (callback.get_page_size().is_compressed()) {
				iter.n_io_buffers = 1;
				ut_a(iter.page_size
					== callback.get_page_size().physical());
			}

			/** Add an extra page for compressed page scratch
			area. One more page allows for the alignment
			done below. */
			void* io_buffer = ut_malloc_nokey(
				(2 + iter.n_io_buffers) * UNIV_PAGE_SIZE);

			iter.io_buffer = static_cast<byte*>(
				ut_align(io_buffer, UNIV_PAGE_SIZE));

			err = fil_iterate(iter, block, callback);

			ut_free(io_buffer);
		}
	}

	if (err == DB_SUCCESS) {

		ib::info() << "Sync to disk";

		if (!os_file_flush(file)) {
			/* NOTE(review): a failed flush is logged at info
			level although it makes the call fail with
			DB_IO_ERROR; consider whether error level was
			intended. */
			ib::info() << "os_file_flush() failed!";
			err = DB_IO_ERROR;
		} else {
			ib::info() << "Sync to disk - done!";
		}
	}

	/* Common cleanup for both the success and the failure paths. */
	os_file_close(file);

	ut_free(page_ptr);
	ut_free(filepath);

	mutex_free(&block->mutex);

	ut_free(block);

	return(err);
}
6714 #endif /* !UNIV_HOTBACKUP */
6715
6716 /** Set the tablespace table size.
6717 @param[in] page a page belonging to the tablespace */
6718 void
set_page_size(const buf_frame_t * page)6719 PageCallback::set_page_size(
6720 const buf_frame_t* page) UNIV_NOTHROW
6721 {
6722 m_page_size.copy_from(fsp_header_get_page_size(page));
6723 }
6724
6725 /********************************************************************//**
6726 Delete the tablespace file and any related files like .cfg.
6727 This should not be called for temporary tables.
6728 @param[in] ibd_filepath File path of the IBD tablespace */
6729 void
fil_delete_file(const char * ibd_filepath)6730 fil_delete_file(
6731 /*============*/
6732 const char* ibd_filepath)
6733 {
6734 /* Force a delete of any stale .ibd files that are lying around. */
6735
6736 ib::info() << "Deleting " << ibd_filepath;
6737
6738 os_file_delete_if_exists(innodb_data_file_key, ibd_filepath, NULL);
6739
6740 char* cfg_filepath = fil_make_filepath(
6741 ibd_filepath, NULL, CFG, false);
6742 if (cfg_filepath != NULL) {
6743 os_file_delete_if_exists(
6744 innodb_data_file_key, cfg_filepath, NULL);
6745 ut_free(cfg_filepath);
6746 }
6747
6748 char* cfp_filepath = fil_make_filepath(
6749 ibd_filepath, NULL, CFP, false);
6750 if (cfp_filepath != NULL) {
6751 os_file_delete_if_exists(
6752 innodb_data_file_key, cfp_filepath, NULL);
6753 ut_free(cfp_filepath);
6754 }
6755 }
6756
6757 /**
6758 Iterate over all the spaces in the space list and fetch the
6759 tablespace names. It will return a copy of the name that must be
6760 freed by the caller using: delete[].
6761 @return DB_SUCCESS if all OK. */
6762 dberr_t
fil_get_space_names(space_name_list_t & space_name_list)6763 fil_get_space_names(
6764 /*================*/
6765 space_name_list_t& space_name_list)
6766 /*!< in/out: List to append to */
6767 {
6768 fil_space_t* space;
6769 dberr_t err = DB_SUCCESS;
6770
6771 mutex_enter(&fil_system->mutex);
6772
6773 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
6774 space != NULL;
6775 space = UT_LIST_GET_NEXT(space_list, space)) {
6776
6777 if (space->purpose == FIL_TYPE_TABLESPACE) {
6778 ulint len;
6779 char* name;
6780
6781 len = ::strlen(space->name);
6782 name = UT_NEW_ARRAY_NOKEY(char, len + 1);
6783
6784 if (name == 0) {
6785 /* Caller to free elements allocated so far. */
6786 err = DB_OUT_OF_MEMORY;
6787 break;
6788 }
6789
6790 memcpy(name, space->name, len);
6791 name[len] = 0;
6792
6793 space_name_list.push_back(name);
6794 }
6795 }
6796
6797 mutex_exit(&fil_system->mutex);
6798
6799 return(err);
6800 }
6801
6802 #ifndef UNIV_HOTBACKUP
6803 /** Return the next fil_node_t in the current or next fil_space_t.
6804 Once started, the caller must keep calling this until it returns NULL.
6805 fil_space_acquire() and fil_space_release() are invoked here which
6806 blocks a concurrent operation from dropping the tablespace.
6807 @param[in] prev_node Pointer to the previous fil_node_t.
6808 If NULL, use the first fil_space_t on fil_system->space_list.
6809 @return pointer to the next fil_node_t.
6810 @retval NULL if this was the last file node */
6811 const fil_node_t*
fil_node_next(const fil_node_t * prev_node)6812 fil_node_next(
6813 const fil_node_t* prev_node)
6814 {
6815 fil_space_t* space;
6816 const fil_node_t* node = prev_node;
6817
6818 mutex_enter(&fil_system->mutex);
6819
6820 if (node == NULL) {
6821 space = UT_LIST_GET_FIRST(fil_system->space_list);
6822
6823 /* We can trust that space is not NULL because at least the
6824 system tablespace is always present and loaded first. */
6825 space->n_pending_ops++;
6826
6827 node = UT_LIST_GET_FIRST(space->chain);
6828 ut_ad(node != NULL);
6829 } else {
6830 space = node->space;
6831 ut_ad(space->n_pending_ops > 0);
6832 node = UT_LIST_GET_NEXT(chain, node);
6833
6834 if (node == NULL) {
6835 /* Move on to the next fil_space_t */
6836 space->n_pending_ops--;
6837 space = UT_LIST_GET_NEXT(space_list, space);
6838
6839 /* Skip spaces that are being
6840 created by fil_ibd_create(),
6841 or dropped or truncated. */
6842 while (space != NULL
6843 && (UT_LIST_GET_LEN(space->chain) == 0
6844 || space->stop_new_ops
6845 || space->is_being_truncated)) {
6846 space = UT_LIST_GET_NEXT(space_list, space);
6847 }
6848
6849 if (space != NULL) {
6850 space->n_pending_ops++;
6851 node = UT_LIST_GET_FIRST(space->chain);
6852 ut_ad(node != NULL);
6853 }
6854 }
6855 }
6856
6857 mutex_exit(&fil_system->mutex);
6858
6859 return(node);
6860 }
6861
6862 /** Generate redo log for swapping two .ibd files
6863 @param[in] old_table old table
6864 @param[in] new_table new table
6865 @param[in] tmp_name temporary table name
6866 @param[in,out] mtr mini-transaction
6867 @return innodb error code */
6868 dberr_t
fil_mtr_rename_log(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name,mtr_t * mtr)6869 fil_mtr_rename_log(
6870 const dict_table_t* old_table,
6871 const dict_table_t* new_table,
6872 const char* tmp_name,
6873 mtr_t* mtr)
6874 {
6875 dberr_t err;
6876
6877 bool old_is_file_per_table =
6878 !is_system_tablespace(old_table->space)
6879 && !DICT_TF_HAS_SHARED_SPACE(old_table->flags);
6880
6881 bool new_is_file_per_table =
6882 !is_system_tablespace(new_table->space)
6883 && !DICT_TF_HAS_SHARED_SPACE(new_table->flags);
6884
6885 /* If neither table is file-per-table,
6886 there will be no renaming of files. */
6887 if (!old_is_file_per_table && !new_is_file_per_table) {
6888 return(DB_SUCCESS);
6889 }
6890
6891 const char* old_dir = DICT_TF_HAS_DATA_DIR(old_table->flags)
6892 ? old_table->data_dir_path
6893 : NULL;
6894
6895 char* old_path = fil_make_filepath(
6896 old_dir, old_table->name.m_name, IBD, (old_dir != NULL));
6897 if (old_path == NULL) {
6898 return(DB_OUT_OF_MEMORY);
6899 }
6900
6901 if (old_is_file_per_table) {
6902 char* tmp_path = fil_make_filepath(
6903 old_dir, tmp_name, IBD, (old_dir != NULL));
6904 if (tmp_path == NULL) {
6905 ut_free(old_path);
6906 return(DB_OUT_OF_MEMORY);
6907 }
6908
6909 /* Temp filepath must not exist. */
6910 err = fil_rename_tablespace_check(
6911 old_table->space, old_path, tmp_path,
6912 dict_table_is_discarded(old_table));
6913 if (err != DB_SUCCESS) {
6914 ut_free(old_path);
6915 ut_free(tmp_path);
6916 return(err);
6917 }
6918
6919 fil_name_write_rename(
6920 old_table->space, 0, old_path, tmp_path, mtr);
6921
6922 ut_free(tmp_path);
6923 }
6924
6925 if (new_is_file_per_table) {
6926 const char* new_dir = DICT_TF_HAS_DATA_DIR(new_table->flags)
6927 ? new_table->data_dir_path
6928 : NULL;
6929 char* new_path = fil_make_filepath(
6930 new_dir, new_table->name.m_name,
6931 IBD, (new_dir != NULL));
6932 if (new_path == NULL) {
6933 ut_free(old_path);
6934 return(DB_OUT_OF_MEMORY);
6935 }
6936
6937 /* Destination filepath must not exist unless this ALTER
6938 TABLE starts and ends with a file_per-table tablespace. */
6939 if (!old_is_file_per_table) {
6940 err = fil_rename_tablespace_check(
6941 new_table->space, new_path, old_path,
6942 dict_table_is_discarded(new_table));
6943 if (err != DB_SUCCESS) {
6944 ut_free(old_path);
6945 ut_free(new_path);
6946 return(err);
6947 }
6948 }
6949
6950 fil_name_write_rename(
6951 new_table->space, 0, new_path, old_path, mtr);
6952
6953 ut_free(new_path);
6954 }
6955
6956 ut_free(old_path);
6957
6958 return(DB_SUCCESS);
6959 }
6960 #endif /* !UNIV_HOTBACKUP */
6961 #ifdef UNIV_DEBUG
/** Check that a tablespace is valid for mtr_commit().
Debug-only helper consisting solely of assertions: the caller must not
hold fil_system->mutex, and the space must be a non-predefined
tablespace of type FIL_TYPE_TABLESPACE that is either accepting new
operations, being truncated, or pinned by a pending operation.
@param[in] space persistent tablespace that has been changed */
static
void
fil_space_validate_for_mtr_commit(
	const fil_space_t* space)
{
	ut_ad(!mutex_own(&fil_system->mutex));
	ut_ad(space != NULL);
	ut_ad(space->purpose == FIL_TYPE_TABLESPACE);
	ut_ad(!is_predefined_tablespace(space->id));

	/* We are serving mtr_commit(). While there is an active
	mini-transaction, we should have !space->stop_new_ops. This is
	guaranteed by meta-data locks or transactional locks, or
	dict_operation_lock (X-lock in DROP, S-lock in purge).

	However, a file I/O thread can invoke change buffer merge
	while fil_check_pending_operations() is waiting for operations
	to quiesce. This is not a problem, because
	ibuf_merge_or_delete_for_page() would call
	fil_space_acquire() before mtr_start() and
	fil_space_release() after mtr_commit(). This is why
	n_pending_ops should not be zero if stop_new_ops is set. */
	ut_ad(!space->stop_new_ops
		|| space->is_being_truncated /* TRUNCATE sets stop_new_ops */
		|| space->n_pending_ops > 0);
}
6990 #endif /* UNIV_DEBUG */
6991
6992 /** Write a MLOG_FILE_NAME record for a persistent tablespace.
6993 @param[in] space tablespace
6994 @param[in,out] mtr mini-transaction */
6995 static
6996 void
fil_names_write(const fil_space_t * space,mtr_t * mtr)6997 fil_names_write(
6998 const fil_space_t* space,
6999 mtr_t* mtr)
7000 {
7001 ut_ad(UT_LIST_GET_LEN(space->chain) == 1);
7002 fil_name_write(space, 0, UT_LIST_GET_FIRST(space->chain), mtr);
7003 }
7004
7005 /** Note that a non-predefined persistent tablespace has been modified
7006 by redo log.
7007 @param[in,out] space tablespace */
7008 void
fil_names_dirty(fil_space_t * space)7009 fil_names_dirty(
7010 fil_space_t* space)
7011 {
7012 ut_ad(log_mutex_own());
7013 ut_ad(recv_recovery_is_on());
7014 ut_ad(log_sys->lsn != 0);
7015 ut_ad(space->max_lsn == 0);
7016 ut_d(fil_space_validate_for_mtr_commit(space));
7017
7018 UT_LIST_ADD_LAST(fil_system->named_spaces, space);
7019 space->max_lsn = log_sys->lsn;
7020 }
7021
/** Write MLOG_FILE_NAME records when a non-predefined persistent
tablespace was modified for the first time since the latest
fil_names_clear().
The space is appended to fil_system->named_spaces and its file name is
written to the mini-transaction log. The caller must hold the log mutex
and must already have set space->max_lsn to the current log_sys->lsn
(both are asserted).
@param[in,out] space tablespace
@param[in,out] mtr mini-transaction */
void
fil_names_dirty_and_write(
	fil_space_t* space,
	mtr_t* mtr)
{
	ut_ad(log_mutex_own());
	ut_d(fil_space_validate_for_mtr_commit(space));
	ut_ad(space->max_lsn == log_sys->lsn);

	UT_LIST_ADD_LAST(fil_system->named_spaces, space);
	fil_names_write(space, mtr);

	/* Debug-only fault injection: additionally log a bogus file
	name for the space id SRV_LOG_SPACE_FIRST_ID. */
	DBUG_EXECUTE_IF("fil_names_write_bogus",
	{
		char bogus_name[] = "./test/bogus file.ibd";
		os_normalize_path(bogus_name);
		fil_name_write(
			SRV_LOG_SPACE_FIRST_ID, 0,
			bogus_name, mtr);
	});
}
7048 #ifndef UNIV_HOTBACKUP
7049 /** On a log checkpoint, reset fil_names_dirty_and_write() flags
7050 and write out MLOG_FILE_NAME and MLOG_CHECKPOINT if needed.
7051 @param[in] lsn checkpoint LSN
7052 @param[in] do_write whether to always write MLOG_CHECKPOINT
7053 @return whether anything was written to the redo log
7054 @retval false if no flags were set and nothing written
7055 @retval true if anything was written to the redo log */
7056 bool
fil_names_clear(lsn_t lsn,bool do_write)7057 fil_names_clear(
7058 lsn_t lsn,
7059 bool do_write)
7060 {
7061 mtr_t mtr;
7062 ulint mtr_checkpoint_size = LOG_CHECKPOINT_FREE_PER_THREAD;
7063
7064 DBUG_EXECUTE_IF(
7065 "increase_mtr_checkpoint_size",
7066 mtr_checkpoint_size = 75 * 1024;
7067 );
7068
7069 ut_ad(log_mutex_own());
7070
7071 if (log_sys->append_on_checkpoint) {
7072 mtr_write_log(log_sys->append_on_checkpoint);
7073 do_write = true;
7074 }
7075
7076 mtr.start();
7077
7078 for (fil_space_t* space = UT_LIST_GET_FIRST(fil_system->named_spaces);
7079 space != NULL; ) {
7080 fil_space_t* next = UT_LIST_GET_NEXT(named_spaces, space);
7081
7082 ut_ad(space->max_lsn > 0);
7083 if (space->max_lsn < lsn) {
7084 /* The tablespace was last dirtied before the
7085 checkpoint LSN. Remove it from the list, so
7086 that if the tablespace is not going to be
7087 modified any more, subsequent checkpoints will
7088 avoid calling fil_names_write() on it. */
7089 space->max_lsn = 0;
7090 UT_LIST_REMOVE(fil_system->named_spaces, space);
7091 }
7092
7093 /* max_lsn is the last LSN where fil_names_dirty_and_write()
7094 was called. If we kept track of "min_lsn" (the first LSN
7095 where max_lsn turned nonzero), we could avoid the
7096 fil_names_write() call if min_lsn > lsn. */
7097
7098 fil_names_write(space, &mtr);
7099 do_write = true;
7100
7101 const mtr_buf_t* mtr_log = mtr_get_log(&mtr);
7102
7103 /** If the mtr buffer size exceeds the size of
7104 LOG_CHECKPOINT_FREE_PER_THREAD then commit the multi record
7105 mini-transaction, start the new mini-transaction to
7106 avoid the parsing buffer overflow error during recovery. */
7107
7108 if (mtr_log->size() > mtr_checkpoint_size) {
7109 ut_ad(mtr_log->size() < (RECV_PARSING_BUF_SIZE / 2));
7110 mtr.commit_checkpoint(lsn, false);
7111 mtr.start();
7112 }
7113
7114 space = next;
7115 }
7116
7117 if (do_write) {
7118 mtr.commit_checkpoint(lsn, true);
7119 } else {
7120 ut_ad(!mtr.has_modifications());
7121 }
7122
7123 return(do_write);
7124 }
7125
7126 /** Truncate a single-table tablespace. The tablespace must be cached
7127 in the memory cache.
7128 @param space_id space id
7129 @param dir_path directory path
7130 @param tablename the table name in the usual
7131 databasename/tablename format of InnoDB
7132 @param flags tablespace flags
7133 @param trunc_to_default truncate to default size if tablespace
7134 is being newly re-initialized.
7135 @return DB_SUCCESS or error */
7136 dberr_t
truncate(ulint space_id,const char * dir_path,const char * tablename,ulint flags,bool trunc_to_default)7137 truncate_t::truncate(
7138 /*=================*/
7139 ulint space_id,
7140 const char* dir_path,
7141 const char* tablename,
7142 ulint flags,
7143 bool trunc_to_default)
7144 {
7145 dberr_t err = DB_SUCCESS;
7146 char* path;
7147 bool has_data_dir = FSP_FLAGS_HAS_DATA_DIR(flags);
7148
7149 ut_a(!is_system_tablespace(space_id));
7150
7151 if (has_data_dir) {
7152 ut_ad(dir_path != NULL);
7153
7154 path = fil_make_filepath(dir_path, tablename, IBD, true);
7155
7156 } else {
7157 path = fil_make_filepath(NULL, tablename, IBD, false);
7158 }
7159
7160 if (path == NULL) {
7161 return(DB_OUT_OF_MEMORY);
7162 }
7163
7164 mutex_enter(&fil_system->mutex);
7165
7166 fil_space_t* space = fil_space_get_by_id(space_id);
7167
7168 /* The following code must change when InnoDB supports
7169 multiple datafiles per tablespace. */
7170 ut_a(UT_LIST_GET_LEN(space->chain) == 1);
7171
7172 fil_node_t* node = UT_LIST_GET_FIRST(space->chain);
7173
7174 if (trunc_to_default) {
7175 space->size = node->size = FIL_IBD_FILE_INITIAL_SIZE;
7176 }
7177
7178 const bool already_open = node->is_open;
7179
7180 if (!already_open) {
7181
7182 bool ret;
7183
7184 node->handle = os_file_create_simple_no_error_handling(
7185 innodb_data_file_key, path, OS_FILE_OPEN,
7186 OS_FILE_READ_WRITE,
7187 fsp_is_system_temporary(space_id)
7188 ? false : srv_read_only_mode, &ret);
7189
7190 if (!ret) {
7191 ib::error() << "Failed to open tablespace file "
7192 << path << ".";
7193
7194 ut_free(path);
7195
7196 return(DB_ERROR);
7197 }
7198
7199 node->is_open = true;
7200 }
7201
7202 os_offset_t trunc_size = trunc_to_default
7203 ? FIL_IBD_FILE_INITIAL_SIZE
7204 : space->size;
7205
7206 const bool success = os_file_truncate(
7207 path, node->handle, trunc_size * UNIV_PAGE_SIZE);
7208
7209 if (!success) {
7210 ib::error() << "Cannot truncate file " << path
7211 << " in TRUNCATE TABLESPACE.";
7212 err = DB_ERROR;
7213 }
7214
7215 space->stop_new_ops = false;
7216 space->is_being_truncated = false;
7217
7218 /* If we opened the file in this function, close it. */
7219 if (!already_open) {
7220 bool closed = os_file_close(node->handle);
7221
7222 if (!closed) {
7223
7224 ib::error() << "Failed to close tablespace file "
7225 << path << ".";
7226
7227 err = DB_ERROR;
7228 } else {
7229 node->is_open = false;
7230 }
7231 }
7232
7233 mutex_exit(&fil_system->mutex);
7234
7235 ut_free(path);
7236
7237 return(err);
7238 }
7239 #endif /* !UNIV_HOTBACKUP */
7240
7241 /**
7242 Note that the file system where the file resides doesn't support PUNCH HOLE.
7243 Called from AIO handlers when IO returns DB_IO_NO_PUNCH_HOLE
7244 @param[in,out] node Node to set */
7245 void
fil_no_punch_hole(fil_node_t * node)7246 fil_no_punch_hole(fil_node_t* node)
7247 {
7248 node->punch_hole = false;
7249 }
7250
7251 /** Set the compression type for the tablespace of a table
7252 @param[in] table The table that should be compressed
7253 @param[in] algorithm Text representation of the algorithm
7254 @return DB_SUCCESS or error code */
7255 dberr_t
fil_set_compression(dict_table_t * table,const char * algorithm)7256 fil_set_compression(
7257 dict_table_t* table,
7258 const char* algorithm)
7259 {
7260 ut_ad(table != NULL);
7261
7262 /* We don't support Page Compression for the system tablespace,
7263 the temporary tablespace, or any general tablespace because
7264 COMPRESSION is set by TABLE DDL, not TABLESPACE DDL. There is
7265 no other technical reason. Also, do not use it for missing
7266 tables or tables with compressed row_format. */
7267 if (table->ibd_file_missing
7268 || !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE)
7269 || DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY)
7270 || page_size_t(table->flags).is_compressed()) {
7271
7272 return(DB_IO_NO_PUNCH_HOLE_TABLESPACE);
7273 }
7274
7275 dberr_t err;
7276 Compression compression;
7277
7278 if (algorithm == NULL || strlen(algorithm) == 0) {
7279
7280 #ifndef UNIV_DEBUG
7281 compression.m_type = Compression::NONE;
7282 #else
7283 /* This is a Debug tool for setting compression on all
7284 compressible tables not otherwise specified. */
7285 switch (srv_debug_compress) {
7286 case Compression::LZ4:
7287 case Compression::ZLIB:
7288 case Compression::NONE:
7289
7290 compression.m_type =
7291 static_cast<Compression::Type>(
7292 srv_debug_compress);
7293 break;
7294
7295 default:
7296 compression.m_type = Compression::NONE;
7297 }
7298
7299 #endif /* UNIV_DEBUG */
7300
7301 err = DB_SUCCESS;
7302
7303 } else {
7304
7305 err = Compression::check(algorithm, &compression);
7306 }
7307
7308 fil_space_t* space = fil_space_get(table->space);
7309
7310 if (space == NULL) {
7311 return(DB_NOT_FOUND);
7312 }
7313
7314 space->compression_type = compression.m_type;
7315
7316 if (space->compression_type != Compression::NONE) {
7317
7318 const fil_node_t* node;
7319
7320 node = UT_LIST_GET_FIRST(space->chain);
7321
7322 if (!node->punch_hole) {
7323
7324 return(DB_IO_NO_PUNCH_HOLE_FS);
7325 }
7326 }
7327
7328 return(err);
7329 }
7330
7331 /** Get the compression algorithm for a tablespace.
7332 @param[in] space_id Space ID to check
7333 @return the compression algorithm */
7334 Compression::Type
fil_get_compression(ulint space_id)7335 fil_get_compression(
7336 ulint space_id)
7337 {
7338 fil_space_t* space = fil_space_get(space_id);
7339
7340 return(space == NULL ? Compression::NONE : space->compression_type);
7341 }
7342
7343 /** Set the encryption type for the tablespace
7344 @param[in] space_id Space ID of tablespace for which to set
7345 @param[in] algorithm Encryption algorithm
7346 @param[in] key Encryption key
7347 @param[in] iv Encryption iv
7348 @return DB_SUCCESS or error code */
7349 dberr_t
fil_set_encryption(ulint space_id,Encryption::Type algorithm,byte * key,byte * iv)7350 fil_set_encryption(
7351 ulint space_id,
7352 Encryption::Type algorithm,
7353 byte* key,
7354 byte* iv)
7355 {
7356 ut_ad(!is_system_or_undo_tablespace(space_id));
7357
7358 if (is_system_tablespace(space_id)) {
7359 return(DB_IO_NO_ENCRYPT_TABLESPACE);
7360 }
7361
7362 mutex_enter(&fil_system->mutex);
7363
7364 fil_space_t* space = fil_space_get_by_id(space_id);
7365
7366 if (space == NULL) {
7367 mutex_exit(&fil_system->mutex);
7368 return(DB_NOT_FOUND);
7369 }
7370
7371 ut_ad(algorithm != Encryption::NONE);
7372 space->encryption_type = algorithm;
7373 if (key == NULL) {
7374 Encryption::random_value(space->encryption_key);
7375 } else {
7376 memcpy(space->encryption_key,
7377 key, ENCRYPTION_KEY_LEN);
7378 }
7379
7380 space->encryption_klen = ENCRYPTION_KEY_LEN;
7381 if (iv == NULL) {
7382 Encryption::random_value(space->encryption_iv);
7383 } else {
7384 memcpy(space->encryption_iv,
7385 iv, ENCRYPTION_KEY_LEN);
7386 }
7387
7388 mutex_exit(&fil_system->mutex);
7389
7390 return(DB_SUCCESS);
7391 }
7392
7393 /** Rotate the tablespace keys by new master key.
7394 @return true if the re-encrypt suceeds */
7395 bool
fil_encryption_rotate()7396 fil_encryption_rotate()
7397 {
7398 fil_space_t* space;
7399 mtr_t mtr;
7400 byte encrypt_info[ENCRYPTION_INFO_SIZE_V2];
7401
7402 for (space = UT_LIST_GET_FIRST(fil_system->space_list);
7403 space != NULL; ) {
7404 /* Skip unencypted tablespaces. */
7405 if (is_system_or_undo_tablespace(space->id)
7406 || fsp_is_system_temporary(space->id)
7407 || space->purpose == FIL_TYPE_LOG) {
7408 space = UT_LIST_GET_NEXT(space_list, space);
7409 continue;
7410 }
7411
7412 if (space->encryption_type != Encryption::NONE) {
7413 mtr_start(&mtr);
7414 mtr.set_named_space(space->id);
7415
7416 space = mtr_x_lock_space(space->id, &mtr);
7417
7418 memset(encrypt_info, 0, ENCRYPTION_INFO_SIZE_V2);
7419
7420 if (!fsp_header_rotate_encryption(space,
7421 encrypt_info,
7422 &mtr)) {
7423 mtr_commit(&mtr);
7424 return(false);
7425 }
7426
7427 mtr_commit(&mtr);
7428 }
7429
7430 space = UT_LIST_GET_NEXT(space_list, space);
7431 DBUG_EXECUTE_IF("ib_crash_during_rotation_for_encryption",
7432 DBUG_SUICIDE(););
7433 }
7434
7435 return(true);
7436 }
7437
7438 /** Build the basic folder name from the path and length provided
7439 @param[in] path pathname (may also include the file basename)
7440 @param[in] len length of the path, in bytes */
7441 void
make_path(const char * path,size_t len)7442 Folder::make_path(const char* path, size_t len)
7443 {
7444 if (is_absolute_path(path)) {
7445 m_folder = mem_strdupl(path, len);
7446 m_folder_len = len;
7447 }
7448 else {
7449 size_t n = 2 + len + strlen(fil_path_to_mysql_datadir);
7450 m_folder = static_cast<char*>(ut_malloc_nokey(n));
7451 m_folder_len = 0;
7452
7453 if (path != fil_path_to_mysql_datadir) {
7454 /* Put the mysqld datadir into m_folder first. */
7455 ut_ad(fil_path_to_mysql_datadir[0] != '\0');
7456 m_folder_len = strlen(fil_path_to_mysql_datadir);
7457 memcpy(m_folder, fil_path_to_mysql_datadir,
7458 m_folder_len);
7459 if (m_folder[m_folder_len - 1] != OS_PATH_SEPARATOR) {
7460 m_folder[m_folder_len++] = OS_PATH_SEPARATOR;
7461 }
7462 }
7463
7464 /* Append the path. */
7465 memcpy(m_folder + m_folder_len, path, len);
7466 m_folder_len += len;
7467 m_folder[m_folder_len] = '\0';
7468 }
7469
7470 os_normalize_path(m_folder);
7471 }
7472
7473 /** Resolve a relative path in m_folder to an absolute path
7474 in m_abs_path setting m_abs_len. */
7475 void
make_abs_path()7476 Folder::make_abs_path()
7477 {
7478 my_realpath(m_abs_path, m_folder, MYF(0));
7479 m_abs_len = strlen(m_abs_path);
7480
7481 ut_ad(m_abs_len + 1 < sizeof(m_abs_path));
7482
7483 /* Folder::related_to() needs a trailing separator. */
7484 if (m_abs_path[m_abs_len - 1] != OS_PATH_SEPARATOR) {
7485 m_abs_path[m_abs_len] = OS_PATH_SEPARATOR;
7486 m_abs_path[++m_abs_len] = '\0';
7487 }
7488 }
7489
7490 /** Constructor
7491 @param[in] path pathname (may also include the file basename)
7492 @param[in] len length of the path, in bytes */
Folder(const char * path,size_t len)7493 Folder::Folder(const char* path, size_t len)
7494 {
7495 make_path(path, len);
7496 make_abs_path();
7497 }
7498
7499 /** Assignment operator
7500 @param[in] folder folder string provided */
7501 class Folder&
operator =(const char * path)7502 Folder::operator=(const char* path)
7503 {
7504 ut_free(m_folder);
7505 make_path(path, strlen(path));
7506 make_abs_path();
7507
7508 return(*this);
7509 }
7510
7511 /** Determine if two folders are equal
7512 @param[in] other folder to compare to
7513 @return whether the folders are equal */
operator ==(const Folder & other) const7514 bool Folder::operator==(const Folder& other) const
7515 {
7516 return(m_abs_len == other.m_abs_len
7517 && !memcmp(m_abs_path, other.m_abs_path, m_abs_len));
7518 }
7519
7520 /** Determine if the left folder is the same or an ancestor of
7521 (contains) the right folder.
7522 @param[in] other folder to compare to
7523 @return whether this is the same or an ancestor of the other folder. */
operator >=(const Folder & other) const7524 bool Folder::operator>=(const Folder& other) const
7525 {
7526 return(m_abs_len <= other.m_abs_len
7527 && (!memcmp(other.m_abs_path, m_abs_path, m_abs_len)));
7528 }
7529
7530 /** Determine if the left folder is an ancestor of (contains)
7531 the right folder.
7532 @param[in] other folder to compare to
7533 @return whether this is an ancestor of the other folder */
operator >(const Folder & other) const7534 bool Folder::operator>(const Folder& other) const
7535 {
7536 return(m_abs_len < other.m_abs_len
7537 && (!memcmp(other.m_abs_path, m_abs_path, m_abs_len)));
7538 }
7539
7540 /** Determine if the directory referenced by m_folder exists.
7541 @return whether the directory exists */
7542 bool
exists()7543 Folder::exists()
7544 {
7545 bool exists;
7546 os_file_type_t type;
7547
7548 #ifdef _WIN32
7549 /* Temporarily strip the trailing_separator since it will cause
7550 _stat64() to fail on Windows unless the path is the root of some
7551 drive; like "c:\". _stat64() will fail if it is "c:". */
7552 size_t len = strlen(m_abs_path);
7553 if (m_abs_path[m_abs_len - 1] == OS_PATH_SEPARATOR
7554 && m_abs_path[m_abs_len - 2] != ':') {
7555 m_abs_path[m_abs_len - 1] = '\0';
7556 }
7557 #endif /* WIN32 */
7558
7559 bool ret = os_file_status(m_abs_path, &exists, &type);
7560
7561 #ifdef _WIN32
7562 /* Put the separator back on. */
7563 if (m_abs_path[m_abs_len - 1] == '\0') {
7564 m_abs_path[m_abs_len - 1] = OS_PATH_SEPARATOR;
7565 }
7566 #endif /* WIN32 */
7567
7568 return(ret && exists && type == OS_FILE_TYPE_DIR);
7569 }
7570
/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
#define MF		fil_make_filepath
/* Log the most recently generated path and release it with ut_free(),
the same way other callers of fil_make_filepath() in this file release
the result. A NULL result is reported instead of being streamed. */
#define DISPLAY							\
	do {							\
		if (path != NULL) {				\
			ib::info() << path;			\
			ut_free(path);				\
			path = NULL;				\
		} else {					\
			ib::info() << "(null)";			\
		}						\
	} while (0)
/** Exercise fil_make_filepath() with a range of path, name and
extension combinations and log each result. Compiled only when
UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH is defined. */
void
test_make_filepath()
{
	char* path;
	const char* long_path =
		"this/is/a/very/long/path/including/a/very/"
		"looooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooooo"
		"oooooooooooooooooooooooooooooooooooooooooooooooong"
		"/folder/name";
	path = MF("/this/is/a/path/with/a/filename", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, ISL, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, CFG, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename", NULL, CFP, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.ibd", NULL, IBD, false); DISPLAY;
	path = MF("/this/is/a/path/with/a/filename.dat", NULL, IBD, false); DISPLAY;
	path = MF(NULL, "tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", ISL, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", CFG, false); DISPLAY;
	path = MF(NULL, "dbname/tablespacename", CFP, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", NO_EXT, false); DISPLAY;
	path = MF(NULL, "dbname\\tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, false); DISPLAY;
	path = MF("/this/is/a/path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("./this/is/a/path", "dbname/tablespacename.ibd", IBD, true); DISPLAY;
	path = MF("this\\is\\a\\path", "dbname/tablespacename", IBD, true); DISPLAY;
	path = MF("/this/is/a/path", "dbname\\tablespacename", IBD, true); DISPLAY;
	path = MF(long_path, NULL, IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, false); DISPLAY;
	path = MF(long_path, "tablespacename", IBD, true); DISPLAY;
}
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
7618 /* @} */
7619
7620 /** Release the reserved free extents.
7621 @param[in] n_reserved number of reserved extents */
7622 void
release_free_extents(ulint n_reserved)7623 fil_space_t::release_free_extents(ulint n_reserved)
7624 {
7625 ut_ad(rw_lock_own(&latch, RW_LOCK_X));
7626
7627 ut_a(n_reserved_extents >= n_reserved);
7628 n_reserved_extents -= n_reserved;
7629 }
7630