1 /*****************************************************************************
2
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation. The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License, version 2.0, for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26
27 /** @file fil/fil0fil.cc
28 The tablespace memory cache */
29
30 #include "my_config.h"
31
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <sys/types.h>
35
36 #include "arch0page.h"
37 #include "btr0btr.h"
38 #include "buf0buf.h"
39 #include "buf0flu.h"
40 #include "dict0boot.h"
41 #include "dict0dd.h"
42 #include "dict0dict.h"
43 #include "fsp0file.h"
44 #include "fsp0fsp.h"
45 #include "fsp0space.h"
46 #include "fsp0sysspace.h"
47 #include "ha_prototypes.h"
48 #include "hash0hash.h"
49 #include "log0recv.h"
50 #include "mach0data.h"
51 #include "mem0mem.h"
52 #include "mtr0log.h"
53 #include "my_dbug.h"
54
55 #include "clone0api.h"
56 #include "os0file.h"
57 #include "page0zip.h"
58 #include "sql/mysqld.h" // lower_case_file_system
59 #include "srv0srv.h"
60 #include "srv0start.h"
61
62 #ifndef UNIV_HOTBACKUP
63 #include "buf0lru.h"
64 #include "ibuf0ibuf.h"
65 #include "os0event.h"
66 #include "row0mysql.h"
67 #include "sql_backup_lock.h"
68 #include "sql_class.h"
69 #include "sync0sync.h"
70 #include "trx0purge.h"
71 #else /* !UNIV_HOTBACKUP */
72 #include <cstring>
73 #include "srv0srv.h"
74 #endif /* !UNIV_HOTBACKUP */
75 #include "xb0xb.h"
76
77 #include "os0thread-create.h"
78
79 #include "current_thd.h"
80 #include "ha_prototypes.h"
81
82 #include <array>
83 #include <fstream>
84 #include <functional>
85 #include <list>
86 #include <mutex>
87 #include <thread>
88 #include <tuple>
89 #include <unordered_map>
90
91 using Dirs = std::vector<std::string>;
92 using Space_id_set = std::set<space_id_t>;
93
94 constexpr char Fil_path::DB_SEPARATOR;
95 constexpr char Fil_path::OS_SEPARATOR;
96 constexpr const char *Fil_path::SEPARATOR;
97 constexpr const char *Fil_path::DOT_SLASH;
98 constexpr const char *Fil_path::DOT_DOT_SLASH;
99 constexpr const char *Fil_path::SLASH_DOT_DOT_SLASH;
100
101 dberr_t dict_stats_rename_table(const char *old_name, const char *new_name,
102 char *errstr, size_t errstr_sz);
103
104 /** Used for collecting the data in boot_tablespaces() */
105 namespace dd_fil {
106
107 enum {
108 /** DD Object ID */
109 OBJECT_ID,
110
111 /** InnoDB tablspace ID */
112 SPACE_ID,
113
114 /** DD/InnoDB tablespace name */
115 SPACE_NAME,
116
117 /** Path in DD tablespace */
118 OLD_PATH,
119
120 /** Path where it was found during the scan. */
121 NEW_PATH
122 };
123
124 using Moved = std::tuple<dd::Object_id, space_id_t, std::string, std::string,
125 std::string>;
126
127 using Tablespaces = std::vector<Moved>;
128 } // namespace dd_fil
129
fil_get_scan_threads(size_t num_files)130 size_t fil_get_scan_threads(size_t num_files) {
131 /* Number of additional threads required to scan all the files.
132 n_threads == 0 means that the main thread itself will do all the
133 work instead of spawning any additional threads. */
134 size_t n_threads = num_files / FIL_SCAN_MAX_TABLESPACES_PER_THREAD;
135
136 /* Return if no additional threads are needed. */
137 if (n_threads == 0) {
138 return 0;
139 }
140
141 /* Number of concurrent threads supported by the host machine. */
142 size_t max_threads =
143 FIL_SCAN_THREADS_PER_CORE * std::thread::hardware_concurrency();
144
145 /* If the number of concurrent threads supported by the host
146 machine could not be calculated, assume the supported threads
147 to be FIL_SCAN_MAX_THREADS. */
148 max_threads = max_threads == 0 ? FIL_SCAN_MAX_THREADS : max_threads;
149
150 /* Restrict the number of threads to the lower of number of threads
151 supported by the host machine or FIL_SCAN_MAX_THREADS. */
152 if (n_threads > max_threads) {
153 n_threads = max_threads;
154 }
155
156 if (n_threads > FIL_SCAN_MAX_THREADS) {
157 n_threads = FIL_SCAN_MAX_THREADS;
158 }
159
160 return n_threads;
161 }
162
163 /* uint16_t is the index into Tablespace_dirs::m_dirs */
164 using Scanned_files = std::vector<std::pair<uint16_t, std::string>>;
165
166 #ifdef UNIV_PFS_IO
167 mysql_pfs_key_t innodb_tablespace_open_file_key;
168 #endif /* UNIV_PFS_IO */
169
170 /** System tablespace. */
171 fil_space_t *fil_space_t::s_sys_space;
172
173 /** Redo log tablespace */
174 fil_space_t *fil_space_t::s_redo_space;
175
#ifdef UNIV_HOTBACKUP
/** Directories in which remote general tablespaces have been found in the
target directory during apply log operation */
Dir_set rem_gen_ts_dirs;

/** true in case the apply-log operation is being performed
in the data directory */
bool replay_in_datadir{false};

/* Re-define mutex macros to use the Mutex class defined by the MEB
source. MEB calls the routines in "fil0fil.cc" in parallel and,
therefore, the mutex protecting the critical sections of the tablespace
memory cache must be included also in the MEB compilation of this
module.

NOTE(review): mutex_create() constructs with placement new whereas
mutex_free() uses a plain delete - confirm that the two are never applied
to the same storage. */
#undef mutex_create
#undef mutex_free
#undef mutex_enter
#undef mutex_exit
#undef mutex_own
#undef mutex_validate

#define mutex_create(I, M) new (M) meb::Mutex()
#define mutex_free(M) delete (M)
#define mutex_enter(M) (M)->lock()
#define mutex_exit(M) (M)->unlock()
#define mutex_own(M) 1
#define mutex_validate(M) 1

/** Process a MLOG_FILE_CREATE redo record.
@param[in]  page_id  Page id of the redo log record
@param[in]  flags    Tablespace flags
@param[in]  name     Tablespace filename */
static void meb_tablespace_redo_create(const page_id_t &page_id,
                                       uint32_t flags, const char *name);

/** Process a MLOG_FILE_RENAME redo record.
@param[in]  page_id    Page id of the redo log record
@param[in]  from_name  Tablespace from filename
@param[in]  to_name    Tablespace to filename */
static void meb_tablespace_redo_rename(const page_id_t &page_id,
                                       const char *from_name,
                                       const char *to_name);

/** Process a MLOG_FILE_DELETE redo record.
@param[in]  page_id  Page id of the redo log record
@param[in]  name     Tablespace filename */
static void meb_tablespace_redo_delete(const page_id_t &page_id,
                                       const char *name);

#endif /* UNIV_HOTBACKUP */
226
227 /*
228 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
229 =============================================
230
231 The tablespace cache is responsible for providing fast read/write access to
232 tablespaces and logs of the database. File creation and deletion is done
233 in other modules which know more of the logic of the operation, however.
234
235 Only the system tablespace consists of a list of files. The size of these
236 files does not have to be divisible by the database block size, because
237 we may just leave the last incomplete block unused. When a new file is
238 appended to the tablespace, the maximum size of the file is also specified.
239 At the moment, we think that it is best to extend the file to its maximum
240 size already at the creation of the file, because then we can avoid dynamically
241 extending the file when more space is needed for the tablespace.
242
243 Non system tablespaces contain only a single file.
244
245 A block's position in the tablespace is specified with a 32-bit unsigned
246 integer. The files in the list are thought to be catenated, and the block
247 corresponding to an address n is the nth block in the catenated file (where
248 the first block is named the 0th block, and the incomplete block fragments
249 at the end of files are not taken into account). A tablespace can be extended
250 by appending a new file at the end of the list.
251
252 Our tablespace concept is similar to the one of Oracle.
253
254 To have fast access to a tablespace or a log file, we put the data structures
255 to a hash table. Each tablespace and log file is given a unique 32-bit
256 identifier, its tablespace ID.
257
258 Some operating systems do not support many open files at the same time.
259 Therefore, we put the open files in an LRU-list. If we need to open another
260 file, we may close the file at the end of the LRU-list. When an I/O-operation
261 is pending on a file, the file cannot be closed. We take the file nodes with
262 pending I/O-operations out of the LRU-list and keep a count of pending
263 operations. When an operation completes, we decrement the count and return
264 the file to the LRU-list if the count drops to zero.
265
266 The data structures (Fil_shard) that keep track of the tablespace ID to
267 fil_space_t* mapping are hashed on the tablespace ID. The tablespace name to
268 fil_space_t* mapping is stored in the same shard. A shard tracks the flushing
269 and open state of a file. When we run out of open file handles, we use a ticketing
270 system to serialize the file open, see Fil_shard::reserve_open_slot() and
271 Fil_shard::release_open_slot().
272
273 When updating the global/shared data in Fil_system acquire the mutexes of
274 all shards in ascending order. The shard mutex covers the fil_space_t data
275 members as noted in the fil_space_t and fil_node_t definition. */
276
277 /** Reference to the server data directory. */
278 Fil_path MySQL_datadir_path;
279
280 /** Reference to the server undo directory. */
281 Fil_path MySQL_undo_path;
282
283 /** The undo path is different from any other known directory. */
284 bool MySQL_undo_path_is_unique;
285
286 /** Common InnoDB file extentions */
287 const char *dot_ext[] = {"", ".ibd", ".cfg", ".cfp", ".ibt", ".ibu", ".dblwr"};
288
289 /** The number of fsyncs done to the log */
290 ulint fil_n_log_flushes = 0;
291
292 /** Number of pending redo log flushes */
293 ulint fil_n_pending_log_flushes = 0;
294
295 /** Number of pending tablespace flushes */
296 ulint fil_n_pending_tablespace_flushes = 0;
297
298 /** Number of files currently open */
299 ulint fil_n_file_opened = 0;
300
/** Outcome codes for loading a tablespace file. */
enum fil_load_status {
  /** The tablespace file(s) were found and valid. */
  FIL_LOAD_OK = 0,

  /** The name no longer matches space_id */
  FIL_LOAD_ID_CHANGED = 1,

  /** The file(s) were not found */
  FIL_LOAD_NOT_FOUND = 2,

  /** The file(s) were not valid */
  FIL_LOAD_INVALID = 3,

  /** Invalid encryption metadata in page 0 */
  FIL_LOAD_INVALID_ENCRYPTION_META = 4,

  /** The tablespace file ID in the first page doesn't match
  expected value. */
  FIL_LOAD_MISMATCH = 5
};
321
/** File operations for tablespace */
enum fil_operation_t {
  /** delete a single-table tablespace */
  FIL_OPERATION_DELETE = 0,

  /** close a single-table tablespace */
  FIL_OPERATION_CLOSE = 1
};
331
332 /** The null file address */
333 fil_addr_t fil_addr_null = {FIL_NULL, 0};
334
335 /** Maximum number of pages to read to determine the space ID. */
336 static const size_t MAX_PAGES_TO_READ = 1;
337
338 #ifndef UNIV_HOTBACKUP
339 /** Maximum number of shards supported. */
340 static const size_t MAX_SHARDS = 64;
341
342 /** The redo log is in its own shard. */
343 static const size_t REDO_SHARD = MAX_SHARDS - 1;
344
345 /** Number of undo shards to reserve. */
346 static const size_t UNDO_SHARDS = 4;
347
348 /** The UNDO logs have their own shards (4). */
349 static const size_t UNDO_SHARDS_START = REDO_SHARD - (UNDO_SHARDS + 1);
350 #else /* !UNIV_HOTBACKUP */
351
352 /** Maximum number of shards supported. */
353 static const size_t MAX_SHARDS = 1;
354
355 /** The redo log is in its own shard. */
356 static const size_t REDO_SHARD = 0;
357
358 /** The UNDO logs have their own shards (4). */
359 static const size_t UNDO_SHARDS_START = 0;
360 #endif /* !UNIV_HOTBACKUP */
361
362 /** Sentinel for empty open slot. */
363 static const size_t EMPTY_OPEN_SLOT = std::numeric_limits<size_t>::max();
364
365 /** We want to store the line number from where it was called. */
366 #define mutex_acquire() acquire(__LINE__)
367
368 /** Hash a NUL terminated 'string' */
369 struct Char_Ptr_Hash {
370 /** Hashing function
371 @param[in] ptr NUL terminated string to hash
372 @return the hash */
operator ()Char_Ptr_Hash373 size_t operator()(const char *ptr) const { return (ut_fold_string(ptr)); }
374 };
375
/** Equality functor for NUL terminated 'strings'. */
struct Char_Ptr_Compare {
  /** Compare the contents of two NUL terminated strings.
  @param[in]  lhs  Left hand side
  @param[in]  rhs  Right hand side
  @return true if the contents match */
  bool operator()(const char *lhs, const char *rhs) const {
    const int order = strcmp(lhs, rhs);

    return order == 0;
  }
};
386
387 /** Tablespace files disovered during startup. */
388 class Tablespace_files {
389 public:
390 using Names = std::vector<std::string, ut_allocator<std::string>>;
391 using Paths = std::unordered_map<space_id_t, Names>;
392
393 /** Default constructor
394 @param[in] dir Directory that the files are under */
395 explicit Tablespace_files(const std::string &dir);
396
397 /** Add a space ID to filename mapping.
398 @param[in] space_id Tablespace ID
399 @param[in] name File name.
400 @return number of files that map to the space ID */
401 size_t add(space_id_t space_id, const std::string &name)
402 MY_ATTRIBUTE((warn_unused_result));
403
404 /** Get the file names that map to a space ID
405 @param[in] space_id Tablespace ID
406 @return the filenames that map to space id */
find(space_id_t space_id)407 Names *find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
408 ut_ad(space_id != TRX_SYS_SPACE);
409
410 if (dict_sys_t::is_reserved(space_id) &&
411 space_id != dict_sys_t::s_space_id) {
412 auto it = m_undo_paths.find(space_id);
413
414 if (it != m_undo_paths.end()) {
415 return (&it->second);
416 }
417
418 } else {
419 auto it = m_ibd_paths.find(space_id);
420
421 if (it != m_ibd_paths.end()) {
422 return (&it->second);
423 }
424 }
425
426 return (nullptr);
427 }
428
429 /** Remove the entry for the space ID.
430 @param[in] space_id Tablespace ID mapping to remove
431 @return true if erase successful */
erase_path(space_id_t space_id)432 bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
433 ut_ad(space_id != TRX_SYS_SPACE);
434
435 if (dict_sys_t::is_reserved(space_id) &&
436 space_id != dict_sys_t::s_space_id) {
437 auto n_erased = m_undo_paths.erase(space_id);
438
439 return (n_erased == 1);
440 } else {
441 auto n_erased = m_ibd_paths.erase(space_id);
442
443 return (n_erased == 1);
444 }
445
446 return (false);
447 }
448
449 /** Clear all the tablespace data. */
clear()450 void clear() {
451 m_ibd_paths.clear();
452 m_undo_paths.clear();
453 }
454
455 /** Open all known tablespaces. */
456 void open_ibds() const;
457
458 /** @return m_dir */
root() const459 const Fil_path &root() const { return (m_dir); }
460
461 /** @return the directory path specified by the user. */
path() const462 const std::string &path() const { return (m_dir.path()); }
463
464 private:
465 /* Note: The file names in m_ibd_paths and m_undo_paths are relative
466 to m_real_path. */
467
468 /** Mapping from tablespace ID to data filenames */
469 Paths m_ibd_paths;
470
471 /** Mapping from tablespace ID to Undo files */
472 Paths m_undo_paths;
473
474 /** Top level directory where the above files were found. */
475 Fil_path m_dir;
476 };
477
478 /** Directories scanned during startup and the files discovered. */
479 class Tablespace_dirs {
480 public:
481 using Result = std::pair<std::string, Tablespace_files::Names *>;
482
483 /** Constructor */
Tablespace_dirs()484 Tablespace_dirs()
485 : m_dirs()
486 #if !defined(__SUNPRO_CC)
487 ,
488 m_checked()
489 #endif /* !__SUNPRO_CC */
490 {
491 #if defined(__SUNPRO_CC)
492 m_checked = ATOMIC_VAR_INIT(0);
493 #endif /* __SUNPRO_CC */
494 }
495
496 /** Normalize and save a directory to scan for IBD and IBU datafiles
497 before recovery.
498 @param[in] directory directory to scan for ibd and ibu files
499 @param[in] is_undo_dir true for an undo directory */
500 void set_scan_dir(const std::string &directory, bool is_undo_dir = false);
501
502 /** Normalize and save a list of directories to scan for IBD and IBU
503 datafiles before recovery.
504 @param[in] directories Directories to scan for ibd and ibu files */
505 void set_scan_dirs(const std::string &directories);
506
507 /** Discover tablespaces by reading the header from .ibd files.
508 @return DB_SUCCESS if all goes well */
509 dberr_t scan(bool populate_fil_cache) MY_ATTRIBUTE((warn_unused_result));
510
511 /** Clear all the tablespace file data but leave the list of
512 scanned directories in place. */
clear()513 void clear() {
514 for (auto &dir : m_dirs) {
515 dir.clear();
516 }
517
518 m_checked = 0;
519 }
520
521 /** Erase a space ID to filename mapping.
522 @param[in] space_id Tablespace ID to erase
523 @return true if successful */
erase_path(space_id_t space_id)524 bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
525 for (auto &dir : m_dirs) {
526 if (dir.erase_path(space_id)) {
527 return (true);
528 }
529 }
530
531 return (false);
532 }
533
534 /* Find the first matching space ID -> name mapping.
535 @param[in] space_id Tablespace ID
536 @return directory searched and pointer to names that map to the
537 tablespace ID */
find(space_id_t space_id)538 Result find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
539 for (auto &dir : m_dirs) {
540 const auto names = dir.find(space_id);
541
542 if (names != nullptr) {
543 return (Result{dir.path(), names});
544 }
545 }
546
547 return (Result{"", nullptr});
548 }
549
550 /** Determine if this Fil_path contains the path provided.
551 @param[in] path file or directory path to compare.
552 @return true if this Fil_path contains path */
contains(const std::string & path) const553 bool contains(const std::string &path) const
554 MY_ATTRIBUTE((warn_unused_result)) {
555 const Fil_path descendant{path};
556
557 for (const auto &dir : m_dirs) {
558 if (dir.root().is_same_as(descendant) ||
559 dir.root().is_ancestor(descendant)) {
560 return (true);
561 }
562 }
563 return (false);
564 }
565
566 /** Insert a file with given space ID to filename mapping.
567 @param[in] space_id Tablespace ID to insert
568 @param[in] filename file name to insert
569 @return true if successful */
insert(space_id_t space_id,const std::string & filename)570 bool insert(space_id_t space_id, const std::string &filename)
571 MY_ATTRIBUTE((warn_unused_result)) {
572 Fil_path file{filename};
573
574 for (auto &dir : m_dirs) {
575 const auto &d = dir.root().abs_path();
576 auto abs_path = Fil_path::get_real_path(d);
577
578 if (dir.root().is_ancestor(file) ||
579 abs_path.compare(file.abs_path()) == 0) {
580 return (dir.add(space_id, filename));
581 }
582 }
583
584 return (false);
585 }
586
587 /** Get the list of directories that InnoDB knows about.
588 @return the list of directories 'dir1;dir2;....;dirN' */
get_dirs() const589 std::string get_dirs() const {
590 std::string dirs;
591
592 ut_ad(!m_dirs.empty());
593
594 for (const auto &dir : m_dirs) {
595 dirs.append(dir.root());
596 dirs.push_back(FIL_PATH_SEPARATOR);
597 }
598
599 dirs.pop_back();
600
601 ut_ad(!dirs.empty());
602
603 return (dirs);
604 }
605
606 /** Open all known tablespaces. */
607 void open_ibds() const;
608
609 private:
610 /** Print the duplicate filenames for a tablespce ID to the log
611 @param[in] duplicates Duplicate tablespace IDs*/
612 void print_duplicates(const Space_id_set &duplicates);
613
614 /** first=dir path from the user, second=files found under first. */
615 using Scanned = std::vector<Tablespace_files>;
616
617 /** Report a warning that a path is being ignored and include the reason. */
618 void warn_ignore(std::string path_in, const char *reason);
619
620 /** Add a single path specification to this list of tablespace directories.
621 Convert it to an absolute path. Check if the path is valid. Ignore
622 unreadable, duplicate or invalid directories.
623 @param[in] str Path specification to tokenize
624 @param[in] is_undo_dir true for an undo directory */
625 void add_path(const std::string &str, bool is_undo_dir = false);
626
627 /** Add a delimited list of path specifications to this list of tablespace
628 directories. Convert relative paths to absolute paths. Check if the paths
629 are valid. Ignore unreadable, duplicate or invalid directories.
630 @param[in] str Path specification to tokenize
631 @param[in] delimiters Delimiters */
632 void add_paths(const std::string &str, const std::string &delimiters);
633
634 using Const_iter = Scanned_files::const_iterator;
635
636 /** Check for duplicate tablespace IDs.
637 @param[in] start Start of slice
638 @param[in] end End of slice
639 @param[in] thread_id Thread ID
640 @param[in,out] mutex Mutex protecting the global state
641 @param[in,out] unique To check for duplciates
642 @param[in,out] duplicates Duplicate space IDs found */
643 void duplicate_check(const Const_iter &start, const Const_iter &end,
644 size_t thread_id, std::mutex *mutex,
645 Space_id_set *unique, Space_id_set *duplicates);
646
647 /** Open IBD tablespaces.
648 @param[in] start Start of slice
649 @param[in] end End of slice
650 @param[in] thread_id Thread ID
651 @param[out] result false in case of failure */
652 void open_ibd(const Const_iter &start, const Const_iter &end,
653 size_t thread_id, bool &result);
654
655 private:
656 /** Directories scanned and the files discovered under them. */
657 Scanned m_dirs;
658
659 /** Number of files checked. */
660 std::atomic_size_t m_checked;
661 };
662
/** Determine if user has explicitly disabled fsync(). On Windows this is
always false; elsewhere it is true for a FIL_TYPE_TABLESPACE space when the
flush method is SRV_UNIX_O_DIRECT_NO_FSYNC. */
#ifdef _WIN32
#define fil_buffering_disabled(s) (0)
#else /* _WIN32 */
#define fil_buffering_disabled(s)         \
  ((s)->purpose == FIL_TYPE_TABLESPACE && \
   srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)
#endif /* _WIN32 */
671
672 class Fil_shard {
673 using File_list = UT_LIST_BASE_NODE_T(fil_node_t);
674 using Space_list = UT_LIST_BASE_NODE_T(fil_space_t);
675 using Spaces = std::unordered_map<space_id_t, fil_space_t *>;
676
677 using Names = std::unordered_map<const char *, fil_space_t *, Char_Ptr_Hash,
678 Char_Ptr_Compare>;
679
680 public:
681 /** Constructor
682 @param[in] shard_id Shard ID */
683 explicit Fil_shard(size_t shard_id);
684
685 /** Destructor */
~Fil_shard()686 ~Fil_shard() {
687 mutex_destroy(&m_mutex);
688 ut_a(UT_LIST_GET_LEN(m_LRU) == 0);
689 ut_a(UT_LIST_GET_LEN(m_unflushed_spaces) == 0);
690 }
691
692 /** @return the shard ID */
id() const693 size_t id() const { return (m_id); }
694
695 /** Acquire the mutex.
696 @param[in] line Line number from where it was called */
acquire(int line) const697 void acquire(int line) const {
698 #ifndef UNIV_HOTBACKUP
699 m_mutex.enter(srv_n_spin_wait_rounds, srv_spin_wait_delay, __FILE__, line);
700 #else
701 mutex_enter(&m_mutex);
702 #endif /* !UNIV_HOTBACKUP */
703 }
704
705 /** Release the mutex. */
mutex_release() const706 void mutex_release() const { mutex_exit(&m_mutex); }
707
#ifdef UNIV_DEBUG
/** Debug-only ownership check of the shard mutex.
@return true if the mutex is owned. */
bool mutex_owned() const {
  return mutex_own(&m_mutex);
}
#endif /* UNIV_DEBUG */
712
713 /** Mutex protecting this shard. */
714
715 #ifndef UNIV_HOTBACKUP
716 mutable ib_mutex_t m_mutex;
717 #else
718 mutable meb::Mutex m_mutex;
719 #endif /* !UNIV_HOTBACKUP */
720
721 /** Fetch the fil_space_t instance that maps to space_id.
722 @param[in] space_id Tablespace ID to lookup
723 @return tablespace instance or nullptr if not found. */
get_space_by_id(space_id_t space_id) const724 fil_space_t *get_space_by_id(space_id_t space_id) const
725 MY_ATTRIBUTE((warn_unused_result)) {
726 ut_ad(m_id == REDO_SHARD || mutex_owned());
727
728 auto it = m_spaces.find(space_id);
729
730 if (it == m_spaces.end()) {
731 return (nullptr);
732 }
733
734 ut_ad(it->second->magic_n == FIL_SPACE_MAGIC_N);
735
736 return (it->second);
737 }
738
739 /** Fetch the fil_space_t instance that maps to the name.
740 @param[in] name Tablespace name to lookup
741 @return tablespace instance or nullptr if not found. */
get_space_by_name(const char * name) const742 fil_space_t *get_space_by_name(const char *name) const
743 MY_ATTRIBUTE((warn_unused_result)) {
744 ut_ad(mutex_owned());
745
746 auto it = m_names.find(name);
747
748 if (it == m_names.end()) {
749 return (nullptr);
750 }
751
752 ut_ad(it->second->magic_n == FIL_SPACE_MAGIC_N);
753
754 return (it->second);
755 }
756
757 /** Tries to close a file in the shard LRU list.
758 The caller must hold the Fil_shard::m_mutex.
759 @param[in] print_info if true, prints information
760 why it cannot close a file
761 @return true if success, false if should retry later */
762 bool close_files_in_LRU(bool print_info) MY_ATTRIBUTE((warn_unused_result));
763
764 /** Remove the file node from the LRU list.
765 @param[in,out] file File for the tablespace */
766 void remove_from_LRU(fil_node_t *file);
767
768 /** Add the file node to the LRU list if required.
769 @param[in,out] file File for the tablespace */
770 void file_opened(fil_node_t *file);
771
772 /** Open all the system files.
773 @param[in] max_n_open Max files that can be opened.
774 @param[in] n_open Current number of open files */
775 void open_system_tablespaces(size_t max_n_open, size_t *n_open);
776
777 /** Close a tablespace file.
778 @param[in,out] file Tablespace file to close
779 @param[in] LRU_close true if called from LRU close */
780 void close_file(fil_node_t *file, bool LRU_close);
781
782 /** Close a tablespace file based on tablespace ID.
783 @param[in] space_id Tablespace ID
784 @return false if space_id was not found. */
785 bool close_file(space_id_t space_id);
786
787 /** Prepare to free a file object from a tablespace
788 memory cache.
789 @param[in,out] file Tablespace file
790 @param[in] space tablespace */
791 void file_close_to_free(fil_node_t *file, fil_space_t *space);
792
793 /** Close log files.
794 @param[in] free_all If set then free all */
795 void close_log_files(bool free_all);
796
797 /** Close all open files. */
798 void close_all_files();
799
800 /** Detach a space object from the tablespace memory cache and
801 closes the tablespace files but does not delete them.
802 There must not be any pending I/O's or flushes on the files.
803 @param[in,out] space tablespace */
804 void space_detach(fil_space_t *space);
805
806 /** Delete the instance that maps to space_id
807 @param[in] space_id Tablespace ID to delete */
space_delete(space_id_t space_id)808 void space_delete(space_id_t space_id) {
809 ut_ad(mutex_owned());
810
811 auto it = m_spaces.find(space_id);
812
813 if (it != m_spaces.end()) {
814 m_names.erase(it->second->name);
815 m_spaces.erase(it);
816 }
817 }
818
819 #ifndef UNIV_HOTBACKUP
820 /** Purge entries from m_deleted that are lower than LWM.
821 @param[in] lwm No dirty pages in the buffer pool less than this LSN. */
checkpoint(lsn_t lwm)822 void checkpoint(lsn_t lwm) {
823 /* Avoid cleaning up old undo files while this is on. */
824 DBUG_EXECUTE_IF("ib_undo_trunc_checkpoint_off", return;);
825
826 mutex_acquire();
827
828 for (auto it = m_deleted.begin(); it != m_deleted.end(); /* No op */) {
829 auto space = it->second;
830
831 if (space->m_deleted_lsn <= lwm) {
832 ut_a(space->files.front().n_pending == 0);
833
834 space_delete(space->id);
835 space_free_low(space);
836
837 it = m_deleted.erase(it);
838 } else {
839 ++it;
840 }
841 }
842
843 mutex_release();
844 }
845
count_deleted(space_id_t undo_num)846 size_t count_deleted(space_id_t undo_num) {
847 size_t count = 0;
848
849 mutex_acquire();
850
851 for (auto deleted : m_deleted) {
852 if (undo::id2num(deleted.first) == undo_num) {
853 count++;
854 }
855 }
856
857 mutex_release();
858
859 return (count);
860 }
861
862 /** Check if a particular undo space_id for a page in the buffer pool has
863 been deleted recently. Its space_id will be found in m_deleted until
864 Fil:shard::checkpoint removes all its pages from the buffer pool and the
865 fil_space_t from Fil_system.
866 @return true if this space_id is in the list of recently deleted spaces. */
is_deleted(space_id_t space_id)867 bool is_deleted(space_id_t space_id) {
868 bool found = false;
869
870 mutex_acquire();
871
872 for (auto deleted : m_deleted) {
873 if (deleted.first == space_id) {
874 found = true;
875 break;
876 }
877 }
878
879 mutex_release();
880
881 return (found);
882 }
883
884 #endif /* !UNIV_HOTBACKUP */
885
886 /** Frees a space object from the tablespace memory cache.
887 Closes a tablespaces' files but does not delete them.
888 There must not be any pending I/O's or flushes on the files.
889 @param[in] space_id Tablespace ID
890 @return fil_space_t instance on success or nullptr */
891 fil_space_t *space_free(space_id_t space_id)
892 MY_ATTRIBUTE((warn_unused_result));
893
894 /** Map the space ID and name to the tablespace instance.
895 @param[in] space Tablespace instance */
896 void space_add(fil_space_t *space);
897
898 /** Prepare to free a file. Remove from the unflushed list
899 if there are no pending flushes.
900 @param[in,out] file File instance to free */
901 void prepare_to_free_file(fil_node_t *file);
902
903 /** If the tablespace is on the unflushed list and there
904 are no pending flushes then remove from the unflushed list.
905 @param[in,out] space Tablespace to remove*/
906 void remove_from_unflushed_list(fil_space_t *space);
907
908 /** Updates the data structures when an I/O operation
909 finishes. Updates the pending I/O's field in the file
910 appropriately.
911 @param[in] file Tablespace file
912 @param[in] type Marks the file as modified
913 if type == WRITE */
914 void complete_io(fil_node_t *file, const IORequest &type);
915
916 /** Prepares a file for I/O. Opens the file if it is closed.
917 Updates the pending I/O's field in the file and the system
918 appropriately. Takes the file off the LRU list if it is in
919 the LRU list.
920 @param[in] file Tablespace file for IO
921 @param[in] extend true if file is being extended
922 @return false if the file can't be opened, otherwise true */
923 bool prepare_file_for_io(fil_node_t *file, bool extend)
924 MY_ATTRIBUTE((warn_unused_result));
925
926 /** Reserves the mutex and tries to make sure we can
927 open at least one file while holding it. This should be called
928 before calling prepare_file_for_io(), because that function
929 may need to open a file.
930 @param[in] space_id Tablespace ID
931 @param[out] space Tablespace instance
932 @return true if a slot was reserved. */
933 bool mutex_acquire_and_get_space(space_id_t space_id, fil_space_t *&space)
934 MY_ATTRIBUTE((warn_unused_result));
935
936 /** Remap the tablespace to the new name.
937 @param[in] space Tablespace instance with old name
938 @param[in] new_name New tablespace name */
939 void update_space_name_map(fil_space_t *space, const char *new_name);
940
941 /** Flush the redo log writes to disk, possibly cached by the OS. */
942 void flush_file_redo();
943
944 /** Collect the tablespace IDs of unflushed tablespaces in space_ids.
945 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
946 can be ORred */
947 void flush_file_spaces(uint8_t purpose);
948
949 /** Try to extend a tablespace if it is smaller than the specified size.
950 @param[in,out] space tablespace
951 @param[in] size desired size in pages
952 @return whether the tablespace is at least as big as requested */
953 bool space_extend(fil_space_t *space, page_no_t size)
954 MY_ATTRIBUTE((warn_unused_result));
955
956 /** Flushes to disk possible writes cached by the OS. If the space does
957 not exist or is being dropped, does not do anything.
958 @param[in] space_id File space ID (this can be a group of
959 log files or a tablespace of the
960 database) */
961 void space_flush(space_id_t space_id);
962
963 /** Open a file of a tablespace.
964 The caller must own the fil_system mutex.
965 @param[in,out] file Tablespace file
966 @param[in] extend true if the file is being extended
967 @return false if the file can't be opened, otherwise true */
968 bool open_file(fil_node_t *file, bool extend)
969 MY_ATTRIBUTE((warn_unused_result));
970
971 /** Checks if all the file nodes in a space are flushed.
972 The caller must hold all fil_system mutexes.
973 @param[in] space Tablespace to check
974 @return true if all are flushed */
975 bool space_is_flushed(const fil_space_t *space)
976 MY_ATTRIBUTE((warn_unused_result));
977
978 /** Open each file of a tablespace if not already open.
979 @param[in] space_id tablespace identifier
980 @retval true if all file nodes were opened
981 @retval false on failure */
982 bool space_open(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result));
983
984 /** Opens the files associated with a tablespace and returns a
985 pointer to the fil_space_t that is in the memory cache associated
986 with a space id.
987 @param[in] space_id Get the tablespace instance for this ID
988 @return fil_space_t pointer, nullptr if space not found */
989 fil_space_t *space_load(space_id_t space_id)
990 MY_ATTRIBUTE((warn_unused_result));
991
992 /** Check pending operations on a tablespace.
993 @param[in] space_id Tablespace ID
994 @param[out] space tablespace instance in memory
995 @param[out] path tablespace path
996 @return DB_SUCCESS or DB_TABLESPACE_NOT_FOUND. */
997 dberr_t space_check_pending_operations(space_id_t space_id,
998 fil_space_t *&space, char **path) const
999 MY_ATTRIBUTE((warn_unused_result));
1000
1001 /** Rename a single-table tablespace.
1002 The tablespace must exist in the memory cache.
1003 @param[in] space_id Tablespace ID
1004 @param[in] old_path Old file name
1005 @param[in] new_name New tablespace name in the schema/space
1006 @param[in] new_path_in New file name, or nullptr if it
1007 is located in the normal data directory
1008 @return InnoDB error code */
1009 dberr_t space_rename(space_id_t space_id, const char *old_path,
1010 const char *new_name, const char *new_path_in)
1011 MY_ATTRIBUTE((warn_unused_result));
1012
1013 /** Deletes an IBD or IBU tablespace.
1014 The tablespace must be cached in the memory cache. This will delete the
1015 datafile, fil_space_t & fil_node_t entries from the file_system_t cache.
1016 @param[in] space_id Tablespace ID
1017 @param[in] buf_remove Specify the action to take on the pages
1018 for this table in the buffer pool.
1019 @return DB_SUCCESS, DB_TABLESPACE_NOT_FOUND or DB_IO_ERROR */
1020 dberr_t space_delete(space_id_t space_id, buf_remove_t buf_remove)
1021 MY_ATTRIBUTE((warn_unused_result));
1022
1023 /** Truncate the tablespace to needed size.
1024 @param[in] space_id Tablespace ID to truncate
1025 @param[in] size_in_pages Truncate size.
1026 @return true if truncate was successful. */
1027 bool space_truncate(space_id_t space_id, page_no_t size_in_pages)
1028 MY_ATTRIBUTE((warn_unused_result));
1029
1030 /** Create a space memory object and put it to the fil_system hash
1031 table. The tablespace name is independent from the tablespace file-name.
1032 Error messages are issued to the server log.
1033 @param[in] name Tablespace name
1034 @param[in] space_id Tablespace ID
1035 @param[in] flags Tablespace flags
1036 @param[in] purpose Tablespace purpose
1037 @return pointer to created tablespace
1038 @retval nullptr on failure (such as when the same tablespace exists) */
1039 fil_space_t *space_create(const char *name, space_id_t space_id,
1040 uint32_t flags, fil_type_t purpose)
1041 MY_ATTRIBUTE((warn_unused_result));
1042
1043 /** Adjust temporary auto-generated names created during
1044 file discovery with correct tablespace names from the DD.
1045 @param[in,out] space Tablespace
1046 @param[in] dd_space_name Tablespace name from the DD
1047 @return true if the tablespace is a general or undo tablespace. */
1048 bool adjust_space_name(fil_space_t *space, const char *dd_space_name);
1049
1050 /** Returns true if a matching tablespace exists in the InnoDB
1051 tablespace memory cache.
1052 @param[in] space_id Tablespace ID
1053 @param[in] name Tablespace name used in space_create().
1054 @param[in] print_err Print detailed error information to the
1055 error log if a matching tablespace is
1056 not found from memory.
1057 @param[in] adjust_space Whether to adjust space id on mismatch
1058 @param[in] heap Heap memory
1059 @param[in] table_id table id
1060 @return true if a matching tablespace exists in the memory cache */
1061 bool space_check_exists(space_id_t space_id, const char *name, bool print_err,
1062 bool adjust_space, mem_heap_t *heap,
1063 table_id_t table_id)
1064 MY_ATTRIBUTE((warn_unused_result));
1065
1066 /** Read or write log file data synchronously.
1067 @param[in] type IO context
1068 @param[in] page_id page id
1069 @param[in] page_size page size
1070 @param[in] byte_offset remainder of offset in bytes; in AIO
1071 this must be divisible by the OS block
1072 size
1073 @param[in] len how many bytes to read or write; this
1074 must not cross a file boundary; in AIO
1075 this must be a block size multiple
1076 @param[in,out] buf buffer where to store read data or
1077 from where to write
1078 @return error code
1079 @retval DB_SUCCESS on success */
1080 dberr_t do_redo_io(const IORequest &type, const page_id_t &page_id,
1081 const page_size_t &page_size, ulint byte_offset, ulint len,
1082 void *buf) MY_ATTRIBUTE((warn_unused_result));
1083
1084 /** Read or write data. This operation could be asynchronous (aio).
1085 @param[in] type IO context
1086 @param[in] sync whether synchronous aio is desired
1087 @param[in] page_id page id
1088 @param[in] page_size page size
1089 @param[in] byte_offset remainder of offset in bytes; in AIO
1090 this must be divisible by the OS
1091 block size
1092 @param[in] len how many bytes to read or write;
1093 this must not cross a file boundary;
1094 in AIO this must be a block size
1095 multiple
1096 @param[in,out] buf buffer where to store read data
1097 or from where to write; in AIO
1098 this must be appropriately aligned
1099 @param[in] message message for AIO handler if !sync,
1100 else ignored
1101 @return error code
1102 @retval DB_SUCCESS on success
1103 @retval DB_TABLESPACE_DELETED if the tablespace does not exist */
1104 dberr_t do_io(const IORequest &type, bool sync, const page_id_t &page_id,
1105 const page_size_t &page_size, ulint byte_offset, ulint len,
1106 void *buf, void *message) MY_ATTRIBUTE((warn_unused_result));
1107
1108 /** Iterate through all tablespaces
1109 @param[in] include_log Include redo log space, if true
1110 @param[in] f Callback
1111 @return any error returned by the callback function. */
1112 dberr_t iterate_spaces(bool include_log, Fil_space_iterator::Function &f)
1113 MY_ATTRIBUTE((warn_unused_result));
1114
1115 /** Iterate through all persistent tablespace files
1116 (FIL_TYPE_TABLESPACE) returning the nodes via callback function cbk.
1117 @param[in] include_log include log files, if true
1118 @param[in] f Callback
1119 @return any error returned by the callback function. */
1120 dberr_t iterate(bool include_log, Fil_iterator::Function &f)
1121 MY_ATTRIBUTE((warn_unused_result));
1122
1123 /** Open an ibd tablespace and add it to the InnoDB data structures.
1124 This is similar to fil_ibd_open() except that it is used while
1125 processing the redo and DDL log, so the data dictionary is not
1126 available and very little validation is done. The tablespace name
1127 is extracted from the dbname/tablename.ibd portion of the filename,
1128 which assumes that the file is a file-per-table tablespace. Any name
1129 will do for now. General tablespace names will be read from the
1130 dictionary after it has been recovered. The tablespace flags are read
1131 at this time from the first page of the file in validate_for_recovery().
1132 @param[in] space_id tablespace ID
1133 @param[in] path path/to/databasename/tablename.ibd
1134 @param[out] space the tablespace, or nullptr on error
1135 @return status of the operation */
1136 fil_load_status ibd_open_for_recovery(space_id_t space_id,
1137 const std::string &path,
1138 fil_space_t *&space)
1139 MY_ATTRIBUTE((warn_unused_result));
1140
1141 /** Attach a file to a tablespace
1142 @param[in] name file name of a file that is not open
1143 @param[in] size file size in entire database blocks
1144 @param[in,out] space tablespace from fil_space_create()
1145 @param[in] is_raw true if this is a raw device
1146 or partition
1147 @param[in] punch_hole true if supported for this file
1148 @param[in] atomic_write true if the file has atomic write
1149 enabled
1150 @param[in] max_pages maximum number of pages in file
1151 @return pointer to the file node
1152 @retval nullptr if error */
1153 fil_node_t *create_node(const char *name, page_no_t size, fil_space_t *space,
1154 bool is_raw, bool punch_hole, bool atomic_write,
1155 page_no_t max_pages = PAGE_NO_MAX)
1156 MY_ATTRIBUTE((warn_unused_result));
1157
1158 #ifdef UNIV_DEBUG
1159 /** Validate a shard. */
1160 void validate() const;
1161 #endif /* UNIV_DEBUG */
1162
1163 #ifdef UNIV_HOTBACKUP
1164 /** Extends all tablespaces to the size stored in the space header.
1165 During the mysqlbackup --apply-log phase we extended the spaces
1166 on-demand so that log records could be applied, but that may have
1167 left spaces still too small compared to the size stored in the space
1168 header. */
1169 void meb_extend_tablespaces_to_stored_len();
1170 #endif /* UNIV_HOTBACKUP */
1171
1172 /** Free a tablespace object on which fil_space_detach() was invoked.
1173 There must not be any pending i/o's or flushes on the files.
1174 @param[in,out] space tablespace */
1175 static void space_free_low(fil_space_t *&space);
1176
1177 /** Wait for an empty slot to reserve for opening a file.
1178 @return true on success. */
1179 static bool reserve_open_slot(size_t shard_id)
1180 MY_ATTRIBUTE((warn_unused_result));
1181
1182 /** Release the slot reserved for opening a file.
1183 @param[in] shard_id ID of shard releasing the slot */
1184 static void release_open_slot(size_t shard_id);
1185
1186 /** We are going to do a rename file and want to stop new I/O
1187 for a while.
1188 @param[in] space Tablespace for which we want to
1189 wait for IO to stop */
1190 static void wait_for_io_to_stop(const fil_space_t *space);
1191
1192 private:
1193 /** We keep log files and system tablespace files always open; this is
1194 important in preventing deadlocks in this module, as a page read
1195 completion often performs another read from the insert buffer. The
1196 insert buffer is in tablespace TRX_SYS_SPACE, and we cannot end up
1197 waiting in this function.
1198 @param[in] space_id Tablespace ID to look up
1199 @return tablespace instance */
1200 fil_space_t *get_reserved_space(space_id_t space_id)
1201 MY_ATTRIBUTE((warn_unused_result));
1202
1203 /** Prepare for truncating a single-table tablespace.
1204 1) Check pending operations on a tablespace;
1205 2) Remove all insert buffer entries for the tablespace;
1206 @param[in] space_id Tablespace ID
1207 @return DB_SUCCESS or error */
1208 dberr_t space_prepare_for_truncate(space_id_t space_id)
1209 MY_ATTRIBUTE((warn_unused_result));
1210
1211 /** Note that a write IO has completed.
1212 @param[in,out] file File on which a write was
1213 completed */
1214 void write_completed(fil_node_t *file);
1215
1216 /** If the tablespace is not on the unflushed list, add it.
1217 @param[in,out] space Tablespace to add */
1218 void add_to_unflushed_list(fil_space_t *space);
1219
1220 /** Check for pending operations.
1221 @param[in] space tablespace
1222 @param[in] count number of attempts so far
1223 @return 0 if no pending operations else count + 1. */
1224 ulint space_check_pending_operations(fil_space_t *space, ulint count) const
1225 MY_ATTRIBUTE((warn_unused_result));
1226
1227 /** Check for pending IO.
1228 @param[in] space Tablespace to check
1229 @param[in] file File in space list
1230 @param[in] count number of attempts so far
1231 @return 0 if no pending else count + 1. */
1232 ulint check_pending_io(const fil_space_t *space, const fil_node_t &file,
1233 ulint count) const MY_ATTRIBUTE((warn_unused_result));
1234
1235 /** Flushes to disk possible writes cached by the OS. */
1236 void redo_space_flush();
1237
1238 /** First we open the file in the normal mode, no async I/O here, for
1239 simplicity. Then do some checks, and close the file again. NOTE that we
1240 could not use the simple file read function os_file_read() in Windows
1241 to read from a file opened for async I/O!
1242 @param[in,out] file Get the size of this file
1243 @param[in] read_only_mode true if read only mode set
1244 @return DB_SUCCESS or error */
1245 dberr_t get_file_size(fil_node_t *file, bool read_only_mode)
1246 MY_ATTRIBUTE((warn_unused_result));
1247
1248 /** Get the AIO mode.
1249 @param[in] req_type IO request type
1250 @param[in] sync true if Synchronous IO
1251 @return the AIO mode */
1252 static AIO_mode get_AIO_mode(const IORequest &req_type, bool sync)
1253 MY_ATTRIBUTE((warn_unused_result));
1254
1255 /** Get the file name for IO and the local offset within that file.
1256 @param[in] req_type IO context
1257 @param[in,out] space Tablespace for IO
1258 @param[in,out] page_no The relative page number in the file
1259 @param[out] file File node if DB_SUCCESS, NULL if not
1260 @retval DB_SUCCESS if the file is found with the page_no
1261 @retval DB_ERROR if the file is not found or does not contain the page.
1262 in this case file == nullptr */
1263 static dberr_t get_file_for_io(const IORequest &req_type, fil_space_t *space,
1264 page_no_t *page_no, fil_node_t *&file)
1265 MY_ATTRIBUTE((warn_unused_result));
1266
1267 private:
1268 /** Fil_shard ID */
1269
1270 const size_t m_id;
1271
1272 /** Tablespace instances hashed on the space id */
1273
1274 Spaces m_spaces;
1275
1276 /** Tablespace instances hashed on the space name */
1277
1278 Names m_names;
1279
1280 #ifndef UNIV_HOTBACKUP
1281 /** Deleted space IDs, ignore writes to these tablespaces. Note the
1282 LSN at which the tablespace was deleted. All pages before this LSN
1283 should not be flushed to disk. Once the LWM is >= the recorded LSN
1284 we can delete the entry from m_deleted. */
1285
1286 std::vector<std::pair<space_id_t, fil_space_t *>> m_deleted;
1287 #endif /* !UNIV_HOTBACKUP */
1288
1289 /** Base node for the LRU list of the most recently used open
1290 files with no pending I/O's; if we start an I/O on the file,
1291 we first remove it from this list, and return it to the start
1292 of the list when the I/O ends; log files and the system
1293 tablespace are not put to this list: they are opened after
1294 the startup, and kept open until shutdown */
1295
1296 File_list m_LRU;
1297
1298 /** Base node for the list of those tablespaces whose files
1299 contain unflushed writes; those spaces have at least one file
1300 where modification_counter > flush_counter */
1301
1302 Space_list m_unflushed_spaces;
1303
1304 /** When we write to a file we increment this by one */
1305
1306 int64_t m_modification_counter;
1307
1308 /** Number of files currently open */
1309
1310 static std::atomic_size_t s_n_open;
1311
1312 /** ID of shard that has reserved the open slot. */
1313
1314 static std::atomic_size_t s_open_slot;
1315
1316 // Disable copying
1317 Fil_shard(Fil_shard &&) = delete;
1318 Fil_shard(const Fil_shard &) = delete;
1319 Fil_shard &operator=(const Fil_shard &) = delete;
1320
1321 friend class Fil_system;
1322 };
1323
1324 /** The tablespace memory cache; also the totality of logs (the log
1325 data space) is stored here; below we talk about tablespaces, but also
1326 the ib_logfiles form a 'space' and it is handled here */
1327 class Fil_system {
1328 public:
1329 using Fil_shards = std::vector<Fil_shard *>;
1330
1331 /** Constructor.
1332 @param[in] n_shards Number of shards to create
1333 @param[in] max_open Maximum number of open files */
1334 Fil_system(size_t n_shards, size_t max_open);
1335
1336 /** Destructor */
1337 ~Fil_system();
1338
1339 /** Fetch the file names opened for a space_id during recovery.
1340 @param[in] space_id Tablespace ID to lookup
1341 @return pair of top level directory scanned and names that map
1342 to space_id or nullptr if not found for names */
get_scanned_files(space_id_t space_id)1343 Tablespace_dirs::Result get_scanned_files(space_id_t space_id)
1344 MY_ATTRIBUTE((warn_unused_result)) {
1345 return (m_dirs.find(space_id));
1346 }
1347
1348 /** Fetch the file name opened for a space_id during recovery
1349 from the file map.
1350 @param[in] space_id Undo tablespace ID
1351 @return Full path to the file name that was opened, empty string
1352 if space ID not found. */
find(space_id_t space_id)1353 std::string find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
1354 auto result = get_scanned_files(space_id);
1355
1356 if (result.second != nullptr) {
1357 return (result.first + result.second->front());
1358 }
1359
1360 return ("");
1361 }
1362
1363 /** Erase a tablespace ID and its mapping from the scanned files.
1364 @param[in] space_id Tablespace ID to erase
1365 @return true if successful */
erase_path(space_id_t space_id)1366 bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
1367 return (m_dirs.erase_path(space_id));
1368 }
1369
1370 /** Add file to old file list. The list is used during 5.7 upgrade failure
1371 to revert back the modified file names. We modify partitioned file names
1372 to lower case.
1373 @param[in] file_path old file name with path */
add_old_file(const std::string & file_path)1374 void add_old_file(const std::string &file_path) {
1375 m_old_paths.push_back(file_path);
1376 }
1377
1378 /** Rename partition files during upgrade.
1379 @param[in] revert if true, revert to old names */
1380 void rename_partition_files(bool revert);
1381
1382 /** Clear all accumulated old files. */
clear_old_files()1383 void clear_old_files() { m_old_paths.clear(); }
1384
1385 /** Get the top level directory where this filename was found.
1386 @param[in] path Path to look for.
1387 @return the top level directory under which this file was found. */
1388 const std::string &get_root(const std::string &path) const
1389 MY_ATTRIBUTE((warn_unused_result));
1390
1391 /** Update the DD if any files were moved to a new location.
1392 Free the Tablespace_files instance.
1393 @param[in] read_only_mode true if InnoDB is started in
1394 read only mode.
1395 @return DB_SUCCESS if all OK */
1396 dberr_t prepare_open_for_business(bool read_only_mode)
1397 MY_ATTRIBUTE((warn_unused_result));
1398
1399 /** Flush the redo log writes to disk, possibly cached by the OS. */
1400 void flush_file_redo();
1401
1402 /** Flush to disk the writes in file spaces of the given type
1403 possibly cached by the OS.
1404 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
1405 can be ORred */
1406 void flush_file_spaces(uint8_t purpose);
1407
1408 #ifndef UNIV_HOTBACKUP
1409 /** Clean up the shards.
1410 @param[in] lwm No dirty pages less than this LSN in the buffer pool. */
checkpoint(lsn_t lwm)1411 void checkpoint(lsn_t lwm) {
1412 for (auto shard : m_shards) {
1413 shard->checkpoint(lwm);
1414 }
1415 }
1416
1417 /** Count how many truncated undo space IDs are still tracked in
1418 the buffer pool and the file_system cache.
1419 @param[in] undo_num undo tablespace number.
1420 @return number of undo tablespaces that are still in memory. */
count_deleted(space_id_t undo_num)1421 size_t count_deleted(space_id_t undo_num) {
1422 size_t count = 0;
1423
1424 for (auto shard : m_shards) {
1425 count += shard->count_deleted(undo_num);
1426 }
1427
1428 return (count);
1429 }
1430
1431 /** Check if a particular undo space_id for a page in the buffer pool has
1432 been deleted recently. Its space_id will be found in Fil_shard::m_deleted
1433 until Fil_shard::checkpoint removes all its pages from the buffer pool and
1434 the fil_space_t from Fil_system.
1435 @return true if this space_id is in the list of recently deleted spaces. */
is_deleted(space_id_t space_id)1436 bool is_deleted(space_id_t space_id) {
1437 auto shard = shard_by_id(space_id);
1438
1439 return (shard->is_deleted(space_id));
1440 }
1441 #endif /* !UNIV_HOTBACKUP */
1442
1443 /** Fetch the fil_space_t instance that maps to the name.
1444 @param[in] name Tablespace name to lookup
1445 @return tablespace instance or nullptr if not found. */
get_space_by_name(const char * name)1446 fil_space_t *get_space_by_name(const char *name)
1447 MY_ATTRIBUTE((warn_unused_result)) {
1448 for (auto shard : m_shards) {
1449 shard->mutex_acquire();
1450
1451 auto space = shard->get_space_by_name(name);
1452
1453 shard->mutex_release();
1454
1455 if (space != nullptr) {
1456 return (space);
1457 }
1458 }
1459
1460 return (nullptr);
1461 }
1462
1463 /** Check a space ID against the maximum known tablespace ID.
1464 @param[in] space_id Tablespace ID to check
1465 @return true if it is greater than the maximum known tablespace ID. */
is_greater_than_max_id(space_id_t space_id) const1466 bool is_greater_than_max_id(space_id_t space_id) const
1467 MY_ATTRIBUTE((warn_unused_result)) {
1468 ut_ad(mutex_owned_all());
1469
1470 return (space_id > m_max_assigned_id);
1471 }
1472
1473 /** Update the maximum known tablespace ID.
1474 @param[in] space Tablespace instance */
set_maximum_space_id(const fil_space_t * space)1475 void set_maximum_space_id(const fil_space_t *space) {
1476 ut_ad(mutex_owned_all());
1477
1478 if (!m_space_id_reuse_warned) {
1479 m_space_id_reuse_warned = true;
1480
1481 ib::warn(ER_IB_MSG_266) << "Allocated tablespace ID " << space->id
1482 << " for " << space->name << ", old maximum"
1483 << " was " << m_max_assigned_id;
1484 }
1485
1486 m_max_assigned_id = space->id;
1487 }
1488
1489 /** Update the maximum known space ID if it's smaller than space_id.
1490 @param[in] space_id Value to set if it's greater */
update_maximum_space_id(space_id_t space_id)1491 void update_maximum_space_id(space_id_t space_id) {
1492 mutex_acquire_all();
1493
1494 if (is_greater_than_max_id(space_id)) {
1495 m_max_assigned_id = space_id;
1496 }
1497
1498 mutex_release_all();
1499 }
1500
1501 /** Assigns a new space id for a new single-table tablespace. This
1502 works simply by incrementing the global counter. If 4 billion ids
1503 is not enough, we may need to recycle ids.
1504 @param[out] space_id Set this to the new tablespace ID
1505 @return true if assigned, false if not */
1506 bool assign_new_space_id(space_id_t *space_id)
1507 MY_ATTRIBUTE((warn_unused_result));
1508
1509 /** Tries to close a file in all the LRU lists.
1510 The caller must hold the mutex.
1511 @param[in] print_info if true, prints information why it
1512 cannot close a file
1513 @return true if success, false if should retry later */
1514 bool close_file_in_all_LRU(bool print_info)
1515 MY_ATTRIBUTE((warn_unused_result));
1516
1517 /** Opens all log files and system tablespace data files in
1518 all shards. */
1519 void open_all_system_tablespaces();
1520
1521 /** Close all open files in a shard
1522 @param[in,out] shard Close files of this shard */
1523 void close_files_in_a_shard(Fil_shard *shard);
1524
1525 /** Close all open files. */
1526 void close_all_files();
1527
1528 /** Close all the log files in all shards.
1529 @param[in] free_all If set then free all instances */
1530 void close_all_log_files(bool free_all);
1531
1532 /** Iterate through all tablespaces
1533 @param[in] include_log Include redo log space, if true
1534 @param[in] f Callback
1535 @return any error returned by the callback function. */
1536 dberr_t iterate_spaces(bool include_log, Fil_space_iterator::Function &f)
1537 MY_ATTRIBUTE((warn_unused_result));
1538
1539 /** Iterate through all persistent tablespace files
1540 (FIL_TYPE_TABLESPACE) returning the nodes via callback function cbk.
1541 @param[in] include_log Include log files, if true
1542 @param[in] f Callback
1543 @return any error returned by the callback function. */
1544 dberr_t iterate(bool include_log, Fil_iterator::Function &f)
1545 MY_ATTRIBUTE((warn_unused_result));
1546
1547 /** Rotate the tablespace keys by new master key.
1548 @param[in,out] shard Rotate the keys in this shard
1549 @return true if the re-encrypt succeeds */
1550 bool encryption_rotate_in_a_shard(Fil_shard *shard);
1551
1552 /** Rotate the tablespace keys by new master key.
1553 @return true if the re-encrypt succeeds */
1554 bool encryption_rotate_all() MY_ATTRIBUTE((warn_unused_result));
1555
1556 /** Detach a space object from the tablespace memory cache.
1557 Closes the tablespace files but does not delete them.
1558 There must not be any pending I/O's or flushes on the files.
1559 @param[in,out] space tablespace */
1560 void space_detach(fil_space_t *space);
1561
1562 /** @return the maximum assigned ID so far */
get_max_space_id() const1563 space_id_t get_max_space_id() const { return (m_max_assigned_id); }
1564
1565 /** Lookup the tablespace ID.
1566 @param[in] space_id Tablespace ID to lookup
1567 @return true if the space ID is known. */
1568 bool lookup_for_recovery(space_id_t space_id)
1569 MY_ATTRIBUTE((warn_unused_result));
1570
1571 /** Open a tablespace that has a redo log record to apply.
1572 @param[in] space_id Tablespace ID
1573 @return true if the open was successful */
1574 bool open_for_recovery(space_id_t space_id)
1575 MY_ATTRIBUTE((warn_unused_result));
1576
1577 /** This function should be called after recovery has completed.
1578 Check for tablespace files for which we did not see any
1579 MLOG_FILE_DELETE or MLOG_FILE_RENAME record. These could not
1580 be recovered.
1581 @return true if there were some filenames missing for which we had to
1582 ignore redo log records during the apply phase */
1583 bool check_missing_tablespaces() MY_ATTRIBUTE((warn_unused_result));
1584
1585 /** Note that a file has been relocated.
1586 @param[in] object_id Server DD tablespace ID
1587 @param[in] space_id InnoDB tablespace ID
1588 @param[in] space_name Tablespace name
1589 @param[in] old_path Path to the old location
1590 @param[in] new_path Path scanned from disk */
moved(dd::Object_id object_id,space_id_t space_id,const char * space_name,const std::string & old_path,const std::string & new_path)1591 void moved(dd::Object_id object_id, space_id_t space_id,
1592 const char *space_name, const std::string &old_path,
1593 const std::string &new_path) {
1594 auto tuple =
1595 std::make_tuple(object_id, space_id, space_name, old_path, new_path);
1596
1597 m_moved.push_back(tuple);
1598 }
1599
1600 /** Check if a path is known to InnoDB.
1601 @param[in] path Path to check
1602 @return true if path is known to InnoDB */
check_path(const std::string & path) const1603 bool check_path(const std::string &path) const {
1604 return (m_dirs.contains(path));
1605 }
1606
1607 /** Get the list of directories that InnoDB knows about.
1608 @return the list of directories 'dir1;dir2;....;dirN' */
get_dirs() const1609 std::string get_dirs() const { return (m_dirs.get_dirs()); }
1610
1611 /** Determines if a file belongs to the least-recently-used list.
1612 @param[in] space Tablespace to check
1613 @return true if the files of the tablespace belong in the LRU list. */
1614 static bool space_belongs_in_LRU(const fil_space_t *space)
1615 MY_ATTRIBUTE((warn_unused_result));
1616
1617 /** Normalize and save a directory to scan for IBD and IBU datafiles
1618 before recovery.
1619 @param[in] directory Directory to scan
1620 @param[in] is_undo_dir true for an undo directory */
set_scan_dir(const std::string & directory,bool is_undo_dir)1621 void set_scan_dir(const std::string &directory, bool is_undo_dir) {
1622 m_dirs.set_scan_dir(directory, is_undo_dir);
1623 }
1624
1625 /** Normalize and save a list of directories to scan for IBD and IBU
1626 datafiles before recovery.
1627 @param[in] directories Directories to scan */
set_scan_dirs(const std::string & directories)1628 void set_scan_dirs(const std::string &directories) {
1629 m_dirs.set_scan_dirs(directories);
1630 }
1631
1632 /** Scan the directories to build the tablespace ID to file name
1633 mapping table. */
scan(bool populate_fil_cache)1634 dberr_t scan(bool populate_fil_cache) {
1635 return (m_dirs.scan(populate_fil_cache));
1636 }
1637
1638 /** Open all known tablespaces. */
open_ibds() const1639 void open_ibds() const { m_dirs.open_ibds(); }
1640
1641 /** Insert a file with given space ID to filename mapping.
1642 @param[in] space_id Tablespace ID to insert
1643 @param[in] filename file name to insert
1644 @return true if successful */
insert(space_id_t space_id,const std::string & filename)1645 bool insert(space_id_t space_id, const std::string &filename)
1646 MY_ATTRIBUTE((warn_unused_result)) {
1647 return (m_dirs.insert(space_id, filename));
1648 }
1649
1650 /** Get the tablespace ID from an .ibd and/or an undo tablespace.
1651 If the ID is == 0 on the first page then try with Datafile::find_space_id().
1652 @param[in] filename File name to check
1653 @return s_invalid_space_id if not found, otherwise the space ID */
1654 static space_id_t get_tablespace_id(const std::string &filename)
1655 MY_ATTRIBUTE((warn_unused_result));
1656
1657 /** Fil_shard by space ID.
1658 @param[in] space_id Tablespace ID
1659 @return pointer to the shard */
shard_by_id(space_id_t space_id) const1660 Fil_shard *shard_by_id(space_id_t space_id) const
1661 MY_ATTRIBUTE((warn_unused_result)) {
1662 #ifndef UNIV_HOTBACKUP
1663 if (space_id == dict_sys_t::s_log_space_first_id) {
1664 return (m_shards[REDO_SHARD]);
1665
1666 } else if (fsp_is_undo_tablespace(space_id)) {
1667 const size_t limit = space_id % UNDO_SHARDS;
1668
1669 return (m_shards[UNDO_SHARDS_START + limit]);
1670 }
1671
1672 ut_ad(m_shards.size() == MAX_SHARDS);
1673
1674 return (m_shards[space_id % UNDO_SHARDS_START]);
1675 #else /* !UNIV_HOTBACKUP */
1676 ut_ad(m_shards.size() == 1);
1677
1678 return (m_shards[0]);
1679 #endif /* !UNIV_HOTBACKUP */
1680 }
1681
1682 /** Acquire all the mutexes. */
mutex_acquire_all() const1683 void mutex_acquire_all() const {
1684 #ifdef UNIV_HOTBACKUP
1685 ut_ad(m_shards.size() == 1);
1686 #endif /* UNIV_HOTBACKUP */
1687
1688 for (auto shard : m_shards) {
1689 shard->mutex_acquire();
1690 }
1691 }
1692
1693 /** Release all the mutexes. */
mutex_release_all() const1694 void mutex_release_all() const {
1695 #ifdef UNIV_HOTBACKUP
1696 ut_ad(m_shards.size() == 1);
1697 #endif /* UNIV_HOTBACKUP */
1698
1699 for (auto shard : m_shards) {
1700 shard->mutex_release();
1701 }
1702 }
1703
1704 #ifdef UNIV_DEBUG
1705
1706 /** Checks the consistency of the tablespace cache.
1707 @return true if ok */
1708 bool validate() const MY_ATTRIBUTE((warn_unused_result));
1709
1710 /** Check if all mutexes are owned
1711 @return true if all owned. */
mutex_owned_all() const1712 bool mutex_owned_all() const MY_ATTRIBUTE((warn_unused_result)) {
1713 #ifdef UNIV_HOTBACKUP
1714 ut_ad(m_shards.size() == 1);
1715 #endif /* UNIV_HOTBACKUP */
1716
1717 for (const auto shard : m_shards) {
1718 ut_ad(shard->mutex_owned());
1719 }
1720
1721 return (true);
1722 }
1723
1724 #endif /* UNIV_DEBUG */
1725
1726 /** Rename a tablespace. Use the space_id to find the shard.
1727 @param[in] space_id tablespace ID
1728 @param[in] old_name old tablespace name
1729 @param[in] new_name new tablespace name
1730 @return DB_SUCCESS on success */
1731 dberr_t rename_tablespace_name(space_id_t space_id, const char *old_name,
1732 const char *new_name)
1733 MY_ATTRIBUTE((warn_unused_result));
1734
1735 /** Free the data structures required for recovery. */
free_scanned_files()1736 void free_scanned_files() { m_dirs.clear(); }
1737
1738 #ifdef UNIV_HOTBACKUP
1739 /** Extends all tablespaces to the size stored in the space header.
1740 During the mysqlbackup --apply-log phase we extended the spaces
1741 on-demand so that log records could be applied, but that may have
1742 left spaces still too small compared to the size stored in the space
1743 header. */
meb_extend_tablespaces_to_stored_len()1744 void meb_extend_tablespaces_to_stored_len() {
1745 ut_ad(m_shards.size() == 1);
1746
1747 /* We use a single shard for MEB. */
1748 auto shard = shard_by_id(SPACE_UNKNOWN);
1749
1750 shard->mutex_acquire();
1751
1752 shard->meb_extend_tablespaces_to_stored_len();
1753
1754 shard->mutex_release();
1755 }
1756
1757 /** Process a file name passed as an input
1758 Wrapper around meb_name_process()
1759 @param[in,out] name absolute path of tablespace file
1760 @param[in] space_id The tablespace ID
1761 @param[in] deleted true if MLOG_FILE_DELETE */
1762 void meb_name_process(char *name, space_id_t space_id, bool deleted);
1763
1764 #endif /* UNIV_HOTBACKUP */
1765
1766 private:
1767 /** Open an ibd tablespace and add it to the InnoDB data structures.
1768 This is similar to fil_ibd_open() except that it is used while
1769 processing the redo log, so the data dictionary is not available
1770 and very little validation is done. The tablespace name is extracted
1771 from the dbname/tablename.ibd portion of the filename, which assumes
1772 that the file is a file-per-table tablespace. Any name will do for
1773 now. General tablespace names will be read from the dictionary after
1774 it has been recovered. The tablespace flags are read at this time
1775 from the first page of the file in validate_for_recovery().
1776 @param[in] space_id tablespace ID
1777 @param[in] path path/to/databasename/tablename.ibd
1778 @param[out] space the tablespace, or nullptr on error
1779 @return status of the operation */
1780 fil_load_status ibd_open_for_recovery(space_id_t space_id,
1781 const std::string &path,
1782 fil_space_t *&space)
1783 MY_ATTRIBUTE((warn_unused_result));
1784
1785 private:
1786 /** Fil_shards managed */
1787 Fil_shards m_shards;
1788
1789 /** n_open is not allowed to exceed this */
1790 const size_t m_max_n_open;
1791
1792 /** Maximum space id in the existing tables, or assigned during
1793 the time mysqld has been up; at an InnoDB startup we scan the
1794 data dictionary and set here the maximum of the space id's of
1795 the tables there */
1796 space_id_t m_max_assigned_id;
1797
1798 /** true if fil_space_create() has issued a warning about
1799 potential space_id reuse */
1800 bool m_space_id_reuse_warned;
1801
1802 /** List of tablespaces that have been relocated. We need to
1803 update the DD when it is safe to do so. */
1804 dd_fil::Tablespaces m_moved;
1805
1806 /** Tablespace directories scanned at startup */
1807 Tablespace_dirs m_dirs;
1808
1809 /** Old file paths during 5.7 upgrade. */
1810 std::vector<std::string> m_old_paths;
1811
1812 // Disable copying
1813 Fil_system(Fil_system &&) = delete;
1814 Fil_system(const Fil_system &) = delete;
1815 Fil_system &operator=(const Fil_system &) = delete;
1816
1817 friend class Fil_shard;
1818 };
1819
1820 /** The tablespace memory cache. This variable is nullptr before the module is
1821 initialized. */
1822 static Fil_system *fil_system = nullptr;
1823
1824 /** Total number of open files. */
1825 std::atomic_size_t Fil_shard::s_n_open;
1826
1827 /** Slot reserved for opening a file. */
1828 std::atomic_size_t Fil_shard::s_open_slot;
1829
1830 #ifdef UNIV_HOTBACKUP
1831 static ulint srv_data_read;
1832 static ulint srv_data_written;
1833 #endif /* UNIV_HOTBACKUP */
1834
1835 /** Replay a file rename operation if possible.
1836 @param[in] page_id Space ID and first page number in the file
1837 @param[in] old_name old file name
1838 @param[in] new_name new file name
1839 @return whether the operation was successfully applied (the name did not exist,
1840 or new_name did not exist and name was successfully renamed to new_name) */
1841 static bool fil_op_replay_rename(const page_id_t &page_id,
1842 const std::string &old_name,
1843 const std::string &new_name)
1844 MY_ATTRIBUTE((warn_unused_result));
1845
1846 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
1847 /** Rename partition file.
1848 @param[in] old_path old file path
1849 @param[in] extn file extension suffix
1850 @param[in] revert if true, rename from new to old file
1851 @param[in] import if called during import */
1852 static void fil_rename_partition_file(const std::string &old_path,
1853 ib_file_suffix extn, bool revert,
1854 bool import);
1855 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
1856
1857 #ifndef XTRABACKUP
1858 /** Get modified name for partition file. During upgrade we change all
1859 partition files to have lower case separator and partition name.
1860 @param[in] old_path old file name and path
1861 @param[in] extn file extension suffix
1862 @param[out] new_path modified new name for partitioned file
1863 @return true, iff name needs modification. */
1864 static bool fil_get_partition_file(const std::string &old_path,
1865 ib_file_suffix extn, std::string &new_path);
#endif /* !XTRABACKUP */
1867
1868 #ifdef UNIV_DEBUG
1869 /** Try fil_validate() every this many times */
1870 static const size_t FIL_VALIDATE_SKIP = 17;
1871 /** Checks the consistency of the tablespace cache some of the time.
1872 @return true if ok or the check was skipped */
fil_validate_skip()1873 static bool fil_validate_skip() {
1874 /** The fil_validate() call skip counter. Use a signed type
1875 because of the race condition below. */
1876 #ifdef UNIV_HOTBACKUP
1877 static meb::Mutex meb_mutex;
1878
1879 meb_mutex.lock();
1880 #endif /* UNIV_HOTBACKUP */
1881 static int fil_validate_count = FIL_VALIDATE_SKIP;
1882
1883 /* There is a race condition below, but it does not matter,
1884 because this call is only for heuristic purposes. We want to
1885 reduce the call frequency of the costly fil_validate() check
1886 in debug builds. */
1887 --fil_validate_count;
1888
1889 if (fil_validate_count > 0) {
1890 #ifdef UNIV_HOTBACKUP
1891 meb_mutex.unlock();
1892 #endif /* UNIV_HOTBACKUP */
1893 return (true);
1894 }
1895
1896 fil_validate_count = FIL_VALIDATE_SKIP;
1897 #ifdef UNIV_HOTBACKUP
1898 meb_mutex.unlock();
1899 #endif /* UNIV_HOTBACKUP */
1900
1901 return (fil_validate());
1902 }
1903
1904 /** Validate a shard */
validate() const1905 void Fil_shard::validate() const {
1906 mutex_acquire();
1907
1908 size_t n_open = 0;
1909
1910 for (auto elem : m_spaces) {
1911 page_no_t size = 0;
1912 auto space = elem.second;
1913
1914 for (const auto &file : space->files) {
1915 ut_a(file.is_open || !file.n_pending);
1916
1917 if (file.is_open) {
1918 ++n_open;
1919 }
1920
1921 size += file.size;
1922 }
1923
1924 ut_a(space->size == size);
1925 }
1926
1927 UT_LIST_CHECK(m_LRU);
1928
1929 for (auto file = UT_LIST_GET_FIRST(m_LRU); file != nullptr;
1930 file = UT_LIST_GET_NEXT(LRU, file)) {
1931 ut_a(file->is_open);
1932 ut_a(file->n_pending == 0);
1933 ut_a(fil_system->space_belongs_in_LRU(file->space));
1934 }
1935
1936 mutex_release();
1937 }
1938
1939 /** Checks the consistency of the tablespace cache.
1940 @return true if ok */
validate() const1941 bool Fil_system::validate() const {
1942 for (const auto shard : m_shards) {
1943 shard->validate();
1944 }
1945
1946 return (true);
1947 }
1948 /** Checks the consistency of the tablespace cache.
1949 @return true if ok */
fil_validate()1950 bool fil_validate() { return (fil_system->validate()); }
1951 #endif /* UNIV_DEBUG */
1952
1953 /** Constructor.
1954 @param[in] n_shards Number of shards to create
1955 @param[in] max_open Maximum number of open files */
Fil_system(size_t n_shards,size_t max_open)1956 Fil_system::Fil_system(size_t n_shards, size_t max_open)
1957 : m_shards(),
1958 m_max_n_open(max_open),
1959 m_max_assigned_id(),
1960 m_space_id_reuse_warned() {
1961 ut_ad(Fil_shard::s_open_slot == 0);
1962 Fil_shard::s_open_slot = EMPTY_OPEN_SLOT;
1963
1964 for (size_t i = 0; i < n_shards; ++i) {
1965 auto shard = UT_NEW_NOKEY(Fil_shard(i));
1966
1967 m_shards.push_back(shard);
1968 }
1969 }
1970
1971 /** Destructor */
~Fil_system()1972 Fil_system::~Fil_system() {
1973 ut_ad(Fil_shard::s_open_slot == EMPTY_OPEN_SLOT);
1974
1975 Fil_shard::s_open_slot = 0;
1976
1977 for (auto shard : m_shards) {
1978 UT_DELETE(shard);
1979 }
1980
1981 m_shards.clear();
1982 }
1983
1984 /** Determines if a file belongs to the least-recently-used list.
1985 @param[in] space Tablespace to check
1986 @return true if the file belongs to m_LRU. */
space_belongs_in_LRU(const fil_space_t * space)1987 bool Fil_system::space_belongs_in_LRU(const fil_space_t *space) {
1988 switch (space->purpose) {
1989 case FIL_TYPE_TEMPORARY:
1990 case FIL_TYPE_LOG:
1991 return (false);
1992
1993 case FIL_TYPE_TABLESPACE:
1994 return (fsp_is_ibd_tablespace(space->id));
1995
1996 case FIL_TYPE_IMPORT:
1997 return (true);
1998 }
1999
2000 ut_ad(0);
2001 return (false);
2002 }
2003
2004 /** Constructor
2005 @param[in] shard_id Shard ID */
Fil_shard(size_t shard_id)2006 Fil_shard::Fil_shard(size_t shard_id)
2007 : m_id(shard_id), m_spaces(), m_names(), m_modification_counter() {
2008 mutex_create(LATCH_ID_FIL_SHARD, &m_mutex);
2009
2010 UT_LIST_INIT(m_LRU, &fil_node_t::LRU);
2011
2012 UT_LIST_INIT(m_unflushed_spaces, &fil_space_t::unflushed_spaces);
2013 }
2014
2015 /** Wait for an empty slot to reserve for opening a file.
2016 @return true on success. */
reserve_open_slot(size_t shard_id)2017 bool Fil_shard::reserve_open_slot(size_t shard_id) {
2018 size_t expected = EMPTY_OPEN_SLOT;
2019
2020 return (s_open_slot.compare_exchange_weak(expected, shard_id));
2021 }
2022
2023 /** Release the slot reserved for opening a file.
2024 @param[in] shard_id ID of shard relasing the slot */
release_open_slot(size_t shard_id)2025 void Fil_shard::release_open_slot(size_t shard_id) {
2026 size_t expected = shard_id;
2027
2028 while (!s_open_slot.compare_exchange_weak(expected, EMPTY_OPEN_SLOT)) {
2029 };
2030 }
2031
2032 /** Map the space ID and name to the tablespace instance.
2033 @param[in] space Tablespace instance */
space_add(fil_space_t * space)2034 void Fil_shard::space_add(fil_space_t *space) {
2035 ut_ad(mutex_owned());
2036
2037 {
2038 auto it = m_spaces.insert(Spaces::value_type(space->id, space));
2039
2040 ut_a(it.second);
2041 }
2042
2043 {
2044 auto name = space->name;
2045
2046 auto it = m_names.insert(Names::value_type(name, space));
2047
2048 ut_a(it.second);
2049 }
2050 }
2051
2052 /** Add the file node to the LRU list if required.
2053 @param[in,out] file File for the tablespace */
file_opened(fil_node_t * file)2054 void Fil_shard::file_opened(fil_node_t *file) {
2055 ut_ad(m_id == REDO_SHARD || mutex_owned());
2056
2057 if (Fil_system::space_belongs_in_LRU(file->space)) {
2058 /* Put the file to the LRU list */
2059 UT_LIST_ADD_FIRST(m_LRU, file);
2060 }
2061
2062 ++s_n_open;
2063
2064 file->is_open = true;
2065
2066 fil_n_file_opened = s_n_open;
2067 }
2068
2069 /** Remove the file node from the LRU list.
2070 @param[in,out] file File for the tablespace */
remove_from_LRU(fil_node_t * file)2071 void Fil_shard::remove_from_LRU(fil_node_t *file) {
2072 ut_ad(mutex_owned());
2073
2074 if (Fil_system::space_belongs_in_LRU(file->space)) {
2075 ut_ad(mutex_owned());
2076
2077 ut_a(UT_LIST_GET_LEN(m_LRU) > 0);
2078
2079 /* The file is in the LRU list, remove it */
2080 UT_LIST_REMOVE(m_LRU, file);
2081 }
2082 }
2083
2084 /** Close a tablespace file based on tablespace ID.
2085 @param[in] space_id Tablespace ID
2086 @return false if space_id was not found. */
close_file(space_id_t space_id)2087 bool Fil_shard::close_file(space_id_t space_id) {
2088 mutex_acquire();
2089
2090 auto space = get_space_by_id(space_id);
2091
2092 if (space == nullptr) {
2093 mutex_release();
2094
2095 return (false);
2096 }
2097
2098 for (auto &file : space->files) {
2099 while (file.in_use > 0) {
2100 mutex_release();
2101
2102 os_thread_sleep(10000);
2103
2104 mutex_acquire();
2105 }
2106
2107 if (file.is_open) {
2108 close_file(&file, false);
2109 }
2110 }
2111
2112 mutex_release();
2113
2114 return (true);
2115 }
2116
2117 /** Remap the tablespace to the new name.
2118 @param[in] space Tablespace instance, with old name.
2119 @param[in] new_name New tablespace name */
update_space_name_map(fil_space_t * space,const char * new_name)2120 void Fil_shard::update_space_name_map(fil_space_t *space,
2121 const char *new_name) {
2122 ut_ad(mutex_owned());
2123
2124 ut_ad(m_spaces.find(space->id) != m_spaces.end());
2125
2126 m_names.erase(space->name);
2127
2128 auto it = m_names.insert(Names::value_type(new_name, space));
2129
2130 ut_a(it.second);
2131 }
2132
2133 /** Check if the basename of a filepath is an undo tablespace name
2134 @param[in] name Tablespace name
2135 @return true if it is an undo tablespace name */
is_undo_tablespace_name(const std::string & name)2136 bool Fil_path::is_undo_tablespace_name(const std::string &name) {
2137 if (name.empty()) {
2138 return (false);
2139 }
2140
2141 std::string basename(name);
2142
2143 auto sep = basename.find_last_of(SEPARATOR);
2144
2145 if (sep != std::string::npos) {
2146 basename.erase(basename.begin(), basename.begin() + sep + 1);
2147 }
2148
2149 const auto end = basename.end();
2150
2151 /* 5 is the minimum length for an explicit undo space name.
2152 It must be at least this long; "_.ibu". */
2153 if (basename.length() <= strlen(DOT_IBU)) {
2154 return (false);
2155 }
2156
2157 /* Implicit undo names can come in two formats: undo_000 and undo000.
2158 Check for both. */
2159 size_t u = (*(end - 4) == '_') ? 1 : 0;
2160
2161 if (basename.length() == sizeof("undo000") - 1 + u &&
2162 *(end - 7 - u) == 'u' && /* 'u' */
2163 *(end - 6 - u) == 'n' && /* 'n' */
2164 *(end - 5 - u) == 'd' && /* 'd' */
2165 *(end - 4 - u) == 'o' && /* 'o' */
2166 isdigit(*(end - 3)) && /* 'n' */
2167 isdigit(*(end - 2)) && /* 'n' */
2168 isdigit(*(end - 1))) { /* 'n' */
2169 return (true);
2170 }
2171
2172 if (basename.substr(basename.length() - 4, 4) == DOT_IBU) {
2173 return (true);
2174 }
2175
2176 return (false);
2177 }
2178
2179 /** Add a space ID to filename mapping.
2180 @param[in] space_id Tablespace ID
2181 @param[in] name File name.
2182 @return number of files that map to the space ID */
add(space_id_t space_id,const std::string & name)2183 size_t Tablespace_files::add(space_id_t space_id, const std::string &name) {
2184 ut_a(space_id != TRX_SYS_SPACE);
2185
2186 Names *names;
2187
2188 if (Fil_path::is_undo_tablespace_name(name)) {
2189 if (!dict_sys_t::is_reserved(space_id) &&
2190 0 == strncmp(name.c_str(), "undo_", 5)) {
2191 ib::warn(ER_IB_MSG_267) << "Tablespace '" << name << "' naming"
2192 << " format is like an undo tablespace"
2193 << " but its ID " << space_id << " is not"
2194 << " in the undo tablespace range";
2195 }
2196
2197 names = &m_undo_paths[space_id];
2198
2199 } else {
2200 ut_ad(Fil_path::has_suffix(IBD, name.c_str()));
2201
2202 names = &m_ibd_paths[space_id];
2203 }
2204
2205 names->push_back(name);
2206
2207 return (names->size());
2208 }
2209
2210 /** Open all known tablespaces. */
open_ibds() const2211 void Tablespace_files::open_ibds() const {
2212 for (auto path : m_ibd_paths) {
2213 for (auto name : path.second) {
2214 fil_open_for_xtrabackup(m_dir.path() + name,
2215 name.substr(0, name.length() - 4));
2216 }
2217 }
2218 }
2219
2220 /** Reads data from a space to a buffer. Remember that the possible incomplete
2221 blocks at the end of file are ignored: they are not taken into account when
2222 calculating the byte offset within a space.
2223 @param[in] page_id page id
2224 @param[in] page_size page size
2225 @param[in] byte_offset remainder of offset in bytes; in aio this
2226 must be divisible by the OS block size
2227 @param[in] len how many bytes to read; this must not cross a
2228 file boundary; in aio this must be a block size multiple
2229 @param[in,out] buf buffer where to store data read; in aio this
2230 must be appropriately aligned
2231 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
2232 i/o on a tablespace which does not exist */
fil_read(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)2233 static dberr_t fil_read(const page_id_t &page_id, const page_size_t &page_size,
2234 ulint byte_offset, ulint len, void *buf) {
2235 return (fil_io(IORequestRead, true, page_id, page_size, byte_offset, len, buf,
2236 nullptr));
2237 }
2238
2239 /** Writes data to a space from a buffer. Remember that the possible incomplete
2240 blocks at the end of file are ignored: they are not taken into account when
2241 calculating the byte offset within a space.
2242 @param[in] page_id page id
2243 @param[in] page_size page size
2244 @param[in] byte_offset remainder of offset in bytes; in aio this
2245 must be divisible by the OS block size
2246 @param[in] len how many bytes to write; this must not cross
2247 a file boundary; in aio this must be a block size multiple
2248 @param[in] buf buffer from which to write; in aio this must
2249 be appropriately aligned
2250 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
2251 I/O on a tablespace which does not exist */
fil_write(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)2252 static dberr_t fil_write(const page_id_t &page_id, const page_size_t &page_size,
2253 ulint byte_offset, ulint len, void *buf) {
2254 ut_ad(!srv_read_only_mode);
2255
2256 return (fil_io(IORequestWrite, true, page_id, page_size, byte_offset, len,
2257 buf, nullptr));
2258 }
2259
2260 /** Look up a tablespace. The caller should hold an InnoDB table lock or
2261 a MDL that prevents the tablespace from being dropped during the operation,
2262 or the caller should be in single-threaded crash recovery mode (no user
2263 connections that could drop tablespaces). If this is not the case,
2264 fil_space_acquire() and fil_space_release() should be used instead.
2265 @param[in] space_id Tablespace ID
2266 @return tablespace, or nullptr if not found */
fil_space_get(space_id_t space_id)2267 fil_space_t *fil_space_get(space_id_t space_id) {
2268 auto shard = fil_system->shard_by_id(space_id);
2269
2270 shard->mutex_acquire();
2271
2272 fil_space_t *space = shard->get_space_by_id(space_id);
2273
2274 shard->mutex_release();
2275
2276 return (space);
2277 }
2278
2279 #ifndef UNIV_HOTBACKUP
2280
2281 /** Returns the latch of a file space.
2282 @param[in] space_id Tablespace ID
2283 @return latch protecting storage allocation */
fil_space_get_latch(space_id_t space_id)2284 rw_lock_t *fil_space_get_latch(space_id_t space_id) {
2285 auto shard = fil_system->shard_by_id(space_id);
2286
2287 shard->mutex_acquire();
2288
2289 fil_space_t *space = shard->get_space_by_id(space_id);
2290
2291 shard->mutex_release();
2292
2293 return (&space->latch);
2294 }
2295
2296 #ifdef UNIV_DEBUG
2297
2298 /** Gets the type of a file space.
2299 @param[in] space_id Tablespace ID
2300 @return file type */
fil_space_get_type(space_id_t space_id)2301 fil_type_t fil_space_get_type(space_id_t space_id) {
2302 auto shard = fil_system->shard_by_id(space_id);
2303
2304 shard->mutex_acquire();
2305
2306 auto space = shard->get_space_by_id(space_id);
2307
2308 shard->mutex_release();
2309
2310 return (space->purpose);
2311 }
2312
2313 #endif /* UNIV_DEBUG */
2314
2315 /** Note that a tablespace has been imported.
2316 It is initially marked as FIL_TYPE_IMPORT so that no logging is
2317 done during the import process when the space ID is stamped to each page.
2318 Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
2319 NOTE: temporary tablespaces are never imported.
2320 @param[in] space_id Tablespace ID */
fil_space_set_imported(space_id_t space_id)2321 void fil_space_set_imported(space_id_t space_id) {
2322 auto shard = fil_system->shard_by_id(space_id);
2323
2324 shard->mutex_acquire();
2325
2326 fil_space_t *space = shard->get_space_by_id(space_id);
2327
2328 ut_ad(space->purpose == FIL_TYPE_IMPORT);
2329 space->purpose = FIL_TYPE_TABLESPACE;
2330
2331 shard->mutex_release();
2332 }
2333 #endif /* !UNIV_HOTBACKUP */
2334
2335 /** Checks if all the file nodes in a space are flushed. The caller must hold
2336 the fil_system mutex.
2337 @param[in] space Tablespace to check
2338 @return true if all are flushed */
space_is_flushed(const fil_space_t * space)2339 bool Fil_shard::space_is_flushed(const fil_space_t *space) {
2340 ut_ad(mutex_owned());
2341
2342 for (const auto &file : space->files) {
2343 if (file.modification_counter > file.flush_counter) {
2344 ut_ad(!fil_buffering_disabled(space));
2345 return (false);
2346 }
2347 }
2348
2349 return (true);
2350 }
2351
2352 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
2353
2354 #include <sys/ioctl.h>
2355
2356 /** FusionIO atomic write control info */
2357 #define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
2358
2359 /** Try and enable FusionIO atomic writes.
2360 @param[in] file OS file handle
2361 @return true if successful */
fil_fusionio_enable_atomic_write(pfs_os_file_t file)2362 bool fil_fusionio_enable_atomic_write(pfs_os_file_t file) {
2363 if (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
2364 uint atomic = 1;
2365
2366 ut_a(file.m_file != -1);
2367
2368 if (ioctl(file.m_file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic) != -1) {
2369 return (true);
2370 }
2371 }
2372
2373 return (false);
2374 }
2375 #endif /* !NO_FALLOCATE && UNIV_LINUX */
2376
2377 /** Attach a file to a tablespace
2378 @param[in] name file name of a file that is not open
2379 @param[in] size file size in entire database blocks
2380 @param[in,out] space tablespace from fil_space_create()
2381 @param[in] is_raw whether this is a raw device or partition
2382 @param[in] punch_hole true if supported for this file
2383 @param[in] atomic_write true if the file has atomic write enabled
2384 @param[in] max_pages maximum number of pages in file
2385 @return pointer to the file name
2386 @retval nullptr if error */
create_node(const char * name,page_no_t size,fil_space_t * space,bool is_raw,bool punch_hole,bool atomic_write,page_no_t max_pages)2387 fil_node_t *Fil_shard::create_node(const char *name, page_no_t size,
2388 fil_space_t *space, bool is_raw,
2389 bool punch_hole, bool atomic_write,
2390 page_no_t max_pages) {
2391 ut_ad(name != nullptr);
2392 ut_ad(fil_system != nullptr);
2393
2394 if (space == nullptr) {
2395 return (nullptr);
2396 }
2397
2398 fil_node_t file{};
2399
2400 file.name = mem_strdup(name);
2401
2402 ut_a(!is_raw || srv_start_raw_disk_in_use);
2403
2404 file.sync_event = os_event_create();
2405
2406 file.is_raw_disk = is_raw;
2407
2408 file.size = size;
2409
2410 file.flush_size = size;
2411
2412 file.magic_n = FIL_NODE_MAGIC_N;
2413
2414 file.init_size = size;
2415
2416 file.max_size = max_pages;
2417
2418 file.space = space;
2419
2420 os_file_stat_t stat_info;
2421
2422 #ifdef UNIV_DEBUG
2423 dberr_t err =
2424 #endif /* UNIV_DEBUG */
2425
2426 os_file_get_status(
2427 file.name, &stat_info, false,
2428 fsp_is_system_temporary(space->id) ? true : srv_read_only_mode);
2429
2430 ut_ad(err == DB_SUCCESS);
2431
2432 file.block_size = stat_info.block_size;
2433
2434 /* In this debugging mode, we can overcome the limitation of some
2435 OSes like Windows that support Punch Hole but have a hole size
2436 effectively too large. By setting the block size to be half the
2437 page size, we can bypass one of the checks that would normally
2438 turn Page Compression off. This execution mode allows compression
2439 to be tested even when full punch hole support is not available. */
2440 DBUG_EXECUTE_IF(
2441 "ignore_punch_hole",
2442 file.block_size = ut_min(static_cast<ulint>(stat_info.block_size),
2443 UNIV_PAGE_SIZE / 2););
2444
2445 if (!IORequest::is_punch_hole_supported() || !punch_hole ||
2446 file.block_size >= srv_page_size) {
2447 fil_no_punch_hole(&file);
2448 } else {
2449 file.punch_hole = punch_hole;
2450 }
2451
2452 file.atomic_write = atomic_write;
2453
2454 mutex_acquire();
2455
2456 space->size += size;
2457
2458 space->files.push_back(file);
2459
2460 mutex_release();
2461
2462 ut_a(space->id == TRX_SYS_SPACE ||
2463 space->id == dict_sys_t::s_log_space_first_id ||
2464 space->purpose == FIL_TYPE_TEMPORARY || space->files.size() == 1);
2465
2466 return (&space->files.front());
2467 }
2468
2469 /** Attach a file to a tablespace. File must be closed.
2470 @param[in] name file name (file must be closed)
2471 @param[in] size file size in database blocks, rounded
2472 downwards to an integer
2473 @param[in,out] space space where to append
2474 @param[in] is_raw true if a raw device or a raw disk partition
2475 @param[in] atomic_write true if the file has atomic write enabled
2476 @param[in] max_pages maximum number of pages in file
2477 @return pointer to the file name
2478 @retval nullptr if error */
fil_node_create(const char * name,page_no_t size,fil_space_t * space,bool is_raw,bool atomic_write,page_no_t max_pages)2479 char *fil_node_create(const char *name, page_no_t size, fil_space_t *space,
2480 bool is_raw, bool atomic_write, page_no_t max_pages) {
2481 auto shard = fil_system->shard_by_id(space->id);
2482
2483 fil_node_t *file;
2484
2485 file = shard->create_node(name, size, space, is_raw,
2486 IORequest::is_punch_hole_supported(), atomic_write,
2487 max_pages);
2488
2489 return (file == nullptr ? nullptr : file->name);
2490 }
2491
2492 /** First we open the file in the normal mode, no async I/O here, for
2493 simplicity. Then do some checks, and close the file again. NOTE that we
2494 could not use the simple file read function os_file_read() in Windows
2495 to read from a file opened for async I/O!
2496 @param[in,out] file Get the size of this file
2497 @param[in] read_only_mode true if read only mode set
2498 @return DB_SUCCESS or error */
get_file_size(fil_node_t * file,bool read_only_mode)2499 dberr_t Fil_shard::get_file_size(fil_node_t *file, bool read_only_mode) {
2500 bool success;
2501 fil_space_t *space = file->space;
2502
2503 do {
2504 ut_a(!file->is_open);
2505
2506 file->handle = os_file_create_simple_no_error_handling(
2507 innodb_data_file_key, file->name, OS_FILE_OPEN, OS_FILE_READ_ONLY,
2508 read_only_mode, &success);
2509
2510 if (!success) {
2511 /* The following call prints an error message */
2512 ulint err = os_file_get_last_error(true);
2513
2514 if (err == EMFILE + 100) {
2515 if (close_files_in_LRU(true)) {
2516 continue;
2517 }
2518 }
2519
2520 ib::warn(ER_IB_MSG_268) << "Cannot open '" << file->name
2521 << "'."
2522 " Have you deleted .ibd files under a"
2523 " running mysqld server?";
2524
2525 return (DB_ERROR);
2526 }
2527
2528 } while (!success);
2529
2530 os_offset_t size_bytes = os_file_get_size(file->handle);
2531
2532 ut_a(size_bytes != (os_offset_t)-1);
2533
2534 #ifdef UNIV_HOTBACKUP
2535 if (space->id == TRX_SYS_SPACE) {
2536 file->size = (ulint)(size_bytes / UNIV_PAGE_SIZE);
2537 space->size += file->size;
2538 os_file_close(file->handle);
2539 return (DB_SUCCESS);
2540 }
2541 #endif /* UNIV_HOTBACKUP */
2542
2543 ut_a(space->purpose != FIL_TYPE_LOG);
2544
2545 /* Read the first page of the tablespace */
2546
2547 byte *buf2 = static_cast<byte *>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
2548
2549 /* Align memory for file I/O if we might have O_DIRECT set */
2550
2551 byte *page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));
2552
2553 ut_ad(page == page_align(page));
2554
2555 IORequest request(IORequest::READ);
2556
2557 dberr_t err = os_file_read_first_page(request, file->name, file->handle, page,
2558 UNIV_PAGE_SIZE);
2559
2560 ut_a(err == DB_SUCCESS);
2561
2562 os_file_close(file->handle);
2563
2564 uint32_t flags = fsp_header_get_flags(page);
2565 space_id_t space_id = fsp_header_get_space_id(page);
2566
2567 /* To determine if tablespace is from 5.7 or not, we
2568 rely on SDI flag. For IBDs from 5.7, which are opened
2569 during import or during upgrade, their initial size
2570 is lesser than the initial size in 8.0 */
2571 bool has_sdi = FSP_FLAGS_HAS_SDI(flags);
2572
2573 uint8_t expected_size =
2574 has_sdi ? FIL_IBD_FILE_INITIAL_SIZE : FIL_IBD_FILE_INITIAL_SIZE_5_7;
2575
2576 const page_size_t page_size(flags);
2577
2578 ulint min_size = expected_size * page_size.physical();
2579
2580 if (size_bytes < min_size) {
2581 if (has_sdi) {
2582 /** Add some tolerance when the tablespace is upgraded. If an empty
2583 general tablespace is created in 5.7, and then upgraded to 8.0, then
2584 its size changes from FIL_IBD_FILE_INITIAL_SIZE_5_7 pages to
2585 FIL_IBD_FILE_INITIAL_SIZE-1. */
2586
2587 ut_ad(expected_size == FIL_IBD_FILE_INITIAL_SIZE);
2588 ulint upgrade_size = (expected_size - 1) * page_size.physical();
2589
2590 if (size_bytes < upgrade_size) {
2591 ib::error(ER_IB_MSG_269)
2592 << "The size of tablespace file " << file->name << " is only "
2593 << size_bytes << ", should be at least " << upgrade_size << "!";
2594
2595 ut_error;
2596 }
2597
2598 } else {
2599 ib::error(ER_IB_MSG_269)
2600 << "The size of tablespace file " << file->name << " is only "
2601 << size_bytes << ", should be at least " << min_size << "!";
2602
2603 ut_error;
2604 }
2605 }
2606
2607 if (space_id != space->id) {
2608 ib::fatal(ER_IB_MSG_270) << "Tablespace id is " << space->id
2609 << " in the data dictionary but in file "
2610 << file->name << " it is " << space_id << "!";
2611 }
2612
2613 /* We need to adjust for compressed pages. */
2614 const page_size_t space_page_size(space->flags);
2615
2616 if (!page_size.equals_to(space_page_size)) {
2617 ib::fatal(ER_IB_MSG_271)
2618 << "Tablespace file " << file->name << " has page size " << page_size
2619 << " (flags=" << ib::hex(flags) << ") but the data dictionary expects"
2620 << " page size " << space_page_size
2621 << " (flags=" << ib::hex(space->flags) << ")!";
2622 }
2623
2624 /* If the SDI flag is set in the file header page, set it in space->flags. */
2625 space->flags |= flags & FSP_FLAGS_MASK_SDI;
2626
2627 #ifndef UNIV_HOTBACKUP
2628 /* It is possible that
2629 - For general tablespace, encryption flag is updated on disk but server
2630 crashed before DD could be updated OR
2631 - For DD tablespace, encryption flag is updated on disk.
2632 */
2633 if (FSP_FLAGS_GET_ENCRYPTION(flags)) {
2634 space->flags |= flags & FSP_FLAGS_MASK_ENCRYPTION;
2635 }
2636 #endif /* UNIV_HOTBACKUP */
2637
2638 /* Make a copy of space->flags and flags from the page header
2639 so that they can be compared. */
2640 uint32_t fil_space_flags = space->flags;
2641 uint32_t header_fsp_flags = flags;
2642
2643 /* If a crash occurs while an UNDO space is being truncated,
2644 it will be created new at startup. In that case, the fil_space_t
2645 object will have the ENCRYPTION flag set, but the header page will
2646 not be marked until the srv_master_thread gets around to it.
2647 The opposite can occur where the header page contains the encryption
2648 flag but the fil_space_t does not. It could happen that undo
2649 encryption was turned off just before the crash or shutdown so that
2650 the srv_master_thread did not yet have time to apply it.
2651 So don't compare the encryption flag for undo tablespaces. */
2652 if (fsp_is_undo_tablespace(space->id)) {
2653 fsp_flags_unset_encryption(fil_space_flags);
2654 fsp_flags_unset_encryption(header_fsp_flags);
2655 }
2656
2657 /* Make sure the space_flags are the same as the header page flags. */
2658 if (fil_space_flags != header_fsp_flags) {
2659 ib::error(ER_IB_MSG_272, ulong{space->flags}, file->name, ulonglong{flags});
2660 ut_error;
2661 }
2662
2663 {
2664 page_no_t size = fsp_header_get_field(page, FSP_SIZE);
2665
2666 page_no_t free_limit;
2667
2668 free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);
2669
2670 ulint free_len;
2671
2672 free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);
2673
2674 ut_ad(space->free_limit == 0 || space->free_limit == free_limit);
2675
2676 ut_ad(space->free_len == 0 || space->free_len == free_len);
2677
2678 space->size_in_header = size;
2679 space->free_limit = free_limit;
2680
2681 ut_a(free_len < std::numeric_limits<uint32_t>::max());
2682
2683 space->free_len = (uint32_t)free_len;
2684 }
2685
2686 ut_free(buf2);
2687
2688 /* For encrypted tablespace, we need to check the
2689 encryption key and iv(initial vector) is read. */
2690 if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && !recv_recovery_is_on() &&
2691 space->encryption_type != Encryption::AES) {
2692 ib::error(ER_IB_MSG_273, file->name);
2693
2694 return (DB_ERROR);
2695 }
2696
2697 if (file->size == 0) {
2698 ulint extent_size;
2699
2700 extent_size = page_size.physical() * FSP_EXTENT_SIZE;
2701
2702 #ifndef UNIV_HOTBACKUP
2703 /* Truncate the size to a multiple of extent size. */
2704 if (size_bytes >= extent_size) {
2705 size_bytes = ut_2pow_round(size_bytes, extent_size);
2706 }
2707 #else /* !UNIV_HOTBACKUP */
2708
2709 /* After apply-incremental, tablespaces are not
2710 extended to a whole megabyte. Do not cut off
2711 valid data. */
2712
2713 #endif /* !UNIV_HOTBACKUP */
2714
2715 file->size = static_cast<page_no_t>(size_bytes / page_size.physical());
2716
2717 space->size += file->size;
2718 }
2719
2720 return (DB_SUCCESS);
2721 }
2722
2723 /** Open a file of a tablespace.
2724 The caller must own the shard mutex.
2725 @param[in,out] file Tablespace file
2726 @param[in] extend true if the file is being extended
2727 @return false if the file can't be opened, otherwise true */
open_file(fil_node_t * file,bool extend)2728 bool Fil_shard::open_file(fil_node_t *file, bool extend) {
2729 bool success;
2730 fil_space_t *space = file->space;
2731
2732 ut_ad(m_id == REDO_SHARD || mutex_owned());
2733
2734 ut_a(!file->is_open);
2735 ut_a(file->n_pending == 0);
2736
2737 while (file->in_use > 0) {
2738 /* We increment the reference count when extending
2739 the file. */
2740 if (file->in_use == 1 && extend) {
2741 break;
2742 }
2743
2744 mutex_release();
2745
2746 os_thread_sleep(100000);
2747
2748 mutex_acquire();
2749 }
2750
2751 if (file->is_open) {
2752 return (true);
2753 }
2754
2755 bool read_only_mode;
2756
2757 read_only_mode = !fsp_is_system_temporary(space->id) && srv_read_only_mode;
2758
2759 if (file->size == 0 ||
2760 (space->size_in_header == 0 && space->purpose == FIL_TYPE_TABLESPACE &&
2761 file == &space->files.front()
2762 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
2763 && undo::is_active(space->id, false) &&
2764 srv_startup_is_before_trx_rollback_phase
2765 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
2766 )) {
2767
2768 /* We don't know the file size yet. */
2769 dberr_t err = get_file_size(file, read_only_mode);
2770
2771 if (err != DB_SUCCESS) {
2772 return (false);
2773 }
2774 }
2775
2776 /* Open the file for reading and writing, in Windows normally in the
2777 unbuffered async I/O mode, though global variables may make
2778 os_file_create() to fall back to the normal file I/O mode. */
2779
2780 if (space->purpose == FIL_TYPE_LOG) {
2781 file->handle =
2782 os_file_create(innodb_log_file_key, file->name, OS_FILE_OPEN,
2783 OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success);
2784 } else if (file->is_raw_disk) {
2785 file->handle =
2786 os_file_create(innodb_data_file_key, file->name, OS_FILE_OPEN_RAW,
2787 OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
2788 } else {
2789 file->handle =
2790 os_file_create(innodb_data_file_key, file->name, OS_FILE_OPEN,
2791 OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
2792 }
2793
2794 if (success) {
2795 /* The file is ready for IO. */
2796 file_opened(file);
2797 }
2798
2799 return (success);
2800 }
2801
2802 /** Close a tablespace file.
2803 @param[in] LRU_close true if called from LRU close
2804 @param[in,out] file Tablespace file to close */
close_file(fil_node_t * file,bool LRU_close)2805 void Fil_shard::close_file(fil_node_t *file, bool LRU_close) {
2806 ut_ad(mutex_owned());
2807
2808 ut_a(file->is_open);
2809 ut_a(file->in_use == 0);
2810 ut_a(file->n_pending == 0);
2811 ut_a(file->n_pending_flushes == 0);
2812
2813 #ifndef UNIV_HOTBACKUP
2814 ut_a(file->modification_counter == file->flush_counter ||
2815 file->space->purpose == FIL_TYPE_TEMPORARY || srv_fast_shutdown == 2);
2816 #endif /* !UNIV_HOTBACKUP */
2817
2818 bool ret = os_file_close(file->handle);
2819
2820 ut_a(ret);
2821
2822 file->handle.m_file = (os_file_t)-1;
2823
2824 file->is_open = false;
2825
2826 ut_a(s_n_open > 0);
2827
2828 --s_n_open;
2829
2830 fil_n_file_opened = s_n_open;
2831
2832 remove_from_LRU(file);
2833 }
2834
2835 /** Tries to close a file in the LRU list.
2836 @param[in] print_info if true, prints information why it cannot close
2837 a file
2838 @return true if success, false if should retry later */
close_files_in_LRU(bool print_info)2839 bool Fil_shard::close_files_in_LRU(bool print_info) {
2840 ut_ad(mutex_owned());
2841
2842 for (auto file = UT_LIST_GET_LAST(m_LRU); file != nullptr;
2843 file = UT_LIST_GET_PREV(LRU, file)) {
2844 if (file->modification_counter == file->flush_counter &&
2845 file->n_pending_flushes == 0 && file->in_use == 0) {
2846 close_file(file, true);
2847
2848 return (true);
2849 }
2850
2851 if (!print_info) {
2852 continue;
2853 }
2854
2855 if (file->n_pending_flushes > 0) {
2856 ib::info(ER_IB_MSG_274, file->name, file->n_pending_flushes);
2857 }
2858
2859 /* Prior to sharding the counters were under a global
2860 mutex. Now they are spread across the shards. Therefore
2861 it is normal for the modification counter to be out of
2862 sync with the flush counter for files that are in differnet
2863 shards. */
2864
2865 if (file->modification_counter != file->flush_counter) {
2866 ib::info(ER_IB_MSG_275, file->name, longlong{file->modification_counter},
2867 longlong{file->flush_counter});
2868 }
2869
2870 if (file->in_use > 0) {
2871 ib::info(ER_IB_MSG_276, file->name);
2872 }
2873 }
2874
2875 return (false);
2876 }
2877
2878 /** Tries to close a file in the LRU list.
2879 @param[in] print_info if true, prints information why it cannot close a file
2880 @return true if success, false if should retry later */
close_file_in_all_LRU(bool print_info)2881 bool Fil_system::close_file_in_all_LRU(bool print_info) {
2882 for (auto shard : m_shards) {
2883 shard->mutex_acquire();
2884
2885 if (print_info) {
2886 ib::info(ER_IB_MSG_277, shard->id(),
2887 ulonglong{UT_LIST_GET_LEN(shard->m_LRU)});
2888 }
2889
2890 bool success = shard->close_files_in_LRU(print_info);
2891
2892 shard->mutex_release();
2893
2894 if (success) {
2895 return (true);
2896 }
2897 }
2898
2899 return (false);
2900 }
2901
2902 /** We are going to do a rename file and want to stop new I/O for a while.
2903 @param[in] space Tablespace for which we want to wait for IO
2904 to stop */
wait_for_io_to_stop(const fil_space_t * space)2905 void Fil_shard::wait_for_io_to_stop(const fil_space_t *space) {
2906 /* Note: We are reading the value of space->stop_ios without the
2907 cover of the Fil_shard::mutex. We incremented the in_use counter
2908 before waiting for IO to stop. */
2909
2910 auto begin_time = ut_time_monotonic();
2911 auto start_time = begin_time;
2912
2913 /* Spam the log after every minute. Ignore any race here. */
2914
2915 while (space->stop_ios) {
2916 if ((ut_time_monotonic() - start_time) >= PRINT_INTERVAL_SECS) {
2917 start_time = ut_time_monotonic();
2918
2919 ib::warn(ER_IB_MSG_278, space->name,
2920 (long long)(ut_time_monotonic() - begin_time));
2921 }
2922
2923 #ifndef UNIV_HOTBACKUP
2924
2925 /* Wake the I/O handler threads to make sure
2926 pending I/O's are performed */
2927 os_aio_simulated_wake_handler_threads();
2928
2929 #endif /* UNIV_HOTBACKUP */
2930
2931 /* Give the IO threads some time to work. */
2932 os_thread_yield();
2933 }
2934 }
2935
2936 /** We keep log files and system tablespace files always open; this is
2937 important in preventing deadlocks in this module, as a page read
2938 completion often performs another read from the insert buffer. The
2939 insert buffer is in tablespace TRX_SYS_SPACE, and we cannot end up
2940 waiting in this function.
2941 @param[in] space_id Tablespace ID to look up
2942 @return tablespace instance */
get_reserved_space(space_id_t space_id)2943 fil_space_t *Fil_shard::get_reserved_space(space_id_t space_id) {
2944 if (space_id == TRX_SYS_SPACE) {
2945 return (fil_space_t::s_sys_space);
2946
2947 } else if (space_id == dict_sys_t::s_log_space_first_id &&
2948 fil_space_t::s_redo_space != nullptr) {
2949 return (fil_space_t::s_redo_space);
2950 }
2951
2952 return (get_space_by_id(space_id));
2953 }
2954
2955 /** Reserves the mutex and tries to make sure we can open at least
2956 one file while holding it. This should be called before calling
2957 prepare_file_for_io(), because that function may need to open a file.
2958 @param[in] space_id Tablespace ID
2959 @param[out] space Tablespace instance
2960 @return true if a slot was reserved. */
mutex_acquire_and_get_space(space_id_t space_id,fil_space_t * & space)2961 bool Fil_shard::mutex_acquire_and_get_space(space_id_t space_id,
2962 fil_space_t *&space) {
2963 mutex_acquire();
2964
2965 if (space_id == TRX_SYS_SPACE || dict_sys_t::is_reserved(space_id)) {
2966 space = get_reserved_space(space_id);
2967
2968 return (false);
2969 }
2970
2971 space = get_space_by_id(space_id);
2972
2973 if (space == nullptr) {
2974 /* Caller handles the case of a missing tablespce. */
2975 return (false);
2976 }
2977
2978 ut_ad(space->files.size() == 1);
2979
2980 auto is_open = space->files.front().is_open;
2981
2982 if (is_open) {
2983 /* Ensure that the file is not closed behind our back. */
2984 ++space->files.front().in_use;
2985 }
2986
2987 mutex_release();
2988
2989 if (is_open) {
2990 wait_for_io_to_stop(space);
2991
2992 mutex_acquire();
2993
2994 /* We are guaranteed that this file cannot be closed
2995 because we now own the mutex. */
2996
2997 ut_ad(space->files.front().in_use > 0);
2998 --space->files.front().in_use;
2999
3000 return (false);
3001 }
3002
3003 /* The number of open file descriptors is a shared resource, in
3004 order to guarantee that we don't over commit, we use a ticket system
3005 to reserve a slot/ticket to open a file. This slot/ticket should
3006 be released after the file is opened. */
3007
3008 while (!reserve_open_slot(m_id)) {
3009 os_thread_yield();
3010 }
3011
3012 auto begin_time = ut_time_monotonic();
3013 auto start_time = begin_time;
3014 auto last_wake_time = begin_time;
3015
3016 for (size_t i = 0; i < 3; ++i) {
3017 /* Flush tablespaces so that we can close modified
3018 files in the LRU list */
3019
3020 auto type = to_int(FIL_TYPE_TABLESPACE);
3021
3022 fil_system->flush_file_spaces(type);
3023
3024 os_thread_yield();
3025
3026 /* Reserve an open slot for this shard. So that this
3027 shard's open file succeeds. */
3028
3029 while (fil_system->m_max_n_open <= s_n_open &&
3030 !fil_system->close_file_in_all_LRU(i > 1)) {
3031 if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
3032 start_time = ut_time_monotonic();
3033
3034 ib::warn(ER_IB_MSG_279) << "Trying to close a file for "
3035 << start_time - begin_time << " seconds"
3036 << ". Configuration only allows for "
3037 << fil_system->m_max_n_open << " open files.";
3038 }
3039 if (ut_difftime(ut_time(), last_wake_time) > 1.0) {
3040 /* We've spent more than a second trying to close some of the open files
3041 without any luck. We can hang in this loop forewer, because:
3042 - files cannot be closed, they have pending IO requests
3043 - aio handler threads are waiting in os_aio_simulated_handler */
3044
3045 /* in order to break the loop, lets wake aio handler threads */
3046 os_aio_simulated_wake_handler_threads();
3047
3048 os_thread_yield();
3049
3050 /* and flush the changes so that files with no pending IOs can be
3051 closed */
3052 fil_system->flush_file_spaces(type);
3053
3054 last_wake_time = ut_time();
3055 }
3056 }
3057
3058 if (fil_system->m_max_n_open > s_n_open) {
3059 break;
3060 }
3061
3062 #ifndef UNIV_HOTBACKUP
3063 /* Wake the I/O-handler threads to make sure pending I/Os are
3064 performed */
3065 os_aio_simulated_wake_handler_threads();
3066
3067 os_thread_yield();
3068 #endif /* !UNIV_HOTBACKUP */
3069 }
3070
3071 mutex_acquire();
3072
3073 return (true);
3074 }
3075
3076 /** Prepare to free a file. Remove from the unflushed list if there
3077 are no pending flushes.
3078 @param[in,out] file File instance to free */
prepare_to_free_file(fil_node_t * file)3079 void Fil_shard::prepare_to_free_file(fil_node_t *file) {
3080 ut_ad(mutex_owned());
3081
3082 fil_space_t *space = file->space;
3083
3084 if (space->is_in_unflushed_spaces && space_is_flushed(space)) {
3085 space->is_in_unflushed_spaces = false;
3086
3087 UT_LIST_REMOVE(m_unflushed_spaces, space);
3088 }
3089 }
3090
3091 /** Prepare to free a file object from a tablespace memory cache.
3092 @param[in,out] file Tablespace file
3093 @param[in] space tablespace */
file_close_to_free(fil_node_t * file,fil_space_t * space)3094 void Fil_shard::file_close_to_free(fil_node_t *file, fil_space_t *space) {
3095 ut_ad(mutex_owned());
3096 ut_a(file->magic_n == FIL_NODE_MAGIC_N);
3097 ut_a(file->n_pending == 0);
3098 ut_a(file->in_use == 0);
3099 ut_a(file->space == space);
3100
3101 if (file->is_open) {
3102 /* We fool the assertion in Fil_system::close_file() to think
3103 there are no unflushed modifications in the file */
3104
3105 file->modification_counter = file->flush_counter;
3106
3107 os_event_set(file->sync_event);
3108
3109 if (fil_buffering_disabled(space)) {
3110 ut_ad(!space->is_in_unflushed_spaces);
3111 ut_ad(space_is_flushed(space));
3112
3113 } else {
3114 prepare_to_free_file(file);
3115 }
3116
3117 /* TODO: set second parameter to true, so to release
3118 fil_system mutex before logging tablespace name and id.
3119 To go around Bug#26271853 - POTENTIAL DEADLOCK BETWEEN
3120 FIL_SYSTEM MUTEX AND LOG MUTEX */
3121 close_file(file, true);
3122 }
3123 }
3124
space_detach(fil_space_t * space)3125 void Fil_shard::space_detach(fil_space_t *space) {
3126 ut_ad(mutex_owned());
3127
3128 m_names.erase(space->name);
3129
3130 if (space->is_in_unflushed_spaces) {
3131 ut_ad(!fil_buffering_disabled(space));
3132
3133 space->is_in_unflushed_spaces = false;
3134
3135 UT_LIST_REMOVE(m_unflushed_spaces, space);
3136 }
3137
3138 ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
3139 ut_a(space->n_pending_flushes == 0);
3140
3141 for (auto &file : space->files) {
3142 file_close_to_free(&file, space);
3143 }
3144 }
3145
3146 /** Free a tablespace object on which fil_space_detach() was invoked.
3147 There must not be any pending I/O's or flushes on the files.
3148 @param[in,out] space tablespace */
space_free_low(fil_space_t * & space)3149 void Fil_shard::space_free_low(fil_space_t *&space) {
3150 for (auto &file : space->files) {
3151 ut_d(space->size -= file.size);
3152
3153 os_event_destroy(file.sync_event);
3154
3155 ut_free(file.name);
3156 }
3157
3158 call_destructor(&space->files);
3159
3160 ut_ad(space->size == 0);
3161
3162 rw_lock_free(&space->latch);
3163
3164 ut_free(space->name);
3165 ut_free(space);
3166
3167 space = nullptr;
3168 }
3169
3170 /** Frees a space object from the tablespace memory cache.
3171 Closes a tablespaces' files but does not delete them.
3172 There must not be any pending I/O's or flushes on the files.
3173 @param[in] space_id Tablespace ID
3174 @return fil_space_t instance on success or nullptr */
space_free(space_id_t space_id)3175 fil_space_t *Fil_shard::space_free(space_id_t space_id) {
3176 mutex_acquire();
3177
3178 fil_space_t *space = get_space_by_id(space_id);
3179
3180 if (space != nullptr) {
3181 space_detach(space);
3182
3183 space_delete(space_id);
3184 }
3185
3186 mutex_release();
3187
3188 return (space);
3189 }
3190
3191 /** Frees a space object from the tablespace memory cache.
3192 Closes a tablespaces' files but does not delete them.
3193 There must not be any pending i/o's or flushes on the files.
3194 @param[in] space_id Tablespace ID
3195 @param[in] x_latched Whether the caller holds X-mode space->latch
3196 @return true if success */
fil_space_free(space_id_t space_id,bool x_latched)3197 static bool fil_space_free(space_id_t space_id, bool x_latched) {
3198 ut_ad(space_id != TRX_SYS_SPACE);
3199
3200 auto shard = fil_system->shard_by_id(space_id);
3201 auto space = shard->space_free(space_id);
3202
3203 if (space == nullptr) {
3204 return (false);
3205 }
3206
3207 if (x_latched) {
3208 rw_lock_x_unlock(&space->latch);
3209 }
3210
3211 Fil_shard::space_free_low(space);
3212 ut_a(space == nullptr);
3213
3214 return (true);
3215 }
3216
#ifdef UNIV_HOTBACKUP
/** Frees a space object from the tablespace memory cache.
Closes a tablespaces' files but does not delete them.
There must not be any pending i/o's or flushes on the files.
When the freed tablespace is the redo log space, the cached global
pointer to it is reset as well.
@param[in]  space_id  Tablespace ID
@return true if success */
bool meb_fil_space_free(space_id_t space_id) {
  const bool freed = fil_space_free(space_id, false);

  if (freed && space_id == dict_sys_t::s_log_space_first_id) {
    /* we freed redo log tablespace, clear the global variable for it */
    fil_space_t::s_redo_space = nullptr;
  }

  return (freed);
}
#endif /* UNIV_HOTBACKUP */
3234
3235 /** Create a space memory object and put it to the fil_system hash table.
3236 The tablespace name is independent from the tablespace file-name.
3237 Error messages are issued to the server log.
3238 @param[in] name Tablespace name
3239 @param[in] space_id Tablespace identifier
3240 @param[in] flags Tablespace flags
3241 @param[in] purpose Tablespace purpose
3242 @return pointer to created tablespace, to be filled in with fil_node_create()
3243 @retval nullptr on failure (such as when the same tablespace exists) */
space_create(const char * name,space_id_t space_id,uint32_t flags,fil_type_t purpose)3244 fil_space_t *Fil_shard::space_create(const char *name, space_id_t space_id,
3245 uint32_t flags, fil_type_t purpose) {
3246 ut_ad(mutex_owned());
3247
3248 /* Look for a matching tablespace. */
3249 fil_space_t *space = get_space_by_name(name);
3250
3251 if (space == nullptr) {
3252 space = get_space_by_id(space_id);
3253 }
3254
3255 if (space != nullptr) {
3256 std::ostringstream oss;
3257
3258 for (size_t i = 0; i < space->files.size(); ++i) {
3259 oss << "'" << space->files[i].name << "'";
3260
3261 if (i < space->files.size() - 1) {
3262 oss << ", ";
3263 }
3264 }
3265
3266 ib::info(ER_IB_MSG_281)
3267 << "Trying to add tablespace '" << name << "'"
3268 << " with id " << space_id << " to the tablespace"
3269 << " memory cache, but tablespace"
3270 << " '" << space->name << "'"
3271 << " already exists in the cache with space ID " << space->id
3272 << ". It maps to the following file(s): " << oss.str();
3273
3274 return (nullptr);
3275 }
3276
3277 space = static_cast<fil_space_t *>(ut_zalloc_nokey(sizeof(*space)));
3278
3279 space->id = space_id;
3280
3281 space->name = mem_strdup(name);
3282
3283 new (&space->files) fil_space_t::Files();
3284
3285 #ifndef UNIV_HOTBACKUP
3286 if (fil_system->is_greater_than_max_id(space_id) &&
3287 fil_type_is_data(purpose) && !recv_recovery_on &&
3288 !dict_sys_t::is_reserved(space_id) &&
3289 !fsp_is_system_temporary(space_id)) {
3290 fil_system->set_maximum_space_id(space);
3291 }
3292 #endif /* !UNIV_HOTBACKUP */
3293
3294 space->purpose = purpose;
3295
3296 ut_a(flags < std::numeric_limits<uint32_t>::max());
3297 space->flags = (uint32_t)flags;
3298
3299 space->magic_n = FIL_SPACE_MAGIC_N;
3300
3301 space->encryption_type = Encryption::NONE;
3302
3303 rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
3304
3305 #ifndef UNIV_HOTBACKUP
3306 if (space->purpose == FIL_TYPE_TEMPORARY) {
3307 ut_d(space->latch.set_temp_fsp());
3308 }
3309 #endif /* !UNIV_HOTBACKUP */
3310
3311 space_add(space);
3312
3313 return (space);
3314 }
3315
3316 /** Create a space memory object and put it to the fil_system hash table.
3317 The tablespace name is independent from the tablespace file-name.
3318 Error messages are issued to the server log.
3319 @param[in] name Tablespace name
3320 @param[in] space_id Tablespace ID
3321 @param[in] flags Tablespace flags
3322 @param[in] purpose Tablespace purpose
3323 @return pointer to created tablespace, to be filled in with fil_node_create()
3324 @retval nullptr on failure (such as when the same tablespace exists) */
fil_space_create(const char * name,space_id_t space_id,uint32_t flags,fil_type_t purpose)3325 fil_space_t *fil_space_create(const char *name, space_id_t space_id,
3326 uint32_t flags, fil_type_t purpose) {
3327 ut_ad(fsp_flags_is_valid(flags));
3328 ut_ad(srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
3329
3330 DBUG_EXECUTE_IF("fil_space_create_failure", return (nullptr););
3331
3332 if (purpose != FIL_TYPE_TEMPORARY) {
3333 /* Mark the clone as aborted only while executing a DDL which creates
3334 a base table, as any temporary table is ignored while cloning the database.
3335 Clone state must be set back to active before returning from function. */
3336 clone_mark_abort(true);
3337 }
3338
3339 fil_system->mutex_acquire_all();
3340
3341 auto shard = fil_system->shard_by_id(space_id);
3342
3343 auto space = shard->space_create(name, space_id, flags, purpose);
3344
3345 if (space == nullptr) {
3346 /* Duplicate error. */
3347 fil_system->mutex_release_all();
3348
3349 if (purpose != FIL_TYPE_TEMPORARY) {
3350 clone_mark_active();
3351 }
3352
3353 return (nullptr);
3354 }
3355
3356 /* Cache the system tablespaces, avoid looking them up during IO. */
3357
3358 if (space->id == TRX_SYS_SPACE) {
3359 ut_a(fil_space_t::s_sys_space == nullptr ||
3360 fil_space_t::s_sys_space == space);
3361
3362 fil_space_t::s_sys_space = space;
3363
3364 } else if (space->id == dict_sys_t::s_log_space_first_id) {
3365 ut_a(fil_space_t::s_redo_space == nullptr ||
3366 fil_space_t::s_redo_space == space);
3367
3368 fil_space_t::s_redo_space = space;
3369 }
3370
3371 fil_system->mutex_release_all();
3372
3373 if (purpose != FIL_TYPE_TEMPORARY) {
3374 clone_mark_active();
3375 }
3376
3377 return (space);
3378 }
3379
3380 /** Assigns a new space id for a new single-table tablespace. This works
3381 simply by incrementing the global counter. If 4 billion id's is not enough,
3382 we may need to recycle id's.
3383 @param[out] space_id Set this to the new tablespace ID
3384 @return true if assigned, false if not */
assign_new_space_id(space_id_t * space_id)3385 bool Fil_system::assign_new_space_id(space_id_t *space_id) {
3386 mutex_acquire_all();
3387
3388 space_id_t id = *space_id;
3389
3390 if (id < m_max_assigned_id) {
3391 id = m_max_assigned_id;
3392 }
3393
3394 ++id;
3395
3396 space_id_t reserved_space_id = dict_sys_t::s_reserved_space_id;
3397
3398 if (id > (reserved_space_id / 2) && (id % 1000000UL == 0)) {
3399 ib::warn(ER_IB_MSG_282)
3400 << "You are running out of new single-table"
3401 " tablespace id's. Current counter is "
3402 << id << " and it must not exceed " << reserved_space_id
3403 << "! To reset the counter to zero you have to dump"
3404 " all your tables and recreate the whole InnoDB"
3405 " installation.";
3406 }
3407
3408 bool success = !dict_sys_t::is_reserved(id);
3409
3410 if (success) {
3411 *space_id = m_max_assigned_id = id;
3412
3413 } else {
3414 ib::warn(ER_IB_MSG_283) << "You have run out of single-table tablespace"
3415 " id's! Current counter is "
3416 << id
3417 << ". To reset the counter to zero"
3418 " you have to dump all your tables and"
3419 " recreate the whole InnoDB installation.";
3420
3421 *space_id = SPACE_UNKNOWN;
3422 }
3423
3424 mutex_release_all();
3425
3426 return (success);
3427 }
3428
3429 /** Assigns a new space id for a new single-table tablespace. This works
3430 simply by incrementing the global counter. If 4 billion id's is not enough,
3431 we may need to recycle id's.
3432 @param[out] space_id Set this to the new tablespace ID
3433 @return true if assigned, false if not */
fil_assign_new_space_id(space_id_t * space_id)3434 bool fil_assign_new_space_id(space_id_t *space_id) {
3435 return (fil_system->assign_new_space_id(space_id));
3436 }
3437
3438 /** Opens the files associated with a tablespace and returns a pointer to
3439 the fil_space_t that is in the memory cache associated with a space id.
3440 @param[in] space_id Get the tablespace instance or this ID
3441 @return file_space_t pointer, nullptr if space not found */
space_load(space_id_t space_id)3442 fil_space_t *Fil_shard::space_load(space_id_t space_id) {
3443 ut_ad(mutex_owned());
3444
3445 fil_space_t *space = get_space_by_id(space_id);
3446
3447 if (space == nullptr || space->size != 0) {
3448 return (space);
3449 }
3450
3451 switch (space->purpose) {
3452 case FIL_TYPE_LOG:
3453 break;
3454
3455 case FIL_TYPE_IMPORT:
3456 case FIL_TYPE_TEMPORARY:
3457 case FIL_TYPE_TABLESPACE:
3458
3459 ut_a(space_id != TRX_SYS_SPACE);
3460
3461 mutex_release();
3462
3463 auto slot = mutex_acquire_and_get_space(space_id, space);
3464
3465 if (space == nullptr) {
3466 if (slot) {
3467 release_open_slot(m_id);
3468 }
3469
3470 return (nullptr);
3471 }
3472
3473 ut_a(1 == space->files.size());
3474
3475 {
3476 auto file = &space->files.front();
3477
3478 /* It must be a single-table tablespace and
3479 we have not opened the file yet; the following
3480 calls will open it and update the size fields */
3481
3482 bool success = prepare_file_for_io(file, false);
3483
3484 if (slot) {
3485 release_open_slot(m_id);
3486 }
3487
3488 if (!success) {
3489 /* The single-table tablespace can't be opened,
3490 because the ibd file is missing. */
3491
3492 return (nullptr);
3493 }
3494
3495 complete_io(file, IORequestRead);
3496 }
3497 }
3498
3499 return (space);
3500 }
3501
3502 /** Returns the path from the first fil_node_t found with this space ID.
3503 The caller is responsible for freeing the memory allocated here for the
3504 value returned.
3505 @param[in] space_id Tablespace ID
3506 @return own: A copy of fil_node_t::path, nullptr if space ID is zero
3507 or not found. */
fil_space_get_first_path(space_id_t space_id)3508 char *fil_space_get_first_path(space_id_t space_id) {
3509 auto shard = fil_system->shard_by_id(space_id);
3510
3511 shard->mutex_acquire();
3512
3513 fil_space_t *space = shard->space_load(space_id);
3514
3515 char *path;
3516
3517 if (space != nullptr) {
3518 path = mem_strdup(space->files.front().name);
3519 } else {
3520 path = nullptr;
3521 }
3522
3523 shard->mutex_release();
3524
3525 return (path);
3526 }
3527
3528 /** Returns the size of the space in pages. The tablespace must be cached
3529 in the memory cache.
3530 @param[in] space_id Tablespace ID
3531 @return space size, 0 if space not found */
fil_space_get_size(space_id_t space_id)3532 page_no_t fil_space_get_size(space_id_t space_id) {
3533 auto shard = fil_system->shard_by_id(space_id);
3534
3535 shard->mutex_acquire();
3536
3537 fil_space_t *space = shard->space_load(space_id);
3538
3539 page_no_t size = space ? space->size : 0;
3540
3541 shard->mutex_release();
3542
3543 return (size);
3544 }
3545
3546 /** Returns the flags of the space. The tablespace must be cached
3547 in the memory cache.
3548 @param[in] space_id Tablespace ID for which to get the flags
3549 @return flags, ULINT_UNDEFINED if space not found */
fil_space_get_flags(space_id_t space_id)3550 uint32_t fil_space_get_flags(space_id_t space_id) {
3551 auto shard = fil_system->shard_by_id(space_id);
3552
3553 shard->mutex_acquire();
3554
3555 fil_space_t *space = shard->space_load(space_id);
3556
3557 uint32_t flags;
3558
3559 flags = (space != nullptr) ? space->flags : UINT32_UNDEFINED;
3560
3561 shard->mutex_release();
3562
3563 return (flags);
3564 }
3565
3566 /** Open each file of a tablespace if not already open.
3567 @param[in] space_id tablespace identifier
3568 @retval true if all file nodes were opened
3569 @retval false on failure */
space_open(space_id_t space_id)3570 bool Fil_shard::space_open(space_id_t space_id) {
3571 ut_ad(mutex_owned());
3572
3573 fil_space_t *space = get_space_by_id(space_id);
3574
3575 for (auto &file : space->files) {
3576 if (!file.is_open && !open_file(&file, false)) {
3577 return (false);
3578 }
3579 }
3580
3581 return (true);
3582 }
3583
3584 /** Open each file of a tablespace if not already open.
3585 @param[in] space_id tablespace identifier
3586 @retval true if all file nodes were opened
3587 @retval false on failure */
fil_space_open(space_id_t space_id)3588 bool fil_space_open(space_id_t space_id) {
3589 auto shard = fil_system->shard_by_id(space_id);
3590
3591 shard->mutex_acquire();
3592
3593 bool success = shard->space_open(space_id);
3594
3595 shard->mutex_release();
3596
3597 return (success);
3598 }
3599
3600 /** Close each file of a tablespace if open.
3601 @param[in] space_id tablespace identifier */
fil_space_close(space_id_t space_id)3602 void fil_space_close(space_id_t space_id) {
3603 if (fil_system == nullptr) {
3604 return;
3605 }
3606
3607 auto shard = fil_system->shard_by_id(space_id);
3608
3609 shard->close_file(space_id);
3610 }
3611
3612 /** Returns the page size of the space and whether it is compressed or not.
3613 The tablespace must be cached in the memory cache.
3614 @param[in] space_id Tablespace ID
3615 @param[out] found true if tablespace was found
3616 @return page size */
fil_space_get_page_size(space_id_t space_id,bool * found)3617 const page_size_t fil_space_get_page_size(space_id_t space_id, bool *found) {
3618 const uint32_t flags = fil_space_get_flags(space_id);
3619
3620 if (flags == UINT32_UNDEFINED) {
3621 *found = false;
3622 return (univ_page_size);
3623 }
3624
3625 *found = true;
3626
3627 return (page_size_t(flags));
3628 }
3629
3630 /** Initializes the tablespace memory cache.
3631 @param[in] max_n_open Maximum number of open files */
fil_init(ulint max_n_open)3632 void fil_init(ulint max_n_open) {
3633 static_assert((1 << UNIV_PAGE_SIZE_SHIFT_MAX) == UNIV_PAGE_SIZE_MAX,
3634 "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX");
3635
3636 static_assert((1 << UNIV_PAGE_SIZE_SHIFT_MIN) == UNIV_PAGE_SIZE_MIN,
3637 "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN");
3638
3639 ut_a(fil_system == nullptr);
3640
3641 ut_a(max_n_open > 0);
3642
3643 fil_system = UT_NEW_NOKEY(Fil_system(MAX_SHARDS, max_n_open));
3644 }
3645
3646 /** Open all the system files.
3647 @param[in] max_n_open Maximum number of open files allowed
3648 @param[in,out] n_open Current number of open files */
open_system_tablespaces(size_t max_n_open,size_t * n_open)3649 void Fil_shard::open_system_tablespaces(size_t max_n_open, size_t *n_open) {
3650 mutex_acquire();
3651
3652 for (auto elem : m_spaces) {
3653 auto space = elem.second;
3654
3655 if (Fil_system::space_belongs_in_LRU(space)) {
3656 continue;
3657 }
3658
3659 for (auto &file : space->files) {
3660 if (!file.is_open) {
3661 if (!open_file(&file, false)) {
3662 /* This func is called during server's
3663 startup. If some file of log or system
3664 tablespace is missing, the server
3665 can't start successfully. So we should
3666 assert for it. */
3667 ut_a(0);
3668 }
3669
3670 ++*n_open;
3671 }
3672
3673 if (max_n_open < 10 + *n_open) {
3674 ib::warn(ER_IB_MSG_284, *n_open, max_n_open);
3675 }
3676 }
3677 }
3678
3679 mutex_release();
3680 }
3681
3682 /** Opens all log files and system tablespace data files in all shards. */
open_all_system_tablespaces()3683 void Fil_system::open_all_system_tablespaces() {
3684 size_t n_open = 0;
3685
3686 for (auto shard : m_shards) {
3687 shard->open_system_tablespaces(m_max_n_open, &n_open);
3688 }
3689 }
3690
3691 /** Opens all log files and system tablespace data files. They stay open
3692 until the database server shutdown. This should be called at a server
3693 startup after the space objects for the log and the system tablespace
3694 have been created. The purpose of this operation is to make sure we
3695 never run out of file descriptors if we need to read from the insert
3696 buffer or to write to the log. */
fil_open_log_and_system_tablespace_files()3697 void fil_open_log_and_system_tablespace_files() {
3698 fil_system->open_all_system_tablespaces();
3699 }
3700
3701 /** Close all open files. */
close_all_files()3702 void Fil_shard::close_all_files() {
3703 ut_ad(mutex_owned());
3704
3705 auto end = m_spaces.end();
3706
3707 for (auto it = m_spaces.begin(); it != end; it = m_spaces.erase(it)) {
3708 auto space = it->second;
3709
3710 ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
3711 space->id == dict_sys_t::s_log_space_first_id ||
3712 space->files.size() == 1);
3713
3714 if (space->id == dict_sys_t::s_log_space_first_id) {
3715 fil_space_t::s_redo_space = nullptr;
3716 }
3717
3718 for (auto &file : space->files) {
3719 if (file.is_open) {
3720 close_file(&file, false);
3721 }
3722 }
3723
3724 space_detach(space);
3725
3726 space_free_low(space);
3727
3728 ut_a(space == nullptr);
3729 }
3730 }
3731
3732 /** Close all open files. */
close_all_files()3733 void Fil_system::close_all_files() {
3734 for (auto shard : m_shards) {
3735 shard->mutex_acquire();
3736
3737 shard->close_all_files();
3738
3739 shard->mutex_release();
3740 }
3741
3742 #ifndef UNIV_HOTBACKUP
3743 /* Revert to old names if downgrading after upgrade failure. */
3744 if (srv_downgrade_partition_files) {
3745 rename_partition_files(true);
3746 }
3747
3748 clear_old_files();
3749 #endif /* !UNIV_HOTBACKUP */
3750 }
3751
3752 /** Closes all open files. There must not be any pending i/o's or not flushed
3753 modifications in the files. */
fil_close_all_files()3754 void fil_close_all_files() { fil_system->close_all_files(); }
3755
3756 /** Open a file of a tablespace.
3757 The caller must own the shard mutex.
3758 @param[in,out] file Tablespace file
3759 @return false if the file can't be opened, otherwise true */
fil_node_open_file(fil_node_t * file)3760 bool fil_node_open_file(fil_node_t *file) {
3761 fil_space_t *space = file->space;
3762
3763 auto shard = fil_system->shard_by_id(space->id);
3764
3765 shard->mutex_acquire();
3766
3767 bool res = shard->open_file(file, false);
3768
3769 shard->mutex_release();
3770
3771 return res;
3772 }
3773
3774 /** Closes a file.
3775 @param[in] file file to close. */
fil_node_close_file(fil_node_t * file)3776 void fil_node_close_file(fil_node_t *file) {
3777 if (!file->is_open) {
3778 return;
3779 }
3780
3781 fil_space_t *space = file->space;
3782
3783 auto shard = fil_system->shard_by_id(space->id);
3784
3785 shard->mutex_acquire();
3786
3787 shard->close_file(file, true);
3788
3789 shard->mutex_release();
3790 }
3791
3792 /** Close log files.
3793 @param[in] free_all If set then free all instances */
close_log_files(bool free_all)3794 void Fil_shard::close_log_files(bool free_all) {
3795 mutex_acquire();
3796
3797 auto end = m_spaces.end();
3798
3799 for (auto it = m_spaces.begin(); it != end; /* No op */) {
3800 auto space = it->second;
3801
3802 if (space->purpose != FIL_TYPE_LOG) {
3803 ++it;
3804 continue;
3805 }
3806
3807 if (space->id == dict_sys_t::s_log_space_first_id) {
3808 ut_a(fil_space_t::s_redo_space == space);
3809
3810 fil_space_t::s_redo_space = nullptr;
3811 }
3812
3813 for (auto &file : space->files) {
3814 if (file.is_open) {
3815 close_file(&file, false);
3816 }
3817 }
3818
3819 if (free_all) {
3820 space_detach(space);
3821 space_free_low(space);
3822 ut_a(space == nullptr);
3823
3824 it = m_spaces.erase(it);
3825
3826 } else {
3827 ++it;
3828 }
3829 }
3830
3831 mutex_release();
3832 }
3833
3834 /** Close all log files in all shards.
3835 @param[in] free_all If set then free all instances */
close_all_log_files(bool free_all)3836 void Fil_system::close_all_log_files(bool free_all) {
3837 for (auto shard : m_shards) {
3838 shard->close_log_files(free_all);
3839 }
3840 }
3841
3842 /** Closes the redo log files. There must not be any pending i/o's or not
3843 flushed modifications in the files.
3844 @param[in] free_all If set then free all instances */
fil_close_log_files(bool free_all)3845 void fil_close_log_files(bool free_all) {
3846 fil_system->close_all_log_files(free_all);
3847 }
3848
3849 /** Iterate through all tablespaces
3850 @param[in] include_log Include redo log space, if true
3851 @param[in] f Callback
3852 @return any error returned by the callback function. */
iterate_spaces(bool include_log,Fil_space_iterator::Function & f)3853 dberr_t Fil_shard::iterate_spaces(bool include_log,
3854 Fil_space_iterator::Function &f) {
3855 mutex_acquire();
3856
3857 for (auto &elem : m_spaces) {
3858 auto space = elem.second;
3859
3860 if (space->purpose != FIL_TYPE_TABLESPACE &&
3861 (!include_log || space->purpose != FIL_TYPE_LOG)) {
3862 continue;
3863 }
3864
3865 dberr_t err = f(space);
3866
3867 if (err != DB_SUCCESS) {
3868 mutex_release();
3869
3870 return (err);
3871 ;
3872 }
3873 }
3874
3875 mutex_release();
3876
3877 return (DB_SUCCESS);
3878 }
3879
3880 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3881 returning the nodes via callback function cbk.
3882 @param[in] include_log Include log files, if true
3883 @param[in] f Callback
3884 @return any error returned by the callback function. */
iterate(bool include_log,Fil_iterator::Function & f)3885 dberr_t Fil_shard::iterate(bool include_log, Fil_iterator::Function &f) {
3886 mutex_acquire();
3887
3888 for (auto &elem : m_spaces) {
3889 auto space = elem.second;
3890
3891 if (space->purpose != FIL_TYPE_TABLESPACE &&
3892 (!include_log || space->purpose != FIL_TYPE_LOG)) {
3893 continue;
3894 }
3895
3896 for (auto &file : space->files) {
3897 /* Note: The callback can release the mutex. */
3898
3899 dberr_t err = f(&file);
3900
3901 if (err != DB_SUCCESS) {
3902 mutex_release();
3903
3904 return (err);
3905 }
3906 }
3907 }
3908
3909 mutex_release();
3910
3911 return (DB_SUCCESS);
3912 }
3913
3914 /** Iterate through all tablespaces
3915 @param[in] include_log Include redo log space, if true
3916 @param[in] f Callback
3917 @return any error returned by the callback function. */
iterate_spaces(bool include_log,Fil_space_iterator::Function & f)3918 dberr_t Fil_system::iterate_spaces(bool include_log,
3919 Fil_space_iterator::Function &f) {
3920 for (auto shard : m_shards) {
3921 dberr_t err = shard->iterate_spaces(include_log, f);
3922
3923 if (err != DB_SUCCESS) {
3924 return (err);
3925 }
3926 }
3927
3928 return (DB_SUCCESS);
3929 }
3930
3931 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3932 returning the nodes via callback function cbk.
3933 @param[in] include_log include log files, if true
3934 @param[in] f callback function
3935 @return any error returned by the callback function. */
iterate(bool include_log,Fil_iterator::Function & f)3936 dberr_t Fil_system::iterate(bool include_log, Fil_iterator::Function &f) {
3937 for (auto shard : m_shards) {
3938 dberr_t err = shard->iterate(include_log, f);
3939
3940 if (err != DB_SUCCESS) {
3941 return (err);
3942 }
3943 }
3944
3945 return (DB_SUCCESS);
3946 }
3947
3948 /** Iterate through all spaces
3949 returning the them via callback function cbk.
3950 @param[in] include_log include log files, if true
3951 @param[in] f Callback
3952 @return any error returned by the callback function. */
iterate(bool include_log,Function && f)3953 dberr_t Fil_space_iterator::iterate(bool include_log, Function &&f) {
3954 return (fil_system->iterate_spaces(include_log, f));
3955 }
3956
3957 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3958 returning the nodes via callback function cbk.
3959 @param[in] include_log include log files, if true
3960 @param[in] f Callback
3961 @return any error returned by the callback function. */
iterate(bool include_log,Function && f)3962 dberr_t Fil_iterator::iterate(bool include_log, Function &&f) {
3963 return (fil_system->iterate(include_log, f));
3964 }
3965
3966 /** Sets the max tablespace id counter if the given number is bigger than the
3967 previous value.
3968 @param[in] max_id Maximum known tablespace ID */
fil_set_max_space_id_if_bigger(space_id_t max_id)3969 void fil_set_max_space_id_if_bigger(space_id_t max_id) {
3970 if (dict_sys_t::is_reserved(max_id)) {
3971 ib::fatal(ER_IB_MSG_285, ulong{max_id});
3972 }
3973
3974 fil_system->update_maximum_space_id(max_id);
3975 }
3976
3977 /** Write the flushed LSN to the page header of the first page in the
3978 system tablespace.
3979 @param[in] lsn flushed LSN
3980 @return DB_SUCCESS or error number */
fil_write_flushed_lsn(lsn_t lsn)3981 dberr_t fil_write_flushed_lsn(lsn_t lsn) {
3982 byte *buf1;
3983 byte *buf;
3984 dberr_t err;
3985
3986 buf1 = static_cast<byte *>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
3987 buf = static_cast<byte *>(ut_align(buf1, UNIV_PAGE_SIZE));
3988
3989 const page_id_t page_id(TRX_SYS_SPACE, 0);
3990
3991 err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(), buf);
3992
3993 if (err == DB_SUCCESS) {
3994 mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
3995
3996 err = fil_write(page_id, univ_page_size, 0, univ_page_size.physical(), buf);
3997
3998 fil_system->flush_file_spaces(to_int(FIL_TYPE_TABLESPACE));
3999 }
4000
4001 ut_free(buf1);
4002
4003 return (err);
4004 }
4005
4006 /** Acquire a tablespace when it could be dropped concurrently.
4007 Used by background threads that do not necessarily hold proper locks
4008 for concurrency control.
4009 @param[in] space_id Tablespace ID
4010 @param[in] silent Whether to silently ignore missing tablespaces
4011 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire_low(space_id_t space_id,bool silent)4012 inline fil_space_t *fil_space_acquire_low(space_id_t space_id, bool silent) {
4013 auto shard = fil_system->shard_by_id(space_id);
4014
4015 shard->mutex_acquire();
4016
4017 fil_space_t *space = shard->get_space_by_id(space_id);
4018
4019 if (space == nullptr) {
4020 if (!silent) {
4021 ib::warn(ER_IB_MSG_286, ulong{space_id});
4022 }
4023 } else if (space->stop_new_ops) {
4024 space = nullptr;
4025 } else {
4026 ++space->n_pending_ops;
4027 }
4028
4029 shard->mutex_release();
4030
4031 return (space);
4032 }
4033
4034 /** Acquire a tablespace when it could be dropped concurrently.
4035 Used by background threads that do not necessarily hold proper locks
4036 for concurrency control.
4037 @param[in] space_id Tablespace ID
4038 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire(space_id_t space_id)4039 fil_space_t *fil_space_acquire(space_id_t space_id) {
4040 return (fil_space_acquire_low(space_id, false));
4041 }
4042
4043 /** Acquire a tablespace that may not exist.
4044 Used by background threads that do not necessarily hold proper locks
4045 for concurrency control.
4046 @param[in] space_id Tablespace ID
4047 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire_silent(space_id_t space_id)4048 fil_space_t *fil_space_acquire_silent(space_id_t space_id) {
4049 return (fil_space_acquire_low(space_id, true));
4050 }
4051
4052 /** Release a tablespace acquired with fil_space_acquire().
4053 @param[in,out] space tablespace to release */
fil_space_release(fil_space_t * space)4054 void fil_space_release(fil_space_t *space) {
4055 auto shard = fil_system->shard_by_id(space->id);
4056
4057 shard->mutex_acquire();
4058
4059 ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
4060 ut_ad(space->n_pending_ops > 0);
4061
4062 --space->n_pending_ops;
4063
4064 shard->mutex_release();
4065 }
4066
4067 /** Check for pending operations.
4068 @param[in] space tablespace
4069 @param[in] count number of attempts so far
4070 @return 0 if no pending operations else count + 1. */
space_check_pending_operations(fil_space_t * space,ulint count) const4071 ulint Fil_shard::space_check_pending_operations(fil_space_t *space,
4072 ulint count) const {
4073 ut_ad(mutex_owned());
4074
4075 if (space != nullptr && space->n_pending_ops > 0) {
4076 if (count > 5000) {
4077 ib::warn(ER_IB_MSG_287, space->name, ulong{space->n_pending_ops});
4078 }
4079
4080 return (count + 1);
4081 }
4082
4083 return (0);
4084 }
4085
4086 /** Check for pending IO.
4087 @param[in] space Tablespace to check
4088 @param[in] file File in space list
4089 @param[in] count number of attempts so far
4090 @return 0 if no pending else count + 1. */
check_pending_io(const fil_space_t * space,const fil_node_t & file,ulint count) const4091 ulint Fil_shard::check_pending_io(const fil_space_t *space,
4092 const fil_node_t &file, ulint count) const {
4093 ut_ad(mutex_owned());
4094 ut_a(space->n_pending_ops == 0);
4095
4096 ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
4097 space->id == dict_sys_t::s_log_space_first_id ||
4098 space->files.size() == 1);
4099
4100 if (space->n_pending_flushes > 0 || file.n_pending > 0) {
4101 if (count > 1000) {
4102 ib::warn(ER_IB_MSG_288, space->name, ulong{space->n_pending_flushes},
4103 size_t{file.n_pending});
4104 }
4105
4106 return (count + 1);
4107 }
4108
4109 return (0);
4110 }
4111
4112 /** Check pending operations on a tablespace.
4113 @param[in] space_id Tablespace ID
4114 @param[out] space tablespace instance in memory
4115 @param[out] path tablespace path
4116 @return DB_SUCCESS or DB_TABLESPACE_NOT_FOUND. */
space_check_pending_operations(space_id_t space_id,fil_space_t * & space,char ** path) const4117 dberr_t Fil_shard::space_check_pending_operations(space_id_t space_id,
4118 fil_space_t *&space,
4119 char **path) const {
4120 ut_ad(!fsp_is_system_tablespace(space_id));
4121 ut_ad(!fsp_is_global_temporary(space_id));
4122
4123 space = nullptr;
4124
4125 mutex_acquire();
4126
4127 fil_space_t *sp = get_space_by_id(space_id);
4128
4129 if (sp != nullptr) {
4130 sp->stop_new_ops = true;
4131 }
4132
4133 mutex_release();
4134
4135 /* Check for pending operations. */
4136
4137 ulint count = 0;
4138
4139 do {
4140 mutex_acquire();
4141
4142 sp = get_space_by_id(space_id);
4143
4144 count = space_check_pending_operations(sp, count);
4145
4146 mutex_release();
4147
4148 if (count > 0) {
4149 os_thread_sleep(20000);
4150 }
4151
4152 } while (count > 0);
4153
4154 /* Check for pending IO. */
4155
4156 *path = nullptr;
4157
4158 do {
4159 mutex_acquire();
4160
4161 sp = get_space_by_id(space_id);
4162
4163 if (sp == nullptr) {
4164 mutex_release();
4165
4166 return (DB_TABLESPACE_NOT_FOUND);
4167 }
4168
4169 const fil_node_t &file = sp->files.front();
4170
4171 count = check_pending_io(sp, file, count);
4172
4173 if (count == 0) {
4174 *path = mem_strdup(file.name);
4175 }
4176
4177 mutex_release();
4178
4179 if (count > 0) {
4180 os_thread_sleep(20000);
4181 }
4182
4183 } while (count > 0);
4184
4185 ut_ad(sp != nullptr);
4186
4187 space = sp;
4188
4189 return (DB_SUCCESS);
4190 }
4191
get_existing_path(const std::string & path,std::string & ghost)4192 std::string Fil_path::get_existing_path(const std::string &path,
4193 std::string &ghost) {
4194 std::string existing_path{path};
4195
4196 /* This is only called for non-existing paths. */
4197 while (!os_file_exists(existing_path.c_str())) {
4198 /* Some part of this path does not exist.
4199 If the last char is a separator, strip it off. */
4200 trim_separator(existing_path);
4201
4202 auto sep = existing_path.find_last_of(SEPARATOR);
4203 if (sep == std::string::npos) {
4204 /* If no separator is found, it must be relative to the current dir. */
4205 if (existing_path == ".") {
4206 /* This probably cannot happen, but break here to ensure that the
4207 loop always has a way out. */
4208 break;
4209 }
4210 ghost.assign(path);
4211 existing_path.assign(".");
4212 existing_path.push_back(OS_SEPARATOR);
4213 } else {
4214 ghost.assign(path.substr(sep + 1, path.length()));
4215 existing_path.resize(sep + 1);
4216 }
4217 }
4218
4219 return (existing_path);
4220 }
4221
get_real_path(const std::string & path,bool force)4222 std::string Fil_path::get_real_path(const std::string &path, bool force) {
4223 bool path_exists;
4224 os_file_type_t path_type;
4225 char abspath[OS_FILE_MAX_PATH];
4226 std::string in_path{path};
4227 std::string real_path;
4228
4229 if (path.empty()) {
4230 return (std::string(""));
4231 }
4232
4233 /* We do not need a separator at the end in order to determine what
4234 kind of object it is. So take it off. If it is there and the last
4235 part is actually a file, the correct real path will be returned. */
4236 if (in_path.length() > 1 && is_separator(in_path.back())) {
4237 trim_separator(in_path);
4238 }
4239
4240 /* Before we make an absolute path, check if this path exists,
4241 and if so, what type it is. */
4242 os_file_status(in_path.c_str(), &path_exists, &path_type);
4243
4244 int ret = my_realpath(abspath, in_path.c_str(), MYF(0));
4245
4246 if (ret == 0) {
4247 real_path.assign(abspath);
4248 } else {
4249 /* This often happens on non-Windows platforms when the path does not
4250 fully exist yet. */
4251
4252 if (path_exists) {
4253 /* my_realpath() failed for some reason other than the path does not
4254 exist. */
4255 if (force) {
4256 /* Use the given path and make it comparable. */
4257 real_path.assign(in_path);
4258 } else {
4259 /* Return null and make a note of it. Another attempt will be made
4260 later when Fil_path::get_real_path() is called with force=true. */
4261 ib::info(ER_IB_MSG_289) << "my_realpath('" << path
4262 << "') failed for path type " << path_type;
4263 return (std::string(""));
4264 }
4265 } else {
4266 /* The path does not exist. Try my_realpath() again with the
4267 existing portion of the path. */
4268 std::string ghost;
4269 std::string dir = get_existing_path(in_path, ghost);
4270
4271 ret = my_realpath(abspath, dir.c_str(), MYF(0));
4272 ut_ad(ret == 0);
4273
4274 /* Concatenate the absolute path with the non-existing sub-path.
4275 NOTE: If this path existed, my_realpath() would put a separator
4276 at the end if it is a directory. But since the ghost portion
4277 does not yet exist, we don't know if it is a dir or a file, so
4278 we cannot attach a trailing separator for a directory. So we
4279 trim them off in Fil_path::is_same_as() and is_ancestor(). */
4280 real_path.assign(abspath);
4281 append_separator(real_path);
4282 real_path.append(ghost);
4283 }
4284 }
4285
4286 if (lower_case_file_system) {
4287 Fil_path::to_lower(real_path);
4288 }
4289
4290 /* Try to consistently end a directory name with a separator.
4291 On Windows, my_realpath() usually puts a separator at the end
4292 of a directory path (it does not do that for the path ".").
4293 On non-Windows it never does.
4294 So if the separator is missing, decide whether to append it. */
4295 ut_ad(!real_path.empty());
4296 if (!is_separator(real_path.back())) {
4297 bool add_sep = true;
4298 switch (path_type) {
4299 case OS_FILE_TYPE_DIR:
4300 case OS_FILE_TYPE_BLOCK:
4301 break;
4302 case OS_FILE_TYPE_FILE:
4303 case OS_FILE_TYPE_LINK:
4304 add_sep = false;
4305 break;
4306 case OS_FILE_TYPE_FAILED:
4307 case OS_FILE_TYPE_MISSING:
4308 case OS_FILE_TYPE_NAME_TOO_LONG:
4309 case OS_FILE_PERMISSION_ERROR:
4310 case OS_FILE_TYPE_UNKNOWN:
4311 /* This filepath is missing or cannot be identified for some other
4312 reason. If it ends in a three letter extension, assume it is a file
4313 name and do not add the trailing separator. Otherwise, assume it is
4314 intended to be a directory.*/
4315 size_t s = real_path.size();
4316 if (s > 4 && real_path[s - 4] == '.' && real_path[s - 3] != '.' &&
4317 real_path[s - 2] != '.' && real_path[s - 1] != '.' &&
4318 !is_separator(real_path[s - 3]) &&
4319 !is_separator(real_path[s - 2])) {
4320 add_sep = false;
4321 }
4322 }
4323
4324 if (add_sep) {
4325 append_separator(real_path);
4326 }
4327 }
4328
4329 return (real_path);
4330 }
4331
4332 /** Constructor
4333 @param[in] dir Directory that the files are under */
Tablespace_files(const std::string & dir)4334 Tablespace_files::Tablespace_files(const std::string &dir)
4335 : m_ibd_paths(), m_undo_paths(), m_dir(dir) {
4336 ut_ad(Fil_path::is_separator(dir.back()));
4337 }
4338
4339 /** Closes a single-table tablespace. The tablespace must be cached in the
4340 memory cache. Free all pages used by the tablespace.
4341 @param[in,out] trx Transaction covering the close
4342 @param[in] space_id Tablespace ID
4343 @return DB_SUCCESS or error */
fil_close_tablespace(trx_t * trx,space_id_t space_id)4344 dberr_t fil_close_tablespace(trx_t *trx, space_id_t space_id) {
4345 char *path = nullptr;
4346 fil_space_t *space = nullptr;
4347
4348 ut_ad(!fsp_is_undo_tablespace(space_id));
4349 ut_ad(!fsp_is_system_or_temp_tablespace(space_id));
4350
4351 auto shard = fil_system->shard_by_id(space_id);
4352
4353 dberr_t err;
4354
4355 err = shard->space_check_pending_operations(space_id, space, &path);
4356
4357 if (err != DB_SUCCESS) {
4358 return (err);
4359 }
4360
4361 ut_a(path != nullptr);
4362
4363 rw_lock_x_lock(&space->latch);
4364
4365 #ifndef UNIV_HOTBACKUP
4366 /* Invalidate in the buffer pool all pages belonging to the
4367 tablespace. Since we have set space->stop_new_ops = true, readahead
4368 or ibuf merge can no longer read more pages of this tablespace to the
4369 buffer pool. Thus we can clean the tablespace out of the buffer pool
4370 completely and permanently. The flag stop_new_ops also prevents
4371 fil_flush() from being applied to this tablespace. */
4372
4373 buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, trx);
4374 #endif /* !UNIV_HOTBACKUP */
4375
4376 /* If the free is successful, the X lock will be released before
4377 the space memory data structure is freed. */
4378
4379 if (!fil_space_free(space_id, true)) {
4380 rw_lock_x_unlock(&space->latch);
4381 err = DB_TABLESPACE_NOT_FOUND;
4382 } else {
4383 err = DB_SUCCESS;
4384 }
4385
4386 /* If it is a delete then also delete any generated files, otherwise
4387 when we drop the database the remove directory will fail. */
4388
4389 char *cfg_name = Fil_path::make_cfg(path);
4390
4391 if (cfg_name != nullptr) {
4392 os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);
4393
4394 ut_free(cfg_name);
4395 }
4396
4397 char *cfp_name = Fil_path::make_cfp(path);
4398
4399 if (cfp_name != nullptr) {
4400 os_file_delete_if_exists(innodb_data_file_key, cfp_name, nullptr);
4401
4402 ut_free(cfp_name);
4403 }
4404
4405 ut_free(path);
4406
4407 return (err);
4408 }
4409
4410 #ifndef UNIV_HOTBACKUP
4411 #ifndef XTRABACKUP
4412 /** Write a log record about an operation on a tablespace file.
4413 @param[in] type MLOG_FILE_OPEN or MLOG_FILE_DELETE
4414 or MLOG_FILE_CREATE or MLOG_FILE_RENAME
4415 @param[in] space_id tablespace identifier
4416 @param[in] path file path
4417 @param[in] new_path if type is MLOG_FILE_RENAME, the new name
4418 @param[in] flags if type is MLOG_FILE_CREATE, the space flags
4419 @param[in,out] mtr mini-transaction */
fil_op_write_log(mlog_id_t type,space_id_t space_id,const char * path,const char * new_path,uint32_t flags,mtr_t * mtr)4420 static void fil_op_write_log(mlog_id_t type, space_id_t space_id,
4421 const char *path, const char *new_path,
4422 uint32_t flags, mtr_t *mtr) {
4423 ut_ad(space_id != TRX_SYS_SPACE);
4424
4425 byte *log_ptr = nullptr;
4426
4427 if (!mlog_open(mtr, 11 + 4 + 2 + 1, log_ptr)) {
4428 /* Logging in mtr is switched off during crash recovery:
4429 in that case mlog_open returns nullptr */
4430 return;
4431 }
4432
4433 log_ptr = mlog_write_initial_log_record_low(type, space_id, 0, log_ptr, mtr);
4434
4435 if (type == MLOG_FILE_CREATE) {
4436 mach_write_to_4(log_ptr, flags);
4437 log_ptr += 4;
4438 }
4439
4440 /* Let us store the strings as null-terminated for easier readability
4441 and handling */
4442
4443 ulint len = strlen(path) + 1;
4444
4445 mach_write_to_2(log_ptr, len);
4446 log_ptr += 2;
4447
4448 mlog_close(mtr, log_ptr);
4449
4450 mlog_catenate_string(mtr, reinterpret_cast<const byte *>(path), len);
4451
4452 switch (type) {
4453 case MLOG_FILE_RENAME:
4454
4455 ut_ad(strchr(new_path, Fil_path::OS_SEPARATOR) != nullptr);
4456
4457 len = strlen(new_path) + 1;
4458
4459 ut_a(mlog_open(mtr, 2 + len, log_ptr));
4460
4461 mach_write_to_2(log_ptr, len);
4462
4463 log_ptr += 2;
4464
4465 mlog_close(mtr, log_ptr);
4466
4467 mlog_catenate_string(mtr, reinterpret_cast<const byte *>(new_path), len);
4468 break;
4469 case MLOG_FILE_DELETE:
4470 case MLOG_FILE_CREATE:
4471 break;
4472 default:
4473 ut_ad(0);
4474 }
4475 }
4476
4477 #endif /* !XTRABACKUP */
4478
4479 /** Fetch the file name opened for a space_id during recovery
4480 from the file map.
4481 @param[in] space_id Undo tablespace ID
4482 @return file name that was opened, empty string if space ID not found. */
fil_system_open_fetch(space_id_t space_id)4483 std::string fil_system_open_fetch(space_id_t space_id) {
4484 ut_a(dict_sys_t::is_reserved(space_id) || srv_is_upgrade_mode);
4485
4486 return (fil_system->find(space_id));
4487 }
4488
4489 #endif /* !UNIV_HOTBACKUP */
4490
space_delete(space_id_t space_id,buf_remove_t buf_remove)4491 dberr_t Fil_shard::space_delete(space_id_t space_id, buf_remove_t buf_remove) {
4492 char *path = nullptr;
4493 fil_space_t *space = nullptr;
4494
4495 ut_ad(!fsp_is_system_tablespace(space_id));
4496 ut_ad(!fsp_is_global_temporary(space_id));
4497
4498 dberr_t err = space_check_pending_operations(space_id, space, &path);
4499
4500 if (err != DB_SUCCESS) {
4501 ut_a(err == DB_TABLESPACE_NOT_FOUND);
4502 return (err);
4503 }
4504
4505 ut_a(path != nullptr);
4506 ut_a(space != nullptr);
4507
4508 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
4509 /* IMPORTANT: Because we have set space::stop_new_ops there
4510 can't be any new ibuf merges, reads or flushes. We are here
4511 because file::n_pending was zero above. However, it is still
4512 possible to have pending read and write requests:
4513
4514 A read request can happen because the reader thread has
4515 gone through the ::stop_new_ops check in buf_page_init_for_read()
4516 before the flag was set and has not yet incremented ::n_pending
4517 when we checked it above.
4518
4519 A write request can be issued any time because we don't check
4520 the ::stop_new_ops flag when queueing a block for write.
4521
4522 We deal with pending write requests in the following function
4523 where we'd minimally evict all dirty pages belonging to this
4524 space from the flush_list. Note that if a block is IO-fixed
4525 we'll wait for IO to complete.
4526
4527 For buf_remove == BUF_REMOVE_NONE we mark the fil_space_t instance
4528 as deleted by setting the fil_space_t::m_deleted_lsn to the current
4529 LSN. We wait for any pending IO to complete after that.
4530
4531 To deal with potential read requests, we will check the
4532 ::stop_new_ops flag in fil_io(). */
4533
4534 if (buf_remove != BUF_REMOVE_NONE) {
4535 buf_LRU_flush_or_remove_pages(space_id, buf_remove, nullptr);
4536 }
4537
4538 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
4539
4540 /* If it is a delete then also delete any generated files, otherwise
4541 when we drop the database the remove directory will fail. */
4542 if (space->purpose != FIL_TYPE_TEMPORARY) {
4543 #if defined(UNIV_HOTBACKUP) || defined(XTRABACKUP)
4544 /* When replaying the operation in MySQL Enterprise
4545 Backup, we do not try to write any log record. */
4546 #else /* UNIV_HOTBACKUP */
4547 /* Before deleting the file, write a log record about it, so that
4548 InnoDB crash recovery will expect the file to be gone. */
4549 mtr_t mtr;
4550
4551 mtr.start();
4552
4553 fil_op_write_log(MLOG_FILE_DELETE, space_id, path, nullptr, 0, &mtr);
4554
4555 mtr.commit();
4556
4557 /* Even if we got killed shortly after deleting the
4558 tablespace file, the record must have already been
4559 written to the redo log. */
4560 log_write_up_to(*log_sys, mtr.commit_lsn(), true);
4561 #endif /* UNIV_HOTBACKUP || XTRABACKUP */
4562
4563 char *cfg_name = Fil_path::make_cfg(path);
4564
4565 if (cfg_name != nullptr) {
4566 os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);
4567
4568 ut_free(cfg_name);
4569 }
4570
4571 char *cfp_name = Fil_path::make_cfp(path);
4572
4573 if (cfp_name != nullptr) {
4574 os_file_delete_if_exists(innodb_data_file_key, cfp_name, nullptr);
4575
4576 ut_free(cfp_name);
4577 }
4578 }
4579
4580 /* Must set back to active before returning from function. */
4581 clone_mark_abort(true);
4582
4583 #ifndef UNIV_HOTBACKUP
4584 lsn_t lsn = log_get_lsn(*log_sys);
4585 #endif /* !UNIV_HOTBACKUP */
4586
4587 mutex_acquire();
4588
4589 /* Double check the sanity of pending ops after reacquiring
4590 the fil_system::mutex. */
4591 if (const fil_space_t *s = get_space_by_id(space_id)) {
4592 ut_a(s == space);
4593 ut_a(space->files.size() == 1);
4594 ut_a(space->n_pending_ops == 0);
4595
4596 #ifndef UNIV_HOTBACKUP
4597 if (buf_remove == BUF_REMOVE_NONE) {
4598 ut_a(space->m_deleted_lsn == 0);
4599
4600 /* Mark the instance as deleted, this should inform any writer
4601 threads that the tablespace can't be written to anymore. */
4602 space->m_deleted_lsn = lsn;
4603
4604 /* Release the mutex because we want the IO to complete. */
4605 mutex_release();
4606
4607 os_thread_yield();
4608
4609 mutex_acquire();
4610
4611 /* Wait for any pending writes. */
4612 while (space->files.front().n_pending > 0) {
4613 mutex_release();
4614
4615 os_thread_yield();
4616
4617 mutex_acquire();
4618 }
4619
4620 m_deleted.push_back({space->id, space});
4621 }
4622 #endif /* !UNIV_HOTBACKUP */
4623
4624 space_detach(space);
4625
4626 /* Delete the tablespace unless BUF_REMOVE_NONE was used. */
4627 if (space->m_deleted_lsn == 0) {
4628 ut_a(space->files.front().n_pending == 0);
4629
4630 space_delete(space_id);
4631 }
4632
4633 mutex_release();
4634
4635 if (space->m_deleted_lsn == 0) {
4636 space_free_low(space);
4637 ut_a(space == nullptr);
4638 }
4639
4640 if (!os_file_delete(innodb_data_file_key, path) &&
4641 !os_file_delete_if_exists(innodb_data_file_key, path, nullptr)) {
4642 /* Note: This is because we have removed the
4643 tablespace instance from the cache. */
4644
4645 err = DB_IO_ERROR;
4646 }
4647 } else {
4648 mutex_release();
4649
4650 err = DB_TABLESPACE_NOT_FOUND;
4651 }
4652
4653 ut_free(path);
4654
4655 clone_mark_active();
4656
4657 return (err);
4658 }
4659
fil_delete_tablespace(space_id_t space_id,buf_remove_t buf_remove)4660 dberr_t fil_delete_tablespace(space_id_t space_id, buf_remove_t buf_remove) {
4661 auto shard = fil_system->shard_by_id(space_id);
4662
4663 return (shard->space_delete(space_id, buf_remove));
4664 }
4665
4666 /** Prepare for truncating a single-table tablespace.
4667 1) Check pending operations on a tablespace;
4668 2) Remove all insert buffer entries for the tablespace;
4669 @param[in] space_id Tablespace ID
4670 @return DB_SUCCESS or error */
space_prepare_for_truncate(space_id_t space_id)4671 dberr_t Fil_shard::space_prepare_for_truncate(space_id_t space_id) {
4672 char *path = nullptr;
4673 fil_space_t *space = nullptr;
4674
4675 ut_ad(space_id != TRX_SYS_SPACE);
4676 ut_ad(!fsp_is_system_tablespace(space_id));
4677 ut_ad(!fsp_is_global_temporary(space_id));
4678 ut_ad(fsp_is_undo_tablespace(space_id) || fsp_is_session_temporary(space_id));
4679
4680 dberr_t err = space_check_pending_operations(space_id, space, &path);
4681
4682 ut_free(path);
4683
4684 return (err);
4685 }
4686
4687 /** Truncate the tablespace to needed size.
4688 @param[in] space_id Tablespace ID to truncate
4689 @param[in] size_in_pages Truncate size.
4690 @return true if truncate was successful. */
space_truncate(space_id_t space_id,page_no_t size_in_pages)4691 bool Fil_shard::space_truncate(space_id_t space_id, page_no_t size_in_pages) {
4692 /* Step-1: Prepare tablespace for truncate. This involves
4693 stopping all the new operations + IO on that tablespace
4694 and ensuring that related pages are flushed to disk. */
4695 if (space_prepare_for_truncate(space_id) != DB_SUCCESS) {
4696 return (false);
4697 }
4698
4699 #ifndef UNIV_HOTBACKUP
4700 /* Step-2: Invalidate buffer pool pages belonging to the tablespace
4701 to re-create. Remove all insert buffer entries for the tablespace */
4702 buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, nullptr);
4703 #endif /* !UNIV_HOTBACKUP */
4704
4705 /* Step-3: Truncate the tablespace and accordingly update
4706 the fil_space_t handler that is used to access this tablespace. */
4707 mutex_acquire();
4708
4709 fil_space_t *space = get_space_by_id(space_id);
4710
4711 ut_a(space->files.size() == 1);
4712
4713 fil_node_t &file = space->files.front();
4714
4715 ut_ad(file.is_open);
4716
4717 space->size = file.size = size_in_pages;
4718
4719 bool success = os_file_truncate(file.name, file.handle, 0);
4720
4721 if (success) {
4722 os_offset_t size = size_in_pages * UNIV_PAGE_SIZE;
4723
4724 success = os_file_set_size(file.name, file.handle, 0, size,
4725 srv_read_only_mode, true);
4726
4727 if (success) {
4728 space->stop_new_ops = false;
4729 }
4730 }
4731
4732 mutex_release();
4733
4734 return (success);
4735 }
4736
4737 /** Truncate the tablespace to needed size.
4738 @param[in] space_id Tablespace ID to truncate
4739 @param[in] size_in_pages Truncate size.
4740 @return true if truncate was successful. */
fil_truncate_tablespace(space_id_t space_id,page_no_t size_in_pages)4741 bool fil_truncate_tablespace(space_id_t space_id, page_no_t size_in_pages) {
4742 auto shard = fil_system->shard_by_id(space_id);
4743
4744 return (shard->space_truncate(space_id, size_in_pages));
4745 }
4746
4747 #ifdef UNIV_DEBUG
4748 /** Increase redo skipped count for a tablespace.
4749 @param[in] space_id Tablespace ID */
fil_space_inc_redo_skipped_count(space_id_t space_id)4750 void fil_space_inc_redo_skipped_count(space_id_t space_id) {
4751 auto shard = fil_system->shard_by_id(space_id);
4752
4753 shard->mutex_acquire();
4754
4755 fil_space_t *space = shard->get_space_by_id(space_id);
4756
4757 ut_a(space != nullptr);
4758
4759 ++space->redo_skipped_count;
4760
4761 shard->mutex_release();
4762 }
4763
4764 /** Decrease redo skipped count for a tablespace.
4765 @param[in] space_id Tablespace id */
fil_space_dec_redo_skipped_count(space_id_t space_id)4766 void fil_space_dec_redo_skipped_count(space_id_t space_id) {
4767 auto shard = fil_system->shard_by_id(space_id);
4768
4769 shard->mutex_acquire();
4770
4771 fil_space_t *space = shard->get_space_by_id(space_id);
4772
4773 ut_a(space != nullptr);
4774 ut_a(space->redo_skipped_count > 0);
4775
4776 --space->redo_skipped_count;
4777
4778 shard->mutex_release();
4779 }
4780
4781 /** Check whether a single-table tablespace is redo skipped.
4782 @param[in] space_id Tablespace id
4783 @return true if redo skipped */
fil_space_is_redo_skipped(space_id_t space_id)4784 bool fil_space_is_redo_skipped(space_id_t space_id) {
4785 auto shard = fil_system->shard_by_id(space_id);
4786
4787 shard->mutex_acquire();
4788
4789 fil_space_t *space = shard->get_space_by_id(space_id);
4790
4791 ut_a(space != nullptr);
4792
4793 bool is_redo_skipped = space->redo_skipped_count > 0;
4794
4795 shard->mutex_release();
4796
4797 return (is_redo_skipped);
4798 }
4799 #endif /* UNIV_DEBUG */
4800
4801 #ifndef UNIV_HOTBACKUP
4802 /** Discards a single-table tablespace. The tablespace must be cached in the
4803 memory cache. Discarding is like deleting a tablespace, but
4804
4805 1. We do not drop the table from the data dictionary;
4806
4807 2. We remove all insert buffer entries for the tablespace immediately;
4808 in DROP TABLE they are only removed gradually in the background;
4809
4810 3. Free all the pages in use by the tablespace.
4811 @param[in] space_id Tablespace ID
4812 @return DB_SUCCESS or error */
fil_discard_tablespace(space_id_t space_id)4813 dberr_t fil_discard_tablespace(space_id_t space_id) {
4814 dberr_t err;
4815
4816 err = fil_delete_tablespace(space_id, BUF_REMOVE_ALL_NO_WRITE);
4817
4818 switch (err) {
4819 case DB_SUCCESS:
4820 break;
4821
4822 case DB_IO_ERROR:
4823
4824 ib::warn(ER_IB_MSG_291, ulong{space_id}, ut_strerr(err));
4825 break;
4826
4827 case DB_TABLESPACE_NOT_FOUND:
4828
4829 ib::warn(ER_IB_MSG_292, ulong{space_id}, ut_strerr(err));
4830 break;
4831
4832 default:
4833 ut_error;
4834 }
4835
4836 /* Remove all insert buffer entries for the tablespace */
4837
4838 ibuf_delete_for_discarded_space(space_id);
4839
4840 return (err);
4841 }
4842
4843 #if !defined(XTRABACKUP)
4844 /** Write redo log for renaming a file.
4845 @param[in] space_id tablespace id
4846 @param[in] old_name tablespace file name
4847 @param[in] new_name tablespace file name after renaming
4848 @param[in,out] mtr mini-transaction */
fil_name_write_rename(space_id_t space_id,const char * old_name,const char * new_name,mtr_t * mtr)4849 static void fil_name_write_rename(space_id_t space_id, const char *old_name,
4850 const char *new_name, mtr_t *mtr) {
4851 ut_ad(!fsp_is_system_or_temp_tablespace(space_id));
4852 ut_ad(!fsp_is_undo_tablespace(space_id));
4853
4854 /* Note: A checkpoint can take place here. */
4855
4856 DBUG_EXECUTE_IF("ib_crash_rename_log_1", DBUG_SUICIDE(););
4857
4858 static const auto type = MLOG_FILE_RENAME;
4859
4860 fil_op_write_log(type, space_id, old_name, new_name, 0, mtr);
4861
4862 DBUG_EXECUTE_IF("ib_crash_rename_log_2", DBUG_SUICIDE(););
4863
4864 /* Note: A checkpoint can take place here too before we
4865 have physically renamed the file. */
4866 }
4867 #endif /* !XTRABACKUP */
4868
4869 #endif /* !UNIV_HOTBACKUP */
4870
4871 /** Allocate and build a file name from a path, a table or tablespace name
4872 and a suffix.
4873 @param[in] path_in nullptr or the direcory path or the full path
4874 and filename
4875 @param[in] name_in nullptr if path is full, or Table/Tablespace
4876 name
4877 @param[in] ext the file extension to use
4878 @param[in] trim whether last name on the path should be trimmed
4879 @return own: file name; must be freed by ut_free() */
make(const std::string & path_in,const std::string & name_in,ib_file_suffix ext,bool trim)4880 char *Fil_path::make(const std::string &path_in, const std::string &name_in,
4881 ib_file_suffix ext, bool trim) {
4882 /* The path should be a directory and should not contain the
4883 basename of the file. If the path is empty, we will use the
4884 default path, */
4885
4886 ut_ad(!path_in.empty() || !name_in.empty());
4887
4888 std::string path;
4889
4890 if (path_in.empty()) {
4891 if (is_absolute_path(name_in)) {
4892 path = "";
4893 } else {
4894 path.assign(MySQL_datadir_path);
4895 }
4896 } else {
4897 path.assign(path_in);
4898 }
4899
4900 std::string name;
4901
4902 if (!name_in.empty()) {
4903 name.assign(name_in);
4904 }
4905
4906 /* Do not prepend the datadir path (which must be DOT_SLASH)
4907 if the name is an absolute path or a relative path like
4908 DOT_SLASH or DOT_DOT_SLASH. */
4909 if (is_absolute_path(name) || has_prefix(name, DOT_SLASH) ||
4910 has_prefix(name, DOT_DOT_SLASH)) {
4911 path.clear();
4912 }
4913
4914 std::string filepath;
4915
4916 if (!path.empty()) {
4917 filepath.assign(path);
4918 }
4919
4920 if (trim) {
4921 /* Find the offset of the last DIR separator and set it to
4922 null in order to strip off the old basename from this path. */
4923 auto pos = filepath.find_last_of(SEPARATOR);
4924
4925 if (pos != std::string::npos) {
4926 filepath.resize(pos);
4927 }
4928 }
4929
4930 if (!name.empty()) {
4931 append_separator(filepath);
4932
4933 filepath.append(name);
4934 }
4935
4936 /* Make sure that the specified suffix is at the end. */
4937 if (ext != NO_EXT) {
4938 const auto suffix = dot_ext[ext];
4939 size_t len = strlen(suffix);
4940
4941 /* This assumes that the suffix starts with '.'. If the
4942 first char of the suffix is found in the filepath at the
4943 same length as the suffix from the end, then we will assume
4944 that there is a previous suffix that needs to be replaced. */
4945
4946 ut_ad(*suffix == '.');
4947
4948 if (filepath.length() > len && *(filepath.end() - len) == *suffix) {
4949 filepath.replace(filepath.end() - len, filepath.end(), suffix);
4950 } else {
4951 filepath.append(suffix);
4952 }
4953 }
4954
4955 normalize(filepath);
4956
4957 return (mem_strdup(filepath.c_str()));
4958 }
4959
parse_file_path(const std::string & file_path,ib_file_suffix extn,std::string & dict_name)4960 bool Fil_path::parse_file_path(const std::string &file_path,
4961 ib_file_suffix extn, std::string &dict_name) {
4962 dict_name.assign(file_path);
4963 if (!Fil_path::truncate_suffix(extn, dict_name)) {
4964 dict_name.clear();
4965 return (false);
4966 }
4967
4968 /* Extract table name */
4969 auto table_pos = dict_name.find_last_of(SEPARATOR);
4970 if (table_pos == std::string::npos) {
4971 dict_name.clear();
4972 return (false);
4973 }
4974 std::string table_name = dict_name.substr(table_pos + 1);
4975 dict_name.resize(table_pos);
4976
4977 /* Extract schema name */
4978 auto schema_pos = dict_name.find_last_of(SEPARATOR);
4979 if (schema_pos == std::string::npos) {
4980 dict_name.clear();
4981 return (false);
4982 }
4983 std::string schema_name = dict_name.substr(schema_pos + 1);
4984
4985 /* Build dictionary table name schema/table form. */
4986 dict_name.assign(schema_name);
4987 dict_name.push_back(DB_SEPARATOR);
4988 dict_name.append(table_name);
4989 return (true);
4990 }
4991
make_new_path(const std::string & path_in,const std::string & name_in,ib_file_suffix extn)4992 std::string Fil_path::make_new_path(const std::string &path_in,
4993 const std::string &name_in,
4994 ib_file_suffix extn) {
4995 ut_a(Fil_path::has_suffix(extn, path_in));
4996 ut_a(!Fil_path::has_suffix(extn, name_in));
4997
4998 std::string path(path_in);
4999
5000 auto pos = path.find_last_of(SEPARATOR);
5001
5002 ut_a(pos != std::string::npos);
5003
5004 path.resize(pos);
5005
5006 pos = path.find_last_of(SEPARATOR);
5007
5008 ut_a(pos != std::string::npos);
5009
5010 path.resize(pos + 1);
5011
5012 path.append(name_in + dot_ext[extn]);
5013
5014 normalize(path);
5015
5016 return (path);
5017 }
5018
5019 /** This function reduces a null-terminated full remote path name
5020 into the path that is sent by MySQL for DATA DIRECTORY clause.
5021 It replaces the 'databasename/tablename.ibd' found at the end of the
5022 path with just 'tablename'.
5023
5024 Since the result is always smaller than the path sent in, no new
5025 memory is allocated. The caller should allocate memory for the path
5026 sent in. This function manipulates that path in place. If the path
5027 format is not as expected, set data_dir_path to "" and return.
5028
5029 The result is used to inform a SHOW CREATE TABLE command.
5030 @param[in,out] data_dir_path Full path/data_dir_path */
make_data_dir_path(char * data_dir_path)5031 void Fil_path::make_data_dir_path(char *data_dir_path) {
5032 /* Replace the period before the extension with a null byte. */
5033 ut_ad(has_suffix(IBD, data_dir_path));
5034 char *dot = strrchr((char *)data_dir_path, '.');
5035 *dot = '\0';
5036
5037 /* The tablename starts after the last slash. */
5038 char *base_slash = strrchr((char *)data_dir_path, OS_PATH_SEPARATOR);
5039 ut_ad(base_slash != nullptr);
5040
5041 *base_slash = '\0';
5042
5043 std::string base_name{base_slash + 1};
5044
5045 /* The database name starts after the next to last slash. */
5046 char *db_slash = strrchr((char *)data_dir_path, OS_SEPARATOR);
5047 ut_ad(db_slash != nullptr);
5048 char *db_name = db_slash + 1;
5049
5050 /* Overwrite the db_name with the base_name. */
5051 memmove(db_name, base_name.c_str(), base_name.length());
5052 db_name[base_name.length()] = '\0';
5053 }
5054
5055 /** Test if a tablespace file can be renamed to a new filepath by checking
5056 if that the old filepath exists and the new filepath does not exist.
5057 @param[in] space_id tablespace id
5058 @param[in] old_path old filepath
5059 @param[in] new_path new filepath
5060 @param[in] is_discarded whether the tablespace is discarded
5061 @return innodb error code */
fil_rename_tablespace_check(space_id_t space_id,const char * old_path,const char * new_path,bool is_discarded)5062 dberr_t fil_rename_tablespace_check(space_id_t space_id, const char *old_path,
5063 const char *new_path, bool is_discarded) {
5064 bool exists;
5065 os_file_type_t ftype;
5066
5067 if (!is_discarded && os_file_status(old_path, &exists, &ftype) && !exists) {
5068 ib::error(ER_IB_MSG_293, old_path, new_path, ulong{space_id});
5069 return (DB_TABLESPACE_NOT_FOUND);
5070 }
5071
5072 if (!os_file_status(new_path, &exists, &ftype) || exists) {
5073 ib::error(ER_IB_MSG_294, old_path, new_path, ulong{space_id});
5074 return (DB_TABLESPACE_EXISTS);
5075 }
5076
5077 return (DB_SUCCESS);
5078 }
5079
5080 /** Rename a single-table tablespace.
5081 The tablespace must exist in the memory cache.
5082 @param[in] space_id Tablespace ID
5083 @param[in] old_path Old file name
5084 @param[in] new_name New tablespace name in the schema/space
5085 @param[in] new_path_in New file name, or nullptr if it is located
5086 in the normal data directory
5087 @return InnoDB error code */
space_rename(space_id_t space_id,const char * old_path,const char * new_name,const char * new_path_in)5088 dberr_t Fil_shard::space_rename(space_id_t space_id, const char *old_path,
5089 const char *new_name, const char *new_path_in) {
5090 fil_space_t *space;
5091 ulint count = 0;
5092 fil_node_t *file = nullptr;
5093 bool write_ddl_log = true;
5094 auto start_time = ut_time_monotonic();
5095
5096 #ifdef UNIV_DEBUG
5097 static uint32_t crash_injection_rename_tablespace_counter = 1;
5098 #endif /* UNIV_DEBUG */
5099
5100 ut_a(space_id != TRX_SYS_SPACE);
5101 ut_ad(strchr(new_name, '/') != nullptr);
5102
5103 for (;;) {
5104 bool retry = false;
5105 bool flush = false;
5106
5107 ++count;
5108
5109 if (!(count % 1000)) {
5110 ib::warn(ER_IB_MSG_295, old_path, ulong{space_id}, ulonglong{count});
5111 }
5112
5113 /* The name map and space ID map are in the same shard. */
5114 mutex_acquire();
5115
5116 space = get_space_by_id(space_id);
5117
5118 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = nullptr;);
5119
5120 if (space == nullptr) {
5121 ib::error(ER_IB_MSG_296, ulong{space_id}, old_path);
5122
5123 mutex_release();
5124
5125 return (DB_ERROR);
5126
5127 } else if (space->stop_ios) {
5128 /* Some other thread has stopped the IO. We need to
5129 wait for the other thread to complete its operation. */
5130 mutex_release();
5131
5132 if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
5133 ib::warn(ER_IB_MSG_297);
5134
5135 start_time = ut_time_monotonic();
5136 }
5137
5138 os_thread_sleep(1000000);
5139
5140 continue;
5141
5142 } else if (count > 25000) {
5143 mutex_release();
5144
5145 return (DB_ERROR);
5146
5147 } else if (space != get_space_by_name(space->name)) {
5148 ib::error(ER_IB_MSG_298, space->name);
5149
5150 mutex_release();
5151
5152 return (DB_ERROR);
5153
5154 } else {
5155 auto new_space = get_space_by_name(new_name);
5156
5157 if (new_space != nullptr) {
5158 if (new_space == space) {
5159 mutex_release();
5160
5161 return (DB_SUCCESS);
5162 }
5163
5164 ut_a(new_space->id == space->id);
5165 }
5166 }
5167
5168 ut_a(space->files.size() == 1);
5169
5170 #ifndef UNIV_HOTBACKUP
5171 /* Don't write DDL log during recovery when log_ddl is
5172 not initialized. */
5173
5174 if (write_ddl_log && log_ddl != nullptr) {
5175 /* Write ddl log when space->stop_ios is true
5176 can cause deadlock:
5177 a. buffer flush thread waits for rename thread to set
5178 stop_ios to false;
5179 b. rename thread waits for buffer flush thread to flush
5180 a page and release page lock. The page is ready for
5181 flush in double write buffer. */
5182
5183 ut_ad(!space->stop_ios);
5184
5185 file = &space->files.front();
5186
5187 char *new_file_name = new_path_in == nullptr
5188 ? Fil_path::make_ibd_from_table_name(new_name)
5189 : mem_strdup(new_path_in);
5190
5191 char *old_file_name = file->name;
5192
5193 ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != nullptr);
5194
5195 ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != nullptr);
5196
5197 mutex_release();
5198
5199 /* Rename ddl log is for rollback, so we exchange
5200 old file name with new file name. */
5201 dberr_t err = log_ddl->write_rename_space_log(space_id, new_file_name,
5202 old_file_name);
5203 ut_free(new_file_name);
5204 if (err != DB_SUCCESS) {
5205 return (err);
5206 }
5207
5208 write_ddl_log = false;
5209 continue;
5210 }
5211 #endif /* !UNIV_HOTBACKUP */
5212
5213 /* We temporarily close the .ibd file because we do
5214 not trust that operating systems can rename an open
5215 file. For the closing we have to wait until there
5216 are no pending I/O's or flushes on the file. */
5217
5218 space->stop_ios = true;
5219
5220 file = &space->files.front();
5221
5222 if (file->n_pending > 0 || file->n_pending_flushes > 0 ||
5223 file->in_use > 0) {
5224 /* There are pending I/O's or flushes or the
5225 file is currently being extended, sleep for
5226 a while and retry */
5227
5228 retry = true;
5229
5230 space->stop_ios = false;
5231
5232 } else if (file->modification_counter > file->flush_counter) {
5233 /* Flush the space */
5234
5235 retry = flush = true;
5236
5237 space->stop_ios = false;
5238
5239 } else if (file->is_open) {
5240 close_file(file, false);
5241 }
5242
5243 mutex_release();
5244
5245 if (!retry) {
5246 ut_ad(space->stop_ios);
5247 break;
5248 }
5249
5250 os_thread_sleep(100000);
5251
5252 if (flush) {
5253 mutex_acquire();
5254
5255 space_flush(space->id);
5256
5257 mutex_release();
5258 }
5259 }
5260
5261 ut_ad(space->stop_ios);
5262
5263 char *new_file_name;
5264
5265 if (new_path_in == nullptr) {
5266 new_file_name = Fil_path::make_ibd_from_table_name(new_name);
5267 } else {
5268 new_file_name = mem_strdup(new_path_in);
5269 }
5270
5271 char *old_file_name = file->name;
5272 char *old_space_name = space->name;
5273 char *new_space_name = mem_strdup(new_name);
5274
5275 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
5276 if (!recv_recovery_on) {
5277 mtr_t mtr;
5278
5279 mtr.start();
5280
5281 fil_name_write_rename(space_id, old_file_name, new_file_name, &mtr);
5282
5283 mtr.commit();
5284 }
5285 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
5286
5287 ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != nullptr);
5288 ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != nullptr);
5289
5290 mutex_acquire();
5291
5292 /* We already checked these. */
5293 ut_ad(space == get_space_by_name(old_space_name));
5294 ut_ad(get_space_by_name(new_space_name) == nullptr);
5295
5296 bool success;
5297
5298 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", goto skip_rename;);
5299
5300 DBUG_INJECT_CRASH("ddl_crash_before_rename_tablespace",
5301 crash_injection_rename_tablespace_counter++);
5302
5303 success = os_file_rename(innodb_data_file_key, old_file_name, new_file_name);
5304
5305 DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", skip_rename
5306 : success = false;);
5307
5308 DBUG_INJECT_CRASH("ddl_crash_after_rename_tablespace",
5309 crash_injection_rename_tablespace_counter++);
5310
5311 if (success) {
5312 file->name = new_file_name;
5313
5314 update_space_name_map(space, new_space_name);
5315
5316 space->name = new_space_name;
5317
5318 } else {
5319 /* Because nothing was renamed, we must free the new
5320 names, not the old ones. */
5321 old_file_name = new_file_name;
5322 old_space_name = new_space_name;
5323 }
5324
5325 ut_ad(space->stop_ios);
5326 space->stop_ios = false;
5327
5328 mutex_release();
5329
5330 ut_free(old_file_name);
5331 ut_free(old_space_name);
5332
5333 return (success ? DB_SUCCESS : DB_ERROR);
5334 }
5335
5336 /** Rename a single-table tablespace.
5337 The tablespace must exist in the memory cache.
5338 @param[in] space_id Tablespace ID
5339 @param[in] old_path Old file name
5340 @param[in] new_name New tablespace name in the schema/name format
5341 @param[in] new_path_in New file name, or nullptr if it is located
5342 in the normal data directory
5343 @return InnoDB error code */
fil_rename_tablespace(space_id_t space_id,const char * old_path,const char * new_name,const char * new_path_in)5344 dberr_t fil_rename_tablespace(space_id_t space_id, const char *old_path,
5345 const char *new_name, const char *new_path_in) {
5346 auto shard = fil_system->shard_by_id(space_id);
5347
5348 dberr_t err = shard->space_rename(space_id, old_path, new_name, new_path_in);
5349
5350 return (err);
5351 }
5352
5353 /** Rename a tablespace. Use the space_id to find the shard.
5354 @param[in] space_id tablespace ID
5355 @param[in] old_name old tablespace name
5356 @param[in] new_name new tablespace name
5357 @return DB_SUCCESS on success */
rename_tablespace_name(space_id_t space_id,const char * old_name,const char * new_name)5358 dberr_t Fil_system::rename_tablespace_name(space_id_t space_id,
5359 const char *old_name,
5360 const char *new_name) {
5361 auto old_shard = fil_system->shard_by_id(space_id);
5362
5363 old_shard->mutex_acquire();
5364
5365 auto old_space = old_shard->get_space_by_id(space_id);
5366
5367 if (old_space == nullptr) {
5368 old_shard->mutex_release();
5369
5370 ib::error(ER_IB_MSG_299, old_name);
5371
5372 return (DB_TABLESPACE_NOT_FOUND);
5373 }
5374
5375 ut_ad(old_space == old_shard->get_space_by_name(old_name));
5376 old_shard->mutex_release();
5377
5378 Fil_shard *new_shard{};
5379 fil_space_t *new_space{};
5380
5381 mutex_acquire_all();
5382
5383 for (auto shard : m_shards) {
5384 new_space = shard->get_space_by_name(new_name);
5385
5386 if (new_space != nullptr) {
5387 new_shard = shard;
5388 break;
5389 }
5390 }
5391
5392 if (new_space != nullptr) {
5393 mutex_release_all();
5394
5395 if (new_space->id != old_space->id) {
5396 ib::error(ER_IB_MSG_300, new_name);
5397
5398 return (DB_TABLESPACE_EXISTS);
5399 } else {
5400 ut_a(new_shard == old_shard);
5401 }
5402
5403 return (DB_SUCCESS);
5404 }
5405
5406 auto new_space_name = mem_strdup(new_name);
5407 auto old_space_name = old_space->name;
5408
5409 old_shard->update_space_name_map(old_space, new_space_name);
5410
5411 old_space->name = new_space_name;
5412
5413 mutex_release_all();
5414
5415 ut_free(old_space_name);
5416
5417 return (DB_SUCCESS);
5418 }
5419
5420 /** Rename a tablespace. Use the space_id to find the shard.
5421 @param[in] space_id tablespace ID
5422 @param[in] old_name old tablespace name
5423 @param[in] new_name new tablespace name
5424 @return DB_SUCCESS on success */
fil_rename_tablespace_by_id(space_id_t space_id,const char * old_name,const char * new_name)5425 dberr_t fil_rename_tablespace_by_id(space_id_t space_id, const char *old_name,
5426 const char *new_name) {
5427 return (fil_system->rename_tablespace_name(space_id, old_name, new_name));
5428 }
5429
5430 /** Create a tablespace (an IBD or IBT) file
5431 @param[in] space_id Tablespace ID
5432 @param[in] name Tablespace name in dbname/tablename format.
5433 For general tablespaces, the 'dbname/' part
5434 may be missing.
5435 @param[in] path Path and filename of the datafile to create.
5436 @param[in] flags Tablespace flags
5437 @param[in] size Initial size of the tablespace file in pages,
5438 must be >= FIL_IBD_FILE_INITIAL_SIZE
5439 @param[in] type FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
5440 @return DB_SUCCESS or error code */
fil_create_tablespace(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size,fil_type_t type)5441 static dberr_t fil_create_tablespace(space_id_t space_id, const char *name,
5442 const char *path, uint32_t flags,
5443 page_no_t size, fil_type_t type) {
5444 pfs_os_file_t file;
5445 dberr_t err;
5446 byte *buf2;
5447 byte *page;
5448 bool success;
5449 bool has_shared_space = FSP_FLAGS_GET_SHARED(flags);
5450 fil_space_t *space = nullptr;
5451
5452 ut_ad(!fsp_is_system_tablespace(space_id));
5453 ut_ad(!fsp_is_global_temporary(space_id));
5454 ut_a(fsp_flags_is_valid(flags));
5455 ut_a(type == FIL_TYPE_TEMPORARY || type == FIL_TYPE_TABLESPACE);
5456
5457 const page_size_t page_size(flags);
5458
5459 /* Create the subdirectories in the path, if they are
5460 not there already. */
5461 if (!has_shared_space) {
5462 err = os_file_create_subdirs_if_needed(path);
5463
5464 if (err != DB_SUCCESS) {
5465 return (err);
5466 }
5467 }
5468
5469 file = os_file_create(
5470 type == FIL_TYPE_TEMPORARY ? innodb_temp_file_key : innodb_data_file_key,
5471 path, OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
5472 OS_DATA_FILE, srv_read_only_mode && (type != FIL_TYPE_TEMPORARY),
5473 &success);
5474
5475 if (!success) {
5476 /* The following call will print an error message */
5477 ulint error = os_file_get_last_error(true);
5478
5479 ib::error(ER_IB_MSG_301, path);
5480
5481 switch (error) {
5482 case OS_FILE_ALREADY_EXISTS:
5483 #ifndef UNIV_HOTBACKUP
5484 ib::error(ER_IB_MSG_UNEXPECTED_FILE_EXISTS, path, path);
5485 return (DB_TABLESPACE_EXISTS);
5486 #else /* !UNIV_HOTBACKUP */
5487 return (DB_SUCCESS); /* Already existing file not an error here. */
5488 #endif /* !UNIV_HOTBACKUP */
5489
5490 case OS_FILE_NAME_TOO_LONG:
5491 ib::error(ER_IB_MSG_TOO_LONG_PATH, path);
5492 return (DB_TOO_LONG_PATH);
5493
5494 case OS_FILE_DISK_FULL:
5495 return (DB_OUT_OF_DISK_SPACE);
5496
5497 default:
5498 return (DB_ERROR);
5499 }
5500 }
5501
5502 bool atomic_write;
5503
5504 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
5505 if (fil_fusionio_enable_atomic_write(file)) {
5506 int ret = posix_fallocate(file.m_file, 0, size * page_size.physical());
5507
5508 if (ret != 0) {
5509 ib::error(ER_IB_MSG_303, path, ulonglong{size * page_size.physical()},
5510 ret, REFMAN);
5511 success = false;
5512 } else {
5513 success = true;
5514 }
5515
5516 atomic_write = true;
5517 } else {
5518 atomic_write = false;
5519
5520 success = os_file_set_size(path, file, 0, size * page_size.physical(),
5521 srv_read_only_mode, true);
5522 }
5523 #else
5524 atomic_write = false;
5525
5526 success = os_file_set_size(path, file, 0, size * page_size.physical(),
5527 srv_read_only_mode, true);
5528
5529 #endif /* !NO_FALLOCATE && UNIV_LINUX */
5530
5531 if (!success) {
5532 os_file_close(file);
5533 os_file_delete(innodb_data_file_key, path);
5534 return (DB_OUT_OF_DISK_SPACE);
5535 }
5536
5537 /* Note: We are actually punching a hole, previous contents will
5538 be lost after this call, if it succeeds. In this case the file
5539 should be full of NULs. */
5540
5541 bool punch_hole = os_is_sparse_file_supported(path, file);
5542
5543 if (punch_hole) {
5544 dberr_t punch_err;
5545
5546 punch_err = os_file_punch_hole(file.m_file, 0, size * page_size.physical());
5547
5548 if (punch_err != DB_SUCCESS) {
5549 punch_hole = false;
5550 }
5551 }
5552
5553 /* We have to write the space id to the file immediately and flush the
5554 file to disk. This is because in crash recovery we must be aware what
5555 tablespaces exist and what are their space id's, so that we can apply
5556 the log records to the right file. It may take quite a while until
5557 buffer pool flush algorithms write anything to the file and flush it to
5558 disk. If we would not write here anything, the file would be filled
5559 with zeros from the call of os_file_set_size(), until a buffer pool
5560 flush would write to it. */
5561
5562 buf2 = static_cast<byte *>(ut_malloc_nokey(3 * page_size.logical()));
5563
5564 /* Align the memory for file i/o if we might have O_DIRECT set */
5565 page = static_cast<byte *>(ut_align(buf2, page_size.logical()));
5566
5567 memset(page, '\0', page_size.logical());
5568
5569 /* Add the UNIV_PAGE_SIZE to the table flags and write them to the
5570 tablespace header. */
5571 flags = fsp_flags_set_page_size(flags, page_size);
5572 fsp_header_init_fields(page, space_id, flags);
5573 mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
5574
5575 mach_write_to_4(page + FIL_PAGE_SRV_VERSION, DD_SPACE_CURRENT_SRV_VERSION);
5576 mach_write_to_4(page + FIL_PAGE_SPACE_VERSION,
5577 DD_SPACE_CURRENT_SPACE_VERSION);
5578
5579 IORequest request(IORequest::WRITE);
5580
5581 if (!page_size.is_compressed()) {
5582 buf_flush_init_for_writing(nullptr, page, nullptr, 0,
5583 fsp_is_checksum_disabled(space_id),
5584 true /* skip_lsn_check */);
5585
5586 err = os_file_write(request, path, file, page, 0, page_size.physical());
5587
5588 ut_ad(err != DB_IO_NO_PUNCH_HOLE);
5589
5590 } else {
5591 page_zip_des_t page_zip;
5592
5593 page_zip_set_size(&page_zip, page_size.physical());
5594 page_zip.data = page + page_size.logical();
5595 #ifdef UNIV_DEBUG
5596 page_zip.m_start =
5597 #endif /* UNIV_DEBUG */
5598 page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
5599
5600 buf_flush_init_for_writing(nullptr, page, &page_zip, 0,
5601 fsp_is_checksum_disabled(space_id),
5602 true /* skip_lsn_check */);
5603
5604 err = os_file_write(request, path, file, page_zip.data, 0,
5605 page_size.physical());
5606
5607 ut_a(err != DB_IO_NO_PUNCH_HOLE);
5608
5609 punch_hole = false;
5610 }
5611
5612 ut_free(buf2);
5613
5614 if (err != DB_SUCCESS) {
5615 ib::error(ER_IB_MSG_304, path);
5616
5617 os_file_close(file);
5618 os_file_delete(innodb_data_file_key, path);
5619
5620 return (DB_ERROR);
5621 }
5622
5623 success = os_file_flush(file);
5624
5625 if (!success) {
5626 ib::error(ER_IB_MSG_305, path);
5627
5628 os_file_close(file);
5629 os_file_delete(innodb_data_file_key, path);
5630 return (DB_ERROR);
5631 }
5632
5633 space = fil_space_create(name, space_id, flags, type);
5634
5635 if (space == nullptr) {
5636 os_file_close(file);
5637 os_file_delete(innodb_data_file_key, path);
5638 return (DB_ERROR);
5639 }
5640
5641 DEBUG_SYNC_C("fil_ibd_created_space");
5642
5643 auto shard = fil_system->shard_by_id(space_id);
5644
5645 fil_node_t *file_node =
5646 shard->create_node(path, size, space, false, punch_hole, atomic_write);
5647
5648 err = (file_node == nullptr) ? DB_ERROR : DB_SUCCESS;
5649
5650 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
5651 /* Temporary tablespace creation need not be redo logged */
5652 if (err == DB_SUCCESS && type != FIL_TYPE_TEMPORARY) {
5653 const auto &file = space->files.front();
5654
5655 mtr_t mtr;
5656
5657 mtr_start(&mtr);
5658
5659 fil_op_write_log(MLOG_FILE_CREATE, space_id, file.name, nullptr,
5660 space->flags, &mtr);
5661
5662 mtr_commit(&mtr);
5663
5664 DBUG_EXECUTE_IF("fil_ibd_create_log", log_make_latest_checkpoint(););
5665 }
5666
5667 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
5668
5669 /* For encryption tablespace, initial encryption information. */
5670 if (space != nullptr && FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
5671 err = fil_set_encryption(space->id, Encryption::AES, nullptr, nullptr);
5672
5673 ut_ad(err == DB_SUCCESS);
5674 }
5675
5676 os_file_close(file);
5677 if (err != DB_SUCCESS) {
5678 os_file_delete(innodb_data_file_key, path);
5679 }
5680
5681 return (err);
5682 }
5683
5684 /** Create a IBD tablespace file.
5685 @param[in] space_id Tablespace ID
5686 @param[in] name Tablespace name in dbname/tablename format.
5687 For general tablespaces, the 'dbname/' part
5688 may be missing.
5689 @param[in] path Path and filename of the datafile to create.
5690 @param[in] flags Tablespace flags
5691 @param[in] size Initial size of the tablespace file in pages,
5692 must be >= FIL_IBD_FILE_INITIAL_SIZE
5693 @return DB_SUCCESS or error code */
fil_ibd_create(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size)5694 dberr_t fil_ibd_create(space_id_t space_id, const char *name, const char *path,
5695 uint32_t flags, page_no_t size) {
5696 ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
5697 ut_ad(!srv_read_only_mode);
5698 return (fil_create_tablespace(space_id, name, path, flags, size,
5699 FIL_TYPE_TABLESPACE));
5700 }
5701
5702 /** Create a session temporary tablespace (IBT) file.
5703 @param[in] space_id Tablespace ID
5704 @param[in] name Tablespace name
5705 @param[in] path Path and filename of the datafile to create.
5706 @param[in] flags Tablespace flags
5707 @param[in] size Initial size of the tablespace file in pages,
5708 must be >= FIL_IBT_FILE_INITIAL_SIZE
5709 @return DB_SUCCESS or error code */
fil_ibt_create(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size)5710 dberr_t fil_ibt_create(space_id_t space_id, const char *name, const char *path,
5711 uint32_t flags, page_no_t size) {
5712 ut_a(size >= FIL_IBT_FILE_INITIAL_SIZE);
5713 return (fil_create_tablespace(space_id, name, path, flags, size,
5714 FIL_TYPE_TEMPORARY));
5715 }
5716
fil_replace_tablespace(space_id_t old_space_id,space_id_t new_space_id,page_no_t size_in_pages)5717 bool fil_replace_tablespace(space_id_t old_space_id, space_id_t new_space_id,
5718 page_no_t size_in_pages) {
5719 auto space = fil_space_get(old_space_id);
5720 std::string space_name(space->name);
5721 std::string file_name(space->files.front().name);
5722
5723 /* Mark the old tablespace to be deleted. We defer the actual deletion
5724 to avoid concurrency bottleneck. Leave the pages in the buffer pool
5725 and record the lsn in fil_space_t::m_deleted_lsn. */
5726 dberr_t err = fil_delete_tablespace(old_space_id, BUF_REMOVE_NONE);
5727
5728 if (err != DB_SUCCESS) {
5729 return (false);
5730 }
5731
5732 ulint flags = fsp_flags_init(univ_page_size, false, false, false, false);
5733
5734 /* Create the new UNDO tablespace. */
5735 err =
5736 fil_create_tablespace(new_space_id, space_name.c_str(), file_name.c_str(),
5737 flags, size_in_pages, FIL_TYPE_TABLESPACE);
5738
5739 return (err == DB_SUCCESS);
5740 }
5741
5742 #ifndef UNIV_HOTBACKUP
5743
5744 /** Open a single-table tablespace and optionally check the space id is
5745 right in it. If not successful, print an error message to the error log. This
5746 function is used to open a tablespace when we start up mysqld, and also in
5747 IMPORT TABLESPACE.
5748 NOTE that we assume this operation is used either at the database startup
5749 or under the protection of the dictionary mutex, so that two users cannot
5750 race here.
5751
5752 The fil_node_t::handle will not be left open.
5753
5754 @param[in] validate whether we should validate the tablespace
5755 (read the first page of the file and
5756 check that the space id in it matches id)
5757 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
5758 @param[in] space_id Tablespace ID
5759 @param[in] flags tablespace flags
5760 @param[in] space_name tablespace name of the datafile
5761 If file-per-table, it is the table name in
5762 the databasename/tablename format
5763 @param[in] table_name table name in case if need to construct
5764 file path
5765 @param[in] path_in expected filepath, usually read from dictionary
5766 @param[in] strict whether to report error when open ibd failed
5767 @param[in] old_space whether it is a 5.7 tablespace opening
5768 by upgrade
5769 @return DB_SUCCESS or error code */
fil_ibd_open(bool validate,fil_type_t purpose,space_id_t space_id,uint32_t flags,const char * space_name,const char * table_name,const char * path_in,bool strict,bool old_space)5770 dberr_t fil_ibd_open(bool validate, fil_type_t purpose, space_id_t space_id,
5771 uint32_t flags, const char *space_name,
5772 const char *table_name, const char *path_in, bool strict,
5773 bool old_space) {
5774 Datafile df;
5775 bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags);
5776 bool for_import = (purpose == FIL_TYPE_IMPORT);
5777
5778 ut_ad(fil_type_is_data(purpose));
5779
5780 if (!fsp_flags_is_valid(flags)) {
5781 return (DB_CORRUPTION);
5782 }
5783
5784 /* Check if the file is already open. The space can be loaded
5785 via fil_space_get_first_path() on startup. This is a problem
5786 for partitioning code. It's a convoluted call graph via the DD.
5787 On Windows this can lead to a sharing violation when we attempt
5788 to open it again. */
5789
5790 auto shard = fil_system->shard_by_id(space_id);
5791
5792 shard->mutex_acquire();
5793
5794 auto space = shard->get_space_by_id(space_id);
5795
5796 if (space != nullptr) {
5797 shard->space_detach(space);
5798 shard->space_delete(space->id);
5799 shard->space_free_low(space);
5800 ut_a(space == nullptr);
5801 }
5802
5803 shard->mutex_release();
5804
5805 df.init(space_name, flags);
5806
5807 if (path_in == nullptr) {
5808 df.make_filepath(nullptr, space_name, IBD);
5809 } else {
5810 df.set_filepath(path_in);
5811 }
5812
5813 /* Attempt to open the tablespace. */
5814 if (df.open_read_only(strict) == DB_SUCCESS) {
5815 ut_ad(df.is_open());
5816 } else {
5817 ut_ad(!df.is_open());
5818 return (DB_CANNOT_OPEN_FILE);
5819 }
5820
5821 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
5822 const bool atomic_write =
5823 !dblwr::enabled && fil_fusionio_enable_atomic_write(df.handle());
5824 #else
5825 const bool atomic_write = false;
5826 #endif /* !NO_FALLOCATE && UNIV_LINUX */
5827
5828 dberr_t err;
5829
5830 if ((validate || is_encrypted) &&
5831 (err = df.validate_to_dd(space_id, flags, for_import)) != DB_SUCCESS) {
5832 /* We don't reply the rename via the redo log anymore.
5833 Therefore we can get a space ID mismatch when validating
5834 the files during bootstrap. */
5835
5836 if (!is_encrypted && err != DB_WRONG_FILE_NAME) {
5837 /* The following call prints an error message.
5838 For encrypted tablespace we skip print, since it should
5839 be keyring plugin issues. */
5840
5841 os_file_get_last_error(true);
5842
5843 ib::error(ER_IB_MSG_306, space_name, TROUBLESHOOT_DATADICT_MSG);
5844 }
5845
5846 return (err);
5847 }
5848
5849 /* If the encrypted tablespace is already opened,
5850 return success. */
5851 if (validate && is_encrypted && fil_space_get(space_id)) {
5852 return (DB_SUCCESS);
5853 }
5854
5855 /* We pass UNINITIALIZED flags while we try to open DD tablespace. In that
5856 case, set the flags now based on what is read from disk.*/
5857 if (FSP_FLAGS_ARE_NOT_SET(flags) && fsp_is_dd_tablespace(space_id)) {
5858 flags = df.flags();
5859 is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags);
5860 }
5861
5862 space = fil_space_create(space_name, space_id, flags, purpose);
5863
5864 if (space == nullptr) {
5865 return (DB_ERROR);
5866 }
5867
5868 /* We do not measure the size of the file, that is why
5869 we pass the 0 below */
5870
5871 const fil_node_t *file =
5872 shard->create_node(df.filepath(), 0, space, false, atomic_write, false);
5873
5874 if (file == nullptr) {
5875 return (DB_ERROR);
5876 }
5877
5878 if (validate && !old_space && !for_import) {
5879 if (df.server_version() > DD_SPACE_CURRENT_SRV_VERSION) {
5880 ib::error(ER_IB_MSG_1272, ulong{DD_SPACE_CURRENT_SRV_VERSION},
5881 ulonglong{df.server_version()});
5882 /* Server version is less than the tablespace server version.
5883 We don't support downgrade for 8.0 server, so report error */
5884 return (DB_SERVER_VERSION_LOW);
5885 }
5886 ut_ad(df.space_version() == DD_SPACE_CURRENT_SPACE_VERSION);
5887 }
5888
5889 /* Set unencryption in progress flag */
5890 space->encryption_op_in_progress = df.m_encryption_op_in_progress;
5891
5892 /* Its possible during Encryption processing, space flag for encryption
5893 has been updated in ibd file but server crashed before DD flags are
5894 updated. Thus, consider ibd setting too for encryption.
5895
5896 It is safe because m_encryption_op_in_progress will be set to NONE
5897 always unless there is a crash before finishing Encryption. */
5898 if (space->encryption_op_in_progress == ENCRYPTION) {
5899 space->flags |= flags & FSP_FLAGS_MASK_ENCRYPTION;
5900 }
5901
5902 /* For encryption tablespace, initialize encryption information.*/
5903 if ((is_encrypted || space->encryption_op_in_progress == ENCRYPTION) &&
5904 !for_import) {
5905 dberr_t err;
5906 byte *iv = df.m_encryption_iv;
5907 byte *key = df.m_encryption_key;
5908
5909 err = fil_set_encryption(space->id, Encryption::AES, key, iv);
5910
5911 if (err != DB_SUCCESS) {
5912 return (DB_ERROR);
5913 }
5914 }
5915
5916 return (DB_SUCCESS);
5917 }
5918
5919 #else /* !UNIV_HOTBACKUP */
5920
5921 /** Allocates a file name for an old version of a single-table tablespace.
5922 The string must be freed by caller with ut_free()!
5923 @param[in] name Original file name
5924 @return own: file name */
meb_make_ibbackup_old_name(const char * name)5925 static char *meb_make_ibbackup_old_name(const char *name) {
5926 char *path;
5927 ulint len = strlen(name);
5928 static const char suffix[] = "_ibbackup_old_vers_";
5929
5930 path = static_cast<char *>(ut_malloc_nokey(len + 15 + sizeof(suffix)));
5931
5932 memcpy(path, name, len);
5933 memcpy(path + len, suffix, sizeof(suffix) - 1);
5934
5935 meb_sprintf_timestamp_without_extra_chars(path + len + sizeof(suffix) - 1);
5936
5937 return (path);
5938 }
5939 #endif /* UNIV_HOTBACKUP */
5940
5941 /** Looks for a pre-existing fil_space_t with the given tablespace ID
5942 and, if found, returns the name and filepath in newly allocated buffers
5943 that the caller must free.
5944 @param[in] space_id The tablespace ID to search for.
5945 @param[out] name Name of the tablespace found.
5946 @param[out] filepath The filepath of the first datafile for the
5947 tablespace.
5948 @return true if tablespace is found, false if not. */
fil_space_read_name_and_filepath(space_id_t space_id,char ** name,char ** filepath)5949 bool fil_space_read_name_and_filepath(space_id_t space_id, char **name,
5950 char **filepath) {
5951 bool success = false;
5952
5953 *name = nullptr;
5954 *filepath = nullptr;
5955
5956 auto shard = fil_system->shard_by_id(space_id);
5957
5958 shard->mutex_acquire();
5959
5960 fil_space_t *space = shard->get_space_by_id(space_id);
5961
5962 if (space != nullptr) {
5963 *name = mem_strdup(space->name);
5964
5965 *filepath = mem_strdup(space->files.front().name);
5966
5967 success = true;
5968 }
5969
5970 shard->mutex_release();
5971
5972 return (success);
5973 }
5974
5975 /** Convert a file name to a tablespace name. Strip the file name
5976 prefix and suffix, leaving only databasename/tablename.
5977 @param[in] filename directory/databasename/tablename.ibd
5978 @return database/tablename string, to be freed with ut_free() */
fil_path_to_space_name(const char * filename)5979 char *fil_path_to_space_name(const char *filename) {
5980 std::string path{filename};
5981 auto pos = path.find_last_of(Fil_path::SEPARATOR);
5982
5983 ut_a(pos != std::string::npos && !Fil_path::is_separator(path.back()));
5984
5985 std::string db_name = path.substr(0, pos);
5986 std::string space_name = path.substr(pos + 1, path.length());
5987
5988 /* If it is a path such as a/b/c.ibd, ignore everything before 'b'. */
5989 pos = db_name.find_last_of(Fil_path::SEPARATOR);
5990
5991 if (pos != std::string::npos) {
5992 db_name = db_name.substr(pos + 1);
5993 }
5994
5995 char *name;
5996
5997 if (Fil_path::has_suffix(IBD, space_name)) {
5998 /* fil_space_t::name always uses '/' . */
5999
6000 path = db_name;
6001 path.push_back('/');
6002
6003 /* Strip the ".ibd" suffix. */
6004 path.append(space_name.substr(0, space_name.length() - 4));
6005
6006 name = mem_strdupl(path.c_str(), path.length());
6007
6008 } else {
6009 /* Must have an "undo" prefix. */
6010 ut_ad(space_name.find("undo") == 0);
6011
6012 name = mem_strdupl(space_name.c_str(), space_name.length());
6013 }
6014
6015 return (name);
6016 }
6017
6018 /** Open an ibd tablespace and add it to the InnoDB data structures.
6019 This is similar to fil_ibd_open() except that it is used while processing
6020 the redo and DDL log, so the data dictionary is not available and very little
6021 validation is done. The tablespace name is extracted from the
6022 dbname/tablename.ibd portion of the filename, which assumes that the file
6023 is a file-per-table tablespace. Any name will do for now. General
6024 tablespace names will be read from the dictionary after it has been
6025 recovered. The tablespace flags are read at this time from the first page
6026 of the file in validate_for_recovery().
6027 @param[in] space_id tablespace ID
6028 @param[in] path path/to/databasename/tablename.ibd
6029 @param[out] space the tablespace, or nullptr on error
6030 @return status of the operation */
ibd_open_for_recovery(space_id_t space_id,const std::string & path,fil_space_t * & space)6031 fil_load_status Fil_shard::ibd_open_for_recovery(space_id_t space_id,
6032 const std::string &path,
6033 fil_space_t *&space) {
6034 /* If the a space is already in the file system cache with this
6035 space ID, then there is nothing to do. */
6036
6037 mutex_acquire();
6038
6039 space = get_space_by_id(space_id);
6040
6041 mutex_release();
6042
6043 const char *filename = path.c_str();
6044
6045 if (space != nullptr) {
6046 ut_a(space->files.size() == 1);
6047
6048 const auto &file = space->files.front();
6049
6050 /* Compare the real paths. */
6051 if (Fil_path::is_same_as(filename, file.name)) {
6052 return (FIL_LOAD_OK);
6053 }
6054
6055 #ifdef UNIV_HOTBACKUP
6056 ib::trace_2() << "Ignoring data file '" << filename << "' with space ID "
6057 << space->id << ". Another data file called '" << file.name
6058 << "' exists with the same space ID";
6059 #else /* UNIV_HOTBACKUP */
6060 ib::info(ER_IB_MSG_307, filename, ulong{space->id}, file.name);
6061 #endif /* UNIV_HOTBACKUP */
6062
6063 space = nullptr;
6064
6065 return (FIL_LOAD_ID_CHANGED);
6066 }
6067
6068 Datafile df;
6069
6070 df.set_filepath(filename);
6071
6072 if (df.open_read_only(false) != DB_SUCCESS) {
6073 return (FIL_LOAD_NOT_FOUND);
6074 }
6075
6076 ut_ad(df.is_open());
6077
6078 /* Read and validate the first page of the tablespace.
6079 Assign a tablespace name based on the tablespace type. */
6080 dberr_t err = df.validate_for_recovery(space_id);
6081
6082 ut_a(err == DB_SUCCESS || err == DB_INVALID_ENCRYPTION_META);
6083 if (err == DB_INVALID_ENCRYPTION_META) {
6084 bool success = fil_system->erase_path(space_id);
6085 ut_a(success);
6086 return (FIL_LOAD_INVALID_ENCRYPTION_META);
6087 }
6088
6089 ut_a(df.space_id() == space_id);
6090
6091 /* Get and test the file size. */
6092 os_offset_t size = os_file_get_size(df.handle());
6093
6094 /* Every .ibd file is created >= 4 pages in size.
6095 Smaller files cannot be OK. */
6096 os_offset_t minimum_size;
6097
6098 /* Every .ibd file is created >= FIL_IBD_FILE_INITIAL_SIZE
6099 pages in size. Smaller files cannot be OK. */
6100 {
6101 const page_size_t page_size(df.flags());
6102
6103 minimum_size = FIL_IBD_FILE_INITIAL_SIZE * page_size.physical();
6104 }
6105
6106 if (size == static_cast<os_offset_t>(-1)) {
6107 /* The following call prints an error message */
6108 os_file_get_last_error(true);
6109
6110 ib::error(ER_IB_MSG_308) << "Could not measure the size of"
6111 " single-table tablespace file '"
6112 << df.filepath() << "'";
6113
6114 } else if (size < minimum_size) {
6115 #ifndef UNIV_HOTBACKUP
6116 ib::error(ER_IB_MSG_309)
6117 << "The size of tablespace file '" << df.filepath() << "' is only "
6118 << size << ", should be at least " << minimum_size << "!";
6119 #else
6120 /* In MEB, we work around this error. */
6121 df.set_space_id(SPACE_UNKNOWN);
6122 df.set_flags(0);
6123 #endif /* !UNIV_HOTBACKUP */
6124 }
6125
6126 ut_ad(space == nullptr);
6127
6128 #ifdef UNIV_HOTBACKUP
6129 if (df.space_id() == SPACE_UNKNOWN || df.space_id() == 0) {
6130 char *new_path;
6131
6132 ib::info(ER_IB_MSG_310)
6133 << "Renaming tablespace file '" << df.filepath() << "' with space ID "
6134 << df.space_id() << " to " << df.name()
6135 << "_ibbackup_old_vers_<timestamp>"
6136 " because its size "
6137 << df.size()
6138 << " is too small"
6139 " (< 4 pages 16 kB each), or the space id in the"
6140 " file header is not sensible. This can happen in"
6141 " an mysqlbackup run, and is not dangerous.";
6142 df.close();
6143
6144 new_path = meb_make_ibbackup_old_name(df.filepath());
6145
6146 bool success =
6147 os_file_rename(innodb_data_file_key, df.filepath(), new_path);
6148
6149 ut_a(success);
6150
6151 ut_free(new_path);
6152
6153 return (FIL_LOAD_ID_CHANGED);
6154 }
6155
6156 /* A backup may contain the same space several times, if the space got
6157 renamed at a sensitive time. Since it is enough to have one version of
6158 the space, we rename the file if a space with the same space id
6159 already exists in the tablespace memory cache. We rather rename the
6160 file than delete it, because if there is a bug, we do not want to
6161 destroy valuable data. */
6162
6163 mutex_acquire();
6164
6165 space = get_space_by_id(space_id);
6166
6167 mutex_release();
6168
6169 if (space != nullptr) {
6170 ib::info(ER_IB_MSG_311)
6171 << "Renaming data file '" << df.filepath() << "' with space ID "
6172 << space_id << " to " << df.name()
6173 << "_ibbackup_old_vers_<timestamp> because space " << space->name
6174 << " with the same id was scanned"
6175 " earlier. This can happen if you have renamed tables"
6176 " during an mysqlbackup run.";
6177
6178 df.close();
6179
6180 char *new_path = meb_make_ibbackup_old_name(df.filepath());
6181
6182 bool success =
6183 os_file_rename(innodb_data_file_key, df.filepath(), new_path);
6184
6185 ut_a(success);
6186
6187 ut_free(new_path);
6188 return (FIL_LOAD_OK);
6189 }
6190 #endif /* UNIV_HOTBACKUP */
6191 std::string tablespace_name(df.name());
6192
6193 /* During the apply-log operation, MEB already has translated the
6194 file name, so file name to space name conversion is not required. */
6195 #ifndef UNIV_HOTBACKUP
6196 dict_name::convert_to_space(tablespace_name);
6197 #endif /* !UNIV_HOTBACKUP */
6198
6199 fil_system->mutex_acquire_all();
6200
6201 space = space_create(tablespace_name.c_str(), space_id, df.flags(),
6202 FIL_TYPE_TABLESPACE);
6203
6204 fil_system->mutex_release_all();
6205
6206 if (space == nullptr) {
6207 return (FIL_LOAD_INVALID);
6208 }
6209
6210 ut_ad(space->id == df.space_id());
6211 ut_ad(space->id == space_id);
6212
6213 /* We do not use the size information we have about the file, because
6214 the rounding formula for extents and pages is somewhat complex; we
6215 let create_node() do that task. */
6216
6217 const fil_node_t *file;
6218
6219 file = create_node(df.filepath(), 0, space, false, true, false);
6220
6221 ut_a(file != nullptr);
6222
6223 /* For encryption tablespace, initial encryption information. */
6224 if (FSP_FLAGS_GET_ENCRYPTION(space->flags) &&
6225 df.m_encryption_key != nullptr) {
6226 dberr_t err = fil_set_encryption(space->id, Encryption::AES,
6227 df.m_encryption_key, df.m_encryption_iv);
6228
6229 if (err != DB_SUCCESS) {
6230 ib::error(ER_IB_MSG_312, space->name);
6231 }
6232 }
6233
6234 if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && !srv_backup_mode &&
6235 use_dumped_tablespace_keys) {
6236 err = xb_set_encryption(space);
6237 if (err != DB_SUCCESS) {
6238 ib::error() << "Cannot find encryption key for tablespace '%s'."
6239 << space->name;
6240 return (FIL_LOAD_INVALID);
6241 }
6242 }
6243
6244 /* Set unencryption in progress flag */
6245 space->encryption_op_in_progress = df.m_encryption_op_in_progress;
6246
6247 return (FIL_LOAD_OK);
6248 }
6249
6250 /** Open an ibd tablespace and add it to the InnoDB data structures.
6251 This is similar to fil_ibd_open() except that it is used while processing
6252 the redo log, so the data dictionary is not available and very little
6253 validation is done. The tablespace name is extracted from the
6254 dbname/tablename.ibd portion of the filename, which assumes that the file
6255 is a file-per-table tablespace. Any name will do for now. General
6256 tablespace names will be read from the dictionary after it has been
6257 recovered. The tablespace flags are read at this time from the first page
6258 of the file in validate_for_recovery().
6259 @param[in] space_id tablespace ID
6260 @param[in] path path/to/databasename/tablename.ibd
6261 @param[out] space the tablespace, or nullptr on error
6262 @return status of the operation */
ibd_open_for_recovery(space_id_t space_id,const std::string & path,fil_space_t * & space)6263 fil_load_status Fil_system::ibd_open_for_recovery(space_id_t space_id,
6264 const std::string &path,
6265 fil_space_t *&space) {
6266 /* System tablespace open should never come here. It should be
6267 opened explicitly using the config path. */
6268 ut_a(space_id != TRX_SYS_SPACE);
6269
6270 #ifndef UNIV_HOTBACKUP
6271 /* Do not attempt to open or load for recovery any undo tablespace that
6272 is currently being truncated. */
6273 if (fsp_is_undo_tablespace(space_id) &&
6274 undo::is_active_truncate_log_present(undo::id2num(space_id))) {
6275 return (FIL_LOAD_NOT_FOUND);
6276 }
6277 #endif /* !UNIV_HOTBACKUP */
6278
6279 auto shard = shard_by_id(space_id);
6280
6281 return (shard->ibd_open_for_recovery(space_id, path, space));
6282 }
6283
6284 #ifndef UNIV_HOTBACKUP
6285
6286 /** Report that a tablespace for a table was not found.
6287 @param[in] name Table name
6288 @param[in] space_id Table's space ID */
fil_report_missing_tablespace(const char * name,space_id_t space_id)6289 static void fil_report_missing_tablespace(const char *name,
6290 space_id_t space_id) {
6291 ib::error(ER_IB_MSG_313)
6292 << "Table " << name << " in the InnoDB data dictionary has tablespace id "
6293 << space_id
6294 << ","
6295 " but tablespace with that id or name does not exist. Have"
6296 " you deleted or moved .ibd files?";
6297 }
6298
adjust_space_name(fil_space_t * space,const char * dd_space_name)6299 bool Fil_shard::adjust_space_name(fil_space_t *space,
6300 const char *dd_space_name) {
6301 if (!strcmp(space->name, dd_space_name)) {
6302 return (true);
6303 }
6304
6305 bool replace_general =
6306 FSP_FLAGS_GET_SHARED(space->flags) &&
6307 0 == strncmp(space->name, general_space_name, strlen(general_space_name));
6308 bool replace_undo =
6309 fsp_is_undo_tablespace(space->id) &&
6310 0 == strncmp(space->name, undo_space_name, strlen(undo_space_name));
6311
6312 /* Update the auto-generated fil_space_t::name */
6313 if (replace_general || replace_undo) {
6314 char *old_space_name = space->name;
6315 char *new_space_name = mem_strdup(dd_space_name);
6316
6317 update_space_name_map(space, new_space_name);
6318
6319 space->name = new_space_name;
6320
6321 ut_free(old_space_name);
6322 }
6323
6324 /* Update the undo::Tablespace::name. Since the fil_shard mutex is held by
6325 the caller, it would be a sync order violation to get undo::spaces->s_lock.
6326 It is OK to skip this s_lock since this occurs during boot_tablespaces()
6327 which is still single threaded. */
6328 if (replace_undo) {
6329 space_id_t space_num = undo::id2num(space->id);
6330 undo::Tablespace *undo_space = undo::spaces->find(space_num);
6331 undo_space->set_space_name(dd_space_name);
6332 }
6333
6334 return (replace_general || replace_undo);
6335 }
6336
6337 /** Returns true if a matching tablespace exists in the InnoDB tablespace
6338 memory cache.
6339 @param[in] space_id Tablespace ID
6340 @param[in] name Tablespace name used in
6341 fil_space_create().
6342 @param[in] print_err Print detailed error information to the
6343 error log if a matching tablespace is
6344 not found from memory.
6345 @param[in] adjust_space Whether to adjust space id on mismatch
6346 @param[in] heap Heap memory
6347 @param[in] table_id table id
6348 @return true if a matching tablespace exists in the memory cache */
space_check_exists(space_id_t space_id,const char * name,bool print_err,bool adjust_space,mem_heap_t * heap,table_id_t table_id)6349 bool Fil_shard::space_check_exists(space_id_t space_id, const char *name,
6350 bool print_err, bool adjust_space,
6351 mem_heap_t *heap, table_id_t table_id) {
6352 fil_space_t *fnamespace = nullptr;
6353
6354 mutex_acquire();
6355
6356 /* Look if there is a space with the same id */
6357 fil_space_t *space = get_space_by_id(space_id);
6358
6359 /* name is nullptr when replaying a DELETE ddl log. */
6360 if (name == nullptr) {
6361 mutex_release();
6362 return (space != nullptr);
6363 }
6364
6365 if (space != nullptr) {
6366 /* No need to check a general tablespace name if the DD
6367 is not yet available. */
6368 if (!srv_sys_tablespaces_open && FSP_FLAGS_GET_SHARED(space->flags)) {
6369 mutex_release();
6370 return (true);
6371 }
6372
6373 /* Sometimes the name has been auto-generated when the
6374 datafile is discovered and needs to be adjusted to that
6375 of the DD. This happens for general and undo tablespaces. */
6376 if (srv_sys_tablespaces_open && adjust_space &&
6377 adjust_space_name(space, name)) {
6378 mutex_release();
6379 return (true);
6380 }
6381
6382 /* If this space has the expected name, use it. */
6383 fnamespace = get_space_by_name(name);
6384
6385 if (space == fnamespace) {
6386 /* Found */
6387 mutex_release();
6388 return (true);
6389 }
6390 }
6391
6392 /* Info from "fnamespace" comes from the ibd file itself, it can
6393 be different from data obtained from System tables since file
6394 operations are not transactional. If adjust_space is set, and the
6395 mismatching space are between a user table and its temp table, we
6396 shall adjust the ibd file name according to system table info */
6397 if (adjust_space && space != nullptr &&
6398 row_is_mysql_tmp_table_name(space->name) &&
6399 !row_is_mysql_tmp_table_name(name)) {
6400 /* Atomic DDL's "ddl_log" will adjust the tablespace name. */
6401 mutex_release();
6402
6403 return (true);
6404
6405 } else if (!print_err) {
6406 ;
6407
6408 } else if (space == nullptr) {
6409 if (fnamespace == nullptr) {
6410 if (print_err) {
6411 fil_report_missing_tablespace(name, space_id);
6412 }
6413
6414 } else {
6415 ib::error(ER_IB_MSG_314)
6416 << "Table " << name
6417 << " in InnoDB data"
6418 " dictionary has tablespace id "
6419 << space_id
6420 << ", but a tablespace with that id does not"
6421 " exist. There is a tablespace of name "
6422 << fnamespace->name << " and id " << fnamespace->id
6423 << ", though. Have you"
6424 " deleted or moved .ibd files?";
6425 }
6426
6427 ib::warn(ER_IB_MSG_315) << TROUBLESHOOT_DATADICT_MSG;
6428
6429 } else if (0 != strcmp(space->name, name)) {
6430 ib::error(ER_IB_MSG_316) << "Table " << name
6431 << " in InnoDB data dictionary"
6432 " has tablespace id "
6433 << space_id
6434 << ", but the"
6435 " tablespace with that id has name "
6436 << space->name
6437 << ". Have you deleted or moved .ibd"
6438 " files?";
6439
6440 if (fnamespace != nullptr) {
6441 ib::error(ER_IB_MSG_317) << "There is a tablespace with the right"
6442 " name: "
6443 << fnamespace->name
6444 << ", but its id"
6445 " is "
6446 << fnamespace->id << ".";
6447 }
6448
6449 ib::warn(ER_IB_MSG_318) << TROUBLESHOOT_DATADICT_MSG;
6450 }
6451
6452 mutex_release();
6453
6454 return (false);
6455 }
6456
6457 /** Returns true if a matching tablespace exists in the InnoDB tablespace
6458 memory cache.
6459 @param[in] space_id Tablespace ID
6460 @param[in] name Tablespace name used in space_create().
6461 @param[in] print_err Print detailed error information to the
6462 error log if a matching tablespace is
6463 not found from memory.
6464 @param[in] adjust_space Whether to adjust space id on mismatch
6465 @param[in] heap Heap memory
6466 @param[in] table_id table ID
6467 @return true if a matching tablespace exists in the memory cache */
fil_space_exists_in_mem(space_id_t space_id,const char * name,bool print_err,bool adjust_space,mem_heap_t * heap,table_id_t table_id)6468 bool fil_space_exists_in_mem(space_id_t space_id, const char *name,
6469 bool print_err, bool adjust_space,
6470 mem_heap_t *heap, table_id_t table_id) {
6471 auto shard = fil_system->shard_by_id(space_id);
6472
6473 return (shard->space_check_exists(space_id, name, print_err, adjust_space,
6474 heap, table_id));
6475 }
6476 #endif /* !UNIV_HOTBACKUP */
6477
6478 /** Return the space ID based on the tablespace name.
6479 The tablespace must be found in the tablespace memory cache.
6480 @param[in] name Tablespace name
6481 @return space ID if tablespace found, SPACE_UNKNOWN if space not. */
fil_space_get_id_by_name(const char * name)6482 space_id_t fil_space_get_id_by_name(const char *name) {
6483 auto space = fil_system->get_space_by_name(name);
6484
6485 return ((space == nullptr) ? SPACE_UNKNOWN : space->id);
6486 }
6487
6488 /** Fill the pages with NULs
6489 @param[in] file Tablespace file
6490 @param[in] page_size physical page size
6491 @param[in] start Offset from the start of the file in bytes
6492 @param[in] len Length in bytes
6493 @param[in] read_only_mode
6494 if true, then read only mode checks are enforced.
6495 @return DB_SUCCESS or error code */
fil_write_zeros(const fil_node_t * file,ulint page_size,os_offset_t start,ulint len,bool read_only_mode)6496 static dberr_t fil_write_zeros(const fil_node_t *file, ulint page_size,
6497 os_offset_t start, ulint len,
6498 bool read_only_mode) {
6499 ut_a(len > 0);
6500
6501 /* Extend at most 1M at a time */
6502 ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len);
6503
6504 byte *ptr = reinterpret_cast<byte *>(ut_zalloc_nokey(n_bytes + page_size));
6505
6506 byte *buf = reinterpret_cast<byte *>(ut_align(ptr, page_size));
6507
6508 os_offset_t offset = start;
6509 dberr_t err = DB_SUCCESS;
6510 const os_offset_t end = start + len;
6511 IORequest request(IORequest::WRITE);
6512
6513 while (offset < end) {
6514 #ifdef UNIV_HOTBACKUP
6515 err =
6516 os_file_write(request, file->name, file->handle, buf, offset, n_bytes);
6517 #else /* UNIV_HOTBACKUP */
6518 err = os_aio_func(request, AIO_mode::SYNC, file->name, file->handle, buf,
6519 offset, n_bytes, read_only_mode, nullptr, nullptr);
6520 #endif /* UNIV_HOTBACKUP */
6521
6522 if (err != DB_SUCCESS) {
6523 break;
6524 }
6525
6526 offset += n_bytes;
6527
6528 n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset));
6529
6530 DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", DBUG_SUICIDE(););
6531 }
6532
6533 ut_free(ptr);
6534
6535 return (err);
6536 }
6537
6538 /** Try to extend a tablespace if it is smaller than the specified size.
6539 @param[in,out] space tablespace
6540 @param[in] size desired size in pages
6541 @return whether the tablespace is at least as big as requested */
bool Fil_shard::space_extend(fil_space_t *space, page_no_t size) {
  /* In read-only mode we allow write to shared temporary tablespace
  as intrinsic table created by Optimizer reside in this tablespace. */
  ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id));

#ifndef UNIV_HOTBACKUP
  DBUG_EXECUTE_IF("fil_space_print_xdes_pages",
                  space->print_xdes_pages("xdes_pages.log"););
#endif /* !UNIV_HOTBACKUP */

  fil_node_t *file;
  bool slot;
  size_t phy_page_size;
  bool success = true;

#ifdef UNIV_HOTBACKUP
  page_no_t prev_size = 0;
#endif /* UNIV_HOTBACKUP */

  /* Step 1: with the shard mutex held, claim the last file of the
  tablespace by raising file->in_use. While another thread has the
  file in use, drop the mutex, sleep and retry. */
  for (;;) {
    /* When slot is true, release_open_slot() must be called before we
    leave; every exit path below that tests it does so. */
    slot = mutex_acquire_and_get_space(space->id, space);

    /* Note:If the file is being opened for the first time then
    we don't have the file physical size. There is no guarantee
    that the file has been opened at this stage. */

    /* NOTE(review): this test is a strict '<', while the re-check after
    the file has been opened uses '<='. A request equal to the current
    size therefore goes on to open the file before returning true. */
    if (size < space->size) {
      /* Space already big enough */
      mutex_release();

      if (slot) {
        release_open_slot(m_id);
      }

      return (true);
    }

    /* Only the last file of the tablespace is extended. */
    file = &space->files.back();

    page_size_t page_size(space->flags);

    phy_page_size = page_size.physical();

#ifdef UNIV_HOTBACKUP
    prev_size = space->size;

    ib::trace_1() << "Extending space id : " << space->id
                  << ", space name : " << space->name
                  << ", space size : " << space->size
                  << " page, page size : " << phy_page_size;
#endif /* UNIV_HOTBACKUP */

    if (file->in_use == 0) {
      /* Mark this file as undergoing extension. This flag
      is used by other threads to wait for the extension
      operation to finish or wait for open to complete. */

      ++file->in_use;

      break;
    }

    if (slot) {
      release_open_slot(m_id);
    }

    /* Another thread is currently using the file. Wait
    for it to finish. It'd have been better to use an event
    driven mechanism but the entire module is peppered with
    polling code. */

    mutex_release();

    os_thread_sleep(100000);
  }

  /* Step 2: still holding the shard mutex, make sure the file is open.
  On success this also counts one pending I/O on the file, which is
  given back through complete_io() on the paths below. */
  bool opened = prepare_file_for_io(file, true);

  if (slot) {
    release_open_slot(m_id);
  }

  if (!opened) {
    /* The tablespace data file, such as .ibd file, is missing */
    ut_a(file->in_use > 0);
    --file->in_use;

    mutex_release();

    return (false);
  }

  ut_a(file->is_open);

  if (size <= space->size) {
    /* Nothing to do after all: drop our claim on the file and give
    back the pending I/O as a read, so that the file is not marked as
    modified. */
    ut_a(file->in_use > 0);
    --file->in_use;

    complete_io(file, IORequestRead);

    mutex_release();

    return (true);
  }

  /* At this point it is safe to release the shard mutex. No
  other thread can rename, delete or close the file because
  we have set the file->in_use flag. */

  mutex_release();

  /* Step 3: grow the file on disk without holding the shard mutex. */
  page_no_t pages_added;

  /* Current size of the file; it is divided by the physical page size
  below to obtain a page count. */
  os_offset_t node_start = os_file_get_size(file->handle);

  ut_a(node_start != (os_offset_t)-1);

  /* File first page number */
  page_no_t node_first_page = space->size - file->size;

  /* Number of physical pages in the file */
  page_no_t n_node_physical_pages =
      static_cast<page_no_t>(node_start / phy_page_size);

  /* Number of pages to extend in the file */
  page_no_t n_node_extend;

  /* node_first_page + file->size equals space->size, so this is the
  number of pages missing from the tablespace. */
  n_node_extend = size - (node_first_page + file->size);

  /* If we already have enough physical pages to satisfy the
  extend request on the file then ignore it */
  if (file->size + n_node_extend > n_node_physical_pages) {
    DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", DBUG_SUICIDE(););

    os_offset_t len;
    dberr_t err = DB_SUCCESS;

    /* Amount by which the file has to grow, measured from its current
    end (node_start). */
    len = ((file->size + n_node_extend) * phy_page_size) - node_start;

    ut_ad(len > 0);

#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
    /* This is required by FusionIO HW/Firmware */

    int ret = posix_fallocate(file->handle.m_file, node_start, len);

    DBUG_EXECUTE_IF("ib_posix_fallocate_fail_eintr", ret = EINTR;);

    DBUG_EXECUTE_IF("ib_posix_fallocate_fail_einval", ret = EINVAL;);

    if (ret != 0) {
      /* We already pass the valid offset and len in, if EINVAL
      is returned, it could only mean that the file system doesn't
      support fallocate(), currently one known case is ext3 with O_DIRECT.

      Also because above call could be interrupted, in this case,
      simply go to plan B by writing zeroes.

      Both error messages for above two scenarios are skipped in case
      of flooding error messages, because they can be ignored by users. */
      if (ret != EINTR && ret != EINVAL) {
        ib::error(ER_IB_MSG_319)
            << "posix_fallocate(): Failed to preallocate"
               " data for file "
            << file->name << ", desired size " << len
            << " bytes."
               " Operating system error number "
            << ret
            << ". Check"
               " that the disk is not full or a disk quota"
               " exceeded. Make sure the file system supports"
               " this function. Some operating system error"
               " numbers are described at " REFMAN
               "operating-system-error-codes.html";
      }

      /* Any fallocate failure forces the zero-writing fallback below. */
      err = DB_IO_ERROR;
    }
#endif /* NO_FALLOCATE || !UNIV_LINUX */

    /* Write zeroes unless the file uses atomic writes and the
    preallocation above (if compiled in) did not fail. */
    if (!file->atomic_write || err == DB_IO_ERROR) {
      bool read_only_mode;

      /* srv_read_only_mode is only passed on for temporary tablespaces;
      for every other purpose false is passed. */
      read_only_mode =
          (space->purpose != FIL_TYPE_TEMPORARY ? false : srv_read_only_mode);

      err = fil_write_zeros(file, phy_page_size, node_start,
                            static_cast<ulint>(len), read_only_mode);

      if (err != DB_SUCCESS) {
        ib::warn(ER_IB_MSG_320)
            << "Error while writing " << len << " zeroes to " << file->name
            << " starting at offset " << node_start;
      }
    }

    /* Check how many pages actually added */
    os_offset_t end = os_file_get_size(file->handle);
    ut_a(end != static_cast<os_offset_t>(-1) && end >= node_start);

    /* The extension succeeded only if the file grew by exactly len;
    anything else is recorded in os_has_said_disk_full. */
    os_has_said_disk_full = !(success = (end == node_start + len));

    /* Whole pages now present in the file, minus the pages the file
    was already known to have. */
    pages_added = static_cast<page_no_t>(end / phy_page_size);

    ut_a(pages_added >= file->size);
    pages_added -= file->size;

  } else {
    /* The file is already physically large enough; only the in-memory
    sizes need to catch up. */
    success = true;
    pages_added = n_node_extend;
    os_has_said_disk_full = FALSE;
  }

  /* Step 4: publish the new sizes and drop our claim on the file under
  the shard mutex. */
  mutex_acquire();

  file->size += pages_added;
  space->size += pages_added;

  ut_a(file->in_use > 0);
  --file->in_use;

  /* Completed as a write, so the file is recorded as modified. */
  complete_io(file, IORequestWrite);

#ifndef UNIV_HOTBACKUP
  /* Keep the last data file size info up to date, rounded to
  full megabytes */
  page_no_t pages_per_mb =
      static_cast<page_no_t>((1024 * 1024) / phy_page_size);

  page_no_t size_in_pages = ((file->size / pages_per_mb) * pages_per_mb);

  if (space->id == TRX_SYS_SPACE) {
    srv_sys_space.set_last_file_size(size_in_pages);
  } else if (fsp_is_system_temporary(space->id)) {
    srv_tmp_space.set_last_file_size(size_in_pages);
  }
#else  /* !UNIV_HOTBACKUP */
  ib::trace_2() << "Extended space : " << space->name << " from " << prev_size
                << " pages to " << space->size << " pages "
                << ", desired space size : " << size << " pages.";
#endif /* !UNIV_HOTBACKUP */

  space_flush(space->id);

  mutex_release();

  return (success);
}
6789
6790 /** Try to extend a tablespace if it is smaller than the specified size.
6791 @param[in,out] space tablespace
6792 @param[in] size desired size in pages
6793 @return whether the tablespace is at least as big as requested */
fil_space_extend(fil_space_t * space,page_no_t size)6794 bool fil_space_extend(fil_space_t *space, page_no_t size) {
6795 auto shard = fil_system->shard_by_id(space->id);
6796
6797 return (shard->space_extend(space, size));
6798 }
6799
6800 #ifdef UNIV_HOTBACKUP
6801 /** Extends all tablespaces to the size stored in the space header. During the
6802 mysqlbackup --apply-log phase we extended the spaces on-demand so that log
6803 records could be applied, but that may have left spaces still too small
6804 compared to the size stored in the space header. */
meb_extend_tablespaces_to_stored_len()6805 void Fil_shard::meb_extend_tablespaces_to_stored_len() {
6806 ut_ad(mutex_owned());
6807
6808 byte *buf = static_cast<byte *>(ut_malloc_nokey(UNIV_PAGE_SIZE));
6809
6810 ut_a(buf != nullptr);
6811
6812 for (auto &elem : m_spaces) {
6813 auto space = elem.second;
6814
6815 if (space->purpose == FIL_TYPE_LOG) {
6816 /* ignore redo log tablespace */
6817 continue;
6818 }
6819
6820 ut_a(space->purpose == FIL_TYPE_TABLESPACE);
6821
6822 /* No need to protect with a mutex, because this is
6823 a single-threaded operation */
6824
6825 mutex_release();
6826
6827 dberr_t error;
6828
6829 const page_size_t page_size(space->flags);
6830
6831 error = fil_read(page_id_t(space->id, 0), page_size, 0,
6832 page_size.physical(), buf);
6833
6834 ut_a(error == DB_SUCCESS);
6835
6836 ulint size_in_header;
6837
6838 size_in_header = fsp_header_get_field(buf, FSP_SIZE);
6839
6840 bool success;
6841
6842 success = space_extend(space, size_in_header);
6843
6844 if (!success) {
6845 ib::error(ER_IB_MSG_321)
6846 << "Could not extend the tablespace of " << space->name
6847 << " to the size stored in"
6848 " header, "
6849 << size_in_header
6850 << " pages;"
6851 " size after extension "
6852 << 0
6853 << " pages. Check that you have free disk"
6854 " space and retry!";
6855
6856 ut_a(success);
6857 }
6858
6859 mutex_acquire();
6860 }
6861
6862 ut_free(buf);
6863 }
6864
6865 /** Extends all tablespaces to the size stored in the space header. During the
6866 mysqlbackup --apply-log phase we extended the spaces on-demand so that log
6867 records could be applied, but that may have left spaces still too small
6868 compared to the size stored in the space header. */
meb_extend_tablespaces_to_stored_len()6869 void meb_extend_tablespaces_to_stored_len() {
6870 fil_system->meb_extend_tablespaces_to_stored_len();
6871 }
6872
/** When true, meb_is_intermediate_file() reports every file as a normal
(non-intermediate) file. Defaults to false. */
bool meb_is_redo_log_only_restore = false;
6874
6875 /** Determine if file is intermediate / temporary. These files are
6876 created during reorganize partition, rename tables, add / drop columns etc.
6877 @param[in] filepath absolute / relative or simply file name
6878 @retvalue true if it is intermediate file
6879 @retvalue false if it is normal file */
meb_is_intermediate_file(const std::string & filepath)6880 bool meb_is_intermediate_file(const std::string &filepath) {
6881 std::string file_name = filepath;
6882
6883 {
6884 /** If its redo only restore, apply log needs to got through the
6885 intermediate steps to apply a ddl.
6886 Some of these operation might result in intermediate files.
6887 */
6888 if (meb_is_redo_log_only_restore) return false;
6889 /* extract file name from relative or absolute file name */
6890 auto pos = file_name.rfind(OS_PATH_SEPARATOR);
6891
6892 if (pos != std::string::npos) {
6893 ++pos;
6894 file_name = file_name.substr(pos);
6895 }
6896 }
6897
6898 transform(file_name.begin(), file_name.end(), file_name.begin(), ::tolower);
6899
6900 if (file_name[0] != '#') {
6901 auto pos = file_name.rfind("#tmp#.ibd");
6902 if (pos != std::string::npos) {
6903 return (true);
6904 } else {
6905 return (false); /* normal file name */
6906 }
6907 }
6908
6909 static std::vector<std::string> prefixes = {"#sql-", "#sql2-", "#tmp#",
6910 "#ren#"};
6911
6912 /* search for the unsupported patterns */
6913 for (const auto &prefix : prefixes) {
6914 if (Fil_path::has_prefix(file_name, prefix)) {
6915 return (true);
6916 }
6917 }
6918
6919 return (false);
6920 }
6921
6922 /** Return the space ID based of the remote general tablespace name.
6923 This is a wrapper over fil_space_get_id_by_name() method. it means,
6924 the tablespace must be found in the tablespace memory cache.
6925 This method extracts the tablespace name from input parameters and checks if
6926 it has been loaded in memory cache through either any of the remote general
6927 tablespaces directories identified at the time memory cache created.
6928 @param[in, out] tablespace Tablespace name
6929 @return space ID if tablespace found, SPACE_UNKNOWN if not found. */
meb_fil_space_get_rem_gen_ts_id_by_name(std::string & tablespace)6930 space_id_t meb_fil_space_get_rem_gen_ts_id_by_name(std::string &tablespace) {
6931 space_id_t space_id = SPACE_UNKNOWN;
6932
6933 for (auto newpath : rem_gen_ts_dirs) {
6934 auto pos = tablespace.rfind(OS_PATH_SEPARATOR);
6935
6936 if (pos == std::string::npos) {
6937 break;
6938 }
6939
6940 newpath += tablespace.substr(pos);
6941
6942 space_id = fil_space_get_id_by_name(newpath.c_str());
6943
6944 if (space_id != SPACE_UNKNOWN) {
6945 tablespace = newpath;
6946 break;
6947 }
6948 }
6949
6950 return (space_id);
6951 }
6952
6953 /** Tablespace item during recovery */
6954 struct MEB_file_name {
6955 /** Constructor */
MEB_file_nameMEB_file_name6956 MEB_file_name(std::string name, bool deleted)
6957 : m_name(name), m_space(), m_deleted(deleted) {}
6958
6959 /** Tablespace file name (MLOG_FILE_NAME) */
6960 std::string m_name;
6961
6962 /** Tablespace object (NULL if not valid or not found) */
6963 fil_space_t *m_space;
6964
6965 /** Whether the tablespace has been deleted */
6966 bool m_deleted;
6967 };
6968
/** Map of dirty tablespaces during recovery, keyed by tablespace ID and
ordered by ascending ID. */
using MEB_recv_spaces =
    std::map<space_id_t, MEB_file_name, std::less<space_id_t>,
             ut_allocator<std::pair<const space_id_t, MEB_file_name>>>;

/** Tablespaces registered so far; entries are inserted by
Fil_system::meb_name_process(). */
static MEB_recv_spaces recv_spaces;
6975
6976 /** Checks if MEB has loaded this space for reovery.
6977 @param[in] space_id Tablespace ID
6978 @return true if the space_id is loaded */
meb_is_space_loaded(const space_id_t space_id)6979 bool meb_is_space_loaded(const space_id_t space_id) {
6980 return (recv_spaces.find(space_id) != recv_spaces.end());
6981 }
6982
6983 /** Set the keys for an encrypted tablespace.
6984 @param[in] space Tablespace for which to set the key */
meb_set_encryption_key(const fil_space_t * space)6985 static void meb_set_encryption_key(const fil_space_t *space) {
6986 ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
6987
6988 for (auto &key : *recv_sys->keys) {
6989 if (key.space_id != space->id) {
6990 continue;
6991 }
6992
6993 dberr_t err;
6994
6995 err = fil_set_encryption(space->id, Encryption::AES, key.ptr, key.iv);
6996
6997 if (err != DB_SUCCESS) {
6998 ib::error(ER_IB_MSG_322) << "Can't set encryption information"
6999 << " for tablespace" << space->name << "!";
7000 }
7001
7002 ut_free(key.iv);
7003 ut_free(key.ptr);
7004
7005 key.iv = nullptr;
7006 key.ptr = nullptr;
7007 key.space_id = 0;
7008 }
7009 }
7010
7011 /** Process a file name passed as an input
7012 Wrapper around meb_name_process()
7013 @param[in,out] name absolute path of tablespace file
7014 @param[in] space_id The tablespace ID
7015 @param[in] deleted true if MLOG_FILE_DELETE */
meb_name_process(char * name,space_id_t space_id,bool deleted)7016 void Fil_system::meb_name_process(char *name, space_id_t space_id,
7017 bool deleted) {
7018 ut_ad(space_id != TRX_SYS_SPACE);
7019
7020 /* We will also insert space=nullptr into the map, so that
7021 further checks can ensure that a MLOG_FILE_NAME record was
7022 scanned before applying any page records for the space_id. */
7023
7024 Fil_path::normalize(name);
7025
7026 size_t len = std::strlen(name);
7027
7028 MEB_file_name fname(std::string(name, len - 1), deleted);
7029
7030 auto p = recv_spaces.insert(std::make_pair(space_id, fname));
7031
7032 ut_ad(p.first->first == space_id);
7033
7034 MEB_file_name &f = p.first->second;
7035
7036 if (deleted) {
7037 /* Got MLOG_FILE_DELETE */
7038
7039 if (!p.second && !f.m_deleted) {
7040 f.m_deleted = true;
7041
7042 if (f.m_space != nullptr) {
7043 f.m_space = nullptr;
7044 }
7045 }
7046
7047 ut_ad(f.m_space == nullptr);
7048
7049 } else if (p.second || f.m_name != fname.m_name) {
7050 fil_space_t *space;
7051
7052 /* Check if the tablespace file exists and contains
7053 the space_id. If not, ignore the file after displaying
7054 a note. Abort if there are multiple files with the
7055 same space_id. */
7056
7057 switch (ibd_open_for_recovery(space_id, name, space)) {
7058 case FIL_LOAD_OK:
7059 ut_ad(space != nullptr);
7060
7061 /* For encrypted tablespace, set key and iv. */
7062 if (FSP_FLAGS_GET_ENCRYPTION(space->flags) &&
7063 recv_sys->keys != nullptr) {
7064 meb_set_encryption_key(space);
7065 }
7066
7067 if (f.m_space == nullptr || f.m_space == space) {
7068 f.m_name = fname.m_name;
7069 f.m_space = space;
7070 f.m_deleted = false;
7071
7072 } else {
7073 ib::error(ER_IB_MSG_323)
7074 << "Tablespace " << space_id << " has been found in two places: '"
7075 << f.m_name << "' and '" << name
7076 << "'."
7077 " You must delete one of them.";
7078
7079 recv_sys->found_corrupt_fs = true;
7080 }
7081 break;
7082
7083 case FIL_LOAD_ID_CHANGED:
7084 ut_ad(space == nullptr);
7085
7086 ib::trace_1() << "Ignoring file " << name << " for space-id mismatch "
7087 << space_id;
7088 break;
7089
7090 case FIL_LOAD_NOT_FOUND:
7091 /* No matching tablespace was found; maybe it
7092 was renamed, and we will find a subsequent
7093 MLOG_FILE_* record. */
7094 ut_ad(space == nullptr);
7095 break;
7096
7097 case FIL_LOAD_INVALID:
7098 ut_ad(space == nullptr);
7099
7100 ib::warn(ER_IB_MSG_324) << "Invalid tablespace " << name;
7101 break;
7102
7103 case FIL_LOAD_MISMATCH:
7104 ut_ad(space == nullptr);
7105 break;
7106 }
7107 }
7108 }
7109
7110 /** Process a file name passed as an input
7111 Wrapper around meb_name_process()
7112 @param[in] name absolute path of tablespace file
7113 @param[in] space_id the tablespace ID */
meb_fil_name_process(const char * name,space_id_t space_id)7114 void meb_fil_name_process(const char *name, space_id_t space_id) {
7115 char *file_name = static_cast<char *>(mem_strdup(name));
7116
7117 fil_system->meb_name_process(file_name, space_id, false);
7118
7119 ut_free(file_name);
7120 }
7121
/** Tell whether a file path contains a back-link, i.e. a ".." component
followed by a path separator, either at the very start or further inside.
The path is assumed to name a file, so a trailing "/.." is not checked.
@param[in]  path  path to check
@return whether the path contains a back-link. */
static bool meb_has_back_link(const std::string &path) {
#ifdef _WIN32
  static const char *const leading_link = "..\\";
  static const char *const inner_link = "\\..\\";
#else
  static const char *const leading_link = "../";
  static const char *const inner_link = "/../";
#endif /* _WIN32 */

  /* Back-link as the first path component. */
  if (path.compare(0, 3, leading_link) == 0) {
    return (true);
  }

  /* Back-link anywhere else in the path. */
  return (path.find(inner_link) != std::string::npos);
}
7138
/** Parse a file name retrieved from a MLOG_FILE_* record,
and return the absolute file path corresponding to the backup dir
as well as the name in the form of database/tablespace
@param[in]	name		path emitted by the redo log
@param[in]	flags		flags emitted by the redo log
@param[in]	space_id	space_id emitted by the redo log
@param[out]	absolute_path	absolute path of tablespace
corresponding to target dir
@param[out]	tablespace_name	name in the form of database/table */
static void meb_make_abs_file_path(const std::string &name, uint32_t flags,
                                   space_id_t space_id,
                                   std::string &absolute_path,
                                   std::string &tablespace_name) {
  /* Scratch Datafile used to build the path and derive the name. */
  Datafile df;
  std::string file_name = name;

  /* If the tablespace path name is absolute or has back-links ("../"),
  we assume, that it is located outside of datadir. */
  if (Fil_path::is_absolute_path(file_name.c_str()) ||
      (meb_has_back_link(file_name) && !replay_in_datadir)) {
    if (replay_in_datadir) {
      /* This is an apply-log in the restored datadir. Take the path as is. */
      df.set_filepath(file_name.c_str());
    } else {
      /* This is an apply-log in backup_dir/datadir. Get the file inside. */
      auto pos = file_name.rfind(OS_PATH_SEPARATOR);

      /* if it is file per tablespace, then include the schema
      directory as well */
      if (fsp_is_file_per_table(space_id, flags) && pos != std::string::npos) {
        /* Step back to the separator before the last one. If the last
        separator is at index 0, pos - 1 wraps around to npos and the
        search starts from the end of the string again. */
        pos = file_name.rfind(OS_PATH_SEPARATOR, pos - 1);
      }

      if (pos == std::string::npos) {
        ib::fatal(ER_IB_MSG_325)
            << "Could not extract the tabelspace"
            << " file name from the in the path : " << name;
      }

      /* Skip the separator itself. */
      ++pos;

      file_name = file_name.substr(pos);

      df.make_filepath(MySQL_datadir_path, file_name.c_str(), IBD);
    }

  } else {
    /* This is an apply-log with a relative path, either in the restored
    datadir, or in backup_dir/datadir. If in the restored datadir, the
    path might start with "../" to reach outside of datadir. */
    auto pos = file_name.find(OS_PATH_SEPARATOR);

    /* Remove the cur dir from the path as this will cause the
    path name mismatch when we try to find out the space_id based
    on tablespace name */

    if (file_name.substr(0, pos) == ".") {
      ++pos;
      file_name = file_name.substr(pos);
    }

    /* make_filepath() does not prepend the directory, if the file name
    starts with "../". Prepend it unconditionally here. */
    file_name.insert(0, 1, OS_PATH_SEPARATOR);
    file_name.insert(0, MySQL_datadir_path);

    df.make_filepath(nullptr, file_name.c_str(), IBD);
  }

  df.set_flags(flags);
  df.set_space_id(space_id);
  /* NOTE(review): presumably set_name(nullptr) lets Datafile derive the
  name from the file path set above - confirm against Datafile::set_name(). */
  df.set_name(nullptr);

  absolute_path = df.filepath();

  tablespace_name = df.name();
}
7216
7217 /** Process a MLOG_FILE_CREATE redo record.
7218 @param[in] page_id Page id of the redo log record
7219 @param[in] flags Tablespace flags
7220 @param[in] name Tablespace filename */
meb_tablespace_redo_create(const page_id_t & page_id,uint32_t flags,const char * name)7221 static void meb_tablespace_redo_create(const page_id_t &page_id, uint32_t flags,
7222 const char *name) {
7223 std::string abs_file_path;
7224 std::string tablespace_name;
7225
7226 meb_make_abs_file_path(name, flags, page_id.space(), abs_file_path,
7227 tablespace_name);
7228
7229 if (meb_is_intermediate_file(abs_file_path.c_str()) ||
7230 fil_space_get(page_id.space()) ||
7231 fil_space_get_id_by_name(tablespace_name.c_str()) != SPACE_UNKNOWN ||
7232 meb_fil_space_get_rem_gen_ts_id_by_name(tablespace_name) !=
7233 SPACE_UNKNOWN) {
7234 /* Don't create table while :-
7235 1. scanning the redo logs during backup
7236 2. apply-log on a partial backup
7237 3. if it is intermediate file
7238 4. tablespace is already loaded in memory
7239 5. tablespace is a remote general tablespace which is
7240 already loaded for recovery/apply-log from different
7241 directory path */
7242
7243 ib::trace_1() << "Ignoring the log record. No need to "
7244 << "create the tablespace : " << abs_file_path;
7245 } else {
7246 auto it = recv_spaces.find(page_id.space());
7247
7248 if (it == recv_spaces.end() || it->second.m_name != abs_file_path) {
7249 ib::trace_1() << "Creating the tablespace : " << abs_file_path
7250 << ", space_id : " << page_id.space();
7251
7252 dberr_t ret = fil_ibd_create(page_id.space(), tablespace_name.c_str(),
7253 abs_file_path.c_str(), flags,
7254 FIL_IBD_FILE_INITIAL_SIZE);
7255
7256 if (ret != DB_SUCCESS) {
7257 ib::fatal(ER_IB_MSG_326)
7258 << "Could not create the tablespace : " << abs_file_path
7259 << " with space Id : " << page_id.space();
7260 }
7261 }
7262 }
7263 }
7264
7265 /** Process a MLOG_FILE_RENAME redo record.
7266 @param[in] page_id Page id of the redo log record
7267 @param[in] from_name Tablespace from filename
7268 @param[in] to_name Tablespace to filename */
meb_tablespace_redo_rename(const page_id_t & page_id,const char * from_name,const char * to_name)7269 static void meb_tablespace_redo_rename(const page_id_t &page_id,
7270 const char *from_name,
7271 const char *to_name) {
7272 std::string abs_to_path;
7273 std::string abs_from_path;
7274 std::string tablespace_name;
7275
7276 meb_make_abs_file_path(from_name, 0, page_id.space(), abs_from_path,
7277 tablespace_name);
7278
7279 meb_make_abs_file_path(to_name, 0, page_id.space(), abs_to_path,
7280 tablespace_name);
7281
7282 char *new_name = nullptr;
7283
7284 if (meb_is_intermediate_file(from_name) ||
7285 meb_is_intermediate_file(to_name) ||
7286 fil_space_get_id_by_name(tablespace_name.c_str()) != SPACE_UNKNOWN ||
7287 meb_fil_space_get_rem_gen_ts_id_by_name(tablespace_name) !=
7288 SPACE_UNKNOWN ||
7289 fil_space_get(page_id.space()) == nullptr) {
7290 /* Don't rename table while :
7291 1. Scanning the redo logs during backup
7292 2. Apply-log on a partial backup
7293 3. Either of old or new tables are intermediate table
7294 4. The new name is already loaded for recovery/apply-log
7295 5. The new name is a remote general tablespace which is
7296 already loaded for recovery/apply-log from different
7297 directory path
7298 6. Tablespace is not yet loaded in memory.
7299 This will prevent unintended renames during recovery. */
7300
7301 ib::trace_1() << "Ignoring the log record. "
7302 << "No need to rename tablespace";
7303
7304 return;
7305
7306 } else {
7307 ib::trace_1() << "Renaming space id : " << page_id.space()
7308 << ", old tablespace name : " << from_name
7309 << " to new tablespace name : " << to_name;
7310
7311 new_name = static_cast<char *>(mem_strdup(abs_to_path.c_str()));
7312 }
7313
7314 meb_fil_name_process(from_name, page_id.space());
7315 meb_fil_name_process(new_name, page_id.space());
7316
7317 if (!fil_op_replay_rename(page_id, abs_from_path.c_str(),
7318 abs_to_path.c_str())) {
7319 recv_sys->found_corrupt_fs = true;
7320 }
7321
7322 meb_fil_name_process(to_name, page_id.space());
7323
7324 ut_free(new_name);
7325 }
7326
7327 /** Process a MLOG_FILE_DELETE redo record.
7328 @param[in] page_id Page id of the redo log record
7329 @param[in] name Tablespace filename */
meb_tablespace_redo_delete(const page_id_t & page_id,const char * name)7330 static void meb_tablespace_redo_delete(const page_id_t &page_id,
7331 const char *name) {
7332 std::string abs_file_path;
7333 std::string tablespace_name;
7334
7335 meb_make_abs_file_path(name, 0, page_id.space(), abs_file_path,
7336 tablespace_name);
7337
7338 char *file_name = static_cast<char *>(mem_strdup(name));
7339
7340 fil_system->meb_name_process(file_name, page_id.space(), true);
7341
7342 if (fil_space_get(page_id.space())) {
7343 ib::trace_1() << "Deleting the tablespace : " << abs_file_path
7344 << ", space_id : " << page_id.space();
7345 dberr_t err =
7346 fil_delete_tablespace(page_id.space(), BUF_REMOVE_FLUSH_NO_WRITE);
7347
7348 ut_a(err == DB_SUCCESS);
7349 }
7350
7351 ut_free(file_name);
7352 }
7353
7354 #endif /* UNIV_HOTBACKUP */
7355
7356 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
7357
7358 /** Tries to reserve free extents in a file space.
7359 @param[in] space_id Tablespace ID
7360 @param[in] n_free_now Number of free extents now
7361 @param[in] n_to_reserve How many one wants to reserve
7362 @return true if succeed */
fil_space_reserve_free_extents(space_id_t space_id,ulint n_free_now,ulint n_to_reserve)7363 bool fil_space_reserve_free_extents(space_id_t space_id, ulint n_free_now,
7364 ulint n_to_reserve) {
7365 auto shard = fil_system->shard_by_id(space_id);
7366
7367 shard->mutex_acquire();
7368
7369 fil_space_t *space = shard->get_space_by_id(space_id);
7370
7371 bool success;
7372
7373 if (space->n_reserved_extents + n_to_reserve > n_free_now) {
7374 success = false;
7375 } else {
7376 ut_a(n_to_reserve < std::numeric_limits<uint32_t>::max());
7377 space->n_reserved_extents += (uint32_t)n_to_reserve;
7378 success = true;
7379 }
7380
7381 shard->mutex_release();
7382
7383 return (success);
7384 }
7385
7386 /** Releases free extents in a file space.
7387 @param[in] space_id Tablespace ID
7388 @param[in] n_reserved How many were reserved */
fil_space_release_free_extents(space_id_t space_id,ulint n_reserved)7389 void fil_space_release_free_extents(space_id_t space_id, ulint n_reserved) {
7390 auto shard = fil_system->shard_by_id(space_id);
7391
7392 shard->mutex_acquire();
7393
7394 fil_space_t *space = shard->get_space_by_id(space_id);
7395
7396 ut_a(n_reserved < std::numeric_limits<uint32_t>::max());
7397 ut_a(space->n_reserved_extents >= n_reserved);
7398
7399 space->n_reserved_extents -= (uint32_t)n_reserved;
7400
7401 shard->mutex_release();
7402 }
7403
7404 /** Gets the number of reserved extents. If the database is silent, this number
7405 should be zero.
7406 @param[in] space_id Tablespace ID
7407 @return the number of reserved extents */
fil_space_get_n_reserved_extents(space_id_t space_id)7408 ulint fil_space_get_n_reserved_extents(space_id_t space_id) {
7409 auto shard = fil_system->shard_by_id(space_id);
7410
7411 shard->mutex_acquire();
7412
7413 fil_space_t *space = shard->get_space_by_id(space_id);
7414
7415 ulint n = space->n_reserved_extents;
7416
7417 shard->mutex_release();
7418
7419 return (n);
7420 }
7421
7422 /*============================ FILE I/O ================================*/
7423
/** Prepare a file for an I/O operation: open it if it is not open yet,
take it off the LRU list if it had no pending operations, and count one
more pending operation on it. The caller must own the shard mutex.
@param[in,out]	file	file to prepare
@param[in]	extend	passed through to open_file() when the file
has to be opened
@return false if the tablespace is marked as deleted or the file could
not be opened, otherwise true */
bool Fil_shard::prepare_file_for_io(fil_node_t *file, bool extend) {
  ut_ad(mutex_owned());

  fil_space_t *space = file->space;

  /* Warn, but carry on, when the number of open files is more than 5
  above the configured limit. */
  if (s_n_open > fil_system->m_max_n_open + 5) {
    /* Time of the last warning; shared by all callers. */
    static ulint prev_time;
    auto curr_time = ut_time_monotonic();

    /* Spam the log after every minute. Ignore any race here. */

    if ((curr_time - prev_time) > 60) {
      ib::warn(ER_IB_MSG_327)
          << "Open files " << s_n_open.load() << " exceeds the limit "
          << fil_system->m_max_n_open;

      prev_time = curr_time;
    }
  }

  if (space->is_deleted()) {
    return (false);
  }

  if (!file->is_open) {
    /* A file that is not open cannot have I/O pending on it. */
    ut_a(file->n_pending == 0);

    if (!open_file(file, extend)) {
      return (false);
    }
  }

  if (file->n_pending == 0 && Fil_system::space_belongs_in_LRU(space)) {
    /* The file is in the LRU list, remove it */

    ut_a(UT_LIST_GET_LEN(m_LRU) > 0);

    UT_LIST_REMOVE(m_LRU, file);
  }

  /* Balanced by the decrement in complete_io(). */
  ++file->n_pending;

  return (true);
}
7468
7469 /** If the tablespace is not on the unflushed list, add it.
7470 @param[in,out] space Tablespace to add */
add_to_unflushed_list(fil_space_t * space)7471 void Fil_shard::add_to_unflushed_list(fil_space_t *space) {
7472 ut_ad(m_id == REDO_SHARD || mutex_owned());
7473
7474 if (!space->is_in_unflushed_spaces) {
7475 space->is_in_unflushed_spaces = true;
7476
7477 UT_LIST_ADD_FIRST(m_unflushed_spaces, space);
7478 }
7479 }
7480
7481 /** Note that a write IO has completed.
7482 @param[in,out] file File on which a write was completed */
write_completed(fil_node_t * file)7483 void Fil_shard::write_completed(fil_node_t *file) {
7484 ut_ad(m_id == REDO_SHARD || mutex_owned());
7485
7486 ++m_modification_counter;
7487
7488 file->modification_counter = m_modification_counter;
7489
7490 if (fil_buffering_disabled(file->space)) {
7491 /* We don't need to keep track of unflushed
7492 changes as user has explicitly disabled
7493 buffering. */
7494 ut_ad(!file->space->is_in_unflushed_spaces);
7495
7496 file->flush_counter = file->modification_counter;
7497
7498 } else {
7499 add_to_unflushed_list(file->space);
7500 }
7501 }
7502
7503 /** Updates the data structures when an I/O operation finishes. Updates the
7504 pending i/o's field in the file appropriately.
7505 @param[in] file Tablespace file
7506 @param[in] type Marks the file as modified if type == WRITE */
complete_io(fil_node_t * file,const IORequest & type)7507 void Fil_shard::complete_io(fil_node_t *file, const IORequest &type) {
7508 ut_ad(m_id == REDO_SHARD || mutex_owned());
7509
7510 ut_a(file->n_pending > 0);
7511
7512 --file->n_pending;
7513
7514 ut_ad(type.validate());
7515
7516 if (type.is_write()) {
7517 ut_ad(!srv_read_only_mode || fsp_is_system_temporary(file->space->id));
7518
7519 write_completed(file);
7520 }
7521
7522 if (file->n_pending == 0 && Fil_system::space_belongs_in_LRU(file->space)) {
7523 /* The file must be put back to the LRU list */
7524 UT_LIST_ADD_FIRST(m_LRU, file);
7525 }
7526 }
7527
7528 /** Report information about an invalid page access.
7529 @param[in] block_offset Block offset
7530 @param[in] space_id Tablespace ID
7531 @param[in] space_name Tablespace name
7532 @param[in] byte_offset Byte offset
7533 @param[in] len I/O length
7534 @param[in] is_read I/O type
7535 @param[in] line Line called from */
fil_report_invalid_page_access_low(page_no_t block_offset,space_id_t space_id,const char * space_name,ulint byte_offset,ulint len,bool is_read,int line)7536 static void fil_report_invalid_page_access_low(page_no_t block_offset,
7537 space_id_t space_id,
7538 const char *space_name,
7539 ulint byte_offset, ulint len,
7540 bool is_read, int line) {
7541 ib::error(ER_IB_MSG_328)
7542 << "Trying to access page number " << block_offset
7543 << " in"
7544 " space "
7545 << space_id << ", space name " << space_name
7546 << ","
7547 " which is outside the tablespace bounds. Byte offset "
7548 << byte_offset << ", len " << len << ", i/o type "
7549 << (is_read ? "read" : "write")
7550 << ". If you get this error at mysqld startup, please check"
7551 " that your my.cnf matches the ibdata files that you have in"
7552 " the MySQL server.";
7553
7554 ib::error(ER_IB_MSG_329) << "Server exits"
7555 #ifdef UNIV_DEBUG
7556 << " at "
7557 << "fil0fil.cc"
7558 << "[" << line << "]"
7559 #endif /* UNIV_DEBUG */
7560 << ".";
7561
7562 ut_error;
7563 }
7564
/** Convenience wrapper around fil_report_invalid_page_access_low() that
passes the source line of the call site (__LINE__) as the last argument.
The arguments are, in order: block offset, tablespace ID, tablespace name,
byte offset, I/O length and whether the I/O is a read. */
#define fil_report_invalid_page_access(b, s, n, o, l, t) \
  fil_report_invalid_page_access_low((b), (s), (n), (o), (l), (t), __LINE__)
7567
7568 /** Set encryption information for IORequest.
7569 @param[in,out] req_type IO request
7570 @param[in] page_id page id
7571 @param[in] space table space */
fil_io_set_encryption(IORequest & req_type,const page_id_t & page_id,fil_space_t * space)7572 void fil_io_set_encryption(IORequest &req_type, const page_id_t &page_id,
7573 fil_space_t *space) {
7574 /* Don't encrypt pages of system tablespace upto TRX_SYS_PAGE(including). The
7575 doublewrite buffer header is on TRX_SYS_PAGE */
7576 if (fsp_is_system_tablespace(space->id) &&
7577 page_id.page_no() <= FSP_TRX_SYS_PAGE_NO) {
7578 req_type.clear_encrypted();
7579 return;
7580 }
7581
7582 /* Don't encrypt page 0 of all tablespaces except redo log
7583 tablespace, all pages from the system tablespace. */
7584 if (space->encryption_type == Encryption::NONE ||
7585 (space->encryption_op_in_progress == DECRYPTION && req_type.is_write()) ||
7586 (page_id.page_no() == 0 && !req_type.is_log())) {
7587 req_type.clear_encrypted();
7588 return;
7589 }
7590
7591 /* For writting redo log, if encryption for redo log is disabled,
7592 skip set encryption. */
7593 if (req_type.is_log() && req_type.is_write() && !srv_redo_log_encrypt) {
7594 req_type.clear_encrypted();
7595 return;
7596 }
7597
7598 /* For writting undo log, if encryption for undo log is disabled,
7599 skip set encryption. */
7600 if (fsp_is_undo_tablespace(space->id) && !srv_undo_log_encrypt &&
7601 req_type.is_write()) {
7602 req_type.clear_encrypted();
7603 return;
7604 }
7605
7606 req_type.encryption_key(space->encryption_key, space->encryption_klen,
7607 space->encryption_iv);
7608
7609 req_type.encryption_algorithm(Encryption::AES);
7610 }
7611
7612 /** Get the AIO mode.
7613 @param[in] req_type IO request type
7614 @param[in] sync true if Synchronous IO
7615 return the AIO mode */
get_AIO_mode(const IORequest & req_type,bool sync)7616 AIO_mode Fil_shard::get_AIO_mode(const IORequest &req_type, bool sync) {
7617 #ifndef UNIV_HOTBACKUP
7618 if (sync) {
7619 return (AIO_mode::SYNC);
7620
7621 } else if (req_type.is_log()) {
7622 return (AIO_mode::LOG);
7623
7624 } else {
7625 return (AIO_mode::NORMAL);
7626 }
7627 #else /* !UNIV_HOTBACKUP */
7628 ut_a(sync);
7629 return (AIO_mode::SYNC);
7630 #endif /* !UNIV_HOTBACKUP */
7631 }
7632
get_file_for_io(const IORequest & req_type,fil_space_t * space,page_no_t * page_no,fil_node_t * & file)7633 dberr_t Fil_shard::get_file_for_io(const IORequest &req_type,
7634 fil_space_t *space, page_no_t *page_no,
7635 fil_node_t *&file) {
7636 if (space->files.size() > 1) {
7637 ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
7638 space->id == dict_sys_t::s_log_space_first_id);
7639
7640 for (auto &f : space->files) {
7641 if (f.size > *page_no) {
7642 file = &f;
7643 return (DB_SUCCESS);
7644 }
7645
7646 *page_no -= f.size;
7647 }
7648
7649 } else if (!space->files.empty()) {
7650 fil_node_t &f = space->files.front();
7651
7652 file = &f;
7653
7654 return (DB_SUCCESS);
7655 }
7656
7657 file = nullptr;
7658 return (DB_ERROR);
7659 }
7660
7661 /** Read or write log file data synchronously.
7662 @param[in] type IO context
7663 @param[in] page_id page id
7664 @param[in] page_size page size
7665 @param[in] byte_offset remainder of offset in bytes; in AIO
7666 this must be divisible by the OS block
7667 size
7668 @param[in] len how many bytes to read or write; this
7669 must not cross a file boundary; in AIO
7670 this must be a block size multiple
7671 @param[in,out] buf buffer where to store read data or
7672 from where to write
7673 @return error code
7674 @retval DB_SUCCESS on success */
do_redo_io(const IORequest & type,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)7675 dberr_t Fil_shard::do_redo_io(const IORequest &type, const page_id_t &page_id,
7676 const page_size_t &page_size, ulint byte_offset,
7677 ulint len, void *buf) {
7678 IORequest req_type(type);
7679
7680 ut_ad(len > 0);
7681 ut_ad(req_type.is_log());
7682 ut_ad(req_type.validate());
7683 ut_ad(fil_validate_skip());
7684 ut_ad(byte_offset < UNIV_PAGE_SIZE);
7685 ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
7686
7687 #ifndef UNIV_HOTBACKUP
7688 if (req_type.is_read()) {
7689 srv_stats.data_read.add(len);
7690
7691 } else if (req_type.is_write()) {
7692 ut_ad(!srv_read_only_mode);
7693 srv_stats.data_written.add(len);
7694 }
7695 #endif
7696
7697 fil_space_t *space = get_space_by_id(page_id.space());
7698
7699 fil_node_t *file;
7700 page_no_t page_no = page_id.page_no();
7701 dberr_t err = get_file_for_io(req_type, space, &page_no, file);
7702
7703 ut_a(file != nullptr);
7704 ut_a(err == DB_SUCCESS);
7705 ut_a(page_size.physical() == page_size.logical());
7706
7707 os_offset_t offset = (os_offset_t)page_no * page_size.physical();
7708
7709 offset += byte_offset;
7710
7711 ut_a(file->size - page_no >=
7712 (byte_offset + len + (page_size.physical() - 1)) / page_size.physical());
7713
7714 ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
7715 ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
7716
7717 /* Set encryption information. */
7718 fil_io_set_encryption(req_type, page_id, space);
7719
7720 req_type.block_size(file->block_size);
7721
7722 if (!file->is_open) {
7723 ut_a(file->n_pending == 0);
7724
7725 bool success = open_file(file, false);
7726
7727 ut_a(success);
7728 }
7729
7730 if (req_type.is_read()) {
7731 err = os_file_read(req_type, file->name, file->handle, buf, offset, len);
7732
7733 } else {
7734 ut_ad(!srv_read_only_mode);
7735
7736 err = os_file_write(req_type, file->name, file->handle, buf, offset, len);
7737 }
7738
7739 if (type.is_write()) {
7740 mutex_acquire();
7741
7742 ++m_modification_counter;
7743
7744 file->modification_counter = m_modification_counter;
7745
7746 add_to_unflushed_list(file->space);
7747
7748 mutex_release();
7749 }
7750
7751 return (err);
7752 }
7753
/** Read or write data of a tablespace page, synchronously or through AIO.
The shard mutex is acquired while the tablespace and file are looked up and
prepared, and is released before the I/O itself is issued. For synchronous
requests the I/O is completed (complete_io()) before returning.
@param[in]      type         IO context
@param[in]      sync         true for synchronous IO
@param[in]      page_id      page id
@param[in]      page_size    page size
@param[in]      byte_offset  remainder of offset in bytes; must be a
                             multiple of OS_FILE_LOG_BLOCK_SIZE and, for
                             compressed pages, zero
@param[in]      len          how many bytes to read or write; must be a
                             multiple of OS_FILE_LOG_BLOCK_SIZE
@param[in,out]  buf          buffer where to store read data or from
                             where to write
@param[in]      message      passed on to os_aio(); if !sync and the file
                             of a deleted tablespace cannot be opened it is
                             handed to buf_page_io_complete() as a
                             buf_page_t
@return error code
@retval DB_TABLESPACE_DELETED if the tablespace is not found, if an async
        read is requested on a tablespace with stop_new_ops set, or if the
        file could not be prepared for I/O
@retval DB_ERROR if the page is outside the tablespace and the request
        tolerates missing pages, or if extending the tablespace failed */
dberr_t Fil_shard::do_io(const IORequest &type, bool sync,
                         const page_id_t &page_id, const page_size_t &page_size,
                         ulint byte_offset, ulint len, void *buf,
                         void *message) {
  /* Work on a copy: the request is adjusted below (wake-up, compression,
  encryption, block size). */
  IORequest req_type(type);

  ut_ad(req_type.validate());

  ut_ad(len > 0);
  ut_ad(byte_offset < UNIV_PAGE_SIZE);
  ut_ad(!page_size.is_compressed() || byte_offset == 0);
  ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));

  ut_ad(fil_validate_skip());

#ifndef UNIV_HOTBACKUP
  /* ibuf bitmap pages must be read in the sync AIO mode: */
  ut_ad(recv_no_ibuf_operations || req_type.is_write() ||
        !ibuf_bitmap_page(page_id, page_size) || sync || req_type.is_log());

  auto aio_mode = get_AIO_mode(req_type, sync);

  if (req_type.is_read()) {
    srv_stats.data_read.add(len);

    if (aio_mode == AIO_mode::NORMAL && !recv_no_ibuf_operations &&
        ibuf_page(page_id, page_size, nullptr)) {
      /* Reduce probability of deadlock bugs
      in connection with ibuf: do not let the
      ibuf I/O handler sleep */

      req_type.clear_do_not_wake();

      aio_mode = AIO_mode::IBUF;
    }

#ifdef UNIV_DEBUG
    /* Should never attempt to read from a deleted tablespace. */
    for (auto pair : m_deleted) {
      ut_ad(pair.first != page_id.space());
    }
#endif /* UNIV_DEBUG */

  } else if (req_type.is_write()) {
    ut_ad(!srv_read_only_mode || fsp_is_system_temporary(page_id.space()));

    srv_stats.data_written.add(len);
  }
#else  /* !UNIV_HOTBACKUP */
  ut_a(sync);
  auto aio_mode = AIO_mode::SYNC;
#endif /* !UNIV_HOTBACKUP */

  /* Reserve the mutex and make sure that we can open at
  least one file while holding it, if the file is not already open */

  fil_space_t *space;

  /* On return the shard mutex is held. A true result means an open slot
  was reserved, which must be given back with release_open_slot(). */
  bool slot = mutex_acquire_and_get_space(page_id.space(), space);

  /* If we are deleting a tablespace we don't allow async read
  operations on that. However, we do allow write operations and
  sync read operations. */
  if (space == nullptr ||
      (req_type.is_read() && !sync && space->stop_new_ops)) {
    if (slot) {
      release_open_slot(m_id);
    }

    mutex_release();

    /* Only complain when the caller did not ask to ignore missing
    tablespaces. */
    if (!req_type.ignore_missing()) {
      if (space == nullptr) {
        ib::error(ER_IB_MSG_330)
            << "Trying to do I/O on a tablespace"
            << " which does not exist. I/O type: "
            << (req_type.is_read() ? "read" : "write") << ", page: " << page_id
            << ", I/O length: " << len << " bytes";
      } else {
        ib::error(ER_IB_MSG_331)
            << "Trying to do async read on a"
            << " tablespace which is being deleted."
            << " Tablespace name: \"" << space->name << "\", page: " << page_id
            << ", read length: " << len << " bytes";
      }
    }

    return (DB_TABLESPACE_DELETED);
  }

  ut_ad(aio_mode != AIO_mode::IBUF || fil_type_is_data(space->purpose));

  /* Resolve the page to a file of the tablespace. For multi-file
  tablespaces page_no is made relative to the selected file. */
  fil_node_t *file;
  auto page_no = page_id.page_no();
  auto err = get_file_for_io(req_type, space, &page_no, file);

  if (file == nullptr) {
    ut_ad(err == DB_ERROR);

    if (req_type.ignore_missing()) {
      if (slot) {
        release_open_slot(m_id);
      }

      mutex_release();

      return (DB_ERROR);
    }

    /* This is a hard error. */
    fil_report_invalid_page_access(page_id.page_no(), page_id.space(),
                                   space->name, byte_offset, len,
                                   req_type.is_read());
  }

  bool opened = prepare_file_for_io(file, false);

  /* The open slot is no longer needed once the file has been prepared
  (or has failed to be prepared). */
  if (slot) {
    release_open_slot(m_id);
  }

  if (!opened) {
#ifndef UNIV_HOTBACKUP
    if (space->is_deleted()) {
      /* Only undo tablespaces are expected to be in this state. */
      ut_a(fsp_is_undo_tablespace(space->id));
      mutex_release();

      /* For async requests, complete the page I/O here because no
      AIO request is queued. */
      if (!sync) {
        buf_page_io_complete(static_cast<buf_page_t *>(message), false);
      }

      return (DB_TABLESPACE_DELETED);
    }
#endif /* !UNIV_HOTBACKUP */

    if (fil_type_is_data(space->purpose) && fsp_is_ibd_tablespace(space->id)) {
      mutex_release();

      if (!req_type.ignore_missing()) {
        ib::error(ER_IB_MSG_332)
            << "Trying to do I/O to a tablespace"
               " which exists without an .ibd data"
            << " file. I/O type: " << (req_type.is_read() ? "read" : "write")
            << ", page: " << page_id_t(page_id.space(), page_no)
            << ", I/O length: " << len << " bytes";
      }

      return (DB_TABLESPACE_DELETED);
    }

    /* The tablespace is for log. Currently, we just assert here
    to prevent handling errors along the way fil_io returns.
    Also, if the log files are missing, it would be hard to
    promise the server can continue running. */
    ut_a(0);
  }

  /* Check that at least the start offset is within the bounds of a
  single-table tablespace, including rollback tablespaces. */
  if (file->size <= page_no && space->id != TRX_SYS_SPACE &&
      fil_type_is_data(space->purpose)) {
    if (req_type.ignore_missing()) {
      /* If we can tolerate the non-existent pages, we
      should return with DB_ERROR and let caller decide
      what to do. */

      complete_io(file, req_type);

      mutex_release();

      return (DB_ERROR);
    }

    /* Extend the file if the page_no does not fall inside its bounds
    because xtrabackup may have copied it when it was smaller */
    mutex_release();

    bool success = space_extend(space, page_no + 1);

    /* NOTE(review): unlike the ignore_missing branch above, this early
    return does not call complete_io(). Presumably prepare_file_for_io()
    registered a pending I/O on the file; confirm that it is undone
    elsewhere on this path. */
    if (!success) {
      return (DB_ERROR);
    }
  } else {
    mutex_release();
  }

  /* From here on the shard mutex is not held. */

  ut_a(page_size.is_compressed() ||
       page_size.physical() == page_size.logical());

  /* Byte offset of the transfer inside the file. */
  auto offset = (os_offset_t)page_no * page_size.physical();

  offset += byte_offset;

  /* The whole transfer must fit inside the file. */
  ut_a(file->size - page_no >=
       (byte_offset + len + (page_size.physical() - 1)) / page_size.physical());

  ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
  ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);

  /* Don't compress the log, page 0 of all tablespaces, tables
  compressed with the old compression scheme and all pages from
  the system tablespace. */

  if (req_type.is_write() && !req_type.is_log() && !page_size.is_compressed() &&
      page_id.page_no() > 0 && IORequest::is_punch_hole_supported() &&
      file->punch_hole) {
    ut_ad(!req_type.is_log());

    req_type.set_punch_hole();

    req_type.compression_algorithm(space->compression_type);

  } else {
    req_type.clear_compressed();
  }

  /* Set encryption information. */
  fil_io_set_encryption(req_type, page_id, space);

  req_type.block_size(file->block_size);

#ifdef UNIV_HOTBACKUP
  /* In mysqlbackup do normal I/O, not AIO */
  if (req_type.is_read()) {
    err = os_file_read(req_type, file->name, file->handle, buf, offset, len);

  } else {
    ut_ad(!srv_read_only_mode || fsp_is_system_temporary(page_id.space()));

    err = os_file_write(req_type, file->name, file->handle, buf, offset, len);
  }
#else  /* UNIV_HOTBACKUP */
  /* Queue the aio request */
  err = os_aio(
      req_type, aio_mode, file->name, file->handle, buf, offset, len,
      fsp_is_system_temporary(page_id.space()) ? false : srv_read_only_mode,
      file, message);

#endif /* UNIV_HOTBACKUP */

  /* A failed hole punch is not an I/O failure: warn once and stop
  punching holes in this file. */
  if (err == DB_IO_NO_PUNCH_HOLE) {
    err = DB_SUCCESS;

    if (file->punch_hole) {
      ib::warn(ER_IB_MSG_333) << "Punch hole failed for '" << file->name << "'";
    }

    fil_no_punch_hole(file);
  }

  /* We can try to recover the page from the double write buffer if
  the decompression fails or the page is corrupt. */

  ut_a(req_type.is_dblwr() || err == DB_SUCCESS);

  if (sync) {
    /* The i/o operation is already completed when we return from
    os_aio: */

    mutex_acquire();

    complete_io(file, req_type);

    mutex_release();

    ut_ad(fil_validate_skip());
  }

  return (err);
}
8024
8025 /** Read or write redo log data (synchronous buffered IO).
8026 @param[in] type IO context
8027 @param[in] page_id where to read or write
8028 @param[in] page_size page size
8029 @param[in] byte_offset remainder of offset in bytes
8030 @param[in] len this must not cross a file boundary;
8031 @param[in,out] buf buffer where to store read data or from where
8032 to write
8033 @retval DB_SUCCESS if all OK */
fil_redo_io(const IORequest & type,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)8034 dberr_t fil_redo_io(const IORequest &type, const page_id_t &page_id,
8035 const page_size_t &page_size, ulint byte_offset, ulint len,
8036 void *buf) {
8037 ut_ad(type.is_log());
8038
8039 auto shard = fil_system->shard_by_id(page_id.space());
8040 #if defined(_WIN32) && defined(WIN_ASYNC_IO)
8041 /* On Windows we always open the redo log file in AIO mode. ie. we
8042 use the AIO API for the read/write even for sync IO. */
8043 return (shard->do_io(type, true, page_id, page_size, byte_offset, len, buf,
8044 nullptr));
8045 #else
8046 return (shard->do_redo_io(type, page_id, page_size, byte_offset, len, buf));
8047 #endif /* _WIN32 || WIN_ASYNC_IO*/
8048 }
8049
8050 #ifndef UNIV_HOTBACKUP
8051 /** Waits for an AIO operation to complete. This function is used to write the
8052 handler for completed requests. The aio array of pending requests is divided
8053 into segments (see os0file.cc for more info). The thread specifies which
8054 segment it wants to wait for.
8055 @param[in] segment The number of the segment in the AIO array
8056 to wait for */
fil_aio_wait(ulint segment)8057 void fil_aio_wait(ulint segment) {
8058 void *m2;
8059 fil_node_t *m1;
8060 IORequest type;
8061
8062 ut_ad(fil_validate_skip());
8063
8064 auto err = os_aio_handler(segment, &m1, &m2, &type);
8065 ut_a(err == DB_SUCCESS);
8066
8067 auto file = reinterpret_cast<fil_node_t *>(m1);
8068
8069 if (file == nullptr) {
8070 ut_ad(srv_shutdown_state.load() == SRV_SHUTDOWN_EXIT_THREADS);
8071 return;
8072 }
8073
8074 ut_a(!type.is_dblwr());
8075
8076 srv_set_io_thread_op_info(segment, "complete io for file");
8077
8078 auto shard = fil_system->shard_by_id(file->space->id);
8079
8080 shard->mutex_acquire();
8081
8082 shard->complete_io(file, type);
8083
8084 shard->mutex_release();
8085
8086 ut_ad(fil_validate_skip());
8087
8088 /* Do the i/o handling */
8089 /* IMPORTANT: since i/o handling for reads will read also the insert
8090 buffer in tablespace 0, you have to be very careful not to introduce
8091 deadlocks in the i/o system. We keep tablespace 0 data files always
8092 open, and use a special i/o thread to serve insert buffer requests. */
8093
8094 switch (file->space->purpose) {
8095 case FIL_TYPE_IMPORT:
8096 case FIL_TYPE_TEMPORARY:
8097 case FIL_TYPE_TABLESPACE:
8098 srv_set_io_thread_op_info(segment, "complete io for buf page");
8099
8100 /* async single page writes from the dblwr buffer don't have
8101 access to the page */
8102 if (m2 != nullptr) {
8103 buf_page_io_complete(static_cast<buf_page_t *>(m2), false);
8104 }
8105 return;
8106 case FIL_TYPE_LOG:
8107 return;
8108 }
8109
8110 ut_ad(0);
8111 }
8112 #endif /* !UNIV_HOTBACKUP */
8113
8114 /** Read or write data from a file.
8115 @param[in] type IO context
8116 @param[in] sync If true then do synchronous IO
8117 @param[in] page_id page id
8118 @param[in] page_size page size
8119 @param[in] byte_offset remainder of offset in bytes; in aio this
8120 must be divisible by the OS block size
8121 @param[in] len how many bytes to read or write; this must
8122 not cross a file boundary; in AIO this must
8123 be a block size multiple
8124 @param[in,out] buf buffer where to store read data or from where
8125 to write; in AIO this must be appropriately
8126 aligned
8127 @param[in] message message for AIO handler if !sync, else ignored
8128 @return error code
8129 @retval DB_SUCCESS on success
8130 @retval DB_TABLESPACE_DELETED if the tablespace does not exist */
fil_io(const IORequest & type,bool sync,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf,void * message)8131 dberr_t fil_io(const IORequest &type, bool sync, const page_id_t &page_id,
8132 const page_size_t &page_size, ulint byte_offset, ulint len,
8133 void *buf, void *message) {
8134 auto shard = fil_system->shard_by_id(page_id.space());
8135
8136 return (shard->do_io(type, sync, page_id, page_size, byte_offset, len, buf,
8137 message));
8138 }
8139
8140 /** If the tablespace is on the unflushed list and there are no pending
8141 flushes then remove from the unflushed list.
8142 @param[in,out] space Tablespace to remove */
remove_from_unflushed_list(fil_space_t * space)8143 void Fil_shard::remove_from_unflushed_list(fil_space_t *space) {
8144 ut_ad(mutex_owned());
8145
8146 if (space->is_in_unflushed_spaces && space_is_flushed(space)) {
8147 space->is_in_unflushed_spaces = false;
8148
8149 UT_LIST_REMOVE(m_unflushed_spaces, space);
8150 }
8151 }
8152
/** Flushes to disk possible writes cached by the OS for the redo log
tablespace. Only files whose modification counter is ahead of their flush
counter are flushed.
The caller must own the mutex of the REDO_SHARD. The mutex is released and
re-acquired while waiting for a concurrent flush and while the OS flush
runs, and is held again on return. */
void Fil_shard::redo_space_flush() {
  ut_ad(mutex_owned());
  ut_ad(m_id == REDO_SHARD);

  /* Use the cached redo tablespace pointer if it is set, otherwise
  look the tablespace up by its ID. */
  fil_space_t *space = fil_space_t::s_redo_space;

  if (space == nullptr) {
    space = get_space_by_id(dict_sys_t::s_log_space_first_id);
  } else {
    ut_ad(space == get_space_by_id(dict_sys_t::s_log_space_first_id));
  }

  ut_a(!space->stop_new_ops);
  ut_a(space->purpose == FIL_TYPE_LOG);

  /* Prevent dropping of the space while we are flushing */
  ++space->n_pending_flushes;

  for (auto &file : space->files) {
    ut_a(!file.is_raw_disk);

    /* Snapshot of the modification counter: this is the state the
    flush below has to cover. */
    int64_t old_mod_counter = file.modification_counter;

    if (old_mod_counter <= file.flush_counter) {
      /* Nothing written since the last flush of this file. */
      continue;
    }

    ut_a(file.is_open);
    ut_a(file.space == space);

    ++fil_n_log_flushes;
    ++fil_n_pending_log_flushes;

    bool skip_flush = false;

    /* Wait for some other thread that is flushing. */
    while (file.n_pending_flushes > 0 && !skip_flush) {
      /* Release the mutex to avoid deadlock with
      the flushing thread. */

      int64_t sig_count = os_event_reset(file.sync_event);

      mutex_release();

      os_event_wait_low(file.sync_event, sig_count);

      mutex_acquire();

      /* The other flush already covered our modifications. */
      if (file.flush_counter >= old_mod_counter) {
        skip_flush = true;
      }
    }

    if (!skip_flush) {
      ut_a(file.is_open);

      /* Mark the flush as in progress so that other threads wait
      instead of flushing the same file concurrently. */
      ++file.n_pending_flushes;

      mutex_release();

      os_file_flush(file.handle);

      mutex_acquire();

      /* Wake up threads waiting for this flush to finish. */
      os_event_set(file.sync_event);

      --file.n_pending_flushes;
    }

    if (file.flush_counter < old_mod_counter) {
      file.flush_counter = old_mod_counter;

      remove_from_unflushed_list(space);
    }

    --fil_n_pending_log_flushes;
  }

  --space->n_pending_flushes;
}
8234
/** Flushes to disk possible writes cached by the OS. If the space does
not exist or is being dropped, does not do anything. Temporary tablespaces
are not flushed either. A request for the redo log tablespace is forwarded
to redo_space_flush().
The caller must own the shard mutex. The mutex is released and re-acquired
while waiting for a concurrent flush and while the OS flush runs, and is
held again on return.
@param[in]  space_id  File space ID (this can be a group of log files
                      or a tablespace of the database) */
void Fil_shard::space_flush(space_id_t space_id) {
  ut_ad(mutex_owned());

  if (space_id == dict_sys_t::s_log_space_first_id) {
    redo_space_flush();
    return;
  }

  fil_space_t *space = get_space_by_id(space_id);

  if (space == nullptr || space->purpose == FIL_TYPE_TEMPORARY ||
      space->stop_new_ops) {
    return;
  }

  /* true if OS buffering is disabled for this tablespace. */
  bool fbd = fil_buffering_disabled(space);

  if (fbd) {
    /* No need to flush. User has explicitly disabled
    buffering. However, flush should be called if the file
    size changes to keep OS metadata in sync. */
    ut_ad(!space->is_in_unflushed_spaces);
    ut_ad(space_is_flushed(space));

    /* Flush only if the file size changes */
    bool no_flush = true;
    for (const auto &file : space->files) {
#ifdef UNIV_DEBUG
      ut_ad(file.modification_counter == file.flush_counter);
#endif /* UNIV_DEBUG */
      if (file.flush_size != file.size) {
        /* Found at least one file whose size has changed */
        no_flush = false;
        break;
      }
    }

    if (no_flush) {
      /* Nothing to flush. Just return */
      return;
    }
  }

  /* Prevent dropping of the space while we are flushing */
  ++space->n_pending_flushes;

  for (auto &file : space->files) {
    /* Snapshot of the modification counter: this is the state the
    flush below has to cover. */
    int64_t old_mod_counter = file.modification_counter;

    if (!file.is_open) {
      continue;
    }

    /* Skip flushing if the file size has not changed since
    last flush was done and the flush mode is O_DIRECT_NO_FSYNC */
    if (fbd && (file.flush_size == file.size)) {
      ut_ad(old_mod_counter <= file.flush_counter);
      continue;
    }

    /* If we are here and the flush mode is O_DIRECT_NO_FSYNC, then
    it means that the file size has changed and hence, it should be
    flushed, irrespective of the mod_counter and flush counter values,
    which are always same in case of O_DIRECT_NO_FSYNC to avoid flush
    on every write operation.
    For other flush modes, if the flush_counter is same or ahead of
    the mod_counter, skip the flush. */
    if (!fbd && (old_mod_counter <= file.flush_counter)) {
      continue;
    }

    switch (space->purpose) {
      case FIL_TYPE_TEMPORARY:
        ut_ad(0);  // we already checked for this
        /* Falls through to the tablespace case when ut_ad() does not
        stop execution. */

      case FIL_TYPE_TABLESPACE:
      case FIL_TYPE_IMPORT:
        ++fil_n_pending_tablespace_flushes;
        break;

      case FIL_TYPE_LOG:
        /* Redo log is handled by redo_space_flush(). */
        ut_error;
        break;
    }

    bool skip_flush = false;
#ifdef _WIN32
    if (file.is_raw_disk) {
      skip_flush = true;
    }
#endif /* _WIN32 */

    while (file.n_pending_flushes > 0 && !skip_flush) {
      /* We want to avoid calling os_file_flush() on
      the file twice at the same time, because we do
      not know what bugs OS's may contain in file
      I/O */

      int64_t sig_count = os_event_reset(file.sync_event);

      mutex_release();

      os_event_wait_low(file.sync_event, sig_count);

      mutex_acquire();

      /* The other flush already covered our modifications. */
      if (file.flush_counter >= old_mod_counter) {
        skip_flush = true;
      }
    }

    if (!skip_flush) {
      ut_a(file.is_open);

      /* Mark the flush as in progress so that other threads wait
      instead of flushing the same file concurrently. */
      ++file.n_pending_flushes;

      mutex_release();

      os_file_flush(file.handle);

      /* Remember the size that is now flushed. Note that this is
      assigned before the mutex is re-acquired. */
      file.flush_size = file.size;

      mutex_acquire();

      /* Wake up threads waiting for this flush to finish. */
      os_event_set(file.sync_event);

      --file.n_pending_flushes;
    }

    if (file.flush_counter < old_mod_counter) {
      file.flush_counter = old_mod_counter;

      remove_from_unflushed_list(space);
    }

    switch (space->purpose) {
      case FIL_TYPE_TEMPORARY:
        ut_ad(0);  // we already checked for this
        /* Falls through to the tablespace case when ut_ad() does not
        stop execution. */

      case FIL_TYPE_TABLESPACE:
      case FIL_TYPE_IMPORT:
        --fil_n_pending_tablespace_flushes;
        continue;

      case FIL_TYPE_LOG:
        ut_error;
    }

    /* Not reached for any of the purposes handled above. */
    ut_ad(0);
  }

  --space->n_pending_flushes;
}
8392
8393 /** Flushes to disk possible writes cached by the OS. If the space does
8394 not exist or is being dropped, does not do anything.
8395 @param[in] space_id File space ID (this can be a group of log files
8396 or a tablespace of the database) */
fil_flush(space_id_t space_id)8397 void fil_flush(space_id_t space_id) {
8398 auto shard = fil_system->shard_by_id(space_id);
8399
8400 shard->mutex_acquire();
8401
8402 /* Note: Will release and reacquire the Fil_shard::mutex. */
8403 shard->space_flush(space_id);
8404
8405 shard->mutex_release();
8406 }
8407
8408 /** Flush any pending writes to disk for the redo log. */
flush_file_redo()8409 void Fil_shard::flush_file_redo() {
8410 /* We never evict the redo log tablespace. It's for all
8411 practical purposes a read-only data structure. */
8412
8413 mutex_acquire();
8414
8415 redo_space_flush();
8416
8417 mutex_release();
8418 }
8419
8420 /** Collect the tablespace IDs of unflushed tablespaces in space_ids.
8421 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
8422 can be ORred */
flush_file_spaces(uint8_t purpose)8423 void Fil_shard::flush_file_spaces(uint8_t purpose) {
8424 Space_ids space_ids;
8425
8426 ut_ad((purpose & FIL_TYPE_TABLESPACE) || (purpose & FIL_TYPE_LOG));
8427
8428 mutex_acquire();
8429
8430 for (auto space = UT_LIST_GET_FIRST(m_unflushed_spaces); space != nullptr;
8431 space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
8432 if ((to_int(space->purpose) & purpose) && !space->stop_new_ops) {
8433 space_ids.push_back(space->id);
8434 }
8435 }
8436
8437 mutex_release();
8438
8439 /* Flush the spaces. It will not hurt to call fil_flush() on
8440 a non-existing space id. */
8441 for (auto space_id : space_ids) {
8442 mutex_acquire();
8443
8444 space_flush(space_id);
8445
8446 mutex_release();
8447 }
8448 }
8449
8450 /** Flush the redo log writes to disk, possibly cached by the OS. */
flush_file_redo()8451 void Fil_system::flush_file_redo() { m_shards[REDO_SHARD]->flush_file_redo(); }
8452
8453 /** Flush to disk the writes in file spaces of the given type
8454 possibly cached by the OS.
8455 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
8456 can be ORred */
flush_file_spaces(uint8_t purpose)8457 void Fil_system::flush_file_spaces(uint8_t purpose) {
8458 for (auto shard : m_shards) {
8459 shard->flush_file_spaces(purpose);
8460 }
8461 }
8462
8463 /** Flush to disk the writes in file spaces of the given type
8464 possibly cached by the OS.
8465 @param[in] purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG, can be ORred */
fil_flush_file_spaces(uint8_t purpose)8466 void fil_flush_file_spaces(uint8_t purpose) {
8467 fil_system->flush_file_spaces(purpose);
8468 }
8469
8470 /** Flush to disk the writes in file spaces of the given type
8471 possibly cached by the OS. */
fil_flush_file_redo()8472 void fil_flush_file_redo() { fil_system->flush_file_redo(); }
8473
8474 /** Returns true if file address is undefined.
8475 @param[in] addr Address
8476 @return true if undefined */
fil_addr_is_null(const fil_addr_t & addr)8477 bool fil_addr_is_null(const fil_addr_t &addr) {
8478 return (addr.page == FIL_NULL);
8479 }
8480
8481 /** Get the predecessor of a file page.
8482 @param[in] page File page
8483 @return FIL_PAGE_PREV */
fil_page_get_prev(const byte * page)8484 page_no_t fil_page_get_prev(const byte *page) {
8485 return (mach_read_from_4(page + FIL_PAGE_PREV));
8486 }
8487
8488 /** Get the successor of a file page.
8489 @param[in] page File page
8490 @return FIL_PAGE_NEXT */
fil_page_get_next(const byte * page)8491 page_no_t fil_page_get_next(const byte *page) {
8492 return (mach_read_from_4(page + FIL_PAGE_NEXT));
8493 }
8494
8495 /** Sets the file page type.
8496 @param[in,out] page File page
8497 @param[in] type Page type */
fil_page_set_type(byte * page,ulint type)8498 void fil_page_set_type(byte *page, ulint type) {
8499 mach_write_to_2(page + FIL_PAGE_TYPE, type);
8500 }
8501
8502 /** Reset the page type.
8503 Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE.
8504 In MySQL 3.23.53, only undo log pages and index pages were tagged.
8505 Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
8506 @param[in] page_id page number
8507 @param[in,out] page page with invalid FIL_PAGE_TYPE
8508 @param[in] type expected page type
8509 @param[in,out] mtr mini-transaction */
fil_page_reset_type(const page_id_t & page_id,byte * page,ulint type,mtr_t * mtr)8510 void fil_page_reset_type(const page_id_t &page_id, byte *page, ulint type,
8511 mtr_t *mtr) {
8512 ib::info(ER_IB_MSG_334) << "Resetting invalid page " << page_id << " type "
8513 << fil_page_get_type(page) << " to " << type << ".";
8514 mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr);
8515 }
8516
8517 /** Closes the tablespace memory cache. */
fil_close()8518 void fil_close() {
8519 if (fil_system == nullptr) {
8520 return;
8521 }
8522
8523 UT_DELETE(fil_system);
8524
8525 fil_system = nullptr;
8526 }
8527
8528 #ifndef UNIV_HOTBACKUP
8529 /** Initializes a buffer control block when the buf_pool is created.
8530 @param[in] block Pointer to the control block
8531 @param[in] frame Pointer to buffer frame */
fil_buf_block_init(buf_block_t * block,byte * frame)8532 static void fil_buf_block_init(buf_block_t *block, byte *frame) {
8533 UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
8534
8535 block->frame = frame;
8536
8537 block->page.io_fix = BUF_IO_NONE;
8538 /* There are assertions that check for this. */
8539 block->page.buf_fix_count = 1;
8540 block->page.state = BUF_BLOCK_READY_FOR_USE;
8541
8542 page_zip_des_init(&block->page.zip);
8543 }
8544
/** Describes the file, the byte range and the I/O buffer used when
iterating over the pages of a tablespace file (see fil_iterate()). */
struct Fil_page_iterator {
  /** File handle */
  pfs_os_file_t m_file;

  /** File path name */
  const char *m_filepath;

  /** From where to start */
  os_offset_t m_start;

  /** Where to stop */
  os_offset_t m_end;

  /** File size in bytes */
  os_offset_t m_file_size;

  /** Page size */
  size_t m_page_size;

  /** Number of pages to use for I/O */
  size_t m_n_io_buffers;

  /** Buffer to use for IO */
  byte *m_io_buffer;

  /** Encryption key */
  byte *m_encryption_key;

  /** Encryption iv */
  byte *m_encryption_iv;
};
8576
/** TODO: This can be made parallel trivially by chunking up the file
and creating a callback per thread. Main benefit will be to use multiple
CPUs for checksums and compressed tables. We have to do compressed tables
block by block right now. Secondly we need to decompress/compress and copy
too much of data. These are CPU intensive.

Iterate over all the pages in the tablespace. The file is read in batches
of up to iter.m_n_io_buffers pages; the callback is invoked once per page and
a batch is written back to the file when the callback left at least one of
its pages in the BUF_BLOCK_FILE_PAGE state.
@param[in] iter Tablespace iterator
@param[in,out] block Block to use for IO
@param[in] callback Callback to inspect and update page contents
@retval DB_SUCCESS or error code */
static dberr_t fil_iterate(const Fil_page_iterator &iter, buf_block_t *block,
                           PageCallback &callback) {
  os_offset_t offset;
  size_t n_bytes;
  /* Page numbers are handed out sequentially starting from 0, independent
  of iter.m_start. */
  page_no_t page_no = 0;
  space_id_t space_id = callback.get_space_id();

  /* Size of a full batch; it is clamped to the remaining bytes below. */
  n_bytes = iter.m_n_io_buffers * iter.m_page_size;

  ut_ad(!srv_read_only_mode);

  /* For old style compressed tables we do a lot of useless copying
  for non-index pages. Unfortunately, it is required by
  buf_zip_decompress() */

  ulint read_type = IORequest::READ;
  ulint write_type = IORequest::WRITE;

  for (offset = iter.m_start; offset < iter.m_end; offset += n_bytes) {
    byte *io_buffer = iter.m_io_buffer;

    /* The frame pointer is advanced page by page further down, so it has
    to be rewound to the start of the buffer for every batch. */
    block->frame = io_buffer;

    if (callback.get_page_size().is_compressed()) {
      page_zip_des_init(&block->page.zip);
      page_zip_set_size(&block->page.zip, iter.m_page_size);

      block->page.size.copy_from(
          page_size_t(static_cast<uint32_t>(iter.m_page_size),
                      static_cast<uint32_t>(univ_page_size.logical()), true));

      /* The compressed image lives one uncompressed page past the frame. */
      block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
      ut_d(block->page.zip.m_external = true);
      ut_ad(iter.m_page_size == callback.get_page_size().physical());

      /* Zip IO is done in the compressed page buffer. */
      io_buffer = block->page.zip.data;
    } else {
      io_buffer = iter.m_io_buffer;
    }

    /* We have to read the exact number of bytes. Otherwise the
    InnoDB IO functions croak on failed reads. */

    n_bytes = static_cast<ulint>(
        ut_min(static_cast<os_offset_t>(n_bytes), iter.m_end - offset));

    ut_ad(n_bytes > 0);
    ut_ad(!(n_bytes % iter.m_page_size));

    dberr_t err;
    IORequest read_request(read_type);

    /* For encrypted table, set encryption information. The batch that
    starts at offset 0 is always read without it.
    NOTE(review): presumably because the first page is stored unencrypted;
    confirm how this interacts with batches of more than one page. */
    if (iter.m_encryption_key != nullptr && offset != 0) {
      read_request.encryption_key(iter.m_encryption_key, Encryption::KEY_LEN,
                                  iter.m_encryption_iv);

      read_request.encryption_algorithm(Encryption::AES);
    }

    err = os_file_read(read_request, iter.m_filepath, iter.m_file, io_buffer,
                       offset, (ulint)n_bytes);

    if (err != DB_SUCCESS) {
      ib::error(ER_IB_MSG_335) << "os_file_read() failed";

      return (err);
    }

    size_t n_pages_read;
    /* Set once any page of this batch needs to be written back. */
    bool updated = false;
    os_offset_t page_off = offset;

    n_pages_read = (ulint)n_bytes / iter.m_page_size;

    for (size_t i = 0; i < n_pages_read; ++i) {
      buf_block_set_file_page(block, page_id_t(space_id, page_no++));

      /* We are going to modify the page. Add to page tracking system. */
      arch_page_sys->track_page(&block->page, LSN_MAX, LSN_MAX, true);

      if ((err = callback(page_off, block)) != DB_SUCCESS) {
        return (err);

      } else if (!updated) {
        /* The block state after the callback tells whether the page has to
        be written back. */
        updated = buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE;
      }

      /* Return the block to BUF_BLOCK_READY_FOR_USE for the next page,
      going through BUF_BLOCK_NOT_USED on the way. */
      buf_block_set_state(block, BUF_BLOCK_NOT_USED);
      buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);

      page_off += iter.m_page_size;
      block->frame += iter.m_page_size;
    }

    IORequest write_request(write_type);

    /* For encrypted table, set encryption information. Mirrors the read
    request: nothing is set for the batch starting at offset 0. */
    if (iter.m_encryption_key != nullptr && offset != 0) {
      write_request.encryption_key(iter.m_encryption_key, Encryption::KEY_LEN,
                                   iter.m_encryption_iv);

      write_request.encryption_algorithm(Encryption::AES);
    }

    /* A page was updated in the set, write back to disk.
    Note: We don't have the compression algorithm, we write
    out the imported file as uncompressed. */

    if (updated && (err = os_file_write(write_request, iter.m_filepath,
                                        iter.m_file, io_buffer, offset,
                                        (ulint)n_bytes)) != DB_SUCCESS) {
      /* This is not a hard error */
      if (err == DB_IO_NO_PUNCH_HOLE) {
        err = DB_SUCCESS;
        /* Do not request hole punching for the remaining batches. */
        write_type &= ~IORequest::PUNCH_HOLE;

      } else {
        ib::error(ER_IB_MSG_336) << "os_file_write() failed";

        return (err);
      }
    }
  }

  return (DB_SUCCESS);
}
8716
fil_adjust_name_import(dict_table_t * table,const char * path,ib_file_suffix extn)8717 void fil_adjust_name_import(dict_table_t *table, const char *path,
8718 ib_file_suffix extn) {
8719 /* Try to open with current name first. */
8720 if (os_file_exists(path)) {
8721 return;
8722 }
8723
8724 /* On failure we need to check if file exists in different letter case
8725 for partitioned table. */
8726 #ifdef _WIN32
8727 /* Safe check. Never needed on Windows. */
8728 return;
8729 #endif /* WIN32 */
8730
8731 /* Needed only for case sensitive file system. */
8732 if (lower_case_file_system) {
8733 return;
8734 }
8735
8736 /* Only needed for partition file. */
8737 if (!dict_name::is_partition(table->name.m_name)) {
8738 return;
8739 }
8740
8741 /* Get Import directory path. */
8742 std::string import_dir(path);
8743 Fil_path::normalize(import_dir);
8744
8745 auto pos = import_dir.find_last_of(Fil_path::SEPARATOR);
8746 if (pos == std::string::npos) {
8747 import_dir.assign(Fil_path::DOT_SLASH);
8748
8749 } else {
8750 import_dir.resize(pos + 1);
8751 ut_ad(Fil_path::is_separator(import_dir.back()));
8752 }
8753
8754 /* Walk through all files under the directory and match the import file
8755 after adjusting case. This is a safe check to allow files exported from
8756 earlier versions where the case for partition name and separator could
8757 be different. */
8758 bool found_path = false;
8759 std::string saved_path;
8760
8761 Dir_Walker::walk(import_dir, false, [&](const std::string &file_path) {
8762 /* Skip entry if already found. */
8763 if (found_path) {
8764 return;
8765 }
8766 /* Check only for partition files. */
8767 if (!dict_name::is_partition(file_path)) {
8768 return;
8769 }
8770
8771 /* Extract table name from path. */
8772 std::string table_name;
8773 if (!Fil_path::parse_file_path(file_path, extn, table_name)) {
8774 /* Not a valid file-per-table path */
8775 return;
8776 }
8777
8778 /* Check if the file name would match after correcting the case. */
8779 dict_name::rebuild(table_name);
8780 if (table_name.compare(table->name.m_name) != 0) {
8781 return;
8782 }
8783
8784 saved_path.assign(file_path);
8785 found_path = true;
8786 });
8787
8788 return;
8789 }
8790
8791 /** Iterate over all the pages in the tablespace.
8792 @param[in,out] table the table definiton in the server
8793 @param[in] n_io_buffers number of blocks to read and write together
8794 @param[in] callback functor that will do the page updates
8795 @return DB_SUCCESS or error code */
fil_tablespace_iterate(dict_table_t * table,ulint n_io_buffers,PageCallback & callback)8796 dberr_t fil_tablespace_iterate(dict_table_t *table, ulint n_io_buffers,
8797 PageCallback &callback) {
8798 dberr_t err;
8799 pfs_os_file_t file;
8800 char *filepath;
8801 bool success;
8802
8803 ut_a(n_io_buffers > 0);
8804 ut_ad(!srv_read_only_mode);
8805
8806 DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", return (DB_CORRUPTION););
8807
8808 /* Make sure the data_dir_path is set. */
8809 dd_get_and_save_data_dir_path<dd::Table>(table, nullptr, false);
8810
8811 std::string path = dict_table_get_datadir(table);
8812
8813 filepath = Fil_path::make(path, table->name.m_name, IBD, true);
8814
8815 if (filepath == nullptr) {
8816 return (DB_OUT_OF_MEMORY);
8817 }
8818
8819 /* Adjust filename for partition file if in different letter case. */
8820 fil_adjust_name_import(table, filepath, IBD);
8821
8822 file = os_file_create_simple_no_error_handling(
8823 innodb_data_file_key, filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE,
8824 srv_read_only_mode, &success);
8825
8826 DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", {
8827 static bool once;
8828
8829 if (!once || ut_rnd_interval(0, 10) == 5) {
8830 once = true;
8831 success = false;
8832 os_file_close(file);
8833 }
8834 });
8835
8836 if (!success) {
8837 /* The following call prints an error message */
8838 os_file_get_last_error(true);
8839
8840 ib::error(ER_IB_MSG_337) << "Trying to import a tablespace, but could not"
8841 " open the tablespace file "
8842 << filepath;
8843
8844 ut_free(filepath);
8845
8846 return (DB_TABLESPACE_NOT_FOUND);
8847
8848 } else {
8849 err = DB_SUCCESS;
8850 }
8851
8852 callback.set_file(filepath, file);
8853
8854 os_offset_t file_size = os_file_get_size(file);
8855 ut_a(file_size != (os_offset_t)-1);
8856
8857 /* The block we will use for every physical page */
8858 buf_block_t *block;
8859
8860 block = reinterpret_cast<buf_block_t *>(ut_zalloc_nokey(sizeof(*block)));
8861
8862 mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
8863
8864 /* Allocate a page to read in the tablespace header, so that we
8865 can determine the page size and zip size (if it is compressed).
8866 We allocate an extra page in case it is a compressed table. One
8867 page is to ensure alignement. */
8868
8869 void *page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
8870 byte *page = static_cast<byte *>(ut_align(page_ptr, UNIV_PAGE_SIZE));
8871
8872 fil_buf_block_init(block, page);
8873
8874 /* Read the first page and determine the page and zip size. */
8875
8876 IORequest request(IORequest::READ);
8877
8878 err = os_file_read_first_page(request, path.c_str(), file, page,
8879 UNIV_PAGE_SIZE);
8880
8881 if (err != DB_SUCCESS) {
8882 err = DB_IO_ERROR;
8883
8884 } else if ((err = callback.init(file_size, block)) == DB_SUCCESS) {
8885 Fil_page_iterator iter;
8886
8887 iter.m_file = file;
8888 iter.m_start = 0;
8889 iter.m_end = file_size;
8890 iter.m_filepath = filepath;
8891 iter.m_file_size = file_size;
8892 iter.m_n_io_buffers = n_io_buffers;
8893 iter.m_page_size = callback.get_page_size().physical();
8894
8895 /* Set encryption info. */
8896 iter.m_encryption_key = table->encryption_key;
8897 iter.m_encryption_iv = table->encryption_iv;
8898
8899 /* Check encryption is matched or not. */
8900 ulint space_flags = callback.get_space_flags();
8901
8902 if (FSP_FLAGS_GET_ENCRYPTION(space_flags)) {
8903 if (!dd_is_table_in_encrypted_tablespace(table)) {
8904 ib::error(ER_IB_MSG_338) << "Table is not in an encrypted tablespace,"
8905 " but the data file intended for import"
8906 " is an encrypted tablespace";
8907
8908 err = DB_IO_NO_ENCRYPT_TABLESPACE;
8909 } else {
8910 /* encryption_key must have been populated while reading CFP file. */
8911 ut_ad(table->encryption_key != nullptr &&
8912 table->encryption_iv != nullptr);
8913
8914 if (table->encryption_key == nullptr ||
8915 table->encryption_iv == nullptr) {
8916 err = DB_ERROR;
8917 }
8918 }
8919 }
8920
8921 if (err == DB_SUCCESS) {
8922 /* Compressed pages can't be optimised for block IO
8923 for now. We do the IMPORT page by page. */
8924
8925 if (callback.get_page_size().is_compressed()) {
8926 iter.m_n_io_buffers = 1;
8927 ut_a(iter.m_page_size == callback.get_page_size().physical());
8928 }
8929
8930 /** Add an extra page for compressed page scratch
8931 area. */
8932 void *io_buffer =
8933 ut_malloc_nokey((2 + iter.m_n_io_buffers) * UNIV_PAGE_SIZE);
8934
8935 iter.m_io_buffer =
8936 static_cast<byte *>(ut_align(io_buffer, UNIV_PAGE_SIZE));
8937
8938 err = fil_iterate(iter, block, callback);
8939
8940 ut_free(io_buffer);
8941 }
8942 }
8943
8944 if (err == DB_SUCCESS) {
8945 ib::info(ER_IB_MSG_339) << "Sync to disk";
8946
8947 if (!os_file_flush(file)) {
8948 ib::info(ER_IB_MSG_340) << "os_file_flush() failed!";
8949 err = DB_IO_ERROR;
8950 } else {
8951 ib::info(ER_IB_MSG_341) << "Sync to disk - done!";
8952 }
8953 }
8954
8955 os_file_close(file);
8956
8957 ut_free(page_ptr);
8958 ut_free(filepath);
8959
8960 mutex_free(&block->mutex);
8961
8962 ut_free(block);
8963
8964 return (err);
8965 }
8966 #endif /* !UNIV_HOTBACKUP */
8967
8968 /** Set the tablespace table size.
8969 @param[in] page a page belonging to the tablespace */
set_page_size(const buf_frame_t * page)8970 void PageCallback::set_page_size(const buf_frame_t *page) UNIV_NOTHROW {
8971 m_page_size.copy_from(fsp_header_get_page_size(page));
8972 }
8973
8974 /** Delete the tablespace file and any related files like .cfg.
8975 This should not be called for temporary tables.
8976 @param[in] path File path of the IBD tablespace
8977 @return true on success */
fil_delete_file(const char * path)8978 bool fil_delete_file(const char *path) {
8979 bool success = true;
8980
8981 /* Force a delete of any stale .ibd files that are lying around. */
8982 success = os_file_delete_if_exists(innodb_data_file_key, path, nullptr);
8983
8984 char *cfg_filepath = Fil_path::make_cfg(path);
8985
8986 if (cfg_filepath != nullptr) {
8987 os_file_delete_if_exists(innodb_data_file_key, cfg_filepath, nullptr);
8988
8989 ut_free(cfg_filepath);
8990 }
8991
8992 char *cfp_filepath = Fil_path::make_cfp(path);
8993
8994 if (cfp_filepath != nullptr) {
8995 os_file_delete_if_exists(innodb_data_file_key, cfp_filepath, nullptr);
8996
8997 ut_free(cfp_filepath);
8998 }
8999
9000 return (success);
9001 }
9002
9003 #ifndef UNIV_HOTBACKUP
9004 /** Check if swapping two .ibd files can be done without failure.
9005 @param[in] old_table old table
9006 @param[in] new_table new table
9007 @param[in] tmp_name temporary table name
9008 @return innodb error code */
fil_rename_precheck(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name)9009 dberr_t fil_rename_precheck(const dict_table_t *old_table,
9010 const dict_table_t *new_table,
9011 const char *tmp_name) {
9012 dberr_t err;
9013
9014 bool old_is_file_per_table = dict_table_is_file_per_table(old_table);
9015
9016 bool new_is_file_per_table = dict_table_is_file_per_table(new_table);
9017
9018 /* If neither table is file-per-table,
9019 there will be no renaming of files. */
9020 if (!old_is_file_per_table && !new_is_file_per_table) {
9021 return (DB_SUCCESS);
9022 }
9023
9024 auto old_dir = dict_table_get_datadir(old_table);
9025
9026 char *old_path =
9027 Fil_path::make(old_dir, old_table->name.m_name, IBD, !old_dir.empty());
9028
9029 if (old_path == nullptr) {
9030 return (DB_OUT_OF_MEMORY);
9031 }
9032
9033 if (old_is_file_per_table) {
9034 char *tmp_path = Fil_path::make(old_dir, tmp_name, IBD, !old_dir.empty());
9035
9036 if (tmp_path == nullptr) {
9037 ut_free(old_path);
9038 return (DB_OUT_OF_MEMORY);
9039 }
9040
9041 /* Temp filepath must not exist. */
9042 err = fil_rename_tablespace_check(old_table->space, old_path, tmp_path,
9043 dict_table_is_discarded(old_table));
9044
9045 if (err != DB_SUCCESS) {
9046 ut_free(old_path);
9047 ut_free(tmp_path);
9048 return (err);
9049 }
9050
9051 ut_free(tmp_path);
9052 }
9053
9054 if (new_is_file_per_table) {
9055 auto new_dir = dict_table_get_datadir(new_table);
9056
9057 char *new_path =
9058 Fil_path::make(new_dir, new_table->name.m_name, IBD, !new_dir.empty());
9059
9060 if (new_path == nullptr) {
9061 ut_free(old_path);
9062 return (DB_OUT_OF_MEMORY);
9063 }
9064
9065 /* Destination filepath must not exist unless this ALTER
9066 TABLE starts and ends with a file_per-table tablespace. */
9067 if (!old_is_file_per_table) {
9068 err = fil_rename_tablespace_check(new_table->space, new_path, old_path,
9069 dict_table_is_discarded(new_table));
9070
9071 if (err != DB_SUCCESS) {
9072 ut_free(old_path);
9073 ut_free(new_path);
9074 return (err);
9075 }
9076 }
9077
9078 ut_free(new_path);
9079 }
9080
9081 ut_free(old_path);
9082
9083 return (DB_SUCCESS);
9084 }
9085 #endif /* !UNIV_HOTBACKUP */
9086
9087 /** Note that the file system where the file resides doesn't support PUNCH HOLE.
9088 Called from AIO handlers when IO returns DB_IO_NO_PUNCH_HOLE
9089 @param[in,out] file file to set */
fil_no_punch_hole(fil_node_t * file)9090 void fil_no_punch_hole(fil_node_t *file) { file->punch_hole = false; }
9091
9092 /** Set the compression type for the tablespace of a table
9093 @param[in] table The table that should be compressed
9094 @param[in] algorithm Text representation of the algorithm
9095 @return DB_SUCCESS or error code */
fil_set_compression(dict_table_t * table,const char * algorithm)9096 dberr_t fil_set_compression(dict_table_t *table, const char *algorithm) {
9097 ut_ad(table != nullptr);
9098
9099 /* We don't support Page Compression for the system tablespace,
9100 the temporary tablespace, or any general tablespace because
9101 COMPRESSION is set by TABLE DDL, not TABLESPACE DDL. There is
9102 no other technical reason. Also, do not use it for missing
9103 tables or tables with compressed row_format. */
9104 if (table->ibd_file_missing ||
9105 !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE) ||
9106 DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY) ||
9107 page_size_t(table->flags).is_compressed()) {
9108 return (DB_IO_NO_PUNCH_HOLE_TABLESPACE);
9109 }
9110
9111 dberr_t err;
9112 Compression compression;
9113
9114 if (algorithm == nullptr || strlen(algorithm) == 0) {
9115 #ifndef UNIV_DEBUG
9116 compression.m_type = Compression::NONE;
9117 #else /* UNIV_DEBUG */
9118 /* This is a Debug tool for setting compression on all
9119 compressible tables not otherwise specified. */
9120 switch (srv_debug_compress) {
9121 case Compression::LZ4:
9122 case Compression::ZLIB:
9123 case Compression::NONE:
9124
9125 compression.m_type = static_cast<Compression::Type>(srv_debug_compress);
9126 break;
9127
9128 default:
9129 compression.m_type = Compression::NONE;
9130 }
9131
9132 #endif /* UNIV_DEBUG */
9133
9134 err = DB_SUCCESS;
9135
9136 } else {
9137 err = Compression::check(algorithm, &compression);
9138 }
9139
9140 fil_space_t *space = fil_space_get(table->space);
9141
9142 if (space == nullptr) {
9143 return (DB_NOT_FOUND);
9144 }
9145
9146 space->compression_type = compression.m_type;
9147
9148 if (space->compression_type != Compression::NONE) {
9149 if (!space->files.front().punch_hole) {
9150 return (DB_IO_NO_PUNCH_HOLE_FS);
9151 }
9152 }
9153
9154 return (err);
9155 }
9156
9157 /** Get the compression algorithm for a tablespace.
9158 @param[in] space_id Space ID to check
9159 @return the compression algorithm */
fil_get_compression(space_id_t space_id)9160 Compression::Type fil_get_compression(space_id_t space_id) {
9161 fil_space_t *space = fil_space_get(space_id);
9162
9163 return (space == nullptr ? Compression::NONE : space->compression_type);
9164 }
9165
9166 /** Set the encryption type for the tablespace
9167 @param[in] space_id Space ID of tablespace for which to set
9168 @param[in] algorithm Encryption algorithm
9169 @param[in] key Encryption key
9170 @param[in] iv Encryption iv
9171 @return DB_SUCCESS or error code */
fil_set_encryption(space_id_t space_id,Encryption::Type algorithm,byte * key,byte * iv)9172 dberr_t fil_set_encryption(space_id_t space_id, Encryption::Type algorithm,
9173 byte *key, byte *iv) {
9174 auto shard = fil_system->shard_by_id(space_id);
9175
9176 shard->mutex_acquire();
9177
9178 fil_space_t *space = shard->get_space_by_id(space_id);
9179
9180 if (space == nullptr) {
9181 shard->mutex_release();
9182 return (DB_NOT_FOUND);
9183 }
9184
9185 if (key == nullptr) {
9186 Encryption::random_value(space->encryption_key);
9187 } else {
9188 memcpy(space->encryption_key, key, Encryption::KEY_LEN);
9189 }
9190
9191 space->encryption_klen = Encryption::KEY_LEN;
9192
9193 if (iv == nullptr) {
9194 Encryption::random_value(space->encryption_iv);
9195 } else {
9196 memcpy(space->encryption_iv, iv, Encryption::KEY_LEN);
9197 }
9198
9199 ut_ad(algorithm != Encryption::NONE);
9200 space->encryption_type = algorithm;
9201
9202 shard->mutex_release();
9203
9204 return (DB_SUCCESS);
9205 }
9206
9207 /** Reset the encryption type for the tablespace
9208 @param[in] space_id Space ID of tablespace for which to set
9209 @return DB_SUCCESS or error code */
fil_reset_encryption(space_id_t space_id)9210 dberr_t fil_reset_encryption(space_id_t space_id) {
9211 ut_ad(space_id != TRX_SYS_SPACE);
9212
9213 if (fsp_is_system_or_temp_tablespace(space_id)) {
9214 return (DB_IO_NO_ENCRYPT_TABLESPACE);
9215 }
9216
9217 auto shard = fil_system->shard_by_id(space_id);
9218
9219 shard->mutex_acquire();
9220
9221 fil_space_t *space = shard->get_space_by_id(space_id);
9222
9223 if (space == nullptr) {
9224 shard->mutex_release();
9225 return (DB_NOT_FOUND);
9226 }
9227
9228 memset(space->encryption_key, 0, Encryption::KEY_LEN);
9229 space->encryption_klen = 0;
9230
9231 memset(space->encryption_iv, 0, Encryption::KEY_LEN);
9232
9233 space->encryption_type = Encryption::NONE;
9234
9235 shard->mutex_release();
9236
9237 return (DB_SUCCESS);
9238 }
9239
9240 #ifndef UNIV_HOTBACKUP
/** Rotate the tablespace keys by new master key.
Walks every tablespace of the shard and rewrites the encryption information
of each encrypted one via fsp_header_rotate_encryption(). Stops at the first
tablespace for which that call fails; the remaining ones are not processed.
@param[in,out] shard Rotate the keys in this shard
@return true if the re-encrypt succeeds */
bool Fil_system::encryption_rotate_in_a_shard(Fil_shard *shard) {
  /* Scratch buffer handed to fsp_header_rotate_encryption(); it is zeroed
  before every use. */
  byte encrypt_info[Encryption::INFO_SIZE];

  for (auto &elem : shard->m_spaces) {
    auto space = elem.second;

    /* Skip the system and temporary tablespaces. Encrypted redo log
    tablespaces is handled in function log_rotate_encryption. */

    if (fsp_is_system_or_temp_tablespace(space->id) ||
        space->purpose == FIL_TYPE_LOG) {
      continue;
    }

    /* Skip the undo tablespace when it's in default key status,
    since it's the first server startup after bootstrap, and the
    server uuid is not ready yet. */

    if (fsp_is_undo_tablespace(space->id) &&
        Encryption::get_master_key_id() == Encryption::DEFAULT_MASTER_KEY_ID) {
      continue;
    }

    /* Rotate the encrypted tablespaces. */
    if (space->encryption_type != Encryption::NONE) {
      memset(encrypt_info, 0, Encryption::INFO_SIZE);

      /* Stays nullptr unless an MDL is taken below. */
      MDL_ticket *mdl_ticket = nullptr;
#if !defined(XTRABACKUP)
      /* Take MDL on UNDO tablespace to make it mutually exclusive with
      UNDO tablespace truncation. For other tablespaces MDL is not required
      here. */
      if (fsp_is_undo_tablespace(space->id)) {
        THD *thd = current_thd;
        /* Both acquisitions are retried, with a short sleep in between,
        for as long as the call reports failure. */
        while (
            acquire_shared_backup_lock(thd, thd->variables.lock_wait_timeout)) {
          os_thread_sleep(20);
        }

        while (dd::acquire_exclusive_tablespace_mdl(thd, space->name, false,
                                                    &mdl_ticket, false)) {
          os_thread_sleep(20);
        }
        ut_ad(mdl_ticket != nullptr);
      }
#endif

      mtr_t mtr;
      mtr_start(&mtr);
      bool ret = fsp_header_rotate_encryption(space, encrypt_info, &mtr);
      mtr_commit(&mtr);

      /* The MDL is released before the result is examined, so it is not
      kept on the failure path either. */
      if (mdl_ticket != nullptr) {
        dd_release_mdl(mdl_ticket);
      }
      if (!ret) {
        return (false);
      }
    }

    DBUG_EXECUTE_IF("ib_crash_during_rotation_for_encryption", DBUG_SUICIDE(););
  }

  return (true);
}
9309
9310 /** Rotate the tablespace keys by new master key.
9311 @return true if the re-encrypt succeeds */
encryption_rotate_all()9312 bool Fil_system::encryption_rotate_all() {
9313 for (auto shard : m_shards) {
9314 // FIXME: We don't acquire the fil_sys::mutex here. Why?
9315
9316 bool success = encryption_rotate_in_a_shard(shard);
9317
9318 if (!success) {
9319 return (false);
9320 }
9321 }
9322
9323 return (true);
9324 }
9325
9326 /** Rotate the tablespace keys by new master key.
9327 @return true if the re-encrypt succeeds */
fil_encryption_rotate()9328 bool fil_encryption_rotate() { return (fil_system->encryption_rotate_all()); }
9329
9330 #endif /* !UNIV_HOTBACKUP */
9331
9332 /** Constructor
9333 @param[in] path pathname (may also include the file basename)
9334 @param[in] normalize_path If false, it's the callers responsibility to
9335 ensure that the path is normalized. */
Fil_path(const std::string & path,bool normalize_path)9336 Fil_path::Fil_path(const std::string &path, bool normalize_path)
9337 : m_path(path) {
9338 if (normalize_path) {
9339 normalize(m_path);
9340 }
9341
9342 m_abs_path = get_real_path(m_path, false);
9343 }
9344
9345 /** Constructor
9346 @param[in] path pathname (may also include the file basename)
9347 @param[in] normalize_path If false, it's the callers responsibility to
9348 ensure that the path is normalized. */
Fil_path(const char * path,bool normalize_path)9349 Fil_path::Fil_path(const char *path, bool normalize_path) : m_path(path) {
9350 if (normalize_path) {
9351 normalize(m_path);
9352 }
9353
9354 m_abs_path = get_real_path(m_path, false);
9355 }
9356
9357 /** Constructor
9358 @param[in] path pathname (may also include the file basename)
9359 @param[in] len Length of path
9360 @param[in] normalize_path If false, it's the callers responsibility to
9361 ensure that the path is normalized. */
Fil_path(const char * path,size_t len,bool normalize_path)9362 Fil_path::Fil_path(const char *path, size_t len, bool normalize_path)
9363 : m_path(path, len) {
9364 if (normalize_path) {
9365 normalize(m_path);
9366 }
9367
9368 m_abs_path = get_real_path(m_path, false);
9369 }
9370
9371 /** Default constructor. */
Fil_path()9372 Fil_path::Fil_path() : m_path(), m_abs_path() { /* No op */
9373 }
9374
is_same_as(const Fil_path & other) const9375 bool Fil_path::is_same_as(const Fil_path &other) const {
9376 if (path().empty() || other.path().empty()) {
9377 return (false);
9378 }
9379
9380 std::string first = abs_path();
9381 trim_separator(first);
9382
9383 std::string second = other.abs_path();
9384 trim_separator(second);
9385
9386 return (first == second);
9387 }
9388
is_same_as(const std::string & other) const9389 bool Fil_path::is_same_as(const std::string &other) const {
9390 if (path().empty() || other.empty()) {
9391 return (false);
9392 }
9393
9394 Fil_path other_path(other);
9395
9396 return (is_same_as(other_path));
9397 }
9398
is_ancestor(const Fil_path & other) const9399 bool Fil_path::is_ancestor(const Fil_path &other) const {
9400 if (path().empty() || other.path().empty()) {
9401 return (false);
9402 }
9403
9404 std::string ancestor = abs_path();
9405 std::string descendant = other.abs_path();
9406
9407 /* We do not know if the descendant is a dir or a file.
9408 But the ancestor in this routine is always a directory.
9409 If it does not yet exist, it may not have a trailing separator.
9410 If there is no trailing separator, add it. */
9411 append_separator(ancestor);
9412
9413 if (descendant.length() <= ancestor.length()) {
9414 return (false);
9415 }
9416
9417 return (std::equal(ancestor.begin(), ancestor.end(), descendant.begin()));
9418 }
9419
is_ancestor(const std::string & other) const9420 bool Fil_path::is_ancestor(const std::string &other) const {
9421 if (path().empty() || other.empty()) {
9422 return (false);
9423 }
9424
9425 Fil_path descendant(other);
9426
9427 return (is_ancestor(descendant));
9428 }
9429
is_hidden(std::string path)9430 bool Fil_path::is_hidden(std::string path) {
9431 std::string basename(path);
9432 while (!basename.empty()) {
9433 char c = basename.back();
9434 if (!(Fil_path::is_separator(c) || c == '*')) {
9435 break;
9436 }
9437 basename.resize(basename.size() - 1);
9438 }
9439 auto sep = basename.find_last_of(SEPARATOR);
9440
9441 return (sep != std::string::npos && basename[sep + 1] == '.');
9442 }
9443
9444 #ifdef _WIN32
is_hidden(WIN32_FIND_DATA & dirent)9445 bool Fil_path::is_hidden(WIN32_FIND_DATA &dirent) {
9446 if (dirent.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN ||
9447 dirent.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) {
9448 return (true);
9449 }
9450
9451 return (false);
9452 }
9453 #endif /* WIN32 */
9454
9455 /** @return true if the path exists and is a file . */
get_file_type(const std::string & path)9456 os_file_type_t Fil_path::get_file_type(const std::string &path) {
9457 os_file_type_t type;
9458
9459 os_file_status(path.c_str(), nullptr, &type);
9460
9461 return (type);
9462 }
9463
9464 /** Return a string to display the file type of a path.
9465 @param[in] path path name
9466 @return true if the path exists and is a file . */
get_file_type_string(const std::string & path)9467 const char *Fil_path::get_file_type_string(const std::string &path) {
9468 return (get_file_type_string(Fil_path::get_file_type(path)));
9469 }
9470
9471 /** Return a string to display the file type of a path.
9472 @param[in] type OS file type
9473 @return true if the path exists and is a file . */
get_file_type_string(os_file_type_t type)9474 const char *Fil_path::get_file_type_string(os_file_type_t type) {
9475 switch (type) {
9476 case OS_FILE_TYPE_FILE:
9477 return ("file");
9478 case OS_FILE_TYPE_LINK:
9479 return ("symbolic link");
9480 case OS_FILE_TYPE_DIR:
9481 return ("directory");
9482 case OS_FILE_TYPE_BLOCK:
9483 return ("block device");
9484 case OS_FILE_TYPE_NAME_TOO_LONG:
9485 return ("name too long");
9486 case OS_FILE_PERMISSION_ERROR:
9487 return ("permission error");
9488 case OS_FILE_TYPE_MISSING:
9489 return ("missing");
9490 case OS_FILE_TYPE_UNKNOWN:
9491 case OS_FILE_TYPE_FAILED:
9492 break;
9493 }
9494 return ("unknown");
9495 }
9496
9497 /** @return true if the path exists and is a file . */
is_file_and_exists() const9498 bool Fil_path::is_file_and_exists() const {
9499 return (get_file_type(abs_path()) == OS_FILE_TYPE_FILE);
9500 }
9501
9502 /** @return true if the path exists and is a directory. */
is_directory_and_exists() const9503 bool Fil_path::is_directory_and_exists() const {
9504 return (get_file_type(abs_path()) == OS_FILE_TYPE_DIR);
9505 }
9506
9507 /** This validation is only for ':'.
9508 @return true if the path is valid. */
is_valid() const9509 bool Fil_path::is_valid() const {
9510 auto count = std::count(m_path.begin(), m_path.end(), ':');
9511
9512 if (count == 0) {
9513 return (true);
9514 }
9515
9516 #ifdef _WIN32
9517 /* Do not allow names like "C:name.ibd" because it
9518 specifies the "C:" drive but allows a relative location.
9519 It should be like "c:\". If a single colon is used it
9520 must be the second byte and the third byte must be a
9521 separator. */
9522
9523 /* 8 == strlen("c:\a,ibd") */
9524 if (count == 1 && m_path.length() >= 8 && isalpha(m_path.at(0)) &&
9525 m_path.at(1) == ':' && (m_path.at(2) == '\\' || m_path.at(2) == '/')) {
9526 return (true);
9527 }
9528 #endif /* _WIN32 */
9529
9530 return (false);
9531 }
9532
is_circular() const9533 bool Fil_path::is_circular() const {
9534 size_t first;
9535
9536 /* Find the first named directory. It is OK for a path to
9537 start with "../../../dir". */
9538 for (first = 0; m_path[first] == OS_SEPARATOR || m_path[first] == '.';
9539 ++first)
9540 ;
9541
9542 size_t back_up = m_path.find(SLASH_DOT_DOT_SLASH, first);
9543 if (back_up == std::string::npos) {
9544 return (false);
9545 }
9546
9547 #ifndef _WIN32
9548 /* If the path contains a symlink before the /../ and the platform
9549 is not Windows, then '/../' does not go bback through the symlink,
9550 so it is not circular. It refers to the parent of the symlinked
9551 location and we must allow it. On Windows, it backs up to the directory
9552 where the symlink starts, which is a circular reference. */
9553 std::string up_path = m_path.substr(0, back_up);
9554 if (my_is_symlink(up_path.c_str(), nullptr)) {
9555 return (false);
9556 }
9557 #endif /* _WIN32 */
9558
9559 return (true);
9560 }
9561
9562 /** Sets the flags of the tablespace. The tablespace must be locked
9563 in MDL_EXCLUSIVE MODE.
9564 @param[in] space tablespace in-memory struct
9565 @param[in] flags tablespace flags */
fil_space_set_flags(fil_space_t * space,uint32_t flags)9566 void fil_space_set_flags(fil_space_t *space, uint32_t flags) {
9567 ut_ad(fsp_flags_is_valid(flags));
9568
9569 rw_lock_x_lock(&space->latch);
9570
9571 ut_a(flags < std::numeric_limits<uint32_t>::max());
9572 space->flags = (uint32_t)flags;
9573
9574 rw_lock_x_unlock(&space->latch);
9575 }
9576
/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
#define MF Fil_path::make
#define DISPLAY ib::info(ER_IB_MSG_342) << path
/** Call Fil_path::make() (through the MF macro) with a fixed list of
argument combinations and write every returned path to the info log. */
void test_make_filepath() {
  const char *long_path =
      "this/is/a/very/long/path/including/a/very/"
      "looooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooong"
      "/folder/name";

  /* One entry per call, in the order the calls are made. */
  struct Make_args {
    const char *first;
    const char *second;
    decltype(IBD) suffix;
    bool flag;
  };

  const Make_args cases[] = {
      {"/this/is/a/path/with/a/filename", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename", nullptr, ISL, false},
      {"/this/is/a/path/with/a/filename", nullptr, CFG, false},
      {"/this/is/a/path/with/a/filename", nullptr, CFP, false},
      {"/this/is/a/path/with/a/filename.ibd", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename.ibd", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename.dat", nullptr, IBD, false},
      {nullptr, "tablespacename", NO_EXT, false},
      {nullptr, "tablespacename", IBD, false},
      {nullptr, "dbname/tablespacename", NO_EXT, false},
      {nullptr, "dbname/tablespacename", IBD, false},
      {nullptr, "dbname/tablespacename", ISL, false},
      {nullptr, "dbname/tablespacename", CFG, false},
      {nullptr, "dbname/tablespacename", CFP, false},
      {nullptr, "dbname\\tablespacename", NO_EXT, false},
      {nullptr, "dbname\\tablespacename", IBD, false},
      {"/this/is/a/path", "dbname/tablespacename", IBD, false},
      {"/this/is/a/path", "dbname/tablespacename", IBD, true},
      {"./this/is/a/path", "dbname/tablespacename.ibd", IBD, true},
      {"this\\is\\a\\path", "dbname/tablespacename", IBD, true},
      {"/this/is/a/path", "dbname\\tablespacename", IBD, true},
      {long_path, nullptr, IBD, false},
      {long_path, "tablespacename", IBD, false},
      {long_path, "tablespacename", IBD, true},
  };

  for (const Make_args &args : cases) {
    char *path = MF(args.first, args.second, args.suffix, args.flag);
    DISPLAY;
  }
}
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
9646
9647 /** Release the reserved free extents.
9648 @param[in] n_reserved number of reserved extents */
release_free_extents(ulint n_reserved)9649 void fil_space_t::release_free_extents(ulint n_reserved) {
9650 #ifndef UNIV_HOTBACKUP
9651 ut_ad(rw_lock_own(&latch, RW_LOCK_X));
9652 #endif /* !UNIV_HOTBACKUP */
9653
9654 ut_a(n_reserved < std::numeric_limits<uint32_t>::max());
9655 ut_a(n_reserved_extents >= n_reserved);
9656
9657 n_reserved_extents -= (uint32_t)n_reserved;
9658 }
9659
9660 #ifndef UNIV_HOTBACKUP
9661
9662 #ifdef UNIV_DEBUG
9663
9664 /** Print the extent descriptor pages of this tablespace into
9665 the given file.
9666 @param[in] filename the output file name. */
print_xdes_pages(const char * filename) const9667 void fil_space_t::print_xdes_pages(const char *filename) const {
9668 std::ofstream out(filename);
9669 print_xdes_pages(out);
9670 }
9671
9672 /** Print the extent descriptor pages of this tablespace into
9673 the given file.
9674 @param[in] out the output file name.
9675 @return the output stream. */
print_xdes_pages(std::ostream & out) const9676 std::ostream &fil_space_t::print_xdes_pages(std::ostream &out) const {
9677 mtr_t mtr;
9678 const page_size_t page_size(flags);
9679
9680 mtr_start(&mtr);
9681
9682 for (page_no_t i = 0; i < 100; ++i) {
9683 page_no_t xdes_page_no = i * UNIV_PAGE_SIZE;
9684
9685 if (xdes_page_no >= size) {
9686 break;
9687 }
9688
9689 buf_block_t *xdes_block =
9690 buf_page_get(page_id_t(id, xdes_page_no), page_size, RW_S_LATCH, &mtr);
9691
9692 page_t *page = buf_block_get_frame(xdes_block);
9693
9694 ulint page_type = fil_page_get_type(page);
9695
9696 switch (page_type) {
9697 case FIL_PAGE_TYPE_ALLOCATED:
9698
9699 ut_ad(xdes_page_no >= free_limit);
9700
9701 mtr_commit(&mtr);
9702 return (out);
9703
9704 case FIL_PAGE_TYPE_FSP_HDR:
9705 case FIL_PAGE_TYPE_XDES:
9706 break;
9707 default:
9708 ut_error;
9709 }
9710
9711 xdes_page_print(out, page, xdes_page_no, &mtr);
9712 }
9713
9714 mtr_commit(&mtr);
9715 return (out);
9716 }
9717 #endif /* UNIV_DEBUG */
9718
9719 /** Initialize the table space encryption
9720 @param[in,out] space Tablespace instance */
fil_tablespace_encryption_init(const fil_space_t * space)9721 static void fil_tablespace_encryption_init(const fil_space_t *space) {
9722 for (auto &key : *recv_sys->keys) {
9723 if (key.space_id != space->id) {
9724 continue;
9725 }
9726
9727 dberr_t err = DB_SUCCESS;
9728
9729 ut_ad(!fsp_is_system_tablespace(space->id));
9730
9731 /* Here we try to populate space tablespace_key which is read during
9732 REDO scan.
9733
9734 Consider following scenario:
9735 1. Alter tablespce .. encrypt=y (KEY1)
9736 2. Alter tablespce .. encrypt=n
9737 3. Alter tablespce .. encrypt=y (KEY2)
9738
9739 Lets say there is a crash after (3) is finished successfully. All the pages
9740 of tablespace are encrypted with KEY2.
9741
9742 During recovery:
9743 ----------------
9744 - Let's say we scanned till REDO of (1) but couldn't reach to REDO of (3).
9745 - So we've got tablespace key as KEY1.
9746 - Note, tablespace pages were encrypted using KEY2 which would have been
9747 found on page 0 and thus loaded already in file_space_t.
9748
9749 If we overwrite this space key (KEY2) with the one we got from REDO log
9750 scan (KEY1), then when we try to read a page from Disk, we will try to
9751 decrypt it using KEY1 whereas page was encrypted with KEY2. ERROR.
9752
9753 Therefore, for a general tablespace, if tablespace key is already populated
9754 it is the latest key and should be used instead of the one read during
9755 REDO log scan.
9756
9757 For file-per-table tablespace, which is not INPLACE algorithm, copy what
9758 is found on REDO Log.
9759 */
9760 if (fsp_is_file_per_table(space->id, space->flags) ||
9761 space->encryption_klen == 0) {
9762 err = fil_set_encryption(space->id, Encryption::AES, key.ptr, key.iv);
9763 }
9764
9765 if (err != DB_SUCCESS) {
9766 ib::error(ER_IB_MSG_343) << "Can't set encryption information"
9767 << " for tablespace" << space->name << "!";
9768 }
9769 }
9770 }
9771
9772 /** Modify table name in Innodb persistent stat tables, if needed. Required
9773 when partitioned table file names from old versions are modified to change
9774 the letter case.
9775 @param[in] old_path path to old file
9776 @param[in] new_path path to new file */
fil_adjust_partition_stat(const std::string & old_path,const std::string & new_path)9777 static void fil_adjust_partition_stat(const std::string &old_path,
9778 const std::string &new_path) {
9779 char errstr[FN_REFLEN];
9780 std::string path;
9781
9782 /* Skip if not IBD file extension. */
9783 if (!Fil_path::has_suffix(IBD, old_path) ||
9784 !Fil_path::has_suffix(IBD, new_path)) {
9785 return;
9786 }
9787
9788 /* Check if partitioned table. */
9789 if (!dict_name::is_partition(old_path) ||
9790 !dict_name::is_partition(new_path)) {
9791 return;
9792 }
9793
9794 std::string old_name;
9795 path.assign(old_path);
9796 if (!Fil_path::parse_file_path(path, IBD, old_name)) {
9797 return;
9798 }
9799 ut_ad(!old_name.empty());
9800
9801 std::string new_name;
9802 path.assign(new_path);
9803 if (!Fil_path::parse_file_path(path, IBD, new_name)) {
9804 return;
9805 }
9806 ut_ad(!new_name.empty());
9807
9808 /* Required for case insensitive file system where file path letter case
9809 doesn't matter. We need to keep the name in stat table consistent. */
9810 dict_name::rebuild(new_name);
9811
9812 if (old_name.compare(new_name) != 0) {
9813 dict_stats_rename_table(old_name.c_str(), new_name.c_str(), errstr,
9814 sizeof(errstr));
9815 }
9816 }
9817
9818 /** Update the DD if any files were moved to a new location.
9819 Free the Tablespace_files instance.
9820 @param[in] read_only_mode true if InnoDB is started in read only mode.
9821 @return DB_SUCCESS if all OK */
prepare_open_for_business(bool read_only_mode)9822 dberr_t Fil_system::prepare_open_for_business(bool read_only_mode) {
9823 if (read_only_mode && !m_moved.empty()) {
9824 ib::error(ER_IB_MSG_344)
9825 << m_moved.size() << " files have been relocated"
9826 << " and the server has been started in read"
9827 << " only mode. Cannot update the data dictionary.";
9828
9829 return (DB_READ_ONLY);
9830 }
9831
9832 trx_t *trx = check_trx_exists(current_thd);
9833
9834 TrxInInnoDB trx_in_innodb(trx);
9835
9836 /* The transaction should not be active yet, start it */
9837
9838 trx->isolation_level = trx_t::READ_UNCOMMITTED;
9839
9840 trx_start_if_not_started_xa(trx, false);
9841
9842 size_t count = 0;
9843 size_t failed = 0;
9844 size_t batch_size = 0;
9845 bool print_msg = false;
9846 auto start_time = ut_time_monotonic();
9847
9848 /* If some file paths have changed then update the DD */
9849 for (auto &tablespace : m_moved) {
9850 dberr_t err;
9851
9852 auto old_path = std::get<dd_fil::OLD_PATH>(tablespace);
9853
9854 auto space_name = std::get<dd_fil::SPACE_NAME>(tablespace);
9855
9856 auto new_path = std::get<dd_fil::NEW_PATH>(tablespace);
9857 auto object_id = std::get<dd_fil::OBJECT_ID>(tablespace);
9858
9859 /* We already have the space name in system cs. */
9860 err = dd_tablespace_rename(object_id, true, space_name.c_str(),
9861 new_path.c_str());
9862
9863 if (err != DB_SUCCESS) {
9864 ib::error(ER_IB_MSG_345) << "Unable to update tablespace ID"
9865 << " " << object_id << " "
9866 << " '" << old_path << "' to"
9867 << " '" << new_path << "'";
9868
9869 ++failed;
9870 }
9871
9872 /* Update persistent stat table if table name is modified. */
9873 fil_adjust_partition_stat(old_path, new_path);
9874
9875 ++count;
9876
9877 if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
9878 ib::info(ER_IB_MSG_346) << "Processed " << count << "/" << m_moved.size()
9879 << " tablespace paths. Failures " << failed;
9880
9881 start_time = ut_time_monotonic();
9882 print_msg = true;
9883 }
9884
9885 ++batch_size;
9886
9887 if (batch_size > 10000) {
9888 innobase_commit_low(trx);
9889
9890 ib::info(ER_IB_MSG_347) << "Committed : " << batch_size;
9891
9892 batch_size = 0;
9893
9894 trx_start_if_not_started_xa(trx, false);
9895 }
9896 }
9897
9898 if (batch_size > 0) {
9899 ib::info(ER_IB_MSG_348) << "Committed : " << batch_size;
9900 }
9901
9902 innobase_commit_low(trx);
9903
9904 if (print_msg) {
9905 ib::info(ER_IB_MSG_349) << "Updated " << count << " tablespace paths"
9906 << ", failures " << failed;
9907 }
9908
9909 return (failed == 0 ? DB_SUCCESS : DB_ERROR);
9910 }
9911
9912 /** Free the Tablespace_files instance.
9913 @param[in] read_only_mode true if InnoDB is started in read only mode.
9914 @return DB_SUCCESS if all OK */
fil_open_for_business(bool read_only_mode)9915 dberr_t fil_open_for_business(bool read_only_mode) {
9916 return (fil_system->prepare_open_for_business(read_only_mode));
9917 }
9918
9919 /** Replay a file rename operation for ddl replay.
9920 @param[in] page_id Space ID and first page number in the file
9921 @param[in] old_name old file name
9922 @param[in] new_name new file name
9923 @return whether the operation was successfully applied (the name did not
9924 exist, or new_name did not exist and name was successfully renamed to
9925 new_name) */
fil_op_replay_rename_for_ddl(const page_id_t & page_id,const char * old_name,const char * new_name)9926 bool fil_op_replay_rename_for_ddl(const page_id_t &page_id,
9927 const char *old_name, const char *new_name) {
9928 space_id_t space_id = page_id.space();
9929 fil_space_t *space = fil_space_get(space_id);
9930
9931 if (space == nullptr && !fil_system->open_for_recovery(space_id)) {
9932 ib::info(ER_IB_MSG_350)
9933 << "Can not find space with space ID " << space_id
9934 << " when replaying the DDL log "
9935 << "rename from '" << old_name << "' to '" << new_name << "'";
9936
9937 return (true);
9938 }
9939
9940 return (fil_op_replay_rename(page_id, old_name, new_name));
9941 }
9942
9943 /** Lookup the tablespace ID for recovery and DDL log apply.
9944 @param[in] space_id Tablespace ID to lookup
9945 @return true if the space ID is known. */
lookup_for_recovery(space_id_t space_id)9946 bool Fil_system::lookup_for_recovery(space_id_t space_id) {
9947 ut_ad(recv_recovery_is_on() || Log_DDL::is_in_recovery());
9948
9949 /* Single threaded code, no need to acquire mutex. */
9950 const auto result = get_scanned_files(space_id);
9951
9952 if (recv_recovery_is_on()) {
9953 const auto &end = recv_sys->deleted.end();
9954 const auto &it = recv_sys->deleted.find(space_id);
9955
9956 if (result.second == nullptr) {
9957 /* If it wasn't deleted after finding it on disk then
9958 we tag it as missing. */
9959
9960 if (it == end) {
9961 recv_sys->missing_ids.insert(space_id);
9962 }
9963
9964 return (false);
9965 }
9966
9967 /* Check that it wasn't deleted. */
9968
9969 return (it == end);
9970 }
9971
9972 return (result.second != nullptr);
9973 }
9974
9975 /** Lookup the tablespace ID.
9976 @param[in] space_id Tablespace ID to lookup
9977 @return true if the space ID is known. */
fil_tablespace_lookup_for_recovery(space_id_t space_id)9978 bool fil_tablespace_lookup_for_recovery(space_id_t space_id) {
9979 return (fil_system->lookup_for_recovery(space_id));
9980 }
9981
9982 /** Open a tablespace that has a redo/DDL log record to apply.
9983 @param[in] space_id Tablespace ID
9984 @return true if the open was successful */
open_for_recovery(space_id_t space_id)9985 bool Fil_system::open_for_recovery(space_id_t space_id) {
9986 ut_ad(recv_recovery_is_on() || Log_DDL::is_in_recovery());
9987
9988 if (!lookup_for_recovery(space_id)) {
9989 return (false);
9990 }
9991
9992 const auto result = get_scanned_files(space_id);
9993
9994 /* Duplicates should have been sorted out before start of recovery. */
9995 ut_a(result.second->size() == 1);
9996
9997 const auto &filename = result.second->front();
9998 const std::string path = result.first + filename;
9999
10000 fil_space_t *space;
10001
10002 auto status = ibd_open_for_recovery(space_id, path, space);
10003
10004 if (status == FIL_LOAD_OK) {
10005 /* For encrypted tablespace, set key and iv. */
10006 if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && recv_sys->keys != nullptr) {
10007 fil_tablespace_encryption_init(space);
10008 }
10009
10010 if (!recv_sys->dblwr->empty()) {
10011 recv_sys->dblwr->recover(space);
10012
10013 } else {
10014 ib::info(ER_IB_MSG_DBLWR_1317) << "DBLWR recovery skipped for "
10015 << space->name << " ID: " << space->id;
10016 }
10017
10018 return (true);
10019 } else if (status == FIL_LOAD_INVALID_ENCRYPTION_META) {
10020 ib::error() << "Invalid encryption metadata in tablespace header.";
10021 exit(EXIT_FAILURE);
10022 }
10023
10024 return (false);
10025 }
10026
10027 /** Open a tablespace that has a redo log record to apply.
10028 @param[in] space_id Tablespace ID
10029 @return true if the open was successful */
fil_tablespace_open_for_recovery(space_id_t space_id)10030 bool fil_tablespace_open_for_recovery(space_id_t space_id) {
10031 return (fil_system->open_for_recovery(space_id));
10032 }
10033
fil_tablespace_path_equals(dd::Object_id dd_object_id,space_id_t space_id,const char * space_name,ulint fsp_flags,std::string old_path,std::string * new_path)10034 Fil_state fil_tablespace_path_equals(dd::Object_id dd_object_id,
10035 space_id_t space_id,
10036 const char *space_name, ulint fsp_flags,
10037 std::string old_path,
10038 std::string *new_path) {
10039 ut_ad((fsp_is_ibd_tablespace(space_id) &&
10040 Fil_path::has_suffix(IBD, old_path)) ||
10041 fsp_is_undo_tablespace(space_id));
10042
10043 /* Watch out for implicit undo tablespaces that are created during startup.
10044 They will not be in the list of scanned files. But the DD might need to be
10045 updated if the undo directory is different now from when the database was
10046 initialized. The DD will be updated if we put it in fil_system->moved. */
10047 if (fsp_is_undo_tablespace(space_id)) {
10048 undo::spaces->s_lock();
10049 space_id_t space_num = undo::id2num(space_id);
10050 undo::Tablespace *undo_space = undo::spaces->find(space_num);
10051
10052 if (undo_space != nullptr && undo_space->is_new()) {
10053 *new_path = undo_space->file_name();
10054 Fil_state state = ((old_path.compare(*new_path) == 0) ? Fil_state::MATCHES
10055 : Fil_state::MOVED);
10056 undo::spaces->s_unlock();
10057 return (state);
10058 }
10059 undo::spaces->s_unlock();
10060 }
10061
10062 /* Single threaded code, no need to acquire mutex. */
10063 const auto &end = recv_sys->deleted.end();
10064 const auto &it = recv_sys->deleted.find(space_id);
10065 const auto result = fil_system->get_scanned_files(space_id);
10066
10067 if (result.second == nullptr) {
10068 /* The file was not scanned but the DD has the tablespace. Either;
10069 1. This file is missing
10070 2. The file could not be opened because of encryption or something else,
10071 3. The path is not included in --innodb-directories.
10072 We need to check if the DD path is valid before we tag the file
10073 as missing. */
10074
10075 if (Fil_path::get_file_type(old_path) == OS_FILE_TYPE_FILE) {
10076 /* This file from the DD exists where the DD thinks it is. It will be
10077 opened later. Make some noise if the location is unknown. */
10078 if (!fil_path_is_known(old_path)) {
10079 ib::warn(ER_IB_MSG_UNPROTECTED_LOCATION_ALLOWED, old_path.c_str(),
10080 space_name);
10081 }
10082 return (Fil_state::MATCHES);
10083 }
10084
10085 /* If it wasn't deleted during redo apply, we tag it as missing. */
10086
10087 if (it == end && recv_recovery_is_on()) {
10088 recv_sys->missing_ids.insert(space_id);
10089 }
10090
10091 return (Fil_state::MISSING);
10092 }
10093
10094 /* Check if it was deleted according to the redo log. */
10095 if (it != end) {
10096 return (Fil_state::DELETED);
10097 }
10098
10099 /* A file with this space_id was found during scanning.
10100 Validate its location and check if it was moved from where
10101 the DD thinks it is.
10102
10103 Don't compare the full filename, there can be a mismatch if
10104 there was a DDL in progress and we will end up renaming the path
10105 in the DD dictionary. Such renames should be handled by the
10106 atomic DDL "ddl_log". */
10107
10108 std::string old_dir{old_path};
10109
10110 /* Ignore the filename component of the old path. */
10111 auto pos = old_dir.find_last_of(Fil_path::SEPARATOR);
10112 if (pos == std::string::npos) {
10113 old_dir = MySQL_datadir_path;
10114 } else {
10115 old_dir.resize(pos + 1);
10116 ut_ad(Fil_path::is_separator(old_dir.back()));
10117 }
10118 old_dir = Fil_path::get_real_path(old_dir);
10119
10120 /* Build the new path from the scan path and the found path. */
10121 std::string new_dir{result.first};
10122
10123 ut_ad(Fil_path::is_separator(new_dir.back()));
10124
10125 new_dir.append(result.second->front());
10126
10127 new_dir = Fil_path::get_real_path(new_dir);
10128
10129 /* Do not use a datafile that is in the wrong place. */
10130 if (!Fil_path::is_valid_location(space_name, space_id, fsp_flags, new_dir)) {
10131 return (Fil_state::MISSING);
10132 }
10133
10134 /* Ignore the filename component of the new path. */
10135 pos = new_dir.find_last_of(Fil_path::SEPARATOR);
10136
10137 ut_ad(pos != std::string::npos);
10138
10139 new_dir.resize(pos + 1);
10140
10141 if (old_dir.compare(new_dir) != 0) {
10142 *new_path = result.first + result.second->front();
10143 return (Fil_state::MOVED);
10144 }
10145
10146 *new_path = old_path;
10147 return (Fil_state::MATCHES);
10148 }
10149
fil_add_moved_space(dd::Object_id dd_object_id,space_id_t space_id,const char * space_name,const std::string & old_path,const std::string & new_path)10150 void fil_add_moved_space(dd::Object_id dd_object_id, space_id_t space_id,
10151 const char *space_name, const std::string &old_path,
10152 const std::string &new_path) {
10153 /* Keep space_name in system cs. We handle it while modifying DD. */
10154 fil_system->moved(dd_object_id, space_id, space_name, old_path, new_path);
10155 }
10156
fil_update_partition_name(space_id_t space_id,uint32_t fsp_flags,bool update_space,std::string & space_name,std::string & dd_path)10157 bool fil_update_partition_name(space_id_t space_id, uint32_t fsp_flags,
10158 bool update_space, std::string &space_name,
10159 std::string &dd_path) {
10160 #ifdef _WIN32
10161 /* Safe check. Never needed on Windows for path. */
10162 if (!update_space) {
10163 return (false);
10164 }
10165 #endif /* WIN32 */
10166
10167 /* Never needed in case insensitive file system for path. */
10168 if (!update_space && lower_case_file_system) {
10169 return (false);
10170 }
10171
10172 /* Only needed for file per table. */
10173 if (update_space && !fsp_is_file_per_table(space_id, fsp_flags)) {
10174 return (false);
10175 }
10176
10177 /* Extract dictionary name schema_name/table_name from dd path. */
10178 std::string table_name;
10179
10180 if (!Fil_path::parse_file_path(dd_path, IBD, table_name)) {
10181 /* Not a valid file-per-table IBD path */
10182 return (false);
10183 }
10184 ut_ad(!table_name.empty());
10185
10186 /* Only needed for partition file. */
10187 if (!dict_name::is_partition(table_name)) {
10188 return (false);
10189 }
10190
10191 /* Rebuild dictionary name to convert partition names to lower case. */
10192 dict_name::rebuild(table_name);
10193
10194 if (update_space) {
10195 /* Rebuild space name if required. */
10196 dict_name::rebuild_space(table_name, space_name);
10197 }
10198
10199 /* No need to update file name for lower case file system. */
10200 if (lower_case_file_system) {
10201 return (false);
10202 }
10203
10204 /* Rebuild path and compare. */
10205 std::string table_path = Fil_path::make_new_path(dd_path, table_name, IBD);
10206 ut_ad(!table_path.empty());
10207
10208 if (dd_path.compare(table_path) != 0) {
10209 /* Validate that the file exists. */
10210 if (os_file_exists(table_path.c_str())) {
10211 dd_path.assign(table_path);
10212 return (true);
10213
10214 } else {
10215 ib::warn(ER_IB_WARN_OPEN_PARTITION_FILE, table_path.c_str());
10216 }
10217 }
10218
10219 return (false);
10220 }
10221
10222 #endif /* !UNIV_HOTBACKUP */
10223
10224 /** This function should be called after recovery has completed.
10225 Check for tablespace files for which we did not see any MLOG_FILE_DELETE
10226 or MLOG_FILE_RENAME record. These could not be recovered.
10227 @return true if there were some filenames missing for which we had to
10228 ignore redo log records during the apply phase */
check_missing_tablespaces()10229 bool Fil_system::check_missing_tablespaces() {
10230 bool missing = false;
10231 const auto end = recv_sys->deleted.end();
10232
10233 /* Called in single threaded mode, no need to acquire the mutex. */
10234
10235 recv_sys->dblwr->check_missing_tablespaces();
10236
10237 for (auto space_id : recv_sys->missing_ids) {
10238 if (recv_sys->deleted.find(space_id) != end) {
10239 continue;
10240 }
10241
10242 const auto result = get_scanned_files(space_id);
10243
10244 if (result.second == nullptr) {
10245 if (fsp_is_undo_tablespace(space_id)) {
10246 /* This could happen if an undo truncate is in progress because
10247 undo tablespace construction is not redo logged. The DD is updated
10248 at the end and may be out of sync. */
10249 continue;
10250 }
10251
10252 ib::error(ER_IB_MSG_354) << "Could not find any file associated with"
10253 << " the tablespace ID: " << space_id;
10254 missing = true;
10255
10256 } else {
10257 ut_a(!result.second->empty());
10258 }
10259 }
10260
10261 return (missing);
10262 }
10263
10264 /** This function should be called after recovery has completed.
10265 Check for tablespace files for which we did not see any MLOG_FILE_DELETE
10266 or MLOG_FILE_RENAME record. These could not be recovered
10267 @return true if there were some filenames missing for which we had to
10268 ignore redo log records during the apply phase */
fil_check_missing_tablespaces()10269 bool fil_check_missing_tablespaces() {
10270 return (fil_system->check_missing_tablespaces());
10271 }
10272
10273 /** Parse a file name retrieved from a MLOG_FILE_* record,
10274 and return the absolute file path and tablespace name
10275 @param[in] file_name path emitted by the redo log
10276 @param[in] flags tablespace flags emitted by the redo log
10277 @param[in] space_id tablesapce ID emitted by the redo log
10278 @param[out] absolute_path absolute path of tablespace
10279 @param[out] tablespace_name name in the form of database/table */
fil_make_abs_file_path(const char * file_name,ulint flags,space_id_t space_id,std::string & absolute_path,std::string & tablespace_name)10280 static void fil_make_abs_file_path(const char *file_name, ulint flags,
10281 space_id_t space_id,
10282 std::string &absolute_path,
10283 std::string &tablespace_name) {
10284 Datafile df;
10285
10286 df.set_filepath(file_name);
10287 df.set_flags(flags);
10288 df.set_space_id(space_id);
10289 df.set_name(nullptr);
10290
10291 absolute_path = df.filepath();
10292 tablespace_name = df.name();
10293 }
10294
10295 /** Redo a tablespace create.
10296 @param[in] ptr redo log record
10297 @param[in] end end of the redo log buffer
10298 @param[in] page_id Tablespace Id and first page in file
10299 @param[in] parsed_bytes Number of bytes parsed so far
10300 @param[in] parse_only Don't apply, parse only
10301 @return pointer to next redo log record
10302 @retval nullptr if this log record was truncated */
fil_tablespace_redo_create(byte * ptr,const byte * end,const page_id_t & page_id,ulint parsed_bytes,bool parse_only)10303 byte *fil_tablespace_redo_create(byte *ptr, const byte *end,
10304 const page_id_t &page_id, ulint parsed_bytes,
10305 bool parse_only) {
10306 ut_a(page_id.page_no() == 0);
10307
10308 /* We never recreate the system tablespace. */
10309 ut_a(page_id.space() != TRX_SYS_SPACE);
10310
10311 ut_a(parsed_bytes != ULINT_UNDEFINED);
10312
10313 /* Where 6 = flags (uint32_t) + name len (uint16_t). */
10314 if (end <= ptr + 6) {
10315 return (nullptr);
10316 }
10317
10318 #if defined(UNIV_HOTBACKUP) || defined(XTRABACKUP)
10319 uint32_t flags = mach_read_from_4(ptr);
10320 #else
10321 /* Skip the flags, not used here. */
10322 #endif /* UNIV_HOTBACKUP || XTRABACKUP */
10323
10324 ptr += 4;
10325
10326 ulint len = mach_read_from_2(ptr);
10327
10328 ptr += 2;
10329
10330 /* Do we have the full/valid file name. */
10331 if (end < ptr + len || len < 5) {
10332 if (len < 5) {
10333 char name[6];
10334
10335 snprintf(name, sizeof(name), "%.*s", (int)len, ptr);
10336
10337 ib::error(ER_IB_MSG_355) << "MLOG_FILE_CREATE : Invalid file name."
10338 << " Length (" << len << ") must be >= 5"
10339 << " and end in '.ibd'. File name in the"
10340 << " redo log is '" << name << "'";
10341
10342 recv_sys->found_corrupt_log = true;
10343 }
10344
10345 return (nullptr);
10346 }
10347
10348 char *name = reinterpret_cast<char *>(ptr);
10349
10350 Fil_path::normalize(name);
10351
10352 ptr += len;
10353
10354 if (!(Fil_path::has_suffix(IBD, name) ||
10355 fsp_is_undo_tablespace(page_id.space()))) {
10356 recv_sys->found_corrupt_log = true;
10357
10358 return (nullptr);
10359 }
10360
10361 if (parse_only) {
10362 return (ptr);
10363 }
10364 #ifdef UNIV_HOTBACKUP
10365
10366 meb_tablespace_redo_create(page_id, flags, name);
10367
10368 #else /* !UNIV_HOTBACKUP */
10369
10370 const auto files = fil_system->get_scanned_files(page_id.space());
10371
10372 std::string abs_file_path;
10373 std::string tablespace_name;
10374
10375 fil_make_abs_file_path(name, flags, page_id.space(), abs_file_path,
10376 tablespace_name);
10377
10378 if (!srv_backup_mode &&
10379 (files.second == nullptr || files.second->size() == 0)) {
10380 abs_file_path = xb_tablespace_backup_file_path(abs_file_path.c_str());
10381 bool exists = Fil_path(abs_file_path).is_file_and_exists();
10382
10383 if (!exists && !fil_space_get(page_id.space())) {
10384 ib::info() << "Creating the tablespace : " << abs_file_path
10385 << ", space_id : " << page_id.space();
10386
10387 dberr_t ret = fil_ibd_create(page_id.space(), tablespace_name.c_str(),
10388 abs_file_path.c_str(), flags,
10389 FIL_IBD_FILE_INITIAL_SIZE);
10390
10391 if (ret != DB_SUCCESS) {
10392 ib::fatal() << "Could not create the tablespace : " << abs_file_path
10393 << " with space Id : " << page_id.space();
10394 }
10395
10396 bool success = fil_system->insert(page_id.space(), abs_file_path);
10397
10398 if (!success) {
10399 ib::fatal() << "Could not insert the tablespace : " << abs_file_path
10400 << " with space Id : " << page_id.space() << " to "
10401 << "the list of known tablespaces";
10402 }
10403 }
10404 }
10405
10406 if (srv_backup_mode) {
10407 xb_tablespace_map_add(abs_file_path.c_str(), tablespace_name.c_str());
10408 }
10409
10410 const auto result = fil_system->get_scanned_files(page_id.space());
10411
10412 if (result.second == nullptr) {
10413 /* No file maps to this tablespace ID. It's possible that
10414 the file was deleted later or is misisng. */
10415
10416 return (ptr);
10417 }
10418
10419 /* Duplicates should have been sorted out before we get here. */
10420 ut_a(result.second->size() == 1);
10421
10422 /* It's possible that the tablespace file was renamed later. */
10423 if (result.second->front().compare(abs_file_path) == 0) {
10424 bool success;
10425
10426 success = fil_tablespace_open_for_recovery(page_id.space());
10427
10428 if (!success) {
10429 ib::info(ER_IB_MSG_356) << "Create '" << abs_file_path << "' failed!";
10430 }
10431 }
10432 #endif /* UNIV_HOTBACKUP */
10433
10434 return (ptr);
10435 }
10436
/** Redo a tablespace rename.
Parses and validates an MLOG_FILE_RENAME redo log record: a 2-byte length
followed by the "from" name, then a 2-byte length followed by the "to" name.
Besides parsing, when srv_backup_mode is not set the rename is replayed on
the tablespace and the path stored in fil_system for the space ID is erased
and re-inserted with the new name.
@param[in]	ptr		redo log record
@param[in]	end		end of the redo log buffer
@param[in]	page_id		Tablespace Id and first page in file
@param[in]	parsed_bytes	Number of bytes parsed so far
@param[in]	parse_only	Don't apply, parse only. Only consulted in
                                UNIV_HOTBACKUP builds.
@return pointer to next redo log record
@retval nullptr if this log record was truncated, or if it was rejected as
corrupt (recv_sys->found_corrupt_log is set in that case) */
byte *fil_tablespace_redo_rename(byte *ptr, const byte *end,
                                 const page_id_t &page_id, ulint parsed_bytes,
                                 bool parse_only) {
  /* A rename record always refers to the first page of the file. */
  ut_a(page_id.page_no() == 0);

  /* We never recreate the system tablespace. */
  ut_a(page_id.space() != TRX_SYS_SPACE);

  ut_a(parsed_bytes != ULINT_UNDEFINED);

  /* Where 2 = from name len (uint16_t). */
  if (end <= ptr + 2) {
    return (nullptr);
  }

  /* Read and check the RENAME FROM_NAME. */
  ulint from_len = mach_read_from_2(ptr);
  ptr += 2;
  /* The name is used in place; it points into the redo log buffer. */
  char *from_name = reinterpret_cast<char *>(ptr);

  /* Check if the 'from' file name is valid. */
  if (end < ptr + from_len) {
    return (nullptr);
  }

  /* Explanation appended to the message logged for an invalid name. */
  std::string whats_wrong;
  constexpr char more_than_five[] = "The length must be >= 5.";
  constexpr char end_with_ibd[] = "The file suffix must be '.ibd'.";
  if (from_len < 5) {
    recv_sys->found_corrupt_log = true;
    whats_wrong.assign(more_than_five);
  } else {
    /* Built from the C string, i.e. up to the first NUL byte. Assumes the
    logged name is NUL terminated within from_len - TODO confirm. */
    std::string name{from_name};

    if (!Fil_path::has_suffix(IBD, name)) {
      recv_sys->found_corrupt_log = true;
      whats_wrong.assign(end_with_ibd);
    }
  }

  /* NOTE(review): the shared flag is tested rather than a local result, so
  this branch is also taken if the flag was already set before this call. */
  if (recv_sys->found_corrupt_log) {
    ib::info(ER_IB_MSG_357) << "MLOG_FILE_RENAME: Invalid {from} file name: '"
                            << from_name << "'. " << whats_wrong;

    return (nullptr);
  }

  /* Step over the name, then normalize it in place in the log buffer. */
  ptr += from_len;
  Fil_path::normalize(from_name);

  /* Read and check the RENAME TO_NAME. */
  /* NOTE(review): unlike the first length field, these two bytes are read
  without a preceding check against 'end' - confirm the caller guarantees
  they are present. */
  ulint to_len = mach_read_from_2(ptr);
  ptr += 2;
  char *to_name = reinterpret_cast<char *>(ptr);

  /* Check if the 'to' file name is valid. */
  if (end < ptr + to_len) {
    return (nullptr);
  }

  if (to_len < 5) {
    recv_sys->found_corrupt_log = true;
    whats_wrong.assign(more_than_five);
  } else {
    std::string name{to_name};

    if (!Fil_path::has_suffix(IBD, name)) {
      recv_sys->found_corrupt_log = true;
      whats_wrong.assign(end_with_ibd);
    }
  }

  if (recv_sys->found_corrupt_log) {
    ib::info(ER_IB_MSG_357) << "MLOG_FILE_RENAME: Invalid {to} file name: '"
                            << to_name << "'. " << whats_wrong;

    return (nullptr);
  }

  ptr += to_len;
  Fil_path::normalize(to_name);

#ifdef UNIV_HOTBACKUP

  /* This is the only place where parse_only is honoured. */
  if (!parse_only) {
    meb_tablespace_redo_rename(page_id, from_name, to_name);
  }

#else /* !UNIV_HOTBACKUP */

  /* Update filename with correct partition case, if needed. */
  /* Neither to_name_str nor space_name is read again in this function; the
  call is presumably made for its side effects - TODO confirm. */
  std::string to_name_str(to_name);
  std::string space_name;
  fil_update_partition_name(page_id.space(), 0, false, space_name, to_name_str);

  /* A rename to the identical name is treated as log corruption. */
  if (from_len == to_len && strncmp(to_name, from_name, to_len) == 0) {
    ib::error(ER_IB_MSG_360)
        << "MLOG_FILE_RENAME: The from and to name are the"
        << " same: '" << from_name << "', '" << to_name << "'";

    recv_sys->found_corrupt_log = true;

    return (nullptr);
  }

#endif /* UNIV_HOTBACKUP */

  /* Replay the rename. Note that this section is compiled in both build
  variants above and does not look at parse_only. */
  if (!srv_backup_mode) {
    bool success;

    success = fil_tablespace_open_for_recovery(page_id.space());

    if (!success) {
      /* The record is still consumed: the caller continues with the next
      record. */
      ib::info() << "Rename failed. Cannot find '" << from_name << "'!";
      return (ptr);
    }

    fil_space_t *space = fil_space_get(page_id.space());

    ut_a(space != nullptr);

    /* Drop the map entry for the old tablespace name. The replacement entry
    is added below, once the rename has been replayed. */
    xb_tablespace_map_delete(space->name);
    std::string abs_file_path;
    std::string tablespace_name;

    /* Computed before the rename, while 'space' is still known to be
    valid; it is freed further down. */
    fil_make_abs_file_path(to_name, space->flags, space->id, abs_file_path,
                           tablespace_name);

    success = fil_op_replay_rename(page_id, from_name, to_name);
    ut_a(success);

    xb_tablespace_map_add(abs_file_path.c_str(), tablespace_name.c_str());

    /* 'space' must not be dereferenced after this call. */
    fil_space_free(page_id.space(), false);

    /* Replace the path recorded for this space ID with the new name. */
    success = fil_system->erase_path(page_id.space());
    ut_a(success);

    success = fil_system->insert(page_id.space(), to_name);

    if (!success) {
      ib::fatal() << "Could not insert the tablespace : " << to_name
                  << " with space Id : " << page_id.space() << " to "
                  << "the list of known tablespaces";
    }
  }

  return (ptr);
}
10595
/** Redo a tablespace delete.
Parses and validates an MLOG_FILE_DELETE redo log record (a 2-byte length
followed by the file name) and, unless parse_only is set, applies it.
@param[in]	ptr		redo log record
@param[in]	end		end of the redo log buffer
@param[in]	page_id		Tablespace Id and first page in file
@param[in]	parsed_bytes	Number of bytes parsed so far
@param[in]	parse_only	Don't apply, parse only
@return pointer to next redo log record
@retval nullptr if this log record was truncated, if the name is shorter
than 5 bytes, or if the name has no '.ibd' suffix and the space is not an
undo tablespace (recv_sys->found_corrupt_log is set in the last case) */
byte *fil_tablespace_redo_delete(byte *ptr, const byte *end,
                                 const page_id_t &page_id, ulint parsed_bytes,
                                 bool parse_only) {
  /* A delete record always refers to the first page of the file. */
  ut_a(page_id.page_no() == 0);

  /* We never recreate the system tablespace. */
  ut_a(page_id.space() != TRX_SYS_SPACE);

  ut_a(parsed_bytes != ULINT_UNDEFINED);

  /* Where 2 = len (uint16_t). */
  if (end <= ptr + 2) {
    return (nullptr);
  }

  ulint len = mach_read_from_2(ptr);

  ptr += 2;

  /* Do we have the full/valid file name. */
  if (end < ptr + len || len < 5) {
    if (len < 5) {
      /* Copy the (at most 4 byte) name into a NUL terminated buffer so that
      it can be printed. This local shadows nothing yet; the outer 'name' is
      declared further down. */
      char name[6];

      snprintf(name, sizeof(name), "%.*s", (int)len, ptr);

      ib::error(ER_IB_MSG_362) << "MLOG_FILE_DELETE : Invalid file name."
                               << " Length (" << len << ") must be >= 5"
                               << " and end in '.ibd'. File name in the"
                               << " redo log is '" << name << "'";
    }

    /* Note that a too-short name is reported but does not set
    recv_sys->found_corrupt_log. */
    return (nullptr);
  }

  /* The name is used, and normalized, in place in the redo log buffer. */
  char *name = reinterpret_cast<char *>(ptr);

  Fil_path::normalize(name);

  ptr += len;

  /* Only undo tablespaces may have a name without the '.ibd' suffix. */
  if (!(Fil_path::has_suffix(IBD, name) ||
        fsp_is_undo_tablespace(page_id.space()))) {
    recv_sys->found_corrupt_log = true;

    return (nullptr);
  }

  if (parse_only) {
    return (ptr);
  }
#ifdef UNIV_HOTBACKUP

  meb_tablespace_redo_delete(page_id, name);

#else /* !UNIV_HOTBACKUP */

  if (!srv_backup_mode) {
    bool success;

    success = fil_tablespace_open_for_recovery(page_id.space());

    if (!success) {
      /* The record is still consumed; the bookkeeping below is skipped. */
      ib::info(ER_IB_MSG_356) << "Delete '" << name << "' failed!";
      return (ptr);
    }

    fil_space_t *space = fil_space_get(page_id.space());

    if (space != nullptr) {
      xb_tablespace_map_delete(space->name);

      dberr_t err =
          fil_delete_tablespace(page_id.space(), BUF_REMOVE_FLUSH_NO_WRITE);

      /* A failed delete stops the process. */
      ut_a(err == DB_SUCCESS);
    }
  }

  /* Look up the files found by the directory scan for this space ID. */
  const auto result = fil_system->get_scanned_files(page_id.space());

  /* Record that this space ID was deleted and is no longer missing. */
  recv_sys->deleted.insert(page_id.space());
  recv_sys->missing_ids.erase(page_id.space());

  if (result.second == nullptr) {
    /* No files map to this tablespace ID. The drop must
    have succeeded. */

    return (ptr);
  }

  /* Space_id_set should have been sorted out before we get here. */

  ut_a(result.second->size() == 1);

  /* Update filename with correct partition case, if needed. */
  /* Neither name_str nor space_name is read again in this function; the
  call is presumably made for its side effects - TODO confirm. */
  std::string name_str(name);
  std::string space_name;
  fil_update_partition_name(page_id.space(), 0, false, space_name, name_str);

  fil_space_free(page_id.space(), false);

  /* Forget the path recorded for this space ID. */
  bool success = fil_system->erase_path(page_id.space());
  ut_a(success);
#endif /* UNIV_HOTBACKUP */

  return (ptr);
}
10712
/** Parse and process an encryption redo record.
The record body is a 2-byte offset, a 2-byte length and 'length' bytes of
encryption information. The key and IV obtained from it are stored either in
the open tablespace object or, when the tablespace is not available, in
recv_sys->keys.
@param[in]	ptr		redo log record
@param[in]	end		end of the redo log buffer
@param[in]	space_id	the tablespace ID
@return log record end, nullptr if not a complete record or if the offset
or length is invalid (recv_sys->found_corrupt_log is set in that case) */
byte *fil_tablespace_redo_encryption(byte *ptr, const byte *end,
                                     space_id_t space_id) {
  byte *iv = nullptr;
  byte *key = nullptr;
  /* True when key and iv were allocated here and are not yet owned by
  recv_sys->keys; every early return must then free them. */
  bool is_new = false;

#ifdef UNIV_DEBUG
  /* True when the debug injection below replaced key and iv with scratch
  buffers, which are released at the end of the function. */
  bool is_allocated = false;
#endif

  fil_space_t *space = fil_space_get(space_id);

  /* An undo space might be open but not have the ENCRYPTION bit set
  in its header if the current value of innodb_undo_log_encrypt=OFF
  and a crash occurred between flushing this redo record and the header
  page of the undo space. So if the flag is missing, ignore the header
  page. */
  if (fsp_is_undo_tablespace(space_id) && space != nullptr &&
      !FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
    space = nullptr;
  }

  if (space == nullptr) {
    /* No usable tablespace object: keep the key in recv_sys->keys. */
    if (recv_sys->keys == nullptr) {
      recv_sys->keys = UT_NEW_NOKEY(recv_sys_t::Encryption_Keys());
    }

    /* Reuse the buffers of an entry already stored for this space ID. The
    loop does not stop at the first match, so the last match is used. */
    for (auto &recv_key : *recv_sys->keys) {
      if (recv_key.space_id == space_id) {
        iv = recv_key.iv;
        key = recv_key.ptr;
      }
    }

#ifdef UNIV_DEBUG
    if (key != nullptr) {
      /* Debug injection: decode into scratch buffers so that the entry
      found above is left untouched. */
      DBUG_EXECUTE_IF(
          "dont_update_key_found_during_REDO_scan", is_allocated = true;
          key = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));
          iv = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN)););
    }
#endif

    if (key == nullptr) {
      /* First record seen for this space ID: allocate fresh buffers. */
      key = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));

      iv = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));

      is_new = true;
    }

  } else {
    /* Decode directly into the tablespace object. */
    iv = space->encryption_iv;
    key = space->encryption_key;
  }

  /* NOTE(review): the two 2-byte fields below are read without a check
  against 'end' in this function - presumably the caller has verified that
  they are present; confirm. */
  ulint offset;

  offset = mach_read_from_2(ptr);
  ptr += 2;

  ulint len;

  len = mach_read_from_2(ptr);
  ptr += 2;

  /* Incomplete record: the payload extends past the buffer. */
  if (end < ptr + len) {
    if (is_new) {
      ut_free(key);
      ut_free(iv);
    }
    return (nullptr);
  }

  /* The write must lie within a page and carry exactly
  Encryption::INFO_SIZE bytes; anything else is treated as corruption. */
  if (offset >= UNIV_PAGE_SIZE || len + offset > UNIV_PAGE_SIZE ||
      len != Encryption::INFO_SIZE) {
    recv_sys->found_corrupt_log = true;
    if (is_new) {
      ut_free(key);
      ut_free(iv);
    }
    return (nullptr);
  }

  if (srv_backup_mode || !use_dumped_tablespace_keys) {
    /* Decode the key and IV from the record payload. */
    if (!Encryption::decode_encryption_info(key, iv, ptr, true)) {
      if (is_new) {
        ut_free(key);
        ut_free(iv);
      }
      /* Outside backup mode a decode failure terminates the process; in
      backup mode the record is skipped. */
      if (!srv_backup_mode) {
        ib::error() << "Cannot decode encryption information in the redo log.";
        exit(EXIT_FAILURE);
      }
      return (ptr + len);
    }
  } else {
    /* Keys were dumped beforehand: read the master key ID stored after the
    magic bytes, raise the current master key ID if the logged one is
    higher, then fetch the key and IV for this space ID. */
    ulint master_key_id = mach_read_from_4(ptr + Encryption::MAGIC_SIZE);
    if (Encryption::get_master_key_id() < master_key_id) {
      Encryption::set_master_key(master_key_id);
    }
    bool found = xb_fetch_tablespace_key(space_id, key, iv);
    ut_a(found);
  }

  /* Already enforced by the corruption check above. */
  ut_ad(len == Encryption::INFO_SIZE);

  ptr += len;

  if (space == nullptr) {
    if (is_new) {
      /* Hand the freshly allocated buffers over to recv_sys->keys. */
      recv_sys_t::Encryption_Key new_key;

      new_key.iv = iv;
      new_key.ptr = key;
      new_key.space_id = space_id;

      recv_sys->keys->push_back(new_key);
    }
  } else {
    /* The key and IV were written into the space object above; set the
    remaining encryption attributes if the space is encrypted or is in the
    middle of being encrypted. */
    if (FSP_FLAGS_GET_ENCRYPTION(space->flags) ||
        space->encryption_op_in_progress == ENCRYPTION) {
      space->encryption_type = Encryption::AES;
      space->encryption_klen = Encryption::KEY_LEN;
    }
  }

#ifdef UNIV_DEBUG
  if (is_allocated) {
    /* Release the scratch buffers used by the debug injection.
    NOTE(review): the early returns above do not free them; debug only. */
    DBUG_EXECUTE_IF("dont_update_key_found_during_REDO_scan", ut_free(key);
                    ut_free(iv););
  }
#endif

  return (ptr);
}
10854
warn_ignore(std::string ignore_path,const char * reason)10855 void Tablespace_dirs::warn_ignore(std::string ignore_path, const char *reason) {
10856 ib::warn(ER_IB_MSG_IGNORE_SCAN_PATH, ignore_path.c_str(), reason);
10857 }
10858
add_path(const std::string & path_in,bool is_undo_dir)10859 void Tablespace_dirs::add_path(const std::string &path_in, bool is_undo_dir) {
10860 /* Ignore an invalid path. */
10861 if (path_in == "") {
10862 return;
10863 }
10864 if (path_in == "/") {
10865 warn_ignore(path_in,
10866 "the root directory '/' is not allowed to be scanned.");
10867 return;
10868 }
10869 if (std::string::npos != path_in.find('*')) {
10870 warn_ignore(path_in, "it contains '*'.");
10871 return;
10872 }
10873
10874 /* Assume this path is a directory and put a trailing slash on it. */
10875 std::string dir_in(path_in);
10876 Fil_path::append_separator(dir_in);
10877
10878 Fil_path found_path(dir_in, true);
10879
10880 /* Exclude this path if it is a duplicate of a path already stored or
10881 if a previously stored path is an ancestor. Remove any previously stored
10882 path that is a descendant of this path. */
10883 for (auto it = m_dirs.cbegin(); it != m_dirs.cend(); /* No op */) {
10884 if (it->root().is_same_as(found_path)) {
10885 /* The exact same path is obviously ignored, so there is no need to
10886 log a warning. */
10887 return;
10888 }
10889
10890 /* Check if dir_abs_path is an ancestor of this path */
10891 if (it->root().is_ancestor(found_path)) {
10892 /* Descendant directories will be scanned recursively, so don't
10893 add it to the scan list. Log a warning unless this descendant
10894 is the undo directory since it must be supplied even if it is
10895 a descendant of another data location. */
10896 if (!is_undo_dir) {
10897 std::string reason = "it is a sub-directory of '";
10898 reason += it->root().abs_path();
10899 warn_ignore(path_in, reason.c_str());
10900 }
10901 return;
10902 }
10903
10904 if (found_path.is_ancestor(it->root())) {
10905 /* This path is an ancestor of an existing dir in fil_system::m_dirs.
10906 The settings have overlapping locations. Put a note about it to
10907 the error log. The undo_dir is added last, so if it is an ancestor,
10908 the descendant was listed as a datafile directory. So always issue
10909 this message*/
10910 std::string reason = "it is a sub-directory of '";
10911 reason += found_path;
10912 warn_ignore(it->root().path(), reason.c_str());
10913
10914 /* It might also be an ancestor to another dir as well, so keep looking.
10915 We must delete this descendant because we know that this ancestor path
10916 will be inserted and all its descendants will be scanned. */
10917 it = m_dirs.erase(it);
10918 } else {
10919 it++;
10920 }
10921 }
10922
10923 m_dirs.push_back(Tablespace_files{found_path.path()});
10924 return;
10925 }
10926
add_paths(const std::string & str,const std::string & delimiters)10927 void Tablespace_dirs::add_paths(const std::string &str,
10928 const std::string &delimiters) {
10929 std::string::size_type start = 0;
10930 std::string::size_type end = 0;
10931
10932 /* Scan until 'start' reaches the end of the string (npos) */
10933 for (;;) {
10934 start = str.find_first_not_of(delimiters, end);
10935 if (std::string::npos == start) {
10936 break;
10937 }
10938
10939 end = str.find_first_of(delimiters, start);
10940
10941 const auto path = str.substr(start, end - start);
10942
10943 add_path(path);
10944 }
10945 }
10946
10947 /** Check whether we can rename the file
10948 @param[in] space Tablespace for which to rename
10949 @param[in] name Source file name
10950 @param[in] df Target file that exists on disk
10951 @return DB_SUCCESS if all OK */
fil_rename_validate(fil_space_t * space,const std::string & name,Datafile & df)10952 static dberr_t fil_rename_validate(fil_space_t *space, const std::string &name,
10953 Datafile &df) {
10954 dberr_t err = df.validate_for_recovery(space->id);
10955
10956 if (err == DB_TABLESPACE_NOT_FOUND) {
10957 /* Tablespace header doesn't contain the expected
10958 tablespace ID. This is can happen during truncate. */
10959
10960 return (err);
10961
10962 } else if (err != DB_SUCCESS) {
10963 ib::warn(ER_IB_MSG_367) << "Failed to read the first page of the"
10964 << " file '" << df.filepath() << "'."
10965 << " You will need to verify and move the"
10966 << " file out of the way retry recovery.";
10967
10968 return (err);
10969 }
10970
10971 auto file = &space->files.front();
10972
10973 if (strcmp(df.filepath(), file->name) == 0) {
10974 /* Check if already points to the correct file.
10975 Must have the same space ID */
10976
10977 ib::info(ER_IB_MSG_368) << "Tablespace ID already maps to: '"
10978 << df.filepath() << "', rename ignored.";
10979
10980 ut_a(df.space_id() == space->id);
10981
10982 return (DB_SUCCESS);
10983
10984 } else if (df.space_id() != space->id) {
10985 /* Target file exists on disk but has a different
10986 tablespce ID. The user should manually delete it. */
10987
10988 ib::error(ER_IB_MSG_369)
10989 << "Cannot rename '" << name << "' to '" << df.filepath() << "'. File '"
10990 << df.filepath() << "' tablespace ID " << df.space_id()
10991 << " doesn't match the expected tablespace"
10992 << " ID " << space->id << ". You will need to verify and move '"
10993 << df.filepath() << "' manually and retry recovery!";
10994
10995 return (DB_ERROR);
10996 }
10997
10998 /* Target file exists on disk and has the same ID. */
10999
11000 ib::error(ER_IB_MSG_370)
11001 << "Cannot rename '" << name << "' to '" << df.filepath()
11002 << "'. The File '" << df.filepath() << " already exists on"
11003 << " disk. You will need to verify and move either file"
11004 << " manually and retry recovery!";
11005
11006 return (DB_ERROR);
11007 }
11008
11009 /** Replay a file rename operation if possible.
11010 @param[in] page_id Space ID and first page number in the file
11011 @param[in] old_name old file name
11012 @param[in] new_name new file name
11013 @return whether the operation was successfully applied (the name did not exist,
11014 or new_name did not exist and name was successfully renamed to new_name) */
fil_op_replay_rename(const page_id_t & page_id,const std::string & old_name,const std::string & new_name)11015 static bool fil_op_replay_rename(const page_id_t &page_id,
11016 const std::string &old_name,
11017 const std::string &new_name) {
11018 ut_ad(page_id.page_no() == 0);
11019 ut_ad(old_name.compare(new_name) != 0);
11020 ut_ad(Fil_path::has_suffix(IBD, new_name));
11021 ut_ad(page_id.space() != TRX_SYS_SPACE);
11022
11023 /* In order to replay the rename, the following must hold:
11024 1. The new name is not already used.
11025 2. A tablespace exists with the old name.
11026 3. The space ID for that tablepace matches this log entry.
11027 This will prevent unintended renames during recovery. */
11028
11029 space_id_t space_id = page_id.space();
11030 fil_space_t *space = fil_space_get(space_id);
11031
11032 if (space == nullptr) {
11033 return (true);
11034 }
11035
11036 Datafile df;
11037 std::string name{new_name};
11038
11039 df.set_filepath(name.c_str());
11040
11041 if (df.open_read_only(false) == DB_SUCCESS) {
11042 dberr_t err = fil_rename_validate(space, old_name, df);
11043
11044 if (err == DB_TABLESPACE_NOT_FOUND) {
11045 /* This can happend during truncate. */
11046 ib::info(ER_IB_MSG_371) << "Tablespace ID mismatch in '" << name << "'";
11047 }
11048
11049 if (err == DB_WRONG_FILE_NAME) {
11050 df.close();
11051 os_file_delete(innodb_data_file_key, df.filepath());
11052 bool success = fil_system->erase_path(df.space_id());
11053 ut_a(success);
11054 } else {
11055 df.close();
11056 return (err == DB_SUCCESS);
11057 }
11058 }
11059
11060 auto path_sep_pos = name.find_last_of(Fil_path::SEPARATOR);
11061
11062 ut_a(path_sep_pos != std::string::npos);
11063
11064 /* Create the database directory for the new name, if
11065 it does not exist yet */
11066
11067 name.resize(path_sep_pos);
11068
11069 bool success = os_file_create_directory(name.c_str(), false);
11070 ut_a(success);
11071
11072 auto datadir_pos = name.find_last_of(Fil_path::SEPARATOR);
11073
11074 ut_ad(datadir_pos != std::string::npos);
11075
11076 name.erase(0, datadir_pos + 1);
11077
11078 ut_ad(!Fil_path::is_separator(name.back()));
11079
11080 /* schema/table separator is always a '/'. */
11081 name.push_back('/');
11082
11083 /* Strip the '.ibd' suffix. */
11084 name.append(new_name.begin() + path_sep_pos + 1, new_name.end() - 4);
11085
11086 ut_ad(!Fil_path::has_suffix(IBD, name));
11087
11088 clone_mark_abort(true);
11089
11090 const auto ptr = name.c_str();
11091
11092 dberr_t err =
11093 fil_rename_tablespace(space_id, old_name.c_str(), ptr, new_name.c_str());
11094
11095 /* Stop recovery if this does not succeed. */
11096 ut_a(err == DB_SUCCESS);
11097
11098 clone_mark_active();
11099
11100 return (true);
11101 }
11102
11103 /** Get the tablespace ID from an .ibd and/or an undo tablespace. If the ID
11104 is == 0 on the first page then try finding the ID with
11105 Datafile::find_space_id().
11106 @param[in] filename File name to check
11107 @return s_invalid_space_id if not found, otherwise the space ID */
get_tablespace_id(const std::string & filename)11108 space_id_t Fil_system::get_tablespace_id(const std::string &filename) {
11109 FILE *fp = fopen(filename.c_str(), "rb");
11110
11111 if (fp == nullptr) {
11112 ib::warn(ER_IB_MSG_372) << "Unable to open '" << filename << "'";
11113 return (dict_sys_t::s_invalid_space_id);
11114 }
11115
11116 std::vector<space_id_t> space_ids;
11117 auto page_size = srv_page_size;
11118
11119 space_ids.reserve(MAX_PAGES_TO_READ);
11120
11121 const auto n_bytes = page_size * MAX_PAGES_TO_READ;
11122
11123 std::unique_ptr<byte[]> buf(new byte[n_bytes]);
11124
11125 if (!buf) {
11126 return dict_sys_t::s_invalid_space_id;
11127 }
11128
11129 auto pages_read = fread(buf.get(), page_size, MAX_PAGES_TO_READ, fp);
11130
11131 DBUG_EXECUTE_IF("invalid_header", pages_read = 0;);
11132
11133 /* Find the space id from the pages read if enough pages could be read.
11134 Fall back to the more heavier method of finding the space id from
11135 Datafile::find_space_id() if pages cannot be read properly. */
11136 if (pages_read >= MAX_PAGES_TO_READ) {
11137 auto bytes_read = pages_read * page_size;
11138
11139 #ifdef POSIX_FADV_DONTNEED
11140 posix_fadvise(fileno(fp), 0, bytes_read, POSIX_FADV_DONTNEED);
11141 #endif /* POSIX_FADV_DONTNEED */
11142
11143 for (page_no_t i = 0; i < MAX_PAGES_TO_READ; ++i) {
11144 const auto off = i * page_size + FIL_PAGE_SPACE_ID;
11145
11146 if (off == FIL_PAGE_SPACE_ID) {
11147 /* Find out the page size of the tablespace from the first page.
11148 In case of compressed pages, the subsequent pages can be of different
11149 sizes. If MAX_PAGES_TO_READ is changed to a different value, then the
11150 page size of subsequent pages is needed to find out the offset for
11151 space ID. */
11152
11153 auto space_flags_offset = FSP_HEADER_OFFSET + FSP_SPACE_FLAGS;
11154
11155 ut_a(space_flags_offset + 4 < n_bytes);
11156
11157 const auto flags = mach_read_from_4(buf.get() + space_flags_offset);
11158
11159 page_size_t space_page_size(flags);
11160
11161 page_size = space_page_size.physical();
11162 }
11163
11164 space_ids.push_back(mach_read_from_4(buf.get() + off));
11165
11166 if ((i + 1) * page_size >= bytes_read) {
11167 break;
11168 }
11169 }
11170 }
11171
11172 fclose(fp);
11173
11174 space_id_t space_id;
11175
11176 if (!space_ids.empty()) {
11177 space_id = space_ids.front();
11178
11179 for (auto id : space_ids) {
11180 if (id == 0 || space_id != id) {
11181 space_id = UINT32_UNDEFINED;
11182
11183 break;
11184 }
11185 }
11186 } else {
11187 space_id = UINT32_UNDEFINED;
11188 }
11189
11190 /* Try the more heavy duty method, as a last resort. */
11191 if (space_id == UINT32_UNDEFINED) {
11192 /* If the first page cannot be read properly, then for compressed
11193 tablespaces we don't know where the page boundary starts because
11194 we don't know the page size. */
11195
11196 Datafile file;
11197
11198 file.set_filepath(filename.c_str());
11199
11200 dberr_t err = file.open_read_only(false);
11201
11202 ut_a(file.is_open());
11203 ut_a(err == DB_SUCCESS);
11204
11205 /* Use the heavier Datafile::find_space_id() method to
11206 find the space id. */
11207 err = file.find_space_id();
11208
11209 if (err == DB_SUCCESS) {
11210 space_id = file.space_id();
11211 }
11212
11213 file.close();
11214 }
11215
11216 return (space_id);
11217 }
11218
11219 /** Open tablespace file for backup.
11220 @param[in] path file path.
11221 @param[in] name space name.
11222 @return DB_SUCCESS if all OK */
fil_open_for_xtrabackup(const std::string & path,const std::string & name)11223 dberr_t fil_open_for_xtrabackup(const std::string &path,
11224 const std::string &name) {
11225 Datafile file;
11226 file.set_name(name.c_str());
11227 file.set_filepath(path.c_str());
11228
11229 dberr_t err = file.open_read_only(true);
11230 if (err != DB_SUCCESS) {
11231 return (err);
11232 }
11233
11234 lsn_t flush_lsn;
11235 err = file.validate_first_page(SPACE_UNKNOWN, &flush_lsn, false);
11236
11237 if (err == DB_PAGE_IS_BLANK) {
11238 /* allow corrupted first page for xtrabackup, it could be just
11239 zero-filled page, which we'll restore from redo log later */
11240 return (DB_SUCCESS);
11241 } else if (err != DB_SUCCESS) {
11242 return (err);
11243 }
11244
11245 if (fil_space_get(file.space_id())) {
11246 /* space already exists */
11247 return (DB_TABLESPACE_EXISTS);
11248 }
11249
11250 os_offset_t node_size = os_file_get_size(file.handle());
11251 bool is_tmp = FSP_FLAGS_GET_TEMPORARY(file.flags());
11252 os_offset_t n_pages;
11253
11254 ut_a(node_size != (os_offset_t)-1);
11255
11256 n_pages = node_size / page_size_t(file.flags()).physical();
11257
11258 fil_space_t *space =
11259 fil_space_create(name.c_str(), file.space_id(), file.flags(),
11260 is_tmp ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
11261
11262 ut_a(space != NULL);
11263
11264 /* For encrypted tablespace, initialize encryption
11265 information.*/
11266 if (FSP_FLAGS_GET_ENCRYPTION(file.flags())) {
11267 if (srv_backup_mode || !use_dumped_tablespace_keys) {
11268 byte *key = file.m_encryption_key;
11269 byte *iv = file.m_encryption_iv;
11270
11271 fsp_flags_set_encryption(space->flags);
11272 if (key && iv) {
11273 err = fil_set_encryption(space->id, Encryption::AES, key, iv);
11274 }
11275 } else {
11276 err = xb_set_encryption(space);
11277 }
11278
11279 ut_ad(err == DB_SUCCESS);
11280 }
11281
11282 char *fn = fil_node_create(file.filepath(), n_pages, space, false, false);
11283 if (fn == nullptr) {
11284 return (DB_ERROR);
11285 }
11286
11287 /* by opening the tablespace we forcing node and space objects
11288 in the cache to be populated with fields from space header */
11289 if (!fil_space_open(space->id)) {
11290 ib::error() << "Failed to open tablespace " << space->name;
11291 }
11292
11293 if (!srv_backup_mode || srv_close_files) {
11294 fil_space_close(space->id);
11295 }
11296
11297 return (DB_SUCCESS);
11298 }
11299
11300 /** Open IBD tablespaces.
11301 @param[in] start Start of slice
11302 @param[in] end End of slice
11303 @param[in] thread_id Thread ID */
open_ibd(const Const_iter & start,const Const_iter & end,size_t thread_id,bool & result)11304 void Tablespace_dirs::open_ibd(const Const_iter &start, const Const_iter &end,
11305 size_t thread_id, bool &result) {
11306 if (!result) return;
11307
11308 for (auto it = start; it != end; ++it) {
11309 const std::string filename = it->second;
11310 const auto &files = m_dirs[it->first];
11311 const std::string phy_filename = files.path() + filename;
11312
11313 if (check_if_skip_table(filename.c_str())) {
11314 continue;
11315 }
11316
11317 dberr_t err = fil_open_for_xtrabackup(
11318 phy_filename, filename.substr(0, filename.length() - 4));
11319 if (err != DB_SUCCESS) {
11320 result = false;
11321 }
11322 }
11323 }
11324
11325 /** Open all known tablespaces. */
open_ibds() const11326 void Tablespace_dirs::open_ibds() const {
11327 for (auto dir : m_dirs) {
11328 dir.open_ibds();
11329 }
11330 }
11331
rename_partition_files(bool revert)11332 void Fil_system::rename_partition_files(bool revert) {
11333 /* If revert, then we are downgrading after upgrade failure from 5.7 */
11334 ut_ad(!revert || srv_downgrade_partition_files);
11335
11336 if (m_old_paths.empty()) {
11337 return;
11338 }
11339
11340 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
11341 ut_ad(!lower_case_file_system);
11342
11343 for (auto &old_path : m_old_paths) {
11344 ut_ad(Fil_path::has_suffix(IBD, old_path));
11345 ut_ad(dict_name::is_partition(old_path));
11346
11347 fil_rename_partition_file(old_path, IBD, revert, false);
11348 }
11349 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
11350 }
11351
11352 /** Check for duplicate tablespace IDs.
11353 @param[in] start Slice start
11354 @param[in] end Slice end
11355 @param[in] thread_id Thread ID
11356 @param[in,out] mutex Mutex that covers the global state
11357 @param[in,out] unique To check for duplciates
11358 @param[in,out] duplicates Duplicate space IDs found */
duplicate_check(const Const_iter & start,const Const_iter & end,size_t thread_id,std::mutex * mutex,Space_id_set * unique,Space_id_set * duplicates)11359 void Tablespace_dirs::duplicate_check(const Const_iter &start,
11360 const Const_iter &end, size_t thread_id,
11361 std::mutex *mutex, Space_id_set *unique,
11362 Space_id_set *duplicates) {
11363 size_t count = 0;
11364 bool printed_msg = false;
11365 auto start_time = ut_time_monotonic();
11366
11367 for (auto it = start; it != end; ++it, ++m_checked) {
11368 const std::string filename = it->second;
11369 auto &files = m_dirs[it->first];
11370 const std::string phy_filename = files.path() + filename;
11371
11372 space_id_t space_id;
11373
11374 space_id = Fil_system::get_tablespace_id(phy_filename);
11375
11376 if (space_id != 0 && space_id != dict_sys_t::s_invalid_space_id) {
11377 std::lock_guard<std::mutex> guard(*mutex);
11378
11379 auto ret = unique->insert(space_id);
11380
11381 size_t n_files;
11382
11383 n_files = files.add(space_id, filename);
11384
11385 if (n_files > 1 || !ret.second) {
11386 duplicates->insert(space_id);
11387 }
11388
11389 } else if (space_id != 0 &&
11390 Fil_path::is_undo_tablespace_name(phy_filename)) {
11391 ib::info(ER_IB_MSG_373) << "Can't determine the undo file tablespace"
11392 << " ID for '" << phy_filename << "', could be"
11393 << " an undo truncate in progress";
11394
11395 } else {
11396 ib::info(ER_IB_MSG_374) << "Ignoring '" << phy_filename << "' invalid"
11397 << " tablespace ID in the header";
11398 }
11399
11400 ++count;
11401
11402 if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
11403 ib::info(ER_IB_MSG_375) << "Thread# " << thread_id << " - Checked "
11404 << count << "/" << (end - start) << " files";
11405
11406 start_time = ut_time_monotonic();
11407
11408 printed_msg = true;
11409 }
11410 }
11411
11412 if (printed_msg) {
11413 ib::info(ER_IB_MSG_376) << "Checked " << count << " files";
11414 }
11415 }
11416
11417 /** Print the duplicate filenames for a tablespce ID to the log
11418 @param[in] duplicates Duplicate tablespace IDs*/
print_duplicates(const Space_id_set & duplicates)11419 void Tablespace_dirs::print_duplicates(const Space_id_set &duplicates) {
11420 /* Print the duplicate names to the error log. */
11421 for (auto space_id : duplicates) {
11422 Dirs files;
11423
11424 for (auto &dir : m_dirs) {
11425 const auto names = dir.find(space_id);
11426
11427 if (names == nullptr) {
11428 continue;
11429 }
11430
11431 files.insert(files.end(), names->begin(), names->end());
11432 }
11433
11434 /* Fixes the order in the mtr tests. */
11435 std::sort(files.begin(), files.end());
11436
11437 ut_a(files.size() > 1);
11438
11439 std::ostringstream oss;
11440
11441 oss << "Tablespace ID: " << space_id << " = [";
11442
11443 for (size_t i = 0; i < files.size(); ++i) {
11444 oss << "'" << files[i] << "'";
11445
11446 if (i < files.size() - 1) {
11447 oss << ", ";
11448 }
11449 }
11450
11451 oss << "]" << std::endl;
11452
11453 ib::error(ER_IB_MSG_377) << oss.str();
11454 }
11455 }
11456
11457 #ifndef XTRABACKUP
fil_get_partition_file(const std::string & old_path,ib_file_suffix extn,std::string & new_path)11458 static bool fil_get_partition_file(const std::string &old_path,
11459 ib_file_suffix extn, std::string &new_path) {
11460 #ifdef _WIN32
11461 /* Safe check. Never needed on Windows. */
11462 return (false);
11463 #endif /* WIN32 */
11464
11465 #ifndef UNIV_HOTBACKUP
11466
11467 /* Needed only for case sensitive file system. */
11468 if (lower_case_file_system) {
11469 return (false);
11470 }
11471
11472 /* Skip if not right file extension. */
11473 if (!Fil_path::has_suffix(extn, old_path)) {
11474 return (false);
11475 }
11476
11477 /* Check if partitioned table. */
11478 if (!dict_name::is_partition(old_path)) {
11479 return (false);
11480 }
11481
11482 std::string table_name;
11483 /* Get Innodb dictionary name from file path. */
11484 if (!Fil_path::parse_file_path(old_path, extn, table_name)) {
11485 ut_ad(false);
11486 return (false);
11487 }
11488 ut_ad(!table_name.empty());
11489
11490 /* Rebuild partition table name with lower case. */
11491 std::string save_name(table_name);
11492 dict_name::rebuild(table_name);
11493
11494 if (save_name.compare(table_name) == 0) {
11495 return (false);
11496 }
11497
11498 /* Build new partition file name. */
11499 new_path = Fil_path::make_new_path(old_path, table_name, extn);
11500 ut_ad(!new_path.empty());
11501 #endif /* !UNIV_HOTBACKUP */
11502
11503 return (true);
11504 }
11505
11506 #endif /* !XTRABACKUP */
11507
11508 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
fil_rename_partition_file(const std::string & old_path,ib_file_suffix extn,bool revert,bool import)11509 static void fil_rename_partition_file(const std::string &old_path,
11510 ib_file_suffix extn, bool revert,
11511 bool import) {
11512 std::string new_path;
11513
11514 if (!fil_get_partition_file(old_path, extn, new_path)) {
11515 ut_ad(false);
11516 return;
11517 }
11518
11519 ut_ad(!new_path.empty());
11520
11521 bool old_exists = os_file_exists(old_path.c_str());
11522 bool new_exists = os_file_exists(new_path.c_str());
11523
11524 static bool print_upgrade = true;
11525 static bool print_downgrade = true;
11526 bool ret = false;
11527
11528 if (revert) {
11529 /* Check if rename is required. */
11530 if (!new_exists || old_exists) {
11531 return;
11532 }
11533 ret = os_file_rename(innodb_data_file_key, new_path.c_str(),
11534 old_path.c_str());
11535 ut_ad(ret);
11536
11537 if (ret && print_downgrade) {
11538 ib::info(ER_IB_MSG_DOWNGRADE_PARTITION_FILE, new_path.c_str(),
11539 old_path.c_str());
11540 print_downgrade = false;
11541 }
11542 return;
11543 }
11544
11545 /* Check if rename is required. */
11546 if (new_exists || !old_exists) {
11547 return;
11548 }
11549
11550 ret =
11551 os_file_rename(innodb_data_file_key, old_path.c_str(), new_path.c_str());
11552
11553 if (!ret) {
11554 /* File rename failed. */
11555 ut_ad(false);
11556 return;
11557 }
11558
11559 if (import) {
11560 ib::info(ER_IB_MSG_UPGRADE_PARTITION_FILE_IMPORT, old_path.c_str(),
11561 new_path.c_str());
11562 return;
11563 }
11564
11565 if (print_upgrade) {
11566 ib::info(ER_IB_MSG_UPGRADE_PARTITION_FILE, old_path.c_str(),
11567 new_path.c_str());
11568 print_upgrade = false;
11569 }
11570 }
11571 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
11572
set_scan_dir(const std::string & in_directory,bool is_undo_dir)11573 void Tablespace_dirs::set_scan_dir(const std::string &in_directory,
11574 bool is_undo_dir) {
11575 std::string directory(in_directory);
11576
11577 Fil_path::normalize(directory);
11578
11579 add_path(directory, is_undo_dir);
11580 }
11581
set_scan_dirs(const std::string & in_directories)11582 void Tablespace_dirs::set_scan_dirs(const std::string &in_directories) {
11583 std::string directories(in_directories);
11584
11585 Fil_path::normalize(directories);
11586
11587 std::string separators;
11588
11589 separators.push_back(FIL_PATH_SEPARATOR);
11590
11591 add_paths(directories, separators);
11592 }
11593
11594 /** Discover tablespaces by reading the header from .ibd files.
11595 @param[in] in_directories Directories to scan
11596 @return DB_SUCCESS if all goes well */
scan(bool populate_fil_cache)11597 dberr_t Tablespace_dirs::scan(bool populate_fil_cache) {
11598 Scanned_files ibd_files;
11599 Scanned_files undo_files;
11600 uint16_t count = 0;
11601 bool print_msg = false;
11602 auto start_time = ut_time_monotonic();
11603
11604 /* Should be trivial to parallelize the scan and ID check. */
11605 for (const auto &dir : m_dirs) {
11606 const auto real_path_dir = dir.root().abs_path();
11607
11608 ut_a(Fil_path::is_separator(dir.path().back()));
11609
11610 ib::info(ER_IB_MSG_379) << "Scanning '" << dir.path() << "'";
11611
11612 /* Walk the sub-tree of dir. */
11613
11614 Dir_Walker::walk(real_path_dir, true, [&](const std::string &path) {
11615 /* If it is a file and the suffix matches ".ibd"
11616 or the undo file name format then store it for
11617 determining the space ID. */
11618
11619 ut_a(path.length() > real_path_dir.length());
11620 ut_a(Fil_path::get_file_type(path) != OS_FILE_TYPE_DIR);
11621
11622 /* Make the filename relative to the directory that was scanned. */
11623 std::string file = path.substr(real_path_dir.length());
11624
11625 if (file.size() <= 4) {
11626 return;
11627 }
11628
11629 using value = Scanned_files::value_type;
11630
11631 if (Fil_path::has_suffix(IBD, file.c_str())) {
11632 ibd_files.push_back(value{count, file});
11633
11634 } else if (Fil_path::is_undo_tablespace_name(file)) {
11635 undo_files.push_back(value{count, file});
11636 }
11637
11638 if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
11639 ib::info(ER_IB_MSG_380)
11640 << "Files found so far: " << ibd_files.size() << " data files"
11641 << " and " << undo_files.size() << " undo files";
11642
11643 start_time = ut_time_monotonic();
11644 print_msg = true;
11645 }
11646 });
11647
11648 ++count;
11649 }
11650
11651 /* Rename all old partition files. */
11652 // fil_system->rename_partition_files(false);
11653
11654 if (print_msg) {
11655 ib::info(ER_IB_MSG_381) << "Found " << ibd_files.size() << " '.ibd' and "
11656 << undo_files.size() << " undo files";
11657 }
11658
11659 Space_id_set unique;
11660 Space_id_set duplicates;
11661
11662 /* Get the number of additional threads needed to scan the files. */
11663 size_t n_threads = fil_get_scan_threads(ibd_files.size());
11664
11665 if (n_threads > 0) {
11666 ib::info(ER_IB_MSG_382)
11667 << "Using " << (n_threads + 1) << " threads to"
11668 << " scan " << ibd_files.size() << " tablespace files";
11669 }
11670
11671 std::mutex m;
11672
11673 using std::placeholders::_1;
11674 using std::placeholders::_2;
11675 using std::placeholders::_3;
11676 using std::placeholders::_4;
11677 using std::placeholders::_5;
11678 using std::placeholders::_6;
11679
11680 std::function<void(const Const_iter &, const Const_iter &, size_t,
11681 std::mutex *, Space_id_set *, Space_id_set *)>
11682 check = std::bind(&Tablespace_dirs::duplicate_check, this, _1, _2, _3, _4,
11683 _5, _6);
11684
11685 if (!populate_fil_cache) {
11686 par_for(PFS_NOT_INSTRUMENTED, ibd_files, n_threads, check, &m, &unique,
11687 &duplicates);
11688 }
11689
11690 duplicate_check(undo_files.begin(), undo_files.end(), n_threads, &m, &unique,
11691 &duplicates);
11692
11693 ib::info(ER_IB_MSG_383) << "Completed space ID check of " << m_checked.load()
11694 << " files.";
11695
11696 dberr_t err;
11697
11698 if (!duplicates.empty()) {
11699 ib::error(ER_IB_MSG_384)
11700 << "Multiple files found for the same tablespace ID:";
11701
11702 print_duplicates(duplicates);
11703
11704 err = DB_FAIL;
11705 } else {
11706 err = DB_SUCCESS;
11707 }
11708
11709 if (err == DB_SUCCESS && populate_fil_cache) {
11710 bool result = true;
11711 std::function<void(const Const_iter &, const Const_iter &, size_t)> open =
11712 std::bind(&Tablespace_dirs::open_ibd, this, _1, _2, _3, result);
11713
11714 par_for(PFS_NOT_INSTRUMENTED, ibd_files, n_threads, open);
11715
11716 if (!result) err = DB_FAIL;
11717 }
11718
11719 return (err);
11720 }
11721
fil_set_scan_dir(const std::string & directory,bool is_undo_dir)11722 void fil_set_scan_dir(const std::string &directory, bool is_undo_dir) {
11723 fil_system->set_scan_dir(directory, is_undo_dir);
11724 }
11725
fil_set_scan_dirs(const std::string & directories)11726 void fil_set_scan_dirs(const std::string &directories) {
11727 fil_system->set_scan_dirs(directories);
11728 }
11729
11730 /** Discover tablespaces by reading the header from .ibd files.
11731 @param[in] populate_fil_cache Whether to load tablespaces into fil cache
11732 @return DB_SUCCESS if all goes well */
fil_scan_for_tablespaces(bool populate_fil_cache)11733 dberr_t fil_scan_for_tablespaces(bool populate_fil_cache) {
11734 return (fil_system->scan(populate_fil_cache));
11735 }
11736
11737 /** Open all known tablespaces. */
fil_open_ibds()11738 void fil_open_ibds() { fil_system->open_ibds(); }
11739
11740 /** Check if a path is known to InnoDB meaning that it is in or under
11741 one of the four path settings scanned at startup for file discovery.
11742 @param[in] path Path to check
11743 @return true if path is known to InnoDB */
fil_path_is_known(const std::string & path)11744 bool fil_path_is_known(const std::string &path) {
11745 return (fil_system->check_path(path));
11746 }
11747
11748 /** Get the list of directories that datafiles can reside in.
11749 @return the list of directories 'dir1;dir2;....;dirN' */
fil_get_dirs()11750 std::string fil_get_dirs() { return (fil_system->get_dirs()); }
11751
11752 /** Free the data structures required for recovery. */
fil_free_scanned_files()11753 void fil_free_scanned_files() { fil_system->free_scanned_files(); }
11754
11755 /** Update the tablespace name. Incase, the new name
11756 and old name are same, no update done.
11757 @param[in,out] space tablespace object on which name
11758 will be updated
11759 @param[in] name new name for tablespace */
fil_space_update_name(fil_space_t * space,const char * name)11760 void fil_space_update_name(fil_space_t *space, const char *name) {
11761 if (space == nullptr || name == nullptr || space->name == nullptr ||
11762 strcmp(space->name, name) == 0) {
11763 return;
11764 }
11765
11766 dberr_t err = fil_rename_tablespace_by_id(space->id, space->name, name);
11767
11768 if (err != DB_SUCCESS) {
11769 ib::warn(ER_IB_MSG_387) << "Tablespace rename '" << space->name << "' to"
11770 << " '" << name << "' failed!";
11771 }
11772 }
11773
11774 #ifndef UNIV_HOTBACKUP
is_valid_location(const char * space_name,space_id_t space_id,uint32_t fsp_flags,const std::string & path)11775 bool Fil_path::is_valid_location(const char *space_name, space_id_t space_id,
11776 uint32_t fsp_flags, const std::string &path) {
11777 ut_ad(!path.empty());
11778 ut_ad(space_name != nullptr);
11779
11780 /* All files sent to this routine have been found by scanning known
11781 locations. */
11782 ib_file_suffix type = (fsp_is_undo_tablespace(space_id) ? IBU : IBD);
11783
11784 if (type == IBD) {
11785 size_t dirname_len = dirname_length(path.c_str());
11786 Fil_path dirpath(path.c_str(), dirname_len, true);
11787
11788 bool is_shared = fsp_is_shared_tablespace(fsp_flags);
11789 bool under_datadir = MySQL_datadir_path.is_ancestor(dirpath);
11790
11791 if (is_shared) {
11792 if (under_datadir) {
11793 ib::error(ER_IB_MSG_GENERAL_TABLESPACE_UNDER_DATADIR, path.c_str());
11794 return (false);
11795 }
11796 } else {
11797 /* file-per-table */
11798 bool in_datadir =
11799 (under_datadir ? false : MySQL_datadir_path.is_same_as(dirpath));
11800
11801 if (in_datadir) {
11802 ib::error(ER_IB_MSG_IMPLICIT_TABLESPACE_IN_DATADIR, path.c_str());
11803 return (false);
11804 }
11805
11806 /* Make sure that the last directory of an implicit tablespace is a
11807 filesystem charset version of the schema name. */
11808 if (!is_valid_location_within_db(space_name, path)) {
11809 ib::error(ER_IB_MSG_INVALID_LOCATION_WRONG_DB, path.c_str(),
11810 space_name);
11811 return (false);
11812 }
11813 }
11814 }
11815
11816 return (true);
11817 }
11818
is_valid_location_within_db(const char * space_name,const std::string & path)11819 bool Fil_path::is_valid_location_within_db(const char *space_name,
11820 const std::string &path) {
11821 /* Strip off the basename to reduce the path to a directory. */
11822 std::string dirpath{path};
11823 auto pos = dirpath.find_last_of(SEPARATOR);
11824 dirpath.resize(pos);
11825
11826 /* Only implicit tablespaces are sent to this routine.
11827 They are always prefixed by `schema/`. */
11828 ut_ad(pos != std::string::npos);
11829
11830 /* Get the subdir that the file is in. */
11831 pos = dirpath.find_last_of(SEPARATOR);
11832 std::string db_dir = (pos == std::string::npos)
11833 ? dirpath
11834 : dirpath.substr(pos + 1, dirpath.length());
11835
11836 /* Convert to lowercase if necessary. */
11837 if (innobase_get_lower_case_table_names() == 2) {
11838 Fil_path::convert_to_lower_case(db_dir);
11839 }
11840
11841 /* Make sure the db_dir matches the schema name.
11842 db_dir is in filesystem charset and space_name is usually in the
11843 system charset.
11844
11845 The problem here is that the system charset version of a schema or
11846 table name may contain a '/' and the tablespace name we were sent
11847 is a combination of the two with '/' as a delimiter.
11848 For example `my/schema` + `my/table` == `my/schema/my/table`
11849
11850 Search the space_name string backwards until we find the db name that
11851 matches the schema name from the path. */
11852
11853 std::string name(space_name);
11854 pos = name.find_last_of(SEPARATOR);
11855 while (pos < std::string::npos) {
11856 name.resize(pos);
11857 std::string temp = name;
11858 if (temp == db_dir) {
11859 return (true);
11860 }
11861
11862 /* Convert to filename charset and compare again. */
11863 Fil_path::convert_to_filename_charset(temp);
11864 if (temp == db_dir) {
11865 return (true);
11866 }
11867
11868 /* Still no match, iterate through the next SEPARATOR. */
11869 pos = name.find_last_of(SEPARATOR);
11870
11871 /* If end of string is hit, there is no match. */
11872 if (pos == std::string::npos) {
11873 return (false);
11874 }
11875 }
11876
11877 return (true);
11878 }
11879
11880 /** Convert filename to the file system charset format.
11881 @param[in,out] name Filename to convert */
convert_to_filename_charset(std::string & name)11882 void Fil_path::convert_to_filename_charset(std::string &name) {
11883 uint errors = 0;
11884 char old_name[MAX_TABLE_NAME_LEN + 20];
11885 char filename[MAX_TABLE_NAME_LEN + 20];
11886
11887 strncpy(filename, name.c_str(), sizeof(filename) - 1);
11888 strncpy(old_name, filename, sizeof(old_name));
11889
11890 innobase_convert_to_filename_charset(filename, old_name, MAX_TABLE_NAME_LEN);
11891
11892 if (errors == 0) {
11893 name.assign(filename);
11894 }
11895 }
11896
11897 /** Convert to lower case using the file system charset.
11898 @param[in,out] path Filepath to convert */
convert_to_lower_case(std::string & path)11899 void Fil_path::convert_to_lower_case(std::string &path) {
11900 char lc_path[MAX_TABLE_NAME_LEN + 20];
11901
11902 ut_ad(path.length() < sizeof(lc_path) - 1);
11903
11904 strncpy(lc_path, path.c_str(), sizeof(lc_path) - 1);
11905
11906 innobase_casedn_path(lc_path);
11907
11908 path.assign(lc_path);
11909 }
11910
fil_checkpoint(lsn_t lwm)11911 void fil_checkpoint(lsn_t lwm) { fil_system->checkpoint(lwm); }
11912
fil_count_deleted(space_id_t undo_num)11913 size_t fil_count_deleted(space_id_t undo_num) {
11914 return (fil_system->count_deleted(undo_num));
11915 }
11916
fil_is_deleted(space_id_t space_id)11917 bool fil_is_deleted(space_id_t space_id) {
11918 ut_ad(fsp_is_undo_tablespace(space_id));
11919
11920 return (fil_system->is_deleted(space_id));
11921 }
11922
11923 #endif /* !UNIV_HOTBACKUP */
11924