1 /*****************************************************************************
2 
3 Copyright (c) 1995, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License, version 2.0,
7 as published by the Free Software Foundation.
8 
9 This program is also distributed with certain software (including
10 but not limited to OpenSSL) that is licensed under separate terms,
11 as designated in a particular file or component or in included license
12 documentation.  The authors of MySQL hereby grant you an additional
13 permission to link the program and your derivative works with the
14 separately licensed software that they have included with MySQL.
15 
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19 GNU General Public License, version 2.0, for more details.
20 
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 
27 /** @file fil/fil0fil.cc
28 The tablespace memory cache */
29 
30 #include "my_config.h"
31 
32 #include <errno.h>
33 #include <fcntl.h>
34 #include <sys/types.h>
35 
36 #include "arch0page.h"
37 #include "btr0btr.h"
38 #include "buf0buf.h"
39 #include "buf0flu.h"
40 #include "dict0boot.h"
41 #include "dict0dd.h"
42 #include "dict0dict.h"
43 #include "fsp0file.h"
44 #include "fsp0fsp.h"
45 #include "fsp0space.h"
46 #include "fsp0sysspace.h"
47 #include "ha_prototypes.h"
48 #include "hash0hash.h"
49 #include "log0recv.h"
50 #include "mach0data.h"
51 #include "mem0mem.h"
52 #include "mtr0log.h"
53 #include "my_dbug.h"
54 
55 #include "clone0api.h"
56 #include "os0file.h"
57 #include "page0zip.h"
58 #include "sql/mysqld.h"  // lower_case_file_system
59 #include "srv0srv.h"
60 #include "srv0start.h"
61 
62 #ifndef UNIV_HOTBACKUP
63 #include "buf0lru.h"
64 #include "ibuf0ibuf.h"
65 #include "os0event.h"
66 #include "row0mysql.h"
67 #include "sql_backup_lock.h"
68 #include "sql_class.h"
69 #include "sync0sync.h"
70 #include "trx0purge.h"
71 #else /* !UNIV_HOTBACKUP */
72 #include <cstring>
73 #include "srv0srv.h"
74 #endif /* !UNIV_HOTBACKUP */
75 #include "xb0xb.h"
76 
77 #include "os0thread-create.h"
78 
79 #include "current_thd.h"
80 #include "ha_prototypes.h"
81 
82 #include <array>
83 #include <fstream>
84 #include <functional>
85 #include <list>
86 #include <mutex>
87 #include <thread>
88 #include <tuple>
89 #include <unordered_map>
90 
91 using Dirs = std::vector<std::string>;
92 using Space_id_set = std::set<space_id_t>;
93 
94 constexpr char Fil_path::DB_SEPARATOR;
95 constexpr char Fil_path::OS_SEPARATOR;
96 constexpr const char *Fil_path::SEPARATOR;
97 constexpr const char *Fil_path::DOT_SLASH;
98 constexpr const char *Fil_path::DOT_DOT_SLASH;
99 constexpr const char *Fil_path::SLASH_DOT_DOT_SLASH;
100 
101 dberr_t dict_stats_rename_table(const char *old_name, const char *new_name,
102                                 char *errstr, size_t errstr_sz);
103 
/** Used for collecting the data in boot_tablespaces() */
namespace dd_fil {

/** Names for the five fields of a dd_fil::Moved tuple. The enumerators are
declared in the same order as the element types of Moved below, so they are
presumably meant to be used as std::get<>() indexes - confirm at the call
sites. */
enum {
  /** DD Object ID */
  OBJECT_ID,

  /** InnoDB tablespace ID */
  SPACE_ID,

  /** DD/InnoDB tablespace name */
  SPACE_NAME,

  /** Path in DD tablespace */
  OLD_PATH,

  /** Path where it was found during the scan. */
  NEW_PATH
};

/** One record per tablespace: (object id, space id, name, old path,
new path). */
using Moved = std::tuple<dd::Object_id, space_id_t, std::string, std::string,
                         std::string>;

/** Collection of Moved records. */
using Tablespaces = std::vector<Moved>;
}  // namespace dd_fil
129 
fil_get_scan_threads(size_t num_files)130 size_t fil_get_scan_threads(size_t num_files) {
131   /* Number of additional threads required to scan all the files.
132   n_threads == 0 means that the main thread itself will do all the
133   work instead of spawning any additional threads. */
134   size_t n_threads = num_files / FIL_SCAN_MAX_TABLESPACES_PER_THREAD;
135 
136   /* Return if no additional threads are needed. */
137   if (n_threads == 0) {
138     return 0;
139   }
140 
141   /* Number of concurrent threads supported by the host machine. */
142   size_t max_threads =
143       FIL_SCAN_THREADS_PER_CORE * std::thread::hardware_concurrency();
144 
145   /* If the number of concurrent threads supported by the host
146   machine could not be calculated, assume the supported threads
147   to be FIL_SCAN_MAX_THREADS. */
148   max_threads = max_threads == 0 ? FIL_SCAN_MAX_THREADS : max_threads;
149 
150   /* Restrict the number of threads to the lower of number of threads
151   supported by the host machine or FIL_SCAN_MAX_THREADS. */
152   if (n_threads > max_threads) {
153     n_threads = max_threads;
154   }
155 
156   if (n_threads > FIL_SCAN_MAX_THREADS) {
157     n_threads = FIL_SCAN_MAX_THREADS;
158   }
159 
160   return n_threads;
161 }
162 
163 /* uint16_t is the index into Tablespace_dirs::m_dirs */
164 using Scanned_files = std::vector<std::pair<uint16_t, std::string>>;
165 
166 #ifdef UNIV_PFS_IO
167 mysql_pfs_key_t innodb_tablespace_open_file_key;
168 #endif /* UNIV_PFS_IO */
169 
170 /** System tablespace. */
171 fil_space_t *fil_space_t::s_sys_space;
172 
173 /** Redo log tablespace */
174 fil_space_t *fil_space_t::s_redo_space;
175 
176 #ifdef UNIV_HOTBACKUP
177 /** Directories in which remote general tablespaces have been found in the
178 target directory during apply log operation */
179 Dir_set rem_gen_ts_dirs;
180 
181 /** true in case the apply-log operation is being performed
182 in the data directory */
183 bool replay_in_datadir = false;
184 
185 /* Re-define mutex macros to use the Mutex class defined by the MEB
186 source. MEB calls the routines in "fil0fil.cc" in parallel and,
187 therefore, the mutex protecting the critical sections of the tablespace
188 memory cache must be included also in the MEB compilation of this
189 module. */
190 #undef mutex_create
191 #undef mutex_free
192 #undef mutex_enter
193 #undef mutex_exit
194 #undef mutex_own
195 #undef mutex_validate
196 
197 #define mutex_create(I, M) new (M) meb::Mutex()
198 #define mutex_free(M) delete (M)
199 #define mutex_enter(M) (M)->lock()
200 #define mutex_exit(M) (M)->unlock()
201 #define mutex_own(M) 1
202 #define mutex_validate(M) 1
203 
204 /** Process a MLOG_FILE_CREATE redo record.
205 @param[in]	page_id		Page id of the redo log record
206 @param[in]	flags		Tablespace flags
207 @param[in]	name		Tablespace filename */
208 static void meb_tablespace_redo_create(const page_id_t &page_id, uint32_t flags,
209                                        const char *name);
210 
211 /** Process a MLOG_FILE_RENAME redo record.
212 @param[in]	page_id		Page id of the redo log record
213 @param[in]	from_name	Tablespace from filename
214 @param[in]	to_name		Tablespace to filename */
215 static void meb_tablespace_redo_rename(const page_id_t &page_id,
216                                        const char *from_name,
217                                        const char *to_name);
218 
219 /** Process a MLOG_FILE_DELETE redo record.
220 @param[in]	page_id		Page id of the redo log record
221 @param[in]	name		Tablespace filename */
222 static void meb_tablespace_redo_delete(const page_id_t &page_id,
223                                        const char *name);
224 
225 #endif /* UNIV_HOTBACKUP */
226 
227 /*
228                 IMPLEMENTATION OF THE TABLESPACE MEMORY CACHE
229                 =============================================
230 
231 The tablespace cache is responsible for providing fast read/write access to
232 tablespaces and logs of the database. File creation and deletion is done
233 in other modules which know more of the logic of the operation, however.
234 
235 Only the system  tablespace consists of a list  of files. The size of these
236 files does not have to be divisible by the database block size, because
237 we may just leave the last incomplete block unused. When a new file is
238 appended to the tablespace, the maximum size of the file is also specified.
239 At the moment, we think that it is best to extend the file to its maximum
240 size already at the creation of the file, because then we can avoid dynamically
241 extending the file when more space is needed for the tablespace.
242 
243 Non system tablespaces contain only a single file.
244 
245 A block's position in the tablespace is specified with a 32-bit unsigned
246 integer. The files in the list  are thought to be catenated, and the block
247 corresponding to an address n is the nth block in the catenated file (where
248 the first block is named the 0th block, and the incomplete block fragments
249 at the end of files are not taken into account). A tablespace can be extended
250 by appending a new file at the end of the list.
251 
252 Our tablespace concept is similar to the one of Oracle.
253 
To have fast access to a tablespace or a log file, we put the data structures
in a hash table. Each tablespace and log file is given a unique 32-bit
identifier, its tablespace ID.
257 
Some operating systems do not support many open files at the same time.
Therefore, we put the open files in an LRU-list. If we need to open another
260 file, we may close the file at the end of the LRU-list. When an I/O-operation
261 is pending on a file, the file cannot be closed. We take the file nodes with
262 pending I/O-operations out of the LRU-list and keep a count of pending
263 operations. When an operation completes, we decrement the count and return
264 the file to the LRU-list if the count drops to zero.
265 
The data structure (Fil_shard) that keeps track of the tablespace ID to
fil_space_t* mapping is hashed on the tablespace ID. The tablespace name to
fil_space_t* mapping is stored in the same shard. A shard tracks the flushing
and open state of a file. When we run out of open file handles, we use a
ticketing system to serialize the file open, see Fil_shard::reserve_open_slot()
and Fil_shard::release_open_slot().
272 
273 When updating the global/shared data in Fil_system acquire the mutexes of
274 all shards in ascending order. The shard mutex covers the fil_space_t data
275 members as noted in the fil_space_t and fil_node_t definition. */
276 
277 /** Reference to the server data directory. */
278 Fil_path MySQL_datadir_path;
279 
280 /** Reference to the server undo directory. */
281 Fil_path MySQL_undo_path;
282 
283 /** The undo path is different from any other known directory. */
284 bool MySQL_undo_path_is_unique;
285 
/** Common InnoDB file extensions */
287 const char *dot_ext[] = {"", ".ibd", ".cfg", ".cfp", ".ibt", ".ibu", ".dblwr"};
288 
289 /** The number of fsyncs done to the log */
290 ulint fil_n_log_flushes = 0;
291 
292 /** Number of pending redo log flushes */
293 ulint fil_n_pending_log_flushes = 0;
294 
295 /** Number of pending tablespace flushes */
296 ulint fil_n_pending_tablespace_flushes = 0;
297 
298 /** Number of files currently open */
299 ulint fil_n_file_opened = 0;
300 
/** Status values describing the result of loading tablespace file(s). */
enum fil_load_status {
  /** The tablespace file(s) were found and valid. */
  FIL_LOAD_OK,

  /** The name no longer matches space_id */
  FIL_LOAD_ID_CHANGED,

  /** The file(s) were not found */
  FIL_LOAD_NOT_FOUND,

  /** The file(s) were not valid */
  FIL_LOAD_INVALID,

  /** Invalid encryption metadata in page 0 */
  FIL_LOAD_INVALID_ENCRYPTION_META,

  /** The tablespace file ID in the first page doesn't match
  expected value. */
  FIL_LOAD_MISMATCH
};
321 
322 /** File operations for tablespace */
323 enum fil_operation_t {
324 
325   /** delete a single-table tablespace */
326   FIL_OPERATION_DELETE,
327 
328   /** close a single-table tablespace */
329   FIL_OPERATION_CLOSE
330 };
331 
332 /** The null file address */
333 fil_addr_t fil_addr_null = {FIL_NULL, 0};
334 
335 /** Maximum number of pages to read to determine the space ID. */
336 static const size_t MAX_PAGES_TO_READ = 1;
337 
338 #ifndef UNIV_HOTBACKUP
339 /** Maximum number of shards supported. */
340 static const size_t MAX_SHARDS = 64;
341 
342 /** The redo log is in its own shard. */
343 static const size_t REDO_SHARD = MAX_SHARDS - 1;
344 
345 /** Number of undo shards to reserve. */
346 static const size_t UNDO_SHARDS = 4;
347 
348 /** The UNDO logs have their own shards (4). */
349 static const size_t UNDO_SHARDS_START = REDO_SHARD - (UNDO_SHARDS + 1);
350 #else  /* !UNIV_HOTBACKUP */
351 
352 /** Maximum number of shards supported. */
353 static const size_t MAX_SHARDS = 1;
354 
355 /** The redo log is in its own shard. */
356 static const size_t REDO_SHARD = 0;
357 
358 /** The UNDO logs have their own shards (4). */
359 static const size_t UNDO_SHARDS_START = 0;
360 #endif /* !UNIV_HOTBACKUP */
361 
362 /** Sentinel for empty open slot. */
363 static const size_t EMPTY_OPEN_SLOT = std::numeric_limits<size_t>::max();
364 
365 /** We want to store the line number from where it was called. */
366 #define mutex_acquire() acquire(__LINE__)
367 
368 /** Hash a NUL terminated 'string' */
369 struct Char_Ptr_Hash {
370   /** Hashing function
371   @param[in]	ptr		NUL terminated string to hash
372   @return the hash */
operator ()Char_Ptr_Hash373   size_t operator()(const char *ptr) const { return (ut_fold_string(ptr)); }
374 };
375 
/** Equality functor for NUL terminated C strings. */
struct Char_Ptr_Compare {
  /** Compare the contents of two NUL terminated strings with strcmp().
  @param[in]	lhs		Left hand side
  @param[in]	rhs		Right hand side
  @return true if the contents match */
  bool operator()(const char *lhs, const char *rhs) const {
    const int order = strcmp(lhs, rhs);

    return (order == 0);
  }
};
386 
387 /** Tablespace files disovered during startup. */
388 class Tablespace_files {
389  public:
390   using Names = std::vector<std::string, ut_allocator<std::string>>;
391   using Paths = std::unordered_map<space_id_t, Names>;
392 
393   /** Default constructor
394   @param[in]	dir		Directory that the files are under */
395   explicit Tablespace_files(const std::string &dir);
396 
397   /** Add a space ID to filename mapping.
398   @param[in]	space_id	Tablespace ID
399   @param[in]	name		File name.
400   @return number of files that map to the space ID */
401   size_t add(space_id_t space_id, const std::string &name)
402       MY_ATTRIBUTE((warn_unused_result));
403 
404   /** Get the file names that map to a space ID
405   @param[in]	space_id	Tablespace ID
406   @return the filenames that map to space id */
find(space_id_t space_id)407   Names *find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
408     ut_ad(space_id != TRX_SYS_SPACE);
409 
410     if (dict_sys_t::is_reserved(space_id) &&
411         space_id != dict_sys_t::s_space_id) {
412       auto it = m_undo_paths.find(space_id);
413 
414       if (it != m_undo_paths.end()) {
415         return (&it->second);
416       }
417 
418     } else {
419       auto it = m_ibd_paths.find(space_id);
420 
421       if (it != m_ibd_paths.end()) {
422         return (&it->second);
423       }
424     }
425 
426     return (nullptr);
427   }
428 
429   /** Remove the entry for the space ID.
430   @param[in]	space_id	Tablespace ID mapping to remove
431   @return true if erase successful */
erase_path(space_id_t space_id)432   bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
433     ut_ad(space_id != TRX_SYS_SPACE);
434 
435     if (dict_sys_t::is_reserved(space_id) &&
436         space_id != dict_sys_t::s_space_id) {
437       auto n_erased = m_undo_paths.erase(space_id);
438 
439       return (n_erased == 1);
440     } else {
441       auto n_erased = m_ibd_paths.erase(space_id);
442 
443       return (n_erased == 1);
444     }
445 
446     return (false);
447   }
448 
449   /** Clear all the tablespace data. */
clear()450   void clear() {
451     m_ibd_paths.clear();
452     m_undo_paths.clear();
453   }
454 
455   /** Open all known tablespaces. */
456   void open_ibds() const;
457 
458   /** @return m_dir */
root() const459   const Fil_path &root() const { return (m_dir); }
460 
461   /** @return the directory path specified by the user. */
path() const462   const std::string &path() const { return (m_dir.path()); }
463 
464  private:
465   /* Note:  The file names in m_ibd_paths and m_undo_paths are relative
466   to m_real_path. */
467 
468   /** Mapping from tablespace ID to data filenames */
469   Paths m_ibd_paths;
470 
471   /** Mapping from tablespace ID to Undo files */
472   Paths m_undo_paths;
473 
474   /** Top level directory where the above files were found. */
475   Fil_path m_dir;
476 };
477 
478 /** Directories scanned during startup and the files discovered. */
479 class Tablespace_dirs {
480  public:
481   using Result = std::pair<std::string, Tablespace_files::Names *>;
482 
483   /** Constructor */
Tablespace_dirs()484   Tablespace_dirs()
485       : m_dirs()
486 #if !defined(__SUNPRO_CC)
487         ,
488         m_checked()
489 #endif /* !__SUNPRO_CC */
490   {
491 #if defined(__SUNPRO_CC)
492     m_checked = ATOMIC_VAR_INIT(0);
493 #endif /* __SUNPRO_CC */
494   }
495 
496   /** Normalize and save a directory to scan for IBD and IBU datafiles
497   before recovery.
498   @param[in]  directory    directory to scan for ibd and ibu files
499   @param[in]  is_undo_dir  true for an undo directory */
500   void set_scan_dir(const std::string &directory, bool is_undo_dir = false);
501 
502   /** Normalize and save a list of directories to scan for IBD and IBU
503   datafiles before recovery.
504   @param[in]  directories  Directories to scan for ibd and ibu files */
505   void set_scan_dirs(const std::string &directories);
506 
507   /** Discover tablespaces by reading the header from .ibd files.
508   @return DB_SUCCESS if all goes well */
509   dberr_t scan(bool populate_fil_cache) MY_ATTRIBUTE((warn_unused_result));
510 
511   /** Clear all the tablespace file data but leave the list of
512   scanned directories in place. */
clear()513   void clear() {
514     for (auto &dir : m_dirs) {
515       dir.clear();
516     }
517 
518     m_checked = 0;
519   }
520 
521   /** Erase a space ID to filename mapping.
522   @param[in]	space_id	Tablespace ID to erase
523   @return true if successful */
erase_path(space_id_t space_id)524   bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
525     for (auto &dir : m_dirs) {
526       if (dir.erase_path(space_id)) {
527         return (true);
528       }
529     }
530 
531     return (false);
532   }
533 
534   /* Find the first matching space ID -> name mapping.
535   @param[in]	space_id	Tablespace ID
536   @return directory searched and pointer to names that map to the
537           tablespace ID */
find(space_id_t space_id)538   Result find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
539     for (auto &dir : m_dirs) {
540       const auto names = dir.find(space_id);
541 
542       if (names != nullptr) {
543         return (Result{dir.path(), names});
544       }
545     }
546 
547     return (Result{"", nullptr});
548   }
549 
550   /** Determine if this Fil_path contains the path provided.
551   @param[in]  path  file or directory path to compare.
552   @return true if this Fil_path contains path */
contains(const std::string & path) const553   bool contains(const std::string &path) const
554       MY_ATTRIBUTE((warn_unused_result)) {
555     const Fil_path descendant{path};
556 
557     for (const auto &dir : m_dirs) {
558       if (dir.root().is_same_as(descendant) ||
559           dir.root().is_ancestor(descendant)) {
560         return (true);
561       }
562     }
563     return (false);
564   }
565 
566   /** Insert a file with given space ID to filename mapping.
567   @param[in]  space_id  Tablespace ID to insert
568   @param[in]  filename  file name to insert
569   @return true if successful */
insert(space_id_t space_id,const std::string & filename)570   bool insert(space_id_t space_id, const std::string &filename)
571       MY_ATTRIBUTE((warn_unused_result)) {
572     Fil_path file{filename};
573 
574     for (auto &dir : m_dirs) {
575       const auto &d = dir.root().abs_path();
576       auto abs_path = Fil_path::get_real_path(d);
577 
578       if (dir.root().is_ancestor(file) ||
579           abs_path.compare(file.abs_path()) == 0) {
580         return (dir.add(space_id, filename));
581       }
582     }
583 
584     return (false);
585   }
586 
587   /** Get the list of directories that InnoDB knows about.
588   @return the list of directories 'dir1;dir2;....;dirN' */
get_dirs() const589   std::string get_dirs() const {
590     std::string dirs;
591 
592     ut_ad(!m_dirs.empty());
593 
594     for (const auto &dir : m_dirs) {
595       dirs.append(dir.root());
596       dirs.push_back(FIL_PATH_SEPARATOR);
597     }
598 
599     dirs.pop_back();
600 
601     ut_ad(!dirs.empty());
602 
603     return (dirs);
604   }
605 
606   /** Open all known tablespaces. */
607   void open_ibds() const;
608 
609  private:
610   /** Print the duplicate filenames for a tablespce ID to the log
611   @param[in]	duplicates	Duplicate tablespace IDs*/
612   void print_duplicates(const Space_id_set &duplicates);
613 
614   /** first=dir path from the user, second=files found under first. */
615   using Scanned = std::vector<Tablespace_files>;
616 
617   /** Report a warning that a path is being ignored and include the reason. */
618   void warn_ignore(std::string path_in, const char *reason);
619 
620   /** Add a single path specification to this list of tablespace directories.
621   Convert it to an absolute path. Check if the path is valid.  Ignore
622   unreadable, duplicate or invalid directories.
623   @param[in]  str  Path specification to tokenize
624   @param[in]  is_undo_dir  true for an undo directory */
625   void add_path(const std::string &str, bool is_undo_dir = false);
626 
627   /** Add a delimited list of path specifications to this list of tablespace
628   directories. Convert relative paths to absolute paths. Check if the paths
629   are valid.  Ignore unreadable, duplicate or invalid directories.
630   @param[in]	str		Path specification to tokenize
631   @param[in]	delimiters	Delimiters */
632   void add_paths(const std::string &str, const std::string &delimiters);
633 
634   using Const_iter = Scanned_files::const_iterator;
635 
636   /** Check for duplicate tablespace IDs.
637   @param[in]	start		Start of slice
638   @param[in]	end		End of slice
639   @param[in]	thread_id	Thread ID
640   @param[in,out]	mutex		Mutex protecting the global state
641   @param[in,out]	unique		To check for duplciates
642   @param[in,out]	duplicates	Duplicate space IDs found */
643   void duplicate_check(const Const_iter &start, const Const_iter &end,
644                        size_t thread_id, std::mutex *mutex,
645                        Space_id_set *unique, Space_id_set *duplicates);
646 
647   /** Open IBD tablespaces.
648   @param[in]  start   Start of slice
649   @param[in]  end   End of slice
650   @param[in]  thread_id Thread ID
651   @param[out] result false in case of failure */
652   void open_ibd(const Const_iter &start, const Const_iter &end,
653                 size_t thread_id, bool &result);
654 
655  private:
656   /** Directories scanned and the files discovered under them. */
657   Scanned m_dirs;
658 
659   /** Number of files checked. */
660   std::atomic_size_t m_checked;
661 };
662 
/** Determine if user has explicitly disabled fsync().
On non-Windows builds this is true when the purpose of tablespace (s) is
FIL_TYPE_TABLESPACE and srv_unix_file_flush_method is
SRV_UNIX_O_DIRECT_NO_FSYNC. On Windows it is always 0.
@param[in]	s	Pointer to an object with a 'purpose' member; the
                        argument is not evaluated on Windows */
#ifndef _WIN32
#define fil_buffering_disabled(s)         \
  ((s)->purpose == FIL_TYPE_TABLESPACE && \
   srv_unix_file_flush_method == SRV_UNIX_O_DIRECT_NO_FSYNC)
#else /* _WIN32 */
#define fil_buffering_disabled(s) (0)
#endif /* _WIN32 */
671 
672 class Fil_shard {
673   using File_list = UT_LIST_BASE_NODE_T(fil_node_t);
674   using Space_list = UT_LIST_BASE_NODE_T(fil_space_t);
675   using Spaces = std::unordered_map<space_id_t, fil_space_t *>;
676 
677   using Names = std::unordered_map<const char *, fil_space_t *, Char_Ptr_Hash,
678                                    Char_Ptr_Compare>;
679 
680  public:
681   /** Constructor
682   @param[in]	shard_id	Shard ID  */
683   explicit Fil_shard(size_t shard_id);
684 
  /** Destructor. Destroys the shard mutex and then asserts that both the
  LRU list and the unflushed spaces list are empty. */
  ~Fil_shard() {
    mutex_destroy(&m_mutex);
    ut_a(UT_LIST_GET_LEN(m_LRU) == 0);
    ut_a(UT_LIST_GET_LEN(m_unflushed_spaces) == 0);
  }
691 
692   /** @return the shard ID */
id() const693   size_t id() const { return (m_id); }
694 
  /** Acquire the mutex. Normally invoked through the mutex_acquire() macro,
  which passes __LINE__ of the call site.
  @param[in]	line	Line number from where it was called; passed on to
                          the mutex together with __FILE__, unused when
                          UNIV_HOTBACKUP is defined */
  void acquire(int line) const {
#ifndef UNIV_HOTBACKUP
    m_mutex.enter(srv_n_spin_wait_rounds, srv_spin_wait_delay, __FILE__, line);
#else
    mutex_enter(&m_mutex);
#endif /* !UNIV_HOTBACKUP */
  }
704 
705   /** Release the mutex. */
mutex_release() const706   void mutex_release() const { mutex_exit(&m_mutex); }
707 
#ifdef UNIV_DEBUG
  /** Only available when UNIV_DEBUG is defined.
  @return true if the mutex is owned. */
  bool mutex_owned() const {
    const bool owned = mutex_own(&m_mutex);

    return (owned);
  }
#endif /* UNIV_DEBUG */
712 
713   /** Mutex protecting this shard. */
714 
715 #ifndef UNIV_HOTBACKUP
716   mutable ib_mutex_t m_mutex;
717 #else
718   mutable meb::Mutex m_mutex;
719 #endif /* !UNIV_HOTBACKUP */
720 
721   /** Fetch the fil_space_t instance that maps to space_id.
722   @param[in]	space_id	Tablespace ID to lookup
723   @return tablespace instance or nullptr if not found. */
get_space_by_id(space_id_t space_id) const724   fil_space_t *get_space_by_id(space_id_t space_id) const
725       MY_ATTRIBUTE((warn_unused_result)) {
726     ut_ad(m_id == REDO_SHARD || mutex_owned());
727 
728     auto it = m_spaces.find(space_id);
729 
730     if (it == m_spaces.end()) {
731       return (nullptr);
732     }
733 
734     ut_ad(it->second->magic_n == FIL_SPACE_MAGIC_N);
735 
736     return (it->second);
737   }
738 
739   /** Fetch the fil_space_t instance that maps to the name.
740   @param[in]	name		Tablespace name to lookup
741   @return tablespace instance or nullptr if not found. */
get_space_by_name(const char * name) const742   fil_space_t *get_space_by_name(const char *name) const
743       MY_ATTRIBUTE((warn_unused_result)) {
744     ut_ad(mutex_owned());
745 
746     auto it = m_names.find(name);
747 
748     if (it == m_names.end()) {
749       return (nullptr);
750     }
751 
752     ut_ad(it->second->magic_n == FIL_SPACE_MAGIC_N);
753 
754     return (it->second);
755   }
756 
757   /** Tries to close a file in the shard LRU list.
758   The caller must hold the Fil_shard::m_mutex.
759   @param[in] print_info		if true, prints information
760                                   why it cannot close a file
761   @return true if success, false if should retry later */
762   bool close_files_in_LRU(bool print_info) MY_ATTRIBUTE((warn_unused_result));
763 
764   /** Remove the file node from the LRU list.
765   @param[in,out]	file		File for the tablespace */
766   void remove_from_LRU(fil_node_t *file);
767 
768   /** Add the file node to the LRU list if required.
769   @param[in,out]	file		File for the tablespace */
770   void file_opened(fil_node_t *file);
771 
772   /** Open all the system files.
773   @param[in]	max_n_open	Max files that can be opened.
774   @param[in]	n_open		Current number of open files */
775   void open_system_tablespaces(size_t max_n_open, size_t *n_open);
776 
777   /** Close a tablespace file.
778   @param[in,out]	file		Tablespace file to close
779   @param[in]	LRU_close	true if called from LRU close */
780   void close_file(fil_node_t *file, bool LRU_close);
781 
782   /** Close a tablespace file based on tablespace ID.
783   @param[in]	space_id	Tablespace ID
784   @return false if space_id was not found. */
785   bool close_file(space_id_t space_id);
786 
787   /** Prepare to free a file object from a tablespace
788   memory cache.
789   @param[in,out]	file	Tablespace file
790   @param[in]	space	tablespace */
791   void file_close_to_free(fil_node_t *file, fil_space_t *space);
792 
793   /** Close log files.
794   @param[in]	free_all	If set then free all */
795   void close_log_files(bool free_all);
796 
797   /** Close all open files. */
798   void close_all_files();
799 
800   /** Detach a space object from the tablespace memory cache and
801   closes the tablespace files but does not delete them.
802   There must not be any pending I/O's or flushes on the files.
803   @param[in,out]	space		tablespace */
804   void space_detach(fil_space_t *space);
805 
806   /** Delete the instance that maps to space_id
807   @param[in]	space_id	Tablespace ID to delete */
space_delete(space_id_t space_id)808   void space_delete(space_id_t space_id) {
809     ut_ad(mutex_owned());
810 
811     auto it = m_spaces.find(space_id);
812 
813     if (it != m_spaces.end()) {
814       m_names.erase(it->second->name);
815       m_spaces.erase(it);
816     }
817   }
818 
819 #ifndef UNIV_HOTBACKUP
820   /** Purge entries from m_deleted that are lower than LWM.
821   @param[in]  lwm  No dirty pages in the buffer pool less than this LSN. */
checkpoint(lsn_t lwm)822   void checkpoint(lsn_t lwm) {
823     /* Avoid cleaning up old undo files while this is on. */
824     DBUG_EXECUTE_IF("ib_undo_trunc_checkpoint_off", return;);
825 
826     mutex_acquire();
827 
828     for (auto it = m_deleted.begin(); it != m_deleted.end(); /* No op */) {
829       auto space = it->second;
830 
831       if (space->m_deleted_lsn <= lwm) {
832         ut_a(space->files.front().n_pending == 0);
833 
834         space_delete(space->id);
835         space_free_low(space);
836 
837         it = m_deleted.erase(it);
838       } else {
839         ++it;
840       }
841     }
842 
843     mutex_release();
844   }
845 
count_deleted(space_id_t undo_num)846   size_t count_deleted(space_id_t undo_num) {
847     size_t count = 0;
848 
849     mutex_acquire();
850 
851     for (auto deleted : m_deleted) {
852       if (undo::id2num(deleted.first) == undo_num) {
853         count++;
854       }
855     }
856 
857     mutex_release();
858 
859     return (count);
860   }
861 
862   /** Check if a particular undo space_id for a page in the buffer pool has
863   been deleted recently.  Its space_id will be found in m_deleted until
864   Fil:shard::checkpoint removes all its pages from the buffer pool and the
865   fil_space_t from Fil_system.
866   @return true if this space_id is in the list of recently deleted spaces. */
is_deleted(space_id_t space_id)867   bool is_deleted(space_id_t space_id) {
868     bool found = false;
869 
870     mutex_acquire();
871 
872     for (auto deleted : m_deleted) {
873       if (deleted.first == space_id) {
874         found = true;
875         break;
876       }
877     }
878 
879     mutex_release();
880 
881     return (found);
882   }
883 
884 #endif /* !UNIV_HOTBACKUP */
885 
886   /** Frees a space object from the tablespace memory cache.
887   Closes a tablespaces' files but does not delete them.
888   There must not be any pending I/O's or flushes on the files.
889   @param[in]	space_id	Tablespace ID
890   @return fil_space_t instance on success or nullptr */
891   fil_space_t *space_free(space_id_t space_id)
892       MY_ATTRIBUTE((warn_unused_result));
893 
894   /** Map the space ID and name to the tablespace instance.
895   @param[in]	space		Tablespace instance */
896   void space_add(fil_space_t *space);
897 
898   /** Prepare to free a file. Remove from the unflushed list
899   if there are no pending flushes.
900   @param[in,out]	file		File instance to free */
901   void prepare_to_free_file(fil_node_t *file);
902 
  /** If the tablespace is on the unflushed list and there
  are no pending flushes then remove from the unflushed list.
  @param[in,out]	space		Tablespace to remove */
  void remove_from_unflushed_list(fil_space_t *space);
907 
908   /** Updates the data structures when an I/O operation
909   finishes. Updates the pending I/O's field in the file
910   appropriately.
911   @param[in]	file		Tablespace file
912   @param[in]	type		Marks the file as modified
913                                   if type == WRITE */
914   void complete_io(fil_node_t *file, const IORequest &type);
915 
916   /** Prepares a file for I/O. Opens the file if it is closed.
917   Updates the pending I/O's field in the file and the system
918   appropriately. Takes the file off the LRU list if it is in
919   the LRU list.
920   @param[in]	file		Tablespace file for IO
921   @param[in]	extend		true if file is being extended
922   @return false if the file can't be opened, otherwise true */
923   bool prepare_file_for_io(fil_node_t *file, bool extend)
924       MY_ATTRIBUTE((warn_unused_result));
925 
926   /** Reserves the mutex and tries to make sure we can
927   open at least one file while holding it. This should be called
928   before calling prepare_file_for_io(), because that function
929   may need to open a file.
930   @param[in]	space_id	Tablespace ID
931   @param[out]	space		Tablespace instance
932   @return true if a slot was reserved. */
933   bool mutex_acquire_and_get_space(space_id_t space_id, fil_space_t *&space)
934       MY_ATTRIBUTE((warn_unused_result));
935 
936   /** Remap the tablespace to the new name.
937   @param[in]	space		Tablespace instance with old name
938   @param[in]	new_name	New tablespace name */
939   void update_space_name_map(fil_space_t *space, const char *new_name);
940 
941   /** Flush the redo log writes to disk, possibly cached by the OS. */
942   void flush_file_redo();
943 
  /** Flush the file spaces of the requested purpose.
  NOTE(review): the earlier wording spoke of collecting the tablespace IDs
  of unflushed tablespaces "in space_ids"; there is no such parameter, so
  that presumably describes a local in the definition -- confirm there.
  @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
                                  can be ORred */
  void flush_file_spaces(uint8_t purpose);
948 
949   /** Try to extend a tablespace if it is smaller than the specified size.
950   @param[in,out]	space		tablespace
951   @param[in]	size		desired size in pages
952   @return whether the tablespace is at least as big as requested */
953   bool space_extend(fil_space_t *space, page_no_t size)
954       MY_ATTRIBUTE((warn_unused_result));
955 
956   /** Flushes to disk possible writes cached by the OS. If the space does
957   not exist or is being dropped, does not do anything.
958   @param[in]	space_id	File space ID (this can be a group of
959                                   log files or a tablespace of the
960                                   database) */
961   void space_flush(space_id_t space_id);
962 
963   /** Open a file of a tablespace.
964   The caller must own the fil_system mutex.
965   @param[in,out]	file		Tablespace file
966   @param[in]	extend		true if the file is being extended
967   @return false if the file can't be opened, otherwise true */
968   bool open_file(fil_node_t *file, bool extend)
969       MY_ATTRIBUTE((warn_unused_result));
970 
971   /** Checks if all the file nodes in a space are flushed.
972   The caller must hold all fil_system mutexes.
973   @param[in]	space		Tablespace to check
974   @return true if all are flushed */
975   bool space_is_flushed(const fil_space_t *space)
976       MY_ATTRIBUTE((warn_unused_result));
977 
978   /** Open each file of a tablespace if not already open.
979   @param[in]	space_id	tablespace identifier
980   @retval	true	if all file nodes were opened
981   @retval	false	on failure */
982   bool space_open(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result));
983 
  /** Opens the files associated with a tablespace and returns a
  pointer to the fil_space_t that is in the memory cache associated
  with a space id.
  @param[in]	space_id	Get the tablespace instance for this ID
  @return fil_space_t pointer, nullptr if space not found */
  fil_space_t *space_load(space_id_t space_id)
      MY_ATTRIBUTE((warn_unused_result));
991 
992   /** Check pending operations on a tablespace.
993   @param[in]	space_id	Tablespace ID
994   @param[out]	space		tablespace instance in memory
995   @param[out]	path		tablespace path
996   @return DB_SUCCESS or DB_TABLESPACE_NOT_FOUND. */
997   dberr_t space_check_pending_operations(space_id_t space_id,
998                                          fil_space_t *&space, char **path) const
999       MY_ATTRIBUTE((warn_unused_result));
1000 
1001   /** Rename a single-table tablespace.
1002   The tablespace must exist in the memory cache.
1003   @param[in]	space_id	Tablespace ID
1004   @param[in]	old_path	Old file name
1005   @param[in]	new_name	New tablespace  name in the schema/space
1006   @param[in]	new_path_in	New file name, or nullptr if it
1007                                   is located in the normal data directory
1008   @return InnoDB error code */
1009   dberr_t space_rename(space_id_t space_id, const char *old_path,
1010                        const char *new_name, const char *new_path_in)
1011       MY_ATTRIBUTE((warn_unused_result));
1012 
  /** Deletes an IBD or IBU tablespace.
  The tablespace must be cached in the memory cache. This will delete the
  datafile, fil_space_t & fil_node_t entries from the file_system_t cache.
  @param[in]	space_id	Tablespace ID
  @param[in]	buf_remove	Specify the action to take on the pages
                                  for this table in the buffer pool.
  @return DB_SUCCESS, DB_TABLESPACE_NOT_FOUND or DB_IO_ERROR */
  dberr_t space_delete(space_id_t space_id, buf_remove_t buf_remove)
      MY_ATTRIBUTE((warn_unused_result));
1022 
1023   /** Truncate the tablespace to needed size.
1024   @param[in]	space_id	Tablespace ID to truncate
1025   @param[in]	size_in_pages	Truncate size.
1026   @return true if truncate was successful. */
1027   bool space_truncate(space_id_t space_id, page_no_t size_in_pages)
1028       MY_ATTRIBUTE((warn_unused_result));
1029 
1030   /** Create a space memory object and put it to the fil_system hash
1031   table. The tablespace name is independent from the tablespace file-name.
1032   Error messages are issued to the server log.
1033   @param[in]	name		Tablespace name
1034   @param[in]	space_id	Tablespace ID
1035   @param[in]	flags		Tablespace flags
1036   @param[in]	purpose		Tablespace purpose
1037   @return pointer to created tablespace
1038   @retval nullptr on failure (such as when the same tablespace exists) */
1039   fil_space_t *space_create(const char *name, space_id_t space_id,
1040                             uint32_t flags, fil_type_t purpose)
1041       MY_ATTRIBUTE((warn_unused_result));
1042 
1043   /** Adjust temporary auto-generated names created during
1044   file discovery with correct tablespace names from the DD.
1045   @param[in,out]	space		Tablespace
1046   @param[in]	dd_space_name	Tablespace name from the DD
1047   @return true if the tablespace is a general or undo tablespace. */
1048   bool adjust_space_name(fil_space_t *space, const char *dd_space_name);
1049 
1050   /** Returns true if a matching tablespace exists in the InnoDB
1051   tablespace memory cache.
1052   @param[in]	space_id	Tablespace ID
1053   @param[in]	name		Tablespace name used in space_create().
1054   @param[in]	print_err	Print detailed error information to the
1055                                   error log if a matching tablespace is
1056                                   not found from memory.
1057   @param[in]	adjust_space	Whether to adjust space id on mismatch
1058   @param[in]	heap			Heap memory
1059   @param[in]	table_id		table id
1060   @return true if a matching tablespace exists in the memory cache */
1061   bool space_check_exists(space_id_t space_id, const char *name, bool print_err,
1062                           bool adjust_space, mem_heap_t *heap,
1063                           table_id_t table_id)
1064       MY_ATTRIBUTE((warn_unused_result));
1065 
1066   /** Read or write log file data synchronously.
1067   @param[in]	type		IO context
1068   @param[in]	page_id		page id
1069   @param[in]	page_size	page size
1070   @param[in]	byte_offset	remainder of offset in bytes; in AIO
1071                                   this must be divisible by the OS block
1072                                   size
1073   @param[in]	len		how many bytes to read or write; this
1074                                   must not cross a file boundary; in AIO
1075                                   this must be a block size multiple
1076   @param[in,out]	buf		buffer where to store read data or
1077                                   from where to write
1078   @return error code
1079   @retval DB_SUCCESS on success */
1080   dberr_t do_redo_io(const IORequest &type, const page_id_t &page_id,
1081                      const page_size_t &page_size, ulint byte_offset, ulint len,
1082                      void *buf) MY_ATTRIBUTE((warn_unused_result));
1083 
1084   /** Read or write data. This operation could be asynchronous (aio).
1085   @param[in]	type		IO context
1086   @param[in]	sync		whether synchronous aio is desired
1087   @param[in]	page_id		page id
1088   @param[in]	page_size	page size
1089   @param[in]	byte_offset	remainder of offset in bytes; in AIO
1090                                   this must be divisible by the OS
1091                                   block size
1092   @param[in]	len		how many bytes to read or write;
1093                                   this must not cross a file boundary;
1094                                   in AIO this must be a block size
1095                                   multiple
1096   @param[in,out]	buf		buffer where to store read data
1097                                   or from where to write; in AIO
1098                                   this must be appropriately aligned
1099   @param[in]	message		message for AIO handler if !sync,
1100                                   else ignored
1101   @return error code
1102   @retval DB_SUCCESS on success
1103   @retval DB_TABLESPACE_DELETED if the tablespace does not exist */
1104   dberr_t do_io(const IORequest &type, bool sync, const page_id_t &page_id,
1105                 const page_size_t &page_size, ulint byte_offset, ulint len,
1106                 void *buf, void *message) MY_ATTRIBUTE((warn_unused_result));
1107 
1108   /** Iterate through all tablespaces
1109   @param[in]  include_log Include redo log space, if true
1110   @param[in]  f   Callback
1111   @return any error returned by the callback function. */
1112   dberr_t iterate_spaces(bool include_log, Fil_space_iterator::Function &f)
1113       MY_ATTRIBUTE((warn_unused_result));
1114 
  /** Iterate through all persistent tablespace files
  (FIL_TYPE_TABLESPACE) returning the nodes via callback function f.
  @param[in]	include_log	include log files, if true
  @param[in]	f		Callback
  @return any error returned by the callback function. */
  dberr_t iterate(bool include_log, Fil_iterator::Function &f)
      MY_ATTRIBUTE((warn_unused_result));
1122 
1123   /** Open an ibd tablespace and add it to the InnoDB data structures.
1124   This is similar to fil_ibd_open() except that it is used while
1125   processing the redo and DDL log, so the data dictionary is not
1126   available and very little validation is done. The tablespace name
1127   is extracted from the dbname/tablename.ibd portion of the filename,
1128   which assumes that the file is a file-per-table tablespace. Any name
1129   will do for now. General tablespace names will be read from the
1130   dictionary after it has been recovered. The tablespace flags are read
1131   at this time from the first page of the file in validate_for_recovery().
1132   @param[in]	space_id	tablespace ID
1133   @param[in]	path		path/to/databasename/tablename.ibd
1134   @param[out]	space		the tablespace, or nullptr on error
1135   @return status of the operation */
1136   fil_load_status ibd_open_for_recovery(space_id_t space_id,
1137                                         const std::string &path,
1138                                         fil_space_t *&space)
1139       MY_ATTRIBUTE((warn_unused_result));
1140 
  /** Attach a file to a tablespace
  @param[in]	name		file name of a file that is not open
  @param[in]	size		file size in entire database blocks
  @param[in,out]	space		tablespace from fil_space_create()
  @param[in]	is_raw		true if this is a raw device
                                  or partition
  @param[in]	punch_hole	true if supported for this file
  @param[in]	atomic_write	true if the file has atomic write
                                  enabled
  @param[in]	max_pages	maximum number of pages in file
                                  (defaults to PAGE_NO_MAX)
  @return pointer to the file node (fil_node_t)
  @retval nullptr if error */
  fil_node_t *create_node(const char *name, page_no_t size, fil_space_t *space,
                          bool is_raw, bool punch_hole, bool atomic_write,
                          page_no_t max_pages = PAGE_NO_MAX)
      MY_ATTRIBUTE((warn_unused_result));
1157 
1158 #ifdef UNIV_DEBUG
1159   /** Validate a shard. */
1160   void validate() const;
1161 #endif /* UNIV_DEBUG */
1162 
1163 #ifdef UNIV_HOTBACKUP
1164   /** Extends all tablespaces to the size stored in the space header.
1165   During the mysqlbackup --apply-log phase we extended the spaces
1166   on-demand so that log records could be applied, but that may have
1167   left spaces still too small compared to the size stored in the space
1168   header. */
1169   void meb_extend_tablespaces_to_stored_len();
1170 #endif /* UNIV_HOTBACKUP */
1171 
1172   /** Free a tablespace object on which fil_space_detach() was invoked.
1173   There must not be any pending i/o's or flushes on the files.
1174   @param[in,out]	space		tablespace */
1175   static void space_free_low(fil_space_t *&space);
1176 
  /** Wait for an empty slot to reserve for opening a file.
  @param[in]	shard_id	ID of the shard reserving the slot
                                  (NOTE(review): inferred from the
                                  parameter name -- confirm against the
                                  definition)
  @return true on success. */
  static bool reserve_open_slot(size_t shard_id)
      MY_ATTRIBUTE((warn_unused_result));
1181 
  /** Release the slot reserved for opening a file.
  @param[in]	shard_id	ID of shard releasing the slot */
  static void release_open_slot(size_t shard_id);
1185 
1186   /** We are going to do a rename file and want to stop new I/O
1187   for a while.
1188   @param[in]	space		Tablespace for which we want to
1189                                   wait for IO to stop */
1190   static void wait_for_io_to_stop(const fil_space_t *space);
1191 
1192  private:
1193   /** We keep log files and system tablespace files always open; this is
1194   important in preventing deadlocks in this module, as a page read
1195   completion often performs another read from the insert buffer. The
1196   insert buffer is in tablespace TRX_SYS_SPACE, and we cannot end up
1197   waiting in this function.
1198   @param[in]	space_id	Tablespace ID to look up
1199   @return tablespace instance */
1200   fil_space_t *get_reserved_space(space_id_t space_id)
1201       MY_ATTRIBUTE((warn_unused_result));
1202 
1203   /** Prepare for truncating a single-table tablespace.
1204   1) Check pending operations on a tablespace;
1205   2) Remove all insert buffer entries for the tablespace;
1206   @param[in]	space_id	Tablespace ID
1207   @return DB_SUCCESS or error */
1208   dberr_t space_prepare_for_truncate(space_id_t space_id)
1209       MY_ATTRIBUTE((warn_unused_result));
1210 
1211   /** Note that a write IO has completed.
1212   @param[in,out]	file		File on which a write was
1213                                   completed */
1214   void write_completed(fil_node_t *file);
1215 
1216   /** If the tablespace is not on the unflushed list, add it.
1217   @param[in,out]	space		Tablespace to add */
1218   void add_to_unflushed_list(fil_space_t *space);
1219 
1220   /** Check for pending operations.
1221   @param[in]	space	tablespace
1222   @param[in]	count	number of attempts so far
1223   @return 0 if no pending operations else count + 1. */
1224   ulint space_check_pending_operations(fil_space_t *space, ulint count) const
1225       MY_ATTRIBUTE((warn_unused_result));
1226 
1227   /** Check for pending IO.
1228   @param[in]	space		Tablespace to check
1229   @param[in]	file		File in space list
1230   @param[in]	count		number of attempts so far
1231   @return 0 if no pending else count + 1. */
1232   ulint check_pending_io(const fil_space_t *space, const fil_node_t &file,
1233                          ulint count) const MY_ATTRIBUTE((warn_unused_result));
1234 
1235   /** Flushes to disk possible writes cached by the OS. */
1236   void redo_space_flush();
1237 
1238   /** First we open the file in the normal mode, no async I/O here, for
1239   simplicity. Then do some checks, and close the file again.  NOTE that we
1240   could not use the simple file read function os_file_read() in Windows
1241   to read from a file opened for async I/O!
1242   @param[in,out]	file		Get the size of this file
1243   @param[in]	read_only_mode	true if read only mode set
1244   @return DB_SUCCESS or error */
1245   dberr_t get_file_size(fil_node_t *file, bool read_only_mode)
1246       MY_ATTRIBUTE((warn_unused_result));
1247 
  /** Get the AIO mode.
  @param[in]	req_type	IO request type
  @param[in]	sync		true if Synchronous IO
  @return the AIO mode */
  static AIO_mode get_AIO_mode(const IORequest &req_type, bool sync)
      MY_ATTRIBUTE((warn_unused_result));
1254 
1255   /** Get the file name for IO and the local offset within that file.
1256   @param[in]      req_type  IO context
1257   @param[in,out]  space     Tablespace for IO
1258   @param[in,out]  page_no   The relative page number in the file
1259   @param[out]     file      File node if DB_SUCCESS, NULL if not
1260   @retval DB_SUCCESS if the file is found with the page_no
1261   @retval DB_ERROR if the file is not found or does not contain the page.
1262                    in this case file == nullptr */
1263   static dberr_t get_file_for_io(const IORequest &req_type, fil_space_t *space,
1264                                  page_no_t *page_no, fil_node_t *&file)
1265       MY_ATTRIBUTE((warn_unused_result));
1266 
1267  private:
1268   /** Fil_shard ID */
1269 
1270   const size_t m_id;
1271 
1272   /** Tablespace instances hashed on the space id */
1273 
1274   Spaces m_spaces;
1275 
1276   /** Tablespace instances hashed on the space name */
1277 
1278   Names m_names;
1279 
1280 #ifndef UNIV_HOTBACKUP
  /** Deleted space IDs, ignore writes to these tablespaces. Each entry
  pairs a space ID with a fil_space_t pointer. Note the
  LSN at which the tablespace was deleted. All pages before this LSN
  should not be flushed to disk. Once the LWM is >= the recorded LSN
  we can delete the entry from m_deleted.
  NOTE(review): the element type carries no LSN field itself; presumably
  the deletion LSN is tracked elsewhere -- confirm. */

  std::vector<std::pair<space_id_t, fil_space_t *>> m_deleted;
1287 #endif /* !UNIV_HOTBACKUP */
1288 
1289   /** Base node for the LRU list of the most recently used open
1290   files with no pending I/O's; if we start an I/O on the file,
1291   we first remove it from this list, and return it to the start
1292   of the list when the I/O ends; log files and the system
1293   tablespace are not put to this list: they are opened after
1294   the startup, and kept open until shutdown */
1295 
1296   File_list m_LRU;
1297 
1298   /** Base node for the list of those tablespaces whose files
1299   contain unflushed writes; those spaces have at least one file
1300   where modification_counter > flush_counter */
1301 
1302   Space_list m_unflushed_spaces;
1303 
1304   /** When we write to a file we increment this by one */
1305 
1306   int64_t m_modification_counter;
1307 
1308   /** Number of files currently open */
1309 
1310   static std::atomic_size_t s_n_open;
1311 
1312   /** ID of shard that has reserved the open slot. */
1313 
1314   static std::atomic_size_t s_open_slot;
1315 
  // Disable copying (construction and assignment) and move construction.
  Fil_shard(Fil_shard &&) = delete;
  Fil_shard(const Fil_shard &) = delete;
  Fil_shard &operator=(const Fil_shard &) = delete;
1320 
1321   friend class Fil_system;
1322 };
1323 
1324 /** The tablespace memory cache; also the totality of logs (the log
1325 data space) is stored here; below we talk about tablespaces, but also
1326 the ib_logfiles form a 'space' and it is handled here */
1327 class Fil_system {
1328  public:
1329   using Fil_shards = std::vector<Fil_shard *>;
1330 
1331   /** Constructor.
1332   @param[in]	n_shards	Number of shards to create
1333   @param[in]	max_open	Maximum number of open files */
1334   Fil_system(size_t n_shards, size_t max_open);
1335 
1336   /** Destructor */
1337   ~Fil_system();
1338 
1339   /** Fetch the file names opened for a space_id during recovery.
1340   @param[in]	space_id	Tablespace ID to lookup
1341   @return pair of top level directory scanned and names that map
1342           to space_id or nullptr if not found for names */
get_scanned_files(space_id_t space_id)1343   Tablespace_dirs::Result get_scanned_files(space_id_t space_id)
1344       MY_ATTRIBUTE((warn_unused_result)) {
1345     return (m_dirs.find(space_id));
1346   }
1347 
1348   /** Fetch the file name opened for a space_id during recovery
1349   from the file map.
1350   @param[in]	space_id	Undo tablespace ID
1351   @return Full path to the file name that was opened, empty string
1352           if space ID not found. */
find(space_id_t space_id)1353   std::string find(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
1354     auto result = get_scanned_files(space_id);
1355 
1356     if (result.second != nullptr) {
1357       return (result.first + result.second->front());
1358     }
1359 
1360     return ("");
1361   }
1362 
1363   /** Erase a tablespace ID and its mapping from the scanned files.
1364   @param[in]	space_id	Tablespace ID to erase
1365   @return true if successful */
erase_path(space_id_t space_id)1366   bool erase_path(space_id_t space_id) MY_ATTRIBUTE((warn_unused_result)) {
1367     return (m_dirs.erase_path(space_id));
1368   }
1369 
1370   /** Add file to old file list. The list is used during 5.7 upgrade failure
1371   to revert back the modified file names. We modify partitioned file names
1372   to lower case.
1373   @param[in]	file_path	old file name with path */
add_old_file(const std::string & file_path)1374   void add_old_file(const std::string &file_path) {
1375     m_old_paths.push_back(file_path);
1376   }
1377 
1378   /** Rename partition files during upgrade.
1379   @param[in]	revert	if true, revert to old names */
1380   void rename_partition_files(bool revert);
1381 
1382   /** Clear all accumulated old files. */
clear_old_files()1383   void clear_old_files() { m_old_paths.clear(); }
1384 
1385   /** Get the top level directory where this filename was found.
1386   @param[in]	path		Path to look for.
1387   @return the top level directory under which this file was found. */
1388   const std::string &get_root(const std::string &path) const
1389       MY_ATTRIBUTE((warn_unused_result));
1390 
1391   /** Update the DD if any files were moved to a new location.
1392   Free the Tablespace_files instance.
1393   @param[in]	read_only_mode	true if InnoDB is started in
1394                                   read only mode.
1395   @return DB_SUCCESS if all OK */
1396   dberr_t prepare_open_for_business(bool read_only_mode)
1397       MY_ATTRIBUTE((warn_unused_result));
1398 
1399   /** Flush the redo log writes to disk, possibly cached by the OS. */
1400   void flush_file_redo();
1401 
1402   /** Flush to disk the writes in file spaces of the given type
1403   possibly cached by the OS.
1404   @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
1405                                   can be ORred */
1406   void flush_file_spaces(uint8_t purpose);
1407 
1408 #ifndef UNIV_HOTBACKUP
1409   /** Clean up the shards.
1410   @param[in] lwm No dirty pages less than this LSN in the buffer pool. */
checkpoint(lsn_t lwm)1411   void checkpoint(lsn_t lwm) {
1412     for (auto shard : m_shards) {
1413       shard->checkpoint(lwm);
1414     }
1415   }
1416 
1417   /** Count how many truncated undo space IDs are still tracked in
1418   the buffer pool and the file_system cache.
1419   @param[in]  undo_num  undo tablespace number.
1420   @return number of undo tablespaces that are still in memory. */
count_deleted(space_id_t undo_num)1421   size_t count_deleted(space_id_t undo_num) {
1422     size_t count = 0;
1423 
1424     for (auto shard : m_shards) {
1425       count += shard->count_deleted(undo_num);
1426     }
1427 
1428     return (count);
1429   }
1430 
1431   /** Check if a particular undo space_id for a page in the buffer pool has
1432   been deleted recently.  Its space_id will be found in Fil_shard::m_deleted
1433   until Fil:shard::checkpoint removes all its pages from the buffer pool and
1434   the fil_space_t from Fil_system.
1435   @return true if this space_id is in the list of recently deleted spaces. */
is_deleted(space_id_t space_id)1436   bool is_deleted(space_id_t space_id) {
1437     auto shard = shard_by_id(space_id);
1438 
1439     return (shard->is_deleted(space_id));
1440   }
1441 #endif /* !UNIV_HOTBACKUP */
1442 
1443   /** Fetch the fil_space_t instance that maps to the name.
1444   @param[in]	name		Tablespace name to lookup
1445   @return tablespace instance or nullptr if not found. */
get_space_by_name(const char * name)1446   fil_space_t *get_space_by_name(const char *name)
1447       MY_ATTRIBUTE((warn_unused_result)) {
1448     for (auto shard : m_shards) {
1449       shard->mutex_acquire();
1450 
1451       auto space = shard->get_space_by_name(name);
1452 
1453       shard->mutex_release();
1454 
1455       if (space != nullptr) {
1456         return (space);
1457       }
1458     }
1459 
1460     return (nullptr);
1461   }
1462 
1463   /** Check a space ID against the maximum known tablespace ID.
1464   @param[in]	space_id	Tablespace ID to check
1465   @return true if it is > than maximum known tablespace ID. */
is_greater_than_max_id(space_id_t space_id) const1466   bool is_greater_than_max_id(space_id_t space_id) const
1467       MY_ATTRIBUTE((warn_unused_result)) {
1468     ut_ad(mutex_owned_all());
1469 
1470     return (space_id > m_max_assigned_id);
1471   }
1472 
1473   /** Update the maximum known tablespace ID.
1474   @param[in]	space		Tablespace instance */
set_maximum_space_id(const fil_space_t * space)1475   void set_maximum_space_id(const fil_space_t *space) {
1476     ut_ad(mutex_owned_all());
1477 
1478     if (!m_space_id_reuse_warned) {
1479       m_space_id_reuse_warned = true;
1480 
1481       ib::warn(ER_IB_MSG_266) << "Allocated tablespace ID " << space->id
1482                               << " for " << space->name << ", old maximum"
1483                               << " was " << m_max_assigned_id;
1484     }
1485 
1486     m_max_assigned_id = space->id;
1487   }
1488 
1489   /** Update the maximim known space ID if it's smaller than max_id.
1490   @param[in]	space_id		Value to set if it's greater */
update_maximum_space_id(space_id_t space_id)1491   void update_maximum_space_id(space_id_t space_id) {
1492     mutex_acquire_all();
1493 
1494     if (is_greater_than_max_id(space_id)) {
1495       m_max_assigned_id = space_id;
1496     }
1497 
1498     mutex_release_all();
1499   }
1500 
1501   /** Assigns a new space id for a new single-table tablespace. This
1502   works simply by incrementing the global counter. If 4 billion ids
1503   is not enough, we may need to recycle ids.
1504   @param[out]	space_id	Set this to the new tablespace ID
1505   @return true if assigned, false if not */
1506   bool assign_new_space_id(space_id_t *space_id)
1507       MY_ATTRIBUTE((warn_unused_result));
1508 
1509   /** Tries to close a file in all the LRU lists.
1510   The caller must hold the mutex.
1511   @param[in] print_info		if true, prints information why it
1512                                   cannot close a file
1513   @return true if success, false if should retry later */
1514   bool close_file_in_all_LRU(bool print_info)
1515       MY_ATTRIBUTE((warn_unused_result));
1516 
1517   /** Opens all log files and system tablespace data files in
1518   all shards. */
1519   void open_all_system_tablespaces();
1520 
1521   /** Close all open files in a shard
1522   @param[in,out]	shard		Close files of this shard */
1523   void close_files_in_a_shard(Fil_shard *shard);
1524 
1525   /** Close all open files. */
1526   void close_all_files();
1527 
1528   /** Close all the log files in all shards.
1529   @param[in]	free_all	If set then free all instances */
1530   void close_all_log_files(bool free_all);
1531 
1532   /** Iterate through all tablespaces
1533   @param[in]  include_log Include redo log space, if true
1534   @param[in]  f   Callback
1535   @return any error returned by the callback function. */
1536   dberr_t iterate_spaces(bool include_log, Fil_space_iterator::Function &f)
1537       MY_ATTRIBUTE((warn_unused_result));
1538 
  /** Iterate through all persistent tablespace files
  (FIL_TYPE_TABLESPACE) returning the nodes via callback function f.
  @param[in]	include_log	Include log files, if true
  @param[in]	f		Callback
  @return any error returned by the callback function. */
  dberr_t iterate(bool include_log, Fil_iterator::Function &f)
      MY_ATTRIBUTE((warn_unused_result));
1546 
1547   /** Rotate the tablespace keys by new master key.
1548   @param[in,out]	shard		Rotate the keys in this shard
1549   @return true if the re-encrypt succeeds */
1550   bool encryption_rotate_in_a_shard(Fil_shard *shard);
1551 
1552   /** Rotate the tablespace keys by new master key.
1553   @return true if the re-encrypt succeeds */
1554   bool encryption_rotate_all() MY_ATTRIBUTE((warn_unused_result));
1555 
1556   /** Detach a space object from the tablespace memory cache.
1557   Closes the tablespace files but does not delete them.
1558   There must not be any pending I/O's or flushes on the files.
1559   @param[in,out]	space		tablespace */
1560   void space_detach(fil_space_t *space);
1561 
1562   /** @return the maximum assigned ID so far */
get_max_space_id() const1563   space_id_t get_max_space_id() const { return (m_max_assigned_id); }
1564 
1565   /** Lookup the tablespace ID.
1566   @param[in]	space_id	Tablespace ID to lookup
1567   @return true if the space ID is known. */
1568   bool lookup_for_recovery(space_id_t space_id)
1569       MY_ATTRIBUTE((warn_unused_result));
1570 
1571   /** Open a tablespace that has a redo log record to apply.
1572   @param[in]	space_id		Tablespace ID
1573   @return true if the open was successful */
1574   bool open_for_recovery(space_id_t space_id)
1575       MY_ATTRIBUTE((warn_unused_result));
1576 
1577   /** This function should be called after recovery has completed.
1578   Check for tablespace files for which we did not see any
1579   MLOG_FILE_DELETE or MLOG_FILE_RENAME record. These could not
1580   be recovered.
1581   @return true if there were some filenames missing for which we had to
1582           ignore redo log records during the apply phase */
1583   bool check_missing_tablespaces() MY_ATTRIBUTE((warn_unused_result));
1584 
1585   /** Note that a file has been relocated.
1586   @param[in]	object_id	Server DD tablespace ID
1587   @param[in]	space_id	InnoDB tablespace ID
1588   @param[in]	space_name	Tablespace name
1589   @param[in]	old_path	Path to the old location
1590   @param[in]	new_path	Path scanned from disk */
moved(dd::Object_id object_id,space_id_t space_id,const char * space_name,const std::string & old_path,const std::string & new_path)1591   void moved(dd::Object_id object_id, space_id_t space_id,
1592              const char *space_name, const std::string &old_path,
1593              const std::string &new_path) {
1594     auto tuple =
1595         std::make_tuple(object_id, space_id, space_name, old_path, new_path);
1596 
1597     m_moved.push_back(tuple);
1598   }
1599 
1600   /** Check if a path is known to InnoDB.
1601   @param[in]	path		Path to check
1602   @return true if path is known to InnoDB */
check_path(const std::string & path) const1603   bool check_path(const std::string &path) const {
1604     return (m_dirs.contains(path));
1605   }
1606 
1607   /** Get the list of directories that InnoDB knows about.
1608   @return the list of directories 'dir1;dir2;....;dirN' */
get_dirs() const1609   std::string get_dirs() const { return (m_dirs.get_dirs()); }
1610 
  /** Determines if the files of a tablespace belong in the
  least-recently-used (LRU) list.
  @param[in]  space  Tablespace to check
  @return true if the files of the tablespace belong in the LRU list. */
  static bool space_belongs_in_LRU(const fil_space_t *space)
      MY_ATTRIBUTE((warn_unused_result));
1616 
1617   /** Normalize and save a directory to scan for IBD and IBU datafiles
1618   before recovery.
1619   @param[in]  directory    Directory to scan
1620   @param[in]  is_undo_dir  true for an undo directory */
set_scan_dir(const std::string & directory,bool is_undo_dir)1621   void set_scan_dir(const std::string &directory, bool is_undo_dir) {
1622     m_dirs.set_scan_dir(directory, is_undo_dir);
1623   }
1624 
1625   /** Normalize and save a list of directories to scan for IBD and IBU
1626   datafiles before recovery.
1627   @param[in]  directories  Directories to scan */
set_scan_dirs(const std::string & directories)1628   void set_scan_dirs(const std::string &directories) {
1629     m_dirs.set_scan_dirs(directories);
1630   }
1631 
1632   /** Scan the directories to build the tablespace ID to file name
1633   mapping table. */
scan(bool populate_fil_cache)1634   dberr_t scan(bool populate_fil_cache) {
1635     return (m_dirs.scan(populate_fil_cache));
1636   }
1637 
1638   /** Open all known tablespaces. */
open_ibds() const1639   void open_ibds() const { m_dirs.open_ibds(); }
1640 
1641   /** Insert a file with given space ID to filename mapping.
1642   @param[in]  space_id  Tablespace ID to insert
1643   @param[in]  filename  file name to insert
1644   @return true if successful */
insert(space_id_t space_id,const std::string & filename)1645   bool insert(space_id_t space_id, const std::string &filename)
1646       MY_ATTRIBUTE((warn_unused_result)) {
1647     return (m_dirs.insert(space_id, filename));
1648   }
1649 
  /** Get the tablespace ID from an .ibd and/or an undo tablespace file.
  If the ID is 0 on the first page then try Datafile::find_space_id().
  @param[in]  filename  File name to check
  @return s_invalid_space_id if not found, otherwise the space ID */
  static space_id_t get_tablespace_id(const std::string &filename)
      MY_ATTRIBUTE((warn_unused_result));
1656 
1657   /** Fil_shard by space ID.
1658   @param[in]	space_id	Tablespace ID
1659   @return reference to the shard */
shard_by_id(space_id_t space_id) const1660   Fil_shard *shard_by_id(space_id_t space_id) const
1661       MY_ATTRIBUTE((warn_unused_result)) {
1662 #ifndef UNIV_HOTBACKUP
1663     if (space_id == dict_sys_t::s_log_space_first_id) {
1664       return (m_shards[REDO_SHARD]);
1665 
1666     } else if (fsp_is_undo_tablespace(space_id)) {
1667       const size_t limit = space_id % UNDO_SHARDS;
1668 
1669       return (m_shards[UNDO_SHARDS_START + limit]);
1670     }
1671 
1672     ut_ad(m_shards.size() == MAX_SHARDS);
1673 
1674     return (m_shards[space_id % UNDO_SHARDS_START]);
1675 #else  /* !UNIV_HOTBACKUP */
1676     ut_ad(m_shards.size() == 1);
1677 
1678     return (m_shards[0]);
1679 #endif /* !UNIV_HOTBACKUP */
1680   }
1681 
1682   /** Acquire all the mutexes. */
mutex_acquire_all() const1683   void mutex_acquire_all() const {
1684 #ifdef UNIV_HOTBACKUP
1685     ut_ad(m_shards.size() == 1);
1686 #endif /* UNIV_HOTBACKUP */
1687 
1688     for (auto shard : m_shards) {
1689       shard->mutex_acquire();
1690     }
1691   }
1692 
1693   /** Release all the mutexes. */
mutex_release_all() const1694   void mutex_release_all() const {
1695 #ifdef UNIV_HOTBACKUP
1696     ut_ad(m_shards.size() == 1);
1697 #endif /* UNIV_HOTBACKUP */
1698 
1699     for (auto shard : m_shards) {
1700       shard->mutex_release();
1701     }
1702   }
1703 
1704 #ifdef UNIV_DEBUG
1705 
  /** Checks the consistency of the tablespace cache.
  Only declared when UNIV_DEBUG is defined.
  @return true if ok */
  bool validate() const MY_ATTRIBUTE((warn_unused_result));
1709 
1710   /** Check if all mutexes are owned
1711   @return true if all owned. */
mutex_owned_all() const1712   bool mutex_owned_all() const MY_ATTRIBUTE((warn_unused_result)) {
1713 #ifdef UNIV_HOTBACKUP
1714     ut_ad(m_shards.size() == 1);
1715 #endif /* UNIV_HOTBACKUP */
1716 
1717     for (const auto shard : m_shards) {
1718       ut_ad(shard->mutex_owned());
1719     }
1720 
1721     return (true);
1722   }
1723 
1724 #endif /* UNIV_DEBUG */
1725 
  /** Rename a tablespace. The space_id is used to find the shard.
  The result must not be ignored (warn_unused_result).
  @param[in]  space_id  Tablespace ID
  @param[in]  old_name  Old tablespace name
  @param[in]  new_name  New tablespace name
  @return DB_SUCCESS on success */
  dberr_t rename_tablespace_name(space_id_t space_id, const char *old_name,
                                 const char *new_name)
      MY_ATTRIBUTE((warn_unused_result));
1734 
1735   /** Free the data structures required for recovery. */
free_scanned_files()1736   void free_scanned_files() { m_dirs.clear(); }
1737 
1738 #ifdef UNIV_HOTBACKUP
1739   /** Extends all tablespaces to the size stored in the space header.
1740   During the mysqlbackup --apply-log phase we extended the spaces
1741   on-demand so that log records could be applied, but that may have
1742   left spaces still too small compared to the size stored in the space
1743   header. */
meb_extend_tablespaces_to_stored_len()1744   void meb_extend_tablespaces_to_stored_len() {
1745     ut_ad(m_shards.size() == 1);
1746 
1747     /* We use a single shard for MEB. */
1748     auto shard = shard_by_id(SPACE_UNKNOWN);
1749 
1750     shard->mutex_acquire();
1751 
1752     shard->meb_extend_tablespaces_to_stored_len();
1753 
1754     shard->mutex_release();
1755   }
1756 
  /** Process a file name passed as an input.
  Wrapper around meb_name_process().
  @param[in,out]  name      Absolute path of the tablespace file
  @param[in]      space_id  The tablespace ID
  @param[in]      deleted   true if MLOG_FILE_DELETE */
  void meb_name_process(char *name, space_id_t space_id, bool deleted);
1763 
1764 #endif /* UNIV_HOTBACKUP */
1765 
1766  private:
  /** Open an ibd tablespace and add it to the InnoDB data structures.
  This is similar to fil_ibd_open() except that it is used while
  processing the redo log, so the data dictionary is not available
  and very little validation is done. The tablespace name is extracted
  from the dbname/tablename.ibd portion of the filename, which assumes
  that the file is a file-per-table tablespace. Any name will do for
  now. General tablespace names will be read from the dictionary after
  it has been recovered. The tablespace flags are read at this time
  from the first page of the file in validate_for_recovery().
  @param[in]   space_id  Tablespace ID
  @param[in]   path      path/to/databasename/tablename.ibd
  @param[out]  space     The tablespace, or nullptr on error
  @return status of the operation */
  fil_load_status ibd_open_for_recovery(space_id_t space_id,
                                        const std::string &path,
                                        fil_space_t *&space)
      MY_ATTRIBUTE((warn_unused_result));
1784 
1785  private:
  /** The shards managed by this Fil_system. They are created in the
  constructor and deleted in the destructor. */
  Fil_shards m_shards;

  /** n_open is not allowed to exceed this */
  const size_t m_max_n_open;

  /** Maximum space ID in the existing tables, or assigned during
  the time mysqld has been up; at InnoDB startup we scan the
  data dictionary and set here the maximum of the space IDs of
  the tables there */
  space_id_t m_max_assigned_id;

  /** true if fil_space_create() has issued a warning about
  potential space_id reuse */
  bool m_space_id_reuse_warned;

  /** List of tablespaces that have been relocated. We need to
  update the DD when it is safe to do so. Entries are appended
  by moved(). */
  dd_fil::Tablespaces m_moved;

  /** Tablespace directories scanned at startup */
  Tablespace_dirs m_dirs;

  /** Old file paths during 5.7 upgrade. */
  std::vector<std::string> m_old_paths;

  // Disable copying and move construction
  Fil_system(Fil_system &&) = delete;
  Fil_system(const Fil_system &) = delete;
  Fil_system &operator=(const Fil_system &) = delete;

  friend class Fil_shard;
1818 };
1819 
/** The tablespace memory cache. This variable is nullptr before the module is
initialized. */
static Fil_system *fil_system = nullptr;

/** Total number of open files, shared by all shards. */
std::atomic_size_t Fil_shard::s_n_open;

/** Slot reserved for opening a file. Holds either EMPTY_OPEN_SLOT or the ID
of the shard that reserved it (see reserve_open_slot() and
release_open_slot()). */
std::atomic_size_t Fil_shard::s_open_slot;

#ifdef UNIV_HOTBACKUP
/* File-local counters used only in UNIV_HOTBACKUP builds.
NOTE(review): presumably stand-ins for the server I/O statistics of the same
name; confirm against their users. */
static ulint srv_data_read;
static ulint srv_data_written;
#endif /* UNIV_HOTBACKUP */
1834 
/** Replay a file rename operation if possible.
@param[in]  page_id   Space ID and first page number in the file
@param[in]  old_name  Old file name
@param[in]  new_name  New file name
@return whether the operation was successfully applied (old_name did not
exist, or new_name did not exist and old_name was successfully renamed to
new_name) */
static bool fil_op_replay_rename(const page_id_t &page_id,
                                 const std::string &old_name,
                                 const std::string &new_name)
    MY_ATTRIBUTE((warn_unused_result));
1845 
1846 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
/** Rename a partition file.
@param[in]  old_path  Old file path
@param[in]  extn      File extension suffix
@param[in]  revert    If true, rename from the new file back to the old one
@param[in]  import    true if called during import */
static void fil_rename_partition_file(const std::string &old_path,
                                      ib_file_suffix extn, bool revert,
                                      bool import);
1855 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
1856 
#ifndef XTRABACKUP
/** Get the modified name for a partition file. During upgrade we change all
partition files to have a lower case separator and partition name.
@param[in]   old_path  Old file name and path
@param[in]   extn      File extension suffix
@param[out]  new_path  Modified new name for the partitioned file
@return true iff the name needs modification. */
static bool fil_get_partition_file(const std::string &old_path,
                                   ib_file_suffix extn, std::string &new_path);
#endif /* !XTRABACKUP */
1867 
1868 #ifdef UNIV_DEBUG
/** fil_validate_skip() runs the full fil_validate() check only once per this
many calls. */
static const size_t FIL_VALIDATE_SKIP = 17;
1871 /** Checks the consistency of the tablespace cache some of the time.
1872 @return true if ok or the check was skipped */
fil_validate_skip()1873 static bool fil_validate_skip() {
1874 /** The fil_validate() call skip counter. Use a signed type
1875 because of the race condition below. */
1876 #ifdef UNIV_HOTBACKUP
1877   static meb::Mutex meb_mutex;
1878 
1879   meb_mutex.lock();
1880 #endif /* UNIV_HOTBACKUP */
1881   static int fil_validate_count = FIL_VALIDATE_SKIP;
1882 
1883   /* There is a race condition below, but it does not matter,
1884   because this call is only for heuristic purposes. We want to
1885   reduce the call frequency of the costly fil_validate() check
1886   in debug builds. */
1887   --fil_validate_count;
1888 
1889   if (fil_validate_count > 0) {
1890 #ifdef UNIV_HOTBACKUP
1891     meb_mutex.unlock();
1892 #endif /* UNIV_HOTBACKUP */
1893     return (true);
1894   }
1895 
1896   fil_validate_count = FIL_VALIDATE_SKIP;
1897 #ifdef UNIV_HOTBACKUP
1898   meb_mutex.unlock();
1899 #endif /* UNIV_HOTBACKUP */
1900 
1901   return (fil_validate());
1902 }
1903 
1904 /** Validate a shard */
validate() const1905 void Fil_shard::validate() const {
1906   mutex_acquire();
1907 
1908   size_t n_open = 0;
1909 
1910   for (auto elem : m_spaces) {
1911     page_no_t size = 0;
1912     auto space = elem.second;
1913 
1914     for (const auto &file : space->files) {
1915       ut_a(file.is_open || !file.n_pending);
1916 
1917       if (file.is_open) {
1918         ++n_open;
1919       }
1920 
1921       size += file.size;
1922     }
1923 
1924     ut_a(space->size == size);
1925   }
1926 
1927   UT_LIST_CHECK(m_LRU);
1928 
1929   for (auto file = UT_LIST_GET_FIRST(m_LRU); file != nullptr;
1930        file = UT_LIST_GET_NEXT(LRU, file)) {
1931     ut_a(file->is_open);
1932     ut_a(file->n_pending == 0);
1933     ut_a(fil_system->space_belongs_in_LRU(file->space));
1934   }
1935 
1936   mutex_release();
1937 }
1938 
1939 /** Checks the consistency of the tablespace cache.
1940 @return true if ok */
validate() const1941 bool Fil_system::validate() const {
1942   for (const auto shard : m_shards) {
1943     shard->validate();
1944   }
1945 
1946   return (true);
1947 }
1948 /** Checks the consistency of the tablespace cache.
1949 @return true if ok */
fil_validate()1950 bool fil_validate() { return (fil_system->validate()); }
1951 #endif /* UNIV_DEBUG */
1952 
1953 /** Constructor.
1954 @param[in]	n_shards	Number of shards to create
1955 @param[in]	max_open	Maximum number of open files */
Fil_system(size_t n_shards,size_t max_open)1956 Fil_system::Fil_system(size_t n_shards, size_t max_open)
1957     : m_shards(),
1958       m_max_n_open(max_open),
1959       m_max_assigned_id(),
1960       m_space_id_reuse_warned() {
1961   ut_ad(Fil_shard::s_open_slot == 0);
1962   Fil_shard::s_open_slot = EMPTY_OPEN_SLOT;
1963 
1964   for (size_t i = 0; i < n_shards; ++i) {
1965     auto shard = UT_NEW_NOKEY(Fil_shard(i));
1966 
1967     m_shards.push_back(shard);
1968   }
1969 }
1970 
1971 /** Destructor */
~Fil_system()1972 Fil_system::~Fil_system() {
1973   ut_ad(Fil_shard::s_open_slot == EMPTY_OPEN_SLOT);
1974 
1975   Fil_shard::s_open_slot = 0;
1976 
1977   for (auto shard : m_shards) {
1978     UT_DELETE(shard);
1979   }
1980 
1981   m_shards.clear();
1982 }
1983 
1984 /** Determines if a file belongs to the least-recently-used list.
1985 @param[in]	space		Tablespace to check
1986 @return true if the file belongs to m_LRU. */
space_belongs_in_LRU(const fil_space_t * space)1987 bool Fil_system::space_belongs_in_LRU(const fil_space_t *space) {
1988   switch (space->purpose) {
1989     case FIL_TYPE_TEMPORARY:
1990     case FIL_TYPE_LOG:
1991       return (false);
1992 
1993     case FIL_TYPE_TABLESPACE:
1994       return (fsp_is_ibd_tablespace(space->id));
1995 
1996     case FIL_TYPE_IMPORT:
1997       return (true);
1998   }
1999 
2000   ut_ad(0);
2001   return (false);
2002 }
2003 
2004 /** Constructor
2005 @param[in]	shard_id	Shard ID  */
Fil_shard(size_t shard_id)2006 Fil_shard::Fil_shard(size_t shard_id)
2007     : m_id(shard_id), m_spaces(), m_names(), m_modification_counter() {
2008   mutex_create(LATCH_ID_FIL_SHARD, &m_mutex);
2009 
2010   UT_LIST_INIT(m_LRU, &fil_node_t::LRU);
2011 
2012   UT_LIST_INIT(m_unflushed_spaces, &fil_space_t::unflushed_spaces);
2013 }
2014 
2015 /** Wait for an empty slot to reserve for opening a file.
2016 @return true on success. */
reserve_open_slot(size_t shard_id)2017 bool Fil_shard::reserve_open_slot(size_t shard_id) {
2018   size_t expected = EMPTY_OPEN_SLOT;
2019 
2020   return (s_open_slot.compare_exchange_weak(expected, shard_id));
2021 }
2022 
2023 /** Release the slot reserved for opening a file.
2024 @param[in]	shard_id	ID of shard relasing the slot */
release_open_slot(size_t shard_id)2025 void Fil_shard::release_open_slot(size_t shard_id) {
2026   size_t expected = shard_id;
2027 
2028   while (!s_open_slot.compare_exchange_weak(expected, EMPTY_OPEN_SLOT)) {
2029   };
2030 }
2031 
2032 /** Map the space ID and name to the tablespace instance.
2033 @param[in]	space		Tablespace instance */
space_add(fil_space_t * space)2034 void Fil_shard::space_add(fil_space_t *space) {
2035   ut_ad(mutex_owned());
2036 
2037   {
2038     auto it = m_spaces.insert(Spaces::value_type(space->id, space));
2039 
2040     ut_a(it.second);
2041   }
2042 
2043   {
2044     auto name = space->name;
2045 
2046     auto it = m_names.insert(Names::value_type(name, space));
2047 
2048     ut_a(it.second);
2049   }
2050 }
2051 
2052 /** Add the file node to the LRU list if required.
2053 @param[in,out]	file		File for the tablespace */
file_opened(fil_node_t * file)2054 void Fil_shard::file_opened(fil_node_t *file) {
2055   ut_ad(m_id == REDO_SHARD || mutex_owned());
2056 
2057   if (Fil_system::space_belongs_in_LRU(file->space)) {
2058     /* Put the file to the LRU list */
2059     UT_LIST_ADD_FIRST(m_LRU, file);
2060   }
2061 
2062   ++s_n_open;
2063 
2064   file->is_open = true;
2065 
2066   fil_n_file_opened = s_n_open;
2067 }
2068 
2069 /** Remove the file node from the LRU list.
2070 @param[in,out]	file		File for the tablespace */
remove_from_LRU(fil_node_t * file)2071 void Fil_shard::remove_from_LRU(fil_node_t *file) {
2072   ut_ad(mutex_owned());
2073 
2074   if (Fil_system::space_belongs_in_LRU(file->space)) {
2075     ut_ad(mutex_owned());
2076 
2077     ut_a(UT_LIST_GET_LEN(m_LRU) > 0);
2078 
2079     /* The file is in the LRU list, remove it */
2080     UT_LIST_REMOVE(m_LRU, file);
2081   }
2082 }
2083 
2084 /** Close a tablespace file based on tablespace ID.
2085 @param[in]	space_id	Tablespace ID
2086 @return false if space_id was not found. */
close_file(space_id_t space_id)2087 bool Fil_shard::close_file(space_id_t space_id) {
2088   mutex_acquire();
2089 
2090   auto space = get_space_by_id(space_id);
2091 
2092   if (space == nullptr) {
2093     mutex_release();
2094 
2095     return (false);
2096   }
2097 
2098   for (auto &file : space->files) {
2099     while (file.in_use > 0) {
2100       mutex_release();
2101 
2102       os_thread_sleep(10000);
2103 
2104       mutex_acquire();
2105     }
2106 
2107     if (file.is_open) {
2108       close_file(&file, false);
2109     }
2110   }
2111 
2112   mutex_release();
2113 
2114   return (true);
2115 }
2116 
2117 /** Remap the tablespace to the new name.
2118 @param[in]	space		Tablespace instance, with old name.
2119 @param[in]	new_name	New tablespace name */
update_space_name_map(fil_space_t * space,const char * new_name)2120 void Fil_shard::update_space_name_map(fil_space_t *space,
2121                                       const char *new_name) {
2122   ut_ad(mutex_owned());
2123 
2124   ut_ad(m_spaces.find(space->id) != m_spaces.end());
2125 
2126   m_names.erase(space->name);
2127 
2128   auto it = m_names.insert(Names::value_type(new_name, space));
2129 
2130   ut_a(it.second);
2131 }
2132 
2133 /** Check if the basename of a filepath is an undo tablespace name
2134 @param[in]	name	Tablespace name
2135 @return true if it is an undo tablespace name */
is_undo_tablespace_name(const std::string & name)2136 bool Fil_path::is_undo_tablespace_name(const std::string &name) {
2137   if (name.empty()) {
2138     return (false);
2139   }
2140 
2141   std::string basename(name);
2142 
2143   auto sep = basename.find_last_of(SEPARATOR);
2144 
2145   if (sep != std::string::npos) {
2146     basename.erase(basename.begin(), basename.begin() + sep + 1);
2147   }
2148 
2149   const auto end = basename.end();
2150 
2151   /* 5 is the minimum length for an explicit undo space name.
2152   It must be at least this long; "_.ibu". */
2153   if (basename.length() <= strlen(DOT_IBU)) {
2154     return (false);
2155   }
2156 
2157   /* Implicit undo names can come in two formats: undo_000 and undo000.
2158   Check for both. */
2159   size_t u = (*(end - 4) == '_') ? 1 : 0;
2160 
2161   if (basename.length() == sizeof("undo000") - 1 + u &&
2162       *(end - 7 - u) == 'u' && /* 'u' */
2163       *(end - 6 - u) == 'n' && /* 'n' */
2164       *(end - 5 - u) == 'd' && /* 'd' */
2165       *(end - 4 - u) == 'o' && /* 'o' */
2166       isdigit(*(end - 3)) &&   /* 'n' */
2167       isdigit(*(end - 2)) &&   /* 'n' */
2168       isdigit(*(end - 1))) {   /* 'n' */
2169     return (true);
2170   }
2171 
2172   if (basename.substr(basename.length() - 4, 4) == DOT_IBU) {
2173     return (true);
2174   }
2175 
2176   return (false);
2177 }
2178 
2179 /** Add a space ID to filename mapping.
2180 @param[in]	space_id	Tablespace ID
2181 @param[in]	name		File name.
2182 @return number of files that map to the space ID */
add(space_id_t space_id,const std::string & name)2183 size_t Tablespace_files::add(space_id_t space_id, const std::string &name) {
2184   ut_a(space_id != TRX_SYS_SPACE);
2185 
2186   Names *names;
2187 
2188   if (Fil_path::is_undo_tablespace_name(name)) {
2189     if (!dict_sys_t::is_reserved(space_id) &&
2190         0 == strncmp(name.c_str(), "undo_", 5)) {
2191       ib::warn(ER_IB_MSG_267) << "Tablespace '" << name << "' naming"
2192                               << " format is like an undo tablespace"
2193                               << " but its ID " << space_id << " is not"
2194                               << " in the undo tablespace range";
2195     }
2196 
2197     names = &m_undo_paths[space_id];
2198 
2199   } else {
2200     ut_ad(Fil_path::has_suffix(IBD, name.c_str()));
2201 
2202     names = &m_ibd_paths[space_id];
2203   }
2204 
2205   names->push_back(name);
2206 
2207   return (names->size());
2208 }
2209 
2210 /** Open all known tablespaces. */
open_ibds() const2211 void Tablespace_files::open_ibds() const {
2212   for (auto path : m_ibd_paths) {
2213     for (auto name : path.second) {
2214       fil_open_for_xtrabackup(m_dir.path() + name,
2215                               name.substr(0, name.length() - 4));
2216     }
2217   }
2218 }
2219 
2220 /** Reads data from a space to a buffer. Remember that the possible incomplete
2221 blocks at the end of file are ignored: they are not taken into account when
2222 calculating the byte offset within a space.
2223 @param[in]	page_id		page id
2224 @param[in]	page_size	page size
2225 @param[in]	byte_offset	remainder of offset in bytes; in aio this
2226 must be divisible by the OS block size
2227 @param[in]	len		how many bytes to read; this must not cross a
2228 file boundary; in aio this must be a block size multiple
2229 @param[in,out]	buf		buffer where to store data read; in aio this
2230 must be appropriately aligned
2231 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
2232 i/o on a tablespace which does not exist */
fil_read(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)2233 static dberr_t fil_read(const page_id_t &page_id, const page_size_t &page_size,
2234                         ulint byte_offset, ulint len, void *buf) {
2235   return (fil_io(IORequestRead, true, page_id, page_size, byte_offset, len, buf,
2236                  nullptr));
2237 }
2238 
2239 /** Writes data to a space from a buffer. Remember that the possible incomplete
2240 blocks at the end of file are ignored: they are not taken into account when
2241 calculating the byte offset within a space.
2242 @param[in]	page_id		page id
2243 @param[in]	page_size	page size
2244 @param[in]	byte_offset	remainder of offset in bytes; in aio this
2245 must be divisible by the OS block size
2246 @param[in]	len		how many bytes to write; this must not cross
2247 a file boundary; in aio this must be a block size multiple
2248 @param[in]	buf		buffer from which to write; in aio this must
2249 be appropriately aligned
2250 @return DB_SUCCESS, or DB_TABLESPACE_DELETED if we are trying to do
2251         I/O on a tablespace which does not exist */
fil_write(const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)2252 static dberr_t fil_write(const page_id_t &page_id, const page_size_t &page_size,
2253                          ulint byte_offset, ulint len, void *buf) {
2254   ut_ad(!srv_read_only_mode);
2255 
2256   return (fil_io(IORequestWrite, true, page_id, page_size, byte_offset, len,
2257                  buf, nullptr));
2258 }
2259 
2260 /** Look up a tablespace. The caller should hold an InnoDB table lock or
2261 a MDL that prevents the tablespace from being dropped during the operation,
2262 or the caller should be in single-threaded crash recovery mode (no user
2263 connections that could drop tablespaces). If this is not the case,
2264 fil_space_acquire() and fil_space_release() should be used instead.
2265 @param[in]	space_id	Tablespace ID
2266 @return tablespace, or nullptr if not found */
fil_space_get(space_id_t space_id)2267 fil_space_t *fil_space_get(space_id_t space_id) {
2268   auto shard = fil_system->shard_by_id(space_id);
2269 
2270   shard->mutex_acquire();
2271 
2272   fil_space_t *space = shard->get_space_by_id(space_id);
2273 
2274   shard->mutex_release();
2275 
2276   return (space);
2277 }
2278 
2279 #ifndef UNIV_HOTBACKUP
2280 
2281 /** Returns the latch of a file space.
2282 @param[in]	space_id	Tablespace ID
2283 @return latch protecting storage allocation */
fil_space_get_latch(space_id_t space_id)2284 rw_lock_t *fil_space_get_latch(space_id_t space_id) {
2285   auto shard = fil_system->shard_by_id(space_id);
2286 
2287   shard->mutex_acquire();
2288 
2289   fil_space_t *space = shard->get_space_by_id(space_id);
2290 
2291   shard->mutex_release();
2292 
2293   return (&space->latch);
2294 }
2295 
2296 #ifdef UNIV_DEBUG
2297 
2298 /** Gets the type of a file space.
2299 @param[in]	space_id	Tablespace ID
2300 @return file type */
fil_space_get_type(space_id_t space_id)2301 fil_type_t fil_space_get_type(space_id_t space_id) {
2302   auto shard = fil_system->shard_by_id(space_id);
2303 
2304   shard->mutex_acquire();
2305 
2306   auto space = shard->get_space_by_id(space_id);
2307 
2308   shard->mutex_release();
2309 
2310   return (space->purpose);
2311 }
2312 
2313 #endif /* UNIV_DEBUG */
2314 
2315 /** Note that a tablespace has been imported.
2316 It is initially marked as FIL_TYPE_IMPORT so that no logging is
2317 done during the import process when the space ID is stamped to each page.
2318 Now we change it to FIL_SPACE_TABLESPACE to start redo and undo logging.
2319 NOTE: temporary tablespaces are never imported.
2320 @param[in]	space_id	Tablespace ID */
fil_space_set_imported(space_id_t space_id)2321 void fil_space_set_imported(space_id_t space_id) {
2322   auto shard = fil_system->shard_by_id(space_id);
2323 
2324   shard->mutex_acquire();
2325 
2326   fil_space_t *space = shard->get_space_by_id(space_id);
2327 
2328   ut_ad(space->purpose == FIL_TYPE_IMPORT);
2329   space->purpose = FIL_TYPE_TABLESPACE;
2330 
2331   shard->mutex_release();
2332 }
2333 #endif /* !UNIV_HOTBACKUP */
2334 
2335 /** Checks if all the file nodes in a space are flushed. The caller must hold
2336 the fil_system mutex.
2337 @param[in]	space		Tablespace to check
2338 @return true if all are flushed */
space_is_flushed(const fil_space_t * space)2339 bool Fil_shard::space_is_flushed(const fil_space_t *space) {
2340   ut_ad(mutex_owned());
2341 
2342   for (const auto &file : space->files) {
2343     if (file.modification_counter > file.flush_counter) {
2344       ut_ad(!fil_buffering_disabled(space));
2345       return (false);
2346     }
2347   }
2348 
2349   return (true);
2350 }
2351 
2352 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
2353 
2354 #include <sys/ioctl.h>
2355 
2356 /** FusionIO atomic write control info */
2357 #define DFS_IOCTL_ATOMIC_WRITE_SET _IOW(0x95, 2, uint)
2358 
2359 /** Try and enable FusionIO atomic writes.
2360 @param[in] file		OS file handle
2361 @return true if successful */
fil_fusionio_enable_atomic_write(pfs_os_file_t file)2362 bool fil_fusionio_enable_atomic_write(pfs_os_file_t file) {
2363   if (srv_unix_file_flush_method == SRV_UNIX_O_DIRECT) {
2364     uint atomic = 1;
2365 
2366     ut_a(file.m_file != -1);
2367 
2368     if (ioctl(file.m_file, DFS_IOCTL_ATOMIC_WRITE_SET, &atomic) != -1) {
2369       return (true);
2370     }
2371   }
2372 
2373   return (false);
2374 }
2375 #endif /* !NO_FALLOCATE && UNIV_LINUX */
2376 
/** Attach a file to a tablespace.
A file node is filled in locally and then appended to space->files, with
space->size increased by the file size, while the shard mutex is held.
@param[in]      name          File name of a file that is not open
@param[in]      size          File size in entire database blocks
@param[in,out]  space         Tablespace from fil_space_create()
@param[in]      is_raw        Whether this is a raw device or partition
@param[in]      punch_hole    true if supported for this file
@param[in]      atomic_write  true if the file has atomic write enabled
@param[in]      max_pages     Maximum number of pages in file
@return pointer to the first file node of the tablespace
@retval nullptr if space is nullptr */
fil_node_t *Fil_shard::create_node(const char *name, page_no_t size,
                                   fil_space_t *space, bool is_raw,
                                   bool punch_hole, bool atomic_write,
                                   page_no_t max_pages) {
  ut_ad(name != nullptr);
  ut_ad(fil_system != nullptr);

  if (space == nullptr) {
    return (nullptr);
  }

  /* Value-initialised node; only the fields assigned below differ from
  their zero values. */
  fil_node_t file{};

  /* The node keeps its own copy of the name. */
  file.name = mem_strdup(name);

  /* A raw device is only acceptable when raw disk use has been noted. */
  ut_a(!is_raw || srv_start_raw_disk_in_use);

  file.sync_event = os_event_create();

  file.is_raw_disk = is_raw;

  file.size = size;

  file.flush_size = size;

  file.magic_n = FIL_NODE_MAGIC_N;

  file.init_size = size;

  file.max_size = max_pages;

  file.space = space;

  os_file_stat_t stat_info;

  /* The status of os_file_get_status() is captured only when UNIV_DEBUG is
  defined, for the assertion that follows; otherwise it is discarded. */
#ifdef UNIV_DEBUG
  dberr_t err =
#endif /* UNIV_DEBUG */

      os_file_get_status(
          file.name, &stat_info, false,
          fsp_is_system_temporary(space->id) ? true : srv_read_only_mode);

  ut_ad(err == DB_SUCCESS);

  file.block_size = stat_info.block_size;

  /* In this debugging mode, we can overcome the limitation of some
  OSes like Windows that support Punch Hole but have a hole size
  effectively too large.  By setting the block size to be half the
  page size, we can bypass one of the checks that would normally
  turn Page Compression off.  This execution mode allows compression
  to be tested even when full punch hole support is not available. */
  DBUG_EXECUTE_IF(
      "ignore_punch_hole",
      file.block_size = ut_min(static_cast<ulint>(stat_info.block_size),
                               UNIV_PAGE_SIZE / 2););

  /* Punch hole is used only if it is supported, was requested by the
  caller, and the file system block is smaller than the page size. */
  if (!IORequest::is_punch_hole_supported() || !punch_hole ||
      file.block_size >= srv_page_size) {
    fil_no_punch_hole(&file);
  } else {
    file.punch_hole = punch_hole;
  }

  file.atomic_write = atomic_write;

  mutex_acquire();

  space->size += size;

  /* The local node is copied into the container of the tablespace. */
  space->files.push_back(file);

  mutex_release();

  /* Only the system tablespace, the first redo log space and temporary
  tablespaces may consist of more than one file. */
  ut_a(space->id == TRX_SYS_SPACE ||
       space->id == dict_sys_t::s_log_space_first_id ||
       space->purpose == FIL_TYPE_TEMPORARY || space->files.size() == 1);

  /* NOTE(review): this returns the FIRST file of the tablespace. For the
  multi-file spaces allowed by the assertion above that is presumably not
  the node just appended; confirm that callers only test for nullptr. */
  return (&space->files.front());
}
2468 
2469 /** Attach a file to a tablespace. File must be closed.
2470 @param[in]	name		file name (file must be closed)
2471 @param[in]	size		file size in database blocks, rounded
2472                                 downwards to an integer
2473 @param[in,out]	space		space where to append
2474 @param[in]	is_raw		true if a raw device or a raw disk partition
2475 @param[in]	atomic_write	true if the file has atomic write enabled
2476 @param[in]	max_pages	maximum number of pages in file
2477 @return pointer to the file name
2478 @retval nullptr if error */
fil_node_create(const char * name,page_no_t size,fil_space_t * space,bool is_raw,bool atomic_write,page_no_t max_pages)2479 char *fil_node_create(const char *name, page_no_t size, fil_space_t *space,
2480                       bool is_raw, bool atomic_write, page_no_t max_pages) {
2481   auto shard = fil_system->shard_by_id(space->id);
2482 
2483   fil_node_t *file;
2484 
2485   file = shard->create_node(name, size, space, is_raw,
2486                             IORequest::is_punch_hole_supported(), atomic_write,
2487                             max_pages);
2488 
2489   return (file == nullptr ? nullptr : file->name);
2490 }
2491 
/** Determine the size of a tablespace file and validate its header page
against the in-memory tablespace object.

First we open the file in the normal mode, no async I/O here, for
simplicity. Then do some checks, and close the file again.  NOTE that we
could not use the simple file read function os_file_read() in Windows
to read from a file opened for async I/O!

The checks compare the space ID, page size and flags stored in the first
page with those of file->space; a mismatch terminates the server
(ut_error / ib::fatal). On success the header fields FSP_SIZE,
FSP_FREE_LIMIT and the FSP_FREE list length are cached in the tablespace
object and, if file->size was still zero, file->size and space->size are
updated from the physical file size.

The retry path calls close_files_in_LRU(), which asserts that the shard
mutex is owned.
@param[in,out]	file		Get the size of this file
@param[in]	read_only_mode	true if read only mode set
@return DB_SUCCESS or error
@retval DB_ERROR if the file cannot be opened, or if the tablespace is
marked encrypted, recovery is not running and the encryption type of the
tablespace is not Encryption::AES */
dberr_t Fil_shard::get_file_size(fil_node_t *file, bool read_only_mode) {
  bool success;
  fil_space_t *space = file->space;

  /* Open the file read-only; retry for as long as closing another file
  from the LRU list succeeds. */
  do {
    ut_a(!file->is_open);

    file->handle = os_file_create_simple_no_error_handling(
        innodb_data_file_key, file->name, OS_FILE_OPEN, OS_FILE_READ_ONLY,
        read_only_mode, &success);

    if (!success) {
      /* The following call prints an error message */
      ulint err = os_file_get_last_error(true);

      /* NOTE(review): EMFILE + 100 is presumably how
      os_file_get_last_error() reports "too many open files" - confirm.
      In that case try to free a descriptor and retry the open. */
      if (err == EMFILE + 100) {
        if (close_files_in_LRU(true)) {
          continue;
        }
      }

      ib::warn(ER_IB_MSG_268) << "Cannot open '" << file->name
                              << "'."
                                 " Have you deleted .ibd files under a"
                                 " running mysqld server?";

      return (DB_ERROR);
    }

  } while (!success);

  os_offset_t size_bytes = os_file_get_size(file->handle);

  ut_a(size_bytes != (os_offset_t)-1);

#ifdef UNIV_HOTBACKUP
  /* For the system tablespace only the size is recorded; the header
  page is not read or validated. */
  if (space->id == TRX_SYS_SPACE) {
    file->size = (ulint)(size_bytes / UNIV_PAGE_SIZE);
    space->size += file->size;
    os_file_close(file->handle);
    return (DB_SUCCESS);
  }
#endif /* UNIV_HOTBACKUP */

  ut_a(space->purpose != FIL_TYPE_LOG);

  /* Read the first page of the tablespace */

  /* Allocate twice the page size so that a page aligned buffer of one
  page fits inside the allocation. */
  byte *buf2 = static_cast<byte *>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));

  /* Align memory for file I/O if we might have O_DIRECT set */

  byte *page = static_cast<byte *>(ut_align(buf2, UNIV_PAGE_SIZE));

  ut_ad(page == page_align(page));

  IORequest request(IORequest::READ);

  dberr_t err = os_file_read_first_page(request, file->name, file->handle, page,
                                        UNIV_PAGE_SIZE);

  ut_a(err == DB_SUCCESS);

  /* Everything below works on the in-memory copy of the first page. */
  os_file_close(file->handle);

  uint32_t flags = fsp_header_get_flags(page);
  space_id_t space_id = fsp_header_get_space_id(page);

  /* To determine if tablespace is from 5.7 or not, we
  rely on SDI flag. For IBDs from 5.7, which are opened
  during import or during upgrade, their initial size
  is smaller than the initial size in 8.0 */
  bool has_sdi = FSP_FLAGS_HAS_SDI(flags);

  uint8_t expected_size =
      has_sdi ? FIL_IBD_FILE_INITIAL_SIZE : FIL_IBD_FILE_INITIAL_SIZE_5_7;

  const page_size_t page_size(flags);

  ulint min_size = expected_size * page_size.physical();

  /* A file smaller than the minimum is a fatal error, except that a file
  with the SDI flag may be one page short (see below). */
  if (size_bytes < min_size) {
    if (has_sdi) {
      /** Add some tolerance when the tablespace is upgraded. If an empty
      general tablespace is created in 5.7, and then upgraded to 8.0, then
      its size changes from FIL_IBD_FILE_INITIAL_SIZE_5_7 pages to
      FIL_IBD_FILE_INITIAL_SIZE-1. */

      ut_ad(expected_size == FIL_IBD_FILE_INITIAL_SIZE);
      ulint upgrade_size = (expected_size - 1) * page_size.physical();

      if (size_bytes < upgrade_size) {
        ib::error(ER_IB_MSG_269)
            << "The size of tablespace file " << file->name << " is only "
            << size_bytes << ", should be at least " << upgrade_size << "!";

        ut_error;
      }

    } else {
      ib::error(ER_IB_MSG_269)
          << "The size of tablespace file " << file->name << " is only "
          << size_bytes << ", should be at least " << min_size << "!";

      ut_error;
    }
  }

  /* The space ID stored in the file must match the in-memory object. */
  if (space_id != space->id) {
    ib::fatal(ER_IB_MSG_270) << "Tablespace id is " << space->id
                             << " in the data dictionary but in file "
                             << file->name << " it is " << space_id << "!";
  }

  /* We need to adjust for compressed pages. */
  const page_size_t space_page_size(space->flags);

  if (!page_size.equals_to(space_page_size)) {
    ib::fatal(ER_IB_MSG_271)
        << "Tablespace file " << file->name << " has page size " << page_size
        << " (flags=" << ib::hex(flags) << ") but the data dictionary expects"
        << " page size " << space_page_size
        << " (flags=" << ib::hex(space->flags) << ")!";
  }

  /* If the SDI flag is set in the file header page, set it in space->flags. */
  space->flags |= flags & FSP_FLAGS_MASK_SDI;

#ifndef UNIV_HOTBACKUP
  /* It is possible that
  - For general tablespace, encryption flag is updated on disk but server
  crashed before DD could be updated OR
  - For DD tablespace, encryption flag is updated on disk.
  */
  if (FSP_FLAGS_GET_ENCRYPTION(flags)) {
    space->flags |= flags & FSP_FLAGS_MASK_ENCRYPTION;
  }
#endif /* !UNIV_HOTBACKUP */

  /* Make a copy of space->flags and flags from the page header
  so that they can be compared. */
  uint32_t fil_space_flags = space->flags;
  uint32_t header_fsp_flags = flags;

  /* If a crash occurs while an UNDO space is being truncated,
  it will be created new at startup. In that case, the fil_space_t
  object will have the ENCRYPTION flag set, but the header page will
  not be marked until the srv_master_thread gets around to it.
  The opposite can occur where the header page contains the encryption
  flag but the fil_space_t does not.  It could happen that undo
  encryption was turned off just before the crash or shutdown so that
  the srv_master_thread did not yet have time to apply it.
  So don't compare the encryption flag for undo tablespaces. */
  if (fsp_is_undo_tablespace(space->id)) {
    fsp_flags_unset_encryption(fil_space_flags);
    fsp_flags_unset_encryption(header_fsp_flags);
  }

  /* Make sure the space_flags are the same as the header page flags. */
  if (fil_space_flags != header_fsp_flags) {
    ib::error(ER_IB_MSG_272, ulong{space->flags}, file->name, ulonglong{flags});
    ut_error;
  }

  /* Cache the size and free list information from the header page in
  the tablespace object. */
  {
    page_no_t size = fsp_header_get_field(page, FSP_SIZE);

    page_no_t free_limit;

    free_limit = fsp_header_get_field(page, FSP_FREE_LIMIT);

    ulint free_len;

    free_len = flst_get_len(FSP_HEADER_OFFSET + FSP_FREE + page);

    /* Values cached earlier (non-zero) must agree with the header. */
    ut_ad(space->free_limit == 0 || space->free_limit == free_limit);

    ut_ad(space->free_len == 0 || space->free_len == free_len);

    space->size_in_header = size;
    space->free_limit = free_limit;

    ut_a(free_len < std::numeric_limits<uint32_t>::max());

    space->free_len = (uint32_t)free_len;
  }

  /* The page buffer is not accessed past this point. */
  ut_free(buf2);

  /* For encrypted tablespace, we need to check the
  encryption key and iv(initial vector) is read. */
  if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && !recv_recovery_is_on() &&
      space->encryption_type != Encryption::AES) {
    ib::error(ER_IB_MSG_273, file->name);

    return (DB_ERROR);
  }

  /* Only derive the size from the physical file when it is not yet
  known. */
  if (file->size == 0) {
    ulint extent_size;

    extent_size = page_size.physical() * FSP_EXTENT_SIZE;

#ifndef UNIV_HOTBACKUP
    /* Truncate the size to a multiple of extent size. */
    if (size_bytes >= extent_size) {
      size_bytes = ut_2pow_round(size_bytes, extent_size);
    }
#else /* !UNIV_HOTBACKUP */

    /* After apply-incremental, tablespaces are not
    extended to a whole megabyte. Do not cut off
    valid data. */

#endif /* !UNIV_HOTBACKUP */

    file->size = static_cast<page_no_t>(size_bytes / page_size.physical());

    space->size += file->size;
  }

  return (DB_SUCCESS);
}
2722 
/** Open a file of a tablespace.
The caller must own the shard mutex (this is asserted for every shard
except the redo shard). While waiting for other users of the file the
mutex is temporarily released and re-acquired, so another thread may have
opened the file by the time the wait ends.
@param[in,out]	file		Tablespace file
@param[in]	extend		true if the file is being extended
@return false if the file can't be opened, otherwise true */
bool Fil_shard::open_file(fil_node_t *file, bool extend) {
  bool success;
  fil_space_t *space = file->space;

  ut_ad(m_id == REDO_SHARD || mutex_owned());

  ut_a(!file->is_open);
  ut_a(file->n_pending == 0);

  /* Wait until nobody else is using the file. */
  while (file->in_use > 0) {
    /* We increment the reference count when extending
    the file. */
    if (file->in_use == 1 && extend) {
      break;
    }

    mutex_release();

    /* NOTE(review): presumably microseconds, i.e. 100 ms - confirm
    against os_thread_sleep(). */
    os_thread_sleep(100000);

    mutex_acquire();
  }

  /* The mutex was released above, so re-check the open state. */
  if (file->is_open) {
    return (true);
  }

  bool read_only_mode;

  /* The system temporary tablespace is never opened in read-only mode. */
  read_only_mode = !fsp_is_system_temporary(space->id) && srv_read_only_mode;

  /* Read the size and header information from the file when the file
  size is unknown, or when the header size of a regular tablespace has
  not been cached yet and this is its first file (with additional undo
  and startup phase conditions in the server build). */
  if (file->size == 0 ||
      (space->size_in_header == 0 && space->purpose == FIL_TYPE_TABLESPACE &&
       file == &space->files.front()
#if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
       && undo::is_active(space->id, false) &&
       srv_startup_is_before_trx_rollback_phase
#endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
       )) {

    /* We don't know the file size yet. */
    dberr_t err = get_file_size(file, read_only_mode);

    if (err != DB_SUCCESS) {
      return (false);
    }
  }

  /* Open the file for reading and writing, in Windows normally in the
  unbuffered async I/O mode, though global variables may make
  os_file_create() to fall back to the normal file I/O mode. */

  /* The three variants differ only in the performance schema key, the
  create mode (raw or regular) and the file type (log or data). */
  if (space->purpose == FIL_TYPE_LOG) {
    file->handle =
        os_file_create(innodb_log_file_key, file->name, OS_FILE_OPEN,
                       OS_FILE_AIO, OS_LOG_FILE, read_only_mode, &success);
  } else if (file->is_raw_disk) {
    file->handle =
        os_file_create(innodb_data_file_key, file->name, OS_FILE_OPEN_RAW,
                       OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
  } else {
    file->handle =
        os_file_create(innodb_data_file_key, file->name, OS_FILE_OPEN,
                       OS_FILE_AIO, OS_DATA_FILE, read_only_mode, &success);
  }

  if (success) {
    /* The file is ready for IO. */
    file_opened(file);
  }

  return (success);
}
2801 
2802 /** Close a tablespace file.
2803 @param[in]	LRU_close	true if called from LRU close
2804 @param[in,out]	file		Tablespace file to close */
close_file(fil_node_t * file,bool LRU_close)2805 void Fil_shard::close_file(fil_node_t *file, bool LRU_close) {
2806   ut_ad(mutex_owned());
2807 
2808   ut_a(file->is_open);
2809   ut_a(file->in_use == 0);
2810   ut_a(file->n_pending == 0);
2811   ut_a(file->n_pending_flushes == 0);
2812 
2813 #ifndef UNIV_HOTBACKUP
2814   ut_a(file->modification_counter == file->flush_counter ||
2815        file->space->purpose == FIL_TYPE_TEMPORARY || srv_fast_shutdown == 2);
2816 #endif /* !UNIV_HOTBACKUP */
2817 
2818   bool ret = os_file_close(file->handle);
2819 
2820   ut_a(ret);
2821 
2822   file->handle.m_file = (os_file_t)-1;
2823 
2824   file->is_open = false;
2825 
2826   ut_a(s_n_open > 0);
2827 
2828   --s_n_open;
2829 
2830   fil_n_file_opened = s_n_open;
2831 
2832   remove_from_LRU(file);
2833 }
2834 
2835 /** Tries to close a file in the LRU list.
2836 @param[in]	print_info	if true, prints information why it cannot close
2837                                 a file
2838 @return true if success, false if should retry later */
close_files_in_LRU(bool print_info)2839 bool Fil_shard::close_files_in_LRU(bool print_info) {
2840   ut_ad(mutex_owned());
2841 
2842   for (auto file = UT_LIST_GET_LAST(m_LRU); file != nullptr;
2843        file = UT_LIST_GET_PREV(LRU, file)) {
2844     if (file->modification_counter == file->flush_counter &&
2845         file->n_pending_flushes == 0 && file->in_use == 0) {
2846       close_file(file, true);
2847 
2848       return (true);
2849     }
2850 
2851     if (!print_info) {
2852       continue;
2853     }
2854 
2855     if (file->n_pending_flushes > 0) {
2856       ib::info(ER_IB_MSG_274, file->name, file->n_pending_flushes);
2857     }
2858 
2859     /* Prior to sharding the counters were under a global
2860     mutex. Now they are spread across the shards. Therefore
2861     it is normal for the modification counter to be out of
2862     sync with the flush counter for files that are in differnet
2863     shards. */
2864 
2865     if (file->modification_counter != file->flush_counter) {
2866       ib::info(ER_IB_MSG_275, file->name, longlong{file->modification_counter},
2867                longlong{file->flush_counter});
2868     }
2869 
2870     if (file->in_use > 0) {
2871       ib::info(ER_IB_MSG_276, file->name);
2872     }
2873   }
2874 
2875   return (false);
2876 }
2877 
2878 /** Tries to close a file in the LRU list.
2879 @param[in] print_info   if true, prints information why it cannot close a file
2880 @return true if success, false if should retry later */
close_file_in_all_LRU(bool print_info)2881 bool Fil_system::close_file_in_all_LRU(bool print_info) {
2882   for (auto shard : m_shards) {
2883     shard->mutex_acquire();
2884 
2885     if (print_info) {
2886       ib::info(ER_IB_MSG_277, shard->id(),
2887                ulonglong{UT_LIST_GET_LEN(shard->m_LRU)});
2888     }
2889 
2890     bool success = shard->close_files_in_LRU(print_info);
2891 
2892     shard->mutex_release();
2893 
2894     if (success) {
2895       return (true);
2896     }
2897   }
2898 
2899   return (false);
2900 }
2901 
2902 /** We are going to do a rename file and want to stop new I/O for a while.
2903 @param[in]	space		Tablespace for which we want to wait for IO
2904                                 to stop */
wait_for_io_to_stop(const fil_space_t * space)2905 void Fil_shard::wait_for_io_to_stop(const fil_space_t *space) {
2906   /* Note: We are reading the value of space->stop_ios without the
2907   cover of the Fil_shard::mutex. We incremented the in_use counter
2908   before waiting for IO to stop. */
2909 
2910   auto begin_time = ut_time_monotonic();
2911   auto start_time = begin_time;
2912 
2913   /* Spam the log after every minute. Ignore any race here. */
2914 
2915   while (space->stop_ios) {
2916     if ((ut_time_monotonic() - start_time) >= PRINT_INTERVAL_SECS) {
2917       start_time = ut_time_monotonic();
2918 
2919       ib::warn(ER_IB_MSG_278, space->name,
2920                (long long)(ut_time_monotonic() - begin_time));
2921     }
2922 
2923 #ifndef UNIV_HOTBACKUP
2924 
2925     /* Wake the I/O handler threads to make sure
2926     pending I/O's are performed */
2927     os_aio_simulated_wake_handler_threads();
2928 
2929 #endif /* UNIV_HOTBACKUP */
2930 
2931     /* Give the IO threads some time to work. */
2932     os_thread_yield();
2933   }
2934 }
2935 
2936 /** We keep log files and system tablespace files always open; this is
2937 important in preventing deadlocks in this module, as a page read
2938 completion often performs another read from the insert buffer. The
2939 insert buffer is in tablespace TRX_SYS_SPACE, and we cannot end up
2940 waiting in this function.
2941 @param[in]	space_id	Tablespace ID to look up
2942 @return tablespace instance */
get_reserved_space(space_id_t space_id)2943 fil_space_t *Fil_shard::get_reserved_space(space_id_t space_id) {
2944   if (space_id == TRX_SYS_SPACE) {
2945     return (fil_space_t::s_sys_space);
2946 
2947   } else if (space_id == dict_sys_t::s_log_space_first_id &&
2948              fil_space_t::s_redo_space != nullptr) {
2949     return (fil_space_t::s_redo_space);
2950   }
2951 
2952   return (get_space_by_id(space_id));
2953 }
2954 
2955 /** Reserves the mutex and tries to make sure we can open at least
2956 one file while holding it. This should be called before calling
2957 prepare_file_for_io(), because that function may need to open a file.
2958 @param[in]	space_id	Tablespace ID
2959 @param[out]	space		Tablespace instance
2960 @return true if a slot was reserved. */
mutex_acquire_and_get_space(space_id_t space_id,fil_space_t * & space)2961 bool Fil_shard::mutex_acquire_and_get_space(space_id_t space_id,
2962                                             fil_space_t *&space) {
2963   mutex_acquire();
2964 
2965   if (space_id == TRX_SYS_SPACE || dict_sys_t::is_reserved(space_id)) {
2966     space = get_reserved_space(space_id);
2967 
2968     return (false);
2969   }
2970 
2971   space = get_space_by_id(space_id);
2972 
2973   if (space == nullptr) {
2974     /* Caller handles the case of a missing tablespce. */
2975     return (false);
2976   }
2977 
2978   ut_ad(space->files.size() == 1);
2979 
2980   auto is_open = space->files.front().is_open;
2981 
2982   if (is_open) {
2983     /* Ensure that the file is not closed behind our back. */
2984     ++space->files.front().in_use;
2985   }
2986 
2987   mutex_release();
2988 
2989   if (is_open) {
2990     wait_for_io_to_stop(space);
2991 
2992     mutex_acquire();
2993 
2994     /* We are guaranteed that this file cannot be closed
2995     because we now own the mutex. */
2996 
2997     ut_ad(space->files.front().in_use > 0);
2998     --space->files.front().in_use;
2999 
3000     return (false);
3001   }
3002 
3003   /* The number of open file descriptors is a shared resource, in
3004   order to guarantee that we don't over commit, we use a ticket system
3005   to reserve a slot/ticket to open a file. This slot/ticket should
3006   be released after the file is opened. */
3007 
3008   while (!reserve_open_slot(m_id)) {
3009     os_thread_yield();
3010   }
3011 
3012   auto begin_time = ut_time_monotonic();
3013   auto start_time = begin_time;
3014   auto last_wake_time = begin_time;
3015 
3016   for (size_t i = 0; i < 3; ++i) {
3017     /* Flush tablespaces so that we can close modified
3018     files in the LRU list */
3019 
3020     auto type = to_int(FIL_TYPE_TABLESPACE);
3021 
3022     fil_system->flush_file_spaces(type);
3023 
3024     os_thread_yield();
3025 
3026     /* Reserve an open slot for this shard. So that this
3027     shard's open file succeeds. */
3028 
3029     while (fil_system->m_max_n_open <= s_n_open &&
3030            !fil_system->close_file_in_all_LRU(i > 1)) {
3031       if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
3032         start_time = ut_time_monotonic();
3033 
3034         ib::warn(ER_IB_MSG_279) << "Trying to close a file for "
3035                                 << start_time - begin_time << " seconds"
3036                                 << ". Configuration only allows for "
3037                                 << fil_system->m_max_n_open << " open files.";
3038       }
3039       if (ut_difftime(ut_time(), last_wake_time) > 1.0) {
3040         /* We've spent more than a second trying to close some of the open files
3041         without any luck. We can hang in this loop forewer, because:
3042         - files cannot be closed, they have pending IO requests
3043         - aio handler threads are waiting in os_aio_simulated_handler */
3044 
3045         /* in order to break the loop, lets wake aio handler threads */
3046         os_aio_simulated_wake_handler_threads();
3047 
3048         os_thread_yield();
3049 
3050         /* and flush the changes so that files with no pending IOs can be
3051         closed */
3052         fil_system->flush_file_spaces(type);
3053 
3054         last_wake_time = ut_time();
3055       }
3056     }
3057 
3058     if (fil_system->m_max_n_open > s_n_open) {
3059       break;
3060     }
3061 
3062 #ifndef UNIV_HOTBACKUP
3063     /* Wake the I/O-handler threads to make sure pending I/Os are
3064     performed */
3065     os_aio_simulated_wake_handler_threads();
3066 
3067     os_thread_yield();
3068 #endif /* !UNIV_HOTBACKUP */
3069   }
3070 
3071   mutex_acquire();
3072 
3073   return (true);
3074 }
3075 
3076 /** Prepare to free a file. Remove from the unflushed list if there
3077 are no pending flushes.
3078 @param[in,out]	file		File instance to free */
prepare_to_free_file(fil_node_t * file)3079 void Fil_shard::prepare_to_free_file(fil_node_t *file) {
3080   ut_ad(mutex_owned());
3081 
3082   fil_space_t *space = file->space;
3083 
3084   if (space->is_in_unflushed_spaces && space_is_flushed(space)) {
3085     space->is_in_unflushed_spaces = false;
3086 
3087     UT_LIST_REMOVE(m_unflushed_spaces, space);
3088   }
3089 }
3090 
3091 /** Prepare to free a file object from a tablespace memory cache.
3092 @param[in,out]	file	Tablespace file
3093 @param[in]	space	tablespace */
file_close_to_free(fil_node_t * file,fil_space_t * space)3094 void Fil_shard::file_close_to_free(fil_node_t *file, fil_space_t *space) {
3095   ut_ad(mutex_owned());
3096   ut_a(file->magic_n == FIL_NODE_MAGIC_N);
3097   ut_a(file->n_pending == 0);
3098   ut_a(file->in_use == 0);
3099   ut_a(file->space == space);
3100 
3101   if (file->is_open) {
3102     /* We fool the assertion in Fil_system::close_file() to think
3103     there are no unflushed modifications in the file */
3104 
3105     file->modification_counter = file->flush_counter;
3106 
3107     os_event_set(file->sync_event);
3108 
3109     if (fil_buffering_disabled(space)) {
3110       ut_ad(!space->is_in_unflushed_spaces);
3111       ut_ad(space_is_flushed(space));
3112 
3113     } else {
3114       prepare_to_free_file(file);
3115     }
3116 
3117     /* TODO: set second parameter to true, so to release
3118     fil_system mutex before logging tablespace name and id.
3119     To go around Bug#26271853 - POTENTIAL DEADLOCK BETWEEN
3120     FIL_SYSTEM MUTEX AND LOG MUTEX */
3121     close_file(file, true);
3122   }
3123 }
3124 
space_detach(fil_space_t * space)3125 void Fil_shard::space_detach(fil_space_t *space) {
3126   ut_ad(mutex_owned());
3127 
3128   m_names.erase(space->name);
3129 
3130   if (space->is_in_unflushed_spaces) {
3131     ut_ad(!fil_buffering_disabled(space));
3132 
3133     space->is_in_unflushed_spaces = false;
3134 
3135     UT_LIST_REMOVE(m_unflushed_spaces, space);
3136   }
3137 
3138   ut_a(space->magic_n == FIL_SPACE_MAGIC_N);
3139   ut_a(space->n_pending_flushes == 0);
3140 
3141   for (auto &file : space->files) {
3142     file_close_to_free(&file, space);
3143   }
3144 }
3145 
3146 /** Free a tablespace object on which fil_space_detach() was invoked.
3147 There must not be any pending I/O's or flushes on the files.
3148 @param[in,out]	space		tablespace */
space_free_low(fil_space_t * & space)3149 void Fil_shard::space_free_low(fil_space_t *&space) {
3150   for (auto &file : space->files) {
3151     ut_d(space->size -= file.size);
3152 
3153     os_event_destroy(file.sync_event);
3154 
3155     ut_free(file.name);
3156   }
3157 
3158   call_destructor(&space->files);
3159 
3160   ut_ad(space->size == 0);
3161 
3162   rw_lock_free(&space->latch);
3163 
3164   ut_free(space->name);
3165   ut_free(space);
3166 
3167   space = nullptr;
3168 }
3169 
3170 /** Frees a space object from the tablespace memory cache.
3171 Closes a tablespaces' files but does not delete them.
3172 There must not be any pending I/O's or flushes on the files.
3173 @param[in]	space_id	Tablespace ID
3174 @return fil_space_t instance on success or nullptr */
space_free(space_id_t space_id)3175 fil_space_t *Fil_shard::space_free(space_id_t space_id) {
3176   mutex_acquire();
3177 
3178   fil_space_t *space = get_space_by_id(space_id);
3179 
3180   if (space != nullptr) {
3181     space_detach(space);
3182 
3183     space_delete(space_id);
3184   }
3185 
3186   mutex_release();
3187 
3188   return (space);
3189 }
3190 
3191 /** Frees a space object from the tablespace memory cache.
3192 Closes a tablespaces' files but does not delete them.
3193 There must not be any pending i/o's or flushes on the files.
3194 @param[in]	space_id	Tablespace ID
3195 @param[in]	x_latched	Whether the caller holds X-mode space->latch
3196 @return true if success */
fil_space_free(space_id_t space_id,bool x_latched)3197 static bool fil_space_free(space_id_t space_id, bool x_latched) {
3198   ut_ad(space_id != TRX_SYS_SPACE);
3199 
3200   auto shard = fil_system->shard_by_id(space_id);
3201   auto space = shard->space_free(space_id);
3202 
3203   if (space == nullptr) {
3204     return (false);
3205   }
3206 
3207   if (x_latched) {
3208     rw_lock_x_unlock(&space->latch);
3209   }
3210 
3211   Fil_shard::space_free_low(space);
3212   ut_a(space == nullptr);
3213 
3214   return (true);
3215 }
3216 
3217 #ifdef UNIV_HOTBACKUP
3218 /** Frees a space object from the tablespace memory cache.
3219 Closes a tablespaces' files but does not delete them.
3220 There must not be any pending i/o's or flushes on the files.
3221 @param[in]	space_id	Tablespace ID
3222 @return true if success */
meb_fil_space_free(space_id_t space_id)3223 bool meb_fil_space_free(space_id_t space_id) {
3224   bool success = fil_space_free(space_id, false);
3225 
3226   if (success && space_id == dict_sys_t::s_log_space_first_id) {
3227     /* we freed redo log tablespace, clear the global variable for it */
3228     fil_space_t::s_redo_space = nullptr;
3229   }
3230 
3231   return (success);
3232 }
3233 #endif /* UNIV_HOTBACKUP */
3234 
3235 /** Create a space memory object and put it to the fil_system hash table.
3236 The tablespace name is independent from the tablespace file-name.
3237 Error messages are issued to the server log.
3238 @param[in]	name		Tablespace name
3239 @param[in]	space_id	Tablespace identifier
3240 @param[in]	flags		Tablespace flags
3241 @param[in]	purpose		Tablespace purpose
3242 @return pointer to created tablespace, to be filled in with fil_node_create()
3243 @retval nullptr on failure (such as when the same tablespace exists) */
space_create(const char * name,space_id_t space_id,uint32_t flags,fil_type_t purpose)3244 fil_space_t *Fil_shard::space_create(const char *name, space_id_t space_id,
3245                                      uint32_t flags, fil_type_t purpose) {
3246   ut_ad(mutex_owned());
3247 
3248   /* Look for a matching tablespace. */
3249   fil_space_t *space = get_space_by_name(name);
3250 
3251   if (space == nullptr) {
3252     space = get_space_by_id(space_id);
3253   }
3254 
3255   if (space != nullptr) {
3256     std::ostringstream oss;
3257 
3258     for (size_t i = 0; i < space->files.size(); ++i) {
3259       oss << "'" << space->files[i].name << "'";
3260 
3261       if (i < space->files.size() - 1) {
3262         oss << ", ";
3263       }
3264     }
3265 
3266     ib::info(ER_IB_MSG_281)
3267         << "Trying to add tablespace '" << name << "'"
3268         << " with id " << space_id << " to the tablespace"
3269         << " memory cache, but tablespace"
3270         << " '" << space->name << "'"
3271         << " already exists in the cache with space ID " << space->id
3272         << ". It maps to the following file(s): " << oss.str();
3273 
3274     return (nullptr);
3275   }
3276 
3277   space = static_cast<fil_space_t *>(ut_zalloc_nokey(sizeof(*space)));
3278 
3279   space->id = space_id;
3280 
3281   space->name = mem_strdup(name);
3282 
3283   new (&space->files) fil_space_t::Files();
3284 
3285 #ifndef UNIV_HOTBACKUP
3286   if (fil_system->is_greater_than_max_id(space_id) &&
3287       fil_type_is_data(purpose) && !recv_recovery_on &&
3288       !dict_sys_t::is_reserved(space_id) &&
3289       !fsp_is_system_temporary(space_id)) {
3290     fil_system->set_maximum_space_id(space);
3291   }
3292 #endif /* !UNIV_HOTBACKUP */
3293 
3294   space->purpose = purpose;
3295 
3296   ut_a(flags < std::numeric_limits<uint32_t>::max());
3297   space->flags = (uint32_t)flags;
3298 
3299   space->magic_n = FIL_SPACE_MAGIC_N;
3300 
3301   space->encryption_type = Encryption::NONE;
3302 
3303   rw_lock_create(fil_space_latch_key, &space->latch, SYNC_FSP);
3304 
3305 #ifndef UNIV_HOTBACKUP
3306   if (space->purpose == FIL_TYPE_TEMPORARY) {
3307     ut_d(space->latch.set_temp_fsp());
3308   }
3309 #endif /* !UNIV_HOTBACKUP */
3310 
3311   space_add(space);
3312 
3313   return (space);
3314 }
3315 
3316 /** Create a space memory object and put it to the fil_system hash table.
3317 The tablespace name is independent from the tablespace file-name.
3318 Error messages are issued to the server log.
3319 @param[in]	name		Tablespace name
3320 @param[in]	space_id	Tablespace ID
3321 @param[in]	flags		Tablespace flags
3322 @param[in]	purpose		Tablespace purpose
3323 @return pointer to created tablespace, to be filled in with fil_node_create()
3324 @retval nullptr on failure (such as when the same tablespace exists) */
fil_space_create(const char * name,space_id_t space_id,uint32_t flags,fil_type_t purpose)3325 fil_space_t *fil_space_create(const char *name, space_id_t space_id,
3326                               uint32_t flags, fil_type_t purpose) {
3327   ut_ad(fsp_flags_is_valid(flags));
3328   ut_ad(srv_page_size == UNIV_PAGE_SIZE_ORIG || flags != 0);
3329 
3330   DBUG_EXECUTE_IF("fil_space_create_failure", return (nullptr););
3331 
3332   if (purpose != FIL_TYPE_TEMPORARY) {
3333     /* Mark the clone as aborted only while executing a DDL which creates
3334     a base table, as any temporary table is ignored while cloning the database.
3335     Clone state must be set back to active before returning from function. */
3336     clone_mark_abort(true);
3337   }
3338 
3339   fil_system->mutex_acquire_all();
3340 
3341   auto shard = fil_system->shard_by_id(space_id);
3342 
3343   auto space = shard->space_create(name, space_id, flags, purpose);
3344 
3345   if (space == nullptr) {
3346     /* Duplicate error. */
3347     fil_system->mutex_release_all();
3348 
3349     if (purpose != FIL_TYPE_TEMPORARY) {
3350       clone_mark_active();
3351     }
3352 
3353     return (nullptr);
3354   }
3355 
3356   /* Cache the system tablespaces, avoid looking them up during IO. */
3357 
3358   if (space->id == TRX_SYS_SPACE) {
3359     ut_a(fil_space_t::s_sys_space == nullptr ||
3360          fil_space_t::s_sys_space == space);
3361 
3362     fil_space_t::s_sys_space = space;
3363 
3364   } else if (space->id == dict_sys_t::s_log_space_first_id) {
3365     ut_a(fil_space_t::s_redo_space == nullptr ||
3366          fil_space_t::s_redo_space == space);
3367 
3368     fil_space_t::s_redo_space = space;
3369   }
3370 
3371   fil_system->mutex_release_all();
3372 
3373   if (purpose != FIL_TYPE_TEMPORARY) {
3374     clone_mark_active();
3375   }
3376 
3377   return (space);
3378 }
3379 
3380 /** Assigns a new space id for a new single-table tablespace. This works
3381 simply by incrementing the global counter. If 4 billion id's is not enough,
3382 we may need to recycle id's.
3383 @param[out]	space_id		Set this to the new tablespace ID
3384 @return true if assigned, false if not */
assign_new_space_id(space_id_t * space_id)3385 bool Fil_system::assign_new_space_id(space_id_t *space_id) {
3386   mutex_acquire_all();
3387 
3388   space_id_t id = *space_id;
3389 
3390   if (id < m_max_assigned_id) {
3391     id = m_max_assigned_id;
3392   }
3393 
3394   ++id;
3395 
3396   space_id_t reserved_space_id = dict_sys_t::s_reserved_space_id;
3397 
3398   if (id > (reserved_space_id / 2) && (id % 1000000UL == 0)) {
3399     ib::warn(ER_IB_MSG_282)
3400         << "You are running out of new single-table"
3401            " tablespace id's. Current counter is "
3402         << id << " and it must not exceed " << reserved_space_id
3403         << "! To reset the counter to zero you have to dump"
3404            " all your tables and recreate the whole InnoDB"
3405            " installation.";
3406   }
3407 
3408   bool success = !dict_sys_t::is_reserved(id);
3409 
3410   if (success) {
3411     *space_id = m_max_assigned_id = id;
3412 
3413   } else {
3414     ib::warn(ER_IB_MSG_283) << "You have run out of single-table tablespace"
3415                                " id's! Current counter is "
3416                             << id
3417                             << ". To reset the counter to zero"
3418                                " you have to dump all your tables and"
3419                                " recreate the whole InnoDB installation.";
3420 
3421     *space_id = SPACE_UNKNOWN;
3422   }
3423 
3424   mutex_release_all();
3425 
3426   return (success);
3427 }
3428 
3429 /** Assigns a new space id for a new single-table tablespace. This works
3430 simply by incrementing the global counter. If 4 billion id's is not enough,
3431 we may need to recycle id's.
3432 @param[out]	space_id		Set this to the new tablespace ID
3433 @return true if assigned, false if not */
fil_assign_new_space_id(space_id_t * space_id)3434 bool fil_assign_new_space_id(space_id_t *space_id) {
3435   return (fil_system->assign_new_space_id(space_id));
3436 }
3437 
3438 /** Opens the files associated with a tablespace and returns a pointer to
3439 the fil_space_t that is in the memory cache associated with a space id.
3440 @param[in]	space_id	Get the tablespace instance or this ID
3441 @return file_space_t pointer, nullptr if space not found */
space_load(space_id_t space_id)3442 fil_space_t *Fil_shard::space_load(space_id_t space_id) {
3443   ut_ad(mutex_owned());
3444 
3445   fil_space_t *space = get_space_by_id(space_id);
3446 
3447   if (space == nullptr || space->size != 0) {
3448     return (space);
3449   }
3450 
3451   switch (space->purpose) {
3452     case FIL_TYPE_LOG:
3453       break;
3454 
3455     case FIL_TYPE_IMPORT:
3456     case FIL_TYPE_TEMPORARY:
3457     case FIL_TYPE_TABLESPACE:
3458 
3459       ut_a(space_id != TRX_SYS_SPACE);
3460 
3461       mutex_release();
3462 
3463       auto slot = mutex_acquire_and_get_space(space_id, space);
3464 
3465       if (space == nullptr) {
3466         if (slot) {
3467           release_open_slot(m_id);
3468         }
3469 
3470         return (nullptr);
3471       }
3472 
3473       ut_a(1 == space->files.size());
3474 
3475       {
3476         auto file = &space->files.front();
3477 
3478         /* It must be a single-table tablespace and
3479         we have not opened the file yet; the following
3480         calls will open it and update the size fields */
3481 
3482         bool success = prepare_file_for_io(file, false);
3483 
3484         if (slot) {
3485           release_open_slot(m_id);
3486         }
3487 
3488         if (!success) {
3489           /* The single-table tablespace can't be opened,
3490           because the ibd file is missing. */
3491 
3492           return (nullptr);
3493         }
3494 
3495         complete_io(file, IORequestRead);
3496       }
3497   }
3498 
3499   return (space);
3500 }
3501 
3502 /** Returns the path from the first fil_node_t found with this space ID.
3503 The caller is responsible for freeing the memory allocated here for the
3504 value returned.
3505 @param[in]	space_id	Tablespace ID
3506 @return own: A copy of fil_node_t::path, nullptr if space ID is zero
3507 or not found. */
fil_space_get_first_path(space_id_t space_id)3508 char *fil_space_get_first_path(space_id_t space_id) {
3509   auto shard = fil_system->shard_by_id(space_id);
3510 
3511   shard->mutex_acquire();
3512 
3513   fil_space_t *space = shard->space_load(space_id);
3514 
3515   char *path;
3516 
3517   if (space != nullptr) {
3518     path = mem_strdup(space->files.front().name);
3519   } else {
3520     path = nullptr;
3521   }
3522 
3523   shard->mutex_release();
3524 
3525   return (path);
3526 }
3527 
3528 /** Returns the size of the space in pages. The tablespace must be cached
3529 in the memory cache.
3530 @param[in]	space_id	Tablespace ID
3531 @return space size, 0 if space not found */
fil_space_get_size(space_id_t space_id)3532 page_no_t fil_space_get_size(space_id_t space_id) {
3533   auto shard = fil_system->shard_by_id(space_id);
3534 
3535   shard->mutex_acquire();
3536 
3537   fil_space_t *space = shard->space_load(space_id);
3538 
3539   page_no_t size = space ? space->size : 0;
3540 
3541   shard->mutex_release();
3542 
3543   return (size);
3544 }
3545 
3546 /** Returns the flags of the space. The tablespace must be cached
3547 in the memory cache.
3548 @param[in]	space_id	Tablespace ID for which to get the flags
3549 @return flags, ULINT_UNDEFINED if space not found */
fil_space_get_flags(space_id_t space_id)3550 uint32_t fil_space_get_flags(space_id_t space_id) {
3551   auto shard = fil_system->shard_by_id(space_id);
3552 
3553   shard->mutex_acquire();
3554 
3555   fil_space_t *space = shard->space_load(space_id);
3556 
3557   uint32_t flags;
3558 
3559   flags = (space != nullptr) ? space->flags : UINT32_UNDEFINED;
3560 
3561   shard->mutex_release();
3562 
3563   return (flags);
3564 }
3565 
3566 /** Open each file of a tablespace if not already open.
3567 @param[in]	space_id	tablespace identifier
3568 @retval	true	if all file nodes were opened
3569 @retval	false	on failure */
space_open(space_id_t space_id)3570 bool Fil_shard::space_open(space_id_t space_id) {
3571   ut_ad(mutex_owned());
3572 
3573   fil_space_t *space = get_space_by_id(space_id);
3574 
3575   for (auto &file : space->files) {
3576     if (!file.is_open && !open_file(&file, false)) {
3577       return (false);
3578     }
3579   }
3580 
3581   return (true);
3582 }
3583 
3584 /** Open each file of a tablespace if not already open.
3585 @param[in]	space_id	tablespace identifier
3586 @retval	true	if all file nodes were opened
3587 @retval	false	on failure */
fil_space_open(space_id_t space_id)3588 bool fil_space_open(space_id_t space_id) {
3589   auto shard = fil_system->shard_by_id(space_id);
3590 
3591   shard->mutex_acquire();
3592 
3593   bool success = shard->space_open(space_id);
3594 
3595   shard->mutex_release();
3596 
3597   return (success);
3598 }
3599 
3600 /** Close each file of a tablespace if open.
3601 @param[in]	space_id	tablespace identifier */
fil_space_close(space_id_t space_id)3602 void fil_space_close(space_id_t space_id) {
3603   if (fil_system == nullptr) {
3604     return;
3605   }
3606 
3607   auto shard = fil_system->shard_by_id(space_id);
3608 
3609   shard->close_file(space_id);
3610 }
3611 
3612 /** Returns the page size of the space and whether it is compressed or not.
3613 The tablespace must be cached in the memory cache.
3614 @param[in]	space_id	Tablespace ID
3615 @param[out]	found		true if tablespace was found
3616 @return page size */
fil_space_get_page_size(space_id_t space_id,bool * found)3617 const page_size_t fil_space_get_page_size(space_id_t space_id, bool *found) {
3618   const uint32_t flags = fil_space_get_flags(space_id);
3619 
3620   if (flags == UINT32_UNDEFINED) {
3621     *found = false;
3622     return (univ_page_size);
3623   }
3624 
3625   *found = true;
3626 
3627   return (page_size_t(flags));
3628 }
3629 
3630 /** Initializes the tablespace memory cache.
3631 @param[in]	max_n_open	Maximum number of open files */
fil_init(ulint max_n_open)3632 void fil_init(ulint max_n_open) {
3633   static_assert((1 << UNIV_PAGE_SIZE_SHIFT_MAX) == UNIV_PAGE_SIZE_MAX,
3634                 "(1 << UNIV_PAGE_SIZE_SHIFT_MAX) != UNIV_PAGE_SIZE_MAX");
3635 
3636   static_assert((1 << UNIV_PAGE_SIZE_SHIFT_MIN) == UNIV_PAGE_SIZE_MIN,
3637                 "(1 << UNIV_PAGE_SIZE_SHIFT_MIN) != UNIV_PAGE_SIZE_MIN");
3638 
3639   ut_a(fil_system == nullptr);
3640 
3641   ut_a(max_n_open > 0);
3642 
3643   fil_system = UT_NEW_NOKEY(Fil_system(MAX_SHARDS, max_n_open));
3644 }
3645 
3646 /** Open all the system files.
3647 @param[in]	max_n_open	Maximum number of open files allowed
3648 @param[in,out]	n_open		Current number of open files */
open_system_tablespaces(size_t max_n_open,size_t * n_open)3649 void Fil_shard::open_system_tablespaces(size_t max_n_open, size_t *n_open) {
3650   mutex_acquire();
3651 
3652   for (auto elem : m_spaces) {
3653     auto space = elem.second;
3654 
3655     if (Fil_system::space_belongs_in_LRU(space)) {
3656       continue;
3657     }
3658 
3659     for (auto &file : space->files) {
3660       if (!file.is_open) {
3661         if (!open_file(&file, false)) {
3662           /* This func is called during server's
3663           startup. If some file of log or system
3664           tablespace is missing, the server
3665           can't start successfully. So we should
3666           assert for it. */
3667           ut_a(0);
3668         }
3669 
3670         ++*n_open;
3671       }
3672 
3673       if (max_n_open < 10 + *n_open) {
3674         ib::warn(ER_IB_MSG_284, *n_open, max_n_open);
3675       }
3676     }
3677   }
3678 
3679   mutex_release();
3680 }
3681 
3682 /** Opens all log files and system tablespace data files in all shards. */
open_all_system_tablespaces()3683 void Fil_system::open_all_system_tablespaces() {
3684   size_t n_open = 0;
3685 
3686   for (auto shard : m_shards) {
3687     shard->open_system_tablespaces(m_max_n_open, &n_open);
3688   }
3689 }
3690 
3691 /** Opens all log files and system tablespace data files. They stay open
3692 until the database server shutdown. This should be called at a server
3693 startup after the space objects for the log and the system tablespace
3694 have been created. The purpose of this operation is to make sure we
3695 never run out of file descriptors if we need to read from the insert
3696 buffer or to write to the log. */
fil_open_log_and_system_tablespace_files()3697 void fil_open_log_and_system_tablespace_files() {
3698   fil_system->open_all_system_tablespaces();
3699 }
3700 
3701 /** Close all open files. */
close_all_files()3702 void Fil_shard::close_all_files() {
3703   ut_ad(mutex_owned());
3704 
3705   auto end = m_spaces.end();
3706 
3707   for (auto it = m_spaces.begin(); it != end; it = m_spaces.erase(it)) {
3708     auto space = it->second;
3709 
3710     ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
3711          space->id == dict_sys_t::s_log_space_first_id ||
3712          space->files.size() == 1);
3713 
3714     if (space->id == dict_sys_t::s_log_space_first_id) {
3715       fil_space_t::s_redo_space = nullptr;
3716     }
3717 
3718     for (auto &file : space->files) {
3719       if (file.is_open) {
3720         close_file(&file, false);
3721       }
3722     }
3723 
3724     space_detach(space);
3725 
3726     space_free_low(space);
3727 
3728     ut_a(space == nullptr);
3729   }
3730 }
3731 
3732 /** Close all open files. */
close_all_files()3733 void Fil_system::close_all_files() {
3734   for (auto shard : m_shards) {
3735     shard->mutex_acquire();
3736 
3737     shard->close_all_files();
3738 
3739     shard->mutex_release();
3740   }
3741 
3742 #ifndef UNIV_HOTBACKUP
3743   /* Revert to old names if downgrading after upgrade failure. */
3744   if (srv_downgrade_partition_files) {
3745     rename_partition_files(true);
3746   }
3747 
3748   clear_old_files();
3749 #endif /* !UNIV_HOTBACKUP */
3750 }
3751 
3752 /** Closes all open files. There must not be any pending i/o's or not flushed
3753 modifications in the files. */
fil_close_all_files()3754 void fil_close_all_files() { fil_system->close_all_files(); }
3755 
3756 /** Open a file of a tablespace.
3757 The caller must own the shard mutex.
3758 @param[in,out]  file    Tablespace file
3759 @return false if the file can't be opened, otherwise true */
fil_node_open_file(fil_node_t * file)3760 bool fil_node_open_file(fil_node_t *file) {
3761   fil_space_t *space = file->space;
3762 
3763   auto shard = fil_system->shard_by_id(space->id);
3764 
3765   shard->mutex_acquire();
3766 
3767   bool res = shard->open_file(file, false);
3768 
3769   shard->mutex_release();
3770 
3771   return res;
3772 }
3773 
3774 /** Closes a file.
3775 @param[in] file file to close. */
fil_node_close_file(fil_node_t * file)3776 void fil_node_close_file(fil_node_t *file) {
3777   if (!file->is_open) {
3778     return;
3779   }
3780 
3781   fil_space_t *space = file->space;
3782 
3783   auto shard = fil_system->shard_by_id(space->id);
3784 
3785   shard->mutex_acquire();
3786 
3787   shard->close_file(file, true);
3788 
3789   shard->mutex_release();
3790 }
3791 
3792 /** Close log files.
3793 @param[in]	free_all	If set then free all instances */
close_log_files(bool free_all)3794 void Fil_shard::close_log_files(bool free_all) {
3795   mutex_acquire();
3796 
3797   auto end = m_spaces.end();
3798 
3799   for (auto it = m_spaces.begin(); it != end; /* No op */) {
3800     auto space = it->second;
3801 
3802     if (space->purpose != FIL_TYPE_LOG) {
3803       ++it;
3804       continue;
3805     }
3806 
3807     if (space->id == dict_sys_t::s_log_space_first_id) {
3808       ut_a(fil_space_t::s_redo_space == space);
3809 
3810       fil_space_t::s_redo_space = nullptr;
3811     }
3812 
3813     for (auto &file : space->files) {
3814       if (file.is_open) {
3815         close_file(&file, false);
3816       }
3817     }
3818 
3819     if (free_all) {
3820       space_detach(space);
3821       space_free_low(space);
3822       ut_a(space == nullptr);
3823 
3824       it = m_spaces.erase(it);
3825 
3826     } else {
3827       ++it;
3828     }
3829   }
3830 
3831   mutex_release();
3832 }
3833 
3834 /** Close all log files in all shards.
3835 @param[in]	free_all	If set then free all instances */
close_all_log_files(bool free_all)3836 void Fil_system::close_all_log_files(bool free_all) {
3837   for (auto shard : m_shards) {
3838     shard->close_log_files(free_all);
3839   }
3840 }
3841 
3842 /** Closes the redo log files. There must not be any pending i/o's or not
3843 flushed modifications in the files.
3844 @param[in]	free_all	If set then free all instances */
fil_close_log_files(bool free_all)3845 void fil_close_log_files(bool free_all) {
3846   fil_system->close_all_log_files(free_all);
3847 }
3848 
3849 /** Iterate through all tablespaces
3850 @param[in]  include_log Include redo log space, if true
3851 @param[in]  f   Callback
3852 @return any error returned by the callback function. */
iterate_spaces(bool include_log,Fil_space_iterator::Function & f)3853 dberr_t Fil_shard::iterate_spaces(bool include_log,
3854                                   Fil_space_iterator::Function &f) {
3855   mutex_acquire();
3856 
3857   for (auto &elem : m_spaces) {
3858     auto space = elem.second;
3859 
3860     if (space->purpose != FIL_TYPE_TABLESPACE &&
3861         (!include_log || space->purpose != FIL_TYPE_LOG)) {
3862       continue;
3863     }
3864 
3865     dberr_t err = f(space);
3866 
3867     if (err != DB_SUCCESS) {
3868       mutex_release();
3869 
3870       return (err);
3871       ;
3872     }
3873   }
3874 
3875   mutex_release();
3876 
3877   return (DB_SUCCESS);
3878 }
3879 
3880 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3881 returning the nodes via callback function cbk.
3882 @param[in]	include_log	Include log files, if true
3883 @param[in]	f		Callback
3884 @return any error returned by the callback function. */
iterate(bool include_log,Fil_iterator::Function & f)3885 dberr_t Fil_shard::iterate(bool include_log, Fil_iterator::Function &f) {
3886   mutex_acquire();
3887 
3888   for (auto &elem : m_spaces) {
3889     auto space = elem.second;
3890 
3891     if (space->purpose != FIL_TYPE_TABLESPACE &&
3892         (!include_log || space->purpose != FIL_TYPE_LOG)) {
3893       continue;
3894     }
3895 
3896     for (auto &file : space->files) {
3897       /* Note: The callback can release the mutex. */
3898 
3899       dberr_t err = f(&file);
3900 
3901       if (err != DB_SUCCESS) {
3902         mutex_release();
3903 
3904         return (err);
3905       }
3906     }
3907   }
3908 
3909   mutex_release();
3910 
3911   return (DB_SUCCESS);
3912 }
3913 
3914 /** Iterate through all tablespaces
3915 @param[in]  include_log Include redo log space, if true
3916 @param[in]  f   Callback
3917 @return any error returned by the callback function. */
iterate_spaces(bool include_log,Fil_space_iterator::Function & f)3918 dberr_t Fil_system::iterate_spaces(bool include_log,
3919                                    Fil_space_iterator::Function &f) {
3920   for (auto shard : m_shards) {
3921     dberr_t err = shard->iterate_spaces(include_log, f);
3922 
3923     if (err != DB_SUCCESS) {
3924       return (err);
3925     }
3926   }
3927 
3928   return (DB_SUCCESS);
3929 }
3930 
3931 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3932 returning the nodes via callback function cbk.
3933 @param[in]	include_log	include log files, if true
3934 @param[in]	f		callback function
3935 @return any error returned by the callback function. */
iterate(bool include_log,Fil_iterator::Function & f)3936 dberr_t Fil_system::iterate(bool include_log, Fil_iterator::Function &f) {
3937   for (auto shard : m_shards) {
3938     dberr_t err = shard->iterate(include_log, f);
3939 
3940     if (err != DB_SUCCESS) {
3941       return (err);
3942     }
3943   }
3944 
3945   return (DB_SUCCESS);
3946 }
3947 
3948 /** Iterate through all spaces
3949 returning the them via callback function cbk.
3950 @param[in]	include_log	include log files, if true
3951 @param[in]	f		Callback
3952 @return any error returned by the callback function. */
iterate(bool include_log,Function && f)3953 dberr_t Fil_space_iterator::iterate(bool include_log, Function &&f) {
3954   return (fil_system->iterate_spaces(include_log, f));
3955 }
3956 
3957 /** Iterate through all persistent tablespace files (FIL_TYPE_TABLESPACE)
3958 returning the nodes via callback function cbk.
3959 @param[in]  include_log include log files, if true
3960 @param[in]  f   Callback
3961 @return any error returned by the callback function. */
iterate(bool include_log,Function && f)3962 dberr_t Fil_iterator::iterate(bool include_log, Function &&f) {
3963   return (fil_system->iterate(include_log, f));
3964 }
3965 
3966 /** Sets the max tablespace id counter if the given number is bigger than the
3967 previous value.
3968 @param[in]	max_id		Maximum known tablespace ID */
fil_set_max_space_id_if_bigger(space_id_t max_id)3969 void fil_set_max_space_id_if_bigger(space_id_t max_id) {
3970   if (dict_sys_t::is_reserved(max_id)) {
3971     ib::fatal(ER_IB_MSG_285, ulong{max_id});
3972   }
3973 
3974   fil_system->update_maximum_space_id(max_id);
3975 }
3976 
3977 /** Write the flushed LSN to the page header of the first page in the
3978 system tablespace.
3979 @param[in]	lsn	flushed LSN
3980 @return DB_SUCCESS or error number */
fil_write_flushed_lsn(lsn_t lsn)3981 dberr_t fil_write_flushed_lsn(lsn_t lsn) {
3982   byte *buf1;
3983   byte *buf;
3984   dberr_t err;
3985 
3986   buf1 = static_cast<byte *>(ut_malloc_nokey(2 * UNIV_PAGE_SIZE));
3987   buf = static_cast<byte *>(ut_align(buf1, UNIV_PAGE_SIZE));
3988 
3989   const page_id_t page_id(TRX_SYS_SPACE, 0);
3990 
3991   err = fil_read(page_id, univ_page_size, 0, univ_page_size.physical(), buf);
3992 
3993   if (err == DB_SUCCESS) {
3994     mach_write_to_8(buf + FIL_PAGE_FILE_FLUSH_LSN, lsn);
3995 
3996     err = fil_write(page_id, univ_page_size, 0, univ_page_size.physical(), buf);
3997 
3998     fil_system->flush_file_spaces(to_int(FIL_TYPE_TABLESPACE));
3999   }
4000 
4001   ut_free(buf1);
4002 
4003   return (err);
4004 }
4005 
4006 /** Acquire a tablespace when it could be dropped concurrently.
4007 Used by background threads that do not necessarily hold proper locks
4008 for concurrency control.
4009 @param[in]	space_id	Tablespace ID
4010 @param[in]	silent		Whether to silently ignore missing tablespaces
4011 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire_low(space_id_t space_id,bool silent)4012 inline fil_space_t *fil_space_acquire_low(space_id_t space_id, bool silent) {
4013   auto shard = fil_system->shard_by_id(space_id);
4014 
4015   shard->mutex_acquire();
4016 
4017   fil_space_t *space = shard->get_space_by_id(space_id);
4018 
4019   if (space == nullptr) {
4020     if (!silent) {
4021       ib::warn(ER_IB_MSG_286, ulong{space_id});
4022     }
4023   } else if (space->stop_new_ops) {
4024     space = nullptr;
4025   } else {
4026     ++space->n_pending_ops;
4027   }
4028 
4029   shard->mutex_release();
4030 
4031   return (space);
4032 }
4033 
4034 /** Acquire a tablespace when it could be dropped concurrently.
4035 Used by background threads that do not necessarily hold proper locks
4036 for concurrency control.
4037 @param[in]	space_id	Tablespace ID
4038 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire(space_id_t space_id)4039 fil_space_t *fil_space_acquire(space_id_t space_id) {
4040   return (fil_space_acquire_low(space_id, false));
4041 }
4042 
4043 /** Acquire a tablespace that may not exist.
4044 Used by background threads that do not necessarily hold proper locks
4045 for concurrency control.
4046 @param[in]	space_id	Tablespace ID
4047 @return the tablespace, or nullptr if missing or being deleted */
fil_space_acquire_silent(space_id_t space_id)4048 fil_space_t *fil_space_acquire_silent(space_id_t space_id) {
4049   return (fil_space_acquire_low(space_id, true));
4050 }
4051 
4052 /** Release a tablespace acquired with fil_space_acquire().
4053 @param[in,out]	space	tablespace to release  */
fil_space_release(fil_space_t * space)4054 void fil_space_release(fil_space_t *space) {
4055   auto shard = fil_system->shard_by_id(space->id);
4056 
4057   shard->mutex_acquire();
4058 
4059   ut_ad(space->magic_n == FIL_SPACE_MAGIC_N);
4060   ut_ad(space->n_pending_ops > 0);
4061 
4062   --space->n_pending_ops;
4063 
4064   shard->mutex_release();
4065 }
4066 
4067 /** Check for pending operations.
4068 @param[in]	space	tablespace
4069 @param[in]	count	number of attempts so far
4070 @return 0 if no pending operations else count + 1. */
space_check_pending_operations(fil_space_t * space,ulint count) const4071 ulint Fil_shard::space_check_pending_operations(fil_space_t *space,
4072                                                 ulint count) const {
4073   ut_ad(mutex_owned());
4074 
4075   if (space != nullptr && space->n_pending_ops > 0) {
4076     if (count > 5000) {
4077       ib::warn(ER_IB_MSG_287, space->name, ulong{space->n_pending_ops});
4078     }
4079 
4080     return (count + 1);
4081   }
4082 
4083   return (0);
4084 }
4085 
4086 /** Check for pending IO.
4087 @param[in]	space		Tablespace to check
4088 @param[in]	file		File in space list
4089 @param[in]	count		number of attempts so far
4090 @return 0 if no pending else count + 1. */
check_pending_io(const fil_space_t * space,const fil_node_t & file,ulint count) const4091 ulint Fil_shard::check_pending_io(const fil_space_t *space,
4092                                   const fil_node_t &file, ulint count) const {
4093   ut_ad(mutex_owned());
4094   ut_a(space->n_pending_ops == 0);
4095 
4096   ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
4097        space->id == dict_sys_t::s_log_space_first_id ||
4098        space->files.size() == 1);
4099 
4100   if (space->n_pending_flushes > 0 || file.n_pending > 0) {
4101     if (count > 1000) {
4102       ib::warn(ER_IB_MSG_288, space->name, ulong{space->n_pending_flushes},
4103                size_t{file.n_pending});
4104     }
4105 
4106     return (count + 1);
4107   }
4108 
4109   return (0);
4110 }
4111 
4112 /** Check pending operations on a tablespace.
4113 @param[in]	space_id	Tablespace ID
4114 @param[out]	space		tablespace instance in memory
4115 @param[out]	path		tablespace path
4116 @return DB_SUCCESS or DB_TABLESPACE_NOT_FOUND. */
space_check_pending_operations(space_id_t space_id,fil_space_t * & space,char ** path) const4117 dberr_t Fil_shard::space_check_pending_operations(space_id_t space_id,
4118                                                   fil_space_t *&space,
4119                                                   char **path) const {
4120   ut_ad(!fsp_is_system_tablespace(space_id));
4121   ut_ad(!fsp_is_global_temporary(space_id));
4122 
4123   space = nullptr;
4124 
4125   mutex_acquire();
4126 
4127   fil_space_t *sp = get_space_by_id(space_id);
4128 
4129   if (sp != nullptr) {
4130     sp->stop_new_ops = true;
4131   }
4132 
4133   mutex_release();
4134 
4135   /* Check for pending operations. */
4136 
4137   ulint count = 0;
4138 
4139   do {
4140     mutex_acquire();
4141 
4142     sp = get_space_by_id(space_id);
4143 
4144     count = space_check_pending_operations(sp, count);
4145 
4146     mutex_release();
4147 
4148     if (count > 0) {
4149       os_thread_sleep(20000);
4150     }
4151 
4152   } while (count > 0);
4153 
4154   /* Check for pending IO. */
4155 
4156   *path = nullptr;
4157 
4158   do {
4159     mutex_acquire();
4160 
4161     sp = get_space_by_id(space_id);
4162 
4163     if (sp == nullptr) {
4164       mutex_release();
4165 
4166       return (DB_TABLESPACE_NOT_FOUND);
4167     }
4168 
4169     const fil_node_t &file = sp->files.front();
4170 
4171     count = check_pending_io(sp, file, count);
4172 
4173     if (count == 0) {
4174       *path = mem_strdup(file.name);
4175     }
4176 
4177     mutex_release();
4178 
4179     if (count > 0) {
4180       os_thread_sleep(20000);
4181     }
4182 
4183   } while (count > 0);
4184 
4185   ut_ad(sp != nullptr);
4186 
4187   space = sp;
4188 
4189   return (DB_SUCCESS);
4190 }
4191 
get_existing_path(const std::string & path,std::string & ghost)4192 std::string Fil_path::get_existing_path(const std::string &path,
4193                                         std::string &ghost) {
4194   std::string existing_path{path};
4195 
4196   /* This is only called for non-existing paths. */
4197   while (!os_file_exists(existing_path.c_str())) {
4198     /* Some part of this path does not exist.
4199     If the last char is a separator, strip it off. */
4200     trim_separator(existing_path);
4201 
4202     auto sep = existing_path.find_last_of(SEPARATOR);
4203     if (sep == std::string::npos) {
4204       /* If no separator is found, it must be relative to the current dir. */
4205       if (existing_path == ".") {
4206         /* This probably cannot happen, but break here to ensure that the
4207         loop always has a way out. */
4208         break;
4209       }
4210       ghost.assign(path);
4211       existing_path.assign(".");
4212       existing_path.push_back(OS_SEPARATOR);
4213     } else {
4214       ghost.assign(path.substr(sep + 1, path.length()));
4215       existing_path.resize(sep + 1);
4216     }
4217   }
4218 
4219   return (existing_path);
4220 }
4221 
get_real_path(const std::string & path,bool force)4222 std::string Fil_path::get_real_path(const std::string &path, bool force) {
4223   bool path_exists;
4224   os_file_type_t path_type;
4225   char abspath[OS_FILE_MAX_PATH];
4226   std::string in_path{path};
4227   std::string real_path;
4228 
4229   if (path.empty()) {
4230     return (std::string(""));
4231   }
4232 
4233   /* We do not need a separator at the end in order to determine what
4234   kind of object it is.  So take it off. If it is there and the last
4235   part is actually a file, the correct real path will be returned. */
4236   if (in_path.length() > 1 && is_separator(in_path.back())) {
4237     trim_separator(in_path);
4238   }
4239 
4240   /* Before we make an absolute path, check if this path exists,
4241   and if so, what type it is. */
4242   os_file_status(in_path.c_str(), &path_exists, &path_type);
4243 
4244   int ret = my_realpath(abspath, in_path.c_str(), MYF(0));
4245 
4246   if (ret == 0) {
4247     real_path.assign(abspath);
4248   } else {
4249     /* This often happens on non-Windows platforms when the path does not
4250     fully exist yet. */
4251 
4252     if (path_exists) {
4253       /* my_realpath() failed for some reason other than the path does not
4254       exist. */
4255       if (force) {
4256         /* Use the given path and make it comparable. */
4257         real_path.assign(in_path);
4258       } else {
4259         /* Return null and make a note of it.  Another attempt will be made
4260         later when Fil_path::get_real_path() is called with force=true. */
4261         ib::info(ER_IB_MSG_289) << "my_realpath('" << path
4262                                 << "') failed for path type " << path_type;
4263         return (std::string(""));
4264       }
4265     } else {
4266       /* The path does not exist.  Try my_realpath() again with the
4267       existing portion of the path. */
4268       std::string ghost;
4269       std::string dir = get_existing_path(in_path, ghost);
4270 
4271       ret = my_realpath(abspath, dir.c_str(), MYF(0));
4272       ut_ad(ret == 0);
4273 
4274       /* Concatenate the absolute path with the non-existing sub-path.
4275       NOTE: If this path existed, my_realpath() would put a separator
4276       at the end if it is a directory.  But since the ghost portion
4277       does not yet exist, we don't know if it is a dir or a file, so
4278       we cannot attach a trailing separator for a directory.  So we
4279       trim them off in Fil_path::is_same_as() and is_ancestor(). */
4280       real_path.assign(abspath);
4281       append_separator(real_path);
4282       real_path.append(ghost);
4283     }
4284   }
4285 
4286   if (lower_case_file_system) {
4287     Fil_path::to_lower(real_path);
4288   }
4289 
4290   /* Try to consistently end a directory name with a separator.
4291   On Windows, my_realpath() usually puts a separator at the end
4292   of a directory path (it does not do that for the path ".").
4293   On non-Windows it never does.
4294   So if the separator is missing, decide whether to append it. */
4295   ut_ad(!real_path.empty());
4296   if (!is_separator(real_path.back())) {
4297     bool add_sep = true;
4298     switch (path_type) {
4299       case OS_FILE_TYPE_DIR:
4300       case OS_FILE_TYPE_BLOCK:
4301         break;
4302       case OS_FILE_TYPE_FILE:
4303       case OS_FILE_TYPE_LINK:
4304         add_sep = false;
4305         break;
4306       case OS_FILE_TYPE_FAILED:
4307       case OS_FILE_TYPE_MISSING:
4308       case OS_FILE_TYPE_NAME_TOO_LONG:
4309       case OS_FILE_PERMISSION_ERROR:
4310       case OS_FILE_TYPE_UNKNOWN:
4311         /* This filepath is missing or cannot be identified for some other
4312         reason. If it ends in a three letter extension, assume it is a file
4313         name and do not add the trailing separator. Otherwise, assume it is
4314         intended to be a directory.*/
4315         size_t s = real_path.size();
4316         if (s > 4 && real_path[s - 4] == '.' && real_path[s - 3] != '.' &&
4317             real_path[s - 2] != '.' && real_path[s - 1] != '.' &&
4318             !is_separator(real_path[s - 3]) &&
4319             !is_separator(real_path[s - 2])) {
4320           add_sep = false;
4321         }
4322     }
4323 
4324     if (add_sep) {
4325       append_separator(real_path);
4326     }
4327   }
4328 
4329   return (real_path);
4330 }
4331 
4332 /** Constructor
4333 @param[in]	dir		Directory that the files are under */
Tablespace_files(const std::string & dir)4334 Tablespace_files::Tablespace_files(const std::string &dir)
4335     : m_ibd_paths(), m_undo_paths(), m_dir(dir) {
4336   ut_ad(Fil_path::is_separator(dir.back()));
4337 }
4338 
4339 /** Closes a single-table tablespace. The tablespace must be cached in the
4340 memory cache. Free all pages used by the tablespace.
4341 @param[in,out]	trx		Transaction covering the close
4342 @param[in]	space_id	Tablespace ID
4343 @return DB_SUCCESS or error */
fil_close_tablespace(trx_t * trx,space_id_t space_id)4344 dberr_t fil_close_tablespace(trx_t *trx, space_id_t space_id) {
4345   char *path = nullptr;
4346   fil_space_t *space = nullptr;
4347 
4348   ut_ad(!fsp_is_undo_tablespace(space_id));
4349   ut_ad(!fsp_is_system_or_temp_tablespace(space_id));
4350 
4351   auto shard = fil_system->shard_by_id(space_id);
4352 
4353   dberr_t err;
4354 
4355   err = shard->space_check_pending_operations(space_id, space, &path);
4356 
4357   if (err != DB_SUCCESS) {
4358     return (err);
4359   }
4360 
4361   ut_a(path != nullptr);
4362 
4363   rw_lock_x_lock(&space->latch);
4364 
4365 #ifndef UNIV_HOTBACKUP
4366   /* Invalidate in the buffer pool all pages belonging to the
4367   tablespace. Since we have set space->stop_new_ops = true, readahead
4368   or ibuf merge can no longer read more pages of this tablespace to the
4369   buffer pool. Thus we can clean the tablespace out of the buffer pool
4370   completely and permanently. The flag stop_new_ops also prevents
4371   fil_flush() from being applied to this tablespace. */
4372 
4373   buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_FLUSH_WRITE, trx);
4374 #endif /* !UNIV_HOTBACKUP */
4375 
4376   /* If the free is successful, the X lock will be released before
4377   the space memory data structure is freed. */
4378 
4379   if (!fil_space_free(space_id, true)) {
4380     rw_lock_x_unlock(&space->latch);
4381     err = DB_TABLESPACE_NOT_FOUND;
4382   } else {
4383     err = DB_SUCCESS;
4384   }
4385 
4386   /* If it is a delete then also delete any generated files, otherwise
4387   when we drop the database the remove directory will fail. */
4388 
4389   char *cfg_name = Fil_path::make_cfg(path);
4390 
4391   if (cfg_name != nullptr) {
4392     os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);
4393 
4394     ut_free(cfg_name);
4395   }
4396 
4397   char *cfp_name = Fil_path::make_cfp(path);
4398 
4399   if (cfp_name != nullptr) {
4400     os_file_delete_if_exists(innodb_data_file_key, cfp_name, nullptr);
4401 
4402     ut_free(cfp_name);
4403   }
4404 
4405   ut_free(path);
4406 
4407   return (err);
4408 }
4409 
4410 #ifndef UNIV_HOTBACKUP
4411 #ifndef XTRABACKUP
4412 /** Write a log record about an operation on a tablespace file.
4413 @param[in]	type		MLOG_FILE_OPEN or MLOG_FILE_DELETE
4414                                 or MLOG_FILE_CREATE or MLOG_FILE_RENAME
4415 @param[in]	space_id	tablespace identifier
4416 @param[in]	path		file path
4417 @param[in]	new_path	if type is MLOG_FILE_RENAME, the new name
4418 @param[in]	flags		if type is MLOG_FILE_CREATE, the space flags
4419 @param[in,out]	mtr		mini-transaction */
fil_op_write_log(mlog_id_t type,space_id_t space_id,const char * path,const char * new_path,uint32_t flags,mtr_t * mtr)4420 static void fil_op_write_log(mlog_id_t type, space_id_t space_id,
4421                              const char *path, const char *new_path,
4422                              uint32_t flags, mtr_t *mtr) {
4423   ut_ad(space_id != TRX_SYS_SPACE);
4424 
4425   byte *log_ptr = nullptr;
4426 
4427   if (!mlog_open(mtr, 11 + 4 + 2 + 1, log_ptr)) {
4428     /* Logging in mtr is switched off during crash recovery:
4429     in that case mlog_open returns nullptr */
4430     return;
4431   }
4432 
4433   log_ptr = mlog_write_initial_log_record_low(type, space_id, 0, log_ptr, mtr);
4434 
4435   if (type == MLOG_FILE_CREATE) {
4436     mach_write_to_4(log_ptr, flags);
4437     log_ptr += 4;
4438   }
4439 
4440   /* Let us store the strings as null-terminated for easier readability
4441   and handling */
4442 
4443   ulint len = strlen(path) + 1;
4444 
4445   mach_write_to_2(log_ptr, len);
4446   log_ptr += 2;
4447 
4448   mlog_close(mtr, log_ptr);
4449 
4450   mlog_catenate_string(mtr, reinterpret_cast<const byte *>(path), len);
4451 
4452   switch (type) {
4453     case MLOG_FILE_RENAME:
4454 
4455       ut_ad(strchr(new_path, Fil_path::OS_SEPARATOR) != nullptr);
4456 
4457       len = strlen(new_path) + 1;
4458 
4459       ut_a(mlog_open(mtr, 2 + len, log_ptr));
4460 
4461       mach_write_to_2(log_ptr, len);
4462 
4463       log_ptr += 2;
4464 
4465       mlog_close(mtr, log_ptr);
4466 
4467       mlog_catenate_string(mtr, reinterpret_cast<const byte *>(new_path), len);
4468       break;
4469     case MLOG_FILE_DELETE:
4470     case MLOG_FILE_CREATE:
4471       break;
4472     default:
4473       ut_ad(0);
4474   }
4475 }
4476 
4477 #endif /* !XTRABACKUP */
4478 
4479 /** Fetch the file name opened for a space_id during recovery
4480 from the file map.
4481 @param[in]	space_id	Undo tablespace ID
4482 @return file name that was opened, empty string if space ID not found. */
fil_system_open_fetch(space_id_t space_id)4483 std::string fil_system_open_fetch(space_id_t space_id) {
4484   ut_a(dict_sys_t::is_reserved(space_id) || srv_is_upgrade_mode);
4485 
4486   return (fil_system->find(space_id));
4487 }
4488 
4489 #endif /* !UNIV_HOTBACKUP */
4490 
/** Delete a tablespace from this shard together with its data file.
The steps, in order, are:
 1. Check the pending operations on the space with
    space_check_pending_operations(), which also hands back the space
    object and a copy of the data file path owned by this function.
 2. In non-backup builds, unless buf_remove is BUF_REMOVE_NONE, flush or
    remove the pages of the space from the buffer pool.
 3. For a space that is not FIL_TYPE_TEMPORARY: in non-backup builds write
    an MLOG_FILE_DELETE redo record and force it to the redo log, then
    delete the files named by Fil_path::make_cfg() and
    Fil_path::make_cfp().
 4. Under the shard mutex, detach the space. For BUF_REMOVE_NONE the space
    is stamped with the current LSN in m_deleted_lsn, pending I/O on its
    file is waited for and the space is queued on m_deleted instead of
    being freed; otherwise it is removed and freed immediately.
 5. Delete the data file.
Clone is marked as aborted for the duration of steps 4-5 and set back to
active before returning.
@param[in]	space_id	Tablespace ID; must not be the system or the
                                global temporary tablespace
@param[in]	buf_remove	How the pages of the space in the buffer pool
                                are to be treated
@return DB_SUCCESS, DB_TABLESPACE_NOT_FOUND if the space is not (or no
longer) cached, or DB_IO_ERROR if the data file could not be deleted */
dberr_t Fil_shard::space_delete(space_id_t space_id, buf_remove_t buf_remove) {
  char *path = nullptr;
  fil_space_t *space = nullptr;

  ut_ad(!fsp_is_system_tablespace(space_id));
  ut_ad(!fsp_is_global_temporary(space_id));

  dberr_t err = space_check_pending_operations(space_id, space, &path);

  if (err != DB_SUCCESS) {
    /* "Not found" is the only failure expected from the check. */
    ut_a(err == DB_TABLESPACE_NOT_FOUND);
    return (err);
  }

  ut_a(path != nullptr);
  ut_a(space != nullptr);

#if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
  /* IMPORTANT: Because we have set space::stop_new_ops there
  can't be any new ibuf merges, reads or flushes. We are here
  because file::n_pending was zero above. However, it is still
  possible to have pending read and write requests:

  A read request can happen because the reader thread has
  gone through the ::stop_new_ops check in buf_page_init_for_read()
  before the flag was set and has not yet incremented ::n_pending
  when we checked it above.

  A write request can be issued any time because we don't check
  the ::stop_new_ops flag when queueing a block for write.

  We deal with pending write requests in the following function
  where we'd minimally evict all dirty pages belonging to this
  space from the flush_list. Note that if a block is IO-fixed
  we'll wait for IO to complete.

  For buf_remove == BUF_REMOVE_NONE we mark the fil_space_t instance
  as deleted by setting the fil_space_t::m_deleted_lsn to the current
  LSN. We wait for any pending IO to complete after that.

  To deal with potential read requests, we will check the
  ::stop_new_ops flag in fil_io(). */

  if (buf_remove != BUF_REMOVE_NONE) {
    buf_LRU_flush_or_remove_pages(space_id, buf_remove, nullptr);
  }

#endif /* !UNIV_HOTBACKUP && !XTRABACKUP */

  /* If it is a delete then also delete any generated files, otherwise
  when we drop the database the remove directory will fail. */
  if (space->purpose != FIL_TYPE_TEMPORARY) {
#if defined(UNIV_HOTBACKUP) || defined(XTRABACKUP)
    /* When replaying the operation in MySQL Enterprise
    Backup, we do not try to write any log record. */
#else  /* !UNIV_HOTBACKUP && !XTRABACKUP */
    /* Before deleting the file, write a log record about it, so that
    InnoDB crash recovery will expect the file to be gone. */
    mtr_t mtr;

    mtr.start();

    fil_op_write_log(MLOG_FILE_DELETE, space_id, path, nullptr, 0, &mtr);

    mtr.commit();

    /* Even if we got killed shortly after deleting the
    tablespace file, the record must have already been
    written to the redo log. */
    log_write_up_to(*log_sys, mtr.commit_lsn(), true);
#endif /* UNIV_HOTBACKUP || XTRABACKUP */

    /* The generated names are owned by us and must be freed whether or
    not the files existed. */
    char *cfg_name = Fil_path::make_cfg(path);

    if (cfg_name != nullptr) {
      os_file_delete_if_exists(innodb_data_file_key, cfg_name, nullptr);

      ut_free(cfg_name);
    }

    char *cfp_name = Fil_path::make_cfp(path);

    if (cfp_name != nullptr) {
      os_file_delete_if_exists(innodb_data_file_key, cfp_name, nullptr);

      ut_free(cfp_name);
    }
  }

  /* Must set back to active before returning from function. */
  clone_mark_abort(true);

#ifndef UNIV_HOTBACKUP
  /* Sampled before taking the mutex; used below as the deletion LSN when
  buf_remove is BUF_REMOVE_NONE. */
  lsn_t lsn = log_get_lsn(*log_sys);
#endif /* !UNIV_HOTBACKUP */

  mutex_acquire();

  /* Double check the sanity of pending ops after reacquiring
  the fil_system::mutex. */
  if (const fil_space_t *s = get_space_by_id(space_id)) {
    ut_a(s == space);
    ut_a(space->files.size() == 1);
    ut_a(space->n_pending_ops == 0);

#ifndef UNIV_HOTBACKUP
    if (buf_remove == BUF_REMOVE_NONE) {
      ut_a(space->m_deleted_lsn == 0);

      /* Mark the instance as deleted, this should inform any writer
      threads that the tablespace can't be written to anymore. */
      space->m_deleted_lsn = lsn;

      /* Release the mutex because we want the IO to complete. */
      mutex_release();

      os_thread_yield();

      mutex_acquire();

      /* Wait for any pending writes. */
      while (space->files.front().n_pending > 0) {
        mutex_release();

        os_thread_yield();

        mutex_acquire();
      }

      /* The space object is kept alive on m_deleted rather than being
      freed below. */
      m_deleted.push_back({space->id, space});
    }
#endif /* !UNIV_HOTBACKUP */

    space_detach(space);

    /* Delete the tablespace unless BUF_REMOVE_NONE was used. */
    if (space->m_deleted_lsn == 0) {
      ut_a(space->files.front().n_pending == 0);

      /* NOTE(review): this is the single-argument overload, not a
      recursive call; presumably it drops the space from the shard's
      lookup structures - confirm against its definition. */
      space_delete(space_id);
    }

    mutex_release();

    if (space->m_deleted_lsn == 0) {
      /* space_free_low() is expected to reset the pointer, which the
      assertion below verifies. */
      space_free_low(space);
      ut_a(space == nullptr);
    }

    /* Fall back to the "if exists" variant so that an already missing
    file is not reported as an I/O error. */
    if (!os_file_delete(innodb_data_file_key, path) &&
        !os_file_delete_if_exists(innodb_data_file_key, path, nullptr)) {
      /* Note: This is because we have removed the
      tablespace instance from the cache. */

      err = DB_IO_ERROR;
    }
  } else {
    mutex_release();

    err = DB_TABLESPACE_NOT_FOUND;
  }

  ut_free(path);

  clone_mark_active();

  return (err);
}
4659 
fil_delete_tablespace(space_id_t space_id,buf_remove_t buf_remove)4660 dberr_t fil_delete_tablespace(space_id_t space_id, buf_remove_t buf_remove) {
4661   auto shard = fil_system->shard_by_id(space_id);
4662 
4663   return (shard->space_delete(space_id, buf_remove));
4664 }
4665 
4666 /** Prepare for truncating a single-table tablespace.
4667 1) Check pending operations on a tablespace;
4668 2) Remove all insert buffer entries for the tablespace;
4669 @param[in]	space_id	Tablespace ID
4670 @return DB_SUCCESS or error */
space_prepare_for_truncate(space_id_t space_id)4671 dberr_t Fil_shard::space_prepare_for_truncate(space_id_t space_id) {
4672   char *path = nullptr;
4673   fil_space_t *space = nullptr;
4674 
4675   ut_ad(space_id != TRX_SYS_SPACE);
4676   ut_ad(!fsp_is_system_tablespace(space_id));
4677   ut_ad(!fsp_is_global_temporary(space_id));
4678   ut_ad(fsp_is_undo_tablespace(space_id) || fsp_is_session_temporary(space_id));
4679 
4680   dberr_t err = space_check_pending_operations(space_id, space, &path);
4681 
4682   ut_free(path);
4683 
4684   return (err);
4685 }
4686 
4687 /** Truncate the tablespace to needed size.
4688 @param[in]	space_id	Tablespace ID to truncate
4689 @param[in]	size_in_pages	Truncate size.
4690 @return true if truncate was successful. */
space_truncate(space_id_t space_id,page_no_t size_in_pages)4691 bool Fil_shard::space_truncate(space_id_t space_id, page_no_t size_in_pages) {
4692   /* Step-1: Prepare tablespace for truncate. This involves
4693   stopping all the new operations + IO on that tablespace
4694   and ensuring that related pages are flushed to disk. */
4695   if (space_prepare_for_truncate(space_id) != DB_SUCCESS) {
4696     return (false);
4697   }
4698 
4699 #ifndef UNIV_HOTBACKUP
4700   /* Step-2: Invalidate buffer pool pages belonging to the tablespace
4701   to re-create. Remove all insert buffer entries for the tablespace */
4702   buf_LRU_flush_or_remove_pages(space_id, BUF_REMOVE_ALL_NO_WRITE, nullptr);
4703 #endif /* !UNIV_HOTBACKUP */
4704 
4705   /* Step-3: Truncate the tablespace and accordingly update
4706   the fil_space_t handler that is used to access this tablespace. */
4707   mutex_acquire();
4708 
4709   fil_space_t *space = get_space_by_id(space_id);
4710 
4711   ut_a(space->files.size() == 1);
4712 
4713   fil_node_t &file = space->files.front();
4714 
4715   ut_ad(file.is_open);
4716 
4717   space->size = file.size = size_in_pages;
4718 
4719   bool success = os_file_truncate(file.name, file.handle, 0);
4720 
4721   if (success) {
4722     os_offset_t size = size_in_pages * UNIV_PAGE_SIZE;
4723 
4724     success = os_file_set_size(file.name, file.handle, 0, size,
4725                                srv_read_only_mode, true);
4726 
4727     if (success) {
4728       space->stop_new_ops = false;
4729     }
4730   }
4731 
4732   mutex_release();
4733 
4734   return (success);
4735 }
4736 
4737 /** Truncate the tablespace to needed size.
4738 @param[in]	space_id	Tablespace ID to truncate
4739 @param[in]	size_in_pages	Truncate size.
4740 @return true if truncate was successful. */
fil_truncate_tablespace(space_id_t space_id,page_no_t size_in_pages)4741 bool fil_truncate_tablespace(space_id_t space_id, page_no_t size_in_pages) {
4742   auto shard = fil_system->shard_by_id(space_id);
4743 
4744   return (shard->space_truncate(space_id, size_in_pages));
4745 }
4746 
4747 #ifdef UNIV_DEBUG
4748 /** Increase redo skipped count for a tablespace.
4749 @param[in]	space_id	Tablespace ID */
fil_space_inc_redo_skipped_count(space_id_t space_id)4750 void fil_space_inc_redo_skipped_count(space_id_t space_id) {
4751   auto shard = fil_system->shard_by_id(space_id);
4752 
4753   shard->mutex_acquire();
4754 
4755   fil_space_t *space = shard->get_space_by_id(space_id);
4756 
4757   ut_a(space != nullptr);
4758 
4759   ++space->redo_skipped_count;
4760 
4761   shard->mutex_release();
4762 }
4763 
4764 /** Decrease redo skipped count for a tablespace.
4765 @param[in]	space_id	Tablespace id */
fil_space_dec_redo_skipped_count(space_id_t space_id)4766 void fil_space_dec_redo_skipped_count(space_id_t space_id) {
4767   auto shard = fil_system->shard_by_id(space_id);
4768 
4769   shard->mutex_acquire();
4770 
4771   fil_space_t *space = shard->get_space_by_id(space_id);
4772 
4773   ut_a(space != nullptr);
4774   ut_a(space->redo_skipped_count > 0);
4775 
4776   --space->redo_skipped_count;
4777 
4778   shard->mutex_release();
4779 }
4780 
4781 /** Check whether a single-table tablespace is redo skipped.
4782 @param[in]	space_id	Tablespace id
4783 @return true if redo skipped */
fil_space_is_redo_skipped(space_id_t space_id)4784 bool fil_space_is_redo_skipped(space_id_t space_id) {
4785   auto shard = fil_system->shard_by_id(space_id);
4786 
4787   shard->mutex_acquire();
4788 
4789   fil_space_t *space = shard->get_space_by_id(space_id);
4790 
4791   ut_a(space != nullptr);
4792 
4793   bool is_redo_skipped = space->redo_skipped_count > 0;
4794 
4795   shard->mutex_release();
4796 
4797   return (is_redo_skipped);
4798 }
4799 #endif /* UNIV_DEBUG */
4800 
4801 #ifndef UNIV_HOTBACKUP
4802 /** Discards a single-table tablespace. The tablespace must be cached in the
4803 memory cache. Discarding is like deleting a tablespace, but
4804 
4805  1. We do not drop the table from the data dictionary;
4806 
4807  2. We remove all insert buffer entries for the tablespace immediately;
4808     in DROP TABLE they are only removed gradually in the background;
4809 
4810  3. Free all the pages in use by the tablespace.
4811 @param[in]	space_id		Tablespace ID
4812 @return DB_SUCCESS or error */
fil_discard_tablespace(space_id_t space_id)4813 dberr_t fil_discard_tablespace(space_id_t space_id) {
4814   dberr_t err;
4815 
4816   err = fil_delete_tablespace(space_id, BUF_REMOVE_ALL_NO_WRITE);
4817 
4818   switch (err) {
4819     case DB_SUCCESS:
4820       break;
4821 
4822     case DB_IO_ERROR:
4823 
4824       ib::warn(ER_IB_MSG_291, ulong{space_id}, ut_strerr(err));
4825       break;
4826 
4827     case DB_TABLESPACE_NOT_FOUND:
4828 
4829       ib::warn(ER_IB_MSG_292, ulong{space_id}, ut_strerr(err));
4830       break;
4831 
4832     default:
4833       ut_error;
4834   }
4835 
4836   /* Remove all insert buffer entries for the tablespace */
4837 
4838   ibuf_delete_for_discarded_space(space_id);
4839 
4840   return (err);
4841 }
4842 
4843 #if !defined(XTRABACKUP)
4844 /** Write redo log for renaming a file.
4845 @param[in]	space_id	tablespace id
4846 @param[in]	old_name	tablespace file name
4847 @param[in]	new_name	tablespace file name after renaming
4848 @param[in,out]	mtr		mini-transaction */
fil_name_write_rename(space_id_t space_id,const char * old_name,const char * new_name,mtr_t * mtr)4849 static void fil_name_write_rename(space_id_t space_id, const char *old_name,
4850                                   const char *new_name, mtr_t *mtr) {
4851   ut_ad(!fsp_is_system_or_temp_tablespace(space_id));
4852   ut_ad(!fsp_is_undo_tablespace(space_id));
4853 
4854   /* Note: A checkpoint can take place here. */
4855 
4856   DBUG_EXECUTE_IF("ib_crash_rename_log_1", DBUG_SUICIDE(););
4857 
4858   static const auto type = MLOG_FILE_RENAME;
4859 
4860   fil_op_write_log(type, space_id, old_name, new_name, 0, mtr);
4861 
4862   DBUG_EXECUTE_IF("ib_crash_rename_log_2", DBUG_SUICIDE(););
4863 
4864   /* Note: A checkpoint can take place here too before we
4865   have physically renamed the file. */
4866 }
4867 #endif /* !XTRABACKUP */
4868 
4869 #endif /* !UNIV_HOTBACKUP */
4870 
4871 /** Allocate and build a file name from a path, a table or tablespace name
4872 and a suffix.
4873 @param[in]	path_in		nullptr or the direcory path or the full path
4874                                 and filename
4875 @param[in]	name_in		nullptr if path is full, or Table/Tablespace
4876                                 name
4877 @param[in]	ext		the file extension to use
4878 @param[in]	trim		whether last name on the path should be trimmed
4879 @return own: file name; must be freed by ut_free() */
make(const std::string & path_in,const std::string & name_in,ib_file_suffix ext,bool trim)4880 char *Fil_path::make(const std::string &path_in, const std::string &name_in,
4881                      ib_file_suffix ext, bool trim) {
4882   /* The path should be a directory and should not contain the
4883   basename of the file. If the path is empty, we will use  the
4884   default path, */
4885 
4886   ut_ad(!path_in.empty() || !name_in.empty());
4887 
4888   std::string path;
4889 
4890   if (path_in.empty()) {
4891     if (is_absolute_path(name_in)) {
4892       path = "";
4893     } else {
4894       path.assign(MySQL_datadir_path);
4895     }
4896   } else {
4897     path.assign(path_in);
4898   }
4899 
4900   std::string name;
4901 
4902   if (!name_in.empty()) {
4903     name.assign(name_in);
4904   }
4905 
4906   /* Do not prepend the datadir path (which must be DOT_SLASH)
4907   if the name is an absolute path or a relative path like
4908   DOT_SLASH or DOT_DOT_SLASH.  */
4909   if (is_absolute_path(name) || has_prefix(name, DOT_SLASH) ||
4910       has_prefix(name, DOT_DOT_SLASH)) {
4911     path.clear();
4912   }
4913 
4914   std::string filepath;
4915 
4916   if (!path.empty()) {
4917     filepath.assign(path);
4918   }
4919 
4920   if (trim) {
4921     /* Find the offset of the last DIR separator and set it to
4922     null in order to strip off the old basename from this path. */
4923     auto pos = filepath.find_last_of(SEPARATOR);
4924 
4925     if (pos != std::string::npos) {
4926       filepath.resize(pos);
4927     }
4928   }
4929 
4930   if (!name.empty()) {
4931     append_separator(filepath);
4932 
4933     filepath.append(name);
4934   }
4935 
4936   /* Make sure that the specified suffix is at the end. */
4937   if (ext != NO_EXT) {
4938     const auto suffix = dot_ext[ext];
4939     size_t len = strlen(suffix);
4940 
4941     /* This assumes that the suffix starts with '.'.  If the
4942     first char of the suffix is found in the filepath at the
4943     same length as the suffix from the end, then we will assume
4944     that there is a previous suffix that needs to be replaced. */
4945 
4946     ut_ad(*suffix == '.');
4947 
4948     if (filepath.length() > len && *(filepath.end() - len) == *suffix) {
4949       filepath.replace(filepath.end() - len, filepath.end(), suffix);
4950     } else {
4951       filepath.append(suffix);
4952     }
4953   }
4954 
4955   normalize(filepath);
4956 
4957   return (mem_strdup(filepath.c_str()));
4958 }
4959 
parse_file_path(const std::string & file_path,ib_file_suffix extn,std::string & dict_name)4960 bool Fil_path::parse_file_path(const std::string &file_path,
4961                                ib_file_suffix extn, std::string &dict_name) {
4962   dict_name.assign(file_path);
4963   if (!Fil_path::truncate_suffix(extn, dict_name)) {
4964     dict_name.clear();
4965     return (false);
4966   }
4967 
4968   /* Extract table name */
4969   auto table_pos = dict_name.find_last_of(SEPARATOR);
4970   if (table_pos == std::string::npos) {
4971     dict_name.clear();
4972     return (false);
4973   }
4974   std::string table_name = dict_name.substr(table_pos + 1);
4975   dict_name.resize(table_pos);
4976 
4977   /* Extract schema name */
4978   auto schema_pos = dict_name.find_last_of(SEPARATOR);
4979   if (schema_pos == std::string::npos) {
4980     dict_name.clear();
4981     return (false);
4982   }
4983   std::string schema_name = dict_name.substr(schema_pos + 1);
4984 
4985   /* Build dictionary table name schema/table form. */
4986   dict_name.assign(schema_name);
4987   dict_name.push_back(DB_SEPARATOR);
4988   dict_name.append(table_name);
4989   return (true);
4990 }
4991 
make_new_path(const std::string & path_in,const std::string & name_in,ib_file_suffix extn)4992 std::string Fil_path::make_new_path(const std::string &path_in,
4993                                     const std::string &name_in,
4994                                     ib_file_suffix extn) {
4995   ut_a(Fil_path::has_suffix(extn, path_in));
4996   ut_a(!Fil_path::has_suffix(extn, name_in));
4997 
4998   std::string path(path_in);
4999 
5000   auto pos = path.find_last_of(SEPARATOR);
5001 
5002   ut_a(pos != std::string::npos);
5003 
5004   path.resize(pos);
5005 
5006   pos = path.find_last_of(SEPARATOR);
5007 
5008   ut_a(pos != std::string::npos);
5009 
5010   path.resize(pos + 1);
5011 
5012   path.append(name_in + dot_ext[extn]);
5013 
5014   normalize(path);
5015 
5016   return (path);
5017 }
5018 
5019 /** This function reduces a null-terminated full remote path name
5020 into the path that is sent by MySQL for DATA DIRECTORY clause.
5021 It replaces the 'databasename/tablename.ibd' found at the end of the
5022 path with just 'tablename'.
5023 
5024 Since the result is always smaller than the path sent in, no new
5025 memory is allocated. The caller should allocate memory for the path
5026 sent in. This function manipulates that path in place. If the path
5027 format is not as expected, set data_dir_path to "" and return.
5028 
5029 The result is used to inform a SHOW CREATE TABLE command.
5030 @param[in,out]	data_dir_path	Full path/data_dir_path */
make_data_dir_path(char * data_dir_path)5031 void Fil_path::make_data_dir_path(char *data_dir_path) {
5032   /* Replace the period before the extension with a null byte. */
5033   ut_ad(has_suffix(IBD, data_dir_path));
5034   char *dot = strrchr((char *)data_dir_path, '.');
5035   *dot = '\0';
5036 
5037   /* The tablename starts after the last slash. */
5038   char *base_slash = strrchr((char *)data_dir_path, OS_PATH_SEPARATOR);
5039   ut_ad(base_slash != nullptr);
5040 
5041   *base_slash = '\0';
5042 
5043   std::string base_name{base_slash + 1};
5044 
5045   /* The database name starts after the next to last slash. */
5046   char *db_slash = strrchr((char *)data_dir_path, OS_SEPARATOR);
5047   ut_ad(db_slash != nullptr);
5048   char *db_name = db_slash + 1;
5049 
5050   /* Overwrite the db_name with the base_name. */
5051   memmove(db_name, base_name.c_str(), base_name.length());
5052   db_name[base_name.length()] = '\0';
5053 }
5054 
5055 /** Test if a tablespace file can be renamed to a new filepath by checking
5056 if that the old filepath exists and the new filepath does not exist.
5057 @param[in]	space_id	tablespace id
5058 @param[in]	old_path	old filepath
5059 @param[in]	new_path	new filepath
5060 @param[in]	is_discarded	whether the tablespace is discarded
5061 @return innodb error code */
fil_rename_tablespace_check(space_id_t space_id,const char * old_path,const char * new_path,bool is_discarded)5062 dberr_t fil_rename_tablespace_check(space_id_t space_id, const char *old_path,
5063                                     const char *new_path, bool is_discarded) {
5064   bool exists;
5065   os_file_type_t ftype;
5066 
5067   if (!is_discarded && os_file_status(old_path, &exists, &ftype) && !exists) {
5068     ib::error(ER_IB_MSG_293, old_path, new_path, ulong{space_id});
5069     return (DB_TABLESPACE_NOT_FOUND);
5070   }
5071 
5072   if (!os_file_status(new_path, &exists, &ftype) || exists) {
5073     ib::error(ER_IB_MSG_294, old_path, new_path, ulong{space_id});
5074     return (DB_TABLESPACE_EXISTS);
5075   }
5076 
5077   return (DB_SUCCESS);
5078 }
5079 
5080 /** Rename a single-table tablespace.
5081 The tablespace must exist in the memory cache.
5082 @param[in]	space_id	Tablespace ID
5083 @param[in]	old_path	Old file name
5084 @param[in]	new_name	New tablespace  name in the schema/space
5085 @param[in]	new_path_in	New file name, or nullptr if it is located
5086                                 in the normal data directory
5087 @return InnoDB error code */
space_rename(space_id_t space_id,const char * old_path,const char * new_name,const char * new_path_in)5088 dberr_t Fil_shard::space_rename(space_id_t space_id, const char *old_path,
5089                                 const char *new_name, const char *new_path_in) {
5090   fil_space_t *space;
5091   ulint count = 0;
5092   fil_node_t *file = nullptr;
5093   bool write_ddl_log = true;
5094   auto start_time = ut_time_monotonic();
5095 
5096 #ifdef UNIV_DEBUG
5097   static uint32_t crash_injection_rename_tablespace_counter = 1;
5098 #endif /* UNIV_DEBUG */
5099 
5100   ut_a(space_id != TRX_SYS_SPACE);
5101   ut_ad(strchr(new_name, '/') != nullptr);
5102 
5103   for (;;) {
5104     bool retry = false;
5105     bool flush = false;
5106 
5107     ++count;
5108 
5109     if (!(count % 1000)) {
5110       ib::warn(ER_IB_MSG_295, old_path, ulong{space_id}, ulonglong{count});
5111     }
5112 
5113     /* The name map and space ID map are in the same shard. */
5114     mutex_acquire();
5115 
5116     space = get_space_by_id(space_id);
5117 
5118     DBUG_EXECUTE_IF("fil_rename_tablespace_failure_1", space = nullptr;);
5119 
5120     if (space == nullptr) {
5121       ib::error(ER_IB_MSG_296, ulong{space_id}, old_path);
5122 
5123       mutex_release();
5124 
5125       return (DB_ERROR);
5126 
5127     } else if (space->stop_ios) {
5128       /* Some other thread has stopped the IO. We need to
5129        wait for the other thread to complete its operation. */
5130       mutex_release();
5131 
5132       if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
5133         ib::warn(ER_IB_MSG_297);
5134 
5135         start_time = ut_time_monotonic();
5136       }
5137 
5138       os_thread_sleep(1000000);
5139 
5140       continue;
5141 
5142     } else if (count > 25000) {
5143       mutex_release();
5144 
5145       return (DB_ERROR);
5146 
5147     } else if (space != get_space_by_name(space->name)) {
5148       ib::error(ER_IB_MSG_298, space->name);
5149 
5150       mutex_release();
5151 
5152       return (DB_ERROR);
5153 
5154     } else {
5155       auto new_space = get_space_by_name(new_name);
5156 
5157       if (new_space != nullptr) {
5158         if (new_space == space) {
5159           mutex_release();
5160 
5161           return (DB_SUCCESS);
5162         }
5163 
5164         ut_a(new_space->id == space->id);
5165       }
5166     }
5167 
5168     ut_a(space->files.size() == 1);
5169 
5170 #ifndef UNIV_HOTBACKUP
5171     /* Don't write DDL log during recovery when log_ddl is
5172     not initialized. */
5173 
5174     if (write_ddl_log && log_ddl != nullptr) {
5175       /* Write ddl log when space->stop_ios is true
5176       can cause deadlock:
5177       a. buffer flush thread waits for rename thread to set
5178          stop_ios to false;
5179       b. rename thread waits for buffer flush thread to flush
5180          a page and release page lock. The page is ready for
5181          flush in double write buffer. */
5182 
5183       ut_ad(!space->stop_ios);
5184 
5185       file = &space->files.front();
5186 
5187       char *new_file_name = new_path_in == nullptr
5188                                 ? Fil_path::make_ibd_from_table_name(new_name)
5189                                 : mem_strdup(new_path_in);
5190 
5191       char *old_file_name = file->name;
5192 
5193       ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != nullptr);
5194 
5195       ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != nullptr);
5196 
5197       mutex_release();
5198 
5199       /* Rename ddl log is for rollback, so we exchange
5200       old file name with new file name. */
5201       dberr_t err = log_ddl->write_rename_space_log(space_id, new_file_name,
5202                                                     old_file_name);
5203       ut_free(new_file_name);
5204       if (err != DB_SUCCESS) {
5205         return (err);
5206       }
5207 
5208       write_ddl_log = false;
5209       continue;
5210     }
5211 #endif /* !UNIV_HOTBACKUP */
5212 
5213     /* We temporarily close the .ibd file because we do
5214     not trust that operating systems can rename an open
5215     file. For the closing we have to wait until there
5216     are no pending I/O's or flushes on the file. */
5217 
5218     space->stop_ios = true;
5219 
5220     file = &space->files.front();
5221 
5222     if (file->n_pending > 0 || file->n_pending_flushes > 0 ||
5223         file->in_use > 0) {
5224       /* There are pending I/O's or flushes or the
5225       file is currently being extended, sleep for
5226       a while and retry */
5227 
5228       retry = true;
5229 
5230       space->stop_ios = false;
5231 
5232     } else if (file->modification_counter > file->flush_counter) {
5233       /* Flush the space */
5234 
5235       retry = flush = true;
5236 
5237       space->stop_ios = false;
5238 
5239     } else if (file->is_open) {
5240       close_file(file, false);
5241     }
5242 
5243     mutex_release();
5244 
5245     if (!retry) {
5246       ut_ad(space->stop_ios);
5247       break;
5248     }
5249 
5250     os_thread_sleep(100000);
5251 
5252     if (flush) {
5253       mutex_acquire();
5254 
5255       space_flush(space->id);
5256 
5257       mutex_release();
5258     }
5259   }
5260 
5261   ut_ad(space->stop_ios);
5262 
5263   char *new_file_name;
5264 
5265   if (new_path_in == nullptr) {
5266     new_file_name = Fil_path::make_ibd_from_table_name(new_name);
5267   } else {
5268     new_file_name = mem_strdup(new_path_in);
5269   }
5270 
5271   char *old_file_name = file->name;
5272   char *old_space_name = space->name;
5273   char *new_space_name = mem_strdup(new_name);
5274 
5275 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
5276   if (!recv_recovery_on) {
5277     mtr_t mtr;
5278 
5279     mtr.start();
5280 
5281     fil_name_write_rename(space_id, old_file_name, new_file_name, &mtr);
5282 
5283     mtr.commit();
5284   }
5285 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
5286 
5287   ut_ad(strchr(old_file_name, OS_PATH_SEPARATOR) != nullptr);
5288   ut_ad(strchr(new_file_name, OS_PATH_SEPARATOR) != nullptr);
5289 
5290   mutex_acquire();
5291 
5292   /* We already checked these. */
5293   ut_ad(space == get_space_by_name(old_space_name));
5294   ut_ad(get_space_by_name(new_space_name) == nullptr);
5295 
5296   bool success;
5297 
5298   DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", goto skip_rename;);
5299 
5300   DBUG_INJECT_CRASH("ddl_crash_before_rename_tablespace",
5301                     crash_injection_rename_tablespace_counter++);
5302 
5303   success = os_file_rename(innodb_data_file_key, old_file_name, new_file_name);
5304 
5305   DBUG_EXECUTE_IF("fil_rename_tablespace_failure_2", skip_rename
5306                   : success = false;);
5307 
5308   DBUG_INJECT_CRASH("ddl_crash_after_rename_tablespace",
5309                     crash_injection_rename_tablespace_counter++);
5310 
5311   if (success) {
5312     file->name = new_file_name;
5313 
5314     update_space_name_map(space, new_space_name);
5315 
5316     space->name = new_space_name;
5317 
5318   } else {
5319     /* Because nothing was renamed, we must free the new
5320     names, not the old ones. */
5321     old_file_name = new_file_name;
5322     old_space_name = new_space_name;
5323   }
5324 
5325   ut_ad(space->stop_ios);
5326   space->stop_ios = false;
5327 
5328   mutex_release();
5329 
5330   ut_free(old_file_name);
5331   ut_free(old_space_name);
5332 
5333   return (success ? DB_SUCCESS : DB_ERROR);
5334 }
5335 
5336 /** Rename a single-table tablespace.
5337 The tablespace must exist in the memory cache.
5338 @param[in]	space_id	Tablespace ID
5339 @param[in]	old_path	Old file name
5340 @param[in]	new_name	New tablespace name in the schema/name format
5341 @param[in]	new_path_in	New file name, or nullptr if it is located
5342                                 in the normal data directory
5343 @return InnoDB error code */
fil_rename_tablespace(space_id_t space_id,const char * old_path,const char * new_name,const char * new_path_in)5344 dberr_t fil_rename_tablespace(space_id_t space_id, const char *old_path,
5345                               const char *new_name, const char *new_path_in) {
5346   auto shard = fil_system->shard_by_id(space_id);
5347 
5348   dberr_t err = shard->space_rename(space_id, old_path, new_name, new_path_in);
5349 
5350   return (err);
5351 }
5352 
5353 /** Rename a tablespace.  Use the space_id to find the shard.
5354 @param[in]	space_id	tablespace ID
5355 @param[in]	old_name	old tablespace name
5356 @param[in]	new_name	new tablespace name
5357 @return DB_SUCCESS on success */
rename_tablespace_name(space_id_t space_id,const char * old_name,const char * new_name)5358 dberr_t Fil_system::rename_tablespace_name(space_id_t space_id,
5359                                            const char *old_name,
5360                                            const char *new_name) {
5361   auto old_shard = fil_system->shard_by_id(space_id);
5362 
5363   old_shard->mutex_acquire();
5364 
5365   auto old_space = old_shard->get_space_by_id(space_id);
5366 
5367   if (old_space == nullptr) {
5368     old_shard->mutex_release();
5369 
5370     ib::error(ER_IB_MSG_299, old_name);
5371 
5372     return (DB_TABLESPACE_NOT_FOUND);
5373   }
5374 
5375   ut_ad(old_space == old_shard->get_space_by_name(old_name));
5376   old_shard->mutex_release();
5377 
5378   Fil_shard *new_shard{};
5379   fil_space_t *new_space{};
5380 
5381   mutex_acquire_all();
5382 
5383   for (auto shard : m_shards) {
5384     new_space = shard->get_space_by_name(new_name);
5385 
5386     if (new_space != nullptr) {
5387       new_shard = shard;
5388       break;
5389     }
5390   }
5391 
5392   if (new_space != nullptr) {
5393     mutex_release_all();
5394 
5395     if (new_space->id != old_space->id) {
5396       ib::error(ER_IB_MSG_300, new_name);
5397 
5398       return (DB_TABLESPACE_EXISTS);
5399     } else {
5400       ut_a(new_shard == old_shard);
5401     }
5402 
5403     return (DB_SUCCESS);
5404   }
5405 
5406   auto new_space_name = mem_strdup(new_name);
5407   auto old_space_name = old_space->name;
5408 
5409   old_shard->update_space_name_map(old_space, new_space_name);
5410 
5411   old_space->name = new_space_name;
5412 
5413   mutex_release_all();
5414 
5415   ut_free(old_space_name);
5416 
5417   return (DB_SUCCESS);
5418 }
5419 
5420 /** Rename a tablespace.  Use the space_id to find the shard.
5421 @param[in]	space_id	tablespace ID
5422 @param[in]	old_name	old tablespace name
5423 @param[in]	new_name	new tablespace name
5424 @return DB_SUCCESS on success */
fil_rename_tablespace_by_id(space_id_t space_id,const char * old_name,const char * new_name)5425 dberr_t fil_rename_tablespace_by_id(space_id_t space_id, const char *old_name,
5426                                     const char *new_name) {
5427   return (fil_system->rename_tablespace_name(space_id, old_name, new_name));
5428 }
5429 
5430 /** Create a tablespace (an IBD or IBT) file
5431 @param[in]	space_id	Tablespace ID
5432 @param[in]	name		Tablespace name in dbname/tablename format.
5433                                 For general tablespaces, the 'dbname/' part
5434                                 may be missing.
5435 @param[in]	path		Path and filename of the datafile to create.
5436 @param[in]	flags		Tablespace flags
5437 @param[in]	size		Initial size of the tablespace file in pages,
5438                                 must be >= FIL_IBD_FILE_INITIAL_SIZE
5439 @param[in]	type		FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
5440 @return DB_SUCCESS or error code */
fil_create_tablespace(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size,fil_type_t type)5441 static dberr_t fil_create_tablespace(space_id_t space_id, const char *name,
5442                                      const char *path, uint32_t flags,
5443                                      page_no_t size, fil_type_t type) {
5444   pfs_os_file_t file;
5445   dberr_t err;
5446   byte *buf2;
5447   byte *page;
5448   bool success;
5449   bool has_shared_space = FSP_FLAGS_GET_SHARED(flags);
5450   fil_space_t *space = nullptr;
5451 
5452   ut_ad(!fsp_is_system_tablespace(space_id));
5453   ut_ad(!fsp_is_global_temporary(space_id));
5454   ut_a(fsp_flags_is_valid(flags));
5455   ut_a(type == FIL_TYPE_TEMPORARY || type == FIL_TYPE_TABLESPACE);
5456 
5457   const page_size_t page_size(flags);
5458 
5459   /* Create the subdirectories in the path, if they are
5460   not there already. */
5461   if (!has_shared_space) {
5462     err = os_file_create_subdirs_if_needed(path);
5463 
5464     if (err != DB_SUCCESS) {
5465       return (err);
5466     }
5467   }
5468 
5469   file = os_file_create(
5470       type == FIL_TYPE_TEMPORARY ? innodb_temp_file_key : innodb_data_file_key,
5471       path, OS_FILE_CREATE | OS_FILE_ON_ERROR_NO_EXIT, OS_FILE_NORMAL,
5472       OS_DATA_FILE, srv_read_only_mode && (type != FIL_TYPE_TEMPORARY),
5473       &success);
5474 
5475   if (!success) {
5476     /* The following call will print an error message */
5477     ulint error = os_file_get_last_error(true);
5478 
5479     ib::error(ER_IB_MSG_301, path);
5480 
5481     switch (error) {
5482       case OS_FILE_ALREADY_EXISTS:
5483 #ifndef UNIV_HOTBACKUP
5484         ib::error(ER_IB_MSG_UNEXPECTED_FILE_EXISTS, path, path);
5485         return (DB_TABLESPACE_EXISTS);
5486 #else  /* !UNIV_HOTBACKUP */
5487         return (DB_SUCCESS); /* Already existing file not an error here. */
5488 #endif /* !UNIV_HOTBACKUP */
5489 
5490       case OS_FILE_NAME_TOO_LONG:
5491         ib::error(ER_IB_MSG_TOO_LONG_PATH, path);
5492         return (DB_TOO_LONG_PATH);
5493 
5494       case OS_FILE_DISK_FULL:
5495         return (DB_OUT_OF_DISK_SPACE);
5496 
5497       default:
5498         return (DB_ERROR);
5499     }
5500   }
5501 
5502   bool atomic_write;
5503 
5504 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
5505   if (fil_fusionio_enable_atomic_write(file)) {
5506     int ret = posix_fallocate(file.m_file, 0, size * page_size.physical());
5507 
5508     if (ret != 0) {
5509       ib::error(ER_IB_MSG_303, path, ulonglong{size * page_size.physical()},
5510                 ret, REFMAN);
5511       success = false;
5512     } else {
5513       success = true;
5514     }
5515 
5516     atomic_write = true;
5517   } else {
5518     atomic_write = false;
5519 
5520     success = os_file_set_size(path, file, 0, size * page_size.physical(),
5521                                srv_read_only_mode, true);
5522   }
5523 #else
5524   atomic_write = false;
5525 
5526   success = os_file_set_size(path, file, 0, size * page_size.physical(),
5527                              srv_read_only_mode, true);
5528 
5529 #endif /* !NO_FALLOCATE && UNIV_LINUX */
5530 
5531   if (!success) {
5532     os_file_close(file);
5533     os_file_delete(innodb_data_file_key, path);
5534     return (DB_OUT_OF_DISK_SPACE);
5535   }
5536 
5537   /* Note: We are actually punching a hole, previous contents will
5538   be lost after this call, if it succeeds. In this case the file
5539   should be full of NULs. */
5540 
5541   bool punch_hole = os_is_sparse_file_supported(path, file);
5542 
5543   if (punch_hole) {
5544     dberr_t punch_err;
5545 
5546     punch_err = os_file_punch_hole(file.m_file, 0, size * page_size.physical());
5547 
5548     if (punch_err != DB_SUCCESS) {
5549       punch_hole = false;
5550     }
5551   }
5552 
5553   /* We have to write the space id to the file immediately and flush the
5554   file to disk. This is because in crash recovery we must be aware what
5555   tablespaces exist and what are their space id's, so that we can apply
5556   the log records to the right file. It may take quite a while until
5557   buffer pool flush algorithms write anything to the file and flush it to
5558   disk. If we would not write here anything, the file would be filled
5559   with zeros from the call of os_file_set_size(), until a buffer pool
5560   flush would write to it. */
5561 
5562   buf2 = static_cast<byte *>(ut_malloc_nokey(3 * page_size.logical()));
5563 
5564   /* Align the memory for file i/o if we might have O_DIRECT set */
5565   page = static_cast<byte *>(ut_align(buf2, page_size.logical()));
5566 
5567   memset(page, '\0', page_size.logical());
5568 
5569   /* Add the UNIV_PAGE_SIZE to the table flags and write them to the
5570   tablespace header. */
5571   flags = fsp_flags_set_page_size(flags, page_size);
5572   fsp_header_init_fields(page, space_id, flags);
5573   mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, space_id);
5574 
5575   mach_write_to_4(page + FIL_PAGE_SRV_VERSION, DD_SPACE_CURRENT_SRV_VERSION);
5576   mach_write_to_4(page + FIL_PAGE_SPACE_VERSION,
5577                   DD_SPACE_CURRENT_SPACE_VERSION);
5578 
5579   IORequest request(IORequest::WRITE);
5580 
5581   if (!page_size.is_compressed()) {
5582     buf_flush_init_for_writing(nullptr, page, nullptr, 0,
5583                                fsp_is_checksum_disabled(space_id),
5584                                true /* skip_lsn_check */);
5585 
5586     err = os_file_write(request, path, file, page, 0, page_size.physical());
5587 
5588     ut_ad(err != DB_IO_NO_PUNCH_HOLE);
5589 
5590   } else {
5591     page_zip_des_t page_zip;
5592 
5593     page_zip_set_size(&page_zip, page_size.physical());
5594     page_zip.data = page + page_size.logical();
5595 #ifdef UNIV_DEBUG
5596     page_zip.m_start =
5597 #endif /* UNIV_DEBUG */
5598         page_zip.m_end = page_zip.m_nonempty = page_zip.n_blobs = 0;
5599 
5600     buf_flush_init_for_writing(nullptr, page, &page_zip, 0,
5601                                fsp_is_checksum_disabled(space_id),
5602                                true /* skip_lsn_check */);
5603 
5604     err = os_file_write(request, path, file, page_zip.data, 0,
5605                         page_size.physical());
5606 
5607     ut_a(err != DB_IO_NO_PUNCH_HOLE);
5608 
5609     punch_hole = false;
5610   }
5611 
5612   ut_free(buf2);
5613 
5614   if (err != DB_SUCCESS) {
5615     ib::error(ER_IB_MSG_304, path);
5616 
5617     os_file_close(file);
5618     os_file_delete(innodb_data_file_key, path);
5619 
5620     return (DB_ERROR);
5621   }
5622 
5623   success = os_file_flush(file);
5624 
5625   if (!success) {
5626     ib::error(ER_IB_MSG_305, path);
5627 
5628     os_file_close(file);
5629     os_file_delete(innodb_data_file_key, path);
5630     return (DB_ERROR);
5631   }
5632 
5633   space = fil_space_create(name, space_id, flags, type);
5634 
5635   if (space == nullptr) {
5636     os_file_close(file);
5637     os_file_delete(innodb_data_file_key, path);
5638     return (DB_ERROR);
5639   }
5640 
5641   DEBUG_SYNC_C("fil_ibd_created_space");
5642 
5643   auto shard = fil_system->shard_by_id(space_id);
5644 
5645   fil_node_t *file_node =
5646       shard->create_node(path, size, space, false, punch_hole, atomic_write);
5647 
5648   err = (file_node == nullptr) ? DB_ERROR : DB_SUCCESS;
5649 
5650 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
5651   /* Temporary tablespace creation need not be redo logged */
5652   if (err == DB_SUCCESS && type != FIL_TYPE_TEMPORARY) {
5653     const auto &file = space->files.front();
5654 
5655     mtr_t mtr;
5656 
5657     mtr_start(&mtr);
5658 
5659     fil_op_write_log(MLOG_FILE_CREATE, space_id, file.name, nullptr,
5660                      space->flags, &mtr);
5661 
5662     mtr_commit(&mtr);
5663 
5664     DBUG_EXECUTE_IF("fil_ibd_create_log", log_make_latest_checkpoint(););
5665   }
5666 
5667 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
5668 
5669   /* For encryption tablespace, initial encryption information. */
5670   if (space != nullptr && FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
5671     err = fil_set_encryption(space->id, Encryption::AES, nullptr, nullptr);
5672 
5673     ut_ad(err == DB_SUCCESS);
5674   }
5675 
5676   os_file_close(file);
5677   if (err != DB_SUCCESS) {
5678     os_file_delete(innodb_data_file_key, path);
5679   }
5680 
5681   return (err);
5682 }
5683 
5684 /** Create a IBD tablespace file.
5685 @param[in]	space_id	Tablespace ID
5686 @param[in]	name		Tablespace name in dbname/tablename format.
5687                                 For general tablespaces, the 'dbname/' part
5688                                 may be missing.
5689 @param[in]	path		Path and filename of the datafile to create.
5690 @param[in]	flags		Tablespace flags
5691 @param[in]	size		Initial size of the tablespace file in pages,
5692                                 must be >= FIL_IBD_FILE_INITIAL_SIZE
5693 @return DB_SUCCESS or error code */
fil_ibd_create(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size)5694 dberr_t fil_ibd_create(space_id_t space_id, const char *name, const char *path,
5695                        uint32_t flags, page_no_t size) {
5696   ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
5697   ut_ad(!srv_read_only_mode);
5698   return (fil_create_tablespace(space_id, name, path, flags, size,
5699                                 FIL_TYPE_TABLESPACE));
5700 }
5701 
5702 /** Create a session temporary tablespace (IBT) file.
5703 @param[in]	space_id	Tablespace ID
5704 @param[in]	name		Tablespace name
5705 @param[in]	path		Path and filename of the datafile to create.
5706 @param[in]	flags		Tablespace flags
5707 @param[in]	size		Initial size of the tablespace file in pages,
5708                                 must be >= FIL_IBT_FILE_INITIAL_SIZE
5709 @return DB_SUCCESS or error code */
fil_ibt_create(space_id_t space_id,const char * name,const char * path,uint32_t flags,page_no_t size)5710 dberr_t fil_ibt_create(space_id_t space_id, const char *name, const char *path,
5711                        uint32_t flags, page_no_t size) {
5712   ut_a(size >= FIL_IBT_FILE_INITIAL_SIZE);
5713   return (fil_create_tablespace(space_id, name, path, flags, size,
5714                                 FIL_TYPE_TEMPORARY));
5715 }
5716 
fil_replace_tablespace(space_id_t old_space_id,space_id_t new_space_id,page_no_t size_in_pages)5717 bool fil_replace_tablespace(space_id_t old_space_id, space_id_t new_space_id,
5718                             page_no_t size_in_pages) {
5719   auto space = fil_space_get(old_space_id);
5720   std::string space_name(space->name);
5721   std::string file_name(space->files.front().name);
5722 
5723   /* Mark the old tablespace to be deleted. We defer the actual deletion
5724   to avoid concurrency bottleneck.  Leave the pages in the buffer pool
5725   and record the lsn in fil_space_t::m_deleted_lsn. */
5726   dberr_t err = fil_delete_tablespace(old_space_id, BUF_REMOVE_NONE);
5727 
5728   if (err != DB_SUCCESS) {
5729     return (false);
5730   }
5731 
5732   ulint flags = fsp_flags_init(univ_page_size, false, false, false, false);
5733 
5734   /* Create the new UNDO tablespace. */
5735   err =
5736       fil_create_tablespace(new_space_id, space_name.c_str(), file_name.c_str(),
5737                             flags, size_in_pages, FIL_TYPE_TABLESPACE);
5738 
5739   return (err == DB_SUCCESS);
5740 }
5741 
5742 #ifndef UNIV_HOTBACKUP
5743 
5744 /** Open a single-table tablespace and optionally check the space id is
5745 right in it. If not successful, print an error message to the error log. This
5746 function is used to open a tablespace when we start up mysqld, and also in
5747 IMPORT TABLESPACE.
5748 NOTE that we assume this operation is used either at the database startup
5749 or under the protection of the dictionary mutex, so that two users cannot
5750 race here.
5751 
5752 The fil_node_t::handle will not be left open.
5753 
5754 @param[in]	validate	whether we should validate the tablespace
5755                                 (read the first page of the file and
5756                                 check that the space id in it matches id)
5757 @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_TEMPORARY
5758 @param[in]	space_id	Tablespace ID
5759 @param[in]	flags		tablespace flags
5760 @param[in]	space_name	tablespace name of the datafile
5761                                 If file-per-table, it is the table name in
5762                                 the databasename/tablename format
5763 @param[in]	table_name	table name in case if need to construct
5764                                 file path
5765 @param[in]	path_in		expected filepath, usually read from dictionary
5766 @param[in]	strict		whether to report error when open ibd failed
5767 @param[in]	old_space	whether it is a 5.7 tablespace opening
5768                                 by upgrade
5769 @return DB_SUCCESS or error code */
fil_ibd_open(bool validate,fil_type_t purpose,space_id_t space_id,uint32_t flags,const char * space_name,const char * table_name,const char * path_in,bool strict,bool old_space)5770 dberr_t fil_ibd_open(bool validate, fil_type_t purpose, space_id_t space_id,
5771                      uint32_t flags, const char *space_name,
5772                      const char *table_name, const char *path_in, bool strict,
5773                      bool old_space) {
5774   Datafile df;
5775   bool is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags);
5776   bool for_import = (purpose == FIL_TYPE_IMPORT);
5777 
5778   ut_ad(fil_type_is_data(purpose));
5779 
5780   if (!fsp_flags_is_valid(flags)) {
5781     return (DB_CORRUPTION);
5782   }
5783 
5784   /* Check if the file is already open. The space can be loaded
5785   via fil_space_get_first_path() on startup. This is a problem
5786   for partitioning code. It's a convoluted call graph via the DD.
5787   On Windows this can lead to a sharing violation when we attempt
5788   to open it again. */
5789 
5790   auto shard = fil_system->shard_by_id(space_id);
5791 
5792   shard->mutex_acquire();
5793 
5794   auto space = shard->get_space_by_id(space_id);
5795 
5796   if (space != nullptr) {
5797     shard->space_detach(space);
5798     shard->space_delete(space->id);
5799     shard->space_free_low(space);
5800     ut_a(space == nullptr);
5801   }
5802 
5803   shard->mutex_release();
5804 
5805   df.init(space_name, flags);
5806 
5807   if (path_in == nullptr) {
5808     df.make_filepath(nullptr, space_name, IBD);
5809   } else {
5810     df.set_filepath(path_in);
5811   }
5812 
5813   /* Attempt to open the tablespace. */
5814   if (df.open_read_only(strict) == DB_SUCCESS) {
5815     ut_ad(df.is_open());
5816   } else {
5817     ut_ad(!df.is_open());
5818     return (DB_CANNOT_OPEN_FILE);
5819   }
5820 
5821 #if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
5822   const bool atomic_write =
5823       !dblwr::enabled && fil_fusionio_enable_atomic_write(df.handle());
5824 #else
5825   const bool atomic_write = false;
5826 #endif /* !NO_FALLOCATE && UNIV_LINUX */
5827 
5828   dberr_t err;
5829 
5830   if ((validate || is_encrypted) &&
5831       (err = df.validate_to_dd(space_id, flags, for_import)) != DB_SUCCESS) {
5832     /* We don't reply the rename via the redo log anymore.
5833     Therefore we can get a space ID mismatch when validating
5834     the files during bootstrap. */
5835 
5836     if (!is_encrypted && err != DB_WRONG_FILE_NAME) {
5837       /* The following call prints an error message.
5838       For encrypted tablespace we skip print, since it should
5839       be keyring plugin issues. */
5840 
5841       os_file_get_last_error(true);
5842 
5843       ib::error(ER_IB_MSG_306, space_name, TROUBLESHOOT_DATADICT_MSG);
5844     }
5845 
5846     return (err);
5847   }
5848 
5849   /* If the encrypted tablespace is already opened,
5850   return success. */
5851   if (validate && is_encrypted && fil_space_get(space_id)) {
5852     return (DB_SUCCESS);
5853   }
5854 
5855   /* We pass UNINITIALIZED flags while we try to open DD tablespace. In that
5856   case, set the flags now based on what is read from disk.*/
5857   if (FSP_FLAGS_ARE_NOT_SET(flags) && fsp_is_dd_tablespace(space_id)) {
5858     flags = df.flags();
5859     is_encrypted = FSP_FLAGS_GET_ENCRYPTION(flags);
5860   }
5861 
5862   space = fil_space_create(space_name, space_id, flags, purpose);
5863 
5864   if (space == nullptr) {
5865     return (DB_ERROR);
5866   }
5867 
5868   /* We do not measure the size of the file, that is why
5869   we pass the 0 below */
5870 
5871   const fil_node_t *file =
5872       shard->create_node(df.filepath(), 0, space, false, atomic_write, false);
5873 
5874   if (file == nullptr) {
5875     return (DB_ERROR);
5876   }
5877 
5878   if (validate && !old_space && !for_import) {
5879     if (df.server_version() > DD_SPACE_CURRENT_SRV_VERSION) {
5880       ib::error(ER_IB_MSG_1272, ulong{DD_SPACE_CURRENT_SRV_VERSION},
5881                 ulonglong{df.server_version()});
5882       /* Server version is less than the tablespace server version.
5883       We don't support downgrade for 8.0 server, so report error */
5884       return (DB_SERVER_VERSION_LOW);
5885     }
5886     ut_ad(df.space_version() == DD_SPACE_CURRENT_SPACE_VERSION);
5887   }
5888 
5889   /* Set unencryption in progress flag */
5890   space->encryption_op_in_progress = df.m_encryption_op_in_progress;
5891 
5892   /* Its possible during Encryption processing, space flag for encryption
5893   has been updated in ibd file but server crashed before DD flags are
5894   updated. Thus, consider ibd setting too for encryption.
5895 
5896   It is safe because m_encryption_op_in_progress will be set to NONE
5897   always unless there is a crash before finishing Encryption. */
5898   if (space->encryption_op_in_progress == ENCRYPTION) {
5899     space->flags |= flags & FSP_FLAGS_MASK_ENCRYPTION;
5900   }
5901 
5902   /* For encryption tablespace, initialize encryption information.*/
5903   if ((is_encrypted || space->encryption_op_in_progress == ENCRYPTION) &&
5904       !for_import) {
5905     dberr_t err;
5906     byte *iv = df.m_encryption_iv;
5907     byte *key = df.m_encryption_key;
5908 
5909     err = fil_set_encryption(space->id, Encryption::AES, key, iv);
5910 
5911     if (err != DB_SUCCESS) {
5912       return (DB_ERROR);
5913     }
5914   }
5915 
5916   return (DB_SUCCESS);
5917 }
5918 
5919 #else  /* !UNIV_HOTBACKUP */
5920 
5921 /** Allocates a file name for an old version of a single-table tablespace.
5922 The string must be freed by caller with ut_free()!
5923 @param[in]	name		Original file name
5924 @return own: file name */
meb_make_ibbackup_old_name(const char * name)5925 static char *meb_make_ibbackup_old_name(const char *name) {
5926   char *path;
5927   ulint len = strlen(name);
5928   static const char suffix[] = "_ibbackup_old_vers_";
5929 
5930   path = static_cast<char *>(ut_malloc_nokey(len + 15 + sizeof(suffix)));
5931 
5932   memcpy(path, name, len);
5933   memcpy(path + len, suffix, sizeof(suffix) - 1);
5934 
5935   meb_sprintf_timestamp_without_extra_chars(path + len + sizeof(suffix) - 1);
5936 
5937   return (path);
5938 }
5939 #endif /* UNIV_HOTBACKUP */
5940 
5941 /** Looks for a pre-existing fil_space_t with the given tablespace ID
5942 and, if found, returns the name and filepath in newly allocated buffers
5943 that the caller must free.
5944 @param[in]	space_id	The tablespace ID to search for.
5945 @param[out]	name		Name of the tablespace found.
5946 @param[out]	filepath	The filepath of the first datafile for the
5947 tablespace.
5948 @return true if tablespace is found, false if not. */
fil_space_read_name_and_filepath(space_id_t space_id,char ** name,char ** filepath)5949 bool fil_space_read_name_and_filepath(space_id_t space_id, char **name,
5950                                       char **filepath) {
5951   bool success = false;
5952 
5953   *name = nullptr;
5954   *filepath = nullptr;
5955 
5956   auto shard = fil_system->shard_by_id(space_id);
5957 
5958   shard->mutex_acquire();
5959 
5960   fil_space_t *space = shard->get_space_by_id(space_id);
5961 
5962   if (space != nullptr) {
5963     *name = mem_strdup(space->name);
5964 
5965     *filepath = mem_strdup(space->files.front().name);
5966 
5967     success = true;
5968   }
5969 
5970   shard->mutex_release();
5971 
5972   return (success);
5973 }
5974 
5975 /** Convert a file name to a tablespace name. Strip the file name
5976 prefix and suffix, leaving only databasename/tablename.
5977 @param[in]	filename	directory/databasename/tablename.ibd
5978 @return database/tablename string, to be freed with ut_free() */
fil_path_to_space_name(const char * filename)5979 char *fil_path_to_space_name(const char *filename) {
5980   std::string path{filename};
5981   auto pos = path.find_last_of(Fil_path::SEPARATOR);
5982 
5983   ut_a(pos != std::string::npos && !Fil_path::is_separator(path.back()));
5984 
5985   std::string db_name = path.substr(0, pos);
5986   std::string space_name = path.substr(pos + 1, path.length());
5987 
5988   /* If it is a path such as a/b/c.ibd, ignore everything before 'b'. */
5989   pos = db_name.find_last_of(Fil_path::SEPARATOR);
5990 
5991   if (pos != std::string::npos) {
5992     db_name = db_name.substr(pos + 1);
5993   }
5994 
5995   char *name;
5996 
5997   if (Fil_path::has_suffix(IBD, space_name)) {
5998     /* fil_space_t::name always uses '/' . */
5999 
6000     path = db_name;
6001     path.push_back('/');
6002 
6003     /* Strip the ".ibd" suffix. */
6004     path.append(space_name.substr(0, space_name.length() - 4));
6005 
6006     name = mem_strdupl(path.c_str(), path.length());
6007 
6008   } else {
6009     /* Must have an "undo" prefix. */
6010     ut_ad(space_name.find("undo") == 0);
6011 
6012     name = mem_strdupl(space_name.c_str(), space_name.length());
6013   }
6014 
6015   return (name);
6016 }
6017 
6018 /** Open an ibd tablespace and add it to the InnoDB data structures.
6019 This is similar to fil_ibd_open() except that it is used while processing
6020 the redo and DDL log, so the data dictionary is not available and very little
6021 validation is done. The tablespace name is extracted from the
6022 dbname/tablename.ibd portion of the filename, which assumes that the file
6023 is a file-per-table tablespace.  Any name will do for now.  General
6024 tablespace names will be read from the dictionary after it has been
6025 recovered.  The tablespace flags are read at this time from the first page
6026 of the file in validate_for_recovery().
6027 @param[in]	space_id	tablespace ID
6028 @param[in]	path		path/to/databasename/tablename.ibd
6029 @param[out]	space		the tablespace, or nullptr on error
6030 @return status of the operation */
ibd_open_for_recovery(space_id_t space_id,const std::string & path,fil_space_t * & space)6031 fil_load_status Fil_shard::ibd_open_for_recovery(space_id_t space_id,
6032                                                  const std::string &path,
6033                                                  fil_space_t *&space) {
6034   /* If the a space is already in the file system cache with this
6035   space ID, then there is nothing to do. */
6036 
6037   mutex_acquire();
6038 
6039   space = get_space_by_id(space_id);
6040 
6041   mutex_release();
6042 
6043   const char *filename = path.c_str();
6044 
6045   if (space != nullptr) {
6046     ut_a(space->files.size() == 1);
6047 
6048     const auto &file = space->files.front();
6049 
6050     /* Compare the real paths. */
6051     if (Fil_path::is_same_as(filename, file.name)) {
6052       return (FIL_LOAD_OK);
6053     }
6054 
6055 #ifdef UNIV_HOTBACKUP
6056     ib::trace_2() << "Ignoring data file '" << filename << "' with space ID "
6057                   << space->id << ". Another data file called '" << file.name
6058                   << "' exists with the same space ID";
6059 #else  /* UNIV_HOTBACKUP */
6060     ib::info(ER_IB_MSG_307, filename, ulong{space->id}, file.name);
6061 #endif /* UNIV_HOTBACKUP */
6062 
6063     space = nullptr;
6064 
6065     return (FIL_LOAD_ID_CHANGED);
6066   }
6067 
6068   Datafile df;
6069 
6070   df.set_filepath(filename);
6071 
6072   if (df.open_read_only(false) != DB_SUCCESS) {
6073     return (FIL_LOAD_NOT_FOUND);
6074   }
6075 
6076   ut_ad(df.is_open());
6077 
6078   /* Read and validate the first page of the tablespace.
6079   Assign a tablespace name based on the tablespace type. */
6080   dberr_t err = df.validate_for_recovery(space_id);
6081 
6082   ut_a(err == DB_SUCCESS || err == DB_INVALID_ENCRYPTION_META);
6083   if (err == DB_INVALID_ENCRYPTION_META) {
6084     bool success = fil_system->erase_path(space_id);
6085     ut_a(success);
6086     return (FIL_LOAD_INVALID_ENCRYPTION_META);
6087   }
6088 
6089   ut_a(df.space_id() == space_id);
6090 
6091   /* Get and test the file size. */
6092   os_offset_t size = os_file_get_size(df.handle());
6093 
6094   /* Every .ibd file is created >= 4 pages in size.
6095   Smaller files cannot be OK. */
6096   os_offset_t minimum_size;
6097 
6098   /* Every .ibd file is created >= FIL_IBD_FILE_INITIAL_SIZE
6099   pages in size. Smaller files cannot be OK. */
6100   {
6101     const page_size_t page_size(df.flags());
6102 
6103     minimum_size = FIL_IBD_FILE_INITIAL_SIZE * page_size.physical();
6104   }
6105 
6106   if (size == static_cast<os_offset_t>(-1)) {
6107     /* The following call prints an error message */
6108     os_file_get_last_error(true);
6109 
6110     ib::error(ER_IB_MSG_308) << "Could not measure the size of"
6111                                 " single-table tablespace file '"
6112                              << df.filepath() << "'";
6113 
6114   } else if (size < minimum_size) {
6115 #ifndef UNIV_HOTBACKUP
6116     ib::error(ER_IB_MSG_309)
6117         << "The size of tablespace file '" << df.filepath() << "' is only "
6118         << size << ", should be at least " << minimum_size << "!";
6119 #else
6120     /* In MEB, we work around this error. */
6121     df.set_space_id(SPACE_UNKNOWN);
6122     df.set_flags(0);
6123 #endif /* !UNIV_HOTBACKUP */
6124   }
6125 
6126   ut_ad(space == nullptr);
6127 
6128 #ifdef UNIV_HOTBACKUP
6129   if (df.space_id() == SPACE_UNKNOWN || df.space_id() == 0) {
6130     char *new_path;
6131 
6132     ib::info(ER_IB_MSG_310)
6133         << "Renaming tablespace file '" << df.filepath() << "' with space ID "
6134         << df.space_id() << " to " << df.name()
6135         << "_ibbackup_old_vers_<timestamp>"
6136            " because its size "
6137         << df.size()
6138         << " is too small"
6139            " (< 4 pages 16 kB each), or the space id in the"
6140            " file header is not sensible. This can happen in"
6141            " an mysqlbackup run, and is not dangerous.";
6142     df.close();
6143 
6144     new_path = meb_make_ibbackup_old_name(df.filepath());
6145 
6146     bool success =
6147         os_file_rename(innodb_data_file_key, df.filepath(), new_path);
6148 
6149     ut_a(success);
6150 
6151     ut_free(new_path);
6152 
6153     return (FIL_LOAD_ID_CHANGED);
6154   }
6155 
6156   /* A backup may contain the same space several times, if the space got
6157   renamed at a sensitive time. Since it is enough to have one version of
6158   the space, we rename the file if a space with the same space id
6159   already exists in the tablespace memory cache. We rather rename the
6160   file than delete it, because if there is a bug, we do not want to
6161   destroy valuable data. */
6162 
6163   mutex_acquire();
6164 
6165   space = get_space_by_id(space_id);
6166 
6167   mutex_release();
6168 
6169   if (space != nullptr) {
6170     ib::info(ER_IB_MSG_311)
6171         << "Renaming data file '" << df.filepath() << "' with space ID "
6172         << space_id << " to " << df.name()
6173         << "_ibbackup_old_vers_<timestamp> because space " << space->name
6174         << " with the same id was scanned"
6175            " earlier. This can happen if you have renamed tables"
6176            " during an mysqlbackup run.";
6177 
6178     df.close();
6179 
6180     char *new_path = meb_make_ibbackup_old_name(df.filepath());
6181 
6182     bool success =
6183         os_file_rename(innodb_data_file_key, df.filepath(), new_path);
6184 
6185     ut_a(success);
6186 
6187     ut_free(new_path);
6188     return (FIL_LOAD_OK);
6189   }
6190 #endif /* UNIV_HOTBACKUP */
6191   std::string tablespace_name(df.name());
6192 
6193   /* During the apply-log operation, MEB already has translated the
6194   file name, so file name to space name conversion is not required. */
6195 #ifndef UNIV_HOTBACKUP
6196   dict_name::convert_to_space(tablespace_name);
6197 #endif /* !UNIV_HOTBACKUP */
6198 
6199   fil_system->mutex_acquire_all();
6200 
6201   space = space_create(tablespace_name.c_str(), space_id, df.flags(),
6202                        FIL_TYPE_TABLESPACE);
6203 
6204   fil_system->mutex_release_all();
6205 
6206   if (space == nullptr) {
6207     return (FIL_LOAD_INVALID);
6208   }
6209 
6210   ut_ad(space->id == df.space_id());
6211   ut_ad(space->id == space_id);
6212 
6213   /* We do not use the size information we have about the file, because
6214   the rounding formula for extents and pages is somewhat complex; we
6215   let create_node() do that task. */
6216 
6217   const fil_node_t *file;
6218 
6219   file = create_node(df.filepath(), 0, space, false, true, false);
6220 
6221   ut_a(file != nullptr);
6222 
6223   /* For encryption tablespace, initial encryption information. */
6224   if (FSP_FLAGS_GET_ENCRYPTION(space->flags) &&
6225       df.m_encryption_key != nullptr) {
6226     dberr_t err = fil_set_encryption(space->id, Encryption::AES,
6227                                      df.m_encryption_key, df.m_encryption_iv);
6228 
6229     if (err != DB_SUCCESS) {
6230       ib::error(ER_IB_MSG_312, space->name);
6231     }
6232   }
6233 
6234   if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && !srv_backup_mode &&
6235       use_dumped_tablespace_keys) {
6236     err = xb_set_encryption(space);
6237     if (err != DB_SUCCESS) {
6238       ib::error() << "Cannot find encryption key for tablespace '%s'."
6239                   << space->name;
6240       return (FIL_LOAD_INVALID);
6241     }
6242   }
6243 
6244   /* Set unencryption in progress flag */
6245   space->encryption_op_in_progress = df.m_encryption_op_in_progress;
6246 
6247   return (FIL_LOAD_OK);
6248 }
6249 
6250 /** Open an ibd tablespace and add it to the InnoDB data structures.
6251 This is similar to fil_ibd_open() except that it is used while processing
6252 the redo log, so the data dictionary is not available and very little
6253 validation is done. The tablespace name is extracted from the
6254 dbname/tablename.ibd portion of the filename, which assumes that the file
6255 is a file-per-table tablespace.  Any name will do for now.  General
6256 tablespace names will be read from the dictionary after it has been
6257 recovered.  The tablespace flags are read at this time from the first page
6258 of the file in validate_for_recovery().
6259 @param[in]	space_id	tablespace ID
6260 @param[in]	path		path/to/databasename/tablename.ibd
6261 @param[out]	space		the tablespace, or nullptr on error
6262 @return status of the operation */
ibd_open_for_recovery(space_id_t space_id,const std::string & path,fil_space_t * & space)6263 fil_load_status Fil_system::ibd_open_for_recovery(space_id_t space_id,
6264                                                   const std::string &path,
6265                                                   fil_space_t *&space) {
6266   /* System tablespace open should never come here. It should be
6267   opened explicitly using the config path. */
6268   ut_a(space_id != TRX_SYS_SPACE);
6269 
6270 #ifndef UNIV_HOTBACKUP
6271   /* Do not attempt to open or load for recovery any undo tablespace that
6272   is currently being truncated. */
6273   if (fsp_is_undo_tablespace(space_id) &&
6274       undo::is_active_truncate_log_present(undo::id2num(space_id))) {
6275     return (FIL_LOAD_NOT_FOUND);
6276   }
6277 #endif /* !UNIV_HOTBACKUP */
6278 
6279   auto shard = shard_by_id(space_id);
6280 
6281   return (shard->ibd_open_for_recovery(space_id, path, space));
6282 }
6283 
6284 #ifndef UNIV_HOTBACKUP
6285 
6286 /** Report that a tablespace for a table was not found.
6287 @param[in]	name		Table name
6288 @param[in]	space_id	Table's space ID */
fil_report_missing_tablespace(const char * name,space_id_t space_id)6289 static void fil_report_missing_tablespace(const char *name,
6290                                           space_id_t space_id) {
6291   ib::error(ER_IB_MSG_313)
6292       << "Table " << name << " in the InnoDB data dictionary has tablespace id "
6293       << space_id
6294       << ","
6295          " but tablespace with that id or name does not exist. Have"
6296          " you deleted or moved .ibd files?";
6297 }
6298 
adjust_space_name(fil_space_t * space,const char * dd_space_name)6299 bool Fil_shard::adjust_space_name(fil_space_t *space,
6300                                   const char *dd_space_name) {
6301   if (!strcmp(space->name, dd_space_name)) {
6302     return (true);
6303   }
6304 
6305   bool replace_general =
6306       FSP_FLAGS_GET_SHARED(space->flags) &&
6307       0 == strncmp(space->name, general_space_name, strlen(general_space_name));
6308   bool replace_undo =
6309       fsp_is_undo_tablespace(space->id) &&
6310       0 == strncmp(space->name, undo_space_name, strlen(undo_space_name));
6311 
6312   /* Update the auto-generated fil_space_t::name */
6313   if (replace_general || replace_undo) {
6314     char *old_space_name = space->name;
6315     char *new_space_name = mem_strdup(dd_space_name);
6316 
6317     update_space_name_map(space, new_space_name);
6318 
6319     space->name = new_space_name;
6320 
6321     ut_free(old_space_name);
6322   }
6323 
6324   /* Update the undo::Tablespace::name. Since the fil_shard mutex is held by
6325   the caller, it would be a sync order violation to get undo::spaces->s_lock.
6326   It is OK to skip this s_lock since this occurs during boot_tablespaces()
6327   which is still single threaded. */
6328   if (replace_undo) {
6329     space_id_t space_num = undo::id2num(space->id);
6330     undo::Tablespace *undo_space = undo::spaces->find(space_num);
6331     undo_space->set_space_name(dd_space_name);
6332   }
6333 
6334   return (replace_general || replace_undo);
6335 }
6336 
6337 /** Returns true if a matching tablespace exists in the InnoDB tablespace
6338 memory cache.
6339 @param[in]	space_id		Tablespace ID
6340 @param[in]	name			Tablespace name used in
6341                                         fil_space_create().
6342 @param[in]	print_err		Print detailed error information to the
6343                                         error log if a matching tablespace is
6344                                         not found from memory.
6345 @param[in]	adjust_space		Whether to adjust space id on mismatch
6346 @param[in]	heap			Heap memory
6347 @param[in]	table_id		table id
6348 @return true if a matching tablespace exists in the memory cache */
space_check_exists(space_id_t space_id,const char * name,bool print_err,bool adjust_space,mem_heap_t * heap,table_id_t table_id)6349 bool Fil_shard::space_check_exists(space_id_t space_id, const char *name,
6350                                    bool print_err, bool adjust_space,
6351                                    mem_heap_t *heap, table_id_t table_id) {
6352   fil_space_t *fnamespace = nullptr;
6353 
6354   mutex_acquire();
6355 
6356   /* Look if there is a space with the same id */
6357   fil_space_t *space = get_space_by_id(space_id);
6358 
6359   /* name is nullptr when replaying a DELETE ddl log. */
6360   if (name == nullptr) {
6361     mutex_release();
6362     return (space != nullptr);
6363   }
6364 
6365   if (space != nullptr) {
6366     /* No need to check a general tablespace name if the DD
6367     is not yet available. */
6368     if (!srv_sys_tablespaces_open && FSP_FLAGS_GET_SHARED(space->flags)) {
6369       mutex_release();
6370       return (true);
6371     }
6372 
6373     /* Sometimes the name has been auto-generated when the
6374     datafile is discovered and needs to be adjusted to that
6375     of the DD. This happens for general and undo tablespaces. */
6376     if (srv_sys_tablespaces_open && adjust_space &&
6377         adjust_space_name(space, name)) {
6378       mutex_release();
6379       return (true);
6380     }
6381 
6382     /* If this space has the expected name, use it. */
6383     fnamespace = get_space_by_name(name);
6384 
6385     if (space == fnamespace) {
6386       /* Found */
6387       mutex_release();
6388       return (true);
6389     }
6390   }
6391 
6392   /* Info from "fnamespace" comes from the ibd file itself, it can
6393   be different from data obtained from System tables since file
6394   operations are not transactional. If adjust_space is set, and the
6395   mismatching space are between a user table and its temp table, we
6396   shall adjust the ibd file name according to system table info */
6397   if (adjust_space && space != nullptr &&
6398       row_is_mysql_tmp_table_name(space->name) &&
6399       !row_is_mysql_tmp_table_name(name)) {
6400     /* Atomic DDL's "ddl_log" will adjust the tablespace name. */
6401     mutex_release();
6402 
6403     return (true);
6404 
6405   } else if (!print_err) {
6406     ;
6407 
6408   } else if (space == nullptr) {
6409     if (fnamespace == nullptr) {
6410       if (print_err) {
6411         fil_report_missing_tablespace(name, space_id);
6412       }
6413 
6414     } else {
6415       ib::error(ER_IB_MSG_314)
6416           << "Table " << name
6417           << " in InnoDB data"
6418              " dictionary has tablespace id "
6419           << space_id
6420           << ", but a tablespace with that id does not"
6421              " exist. There is a tablespace of name "
6422           << fnamespace->name << " and id " << fnamespace->id
6423           << ", though. Have you"
6424              " deleted or moved .ibd files?";
6425     }
6426 
6427     ib::warn(ER_IB_MSG_315) << TROUBLESHOOT_DATADICT_MSG;
6428 
6429   } else if (0 != strcmp(space->name, name)) {
6430     ib::error(ER_IB_MSG_316) << "Table " << name
6431                              << " in InnoDB data dictionary"
6432                                 " has tablespace id "
6433                              << space_id
6434                              << ", but the"
6435                                 " tablespace with that id has name "
6436                              << space->name
6437                              << ". Have you deleted or moved .ibd"
6438                                 " files?";
6439 
6440     if (fnamespace != nullptr) {
6441       ib::error(ER_IB_MSG_317) << "There is a tablespace with the right"
6442                                   " name: "
6443                                << fnamespace->name
6444                                << ", but its id"
6445                                   " is "
6446                                << fnamespace->id << ".";
6447     }
6448 
6449     ib::warn(ER_IB_MSG_318) << TROUBLESHOOT_DATADICT_MSG;
6450   }
6451 
6452   mutex_release();
6453 
6454   return (false);
6455 }
6456 
6457 /** Returns true if a matching tablespace exists in the InnoDB tablespace
6458 memory cache.
6459 @param[in]	space_id	Tablespace ID
6460 @param[in]	name		Tablespace name used in space_create().
6461 @param[in]	print_err	Print detailed error information to the
6462                                 error log if a matching tablespace is
6463                                 not found from memory.
6464 @param[in]	adjust_space	Whether to adjust space id on mismatch
6465 @param[in]	heap		Heap memory
6466 @param[in]	table_id	table ID
6467 @return true if a matching tablespace exists in the memory cache */
fil_space_exists_in_mem(space_id_t space_id,const char * name,bool print_err,bool adjust_space,mem_heap_t * heap,table_id_t table_id)6468 bool fil_space_exists_in_mem(space_id_t space_id, const char *name,
6469                              bool print_err, bool adjust_space,
6470                              mem_heap_t *heap, table_id_t table_id) {
6471   auto shard = fil_system->shard_by_id(space_id);
6472 
6473   return (shard->space_check_exists(space_id, name, print_err, adjust_space,
6474                                     heap, table_id));
6475 }
6476 #endif /* !UNIV_HOTBACKUP */
6477 
6478 /** Return the space ID based on the tablespace name.
6479 The tablespace must be found in the tablespace memory cache.
6480 @param[in]	name		Tablespace name
6481 @return space ID if tablespace found, SPACE_UNKNOWN if space not. */
fil_space_get_id_by_name(const char * name)6482 space_id_t fil_space_get_id_by_name(const char *name) {
6483   auto space = fil_system->get_space_by_name(name);
6484 
6485   return ((space == nullptr) ? SPACE_UNKNOWN : space->id);
6486 }
6487 
6488 /** Fill the pages with NULs
6489 @param[in] file		Tablespace file
6490 @param[in] page_size	physical page size
6491 @param[in] start	Offset from the start of the file in bytes
6492 @param[in] len		Length in bytes
6493 @param[in] read_only_mode
6494                         if true, then read only mode checks are enforced.
6495 @return DB_SUCCESS or error code */
fil_write_zeros(const fil_node_t * file,ulint page_size,os_offset_t start,ulint len,bool read_only_mode)6496 static dberr_t fil_write_zeros(const fil_node_t *file, ulint page_size,
6497                                os_offset_t start, ulint len,
6498                                bool read_only_mode) {
6499   ut_a(len > 0);
6500 
6501   /* Extend at most 1M at a time */
6502   ulint n_bytes = ut_min(static_cast<ulint>(1024 * 1024), len);
6503 
6504   byte *ptr = reinterpret_cast<byte *>(ut_zalloc_nokey(n_bytes + page_size));
6505 
6506   byte *buf = reinterpret_cast<byte *>(ut_align(ptr, page_size));
6507 
6508   os_offset_t offset = start;
6509   dberr_t err = DB_SUCCESS;
6510   const os_offset_t end = start + len;
6511   IORequest request(IORequest::WRITE);
6512 
6513   while (offset < end) {
6514 #ifdef UNIV_HOTBACKUP
6515     err =
6516         os_file_write(request, file->name, file->handle, buf, offset, n_bytes);
6517 #else  /* UNIV_HOTBACKUP */
6518     err = os_aio_func(request, AIO_mode::SYNC, file->name, file->handle, buf,
6519                       offset, n_bytes, read_only_mode, nullptr, nullptr);
6520 #endif /* UNIV_HOTBACKUP */
6521 
6522     if (err != DB_SUCCESS) {
6523       break;
6524     }
6525 
6526     offset += n_bytes;
6527 
6528     n_bytes = ut_min(n_bytes, static_cast<ulint>(end - offset));
6529 
6530     DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", DBUG_SUICIDE(););
6531   }
6532 
6533   ut_free(ptr);
6534 
6535   return (err);
6536 }
6537 
/** Try to extend a tablespace if it is smaller than the specified size.
Only the last file of the tablespace (space->files.back()) is extended. The
function waits, by polling, until no other thread has the file marked as
in use, then marks it in use itself, grows the file outside the shard mutex
and finally updates the cached file and space sizes under the mutex.
@param[in,out]	space		tablespace
@param[in]	size		desired size in pages
@return whether the tablespace is at least as big as requested; false if
the file could not be prepared for I/O or the file size measured after the
extension is not the expected one */
bool Fil_shard::space_extend(fil_space_t *space, page_no_t size) {
  /* In read-only mode we allow write to shared temporary tablespace
  as intrinsic table created by Optimizer reside in this tablespace. */
  ut_ad(!srv_read_only_mode || fsp_is_system_temporary(space->id));

#ifndef UNIV_HOTBACKUP
  DBUG_EXECUTE_IF("fil_space_print_xdes_pages",
                  space->print_xdes_pages("xdes_pages.log"););
#endif /* !UNIV_HOTBACKUP */

  fil_node_t *file;
  bool slot;
  size_t phy_page_size;
  bool success = true;

#ifdef UNIV_HOTBACKUP
  page_no_t prev_size = 0;
#endif /* UNIV_HOTBACKUP */

  /* Loop until this thread is the one that marked the file as in use,
  or until the space turns out to be big enough already. */
  for (;;) {
    slot = mutex_acquire_and_get_space(space->id, space);

    /* Note:If the file is being opened for the first time then
    we don't have the file physical size. There is no guarantee
    that the file has been opened at this stage. */

    /* NOTE(review): this early exit uses '<' while the re-check after
    prepare_file_for_io() below uses '<='; confirm the difference is
    intended. */
    if (size < space->size) {
      /* Space already big enough */
      mutex_release();

      if (slot) {
        release_open_slot(m_id);
      }

      return (true);
    }

    file = &space->files.back();

    page_size_t page_size(space->flags);

    phy_page_size = page_size.physical();

#ifdef UNIV_HOTBACKUP
    prev_size = space->size;

    ib::trace_1() << "Extending space id : " << space->id
                  << ", space name : " << space->name
                  << ", space size : " << space->size
                  << " page, page size : " << phy_page_size;
#endif /* UNIV_HOTBACKUP */

    if (file->in_use == 0) {
      /* Mark this file as undergoing extension. This flag
      is used by other threads to wait for the extension
      operation to finish or wait for open to complete. */

      ++file->in_use;

      /* Leave the loop with the shard mutex still held. */
      break;
    }

    if (slot) {
      release_open_slot(m_id);
    }

    /* Another thread is currently using the file. Wait
    for it to finish.  It'd have been better to use an event
    driven mechanism but the entire module is peppered with
    polling code. */

    mutex_release();

    os_thread_sleep(100000);
  }

  bool opened = prepare_file_for_io(file, true);

  if (slot) {
    release_open_slot(m_id);
  }

  if (!opened) {
    /* The tablespace data file, such as .ibd file, is missing */
    ut_a(file->in_use > 0);
    --file->in_use;

    mutex_release();

    return (false);
  }

  ut_a(file->is_open);

  /* Re-check the size now that the file has been prepared for I/O;
  undo the in_use mark and the prepared I/O if nothing is to be done. */
  if (size <= space->size) {
    ut_a(file->in_use > 0);
    --file->in_use;

    complete_io(file, IORequestRead);

    mutex_release();

    return (true);
  }

  /* At this point it is safe to release the shard mutex. No
  other thread can rename, delete or close the file because
  we have set the file->in_use flag. */

  mutex_release();

  page_no_t pages_added;
  os_offset_t node_start = os_file_get_size(file->handle);

  ut_a(node_start != (os_offset_t)-1);

  /* File first page number */
  page_no_t node_first_page = space->size - file->size;

  /* Number of physical pages in the file */
  page_no_t n_node_physical_pages =
      static_cast<page_no_t>(node_start / phy_page_size);

  /* Number of pages to extend in the file */
  page_no_t n_node_extend;

  n_node_extend = size - (node_first_page + file->size);

  /* If we already have enough physical pages to satisfy the
  extend request on the file then ignore it */
  if (file->size + n_node_extend > n_node_physical_pages) {
    DBUG_EXECUTE_IF("ib_crash_during_tablespace_extension", DBUG_SUICIDE(););

    os_offset_t len;
    dberr_t err = DB_SUCCESS;

    /* Bytes missing between the current physical end of the file and
    the requested end. */
    len = ((file->size + n_node_extend) * phy_page_size) - node_start;

    ut_ad(len > 0);

#if !defined(NO_FALLOCATE) && defined(UNIV_LINUX)
    /* This is required by FusionIO HW/Firmware */

    int ret = posix_fallocate(file->handle.m_file, node_start, len);

    DBUG_EXECUTE_IF("ib_posix_fallocate_fail_eintr", ret = EINTR;);

    DBUG_EXECUTE_IF("ib_posix_fallocate_fail_einval", ret = EINVAL;);

    if (ret != 0) {
      /* We already pass the valid offset and len in, if EINVAL
      is returned, it could only mean that the file system doesn't
      support fallocate(), currently one known case is ext3 with O_DIRECT.

      Also because above call could be interrupted, in this case,
      simply go to plan B by writing zeroes.

      Both error messages for above two scenarios are skipped in case
      of flooding error messages, because they can be ignored by users. */
      if (ret != EINTR && ret != EINVAL) {
        ib::error(ER_IB_MSG_319)
            << "posix_fallocate(): Failed to preallocate"
               " data for file "
            << file->name << ", desired size " << len
            << " bytes."
               " Operating system error number "
            << ret
            << ". Check"
               " that the disk is not full or a disk quota"
               " exceeded. Make sure the file system supports"
               " this function. Some operating system error"
               " numbers are described at " REFMAN
               "operating-system-error-codes.html";
      }

      /* Any fallocate failure forces the zero-writing path below. */
      err = DB_IO_ERROR;
    }
#endif /* NO_FALLOCATE || !UNIV_LINUX */

    /* Write zeroes explicitly unless the file uses atomic writes and
    no error has been recorded above. */
    if (!file->atomic_write || err == DB_IO_ERROR) {
      bool read_only_mode;

      read_only_mode =
          (space->purpose != FIL_TYPE_TEMPORARY ? false : srv_read_only_mode);

      err = fil_write_zeros(file, phy_page_size, node_start,
                            static_cast<ulint>(len), read_only_mode);

      if (err != DB_SUCCESS) {
        ib::warn(ER_IB_MSG_320)
            << "Error while writing " << len << " zeroes to " << file->name
            << " starting at offset " << node_start;
      }
    }

    /* Check how many pages actually added */
    os_offset_t end = os_file_get_size(file->handle);
    ut_a(end != static_cast<os_offset_t>(-1) && end >= node_start);

    /* Success means the file ends exactly where it was asked to; anything
    else is flagged as a disk-full condition. */
    os_has_said_disk_full = !(success = (end == node_start + len));

    pages_added = static_cast<page_no_t>(end / phy_page_size);

    ut_a(pages_added >= file->size);
    pages_added -= file->size;

  } else {
    success = true;
    pages_added = n_node_extend;
    os_has_said_disk_full = FALSE;
  }

  /* Publish the new sizes and drop the in_use mark under the mutex. */
  mutex_acquire();

  file->size += pages_added;
  space->size += pages_added;

  ut_a(file->in_use > 0);
  --file->in_use;

  complete_io(file, IORequestWrite);

#ifndef UNIV_HOTBACKUP
  /* Keep the last data file size info up to date, rounded to
  full megabytes */
  page_no_t pages_per_mb =
      static_cast<page_no_t>((1024 * 1024) / phy_page_size);

  page_no_t size_in_pages = ((file->size / pages_per_mb) * pages_per_mb);

  if (space->id == TRX_SYS_SPACE) {
    srv_sys_space.set_last_file_size(size_in_pages);
  } else if (fsp_is_system_temporary(space->id)) {
    srv_tmp_space.set_last_file_size(size_in_pages);
  }
#else  /* !UNIV_HOTBACKUP */
  ib::trace_2() << "Extended space : " << space->name << " from " << prev_size
                << " pages to " << space->size << " pages "
                << ", desired space size : " << size << " pages.";
#endif /* !UNIV_HOTBACKUP */

  space_flush(space->id);

  mutex_release();

  return (success);
}
6789 
6790 /** Try to extend a tablespace if it is smaller than the specified size.
6791 @param[in,out]	space	tablespace
6792 @param[in]	size	desired size in pages
6793 @return whether the tablespace is at least as big as requested */
fil_space_extend(fil_space_t * space,page_no_t size)6794 bool fil_space_extend(fil_space_t *space, page_no_t size) {
6795   auto shard = fil_system->shard_by_id(space->id);
6796 
6797   return (shard->space_extend(space, size));
6798 }
6799 
6800 #ifdef UNIV_HOTBACKUP
6801 /** Extends all tablespaces to the size stored in the space header. During the
6802 mysqlbackup --apply-log phase we extended the spaces on-demand so that log
6803 records could be applied, but that may have left spaces still too small
6804 compared to the size stored in the space header. */
meb_extend_tablespaces_to_stored_len()6805 void Fil_shard::meb_extend_tablespaces_to_stored_len() {
6806   ut_ad(mutex_owned());
6807 
6808   byte *buf = static_cast<byte *>(ut_malloc_nokey(UNIV_PAGE_SIZE));
6809 
6810   ut_a(buf != nullptr);
6811 
6812   for (auto &elem : m_spaces) {
6813     auto space = elem.second;
6814 
6815     if (space->purpose == FIL_TYPE_LOG) {
6816       /* ignore redo log tablespace */
6817       continue;
6818     }
6819 
6820     ut_a(space->purpose == FIL_TYPE_TABLESPACE);
6821 
6822     /* No need to protect with a mutex, because this is
6823     a single-threaded operation */
6824 
6825     mutex_release();
6826 
6827     dberr_t error;
6828 
6829     const page_size_t page_size(space->flags);
6830 
6831     error = fil_read(page_id_t(space->id, 0), page_size, 0,
6832                      page_size.physical(), buf);
6833 
6834     ut_a(error == DB_SUCCESS);
6835 
6836     ulint size_in_header;
6837 
6838     size_in_header = fsp_header_get_field(buf, FSP_SIZE);
6839 
6840     bool success;
6841 
6842     success = space_extend(space, size_in_header);
6843 
6844     if (!success) {
6845       ib::error(ER_IB_MSG_321)
6846           << "Could not extend the tablespace of " << space->name
6847           << " to the size stored in"
6848              " header, "
6849           << size_in_header
6850           << " pages;"
6851              " size after extension "
6852           << 0
6853           << " pages. Check that you have free disk"
6854              " space and retry!";
6855 
6856       ut_a(success);
6857     }
6858 
6859     mutex_acquire();
6860   }
6861 
6862   ut_free(buf);
6863 }
6864 
6865 /** Extends all tablespaces to the size stored in the space header. During the
6866 mysqlbackup --apply-log phase we extended the spaces on-demand so that log
6867 records could be applied, but that may have left spaces still too small
6868 compared to the size stored in the space header. */
meb_extend_tablespaces_to_stored_len()6869 void meb_extend_tablespaces_to_stored_len() {
6870   fil_system->meb_extend_tablespaces_to_stored_len();
6871 }
6872 
/** When true, meb_is_intermediate_file() reports every file as a normal
(non-intermediate) file. Defaults to false. */
bool meb_is_redo_log_only_restore = false;
6874 
6875 /** Determine if file is intermediate / temporary. These files are
6876 created during reorganize partition, rename tables, add / drop columns etc.
6877 @param[in]	filepath	absolute / relative or simply file name
6878 @retvalue	true		if it is intermediate file
6879 @retvalue	false		if it is normal file */
meb_is_intermediate_file(const std::string & filepath)6880 bool meb_is_intermediate_file(const std::string &filepath) {
6881   std::string file_name = filepath;
6882 
6883   {
6884     /** If its redo only restore, apply log needs to got through the
6885         intermediate steps to apply a ddl.
6886         Some of these operation might result in intermediate files.
6887     */
6888     if (meb_is_redo_log_only_restore) return false;
6889     /* extract file name from relative or absolute file name */
6890     auto pos = file_name.rfind(OS_PATH_SEPARATOR);
6891 
6892     if (pos != std::string::npos) {
6893       ++pos;
6894       file_name = file_name.substr(pos);
6895     }
6896   }
6897 
6898   transform(file_name.begin(), file_name.end(), file_name.begin(), ::tolower);
6899 
6900   if (file_name[0] != '#') {
6901     auto pos = file_name.rfind("#tmp#.ibd");
6902     if (pos != std::string::npos) {
6903       return (true);
6904     } else {
6905       return (false); /* normal file name */
6906     }
6907   }
6908 
6909   static std::vector<std::string> prefixes = {"#sql-", "#sql2-", "#tmp#",
6910                                               "#ren#"};
6911 
6912   /* search for the unsupported patterns */
6913   for (const auto &prefix : prefixes) {
6914     if (Fil_path::has_prefix(file_name, prefix)) {
6915       return (true);
6916     }
6917   }
6918 
6919   return (false);
6920 }
6921 
6922 /** Return the space ID based of the remote general tablespace name.
6923 This is a wrapper over fil_space_get_id_by_name() method. it means,
6924 the tablespace must be found in the tablespace memory cache.
6925 This method extracts the tablespace name from input parameters and checks if
6926 it has been loaded in memory cache through either any of the remote general
6927 tablespaces directories identified at the time memory cache created.
6928 @param[in, out]	tablespace	Tablespace name
6929 @return space ID if tablespace found, SPACE_UNKNOWN if not found. */
meb_fil_space_get_rem_gen_ts_id_by_name(std::string & tablespace)6930 space_id_t meb_fil_space_get_rem_gen_ts_id_by_name(std::string &tablespace) {
6931   space_id_t space_id = SPACE_UNKNOWN;
6932 
6933   for (auto newpath : rem_gen_ts_dirs) {
6934     auto pos = tablespace.rfind(OS_PATH_SEPARATOR);
6935 
6936     if (pos == std::string::npos) {
6937       break;
6938     }
6939 
6940     newpath += tablespace.substr(pos);
6941 
6942     space_id = fil_space_get_id_by_name(newpath.c_str());
6943 
6944     if (space_id != SPACE_UNKNOWN) {
6945       tablespace = newpath;
6946       break;
6947     }
6948   }
6949 
6950   return (space_id);
6951 }
6952 
6953 /** Tablespace item during recovery */
6954 struct MEB_file_name {
6955   /** Constructor */
MEB_file_nameMEB_file_name6956   MEB_file_name(std::string name, bool deleted)
6957       : m_name(name), m_space(), m_deleted(deleted) {}
6958 
6959   /** Tablespace file name (MLOG_FILE_NAME) */
6960   std::string m_name;
6961 
6962   /** Tablespace object (NULL if not valid or not found) */
6963   fil_space_t *m_space;
6964 
6965   /** Whether the tablespace has been deleted */
6966   bool m_deleted;
6967 };
6968 
/** Map of dirty tablespaces during recovery, keyed by tablespace ID and
ordered by it. Nodes are allocated through ut_allocator. */
using MEB_recv_spaces =
    std::map<space_id_t, MEB_file_name, std::less<space_id_t>,
             ut_allocator<std::pair<const space_id_t, MEB_file_name>>>;

/** Tablespaces registered so far by Fil_system::meb_name_process(). */
static MEB_recv_spaces recv_spaces;
6975 
6976 /** Checks if MEB has loaded this space for reovery.
6977 @param[in]	space_id	Tablespace ID
6978 @return true if the space_id is loaded */
meb_is_space_loaded(const space_id_t space_id)6979 bool meb_is_space_loaded(const space_id_t space_id) {
6980   return (recv_spaces.find(space_id) != recv_spaces.end());
6981 }
6982 
6983 /** Set the keys for an encrypted tablespace.
6984 @param[in]	space		Tablespace for which to set the key */
meb_set_encryption_key(const fil_space_t * space)6985 static void meb_set_encryption_key(const fil_space_t *space) {
6986   ut_ad(FSP_FLAGS_GET_ENCRYPTION(space->flags));
6987 
6988   for (auto &key : *recv_sys->keys) {
6989     if (key.space_id != space->id) {
6990       continue;
6991     }
6992 
6993     dberr_t err;
6994 
6995     err = fil_set_encryption(space->id, Encryption::AES, key.ptr, key.iv);
6996 
6997     if (err != DB_SUCCESS) {
6998       ib::error(ER_IB_MSG_322) << "Can't set encryption information"
6999                                << " for tablespace" << space->name << "!";
7000     }
7001 
7002     ut_free(key.iv);
7003     ut_free(key.ptr);
7004 
7005     key.iv = nullptr;
7006     key.ptr = nullptr;
7007     key.space_id = 0;
7008   }
7009 }
7010 
/** Register a tablespace file name in recv_spaces and, unless the record
is a delete, try to open the file for recovery.
For a delete the entry is marked deleted and its tablespace pointer is
cleared. Otherwise, if the entry is new or its stored name differs, the file
is opened with ibd_open_for_recovery() and the entry is updated on success.
@param[in,out]	name		absolute path of tablespace file; it is
                                normalized in place
@param[in]	space_id	The tablespace ID
@param[in]	deleted		true if MLOG_FILE_DELETE */
void Fil_system::meb_name_process(char *name, space_id_t space_id,
                                  bool deleted) {
  ut_ad(space_id != TRX_SYS_SPACE);

  /* We will also insert space=nullptr into the map, so that
  further checks can ensure that a MLOG_FILE_NAME record was
  scanned before applying any page records for the space_id. */

  Fil_path::normalize(name);

  size_t len = std::strlen(name);

  /* NOTE(review): std::strlen() does not count the terminating NUL, so
  "len - 1" drops the last character of the name, and wraps around for an
  empty name. Presumably this stems from code where the length included the
  NUL - confirm before changing, because m_name is also compared against
  full paths elsewhere in this file. */
  MEB_file_name fname(std::string(name, len - 1), deleted);

  /* insert() keeps an already existing entry untouched; p.second tells
  whether a new entry was added. */
  auto p = recv_spaces.insert(std::make_pair(space_id, fname));

  ut_ad(p.first->first == space_id);

  MEB_file_name &f = p.first->second;

  if (deleted) {
    /* Got MLOG_FILE_DELETE */

    /* An already known, not yet deleted entry: mark it deleted and
    forget its tablespace object. */
    if (!p.second && !f.m_deleted) {
      f.m_deleted = true;

      if (f.m_space != nullptr) {
        f.m_space = nullptr;
      }
    }

    ut_ad(f.m_space == nullptr);

  } else if (p.second || f.m_name != fname.m_name) {
    /* New entry, or the same space_id seen under a different name. */
    fil_space_t *space;

    /* Check if the tablespace file exists and contains
    the space_id. If not, ignore the file after displaying
    a note. Abort if there are multiple files with the
    same space_id. */

    switch (ibd_open_for_recovery(space_id, name, space)) {
      case FIL_LOAD_OK:
        ut_ad(space != nullptr);

        /* For encrypted tablespace, set key and iv. */
        if (FSP_FLAGS_GET_ENCRYPTION(space->flags) &&
            recv_sys->keys != nullptr) {
          meb_set_encryption_key(space);
        }

        if (f.m_space == nullptr || f.m_space == space) {
          /* First file for this space_id, or the same tablespace
          object again: record the name and the object. */
          f.m_name = fname.m_name;
          f.m_space = space;
          f.m_deleted = false;

        } else {
          /* A different tablespace object is already registered
          for this space_id. */
          ib::error(ER_IB_MSG_323)
              << "Tablespace " << space_id << " has been found in two places: '"
              << f.m_name << "' and '" << name
              << "'."
                 " You must delete one of them.";

          recv_sys->found_corrupt_fs = true;
        }
        break;

      case FIL_LOAD_ID_CHANGED:
        ut_ad(space == nullptr);

        ib::trace_1() << "Ignoring file " << name << " for space-id mismatch "
                      << space_id;
        break;

      case FIL_LOAD_NOT_FOUND:
        /* No matching tablespace was found; maybe it
        was renamed, and we will find a subsequent
        MLOG_FILE_* record. */
        ut_ad(space == nullptr);
        break;

      case FIL_LOAD_INVALID:
        ut_ad(space == nullptr);

        ib::warn(ER_IB_MSG_324) << "Invalid tablespace " << name;
        break;

      case FIL_LOAD_MISMATCH:
        /* Silently ignored. */
        ut_ad(space == nullptr);
        break;
    }
  }
}
7109 
7110 /** Process a file name passed as an input
7111 Wrapper around meb_name_process()
7112 @param[in]	name		absolute path of tablespace file
7113 @param[in]	space_id	the tablespace ID */
meb_fil_name_process(const char * name,space_id_t space_id)7114 void meb_fil_name_process(const char *name, space_id_t space_id) {
7115   char *file_name = static_cast<char *>(mem_strdup(name));
7116 
7117   fil_system->meb_name_process(file_name, space_id, false);
7118 
7119   ut_free(file_name);
7120 }
7121 
/** Test, if a file path name contains a back-link ("../").
A back-link is recognized either at the very start of the path or between
two path separators. We assume a path to a file, so a trailing "/.." is not
checked for.
@param[in]	path		path to check
@return	whether the path contains a back-link.
 */
static bool meb_has_back_link(const std::string &path) {
#ifdef _WIN32
  static const std::string PARENT_PREFIX = "..\\";
  static const std::string PARENT_INFIX = "\\..\\";
#else
  static const std::string PARENT_PREFIX = "../";
  static const std::string PARENT_INFIX = "/../";
#endif /* _WIN32 */

  /* Path starts with a back-link. */
  if (path.compare(0, PARENT_PREFIX.length(), PARENT_PREFIX) == 0) {
    return (true);
  }

  /* Back-link somewhere inside the path. */
  return (path.find(PARENT_INFIX) != std::string::npos);
}
7138 
7139 /** Parse a file name retrieved from a MLOG_FILE_* record,
7140 and return the absolute file path corresponds to backup dir
7141 as well as in the form of database/tablespace
7142 @param[in]	name		path emitted by the redo log
7143 @param[in]	flags		flags emitted by the redo log
7144 @param[in]	space_id	space_id emmited by the redo log
7145 @param[out]	absolute_path	absolute path of tablespace
7146 corresponds to target dir
7147 @param[out]	tablespace_name	name in the form of database/table */
meb_make_abs_file_path(const std::string & name,uint32_t flags,space_id_t space_id,std::string & absolute_path,std::string & tablespace_name)7148 static void meb_make_abs_file_path(const std::string &name, uint32_t flags,
7149                                    space_id_t space_id,
7150                                    std::string &absolute_path,
7151                                    std::string &tablespace_name) {
7152   Datafile df;
7153   std::string file_name = name;
7154 
7155   /* If the tablespace path name is absolute or has back-links ("../"),
7156   we assume, that it is located outside of datadir. */
7157   if (Fil_path::is_absolute_path(file_name.c_str()) ||
7158       (meb_has_back_link(file_name) && !replay_in_datadir)) {
7159     if (replay_in_datadir) {
7160       /* This is an apply-log in the restored datadir. Take the path as is. */
7161       df.set_filepath(file_name.c_str());
7162     } else {
7163       /* This is an apply-log in backup_dir/datadir. Get the file inside. */
7164       auto pos = file_name.rfind(OS_PATH_SEPARATOR);
7165 
7166       /* if it is file per tablespace, then include the schema
7167       directory as well */
7168       if (fsp_is_file_per_table(space_id, flags) && pos != std::string::npos) {
7169         pos = file_name.rfind(OS_PATH_SEPARATOR, pos - 1);
7170       }
7171 
7172       if (pos == std::string::npos) {
7173         ib::fatal(ER_IB_MSG_325)
7174             << "Could not extract the tabelspace"
7175             << " file name from the in the path : " << name;
7176       }
7177 
7178       ++pos;
7179 
7180       file_name = file_name.substr(pos);
7181 
7182       df.make_filepath(MySQL_datadir_path, file_name.c_str(), IBD);
7183     }
7184 
7185   } else {
7186     /* This is an apply-log with a relative path, either in the restored
7187     datadir, or in backup_dir/datadir. If in the restored datadir, the
7188     path might start with "../" to reach outside of datadir. */
7189     auto pos = file_name.find(OS_PATH_SEPARATOR);
7190 
7191     /* Remove the cur dir from the path as this will cause the
7192     path name mismatch when we try to find out the space_id based
7193     on tablespace name */
7194 
7195     if (file_name.substr(0, pos) == ".") {
7196       ++pos;
7197       file_name = file_name.substr(pos);
7198     }
7199 
7200     /* make_filepath() does not prepend the directory, if the file name
7201     starts with "../". Prepend it unconditionally here. */
7202     file_name.insert(0, 1, OS_PATH_SEPARATOR);
7203     file_name.insert(0, MySQL_datadir_path);
7204 
7205     df.make_filepath(nullptr, file_name.c_str(), IBD);
7206   }
7207 
7208   df.set_flags(flags);
7209   df.set_space_id(space_id);
7210   df.set_name(nullptr);
7211 
7212   absolute_path = df.filepath();
7213 
7214   tablespace_name = df.name();
7215 }
7216 
7217 /** Process a MLOG_FILE_CREATE redo record.
7218 @param[in]	page_id		Page id of the redo log record
7219 @param[in]	flags		Tablespace flags
7220 @param[in]	name		Tablespace filename */
meb_tablespace_redo_create(const page_id_t & page_id,uint32_t flags,const char * name)7221 static void meb_tablespace_redo_create(const page_id_t &page_id, uint32_t flags,
7222                                        const char *name) {
7223   std::string abs_file_path;
7224   std::string tablespace_name;
7225 
7226   meb_make_abs_file_path(name, flags, page_id.space(), abs_file_path,
7227                          tablespace_name);
7228 
7229   if (meb_is_intermediate_file(abs_file_path.c_str()) ||
7230       fil_space_get(page_id.space()) ||
7231       fil_space_get_id_by_name(tablespace_name.c_str()) != SPACE_UNKNOWN ||
7232       meb_fil_space_get_rem_gen_ts_id_by_name(tablespace_name) !=
7233           SPACE_UNKNOWN) {
7234     /* Don't create table while :-
7235     1. scanning the redo logs during backup
7236     2. apply-log on a partial backup
7237     3. if it is intermediate file
7238     4. tablespace is already loaded in memory
7239     5. tablespace is a remote general tablespace which is
7240        already loaded for recovery/apply-log from different
7241        directory path */
7242 
7243     ib::trace_1() << "Ignoring the log record. No need to "
7244                   << "create the tablespace : " << abs_file_path;
7245   } else {
7246     auto it = recv_spaces.find(page_id.space());
7247 
7248     if (it == recv_spaces.end() || it->second.m_name != abs_file_path) {
7249       ib::trace_1() << "Creating the tablespace : " << abs_file_path
7250                     << ", space_id : " << page_id.space();
7251 
7252       dberr_t ret = fil_ibd_create(page_id.space(), tablespace_name.c_str(),
7253                                    abs_file_path.c_str(), flags,
7254                                    FIL_IBD_FILE_INITIAL_SIZE);
7255 
7256       if (ret != DB_SUCCESS) {
7257         ib::fatal(ER_IB_MSG_326)
7258             << "Could not create the tablespace : " << abs_file_path
7259             << " with space Id : " << page_id.space();
7260       }
7261     }
7262   }
7263 }
7264 
7265 /** Process a MLOG_FILE_RENAME redo record.
7266 @param[in]	page_id		Page id of the redo log record
7267 @param[in]	from_name	Tablespace from filename
7268 @param[in]	to_name		Tablespace to filename */
meb_tablespace_redo_rename(const page_id_t & page_id,const char * from_name,const char * to_name)7269 static void meb_tablespace_redo_rename(const page_id_t &page_id,
7270                                        const char *from_name,
7271                                        const char *to_name) {
7272   std::string abs_to_path;
7273   std::string abs_from_path;
7274   std::string tablespace_name;
7275 
7276   meb_make_abs_file_path(from_name, 0, page_id.space(), abs_from_path,
7277                          tablespace_name);
7278 
7279   meb_make_abs_file_path(to_name, 0, page_id.space(), abs_to_path,
7280                          tablespace_name);
7281 
7282   char *new_name = nullptr;
7283 
7284   if (meb_is_intermediate_file(from_name) ||
7285       meb_is_intermediate_file(to_name) ||
7286       fil_space_get_id_by_name(tablespace_name.c_str()) != SPACE_UNKNOWN ||
7287       meb_fil_space_get_rem_gen_ts_id_by_name(tablespace_name) !=
7288           SPACE_UNKNOWN ||
7289       fil_space_get(page_id.space()) == nullptr) {
7290     /* Don't rename table while :
7291     1. Scanning the redo logs during backup
7292     2. Apply-log on a partial backup
7293     3. Either of old or new tables are intermediate table
7294     4. The new name is already loaded for recovery/apply-log
7295     5. The new name is a remote general tablespace which is
7296        already loaded for recovery/apply-log from different
7297        directory path
7298     6. Tablespace is not yet loaded in memory.
7299     This will prevent unintended renames during recovery. */
7300 
7301     ib::trace_1() << "Ignoring the log record. "
7302                   << "No need to rename tablespace";
7303 
7304     return;
7305 
7306   } else {
7307     ib::trace_1() << "Renaming space id : " << page_id.space()
7308                   << ", old tablespace name : " << from_name
7309                   << " to new tablespace name : " << to_name;
7310 
7311     new_name = static_cast<char *>(mem_strdup(abs_to_path.c_str()));
7312   }
7313 
7314   meb_fil_name_process(from_name, page_id.space());
7315   meb_fil_name_process(new_name, page_id.space());
7316 
7317   if (!fil_op_replay_rename(page_id, abs_from_path.c_str(),
7318                             abs_to_path.c_str())) {
7319     recv_sys->found_corrupt_fs = true;
7320   }
7321 
7322   meb_fil_name_process(to_name, page_id.space());
7323 
7324   ut_free(new_name);
7325 }
7326 
7327 /** Process a MLOG_FILE_DELETE redo record.
7328 @param[in]	page_id		Page id of the redo log record
7329 @param[in]	name		Tablespace filename */
meb_tablespace_redo_delete(const page_id_t & page_id,const char * name)7330 static void meb_tablespace_redo_delete(const page_id_t &page_id,
7331                                        const char *name) {
7332   std::string abs_file_path;
7333   std::string tablespace_name;
7334 
7335   meb_make_abs_file_path(name, 0, page_id.space(), abs_file_path,
7336                          tablespace_name);
7337 
7338   char *file_name = static_cast<char *>(mem_strdup(name));
7339 
7340   fil_system->meb_name_process(file_name, page_id.space(), true);
7341 
7342   if (fil_space_get(page_id.space())) {
7343     ib::trace_1() << "Deleting the tablespace : " << abs_file_path
7344                   << ", space_id : " << page_id.space();
7345     dberr_t err =
7346         fil_delete_tablespace(page_id.space(), BUF_REMOVE_FLUSH_NO_WRITE);
7347 
7348     ut_a(err == DB_SUCCESS);
7349   }
7350 
7351   ut_free(file_name);
7352 }
7353 
7354 #endif /* UNIV_HOTBACKUP */
7355 
7356 /*========== RESERVE FREE EXTENTS (for a B-tree split, for example) ===*/
7357 
7358 /** Tries to reserve free extents in a file space.
7359 @param[in]	space_id	Tablespace ID
7360 @param[in]	n_free_now	Number of free extents now
7361 @param[in]	n_to_reserve	How many one wants to reserve
7362 @return true if succeed */
fil_space_reserve_free_extents(space_id_t space_id,ulint n_free_now,ulint n_to_reserve)7363 bool fil_space_reserve_free_extents(space_id_t space_id, ulint n_free_now,
7364                                     ulint n_to_reserve) {
7365   auto shard = fil_system->shard_by_id(space_id);
7366 
7367   shard->mutex_acquire();
7368 
7369   fil_space_t *space = shard->get_space_by_id(space_id);
7370 
7371   bool success;
7372 
7373   if (space->n_reserved_extents + n_to_reserve > n_free_now) {
7374     success = false;
7375   } else {
7376     ut_a(n_to_reserve < std::numeric_limits<uint32_t>::max());
7377     space->n_reserved_extents += (uint32_t)n_to_reserve;
7378     success = true;
7379   }
7380 
7381   shard->mutex_release();
7382 
7383   return (success);
7384 }
7385 
7386 /** Releases free extents in a file space.
7387 @param[in]	space_id	Tablespace ID
7388 @param[in]	n_reserved	How many were reserved */
fil_space_release_free_extents(space_id_t space_id,ulint n_reserved)7389 void fil_space_release_free_extents(space_id_t space_id, ulint n_reserved) {
7390   auto shard = fil_system->shard_by_id(space_id);
7391 
7392   shard->mutex_acquire();
7393 
7394   fil_space_t *space = shard->get_space_by_id(space_id);
7395 
7396   ut_a(n_reserved < std::numeric_limits<uint32_t>::max());
7397   ut_a(space->n_reserved_extents >= n_reserved);
7398 
7399   space->n_reserved_extents -= (uint32_t)n_reserved;
7400 
7401   shard->mutex_release();
7402 }
7403 
7404 /** Gets the number of reserved extents. If the database is silent, this number
7405 should be zero.
7406 @param[in]	space_id	Tablespace ID
7407 @return the number of reserved extents */
fil_space_get_n_reserved_extents(space_id_t space_id)7408 ulint fil_space_get_n_reserved_extents(space_id_t space_id) {
7409   auto shard = fil_system->shard_by_id(space_id);
7410 
7411   shard->mutex_acquire();
7412 
7413   fil_space_t *space = shard->get_space_by_id(space_id);
7414 
7415   ulint n = space->n_reserved_extents;
7416 
7417   shard->mutex_release();
7418 
7419   return (n);
7420 }
7421 
7422 /*============================ FILE I/O ================================*/
7423 
prepare_file_for_io(fil_node_t * file,bool extend)7424 bool Fil_shard::prepare_file_for_io(fil_node_t *file, bool extend) {
7425   ut_ad(mutex_owned());
7426 
7427   fil_space_t *space = file->space;
7428 
7429   if (s_n_open > fil_system->m_max_n_open + 5) {
7430     static ulint prev_time;
7431     auto curr_time = ut_time_monotonic();
7432 
7433     /* Spam the log after every minute. Ignore any race here. */
7434 
7435     if ((curr_time - prev_time) > 60) {
7436       ib::warn(ER_IB_MSG_327)
7437           << "Open files " << s_n_open.load() << " exceeds the limit "
7438           << fil_system->m_max_n_open;
7439 
7440       prev_time = curr_time;
7441     }
7442   }
7443 
7444   if (space->is_deleted()) {
7445     return (false);
7446   }
7447 
7448   if (!file->is_open) {
7449     ut_a(file->n_pending == 0);
7450 
7451     if (!open_file(file, extend)) {
7452       return (false);
7453     }
7454   }
7455 
7456   if (file->n_pending == 0 && Fil_system::space_belongs_in_LRU(space)) {
7457     /* The file is in the LRU list, remove it */
7458 
7459     ut_a(UT_LIST_GET_LEN(m_LRU) > 0);
7460 
7461     UT_LIST_REMOVE(m_LRU, file);
7462   }
7463 
7464   ++file->n_pending;
7465 
7466   return (true);
7467 }
7468 
7469 /** If the tablespace is not on the unflushed list, add it.
7470 @param[in,out]	space		Tablespace to add */
add_to_unflushed_list(fil_space_t * space)7471 void Fil_shard::add_to_unflushed_list(fil_space_t *space) {
7472   ut_ad(m_id == REDO_SHARD || mutex_owned());
7473 
7474   if (!space->is_in_unflushed_spaces) {
7475     space->is_in_unflushed_spaces = true;
7476 
7477     UT_LIST_ADD_FIRST(m_unflushed_spaces, space);
7478   }
7479 }
7480 
7481 /** Note that a write IO has completed.
7482 @param[in,out]	file		File on which a write was completed */
write_completed(fil_node_t * file)7483 void Fil_shard::write_completed(fil_node_t *file) {
7484   ut_ad(m_id == REDO_SHARD || mutex_owned());
7485 
7486   ++m_modification_counter;
7487 
7488   file->modification_counter = m_modification_counter;
7489 
7490   if (fil_buffering_disabled(file->space)) {
7491     /* We don't need to keep track of unflushed
7492     changes as user has explicitly disabled
7493     buffering. */
7494     ut_ad(!file->space->is_in_unflushed_spaces);
7495 
7496     file->flush_counter = file->modification_counter;
7497 
7498   } else {
7499     add_to_unflushed_list(file->space);
7500   }
7501 }
7502 
7503 /** Updates the data structures when an I/O operation finishes. Updates the
7504 pending i/o's field in the file appropriately.
7505 @param[in]	file		Tablespace file
7506 @param[in]	type		Marks the file as modified if type == WRITE */
complete_io(fil_node_t * file,const IORequest & type)7507 void Fil_shard::complete_io(fil_node_t *file, const IORequest &type) {
7508   ut_ad(m_id == REDO_SHARD || mutex_owned());
7509 
7510   ut_a(file->n_pending > 0);
7511 
7512   --file->n_pending;
7513 
7514   ut_ad(type.validate());
7515 
7516   if (type.is_write()) {
7517     ut_ad(!srv_read_only_mode || fsp_is_system_temporary(file->space->id));
7518 
7519     write_completed(file);
7520   }
7521 
7522   if (file->n_pending == 0 && Fil_system::space_belongs_in_LRU(file->space)) {
7523     /* The file must be put back to the LRU list */
7524     UT_LIST_ADD_FIRST(m_LRU, file);
7525   }
7526 }
7527 
7528 /** Report information about an invalid page access.
7529 @param[in]	block_offset	Block offset
7530 @param[in]	space_id	Tablespace ID
7531 @param[in]	space_name	Tablespace name
7532 @param[in]	byte_offset	Byte offset
7533 @param[in]	len		I/O length
7534 @param[in]	is_read		I/O type
7535 @param[in]	line		Line called from */
fil_report_invalid_page_access_low(page_no_t block_offset,space_id_t space_id,const char * space_name,ulint byte_offset,ulint len,bool is_read,int line)7536 static void fil_report_invalid_page_access_low(page_no_t block_offset,
7537                                                space_id_t space_id,
7538                                                const char *space_name,
7539                                                ulint byte_offset, ulint len,
7540                                                bool is_read, int line) {
7541   ib::error(ER_IB_MSG_328)
7542       << "Trying to access page number " << block_offset
7543       << " in"
7544          " space "
7545       << space_id << ", space name " << space_name
7546       << ","
7547          " which is outside the tablespace bounds. Byte offset "
7548       << byte_offset << ", len " << len << ", i/o type "
7549       << (is_read ? "read" : "write")
7550       << ". If you get this error at mysqld startup, please check"
7551          " that your my.cnf matches the ibdata files that you have in"
7552          " the MySQL server.";
7553 
7554   ib::error(ER_IB_MSG_329) << "Server exits"
7555 #ifdef UNIV_DEBUG
7556                            << " at "
7557                            << "fil0fil.cc"
7558                            << "[" << line << "]"
7559 #endif /* UNIV_DEBUG */
7560                            << ".";
7561 
7562   ut_error;
7563 }
7564 
/** Convenience wrapper around fil_report_invalid_page_access_low() that
passes the source line of the call site (__LINE__) as the last argument. */
#define fil_report_invalid_page_access(b, s, n, o, l, t) \
  fil_report_invalid_page_access_low((b), (s), (n), (o), (l), (t), __LINE__)
7567 
7568 /** Set encryption information for IORequest.
7569 @param[in,out]	req_type	IO request
7570 @param[in]	page_id		page id
7571 @param[in]	space		table space */
fil_io_set_encryption(IORequest & req_type,const page_id_t & page_id,fil_space_t * space)7572 void fil_io_set_encryption(IORequest &req_type, const page_id_t &page_id,
7573                            fil_space_t *space) {
7574   /* Don't encrypt pages of system tablespace upto TRX_SYS_PAGE(including). The
7575   doublewrite buffer header is on TRX_SYS_PAGE */
7576   if (fsp_is_system_tablespace(space->id) &&
7577       page_id.page_no() <= FSP_TRX_SYS_PAGE_NO) {
7578     req_type.clear_encrypted();
7579     return;
7580   }
7581 
7582   /* Don't encrypt page 0 of all tablespaces except redo log
7583   tablespace, all pages from the system tablespace. */
7584   if (space->encryption_type == Encryption::NONE ||
7585       (space->encryption_op_in_progress == DECRYPTION && req_type.is_write()) ||
7586       (page_id.page_no() == 0 && !req_type.is_log())) {
7587     req_type.clear_encrypted();
7588     return;
7589   }
7590 
7591   /* For writting redo log, if encryption for redo log is disabled,
7592   skip set encryption. */
7593   if (req_type.is_log() && req_type.is_write() && !srv_redo_log_encrypt) {
7594     req_type.clear_encrypted();
7595     return;
7596   }
7597 
7598   /* For writting undo log, if encryption for undo log is disabled,
7599   skip set encryption. */
7600   if (fsp_is_undo_tablespace(space->id) && !srv_undo_log_encrypt &&
7601       req_type.is_write()) {
7602     req_type.clear_encrypted();
7603     return;
7604   }
7605 
7606   req_type.encryption_key(space->encryption_key, space->encryption_klen,
7607                           space->encryption_iv);
7608 
7609   req_type.encryption_algorithm(Encryption::AES);
7610 }
7611 
7612 /** Get the AIO mode.
7613 @param[in]	req_type	IO request type
7614 @param[in]	sync		true if Synchronous IO
7615 return the AIO mode */
get_AIO_mode(const IORequest & req_type,bool sync)7616 AIO_mode Fil_shard::get_AIO_mode(const IORequest &req_type, bool sync) {
7617 #ifndef UNIV_HOTBACKUP
7618   if (sync) {
7619     return (AIO_mode::SYNC);
7620 
7621   } else if (req_type.is_log()) {
7622     return (AIO_mode::LOG);
7623 
7624   } else {
7625     return (AIO_mode::NORMAL);
7626   }
7627 #else  /* !UNIV_HOTBACKUP */
7628   ut_a(sync);
7629   return (AIO_mode::SYNC);
7630 #endif /* !UNIV_HOTBACKUP */
7631 }
7632 
get_file_for_io(const IORequest & req_type,fil_space_t * space,page_no_t * page_no,fil_node_t * & file)7633 dberr_t Fil_shard::get_file_for_io(const IORequest &req_type,
7634                                    fil_space_t *space, page_no_t *page_no,
7635                                    fil_node_t *&file) {
7636   if (space->files.size() > 1) {
7637     ut_a(space->id == TRX_SYS_SPACE || space->purpose == FIL_TYPE_TEMPORARY ||
7638          space->id == dict_sys_t::s_log_space_first_id);
7639 
7640     for (auto &f : space->files) {
7641       if (f.size > *page_no) {
7642         file = &f;
7643         return (DB_SUCCESS);
7644       }
7645 
7646       *page_no -= f.size;
7647     }
7648 
7649   } else if (!space->files.empty()) {
7650     fil_node_t &f = space->files.front();
7651 
7652     file = &f;
7653 
7654     return (DB_SUCCESS);
7655   }
7656 
7657   file = nullptr;
7658   return (DB_ERROR);
7659 }
7660 
7661 /** Read or write log file data synchronously.
7662 @param[in]	type		IO context
7663 @param[in]	page_id		page id
7664 @param[in]	page_size	page size
7665 @param[in]	byte_offset	remainder of offset in bytes; in AIO
7666                                 this must be divisible by the OS block
7667                                 size
7668 @param[in]	len		how many bytes to read or write; this
7669                                 must not cross a file boundary; in AIO
7670                                 this must be a block size multiple
7671 @param[in,out]	buf		buffer where to store read data or
7672                                 from where to write
7673 @return error code
7674 @retval DB_SUCCESS on success */
do_redo_io(const IORequest & type,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)7675 dberr_t Fil_shard::do_redo_io(const IORequest &type, const page_id_t &page_id,
7676                               const page_size_t &page_size, ulint byte_offset,
7677                               ulint len, void *buf) {
7678   IORequest req_type(type);
7679 
7680   ut_ad(len > 0);
7681   ut_ad(req_type.is_log());
7682   ut_ad(req_type.validate());
7683   ut_ad(fil_validate_skip());
7684   ut_ad(byte_offset < UNIV_PAGE_SIZE);
7685   ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));
7686 
7687 #ifndef UNIV_HOTBACKUP
7688   if (req_type.is_read()) {
7689     srv_stats.data_read.add(len);
7690 
7691   } else if (req_type.is_write()) {
7692     ut_ad(!srv_read_only_mode);
7693     srv_stats.data_written.add(len);
7694   }
7695 #endif
7696 
7697   fil_space_t *space = get_space_by_id(page_id.space());
7698 
7699   fil_node_t *file;
7700   page_no_t page_no = page_id.page_no();
7701   dberr_t err = get_file_for_io(req_type, space, &page_no, file);
7702 
7703   ut_a(file != nullptr);
7704   ut_a(err == DB_SUCCESS);
7705   ut_a(page_size.physical() == page_size.logical());
7706 
7707   os_offset_t offset = (os_offset_t)page_no * page_size.physical();
7708 
7709   offset += byte_offset;
7710 
7711   ut_a(file->size - page_no >=
7712        (byte_offset + len + (page_size.physical() - 1)) / page_size.physical());
7713 
7714   ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
7715   ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);
7716 
7717   /* Set encryption information. */
7718   fil_io_set_encryption(req_type, page_id, space);
7719 
7720   req_type.block_size(file->block_size);
7721 
7722   if (!file->is_open) {
7723     ut_a(file->n_pending == 0);
7724 
7725     bool success = open_file(file, false);
7726 
7727     ut_a(success);
7728   }
7729 
7730   if (req_type.is_read()) {
7731     err = os_file_read(req_type, file->name, file->handle, buf, offset, len);
7732 
7733   } else {
7734     ut_ad(!srv_read_only_mode);
7735 
7736     err = os_file_write(req_type, file->name, file->handle, buf, offset, len);
7737   }
7738 
7739   if (type.is_write()) {
7740     mutex_acquire();
7741 
7742     ++m_modification_counter;
7743 
7744     file->modification_counter = m_modification_counter;
7745 
7746     add_to_unflushed_list(file->space);
7747 
7748     mutex_release();
7749   }
7750 
7751   return (err);
7752 }
7753 
/** Read or write data of a tablespace page through this shard.
Validates the request, looks up the tablespace and the file that contains
the page while holding the shard mutex, prepares the file for I/O and then
issues the request through os_aio() (or through os_file_read() /
os_file_write() when built with UNIV_HOTBACKUP).
@param[in]	type		IO context
@param[in]	sync		If true then do synchronous IO
@param[in]	page_id		page id
@param[in]	page_size	page size
@param[in]	byte_offset	remainder of offset in bytes; asserted to be a
                                multiple of OS_FILE_LOG_BLOCK_SIZE
@param[in]	len		how many bytes to read or write; asserted to be
                                a non-zero multiple of OS_FILE_LOG_BLOCK_SIZE
@param[in,out]	buf		buffer where to store read data or from where
                                to write
@param[in]	message		message passed on to the AIO layer; in the
                                deleted-tablespace path it is treated as a
                                buf_page_t* when !sync
@return error code
@retval DB_SUCCESS on success
@retval DB_TABLESPACE_DELETED if the tablespace does not exist, is being
                                deleted (async reads only), or its data file
                                could not be opened
@retval DB_ERROR if no file covers the page and the request ignores missing
                                pages, or if extending the file failed */
dberr_t Fil_shard::do_io(const IORequest &type, bool sync,
                         const page_id_t &page_id, const page_size_t &page_size,
                         ulint byte_offset, ulint len, void *buf,
                         void *message) {
  /* Work on a private copy: the request flags are adjusted below. */
  IORequest req_type(type);

  ut_ad(req_type.validate());

  ut_ad(len > 0);
  ut_ad(byte_offset < UNIV_PAGE_SIZE);
  ut_ad(!page_size.is_compressed() || byte_offset == 0);
  ut_ad(UNIV_PAGE_SIZE == (ulong)(1 << UNIV_PAGE_SIZE_SHIFT));

  ut_ad(fil_validate_skip());

#ifndef UNIV_HOTBACKUP
  /* ibuf bitmap pages must be read in the sync AIO mode: */
  ut_ad(recv_no_ibuf_operations || req_type.is_write() ||
        !ibuf_bitmap_page(page_id, page_size) || sync || req_type.is_log());

  auto aio_mode = get_AIO_mode(req_type, sync);

  if (req_type.is_read()) {
    srv_stats.data_read.add(len);

    if (aio_mode == AIO_mode::NORMAL && !recv_no_ibuf_operations &&
        ibuf_page(page_id, page_size, nullptr)) {
      /* Reduce probability of deadlock bugs
      in connection with ibuf: do not let the
      ibuf I/O handler sleep */

      req_type.clear_do_not_wake();

      aio_mode = AIO_mode::IBUF;
    }

#ifdef UNIV_DEBUG
    /* Should never attempt to read from a deleted tablespace. */
    for (auto pair : m_deleted) {
      ut_ad(pair.first != page_id.space());
    }
#endif /* UNIV_DEBUG */

  } else if (req_type.is_write()) {
    ut_ad(!srv_read_only_mode || fsp_is_system_temporary(page_id.space()));

    srv_stats.data_written.add(len);
  }
#else  /* !UNIV_HOTBACKUP */
  ut_a(sync);
  auto aio_mode = AIO_mode::SYNC;
#endif /* !UNIV_HOTBACKUP */

  /* Reserve the mutex and make sure that we can open at
  least one file while holding it, if the file is not already open */

  fil_space_t *space;

  /* "slot" records whether an open slot was reserved for us; every exit
  path below that still holds it must call release_open_slot(). */
  bool slot = mutex_acquire_and_get_space(page_id.space(), space);

  /* If we are deleting a tablespace we don't allow async read
  operations on that. However, we do allow write operations and
  sync read operations. */
  if (space == nullptr ||
      (req_type.is_read() && !sync && space->stop_new_ops)) {
    if (slot) {
      release_open_slot(m_id);
    }

    mutex_release();

    if (!req_type.ignore_missing()) {
      if (space == nullptr) {
        ib::error(ER_IB_MSG_330)
            << "Trying to do I/O on a tablespace"
            << " which does not exist. I/O type: "
            << (req_type.is_read() ? "read" : "write") << ", page: " << page_id
            << ", I/O length: " << len << " bytes";
      } else {
        ib::error(ER_IB_MSG_331)
            << "Trying to do async read on a"
            << " tablespace which is being deleted."
            << " Tablespace name: \"" << space->name << "\", page: " << page_id
            << ", read length: " << len << " bytes";
      }
    }

    return (DB_TABLESPACE_DELETED);
  }

  ut_ad(aio_mode != AIO_mode::IBUF || fil_type_is_data(space->purpose));

  /* page_no is passed by address: get_file_for_io() may rewrite it, and
  the rewritten value is what is used for the offset computation below. */
  fil_node_t *file;
  auto page_no = page_id.page_no();
  auto err = get_file_for_io(req_type, space, &page_no, file);

  if (file == nullptr) {
    ut_ad(err == DB_ERROR);

    if (req_type.ignore_missing()) {
      if (slot) {
        release_open_slot(m_id);
      }

      mutex_release();

      return (DB_ERROR);
    }

    /* This is a hard error. */
    fil_report_invalid_page_access(page_id.page_no(), page_id.space(),
                                   space->name, byte_offset, len,
                                   req_type.is_read());
  }

  bool opened = prepare_file_for_io(file, false);

  /* The open slot is no longer needed once the file has been prepared
  (or has failed to open). */
  if (slot) {
    release_open_slot(m_id);
  }

  if (!opened) {
#ifndef UNIV_HOTBACKUP
    if (space->is_deleted()) {
      ut_a(fsp_is_undo_tablespace(space->id));
      mutex_release();

      /* For an async request nobody else will complete the page I/O,
      so do it here before reporting the tablespace as deleted. */
      if (!sync) {
        buf_page_io_complete(static_cast<buf_page_t *>(message), false);
      }

      return (DB_TABLESPACE_DELETED);
    }
#endif /* !UNIV_HOTBACKUP */

    if (fil_type_is_data(space->purpose) && fsp_is_ibd_tablespace(space->id)) {
      mutex_release();

      if (!req_type.ignore_missing()) {
        ib::error(ER_IB_MSG_332)
            << "Trying to do I/O to a tablespace"
               " which exists without an .ibd data"
            << " file. I/O type: " << (req_type.is_read() ? "read" : "write")
            << ", page: " << page_id_t(page_id.space(), page_no)
            << ", I/O length: " << len << " bytes";
      }

      return (DB_TABLESPACE_DELETED);
    }

    /* The tablespace is for log. Currently, we just assert here
    to prevent handling errors along the way fil_io returns.
    Also, if the log files are missing, it would be hard to
    promise the server can continue running. */
    ut_a(0);
  }

  /* Check that at least the start offset is within the bounds of a
  single-table tablespace, including rollback tablespaces. */
  if (file->size <= page_no && space->id != TRX_SYS_SPACE &&
      fil_type_is_data(space->purpose)) {
    if (req_type.ignore_missing()) {
      /* If we can tolerate the non-existent pages, we
      should return with DB_ERROR and let caller decide
      what to do. */

      complete_io(file, req_type);

      mutex_release();

      return (DB_ERROR);
    }

    /* Extend the file if the page_no does not fall inside its bounds
    because xtrabackup may have copied it when it was smaller */
    mutex_release();

    bool success = space_extend(space, page_no + 1);

    /* NOTE(review): unlike the ignore_missing() branch above, this failure
    path returns without calling complete_io() on the file that was
    prepared for I/O - confirm no pending-I/O accounting is leaked. */
    if (!success) {
      return (DB_ERROR);
    }
  } else {
    mutex_release();
  }

  /* From here on the shard mutex is not held. */

  ut_a(page_size.is_compressed() ||
       page_size.physical() == page_size.logical());

  /* Byte offset of the request within the file. */
  auto offset = (os_offset_t)page_no * page_size.physical();

  offset += byte_offset;

  /* The whole request, rounded up to pages, must fit inside the file. */
  ut_a(file->size - page_no >=
       (byte_offset + len + (page_size.physical() - 1)) / page_size.physical());

  ut_a((len % OS_FILE_LOG_BLOCK_SIZE) == 0);
  ut_a(byte_offset % OS_FILE_LOG_BLOCK_SIZE == 0);

  /* Don't compress the log, page 0 of all tablespaces, tables
  compressed with the old compression scheme and all pages from
  the system tablespace. */

  if (req_type.is_write() && !req_type.is_log() && !page_size.is_compressed() &&
      page_id.page_no() > 0 && IORequest::is_punch_hole_supported() &&
      file->punch_hole) {
    ut_ad(!req_type.is_log());

    req_type.set_punch_hole();

    req_type.compression_algorithm(space->compression_type);

  } else {
    req_type.clear_compressed();
  }

  /* Set encryption information. */
  fil_io_set_encryption(req_type, page_id, space);

  req_type.block_size(file->block_size);

#ifdef UNIV_HOTBACKUP
  /* In mysqlbackup do normal I/O, not AIO */
  if (req_type.is_read()) {
    err = os_file_read(req_type, file->name, file->handle, buf, offset, len);

  } else {
    ut_ad(!srv_read_only_mode || fsp_is_system_temporary(page_id.space()));

    err = os_file_write(req_type, file->name, file->handle, buf, offset, len);
  }
#else /* UNIV_HOTBACKUP */
  /* Queue the aio request */
  err = os_aio(
      req_type, aio_mode, file->name, file->handle, buf, offset, len,
      fsp_is_system_temporary(page_id.space()) ? false : srv_read_only_mode,
      file, message);

#endif /* UNIV_HOTBACKUP */

  /* A failed hole punch is not an I/O failure: warn once (while the file
  is still flagged as supporting it) and let fil_no_punch_hole() update
  the file. */
  if (err == DB_IO_NO_PUNCH_HOLE) {
    err = DB_SUCCESS;

    if (file->punch_hole) {
      ib::warn(ER_IB_MSG_333) << "Punch hole failed for '" << file->name << "'";
    }

    fil_no_punch_hole(file);
  }

  /* We can try to recover the page from the double write buffer if
  the decompression fails or the page is corrupt. */

  ut_a(req_type.is_dblwr() || err == DB_SUCCESS);

  if (sync) {
    /* The i/o operation is already completed when we return from
    os_aio: */

    mutex_acquire();

    complete_io(file, req_type);

    mutex_release();

    ut_ad(fil_validate_skip());
  }

  return (err);
}
8024 
8025 /** Read or write redo log data (synchronous buffered IO).
8026 @param[in]	type		IO context
8027 @param[in]	page_id		where to read or write
8028 @param[in]	page_size	page size
8029 @param[in]	byte_offset	remainder of offset in bytes
8030 @param[in]	len		this must not cross a file boundary;
8031 @param[in,out]	buf		buffer where to store read data or from where
8032                                 to write
8033 @retval DB_SUCCESS if all OK */
fil_redo_io(const IORequest & type,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf)8034 dberr_t fil_redo_io(const IORequest &type, const page_id_t &page_id,
8035                     const page_size_t &page_size, ulint byte_offset, ulint len,
8036                     void *buf) {
8037   ut_ad(type.is_log());
8038 
8039   auto shard = fil_system->shard_by_id(page_id.space());
8040 #if defined(_WIN32) && defined(WIN_ASYNC_IO)
8041   /* On Windows we always open the redo log file in AIO mode. ie. we
8042   use the AIO API for the read/write even for sync IO. */
8043   return (shard->do_io(type, true, page_id, page_size, byte_offset, len, buf,
8044                        nullptr));
8045 #else
8046   return (shard->do_redo_io(type, page_id, page_size, byte_offset, len, buf));
8047 #endif /* _WIN32  || WIN_ASYNC_IO*/
8048 }
8049 
8050 #ifndef UNIV_HOTBACKUP
8051 /** Waits for an AIO operation to complete. This function is used to write the
8052 handler for completed requests. The aio array of pending requests is divided
8053 into segments (see os0file.cc for more info). The thread specifies which
8054 segment it wants to wait for.
8055 @param[in]	segment		The number of the segment in the AIO array
8056                                 to wait for */
fil_aio_wait(ulint segment)8057 void fil_aio_wait(ulint segment) {
8058   void *m2;
8059   fil_node_t *m1;
8060   IORequest type;
8061 
8062   ut_ad(fil_validate_skip());
8063 
8064   auto err = os_aio_handler(segment, &m1, &m2, &type);
8065   ut_a(err == DB_SUCCESS);
8066 
8067   auto file = reinterpret_cast<fil_node_t *>(m1);
8068 
8069   if (file == nullptr) {
8070     ut_ad(srv_shutdown_state.load() == SRV_SHUTDOWN_EXIT_THREADS);
8071     return;
8072   }
8073 
8074   ut_a(!type.is_dblwr());
8075 
8076   srv_set_io_thread_op_info(segment, "complete io for file");
8077 
8078   auto shard = fil_system->shard_by_id(file->space->id);
8079 
8080   shard->mutex_acquire();
8081 
8082   shard->complete_io(file, type);
8083 
8084   shard->mutex_release();
8085 
8086   ut_ad(fil_validate_skip());
8087 
8088   /* Do the i/o handling */
8089   /* IMPORTANT: since i/o handling for reads will read also the insert
8090   buffer in tablespace 0, you have to be very careful not to introduce
8091   deadlocks in the i/o system. We keep tablespace 0 data files always
8092   open, and use a special i/o thread to serve insert buffer requests. */
8093 
8094   switch (file->space->purpose) {
8095     case FIL_TYPE_IMPORT:
8096     case FIL_TYPE_TEMPORARY:
8097     case FIL_TYPE_TABLESPACE:
8098       srv_set_io_thread_op_info(segment, "complete io for buf page");
8099 
8100       /* async single page writes from the dblwr buffer don't have
8101       access to the page */
8102       if (m2 != nullptr) {
8103         buf_page_io_complete(static_cast<buf_page_t *>(m2), false);
8104       }
8105       return;
8106     case FIL_TYPE_LOG:
8107       return;
8108   }
8109 
8110   ut_ad(0);
8111 }
8112 #endif /* !UNIV_HOTBACKUP */
8113 
8114 /** Read or write data from a file.
8115 @param[in]	type		IO context
8116 @param[in]	sync		If true then do synchronous IO
8117 @param[in]	page_id		page id
8118 @param[in]	page_size	page size
8119 @param[in]	byte_offset	remainder of offset in bytes; in aio this
8120                                 must be divisible by the OS block size
8121 @param[in]	len		how many bytes to read or write; this must
8122                                 not cross a file boundary; in AIO this must
8123                                 be a block size multiple
8124 @param[in,out]	buf		buffer where to store read data or from where
8125                                 to write; in AIO this must be appropriately
8126                                 aligned
8127 @param[in]	message		message for AIO handler if !sync, else ignored
8128 @return error code
8129 @retval DB_SUCCESS on success
8130 @retval DB_TABLESPACE_DELETED if the tablespace does not exist */
fil_io(const IORequest & type,bool sync,const page_id_t & page_id,const page_size_t & page_size,ulint byte_offset,ulint len,void * buf,void * message)8131 dberr_t fil_io(const IORequest &type, bool sync, const page_id_t &page_id,
8132                const page_size_t &page_size, ulint byte_offset, ulint len,
8133                void *buf, void *message) {
8134   auto shard = fil_system->shard_by_id(page_id.space());
8135 
8136   return (shard->do_io(type, sync, page_id, page_size, byte_offset, len, buf,
8137                        message));
8138 }
8139 
8140 /** If the tablespace is on the unflushed list and there are no pending
8141 flushes then remove from the unflushed list.
8142 @param[in,out]	space		Tablespace to remove */
remove_from_unflushed_list(fil_space_t * space)8143 void Fil_shard::remove_from_unflushed_list(fil_space_t *space) {
8144   ut_ad(mutex_owned());
8145 
8146   if (space->is_in_unflushed_spaces && space_is_flushed(space)) {
8147     space->is_in_unflushed_spaces = false;
8148 
8149     UT_LIST_REMOVE(m_unflushed_spaces, space);
8150   }
8151 }
8152 
/** Flushes to disk possible writes to the redo log files cached by the OS.
Must be called on the redo shard with the shard mutex owned. The mutex is
released and reacquired while waiting for a concurrent flush and around
the call to os_file_flush(). */
void Fil_shard::redo_space_flush() {
  ut_ad(mutex_owned());
  ut_ad(m_id == REDO_SHARD);

  /* Use the cached redo space pointer if set, otherwise look it up. */
  fil_space_t *space = fil_space_t::s_redo_space;

  if (space == nullptr) {
    space = get_space_by_id(dict_sys_t::s_log_space_first_id);
  } else {
    ut_ad(space == get_space_by_id(dict_sys_t::s_log_space_first_id));
  }

  ut_a(!space->stop_new_ops);
  ut_a(space->purpose == FIL_TYPE_LOG);

  /* Prevent dropping of the space while we are flushing */
  ++space->n_pending_flushes;

  for (auto &file : space->files) {
    ut_a(!file.is_raw_disk);

    /* Snapshot the modification counter: this is the point up to which
    this call has to make the file durable. */
    int64_t old_mod_counter = file.modification_counter;

    /* Nothing was written to this file since its last flush. */
    if (old_mod_counter <= file.flush_counter) {
      continue;
    }

    ut_a(file.is_open);
    ut_a(file.space == space);

    ++fil_n_log_flushes;
    ++fil_n_pending_log_flushes;

    bool skip_flush = false;

    /* Wait for some other thread that is flushing. */
    while (file.n_pending_flushes > 0 && !skip_flush) {
      /* Release the mutex to avoid deadlock with
      the flushing thread. */

      int64_t sig_count = os_event_reset(file.sync_event);

      mutex_release();

      os_event_wait_low(file.sync_event, sig_count);

      mutex_acquire();

      /* The other thread's flush already covered our snapshot. */
      if (file.flush_counter >= old_mod_counter) {
        skip_flush = true;
      }
    }

    if (!skip_flush) {
      ut_a(file.is_open);

      /* Mark the flush as in progress so that concurrent callers wait
      in the loop above instead of flushing at the same time. */
      ++file.n_pending_flushes;

      mutex_release();

      os_file_flush(file.handle);

      mutex_acquire();

      /* Wake up threads waiting on this file's flush. */
      os_event_set(file.sync_event);

      --file.n_pending_flushes;
    }

    /* Advance the flush counter to our snapshot, never backwards. */
    if (file.flush_counter < old_mod_counter) {
      file.flush_counter = old_mod_counter;

      remove_from_unflushed_list(space);
    }

    --fil_n_pending_log_flushes;
  }

  --space->n_pending_flushes;
}
8234 
/** Flushes to disk possible writes cached by the OS. If the space does
not exist, is a temporary tablespace or is being dropped, does not do
anything. The redo log space is delegated to redo_space_flush().
The caller must own the shard mutex; it is released and reacquired while
waiting for a concurrent flush and around the call to os_file_flush().
@param[in]	space_id	File space ID (this can be a group of log files
                                or a tablespace of the database) */
void Fil_shard::space_flush(space_id_t space_id) {
  ut_ad(mutex_owned());

  if (space_id == dict_sys_t::s_log_space_first_id) {
    redo_space_flush();
    return;
  }

  fil_space_t *space = get_space_by_id(space_id);

  if (space == nullptr || space->purpose == FIL_TYPE_TEMPORARY ||
      space->stop_new_ops) {
    return;
  }

  bool fbd = fil_buffering_disabled(space);

  if (fbd) {
    /* No need to flush. User has explicitly disabled
    buffering. However, flush should be called if the file
    size changes to keep OS metadata in sync. */
    ut_ad(!space->is_in_unflushed_spaces);
    ut_ad(space_is_flushed(space));

    /* Flush only if the file size changes */
    bool no_flush = true;
    for (const auto &file : space->files) {
#ifdef UNIV_DEBUG
      ut_ad(file.modification_counter == file.flush_counter);
#endif /* UNIV_DEBUG */
      if (file.flush_size != file.size) {
        /* Found at least one file whose size has changed */
        no_flush = false;
        break;
      }
    }

    if (no_flush) {
      /* Nothing to flush. Just return */
      return;
    }
  }

  /* Prevent dropping of the space while we are flushing */
  ++space->n_pending_flushes;

  for (auto &file : space->files) {
    /* Snapshot the modification counter: this is the point up to which
    this call has to make the file durable. */
    int64_t old_mod_counter = file.modification_counter;

    if (!file.is_open) {
      continue;
    }

    /* Skip flushing if the file size has not changed since
    last flush was done and the flush mode is O_DIRECT_NO_FSYNC */
    if (fbd && (file.flush_size == file.size)) {
      ut_ad(old_mod_counter <= file.flush_counter);
      continue;
    }

    /* If we are here and the flush mode is O_DIRECT_NO_FSYNC, then
    it means that the file size has changed and hence, it should be
    flushed, irrespective of the mod_counter and flush counter values,
    which are always same in case of O_DIRECT_NO_FSYNC to avoid flush
    on every write operation.
    For other flush modes, if the flush_counter is same or ahead of
    the mod_counter, skip the flush. */
    if (!fbd && (old_mod_counter <= file.flush_counter)) {
      continue;
    }

    /* Account for the pending flush; only data tablespaces are expected
    to reach this point. */
    switch (space->purpose) {
      case FIL_TYPE_TEMPORARY:
        ut_ad(0);  // we already checked for this
        /* Falls through to the cases below. */

      case FIL_TYPE_TABLESPACE:
      case FIL_TYPE_IMPORT:
        ++fil_n_pending_tablespace_flushes;
        break;

      case FIL_TYPE_LOG:
        ut_error;
        break;
    }

    bool skip_flush = false;
#ifdef _WIN32
    /* On Windows a raw disk partition is not flushed. */
    if (file.is_raw_disk) {
      skip_flush = true;
    }
#endif /* _WIN32 */

    while (file.n_pending_flushes > 0 && !skip_flush) {
      /* We want to avoid calling os_file_flush() on
      the file twice at the same time, because we do
      not know what bugs OS's may contain in file
      I/O */

      int64_t sig_count = os_event_reset(file.sync_event);

      mutex_release();

      os_event_wait_low(file.sync_event, sig_count);

      mutex_acquire();

      /* The other thread's flush already covered our snapshot. */
      if (file.flush_counter >= old_mod_counter) {
        skip_flush = true;
      }
    }

    if (!skip_flush) {
      ut_a(file.is_open);

      /* Mark the flush as in progress so that concurrent callers wait
      in the loop above instead of flushing at the same time. */
      ++file.n_pending_flushes;

      mutex_release();

      os_file_flush(file.handle);

      /* NOTE(review): flush_size is updated before the shard mutex is
      reacquired - confirm this is safe against a concurrent change of
      file.size. */
      file.flush_size = file.size;

      mutex_acquire();

      /* Wake up threads waiting on this file's flush. */
      os_event_set(file.sync_event);

      --file.n_pending_flushes;
    }

    /* Advance the flush counter to our snapshot, never backwards. */
    if (file.flush_counter < old_mod_counter) {
      file.flush_counter = old_mod_counter;

      remove_from_unflushed_list(space);
    }

    /* Undo the accounting done before the flush and move on to the next
    file of the space. */
    switch (space->purpose) {
      case FIL_TYPE_TEMPORARY:
        ut_ad(0);  // we already checked for this
        /* Falls through to the cases below. */

      case FIL_TYPE_TABLESPACE:
      case FIL_TYPE_IMPORT:
        --fil_n_pending_tablespace_flushes;
        continue;

      case FIL_TYPE_LOG:
        ut_error;
    }

    ut_ad(0);
  }

  --space->n_pending_flushes;
}
8392 
8393 /** Flushes to disk possible writes cached by the OS. If the space does
8394 not exist or is being dropped, does not do anything.
8395 @param[in]	space_id	File space ID (this can be a group of log files
8396                                 or a tablespace of the database) */
fil_flush(space_id_t space_id)8397 void fil_flush(space_id_t space_id) {
8398   auto shard = fil_system->shard_by_id(space_id);
8399 
8400   shard->mutex_acquire();
8401 
8402   /* Note: Will release and reacquire the Fil_shard::mutex. */
8403   shard->space_flush(space_id);
8404 
8405   shard->mutex_release();
8406 }
8407 
8408 /** Flush any pending writes to disk for the redo log. */
flush_file_redo()8409 void Fil_shard::flush_file_redo() {
8410   /* We never evict the redo log tablespace. It's for all
8411   practical purposes a read-only data structure. */
8412 
8413   mutex_acquire();
8414 
8415   redo_space_flush();
8416 
8417   mutex_release();
8418 }
8419 
8420 /** Collect the tablespace IDs of unflushed tablespaces in space_ids.
8421 @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
8422                                 can be ORred */
flush_file_spaces(uint8_t purpose)8423 void Fil_shard::flush_file_spaces(uint8_t purpose) {
8424   Space_ids space_ids;
8425 
8426   ut_ad((purpose & FIL_TYPE_TABLESPACE) || (purpose & FIL_TYPE_LOG));
8427 
8428   mutex_acquire();
8429 
8430   for (auto space = UT_LIST_GET_FIRST(m_unflushed_spaces); space != nullptr;
8431        space = UT_LIST_GET_NEXT(unflushed_spaces, space)) {
8432     if ((to_int(space->purpose) & purpose) && !space->stop_new_ops) {
8433       space_ids.push_back(space->id);
8434     }
8435   }
8436 
8437   mutex_release();
8438 
8439   /* Flush the spaces.  It will not hurt to call fil_flush() on
8440   a non-existing space id. */
8441   for (auto space_id : space_ids) {
8442     mutex_acquire();
8443 
8444     space_flush(space_id);
8445 
8446     mutex_release();
8447   }
8448 }
8449 
8450 /** Flush the redo log writes to disk, possibly cached by the OS. */
flush_file_redo()8451 void Fil_system::flush_file_redo() { m_shards[REDO_SHARD]->flush_file_redo(); }
8452 
8453 /** Flush to disk the writes in file spaces of the given type
8454 possibly cached by the OS.
8455 @param[in]	purpose		FIL_TYPE_TABLESPACE or FIL_TYPE_LOG,
8456                                 can be ORred */
flush_file_spaces(uint8_t purpose)8457 void Fil_system::flush_file_spaces(uint8_t purpose) {
8458   for (auto shard : m_shards) {
8459     shard->flush_file_spaces(purpose);
8460   }
8461 }
8462 
8463 /** Flush to disk the writes in file spaces of the given type
8464 possibly cached by the OS.
8465 @param[in]     purpose FIL_TYPE_TABLESPACE or FIL_TYPE_LOG, can be ORred */
fil_flush_file_spaces(uint8_t purpose)8466 void fil_flush_file_spaces(uint8_t purpose) {
8467   fil_system->flush_file_spaces(purpose);
8468 }
8469 
8470 /** Flush to disk the writes in file spaces of the given type
8471 possibly cached by the OS. */
fil_flush_file_redo()8472 void fil_flush_file_redo() { fil_system->flush_file_redo(); }
8473 
8474 /** Returns true if file address is undefined.
8475 @param[in]	addr		Address
8476 @return true if undefined */
fil_addr_is_null(const fil_addr_t & addr)8477 bool fil_addr_is_null(const fil_addr_t &addr) {
8478   return (addr.page == FIL_NULL);
8479 }
8480 
8481 /** Get the predecessor of a file page.
8482 @param[in]	page		File page
8483 @return FIL_PAGE_PREV */
fil_page_get_prev(const byte * page)8484 page_no_t fil_page_get_prev(const byte *page) {
8485   return (mach_read_from_4(page + FIL_PAGE_PREV));
8486 }
8487 
8488 /** Get the successor of a file page.
8489 @param[in]	page		File page
8490 @return FIL_PAGE_NEXT */
fil_page_get_next(const byte * page)8491 page_no_t fil_page_get_next(const byte *page) {
8492   return (mach_read_from_4(page + FIL_PAGE_NEXT));
8493 }
8494 
8495 /** Sets the file page type.
8496 @param[in,out]	page		File page
8497 @param[in]	type		Page type */
fil_page_set_type(byte * page,ulint type)8498 void fil_page_set_type(byte *page, ulint type) {
8499   mach_write_to_2(page + FIL_PAGE_TYPE, type);
8500 }
8501 
8502 /** Reset the page type.
8503 Data files created before MySQL 5.1 may contain garbage in FIL_PAGE_TYPE.
8504 In MySQL 3.23.53, only undo log pages and index pages were tagged.
8505 Any other pages were written with uninitialized bytes in FIL_PAGE_TYPE.
8506 @param[in]	page_id	page number
8507 @param[in,out]	page	page with invalid FIL_PAGE_TYPE
8508 @param[in]	type	expected page type
8509 @param[in,out]	mtr	mini-transaction */
fil_page_reset_type(const page_id_t & page_id,byte * page,ulint type,mtr_t * mtr)8510 void fil_page_reset_type(const page_id_t &page_id, byte *page, ulint type,
8511                          mtr_t *mtr) {
8512   ib::info(ER_IB_MSG_334) << "Resetting invalid page " << page_id << " type "
8513                           << fil_page_get_type(page) << " to " << type << ".";
8514   mlog_write_ulint(page + FIL_PAGE_TYPE, type, MLOG_2BYTES, mtr);
8515 }
8516 
8517 /** Closes the tablespace memory cache. */
fil_close()8518 void fil_close() {
8519   if (fil_system == nullptr) {
8520     return;
8521   }
8522 
8523   UT_DELETE(fil_system);
8524 
8525   fil_system = nullptr;
8526 }
8527 
8528 #ifndef UNIV_HOTBACKUP
8529 /** Initializes a buffer control block when the buf_pool is created.
8530 @param[in]	block		Pointer to the control block
8531 @param[in]	frame		Pointer to buffer frame */
fil_buf_block_init(buf_block_t * block,byte * frame)8532 static void fil_buf_block_init(buf_block_t *block, byte *frame) {
8533   UNIV_MEM_DESC(frame, UNIV_PAGE_SIZE);
8534 
8535   block->frame = frame;
8536 
8537   block->page.io_fix = BUF_IO_NONE;
8538   /* There are assertions that check for this. */
8539   block->page.buf_fix_count = 1;
8540   block->page.state = BUF_BLOCK_READY_FOR_USE;
8541 
8542   page_zip_des_init(&block->page.zip);
8543 }
8544 
/** Parameters describing one pass over the pages of a tablespace file;
consumed by fil_iterate(). */
struct Fil_page_iterator {
  /** File handle */
  pfs_os_file_t m_file;

  /** File path name */
  const char *m_filepath;

  /** From where to start (byte offset in the file) */
  os_offset_t m_start;

  /** Where to stop (byte offset in the file, exclusive) */
  os_offset_t m_end;

  /** File size in bytes */
  os_offset_t m_file_size;

  /** Page size in bytes */
  size_t m_page_size;

  /** Number of pages to use for I/O */
  size_t m_n_io_buffers;

  /** Buffer to use for IO */
  byte *m_io_buffer;

  /** Encryption key; nullptr if the file is not encrypted */
  byte *m_encryption_key;

  /** Encryption iv */
  byte *m_encryption_iv;
};
8576 
/** TODO: This can be made parallel trivially by chunking up the file
and creating a callback per thread. Main benefit will be to use multiple
CPUs for checksums and compressed tables. We have to do compressed tables
block by block right now. Secondly we need to decompress/compress and copy
too much of data. These are CPU intensive.

Iterate over all the pages in the tablespace. The file is read in batches
of up to iter.m_n_io_buffers pages; the callback is invoked once per page
and a batch is written back only if the callback updated a page in it.
@param[in]	iter		Tablespace iterator
@param[in,out]	block		Block to use for IO
@param[in]	callback	Callback to inspect and update page contents
@return DB_SUCCESS or error code */
static dberr_t fil_iterate(const Fil_page_iterator &iter, buf_block_t *block,
                           PageCallback &callback) {
  os_offset_t offset;
  size_t n_bytes;
  page_no_t page_no = 0;
  space_id_t space_id = callback.get_space_id();

  /* Size of a full batch in bytes. */
  n_bytes = iter.m_n_io_buffers * iter.m_page_size;

  ut_ad(!srv_read_only_mode);

  /* For old style compressed tables we do a lot of useless copying
  for non-index pages. Unfortunately, it is required by
  buf_zip_decompress() */

  ulint read_type = IORequest::READ;
  ulint write_type = IORequest::WRITE;

  for (offset = iter.m_start; offset < iter.m_end; offset += n_bytes) {
    byte *io_buffer = iter.m_io_buffer;

    /* block->frame is advanced page by page below, so rewind it to the
    start of the buffer for every batch. */
    block->frame = io_buffer;

    if (callback.get_page_size().is_compressed()) {
      page_zip_des_init(&block->page.zip);
      page_zip_set_size(&block->page.zip, iter.m_page_size);

      block->page.size.copy_from(
          page_size_t(static_cast<uint32_t>(iter.m_page_size),
                      static_cast<uint32_t>(univ_page_size.logical()), true));

      /* The compressed page lives right after the first UNIV_PAGE_SIZE
      bytes of the buffer. */
      block->page.zip.data = block->frame + UNIV_PAGE_SIZE;
      ut_d(block->page.zip.m_external = true);
      ut_ad(iter.m_page_size == callback.get_page_size().physical());

      /* Zip IO is done in the compressed page buffer. */
      io_buffer = block->page.zip.data;
    } else {
      io_buffer = iter.m_io_buffer;
    }

    /* We have to read the exact number of bytes. Otherwise the
    InnoDB IO functions croak on failed reads. */

    n_bytes = static_cast<ulint>(
        ut_min(static_cast<os_offset_t>(n_bytes), iter.m_end - offset));

    ut_ad(n_bytes > 0);
    ut_ad(!(n_bytes % iter.m_page_size));

    dberr_t err;
    IORequest read_request(read_type);

    /* For encrypted table, set encryption information. The batch that
    starts at offset 0 is read without it. */
    if (iter.m_encryption_key != nullptr && offset != 0) {
      read_request.encryption_key(iter.m_encryption_key, Encryption::KEY_LEN,
                                  iter.m_encryption_iv);

      read_request.encryption_algorithm(Encryption::AES);
    }

    err = os_file_read(read_request, iter.m_filepath, iter.m_file, io_buffer,
                       offset, (ulint)n_bytes);

    if (err != DB_SUCCESS) {
      ib::error(ER_IB_MSG_335) << "os_file_read() failed";

      return (err);
    }

    size_t n_pages_read;
    /* Set once the callback leaves any page of this batch in the
    BUF_BLOCK_FILE_PAGE state; decides whether the batch is written back. */
    bool updated = false;
    os_offset_t page_off = offset;

    n_pages_read = (ulint)n_bytes / iter.m_page_size;

    for (size_t i = 0; i < n_pages_read; ++i) {
      buf_block_set_file_page(block, page_id_t(space_id, page_no++));

      /* We are going to modify the page. Add to page tracking system. */
      arch_page_sys->track_page(&block->page, LSN_MAX, LSN_MAX, true);

      if ((err = callback(page_off, block)) != DB_SUCCESS) {
        return (err);

      } else if (!updated) {
        updated = buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE;
      }

      /* Reset the block state so that it can be reused for the next
      page. */
      buf_block_set_state(block, BUF_BLOCK_NOT_USED);
      buf_block_set_state(block, BUF_BLOCK_READY_FOR_USE);

      page_off += iter.m_page_size;
      block->frame += iter.m_page_size;
    }

    IORequest write_request(write_type);

    /* For encrypted table, set encryption information. */
    if (iter.m_encryption_key != nullptr && offset != 0) {
      write_request.encryption_key(iter.m_encryption_key, Encryption::KEY_LEN,
                                   iter.m_encryption_iv);

      write_request.encryption_algorithm(Encryption::AES);
    }

    /* A page was updated in the set, write back to disk.
    Note: We don't have the compression algorithm, we write
    out the imported file as uncompressed. */

    if (updated && (err = os_file_write(write_request, iter.m_filepath,
                                        iter.m_file, io_buffer, offset,
                                        (ulint)n_bytes)) != DB_SUCCESS) {
      /* This is not a hard error */
      if (err == DB_IO_NO_PUNCH_HOLE) {
        err = DB_SUCCESS;
        /* Stop requesting hole punching for the remaining batches. */
        write_type &= ~IORequest::PUNCH_HOLE;

      } else {
        ib::error(ER_IB_MSG_336) << "os_file_write() failed";

        return (err);
      }
    }
  }

  return (DB_SUCCESS);
}
8716 
fil_adjust_name_import(dict_table_t * table,const char * path,ib_file_suffix extn)8717 void fil_adjust_name_import(dict_table_t *table, const char *path,
8718                             ib_file_suffix extn) {
8719   /* Try to open with current name first. */
8720   if (os_file_exists(path)) {
8721     return;
8722   }
8723 
8724   /* On failure we need to check if file exists in different letter case
8725   for partitioned table. */
8726 #ifdef _WIN32
8727   /* Safe check. Never needed on Windows. */
8728   return;
8729 #endif /* WIN32 */
8730 
8731   /* Needed only for case sensitive file system. */
8732   if (lower_case_file_system) {
8733     return;
8734   }
8735 
8736   /* Only needed for partition file. */
8737   if (!dict_name::is_partition(table->name.m_name)) {
8738     return;
8739   }
8740 
8741   /* Get Import directory path. */
8742   std::string import_dir(path);
8743   Fil_path::normalize(import_dir);
8744 
8745   auto pos = import_dir.find_last_of(Fil_path::SEPARATOR);
8746   if (pos == std::string::npos) {
8747     import_dir.assign(Fil_path::DOT_SLASH);
8748 
8749   } else {
8750     import_dir.resize(pos + 1);
8751     ut_ad(Fil_path::is_separator(import_dir.back()));
8752   }
8753 
8754   /* Walk through all files under the directory and match the import file
8755   after adjusting case. This is a safe check to allow files exported from
8756   earlier versions where the case for partition name and separator could
8757   be different. */
8758   bool found_path = false;
8759   std::string saved_path;
8760 
8761   Dir_Walker::walk(import_dir, false, [&](const std::string &file_path) {
8762     /* Skip entry if already found. */
8763     if (found_path) {
8764       return;
8765     }
8766     /* Check only for partition files. */
8767     if (!dict_name::is_partition(file_path)) {
8768       return;
8769     }
8770 
8771     /* Extract table name from path. */
8772     std::string table_name;
8773     if (!Fil_path::parse_file_path(file_path, extn, table_name)) {
8774       /* Not a valid file-per-table path */
8775       return;
8776     }
8777 
8778     /* Check if the file name would match after correcting the case. */
8779     dict_name::rebuild(table_name);
8780     if (table_name.compare(table->name.m_name) != 0) {
8781       return;
8782     }
8783 
8784     saved_path.assign(file_path);
8785     found_path = true;
8786   });
8787 
8788   return;
8789 }
8790 
8791 /** Iterate over all the pages in the tablespace.
8792 @param[in,out]	table		the table definiton in the server
8793 @param[in]	n_io_buffers	number of blocks to read and write together
8794 @param[in]	callback	functor that will do the page updates
8795 @return DB_SUCCESS or error code */
fil_tablespace_iterate(dict_table_t * table,ulint n_io_buffers,PageCallback & callback)8796 dberr_t fil_tablespace_iterate(dict_table_t *table, ulint n_io_buffers,
8797                                PageCallback &callback) {
8798   dberr_t err;
8799   pfs_os_file_t file;
8800   char *filepath;
8801   bool success;
8802 
8803   ut_a(n_io_buffers > 0);
8804   ut_ad(!srv_read_only_mode);
8805 
8806   DBUG_EXECUTE_IF("ib_import_trigger_corruption_1", return (DB_CORRUPTION););
8807 
8808   /* Make sure the data_dir_path is set. */
8809   dd_get_and_save_data_dir_path<dd::Table>(table, nullptr, false);
8810 
8811   std::string path = dict_table_get_datadir(table);
8812 
8813   filepath = Fil_path::make(path, table->name.m_name, IBD, true);
8814 
8815   if (filepath == nullptr) {
8816     return (DB_OUT_OF_MEMORY);
8817   }
8818 
8819   /* Adjust filename for partition file if in different letter case. */
8820   fil_adjust_name_import(table, filepath, IBD);
8821 
8822   file = os_file_create_simple_no_error_handling(
8823       innodb_data_file_key, filepath, OS_FILE_OPEN, OS_FILE_READ_WRITE,
8824       srv_read_only_mode, &success);
8825 
8826   DBUG_EXECUTE_IF("fil_tablespace_iterate_failure", {
8827     static bool once;
8828 
8829     if (!once || ut_rnd_interval(0, 10) == 5) {
8830       once = true;
8831       success = false;
8832       os_file_close(file);
8833     }
8834   });
8835 
8836   if (!success) {
8837     /* The following call prints an error message */
8838     os_file_get_last_error(true);
8839 
8840     ib::error(ER_IB_MSG_337) << "Trying to import a tablespace, but could not"
8841                                 " open the tablespace file "
8842                              << filepath;
8843 
8844     ut_free(filepath);
8845 
8846     return (DB_TABLESPACE_NOT_FOUND);
8847 
8848   } else {
8849     err = DB_SUCCESS;
8850   }
8851 
8852   callback.set_file(filepath, file);
8853 
8854   os_offset_t file_size = os_file_get_size(file);
8855   ut_a(file_size != (os_offset_t)-1);
8856 
8857   /* The block we will use for every physical page */
8858   buf_block_t *block;
8859 
8860   block = reinterpret_cast<buf_block_t *>(ut_zalloc_nokey(sizeof(*block)));
8861 
8862   mutex_create(LATCH_ID_BUF_BLOCK_MUTEX, &block->mutex);
8863 
8864   /* Allocate a page to read in the tablespace header, so that we
8865   can determine the page size and zip size (if it is compressed).
8866   We allocate an extra page in case it is a compressed table. One
8867   page is to ensure alignement. */
8868 
8869   void *page_ptr = ut_malloc_nokey(3 * UNIV_PAGE_SIZE);
8870   byte *page = static_cast<byte *>(ut_align(page_ptr, UNIV_PAGE_SIZE));
8871 
8872   fil_buf_block_init(block, page);
8873 
8874   /* Read the first page and determine the page and zip size. */
8875 
8876   IORequest request(IORequest::READ);
8877 
8878   err = os_file_read_first_page(request, path.c_str(), file, page,
8879                                 UNIV_PAGE_SIZE);
8880 
8881   if (err != DB_SUCCESS) {
8882     err = DB_IO_ERROR;
8883 
8884   } else if ((err = callback.init(file_size, block)) == DB_SUCCESS) {
8885     Fil_page_iterator iter;
8886 
8887     iter.m_file = file;
8888     iter.m_start = 0;
8889     iter.m_end = file_size;
8890     iter.m_filepath = filepath;
8891     iter.m_file_size = file_size;
8892     iter.m_n_io_buffers = n_io_buffers;
8893     iter.m_page_size = callback.get_page_size().physical();
8894 
8895     /* Set encryption info. */
8896     iter.m_encryption_key = table->encryption_key;
8897     iter.m_encryption_iv = table->encryption_iv;
8898 
8899     /* Check encryption is matched or not. */
8900     ulint space_flags = callback.get_space_flags();
8901 
8902     if (FSP_FLAGS_GET_ENCRYPTION(space_flags)) {
8903       if (!dd_is_table_in_encrypted_tablespace(table)) {
8904         ib::error(ER_IB_MSG_338) << "Table is not in an encrypted tablespace,"
8905                                     " but the data file intended for import"
8906                                     " is an encrypted tablespace";
8907 
8908         err = DB_IO_NO_ENCRYPT_TABLESPACE;
8909       } else {
8910         /* encryption_key must have been populated while reading CFP file. */
8911         ut_ad(table->encryption_key != nullptr &&
8912               table->encryption_iv != nullptr);
8913 
8914         if (table->encryption_key == nullptr ||
8915             table->encryption_iv == nullptr) {
8916           err = DB_ERROR;
8917         }
8918       }
8919     }
8920 
8921     if (err == DB_SUCCESS) {
8922       /* Compressed pages can't be optimised for block IO
8923       for now.  We do the IMPORT page by page. */
8924 
8925       if (callback.get_page_size().is_compressed()) {
8926         iter.m_n_io_buffers = 1;
8927         ut_a(iter.m_page_size == callback.get_page_size().physical());
8928       }
8929 
8930       /** Add an extra page for compressed page scratch
8931       area. */
8932       void *io_buffer =
8933           ut_malloc_nokey((2 + iter.m_n_io_buffers) * UNIV_PAGE_SIZE);
8934 
8935       iter.m_io_buffer =
8936           static_cast<byte *>(ut_align(io_buffer, UNIV_PAGE_SIZE));
8937 
8938       err = fil_iterate(iter, block, callback);
8939 
8940       ut_free(io_buffer);
8941     }
8942   }
8943 
8944   if (err == DB_SUCCESS) {
8945     ib::info(ER_IB_MSG_339) << "Sync to disk";
8946 
8947     if (!os_file_flush(file)) {
8948       ib::info(ER_IB_MSG_340) << "os_file_flush() failed!";
8949       err = DB_IO_ERROR;
8950     } else {
8951       ib::info(ER_IB_MSG_341) << "Sync to disk - done!";
8952     }
8953   }
8954 
8955   os_file_close(file);
8956 
8957   ut_free(page_ptr);
8958   ut_free(filepath);
8959 
8960   mutex_free(&block->mutex);
8961 
8962   ut_free(block);
8963 
8964   return (err);
8965 }
8966 #endif /* !UNIV_HOTBACKUP */
8967 
8968 /** Set the tablespace table size.
8969 @param[in]	page	a page belonging to the tablespace */
set_page_size(const buf_frame_t * page)8970 void PageCallback::set_page_size(const buf_frame_t *page) UNIV_NOTHROW {
8971   m_page_size.copy_from(fsp_header_get_page_size(page));
8972 }
8973 
8974 /** Delete the tablespace file and any related files like .cfg.
8975 This should not be called for temporary tables.
8976 @param[in]	path		File path of the IBD tablespace
8977 @return true on success */
fil_delete_file(const char * path)8978 bool fil_delete_file(const char *path) {
8979   bool success = true;
8980 
8981   /* Force a delete of any stale .ibd files that are lying around. */
8982   success = os_file_delete_if_exists(innodb_data_file_key, path, nullptr);
8983 
8984   char *cfg_filepath = Fil_path::make_cfg(path);
8985 
8986   if (cfg_filepath != nullptr) {
8987     os_file_delete_if_exists(innodb_data_file_key, cfg_filepath, nullptr);
8988 
8989     ut_free(cfg_filepath);
8990   }
8991 
8992   char *cfp_filepath = Fil_path::make_cfp(path);
8993 
8994   if (cfp_filepath != nullptr) {
8995     os_file_delete_if_exists(innodb_data_file_key, cfp_filepath, nullptr);
8996 
8997     ut_free(cfp_filepath);
8998   }
8999 
9000   return (success);
9001 }
9002 
9003 #ifndef UNIV_HOTBACKUP
9004 /** Check if swapping two .ibd files can be done without failure.
9005 @param[in]	old_table	old table
9006 @param[in]	new_table	new table
9007 @param[in]	tmp_name	temporary table name
9008 @return innodb error code */
fil_rename_precheck(const dict_table_t * old_table,const dict_table_t * new_table,const char * tmp_name)9009 dberr_t fil_rename_precheck(const dict_table_t *old_table,
9010                             const dict_table_t *new_table,
9011                             const char *tmp_name) {
9012   dberr_t err;
9013 
9014   bool old_is_file_per_table = dict_table_is_file_per_table(old_table);
9015 
9016   bool new_is_file_per_table = dict_table_is_file_per_table(new_table);
9017 
9018   /* If neither table is file-per-table,
9019   there will be no renaming of files. */
9020   if (!old_is_file_per_table && !new_is_file_per_table) {
9021     return (DB_SUCCESS);
9022   }
9023 
9024   auto old_dir = dict_table_get_datadir(old_table);
9025 
9026   char *old_path =
9027       Fil_path::make(old_dir, old_table->name.m_name, IBD, !old_dir.empty());
9028 
9029   if (old_path == nullptr) {
9030     return (DB_OUT_OF_MEMORY);
9031   }
9032 
9033   if (old_is_file_per_table) {
9034     char *tmp_path = Fil_path::make(old_dir, tmp_name, IBD, !old_dir.empty());
9035 
9036     if (tmp_path == nullptr) {
9037       ut_free(old_path);
9038       return (DB_OUT_OF_MEMORY);
9039     }
9040 
9041     /* Temp filepath must not exist. */
9042     err = fil_rename_tablespace_check(old_table->space, old_path, tmp_path,
9043                                       dict_table_is_discarded(old_table));
9044 
9045     if (err != DB_SUCCESS) {
9046       ut_free(old_path);
9047       ut_free(tmp_path);
9048       return (err);
9049     }
9050 
9051     ut_free(tmp_path);
9052   }
9053 
9054   if (new_is_file_per_table) {
9055     auto new_dir = dict_table_get_datadir(new_table);
9056 
9057     char *new_path =
9058         Fil_path::make(new_dir, new_table->name.m_name, IBD, !new_dir.empty());
9059 
9060     if (new_path == nullptr) {
9061       ut_free(old_path);
9062       return (DB_OUT_OF_MEMORY);
9063     }
9064 
9065     /* Destination filepath must not exist unless this ALTER
9066     TABLE starts and ends with a file_per-table tablespace. */
9067     if (!old_is_file_per_table) {
9068       err = fil_rename_tablespace_check(new_table->space, new_path, old_path,
9069                                         dict_table_is_discarded(new_table));
9070 
9071       if (err != DB_SUCCESS) {
9072         ut_free(old_path);
9073         ut_free(new_path);
9074         return (err);
9075       }
9076     }
9077 
9078     ut_free(new_path);
9079   }
9080 
9081   ut_free(old_path);
9082 
9083   return (DB_SUCCESS);
9084 }
9085 #endif /* !UNIV_HOTBACKUP */
9086 
9087 /** Note that the file system where the file resides doesn't support PUNCH HOLE.
9088 Called from AIO handlers when IO returns DB_IO_NO_PUNCH_HOLE
9089 @param[in,out]	file		file to set */
fil_no_punch_hole(fil_node_t * file)9090 void fil_no_punch_hole(fil_node_t *file) { file->punch_hole = false; }
9091 
9092 /** Set the compression type for the tablespace of a table
9093 @param[in]	table		The table that should be compressed
9094 @param[in]	algorithm	Text representation of the algorithm
9095 @return DB_SUCCESS or error code */
fil_set_compression(dict_table_t * table,const char * algorithm)9096 dberr_t fil_set_compression(dict_table_t *table, const char *algorithm) {
9097   ut_ad(table != nullptr);
9098 
9099   /* We don't support Page Compression for the system tablespace,
9100   the temporary tablespace, or any general tablespace because
9101   COMPRESSION is set by TABLE DDL, not TABLESPACE DDL. There is
9102   no other technical reason.  Also, do not use it for missing
9103   tables or tables with compressed row_format. */
9104   if (table->ibd_file_missing ||
9105       !DICT_TF2_FLAG_IS_SET(table, DICT_TF2_USE_FILE_PER_TABLE) ||
9106       DICT_TF2_FLAG_IS_SET(table, DICT_TF2_TEMPORARY) ||
9107       page_size_t(table->flags).is_compressed()) {
9108     return (DB_IO_NO_PUNCH_HOLE_TABLESPACE);
9109   }
9110 
9111   dberr_t err;
9112   Compression compression;
9113 
9114   if (algorithm == nullptr || strlen(algorithm) == 0) {
9115 #ifndef UNIV_DEBUG
9116     compression.m_type = Compression::NONE;
9117 #else /* UNIV_DEBUG */
9118     /* This is a Debug tool for setting compression on all
9119     compressible tables not otherwise specified. */
9120     switch (srv_debug_compress) {
9121       case Compression::LZ4:
9122       case Compression::ZLIB:
9123       case Compression::NONE:
9124 
9125         compression.m_type = static_cast<Compression::Type>(srv_debug_compress);
9126         break;
9127 
9128       default:
9129         compression.m_type = Compression::NONE;
9130     }
9131 
9132 #endif /* UNIV_DEBUG */
9133 
9134     err = DB_SUCCESS;
9135 
9136   } else {
9137     err = Compression::check(algorithm, &compression);
9138   }
9139 
9140   fil_space_t *space = fil_space_get(table->space);
9141 
9142   if (space == nullptr) {
9143     return (DB_NOT_FOUND);
9144   }
9145 
9146   space->compression_type = compression.m_type;
9147 
9148   if (space->compression_type != Compression::NONE) {
9149     if (!space->files.front().punch_hole) {
9150       return (DB_IO_NO_PUNCH_HOLE_FS);
9151     }
9152   }
9153 
9154   return (err);
9155 }
9156 
9157 /** Get the compression algorithm for a tablespace.
9158 @param[in]	space_id	Space ID to check
9159 @return the compression algorithm */
fil_get_compression(space_id_t space_id)9160 Compression::Type fil_get_compression(space_id_t space_id) {
9161   fil_space_t *space = fil_space_get(space_id);
9162 
9163   return (space == nullptr ? Compression::NONE : space->compression_type);
9164 }
9165 
9166 /** Set the encryption type for the tablespace
9167 @param[in] space_id		Space ID of tablespace for which to set
9168 @param[in] algorithm		Encryption algorithm
9169 @param[in] key			Encryption key
9170 @param[in] iv			Encryption iv
9171 @return DB_SUCCESS or error code */
fil_set_encryption(space_id_t space_id,Encryption::Type algorithm,byte * key,byte * iv)9172 dberr_t fil_set_encryption(space_id_t space_id, Encryption::Type algorithm,
9173                            byte *key, byte *iv) {
9174   auto shard = fil_system->shard_by_id(space_id);
9175 
9176   shard->mutex_acquire();
9177 
9178   fil_space_t *space = shard->get_space_by_id(space_id);
9179 
9180   if (space == nullptr) {
9181     shard->mutex_release();
9182     return (DB_NOT_FOUND);
9183   }
9184 
9185   if (key == nullptr) {
9186     Encryption::random_value(space->encryption_key);
9187   } else {
9188     memcpy(space->encryption_key, key, Encryption::KEY_LEN);
9189   }
9190 
9191   space->encryption_klen = Encryption::KEY_LEN;
9192 
9193   if (iv == nullptr) {
9194     Encryption::random_value(space->encryption_iv);
9195   } else {
9196     memcpy(space->encryption_iv, iv, Encryption::KEY_LEN);
9197   }
9198 
9199   ut_ad(algorithm != Encryption::NONE);
9200   space->encryption_type = algorithm;
9201 
9202   shard->mutex_release();
9203 
9204   return (DB_SUCCESS);
9205 }
9206 
9207 /** Reset the encryption type for the tablespace
9208 @param[in] space_id		Space ID of tablespace for which to set
9209 @return DB_SUCCESS or error code */
fil_reset_encryption(space_id_t space_id)9210 dberr_t fil_reset_encryption(space_id_t space_id) {
9211   ut_ad(space_id != TRX_SYS_SPACE);
9212 
9213   if (fsp_is_system_or_temp_tablespace(space_id)) {
9214     return (DB_IO_NO_ENCRYPT_TABLESPACE);
9215   }
9216 
9217   auto shard = fil_system->shard_by_id(space_id);
9218 
9219   shard->mutex_acquire();
9220 
9221   fil_space_t *space = shard->get_space_by_id(space_id);
9222 
9223   if (space == nullptr) {
9224     shard->mutex_release();
9225     return (DB_NOT_FOUND);
9226   }
9227 
9228   memset(space->encryption_key, 0, Encryption::KEY_LEN);
9229   space->encryption_klen = 0;
9230 
9231   memset(space->encryption_iv, 0, Encryption::KEY_LEN);
9232 
9233   space->encryption_type = Encryption::NONE;
9234 
9235   shard->mutex_release();
9236 
9237   return (DB_SUCCESS);
9238 }
9239 
9240 #ifndef UNIV_HOTBACKUP
9241 /** Rotate the tablespace keys by new master key.
9242 @param[in,out]	shard		Rotate the keys in this shard
9243 @return true if the re-encrypt succeeds */
encryption_rotate_in_a_shard(Fil_shard * shard)9244 bool Fil_system::encryption_rotate_in_a_shard(Fil_shard *shard) {
9245   byte encrypt_info[Encryption::INFO_SIZE];
9246 
9247   for (auto &elem : shard->m_spaces) {
9248     auto space = elem.second;
9249 
9250     /* Skip unencypted tablespaces. Encrypted redo log
9251     tablespaces is handled in function log_rotate_encryption. */
9252 
9253     if (fsp_is_system_or_temp_tablespace(space->id) ||
9254         space->purpose == FIL_TYPE_LOG) {
9255       continue;
9256     }
9257 
9258     /* Skip the undo tablespace when it's in default key status,
9259     since it's the first server startup after bootstrap, and the
9260     server uuid is not ready yet. */
9261 
9262     if (fsp_is_undo_tablespace(space->id) &&
9263         Encryption::get_master_key_id() == Encryption::DEFAULT_MASTER_KEY_ID) {
9264       continue;
9265     }
9266 
9267     /* Rotate the encrypted tablespaces. */
9268     if (space->encryption_type != Encryption::NONE) {
9269       memset(encrypt_info, 0, Encryption::INFO_SIZE);
9270 
9271       MDL_ticket *mdl_ticket = nullptr;
9272 #if !defined(XTRABACKUP)
9273       /* Take MDL on UNDO tablespace to make it mutually exclusive with
9274       UNDO tablespace truncation. For other tablespaces MDL is not required
9275       here. */
9276       if (fsp_is_undo_tablespace(space->id)) {
9277         THD *thd = current_thd;
9278         while (
9279             acquire_shared_backup_lock(thd, thd->variables.lock_wait_timeout)) {
9280           os_thread_sleep(20);
9281         }
9282 
9283         while (dd::acquire_exclusive_tablespace_mdl(thd, space->name, false,
9284                                                     &mdl_ticket, false)) {
9285           os_thread_sleep(20);
9286         }
9287         ut_ad(mdl_ticket != nullptr);
9288       }
9289 #endif
9290 
9291       mtr_t mtr;
9292       mtr_start(&mtr);
9293       bool ret = fsp_header_rotate_encryption(space, encrypt_info, &mtr);
9294       mtr_commit(&mtr);
9295 
9296       if (mdl_ticket != nullptr) {
9297         dd_release_mdl(mdl_ticket);
9298       }
9299       if (!ret) {
9300         return (false);
9301       }
9302     }
9303 
9304     DBUG_EXECUTE_IF("ib_crash_during_rotation_for_encryption", DBUG_SUICIDE(););
9305   }
9306 
9307   return (true);
9308 }
9309 
9310 /** Rotate the tablespace keys by new master key.
9311 @return true if the re-encrypt succeeds */
encryption_rotate_all()9312 bool Fil_system::encryption_rotate_all() {
9313   for (auto shard : m_shards) {
9314     // FIXME: We don't acquire the fil_sys::mutex here. Why?
9315 
9316     bool success = encryption_rotate_in_a_shard(shard);
9317 
9318     if (!success) {
9319       return (false);
9320     }
9321   }
9322 
9323   return (true);
9324 }
9325 
9326 /** Rotate the tablespace keys by new master key.
9327 @return true if the re-encrypt succeeds */
fil_encryption_rotate()9328 bool fil_encryption_rotate() { return (fil_system->encryption_rotate_all()); }
9329 
9330 #endif /* !UNIV_HOTBACKUP */
9331 
9332 /** Constructor
9333 @param[in]  path            pathname (may also include the file basename)
9334 @param[in]  normalize_path  If false, it's the callers responsibility to
9335                             ensure that the path is normalized. */
Fil_path(const std::string & path,bool normalize_path)9336 Fil_path::Fil_path(const std::string &path, bool normalize_path)
9337     : m_path(path) {
9338   if (normalize_path) {
9339     normalize(m_path);
9340   }
9341 
9342   m_abs_path = get_real_path(m_path, false);
9343 }
9344 
9345 /** Constructor
9346 @param[in]  path            pathname (may also include the file basename)
9347 @param[in]  normalize_path  If false, it's the callers responsibility to
9348                             ensure that the path is normalized. */
Fil_path(const char * path,bool normalize_path)9349 Fil_path::Fil_path(const char *path, bool normalize_path) : m_path(path) {
9350   if (normalize_path) {
9351     normalize(m_path);
9352   }
9353 
9354   m_abs_path = get_real_path(m_path, false);
9355 }
9356 
9357 /** Constructor
9358 @param[in]  path            pathname (may also include the file basename)
9359 @param[in]  len             Length of path
9360 @param[in]  normalize_path  If false, it's the callers responsibility to
9361                             ensure that the path is normalized. */
Fil_path(const char * path,size_t len,bool normalize_path)9362 Fil_path::Fil_path(const char *path, size_t len, bool normalize_path)
9363     : m_path(path, len) {
9364   if (normalize_path) {
9365     normalize(m_path);
9366   }
9367 
9368   m_abs_path = get_real_path(m_path, false);
9369 }
9370 
9371 /** Default constructor. */
Fil_path()9372 Fil_path::Fil_path() : m_path(), m_abs_path() { /* No op */
9373 }
9374 
is_same_as(const Fil_path & other) const9375 bool Fil_path::is_same_as(const Fil_path &other) const {
9376   if (path().empty() || other.path().empty()) {
9377     return (false);
9378   }
9379 
9380   std::string first = abs_path();
9381   trim_separator(first);
9382 
9383   std::string second = other.abs_path();
9384   trim_separator(second);
9385 
9386   return (first == second);
9387 }
9388 
is_same_as(const std::string & other) const9389 bool Fil_path::is_same_as(const std::string &other) const {
9390   if (path().empty() || other.empty()) {
9391     return (false);
9392   }
9393 
9394   Fil_path other_path(other);
9395 
9396   return (is_same_as(other_path));
9397 }
9398 
is_ancestor(const Fil_path & other) const9399 bool Fil_path::is_ancestor(const Fil_path &other) const {
9400   if (path().empty() || other.path().empty()) {
9401     return (false);
9402   }
9403 
9404   std::string ancestor = abs_path();
9405   std::string descendant = other.abs_path();
9406 
9407   /* We do not know if the descendant is a dir or a file.
9408   But the ancestor in this routine is always a directory.
9409   If it does not yet exist, it may not have a trailing separator.
9410   If there is no trailing separator, add it. */
9411   append_separator(ancestor);
9412 
9413   if (descendant.length() <= ancestor.length()) {
9414     return (false);
9415   }
9416 
9417   return (std::equal(ancestor.begin(), ancestor.end(), descendant.begin()));
9418 }
9419 
is_ancestor(const std::string & other) const9420 bool Fil_path::is_ancestor(const std::string &other) const {
9421   if (path().empty() || other.empty()) {
9422     return (false);
9423   }
9424 
9425   Fil_path descendant(other);
9426 
9427   return (is_ancestor(descendant));
9428 }
9429 
is_hidden(std::string path)9430 bool Fil_path::is_hidden(std::string path) {
9431   std::string basename(path);
9432   while (!basename.empty()) {
9433     char c = basename.back();
9434     if (!(Fil_path::is_separator(c) || c == '*')) {
9435       break;
9436     }
9437     basename.resize(basename.size() - 1);
9438   }
9439   auto sep = basename.find_last_of(SEPARATOR);
9440 
9441   return (sep != std::string::npos && basename[sep + 1] == '.');
9442 }
9443 
9444 #ifdef _WIN32
is_hidden(WIN32_FIND_DATA & dirent)9445 bool Fil_path::is_hidden(WIN32_FIND_DATA &dirent) {
9446   if (dirent.dwFileAttributes & FILE_ATTRIBUTE_HIDDEN ||
9447       dirent.dwFileAttributes & FILE_ATTRIBUTE_SYSTEM) {
9448     return (true);
9449   }
9450 
9451   return (false);
9452 }
9453 #endif /* WIN32 */
9454 
9455 /** @return true if the path exists and is a file . */
get_file_type(const std::string & path)9456 os_file_type_t Fil_path::get_file_type(const std::string &path) {
9457   os_file_type_t type;
9458 
9459   os_file_status(path.c_str(), nullptr, &type);
9460 
9461   return (type);
9462 }
9463 
9464 /** Return a string to display the file type of a path.
9465 @param[in]  path  path name
9466 @return true if the path exists and is a file . */
get_file_type_string(const std::string & path)9467 const char *Fil_path::get_file_type_string(const std::string &path) {
9468   return (get_file_type_string(Fil_path::get_file_type(path)));
9469 }
9470 
9471 /** Return a string to display the file type of a path.
9472 @param[in]  type  OS file type
9473 @return true if the path exists and is a file . */
get_file_type_string(os_file_type_t type)9474 const char *Fil_path::get_file_type_string(os_file_type_t type) {
9475   switch (type) {
9476     case OS_FILE_TYPE_FILE:
9477       return ("file");
9478     case OS_FILE_TYPE_LINK:
9479       return ("symbolic link");
9480     case OS_FILE_TYPE_DIR:
9481       return ("directory");
9482     case OS_FILE_TYPE_BLOCK:
9483       return ("block device");
9484     case OS_FILE_TYPE_NAME_TOO_LONG:
9485       return ("name too long");
9486     case OS_FILE_PERMISSION_ERROR:
9487       return ("permission error");
9488     case OS_FILE_TYPE_MISSING:
9489       return ("missing");
9490     case OS_FILE_TYPE_UNKNOWN:
9491     case OS_FILE_TYPE_FAILED:
9492       break;
9493   }
9494   return ("unknown");
9495 }
9496 
9497 /** @return true if the path exists and is a file . */
is_file_and_exists() const9498 bool Fil_path::is_file_and_exists() const {
9499   return (get_file_type(abs_path()) == OS_FILE_TYPE_FILE);
9500 }
9501 
9502 /** @return true if the path exists and is a directory. */
is_directory_and_exists() const9503 bool Fil_path::is_directory_and_exists() const {
9504   return (get_file_type(abs_path()) == OS_FILE_TYPE_DIR);
9505 }
9506 
9507 /** This validation is only for ':'.
9508 @return true if the path is valid. */
is_valid() const9509 bool Fil_path::is_valid() const {
9510   auto count = std::count(m_path.begin(), m_path.end(), ':');
9511 
9512   if (count == 0) {
9513     return (true);
9514   }
9515 
9516 #ifdef _WIN32
9517   /* Do not allow names like "C:name.ibd" because it
9518   specifies the "C:" drive but allows a relative location.
9519   It should be like "c:\". If a single colon is used it
9520   must be the second byte and the third byte must be a
9521   separator. */
9522 
9523   /* 8 == strlen("c:\a,ibd") */
9524   if (count == 1 && m_path.length() >= 8 && isalpha(m_path.at(0)) &&
9525       m_path.at(1) == ':' && (m_path.at(2) == '\\' || m_path.at(2) == '/')) {
9526     return (true);
9527   }
9528 #endif /* _WIN32 */
9529 
9530   return (false);
9531 }
9532 
is_circular() const9533 bool Fil_path::is_circular() const {
9534   size_t first;
9535 
9536   /* Find the first named directory.  It is OK for a path to
9537   start with "../../../dir". */
9538   for (first = 0; m_path[first] == OS_SEPARATOR || m_path[first] == '.';
9539        ++first)
9540     ;
9541 
9542   size_t back_up = m_path.find(SLASH_DOT_DOT_SLASH, first);
9543   if (back_up == std::string::npos) {
9544     return (false);
9545   }
9546 
9547 #ifndef _WIN32
9548   /* If the path contains a symlink before the /../ and the platform
9549   is not Windows, then '/../' does not go bback through the symlink,
9550   so it is not circular.  It refers to the parent of the symlinked
9551   location and we must allow it. On Windows, it backs up to the directory
9552   where the symlink starts, which is a circular reference. */
9553   std::string up_path = m_path.substr(0, back_up);
9554   if (my_is_symlink(up_path.c_str(), nullptr)) {
9555     return (false);
9556   }
9557 #endif /* _WIN32 */
9558 
9559   return (true);
9560 }
9561 
9562 /** Sets the flags of the tablespace. The tablespace must be locked
9563 in MDL_EXCLUSIVE MODE.
9564 @param[in]	space	tablespace in-memory struct
9565 @param[in]	flags	tablespace flags */
fil_space_set_flags(fil_space_t * space,uint32_t flags)9566 void fil_space_set_flags(fil_space_t *space, uint32_t flags) {
9567   ut_ad(fsp_flags_is_valid(flags));
9568 
9569   rw_lock_x_lock(&space->latch);
9570 
9571   ut_a(flags < std::numeric_limits<uint32_t>::max());
9572   space->flags = (uint32_t)flags;
9573 
9574   rw_lock_x_unlock(&space->latch);
9575 }
9576 
/* Unit Tests */
#ifdef UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH
#define MF Fil_path::make
#define DISPLAY ib::info(ER_IB_MSG_342) << path
/** Call Fil_path::make() with a fixed list of directory, name, suffix and
trim combinations and write every returned path to the info log.  Only
compiled when UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH is defined. */
void test_make_filepath() {
  const char *long_path =
      "this/is/a/very/long/path/including/a/very/"
      "looooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooooo"
      "oooooooooooooooooooooooooooooooooooooooooooooooong"
      "/folder/name";

  /* Arguments of one Fil_path::make() call. */
  struct Sample {
    const char *m_dir;
    const char *m_name;
    decltype(IBD) m_suffix;
    bool m_trim;
  };

  /* The samples are evaluated in the order listed; the repeated ".ibd"
  entry is kept on purpose so the log output stays the same. */
  const Sample samples[] = {
      {"/this/is/a/path/with/a/filename", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename", nullptr, ISL, false},
      {"/this/is/a/path/with/a/filename", nullptr, CFG, false},
      {"/this/is/a/path/with/a/filename", nullptr, CFP, false},
      {"/this/is/a/path/with/a/filename.ibd", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename.ibd", nullptr, IBD, false},
      {"/this/is/a/path/with/a/filename.dat", nullptr, IBD, false},
      {nullptr, "tablespacename", NO_EXT, false},
      {nullptr, "tablespacename", IBD, false},
      {nullptr, "dbname/tablespacename", NO_EXT, false},
      {nullptr, "dbname/tablespacename", IBD, false},
      {nullptr, "dbname/tablespacename", ISL, false},
      {nullptr, "dbname/tablespacename", CFG, false},
      {nullptr, "dbname/tablespacename", CFP, false},
      {nullptr, "dbname\\tablespacename", NO_EXT, false},
      {nullptr, "dbname\\tablespacename", IBD, false},
      {"/this/is/a/path", "dbname/tablespacename", IBD, false},
      {"/this/is/a/path", "dbname/tablespacename", IBD, true},
      {"./this/is/a/path", "dbname/tablespacename.ibd", IBD, true},
      {"this\\is\\a\\path", "dbname/tablespacename", IBD, true},
      {"/this/is/a/path", "dbname\\tablespacename", IBD, true},
      {long_path, nullptr, IBD, false},
      {long_path, "tablespacename", IBD, false},
      {long_path, "tablespacename", IBD, true},
  };

  for (const auto &sample : samples) {
    char *path =
        MF(sample.m_dir, sample.m_name, sample.m_suffix, sample.m_trim);
    DISPLAY;
  }
}
#endif /* UNIV_ENABLE_UNIT_TEST_MAKE_FILEPATH */
9646 
9647 /** Release the reserved free extents.
9648 @param[in]	n_reserved	number of reserved extents */
release_free_extents(ulint n_reserved)9649 void fil_space_t::release_free_extents(ulint n_reserved) {
9650 #ifndef UNIV_HOTBACKUP
9651   ut_ad(rw_lock_own(&latch, RW_LOCK_X));
9652 #endif /* !UNIV_HOTBACKUP */
9653 
9654   ut_a(n_reserved < std::numeric_limits<uint32_t>::max());
9655   ut_a(n_reserved_extents >= n_reserved);
9656 
9657   n_reserved_extents -= (uint32_t)n_reserved;
9658 }
9659 
9660 #ifndef UNIV_HOTBACKUP
9661 
9662 #ifdef UNIV_DEBUG
9663 
9664 /** Print the extent descriptor pages of this tablespace into
9665 the given file.
9666 @param[in]	filename	the output file name. */
print_xdes_pages(const char * filename) const9667 void fil_space_t::print_xdes_pages(const char *filename) const {
9668   std::ofstream out(filename);
9669   print_xdes_pages(out);
9670 }
9671 
9672 /** Print the extent descriptor pages of this tablespace into
9673 the given file.
9674 @param[in]	out	the output file name.
9675 @return	the output stream. */
print_xdes_pages(std::ostream & out) const9676 std::ostream &fil_space_t::print_xdes_pages(std::ostream &out) const {
9677   mtr_t mtr;
9678   const page_size_t page_size(flags);
9679 
9680   mtr_start(&mtr);
9681 
9682   for (page_no_t i = 0; i < 100; ++i) {
9683     page_no_t xdes_page_no = i * UNIV_PAGE_SIZE;
9684 
9685     if (xdes_page_no >= size) {
9686       break;
9687     }
9688 
9689     buf_block_t *xdes_block =
9690         buf_page_get(page_id_t(id, xdes_page_no), page_size, RW_S_LATCH, &mtr);
9691 
9692     page_t *page = buf_block_get_frame(xdes_block);
9693 
9694     ulint page_type = fil_page_get_type(page);
9695 
9696     switch (page_type) {
9697       case FIL_PAGE_TYPE_ALLOCATED:
9698 
9699         ut_ad(xdes_page_no >= free_limit);
9700 
9701         mtr_commit(&mtr);
9702         return (out);
9703 
9704       case FIL_PAGE_TYPE_FSP_HDR:
9705       case FIL_PAGE_TYPE_XDES:
9706         break;
9707       default:
9708         ut_error;
9709     }
9710 
9711     xdes_page_print(out, page, xdes_page_no, &mtr);
9712   }
9713 
9714   mtr_commit(&mtr);
9715   return (out);
9716 }
9717 #endif /* UNIV_DEBUG */
9718 
9719 /** Initialize the table space encryption
9720 @param[in,out]	space		Tablespace instance */
fil_tablespace_encryption_init(const fil_space_t * space)9721 static void fil_tablespace_encryption_init(const fil_space_t *space) {
9722   for (auto &key : *recv_sys->keys) {
9723     if (key.space_id != space->id) {
9724       continue;
9725     }
9726 
9727     dberr_t err = DB_SUCCESS;
9728 
9729     ut_ad(!fsp_is_system_tablespace(space->id));
9730 
9731     /* Here we try to populate space tablespace_key which is read during
9732     REDO scan.
9733 
9734     Consider following scenario:
9735     1. Alter tablespce .. encrypt=y (KEY1)
9736     2. Alter tablespce .. encrypt=n
9737     3. Alter tablespce .. encrypt=y (KEY2)
9738 
9739     Lets say there is a crash after (3) is finished successfully. All the pages
9740     of tablespace are encrypted with KEY2.
9741 
9742     During recovery:
9743     ----------------
9744     - Let's say we scanned till REDO of (1) but couldn't reach to REDO of (3).
9745     - So we've got tablespace key as KEY1.
9746     - Note, tablespace pages were encrypted using KEY2 which would have been
9747       found on page 0 and thus loaded already in file_space_t.
9748 
9749     If we overwrite this space key (KEY2) with the one we got from REDO log
9750     scan (KEY1), then when we try to read a page from Disk, we will try to
9751     decrypt it using KEY1 whereas page was encrypted with KEY2. ERROR.
9752 
9753     Therefore, for a general tablespace, if tablespace key is already populated
9754     it is the latest key and should be used instead of the one read during
9755     REDO log scan.
9756 
9757     For file-per-table tablespace, which is not INPLACE algorithm, copy what
9758     is found on REDO Log.
9759     */
9760     if (fsp_is_file_per_table(space->id, space->flags) ||
9761         space->encryption_klen == 0) {
9762       err = fil_set_encryption(space->id, Encryption::AES, key.ptr, key.iv);
9763     }
9764 
9765     if (err != DB_SUCCESS) {
9766       ib::error(ER_IB_MSG_343) << "Can't set encryption information"
9767                                << " for tablespace" << space->name << "!";
9768     }
9769   }
9770 }
9771 
9772 /** Modify table name in Innodb persistent stat tables, if needed. Required
9773 when partitioned table file names from old versions are modified to change
9774 the letter case.
9775 @param[in]	old_path	path to old file
9776 @param[in]	new_path	path to new file */
fil_adjust_partition_stat(const std::string & old_path,const std::string & new_path)9777 static void fil_adjust_partition_stat(const std::string &old_path,
9778                                       const std::string &new_path) {
9779   char errstr[FN_REFLEN];
9780   std::string path;
9781 
9782   /* Skip if not IBD file extension. */
9783   if (!Fil_path::has_suffix(IBD, old_path) ||
9784       !Fil_path::has_suffix(IBD, new_path)) {
9785     return;
9786   }
9787 
9788   /* Check if partitioned table. */
9789   if (!dict_name::is_partition(old_path) ||
9790       !dict_name::is_partition(new_path)) {
9791     return;
9792   }
9793 
9794   std::string old_name;
9795   path.assign(old_path);
9796   if (!Fil_path::parse_file_path(path, IBD, old_name)) {
9797     return;
9798   }
9799   ut_ad(!old_name.empty());
9800 
9801   std::string new_name;
9802   path.assign(new_path);
9803   if (!Fil_path::parse_file_path(path, IBD, new_name)) {
9804     return;
9805   }
9806   ut_ad(!new_name.empty());
9807 
9808   /* Required for case insensitive file system where file path letter case
9809   doesn't matter. We need to keep the name in stat table consistent. */
9810   dict_name::rebuild(new_name);
9811 
9812   if (old_name.compare(new_name) != 0) {
9813     dict_stats_rename_table(old_name.c_str(), new_name.c_str(), errstr,
9814                             sizeof(errstr));
9815   }
9816 }
9817 
9818 /** Update the DD if any files were moved to a new location.
9819 Free the Tablespace_files instance.
9820 @param[in]	read_only_mode	true if InnoDB is started in read only mode.
9821 @return DB_SUCCESS if all OK */
prepare_open_for_business(bool read_only_mode)9822 dberr_t Fil_system::prepare_open_for_business(bool read_only_mode) {
9823   if (read_only_mode && !m_moved.empty()) {
9824     ib::error(ER_IB_MSG_344)
9825         << m_moved.size() << " files have been relocated"
9826         << " and the server has been started in read"
9827         << " only mode. Cannot update the data dictionary.";
9828 
9829     return (DB_READ_ONLY);
9830   }
9831 
9832   trx_t *trx = check_trx_exists(current_thd);
9833 
9834   TrxInInnoDB trx_in_innodb(trx);
9835 
9836   /* The transaction should not be active yet, start it */
9837 
9838   trx->isolation_level = trx_t::READ_UNCOMMITTED;
9839 
9840   trx_start_if_not_started_xa(trx, false);
9841 
9842   size_t count = 0;
9843   size_t failed = 0;
9844   size_t batch_size = 0;
9845   bool print_msg = false;
9846   auto start_time = ut_time_monotonic();
9847 
9848   /* If some file paths have changed then update the DD */
9849   for (auto &tablespace : m_moved) {
9850     dberr_t err;
9851 
9852     auto old_path = std::get<dd_fil::OLD_PATH>(tablespace);
9853 
9854     auto space_name = std::get<dd_fil::SPACE_NAME>(tablespace);
9855 
9856     auto new_path = std::get<dd_fil::NEW_PATH>(tablespace);
9857     auto object_id = std::get<dd_fil::OBJECT_ID>(tablespace);
9858 
9859     /* We already have the space name in system cs. */
9860     err = dd_tablespace_rename(object_id, true, space_name.c_str(),
9861                                new_path.c_str());
9862 
9863     if (err != DB_SUCCESS) {
9864       ib::error(ER_IB_MSG_345) << "Unable to update tablespace ID"
9865                                << " " << object_id << " "
9866                                << " '" << old_path << "' to"
9867                                << " '" << new_path << "'";
9868 
9869       ++failed;
9870     }
9871 
9872     /* Update persistent stat table if table name is modified. */
9873     fil_adjust_partition_stat(old_path, new_path);
9874 
9875     ++count;
9876 
9877     if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
9878       ib::info(ER_IB_MSG_346) << "Processed " << count << "/" << m_moved.size()
9879                               << " tablespace paths. Failures " << failed;
9880 
9881       start_time = ut_time_monotonic();
9882       print_msg = true;
9883     }
9884 
9885     ++batch_size;
9886 
9887     if (batch_size > 10000) {
9888       innobase_commit_low(trx);
9889 
9890       ib::info(ER_IB_MSG_347) << "Committed : " << batch_size;
9891 
9892       batch_size = 0;
9893 
9894       trx_start_if_not_started_xa(trx, false);
9895     }
9896   }
9897 
9898   if (batch_size > 0) {
9899     ib::info(ER_IB_MSG_348) << "Committed : " << batch_size;
9900   }
9901 
9902   innobase_commit_low(trx);
9903 
9904   if (print_msg) {
9905     ib::info(ER_IB_MSG_349) << "Updated " << count << " tablespace paths"
9906                             << ", failures " << failed;
9907   }
9908 
9909   return (failed == 0 ? DB_SUCCESS : DB_ERROR);
9910 }
9911 
9912 /** Free the Tablespace_files instance.
9913 @param[in]	read_only_mode	true if InnoDB is started in read only mode.
9914 @return DB_SUCCESS if all OK */
fil_open_for_business(bool read_only_mode)9915 dberr_t fil_open_for_business(bool read_only_mode) {
9916   return (fil_system->prepare_open_for_business(read_only_mode));
9917 }
9918 
9919 /** Replay a file rename operation for ddl replay.
9920 @param[in]	page_id		Space ID and first page number in the file
9921 @param[in]	old_name	old file name
9922 @param[in]	new_name	new file name
9923 @return	whether the operation was successfully applied (the name did not
9924 exist, or new_name did not exist and name was successfully renamed to
9925 new_name)  */
fil_op_replay_rename_for_ddl(const page_id_t & page_id,const char * old_name,const char * new_name)9926 bool fil_op_replay_rename_for_ddl(const page_id_t &page_id,
9927                                   const char *old_name, const char *new_name) {
9928   space_id_t space_id = page_id.space();
9929   fil_space_t *space = fil_space_get(space_id);
9930 
9931   if (space == nullptr && !fil_system->open_for_recovery(space_id)) {
9932     ib::info(ER_IB_MSG_350)
9933         << "Can not find space with space ID " << space_id
9934         << " when replaying the DDL log "
9935         << "rename from '" << old_name << "' to '" << new_name << "'";
9936 
9937     return (true);
9938   }
9939 
9940   return (fil_op_replay_rename(page_id, old_name, new_name));
9941 }
9942 
9943 /** Lookup the tablespace ID for recovery and DDL log apply.
9944 @param[in]	space_id		Tablespace ID to lookup
9945 @return true if the space ID is known. */
lookup_for_recovery(space_id_t space_id)9946 bool Fil_system::lookup_for_recovery(space_id_t space_id) {
9947   ut_ad(recv_recovery_is_on() || Log_DDL::is_in_recovery());
9948 
9949   /* Single threaded code, no need to acquire mutex. */
9950   const auto result = get_scanned_files(space_id);
9951 
9952   if (recv_recovery_is_on()) {
9953     const auto &end = recv_sys->deleted.end();
9954     const auto &it = recv_sys->deleted.find(space_id);
9955 
9956     if (result.second == nullptr) {
9957       /* If it wasn't deleted after finding it on disk then
9958       we tag it as missing. */
9959 
9960       if (it == end) {
9961         recv_sys->missing_ids.insert(space_id);
9962       }
9963 
9964       return (false);
9965     }
9966 
9967     /* Check that it wasn't deleted. */
9968 
9969     return (it == end);
9970   }
9971 
9972   return (result.second != nullptr);
9973 }
9974 
9975 /** Lookup the tablespace ID.
9976 @param[in]	space_id		Tablespace ID to lookup
9977 @return true if the space ID is known. */
fil_tablespace_lookup_for_recovery(space_id_t space_id)9978 bool fil_tablespace_lookup_for_recovery(space_id_t space_id) {
9979   return (fil_system->lookup_for_recovery(space_id));
9980 }
9981 
9982 /** Open a tablespace that has a redo/DDL log record to apply.
9983 @param[in]	space_id		Tablespace ID
9984 @return true if the open was successful */
open_for_recovery(space_id_t space_id)9985 bool Fil_system::open_for_recovery(space_id_t space_id) {
9986   ut_ad(recv_recovery_is_on() || Log_DDL::is_in_recovery());
9987 
9988   if (!lookup_for_recovery(space_id)) {
9989     return (false);
9990   }
9991 
9992   const auto result = get_scanned_files(space_id);
9993 
9994   /* Duplicates should have been sorted out before start of recovery. */
9995   ut_a(result.second->size() == 1);
9996 
9997   const auto &filename = result.second->front();
9998   const std::string path = result.first + filename;
9999 
10000   fil_space_t *space;
10001 
10002   auto status = ibd_open_for_recovery(space_id, path, space);
10003 
10004   if (status == FIL_LOAD_OK) {
10005     /* For encrypted tablespace, set key and iv. */
10006     if (FSP_FLAGS_GET_ENCRYPTION(space->flags) && recv_sys->keys != nullptr) {
10007       fil_tablespace_encryption_init(space);
10008     }
10009 
10010     if (!recv_sys->dblwr->empty()) {
10011       recv_sys->dblwr->recover(space);
10012 
10013     } else {
10014       ib::info(ER_IB_MSG_DBLWR_1317) << "DBLWR recovery skipped for "
10015                                      << space->name << " ID: " << space->id;
10016     }
10017 
10018     return (true);
10019   } else if (status == FIL_LOAD_INVALID_ENCRYPTION_META) {
10020     ib::error() << "Invalid encryption metadata in tablespace header.";
10021     exit(EXIT_FAILURE);
10022   }
10023 
10024   return (false);
10025 }
10026 
10027 /** Open a tablespace that has a redo log record to apply.
10028 @param[in]	space_id		Tablespace ID
10029 @return true if the open was successful */
fil_tablespace_open_for_recovery(space_id_t space_id)10030 bool fil_tablespace_open_for_recovery(space_id_t space_id) {
10031   return (fil_system->open_for_recovery(space_id));
10032 }
10033 
fil_tablespace_path_equals(dd::Object_id dd_object_id,space_id_t space_id,const char * space_name,ulint fsp_flags,std::string old_path,std::string * new_path)10034 Fil_state fil_tablespace_path_equals(dd::Object_id dd_object_id,
10035                                      space_id_t space_id,
10036                                      const char *space_name, ulint fsp_flags,
10037                                      std::string old_path,
10038                                      std::string *new_path) {
10039   ut_ad((fsp_is_ibd_tablespace(space_id) &&
10040          Fil_path::has_suffix(IBD, old_path)) ||
10041         fsp_is_undo_tablespace(space_id));
10042 
10043   /* Watch out for implicit undo tablespaces that are created during startup.
10044   They will not be in the list of scanned files.  But the DD might need to be
10045   updated if the undo directory is different now from when the database was
10046   initialized.  The DD will be updated if we put it in fil_system->moved. */
10047   if (fsp_is_undo_tablespace(space_id)) {
10048     undo::spaces->s_lock();
10049     space_id_t space_num = undo::id2num(space_id);
10050     undo::Tablespace *undo_space = undo::spaces->find(space_num);
10051 
10052     if (undo_space != nullptr && undo_space->is_new()) {
10053       *new_path = undo_space->file_name();
10054       Fil_state state = ((old_path.compare(*new_path) == 0) ? Fil_state::MATCHES
10055                                                             : Fil_state::MOVED);
10056       undo::spaces->s_unlock();
10057       return (state);
10058     }
10059     undo::spaces->s_unlock();
10060   }
10061 
10062   /* Single threaded code, no need to acquire mutex. */
10063   const auto &end = recv_sys->deleted.end();
10064   const auto &it = recv_sys->deleted.find(space_id);
10065   const auto result = fil_system->get_scanned_files(space_id);
10066 
10067   if (result.second == nullptr) {
10068     /* The file was not scanned but the DD has the tablespace. Either;
10069     1. This file is missing
10070     2. The file could not be opened because of encryption or something else,
10071     3. The path is not included in --innodb-directories.
10072     We need to check if the DD path is valid before we tag the file
10073     as missing. */
10074 
10075     if (Fil_path::get_file_type(old_path) == OS_FILE_TYPE_FILE) {
10076       /* This file from the DD exists where the DD thinks it is. It will be
10077       opened later.  Make some noise if the location is unknown. */
10078       if (!fil_path_is_known(old_path)) {
10079         ib::warn(ER_IB_MSG_UNPROTECTED_LOCATION_ALLOWED, old_path.c_str(),
10080                  space_name);
10081       }
10082       return (Fil_state::MATCHES);
10083     }
10084 
10085     /* If it wasn't deleted during redo apply, we tag it as missing. */
10086 
10087     if (it == end && recv_recovery_is_on()) {
10088       recv_sys->missing_ids.insert(space_id);
10089     }
10090 
10091     return (Fil_state::MISSING);
10092   }
10093 
10094   /* Check if it was deleted according to the redo log. */
10095   if (it != end) {
10096     return (Fil_state::DELETED);
10097   }
10098 
10099   /* A file with this space_id was found during scanning.
10100   Validate its location and check if it was moved from where
10101   the DD thinks it is.
10102 
10103   Don't compare the full filename, there can be a mismatch if
10104   there was a DDL in progress and we will end up renaming the path
10105   in the DD dictionary. Such renames should be handled by the
10106   atomic DDL "ddl_log". */
10107 
10108   std::string old_dir{old_path};
10109 
10110   /* Ignore the filename component of the old path. */
10111   auto pos = old_dir.find_last_of(Fil_path::SEPARATOR);
10112   if (pos == std::string::npos) {
10113     old_dir = MySQL_datadir_path;
10114   } else {
10115     old_dir.resize(pos + 1);
10116     ut_ad(Fil_path::is_separator(old_dir.back()));
10117   }
10118   old_dir = Fil_path::get_real_path(old_dir);
10119 
10120   /* Build the new path from the scan path and the found path. */
10121   std::string new_dir{result.first};
10122 
10123   ut_ad(Fil_path::is_separator(new_dir.back()));
10124 
10125   new_dir.append(result.second->front());
10126 
10127   new_dir = Fil_path::get_real_path(new_dir);
10128 
10129   /* Do not use a datafile that is in the wrong place. */
10130   if (!Fil_path::is_valid_location(space_name, space_id, fsp_flags, new_dir)) {
10131     return (Fil_state::MISSING);
10132   }
10133 
10134   /* Ignore the filename component of the new path. */
10135   pos = new_dir.find_last_of(Fil_path::SEPARATOR);
10136 
10137   ut_ad(pos != std::string::npos);
10138 
10139   new_dir.resize(pos + 1);
10140 
10141   if (old_dir.compare(new_dir) != 0) {
10142     *new_path = result.first + result.second->front();
10143     return (Fil_state::MOVED);
10144   }
10145 
10146   *new_path = old_path;
10147   return (Fil_state::MATCHES);
10148 }
10149 
fil_add_moved_space(dd::Object_id dd_object_id,space_id_t space_id,const char * space_name,const std::string & old_path,const std::string & new_path)10150 void fil_add_moved_space(dd::Object_id dd_object_id, space_id_t space_id,
10151                          const char *space_name, const std::string &old_path,
10152                          const std::string &new_path) {
10153   /* Keep space_name in system cs. We handle it while modifying DD. */
10154   fil_system->moved(dd_object_id, space_id, space_name, old_path, new_path);
10155 }
10156 
fil_update_partition_name(space_id_t space_id,uint32_t fsp_flags,bool update_space,std::string & space_name,std::string & dd_path)10157 bool fil_update_partition_name(space_id_t space_id, uint32_t fsp_flags,
10158                                bool update_space, std::string &space_name,
10159                                std::string &dd_path) {
10160 #ifdef _WIN32
10161   /* Safe check. Never needed on Windows for path. */
10162   if (!update_space) {
10163     return (false);
10164   }
10165 #endif /* WIN32 */
10166 
10167   /* Never needed in case insensitive file system for path. */
10168   if (!update_space && lower_case_file_system) {
10169     return (false);
10170   }
10171 
10172   /* Only needed for file per table. */
10173   if (update_space && !fsp_is_file_per_table(space_id, fsp_flags)) {
10174     return (false);
10175   }
10176 
10177   /* Extract dictionary name schema_name/table_name from dd path. */
10178   std::string table_name;
10179 
10180   if (!Fil_path::parse_file_path(dd_path, IBD, table_name)) {
10181     /* Not a valid file-per-table IBD path */
10182     return (false);
10183   }
10184   ut_ad(!table_name.empty());
10185 
10186   /* Only needed for partition file. */
10187   if (!dict_name::is_partition(table_name)) {
10188     return (false);
10189   }
10190 
10191   /* Rebuild dictionary name to convert partition names to lower case. */
10192   dict_name::rebuild(table_name);
10193 
10194   if (update_space) {
10195     /* Rebuild space name if required. */
10196     dict_name::rebuild_space(table_name, space_name);
10197   }
10198 
10199   /* No need to update file name for lower case file system. */
10200   if (lower_case_file_system) {
10201     return (false);
10202   }
10203 
10204   /* Rebuild path and compare. */
10205   std::string table_path = Fil_path::make_new_path(dd_path, table_name, IBD);
10206   ut_ad(!table_path.empty());
10207 
10208   if (dd_path.compare(table_path) != 0) {
10209     /* Validate that the file exists. */
10210     if (os_file_exists(table_path.c_str())) {
10211       dd_path.assign(table_path);
10212       return (true);
10213 
10214     } else {
10215       ib::warn(ER_IB_WARN_OPEN_PARTITION_FILE, table_path.c_str());
10216     }
10217   }
10218 
10219   return (false);
10220 }
10221 
10222 #endif /* !UNIV_HOTBACKUP */
10223 
10224 /** This function should be called after recovery has completed.
10225 Check for tablespace files for which we did not see any MLOG_FILE_DELETE
10226 or MLOG_FILE_RENAME record. These could not be recovered.
10227 @return true if there were some filenames missing for which we had to
10228         ignore redo log records during the apply phase */
check_missing_tablespaces()10229 bool Fil_system::check_missing_tablespaces() {
10230   bool missing = false;
10231   const auto end = recv_sys->deleted.end();
10232 
10233   /* Called in single threaded mode, no need to acquire the mutex. */
10234 
10235   recv_sys->dblwr->check_missing_tablespaces();
10236 
10237   for (auto space_id : recv_sys->missing_ids) {
10238     if (recv_sys->deleted.find(space_id) != end) {
10239       continue;
10240     }
10241 
10242     const auto result = get_scanned_files(space_id);
10243 
10244     if (result.second == nullptr) {
10245       if (fsp_is_undo_tablespace(space_id)) {
10246         /* This could happen if an undo truncate is in progress because
10247         undo tablespace construction is not redo logged.  The DD is updated
10248         at the end and may be out of sync. */
10249         continue;
10250       }
10251 
10252       ib::error(ER_IB_MSG_354) << "Could not find any file associated with"
10253                                << " the tablespace ID: " << space_id;
10254       missing = true;
10255 
10256     } else {
10257       ut_a(!result.second->empty());
10258     }
10259   }
10260 
10261   return (missing);
10262 }
10263 
10264 /** This function should be called after recovery has completed.
10265 Check for tablespace files for which we did not see any MLOG_FILE_DELETE
10266 or MLOG_FILE_RENAME record. These could not be recovered
10267 @return true if there were some filenames missing for which we had to
10268         ignore redo log records during the apply phase */
fil_check_missing_tablespaces()10269 bool fil_check_missing_tablespaces() {
10270   return (fil_system->check_missing_tablespaces());
10271 }
10272 
10273 /** Parse a file name retrieved from a MLOG_FILE_* record,
10274 and return the absolute file path and tablespace name
10275 @param[in]  file_name path emitted by the redo log
10276 @param[in]  flags tablespace flags emitted by the redo log
10277 @param[in]  space_id tablesapce ID emitted by the redo log
10278 @param[out] absolute_path absolute path of tablespace
10279 @param[out] tablespace_name name in the form of database/table */
fil_make_abs_file_path(const char * file_name,ulint flags,space_id_t space_id,std::string & absolute_path,std::string & tablespace_name)10280 static void fil_make_abs_file_path(const char *file_name, ulint flags,
10281                                    space_id_t space_id,
10282                                    std::string &absolute_path,
10283                                    std::string &tablespace_name) {
10284   Datafile df;
10285 
10286   df.set_filepath(file_name);
10287   df.set_flags(flags);
10288   df.set_space_id(space_id);
10289   df.set_name(nullptr);
10290 
10291   absolute_path = df.filepath();
10292   tablespace_name = df.name();
10293 }
10294 
10295 /** Redo a tablespace create.
10296 @param[in]	ptr		redo log record
10297 @param[in]	end		end of the redo log buffer
10298 @param[in]	page_id		Tablespace Id and first page in file
10299 @param[in]	parsed_bytes	Number of bytes parsed so far
10300 @param[in]	parse_only	Don't apply, parse only
10301 @return pointer to next redo log record
10302 @retval nullptr if this log record was truncated */
fil_tablespace_redo_create(byte * ptr,const byte * end,const page_id_t & page_id,ulint parsed_bytes,bool parse_only)10303 byte *fil_tablespace_redo_create(byte *ptr, const byte *end,
10304                                  const page_id_t &page_id, ulint parsed_bytes,
10305                                  bool parse_only) {
10306   ut_a(page_id.page_no() == 0);
10307 
10308   /* We never recreate the system tablespace. */
10309   ut_a(page_id.space() != TRX_SYS_SPACE);
10310 
10311   ut_a(parsed_bytes != ULINT_UNDEFINED);
10312 
10313   /* Where 6 = flags (uint32_t) + name len (uint16_t). */
10314   if (end <= ptr + 6) {
10315     return (nullptr);
10316   }
10317 
10318 #if defined(UNIV_HOTBACKUP) || defined(XTRABACKUP)
10319   uint32_t flags = mach_read_from_4(ptr);
10320 #else
10321   /* Skip the flags, not used here. */
10322 #endif /* UNIV_HOTBACKUP || XTRABACKUP */
10323 
10324   ptr += 4;
10325 
10326   ulint len = mach_read_from_2(ptr);
10327 
10328   ptr += 2;
10329 
10330   /* Do we have the full/valid file name. */
10331   if (end < ptr + len || len < 5) {
10332     if (len < 5) {
10333       char name[6];
10334 
10335       snprintf(name, sizeof(name), "%.*s", (int)len, ptr);
10336 
10337       ib::error(ER_IB_MSG_355) << "MLOG_FILE_CREATE : Invalid file name."
10338                                << " Length (" << len << ") must be >= 5"
10339                                << " and end in '.ibd'. File name in the"
10340                                << " redo log is '" << name << "'";
10341 
10342       recv_sys->found_corrupt_log = true;
10343     }
10344 
10345     return (nullptr);
10346   }
10347 
10348   char *name = reinterpret_cast<char *>(ptr);
10349 
10350   Fil_path::normalize(name);
10351 
10352   ptr += len;
10353 
10354   if (!(Fil_path::has_suffix(IBD, name) ||
10355         fsp_is_undo_tablespace(page_id.space()))) {
10356     recv_sys->found_corrupt_log = true;
10357 
10358     return (nullptr);
10359   }
10360 
10361   if (parse_only) {
10362     return (ptr);
10363   }
10364 #ifdef UNIV_HOTBACKUP
10365 
10366   meb_tablespace_redo_create(page_id, flags, name);
10367 
10368 #else  /* !UNIV_HOTBACKUP */
10369 
10370   const auto files = fil_system->get_scanned_files(page_id.space());
10371 
10372   std::string abs_file_path;
10373   std::string tablespace_name;
10374 
10375   fil_make_abs_file_path(name, flags, page_id.space(), abs_file_path,
10376                          tablespace_name);
10377 
10378   if (!srv_backup_mode &&
10379       (files.second == nullptr || files.second->size() == 0)) {
10380     abs_file_path = xb_tablespace_backup_file_path(abs_file_path.c_str());
10381     bool exists = Fil_path(abs_file_path).is_file_and_exists();
10382 
10383     if (!exists && !fil_space_get(page_id.space())) {
10384       ib::info() << "Creating the tablespace : " << abs_file_path
10385                  << ", space_id : " << page_id.space();
10386 
10387       dberr_t ret = fil_ibd_create(page_id.space(), tablespace_name.c_str(),
10388                                    abs_file_path.c_str(), flags,
10389                                    FIL_IBD_FILE_INITIAL_SIZE);
10390 
10391       if (ret != DB_SUCCESS) {
10392         ib::fatal() << "Could not create the tablespace : " << abs_file_path
10393                     << " with space Id : " << page_id.space();
10394       }
10395 
10396       bool success = fil_system->insert(page_id.space(), abs_file_path);
10397 
10398       if (!success) {
10399         ib::fatal() << "Could not insert the tablespace : " << abs_file_path
10400                     << " with space Id : " << page_id.space() << " to "
10401                     << "the list of known tablespaces";
10402       }
10403     }
10404   }
10405 
10406   if (srv_backup_mode) {
10407     xb_tablespace_map_add(abs_file_path.c_str(), tablespace_name.c_str());
10408   }
10409 
10410   const auto result = fil_system->get_scanned_files(page_id.space());
10411 
10412   if (result.second == nullptr) {
10413     /* No file maps to this tablespace ID. It's possible that
10414     the file was deleted later or is misisng. */
10415 
10416     return (ptr);
10417   }
10418 
10419   /* Duplicates should have been sorted out before we get here. */
10420   ut_a(result.second->size() == 1);
10421 
10422   /* It's possible that the tablespace file was renamed later. */
10423   if (result.second->front().compare(abs_file_path) == 0) {
10424     bool success;
10425 
10426     success = fil_tablespace_open_for_recovery(page_id.space());
10427 
10428     if (!success) {
10429       ib::info(ER_IB_MSG_356) << "Create '" << abs_file_path << "' failed!";
10430     }
10431   }
10432 #endif /* UNIV_HOTBACKUP */
10433 
10434   return (ptr);
10435 }
10436 
10437 /** Redo a tablespace rename.
10438 This function doesn't do anything, simply parses the redo log record.
10439 @param[in]	ptr		redo log record
10440 @param[in]	end		end of the redo log buffer
10441 @param[in]	page_id		Tablespace Id and first page in file
10442 @param[in]	parsed_bytes	Number of bytes parsed so far
10443 @param[in]	parse_only	Don't apply, parse only
10444 @return pointer to next redo log record
10445 @retval nullptr if this log record was truncated */
fil_tablespace_redo_rename(byte * ptr,const byte * end,const page_id_t & page_id,ulint parsed_bytes,bool parse_only)10446 byte *fil_tablespace_redo_rename(byte *ptr, const byte *end,
10447                                  const page_id_t &page_id, ulint parsed_bytes,
10448                                  bool parse_only) {
10449   ut_a(page_id.page_no() == 0);
10450 
10451   /* We never recreate the system tablespace. */
10452   ut_a(page_id.space() != TRX_SYS_SPACE);
10453 
10454   ut_a(parsed_bytes != ULINT_UNDEFINED);
10455 
10456   /* Where 2 = from name len (uint16_t). */
10457   if (end <= ptr + 2) {
10458     return (nullptr);
10459   }
10460 
10461   /* Read and check the RENAME FROM_NAME. */
10462   ulint from_len = mach_read_from_2(ptr);
10463   ptr += 2;
10464   char *from_name = reinterpret_cast<char *>(ptr);
10465 
10466   /* Check if the 'from' file name is valid. */
10467   if (end < ptr + from_len) {
10468     return (nullptr);
10469   }
10470 
10471   std::string whats_wrong;
10472   constexpr char more_than_five[] = "The length must be >= 5.";
10473   constexpr char end_with_ibd[] = "The file suffix must be '.ibd'.";
10474   if (from_len < 5) {
10475     recv_sys->found_corrupt_log = true;
10476     whats_wrong.assign(more_than_five);
10477   } else {
10478     std::string name{from_name};
10479 
10480     if (!Fil_path::has_suffix(IBD, name)) {
10481       recv_sys->found_corrupt_log = true;
10482       whats_wrong.assign(end_with_ibd);
10483     }
10484   }
10485 
10486   if (recv_sys->found_corrupt_log) {
10487     ib::info(ER_IB_MSG_357) << "MLOG_FILE_RENAME: Invalid {from} file name: '"
10488                             << from_name << "'. " << whats_wrong;
10489 
10490     return (nullptr);
10491   }
10492 
10493   ptr += from_len;
10494   Fil_path::normalize(from_name);
10495 
10496   /* Read and check the RENAME TO_NAME. */
10497   ulint to_len = mach_read_from_2(ptr);
10498   ptr += 2;
10499   char *to_name = reinterpret_cast<char *>(ptr);
10500 
10501   /* Check if the 'to' file name is valid. */
10502   if (end < ptr + to_len) {
10503     return (nullptr);
10504   }
10505 
10506   if (to_len < 5) {
10507     recv_sys->found_corrupt_log = true;
10508     whats_wrong.assign(more_than_five);
10509   } else {
10510     std::string name{to_name};
10511 
10512     if (!Fil_path::has_suffix(IBD, name)) {
10513       recv_sys->found_corrupt_log = true;
10514       whats_wrong.assign(end_with_ibd);
10515     }
10516   }
10517 
10518   if (recv_sys->found_corrupt_log) {
10519     ib::info(ER_IB_MSG_357) << "MLOG_FILE_RENAME: Invalid {to} file name: '"
10520                             << to_name << "'. " << whats_wrong;
10521 
10522     return (nullptr);
10523   }
10524 
10525   ptr += to_len;
10526   Fil_path::normalize(to_name);
10527 
10528 #ifdef UNIV_HOTBACKUP
10529 
10530   if (!parse_only) {
10531     meb_tablespace_redo_rename(page_id, from_name, to_name);
10532   }
10533 
10534 #else /* !UNIV_HOTBACKUP */
10535 
10536   /* Update filename with correct partition case, if needed. */
10537   std::string to_name_str(to_name);
10538   std::string space_name;
10539   fil_update_partition_name(page_id.space(), 0, false, space_name, to_name_str);
10540 
10541   if (from_len == to_len && strncmp(to_name, from_name, to_len) == 0) {
10542     ib::error(ER_IB_MSG_360)
10543         << "MLOG_FILE_RENAME: The from and to name are the"
10544         << " same: '" << from_name << "', '" << to_name << "'";
10545 
10546     recv_sys->found_corrupt_log = true;
10547 
10548     return (nullptr);
10549   }
10550 
10551 #endif /* UNIV_HOTBACKUP */
10552 
10553   if (!srv_backup_mode) {
10554     bool success;
10555 
10556     success = fil_tablespace_open_for_recovery(page_id.space());
10557 
10558     if (!success) {
10559       ib::info() << "Rename failed. Cannot find '" << from_name << "'!";
10560       return (ptr);
10561     }
10562 
10563     fil_space_t *space = fil_space_get(page_id.space());
10564 
10565     ut_a(space != nullptr);
10566 
10567     xb_tablespace_map_delete(space->name);
10568     std::string abs_file_path;
10569     std::string tablespace_name;
10570 
10571     fil_make_abs_file_path(to_name, space->flags, space->id, abs_file_path,
10572                            tablespace_name);
10573 
10574     success = fil_op_replay_rename(page_id, from_name, to_name);
10575     ut_a(success);
10576 
10577     xb_tablespace_map_add(abs_file_path.c_str(), tablespace_name.c_str());
10578 
10579     fil_space_free(page_id.space(), false);
10580 
10581     success = fil_system->erase_path(page_id.space());
10582     ut_a(success);
10583 
10584     success = fil_system->insert(page_id.space(), to_name);
10585 
10586     if (!success) {
10587       ib::fatal() << "Could not insert the tablespace : " << to_name
10588                   << " with space Id : " << page_id.space() << " to "
10589                   << "the list of known tablespaces";
10590     }
10591   }
10592 
10593   return (ptr);
10594 }
10595 
10596 /** Redo a tablespace delete.
10597 @param[in]	ptr		redo log record
10598 @param[in]	end		end of the redo log buffer
10599 @param[in]	page_id		Tablespace Id and first page in file
10600 @param[in]	parsed_bytes	Number of bytes parsed so far
10601 @param[in]	parse_only	Don't apply, parse only
10602 @return pointer to next redo log record
10603 @retval nullptr if this log record was truncated */
fil_tablespace_redo_delete(byte * ptr,const byte * end,const page_id_t & page_id,ulint parsed_bytes,bool parse_only)10604 byte *fil_tablespace_redo_delete(byte *ptr, const byte *end,
10605                                  const page_id_t &page_id, ulint parsed_bytes,
10606                                  bool parse_only) {
10607   ut_a(page_id.page_no() == 0);
10608 
10609   /* We never recreate the system tablespace. */
10610   ut_a(page_id.space() != TRX_SYS_SPACE);
10611 
10612   ut_a(parsed_bytes != ULINT_UNDEFINED);
10613 
10614   /* Where 2 =  len (uint16_t). */
10615   if (end <= ptr + 2) {
10616     return (nullptr);
10617   }
10618 
10619   ulint len = mach_read_from_2(ptr);
10620 
10621   ptr += 2;
10622 
10623   /* Do we have the full/valid file name. */
10624   if (end < ptr + len || len < 5) {
10625     if (len < 5) {
10626       char name[6];
10627 
10628       snprintf(name, sizeof(name), "%.*s", (int)len, ptr);
10629 
10630       ib::error(ER_IB_MSG_362) << "MLOG_FILE_DELETE : Invalid file name."
10631                                << " Length (" << len << ") must be >= 5"
10632                                << " and end in '.ibd'. File name in the"
10633                                << " redo log is '" << name << "'";
10634     }
10635 
10636     return (nullptr);
10637   }
10638 
10639   char *name = reinterpret_cast<char *>(ptr);
10640 
10641   Fil_path::normalize(name);
10642 
10643   ptr += len;
10644 
10645   if (!(Fil_path::has_suffix(IBD, name) ||
10646         fsp_is_undo_tablespace(page_id.space()))) {
10647     recv_sys->found_corrupt_log = true;
10648 
10649     return (nullptr);
10650   }
10651 
10652   if (parse_only) {
10653     return (ptr);
10654   }
10655 #ifdef UNIV_HOTBACKUP
10656 
10657   meb_tablespace_redo_delete(page_id, name);
10658 
10659 #else  /* !UNIV_HOTBACKUP */
10660 
10661   if (!srv_backup_mode) {
10662     bool success;
10663 
10664     success = fil_tablespace_open_for_recovery(page_id.space());
10665 
10666     if (!success) {
10667       ib::info(ER_IB_MSG_356) << "Delete '" << name << "' failed!";
10668       return (ptr);
10669     }
10670 
10671     fil_space_t *space = fil_space_get(page_id.space());
10672 
10673     if (space != nullptr) {
10674       xb_tablespace_map_delete(space->name);
10675 
10676       dberr_t err =
10677           fil_delete_tablespace(page_id.space(), BUF_REMOVE_FLUSH_NO_WRITE);
10678 
10679       ut_a(err == DB_SUCCESS);
10680     }
10681   }
10682 
10683   const auto result = fil_system->get_scanned_files(page_id.space());
10684 
10685   recv_sys->deleted.insert(page_id.space());
10686   recv_sys->missing_ids.erase(page_id.space());
10687 
10688   if (result.second == nullptr) {
10689     /* No files map to this tablespace ID. The drop must
10690     have succeeded. */
10691 
10692     return (ptr);
10693   }
10694 
10695   /* Space_id_set should have been sorted out before we get here. */
10696 
10697   ut_a(result.second->size() == 1);
10698 
10699   /* Update filename with correct partition case, if needed. */
10700   std::string name_str(name);
10701   std::string space_name;
10702   fil_update_partition_name(page_id.space(), 0, false, space_name, name_str);
10703 
10704   fil_space_free(page_id.space(), false);
10705 
10706   bool success = fil_system->erase_path(page_id.space());
10707   ut_a(success);
10708 #endif /* UNIV_HOTBACKUP */
10709 
10710   return (ptr);
10711 }
10712 
10713 /** Parse and process an encryption redo record.
10714 @param[in]	ptr		redo log record
10715 @param[in]	end		end of the redo log buffer
10716 @param[in]	space_id	the tablespace ID
10717 @return log record end, nullptr if not a complete record */
fil_tablespace_redo_encryption(byte * ptr,const byte * end,space_id_t space_id)10718 byte *fil_tablespace_redo_encryption(byte *ptr, const byte *end,
10719                                      space_id_t space_id) {
10720   byte *iv = nullptr;
10721   byte *key = nullptr;
10722   bool is_new = false;
10723 
10724 #ifdef UNIV_DEBUG
10725   bool is_allocated = false;
10726 #endif
10727 
10728   fil_space_t *space = fil_space_get(space_id);
10729 
10730   /* An undo space might be open but not have the ENCRYPTION bit set
10731   in its header if the current value of innodb_undo_log_encrypt=OFF
10732   and a crash occurred between flushing this redo record and the header
10733   page of the undo space.  So if the flag is missing, ignore the header
10734   page. */
10735   if (fsp_is_undo_tablespace(space_id) && space != nullptr &&
10736       !FSP_FLAGS_GET_ENCRYPTION(space->flags)) {
10737     space = nullptr;
10738   }
10739 
10740   if (space == nullptr) {
10741     if (recv_sys->keys == nullptr) {
10742       recv_sys->keys = UT_NEW_NOKEY(recv_sys_t::Encryption_Keys());
10743     }
10744 
10745     for (auto &recv_key : *recv_sys->keys) {
10746       if (recv_key.space_id == space_id) {
10747         iv = recv_key.iv;
10748         key = recv_key.ptr;
10749       }
10750     }
10751 
10752 #ifdef UNIV_DEBUG
10753     if (key != nullptr) {
10754       DBUG_EXECUTE_IF(
10755           "dont_update_key_found_during_REDO_scan", is_allocated = true;
10756           key = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));
10757           iv = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN)););
10758     }
10759 #endif
10760 
10761     if (key == nullptr) {
10762       key = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));
10763 
10764       iv = static_cast<byte *>(ut_malloc_nokey(Encryption::KEY_LEN));
10765 
10766       is_new = true;
10767     }
10768 
10769   } else {
10770     iv = space->encryption_iv;
10771     key = space->encryption_key;
10772   }
10773 
10774   ulint offset;
10775 
10776   offset = mach_read_from_2(ptr);
10777   ptr += 2;
10778 
10779   ulint len;
10780 
10781   len = mach_read_from_2(ptr);
10782   ptr += 2;
10783 
10784   if (end < ptr + len) {
10785     if (is_new) {
10786       ut_free(key);
10787       ut_free(iv);
10788     }
10789     return (nullptr);
10790   }
10791 
10792   if (offset >= UNIV_PAGE_SIZE || len + offset > UNIV_PAGE_SIZE ||
10793       len != Encryption::INFO_SIZE) {
10794     recv_sys->found_corrupt_log = true;
10795     if (is_new) {
10796       ut_free(key);
10797       ut_free(iv);
10798     }
10799     return (nullptr);
10800   }
10801 
10802   if (srv_backup_mode || !use_dumped_tablespace_keys) {
10803     if (!Encryption::decode_encryption_info(key, iv, ptr, true)) {
10804       if (is_new) {
10805         ut_free(key);
10806         ut_free(iv);
10807       }
10808       if (!srv_backup_mode) {
10809         ib::error() << "Cannot decode encryption information in the redo log.";
10810         exit(EXIT_FAILURE);
10811       }
10812       return (ptr + len);
10813     }
10814   } else {
10815     ulint master_key_id = mach_read_from_4(ptr + Encryption::MAGIC_SIZE);
10816     if (Encryption::get_master_key_id() < master_key_id) {
10817       Encryption::set_master_key(master_key_id);
10818     }
10819     bool found = xb_fetch_tablespace_key(space_id, key, iv);
10820     ut_a(found);
10821   }
10822 
10823   ut_ad(len == Encryption::INFO_SIZE);
10824 
10825   ptr += len;
10826 
10827   if (space == nullptr) {
10828     if (is_new) {
10829       recv_sys_t::Encryption_Key new_key;
10830 
10831       new_key.iv = iv;
10832       new_key.ptr = key;
10833       new_key.space_id = space_id;
10834 
10835       recv_sys->keys->push_back(new_key);
10836     }
10837   } else {
10838     if (FSP_FLAGS_GET_ENCRYPTION(space->flags) ||
10839         space->encryption_op_in_progress == ENCRYPTION) {
10840       space->encryption_type = Encryption::AES;
10841       space->encryption_klen = Encryption::KEY_LEN;
10842     }
10843   }
10844 
10845 #ifdef UNIV_DEBUG
10846   if (is_allocated) {
10847     DBUG_EXECUTE_IF("dont_update_key_found_during_REDO_scan", ut_free(key);
10848                     ut_free(iv););
10849   }
10850 #endif
10851 
10852   return (ptr);
10853 }
10854 
warn_ignore(std::string ignore_path,const char * reason)10855 void Tablespace_dirs::warn_ignore(std::string ignore_path, const char *reason) {
10856   ib::warn(ER_IB_MSG_IGNORE_SCAN_PATH, ignore_path.c_str(), reason);
10857 }
10858 
add_path(const std::string & path_in,bool is_undo_dir)10859 void Tablespace_dirs::add_path(const std::string &path_in, bool is_undo_dir) {
10860   /* Ignore an invalid path. */
10861   if (path_in == "") {
10862     return;
10863   }
10864   if (path_in == "/") {
10865     warn_ignore(path_in,
10866                 "the root directory '/' is not allowed to be scanned.");
10867     return;
10868   }
10869   if (std::string::npos != path_in.find('*')) {
10870     warn_ignore(path_in, "it contains '*'.");
10871     return;
10872   }
10873 
10874   /* Assume this path is a directory and put a trailing slash on it. */
10875   std::string dir_in(path_in);
10876   Fil_path::append_separator(dir_in);
10877 
10878   Fil_path found_path(dir_in, true);
10879 
10880   /* Exclude this path if it is a duplicate of a path already stored or
10881   if a previously stored path is an ancestor.  Remove any previously stored
10882   path that is a descendant of this path. */
10883   for (auto it = m_dirs.cbegin(); it != m_dirs.cend(); /* No op */) {
10884     if (it->root().is_same_as(found_path)) {
10885       /* The exact same path is obviously ignored, so there is no need to
10886       log a warning. */
10887       return;
10888     }
10889 
10890     /* Check if dir_abs_path is an ancestor of this path */
10891     if (it->root().is_ancestor(found_path)) {
10892       /* Descendant directories will be scanned recursively, so don't
10893       add it to the scan list.  Log a warning unless this descendant
10894       is the undo directory since it must be supplied even if it is
10895       a descendant of another data location. */
10896       if (!is_undo_dir) {
10897         std::string reason = "it is a sub-directory of '";
10898         reason += it->root().abs_path();
10899         warn_ignore(path_in, reason.c_str());
10900       }
10901       return;
10902     }
10903 
10904     if (found_path.is_ancestor(it->root())) {
10905       /* This path is an ancestor of an existing dir in fil_system::m_dirs.
10906       The settings have overlapping locations.  Put a note about it to
10907       the error log. The undo_dir is added last, so if it is an ancestor,
10908       the descendant was listed as a datafile directory. So always issue
10909       this message*/
10910       std::string reason = "it is a sub-directory of '";
10911       reason += found_path;
10912       warn_ignore(it->root().path(), reason.c_str());
10913 
10914       /* It might also be an ancestor to another dir as well, so keep looking.
10915       We must delete this descendant because we know that this ancestor path
10916       will be inserted and all its descendants will be scanned. */
10917       it = m_dirs.erase(it);
10918     } else {
10919       it++;
10920     }
10921   }
10922 
10923   m_dirs.push_back(Tablespace_files{found_path.path()});
10924   return;
10925 }
10926 
add_paths(const std::string & str,const std::string & delimiters)10927 void Tablespace_dirs::add_paths(const std::string &str,
10928                                 const std::string &delimiters) {
10929   std::string::size_type start = 0;
10930   std::string::size_type end = 0;
10931 
10932   /* Scan until 'start' reaches the end of the string (npos) */
10933   for (;;) {
10934     start = str.find_first_not_of(delimiters, end);
10935     if (std::string::npos == start) {
10936       break;
10937     }
10938 
10939     end = str.find_first_of(delimiters, start);
10940 
10941     const auto path = str.substr(start, end - start);
10942 
10943     add_path(path);
10944   }
10945 }
10946 
10947 /** Check whether we can rename the file
10948 @param[in]	space		Tablespace for which to rename
10949 @param[in]	name		Source file name
10950 @param[in]	df		Target file that exists on disk
10951 @return DB_SUCCESS if all OK */
fil_rename_validate(fil_space_t * space,const std::string & name,Datafile & df)10952 static dberr_t fil_rename_validate(fil_space_t *space, const std::string &name,
10953                                    Datafile &df) {
10954   dberr_t err = df.validate_for_recovery(space->id);
10955 
10956   if (err == DB_TABLESPACE_NOT_FOUND) {
10957     /* Tablespace header doesn't contain the expected
10958     tablespace ID. This is can happen during truncate. */
10959 
10960     return (err);
10961 
10962   } else if (err != DB_SUCCESS) {
10963     ib::warn(ER_IB_MSG_367) << "Failed to read the first page of the"
10964                             << " file '" << df.filepath() << "'."
10965                             << " You will need to verify and move the"
10966                             << " file out of the way retry recovery.";
10967 
10968     return (err);
10969   }
10970 
10971   auto file = &space->files.front();
10972 
10973   if (strcmp(df.filepath(), file->name) == 0) {
10974     /* Check if already points to the correct file.
10975     Must have the same space ID */
10976 
10977     ib::info(ER_IB_MSG_368) << "Tablespace ID already maps to: '"
10978                             << df.filepath() << "', rename ignored.";
10979 
10980     ut_a(df.space_id() == space->id);
10981 
10982     return (DB_SUCCESS);
10983 
10984   } else if (df.space_id() != space->id) {
10985     /* Target file exists on disk but has a different
10986     tablespce ID. The user should manually delete it. */
10987 
10988     ib::error(ER_IB_MSG_369)
10989         << "Cannot rename '" << name << "' to '" << df.filepath() << "'. File '"
10990         << df.filepath() << "' tablespace ID " << df.space_id()
10991         << " doesn't match the expected tablespace"
10992         << " ID " << space->id << ". You will need to verify and move '"
10993         << df.filepath() << "' manually and retry recovery!";
10994 
10995     return (DB_ERROR);
10996   }
10997 
10998   /* Target file exists on disk and has the same ID. */
10999 
11000   ib::error(ER_IB_MSG_370)
11001       << "Cannot rename '" << name << "' to '" << df.filepath()
11002       << "'. The File '" << df.filepath() << " already exists on"
11003       << " disk. You will need to verify and move either file"
11004       << " manually and retry recovery!";
11005 
11006   return (DB_ERROR);
11007 }
11008 
11009 /** Replay a file rename operation if possible.
11010 @param[in]	page_id		Space ID and first page number in the file
11011 @param[in]	old_name	old file name
11012 @param[in]	new_name	new file name
11013 @return	whether the operation was successfully applied (the name did not exist,
11014 or new_name did not exist and name was successfully renamed to new_name) */
fil_op_replay_rename(const page_id_t & page_id,const std::string & old_name,const std::string & new_name)11015 static bool fil_op_replay_rename(const page_id_t &page_id,
11016                                  const std::string &old_name,
11017                                  const std::string &new_name) {
11018   ut_ad(page_id.page_no() == 0);
11019   ut_ad(old_name.compare(new_name) != 0);
11020   ut_ad(Fil_path::has_suffix(IBD, new_name));
11021   ut_ad(page_id.space() != TRX_SYS_SPACE);
11022 
11023   /* In order to replay the rename, the following must hold:
11024   1. The new name is not already used.
11025   2. A tablespace exists with the old name.
11026   3. The space ID for that tablepace matches this log entry.
11027   This will prevent unintended renames during recovery. */
11028 
11029   space_id_t space_id = page_id.space();
11030   fil_space_t *space = fil_space_get(space_id);
11031 
11032   if (space == nullptr) {
11033     return (true);
11034   }
11035 
11036   Datafile df;
11037   std::string name{new_name};
11038 
11039   df.set_filepath(name.c_str());
11040 
11041   if (df.open_read_only(false) == DB_SUCCESS) {
11042     dberr_t err = fil_rename_validate(space, old_name, df);
11043 
11044     if (err == DB_TABLESPACE_NOT_FOUND) {
11045       /* This can happend during truncate. */
11046       ib::info(ER_IB_MSG_371) << "Tablespace ID mismatch in '" << name << "'";
11047     }
11048 
11049     if (err == DB_WRONG_FILE_NAME) {
11050       df.close();
11051       os_file_delete(innodb_data_file_key, df.filepath());
11052       bool success = fil_system->erase_path(df.space_id());
11053       ut_a(success);
11054     } else {
11055       df.close();
11056       return (err == DB_SUCCESS);
11057     }
11058   }
11059 
11060   auto path_sep_pos = name.find_last_of(Fil_path::SEPARATOR);
11061 
11062   ut_a(path_sep_pos != std::string::npos);
11063 
11064   /* Create the database directory for the new name, if
11065   it does not exist yet */
11066 
11067   name.resize(path_sep_pos);
11068 
11069   bool success = os_file_create_directory(name.c_str(), false);
11070   ut_a(success);
11071 
11072   auto datadir_pos = name.find_last_of(Fil_path::SEPARATOR);
11073 
11074   ut_ad(datadir_pos != std::string::npos);
11075 
11076   name.erase(0, datadir_pos + 1);
11077 
11078   ut_ad(!Fil_path::is_separator(name.back()));
11079 
11080   /* schema/table separator is always a '/'. */
11081   name.push_back('/');
11082 
11083   /* Strip the '.ibd' suffix. */
11084   name.append(new_name.begin() + path_sep_pos + 1, new_name.end() - 4);
11085 
11086   ut_ad(!Fil_path::has_suffix(IBD, name));
11087 
11088   clone_mark_abort(true);
11089 
11090   const auto ptr = name.c_str();
11091 
11092   dberr_t err =
11093       fil_rename_tablespace(space_id, old_name.c_str(), ptr, new_name.c_str());
11094 
11095   /* Stop recovery if this does not succeed. */
11096   ut_a(err == DB_SUCCESS);
11097 
11098   clone_mark_active();
11099 
11100   return (true);
11101 }
11102 
11103 /** Get the tablespace ID from an .ibd and/or an undo tablespace. If the ID
11104 is == 0 on the first page then try finding the ID with
11105 Datafile::find_space_id().
11106 @param[in]	filename	File name to check
11107 @return s_invalid_space_id if not found, otherwise the space ID */
get_tablespace_id(const std::string & filename)11108 space_id_t Fil_system::get_tablespace_id(const std::string &filename) {
11109   FILE *fp = fopen(filename.c_str(), "rb");
11110 
11111   if (fp == nullptr) {
11112     ib::warn(ER_IB_MSG_372) << "Unable to open '" << filename << "'";
11113     return (dict_sys_t::s_invalid_space_id);
11114   }
11115 
11116   std::vector<space_id_t> space_ids;
11117   auto page_size = srv_page_size;
11118 
11119   space_ids.reserve(MAX_PAGES_TO_READ);
11120 
11121   const auto n_bytes = page_size * MAX_PAGES_TO_READ;
11122 
11123   std::unique_ptr<byte[]> buf(new byte[n_bytes]);
11124 
11125   if (!buf) {
11126     return dict_sys_t::s_invalid_space_id;
11127   }
11128 
11129   auto pages_read = fread(buf.get(), page_size, MAX_PAGES_TO_READ, fp);
11130 
11131   DBUG_EXECUTE_IF("invalid_header", pages_read = 0;);
11132 
11133   /* Find the space id from the pages read if enough pages could be read.
11134   Fall back to the more heavier method of finding the space id from
11135   Datafile::find_space_id() if pages cannot be read properly. */
11136   if (pages_read >= MAX_PAGES_TO_READ) {
11137     auto bytes_read = pages_read * page_size;
11138 
11139 #ifdef POSIX_FADV_DONTNEED
11140     posix_fadvise(fileno(fp), 0, bytes_read, POSIX_FADV_DONTNEED);
11141 #endif /* POSIX_FADV_DONTNEED */
11142 
11143     for (page_no_t i = 0; i < MAX_PAGES_TO_READ; ++i) {
11144       const auto off = i * page_size + FIL_PAGE_SPACE_ID;
11145 
11146       if (off == FIL_PAGE_SPACE_ID) {
11147         /* Find out the page size of the tablespace from the first page.
11148         In case of compressed pages, the subsequent pages can be of different
11149         sizes. If MAX_PAGES_TO_READ is changed to a different value, then the
11150         page size of subsequent pages is needed to find out the offset for
11151         space ID. */
11152 
11153         auto space_flags_offset = FSP_HEADER_OFFSET + FSP_SPACE_FLAGS;
11154 
11155         ut_a(space_flags_offset + 4 < n_bytes);
11156 
11157         const auto flags = mach_read_from_4(buf.get() + space_flags_offset);
11158 
11159         page_size_t space_page_size(flags);
11160 
11161         page_size = space_page_size.physical();
11162       }
11163 
11164       space_ids.push_back(mach_read_from_4(buf.get() + off));
11165 
11166       if ((i + 1) * page_size >= bytes_read) {
11167         break;
11168       }
11169     }
11170   }
11171 
11172   fclose(fp);
11173 
11174   space_id_t space_id;
11175 
11176   if (!space_ids.empty()) {
11177     space_id = space_ids.front();
11178 
11179     for (auto id : space_ids) {
11180       if (id == 0 || space_id != id) {
11181         space_id = UINT32_UNDEFINED;
11182 
11183         break;
11184       }
11185     }
11186   } else {
11187     space_id = UINT32_UNDEFINED;
11188   }
11189 
11190   /* Try the more heavy duty method, as a last resort. */
11191   if (space_id == UINT32_UNDEFINED) {
11192     /* If the first page cannot be read properly, then for compressed
11193     tablespaces we don't know where the page boundary starts because
11194     we don't know the page size. */
11195 
11196     Datafile file;
11197 
11198     file.set_filepath(filename.c_str());
11199 
11200     dberr_t err = file.open_read_only(false);
11201 
11202     ut_a(file.is_open());
11203     ut_a(err == DB_SUCCESS);
11204 
11205     /* Use the heavier Datafile::find_space_id() method to
11206     find the space id. */
11207     err = file.find_space_id();
11208 
11209     if (err == DB_SUCCESS) {
11210       space_id = file.space_id();
11211     }
11212 
11213     file.close();
11214   }
11215 
11216   return (space_id);
11217 }
11218 
11219 /** Open tablespace file for backup.
11220 @param[in]  path  file path.
11221 @param[in]  name  space name.
11222 @return DB_SUCCESS if all OK */
fil_open_for_xtrabackup(const std::string & path,const std::string & name)11223 dberr_t fil_open_for_xtrabackup(const std::string &path,
11224                                 const std::string &name) {
11225   Datafile file;
11226   file.set_name(name.c_str());
11227   file.set_filepath(path.c_str());
11228 
11229   dberr_t err = file.open_read_only(true);
11230   if (err != DB_SUCCESS) {
11231     return (err);
11232   }
11233 
11234   lsn_t flush_lsn;
11235   err = file.validate_first_page(SPACE_UNKNOWN, &flush_lsn, false);
11236 
11237   if (err == DB_PAGE_IS_BLANK) {
11238     /* allow corrupted first page for xtrabackup, it could be just
11239     zero-filled page, which we'll restore from redo log later */
11240     return (DB_SUCCESS);
11241   } else if (err != DB_SUCCESS) {
11242     return (err);
11243   }
11244 
11245   if (fil_space_get(file.space_id())) {
11246     /* space already exists */
11247     return (DB_TABLESPACE_EXISTS);
11248   }
11249 
11250   os_offset_t node_size = os_file_get_size(file.handle());
11251   bool is_tmp = FSP_FLAGS_GET_TEMPORARY(file.flags());
11252   os_offset_t n_pages;
11253 
11254   ut_a(node_size != (os_offset_t)-1);
11255 
11256   n_pages = node_size / page_size_t(file.flags()).physical();
11257 
11258   fil_space_t *space =
11259       fil_space_create(name.c_str(), file.space_id(), file.flags(),
11260                        is_tmp ? FIL_TYPE_TEMPORARY : FIL_TYPE_TABLESPACE);
11261 
11262   ut_a(space != NULL);
11263 
11264   /* For encrypted tablespace, initialize encryption
11265   information.*/
11266   if (FSP_FLAGS_GET_ENCRYPTION(file.flags())) {
11267     if (srv_backup_mode || !use_dumped_tablespace_keys) {
11268       byte *key = file.m_encryption_key;
11269       byte *iv = file.m_encryption_iv;
11270 
11271       fsp_flags_set_encryption(space->flags);
11272       if (key && iv) {
11273         err = fil_set_encryption(space->id, Encryption::AES, key, iv);
11274       }
11275     } else {
11276       err = xb_set_encryption(space);
11277     }
11278 
11279     ut_ad(err == DB_SUCCESS);
11280   }
11281 
11282   char *fn = fil_node_create(file.filepath(), n_pages, space, false, false);
11283   if (fn == nullptr) {
11284     return (DB_ERROR);
11285   }
11286 
11287   /* by opening the tablespace we forcing node and space objects
11288   in the cache to be populated with fields from space header */
11289   if (!fil_space_open(space->id)) {
11290     ib::error() << "Failed to open tablespace " << space->name;
11291   }
11292 
11293   if (!srv_backup_mode || srv_close_files) {
11294     fil_space_close(space->id);
11295   }
11296 
11297   return (DB_SUCCESS);
11298 }
11299 
11300 /** Open IBD tablespaces.
11301 @param[in]  start   Start of slice
11302 @param[in]  end   End of slice
11303 @param[in]  thread_id Thread ID */
open_ibd(const Const_iter & start,const Const_iter & end,size_t thread_id,bool & result)11304 void Tablespace_dirs::open_ibd(const Const_iter &start, const Const_iter &end,
11305                                size_t thread_id, bool &result) {
11306   if (!result) return;
11307 
11308   for (auto it = start; it != end; ++it) {
11309     const std::string filename = it->second;
11310     const auto &files = m_dirs[it->first];
11311     const std::string phy_filename = files.path() + filename;
11312 
11313     if (check_if_skip_table(filename.c_str())) {
11314       continue;
11315     }
11316 
11317     dberr_t err = fil_open_for_xtrabackup(
11318         phy_filename, filename.substr(0, filename.length() - 4));
11319     if (err != DB_SUCCESS) {
11320       result = false;
11321     }
11322   }
11323 }
11324 
11325 /** Open all known tablespaces. */
open_ibds() const11326 void Tablespace_dirs::open_ibds() const {
11327   for (auto dir : m_dirs) {
11328     dir.open_ibds();
11329   }
11330 }
11331 
rename_partition_files(bool revert)11332 void Fil_system::rename_partition_files(bool revert) {
11333   /* If revert, then we are downgrading after upgrade failure from 5.7 */
11334   ut_ad(!revert || srv_downgrade_partition_files);
11335 
11336   if (m_old_paths.empty()) {
11337     return;
11338   }
11339 
11340 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
11341   ut_ad(!lower_case_file_system);
11342 
11343   for (auto &old_path : m_old_paths) {
11344     ut_ad(Fil_path::has_suffix(IBD, old_path));
11345     ut_ad(dict_name::is_partition(old_path));
11346 
11347     fil_rename_partition_file(old_path, IBD, revert, false);
11348   }
11349 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
11350 }
11351 
11352 /** Check for duplicate tablespace IDs.
11353 @param[in]	start		Slice start
11354 @param[in]	end		Slice end
11355 @param[in]	thread_id	Thread ID
11356 @param[in,out]	mutex		Mutex that covers the global state
11357 @param[in,out]	unique		To check for duplciates
11358 @param[in,out]	duplicates	Duplicate space IDs found */
duplicate_check(const Const_iter & start,const Const_iter & end,size_t thread_id,std::mutex * mutex,Space_id_set * unique,Space_id_set * duplicates)11359 void Tablespace_dirs::duplicate_check(const Const_iter &start,
11360                                       const Const_iter &end, size_t thread_id,
11361                                       std::mutex *mutex, Space_id_set *unique,
11362                                       Space_id_set *duplicates) {
11363   size_t count = 0;
11364   bool printed_msg = false;
11365   auto start_time = ut_time_monotonic();
11366 
11367   for (auto it = start; it != end; ++it, ++m_checked) {
11368     const std::string filename = it->second;
11369     auto &files = m_dirs[it->first];
11370     const std::string phy_filename = files.path() + filename;
11371 
11372     space_id_t space_id;
11373 
11374     space_id = Fil_system::get_tablespace_id(phy_filename);
11375 
11376     if (space_id != 0 && space_id != dict_sys_t::s_invalid_space_id) {
11377       std::lock_guard<std::mutex> guard(*mutex);
11378 
11379       auto ret = unique->insert(space_id);
11380 
11381       size_t n_files;
11382 
11383       n_files = files.add(space_id, filename);
11384 
11385       if (n_files > 1 || !ret.second) {
11386         duplicates->insert(space_id);
11387       }
11388 
11389     } else if (space_id != 0 &&
11390                Fil_path::is_undo_tablespace_name(phy_filename)) {
11391       ib::info(ER_IB_MSG_373) << "Can't determine the undo file tablespace"
11392                               << " ID for '" << phy_filename << "', could be"
11393                               << " an undo truncate in progress";
11394 
11395     } else {
11396       ib::info(ER_IB_MSG_374) << "Ignoring '" << phy_filename << "' invalid"
11397                               << " tablespace ID in the header";
11398     }
11399 
11400     ++count;
11401 
11402     if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
11403       ib::info(ER_IB_MSG_375) << "Thread# " << thread_id << " - Checked "
11404                               << count << "/" << (end - start) << " files";
11405 
11406       start_time = ut_time_monotonic();
11407 
11408       printed_msg = true;
11409     }
11410   }
11411 
11412   if (printed_msg) {
11413     ib::info(ER_IB_MSG_376) << "Checked " << count << " files";
11414   }
11415 }
11416 
11417 /** Print the duplicate filenames for a tablespce ID to the log
11418 @param[in]	duplicates	Duplicate tablespace IDs*/
print_duplicates(const Space_id_set & duplicates)11419 void Tablespace_dirs::print_duplicates(const Space_id_set &duplicates) {
11420   /* Print the duplicate names to the error log. */
11421   for (auto space_id : duplicates) {
11422     Dirs files;
11423 
11424     for (auto &dir : m_dirs) {
11425       const auto names = dir.find(space_id);
11426 
11427       if (names == nullptr) {
11428         continue;
11429       }
11430 
11431       files.insert(files.end(), names->begin(), names->end());
11432     }
11433 
11434     /* Fixes the order in the mtr tests. */
11435     std::sort(files.begin(), files.end());
11436 
11437     ut_a(files.size() > 1);
11438 
11439     std::ostringstream oss;
11440 
11441     oss << "Tablespace ID: " << space_id << " = [";
11442 
11443     for (size_t i = 0; i < files.size(); ++i) {
11444       oss << "'" << files[i] << "'";
11445 
11446       if (i < files.size() - 1) {
11447         oss << ", ";
11448       }
11449     }
11450 
11451     oss << "]" << std::endl;
11452 
11453     ib::error(ER_IB_MSG_377) << oss.str();
11454   }
11455 }
11456 
11457 #ifndef XTRABACKUP
fil_get_partition_file(const std::string & old_path,ib_file_suffix extn,std::string & new_path)11458 static bool fil_get_partition_file(const std::string &old_path,
11459                                    ib_file_suffix extn, std::string &new_path) {
11460 #ifdef _WIN32
11461   /* Safe check. Never needed on Windows. */
11462   return (false);
11463 #endif /* WIN32 */
11464 
11465 #ifndef UNIV_HOTBACKUP
11466 
11467   /* Needed only for case sensitive file system. */
11468   if (lower_case_file_system) {
11469     return (false);
11470   }
11471 
11472   /* Skip if not right file extension. */
11473   if (!Fil_path::has_suffix(extn, old_path)) {
11474     return (false);
11475   }
11476 
11477   /* Check if partitioned table. */
11478   if (!dict_name::is_partition(old_path)) {
11479     return (false);
11480   }
11481 
11482   std::string table_name;
11483   /* Get Innodb dictionary name from file path. */
11484   if (!Fil_path::parse_file_path(old_path, extn, table_name)) {
11485     ut_ad(false);
11486     return (false);
11487   }
11488   ut_ad(!table_name.empty());
11489 
11490   /* Rebuild partition table name with lower case. */
11491   std::string save_name(table_name);
11492   dict_name::rebuild(table_name);
11493 
11494   if (save_name.compare(table_name) == 0) {
11495     return (false);
11496   }
11497 
11498   /* Build new partition file name. */
11499   new_path = Fil_path::make_new_path(old_path, table_name, extn);
11500   ut_ad(!new_path.empty());
11501 #endif /* !UNIV_HOTBACKUP */
11502 
11503   return (true);
11504 }
11505 
11506 #endif /* !XTRABACKUP */
11507 
11508 #if !defined(UNIV_HOTBACKUP) && !defined(XTRABACKUP)
fil_rename_partition_file(const std::string & old_path,ib_file_suffix extn,bool revert,bool import)11509 static void fil_rename_partition_file(const std::string &old_path,
11510                                       ib_file_suffix extn, bool revert,
11511                                       bool import) {
11512   std::string new_path;
11513 
11514   if (!fil_get_partition_file(old_path, extn, new_path)) {
11515     ut_ad(false);
11516     return;
11517   }
11518 
11519   ut_ad(!new_path.empty());
11520 
11521   bool old_exists = os_file_exists(old_path.c_str());
11522   bool new_exists = os_file_exists(new_path.c_str());
11523 
11524   static bool print_upgrade = true;
11525   static bool print_downgrade = true;
11526   bool ret = false;
11527 
11528   if (revert) {
11529     /* Check if rename is required. */
11530     if (!new_exists || old_exists) {
11531       return;
11532     }
11533     ret = os_file_rename(innodb_data_file_key, new_path.c_str(),
11534                          old_path.c_str());
11535     ut_ad(ret);
11536 
11537     if (ret && print_downgrade) {
11538       ib::info(ER_IB_MSG_DOWNGRADE_PARTITION_FILE, new_path.c_str(),
11539                old_path.c_str());
11540       print_downgrade = false;
11541     }
11542     return;
11543   }
11544 
11545   /* Check if rename is required. */
11546   if (new_exists || !old_exists) {
11547     return;
11548   }
11549 
11550   ret =
11551       os_file_rename(innodb_data_file_key, old_path.c_str(), new_path.c_str());
11552 
11553   if (!ret) {
11554     /* File rename failed. */
11555     ut_ad(false);
11556     return;
11557   }
11558 
11559   if (import) {
11560     ib::info(ER_IB_MSG_UPGRADE_PARTITION_FILE_IMPORT, old_path.c_str(),
11561              new_path.c_str());
11562     return;
11563   }
11564 
11565   if (print_upgrade) {
11566     ib::info(ER_IB_MSG_UPGRADE_PARTITION_FILE, old_path.c_str(),
11567              new_path.c_str());
11568     print_upgrade = false;
11569   }
11570 }
11571 #endif /* !UNIV_HOTBACKUP && !XTRABACKUP */
11572 
set_scan_dir(const std::string & in_directory,bool is_undo_dir)11573 void Tablespace_dirs::set_scan_dir(const std::string &in_directory,
11574                                    bool is_undo_dir) {
11575   std::string directory(in_directory);
11576 
11577   Fil_path::normalize(directory);
11578 
11579   add_path(directory, is_undo_dir);
11580 }
11581 
set_scan_dirs(const std::string & in_directories)11582 void Tablespace_dirs::set_scan_dirs(const std::string &in_directories) {
11583   std::string directories(in_directories);
11584 
11585   Fil_path::normalize(directories);
11586 
11587   std::string separators;
11588 
11589   separators.push_back(FIL_PATH_SEPARATOR);
11590 
11591   add_paths(directories, separators);
11592 }
11593 
11594 /** Discover tablespaces by reading the header from .ibd files.
11595 @param[in]      in_directories  Directories to scan
11596 @return DB_SUCCESS if all goes well */
scan(bool populate_fil_cache)11597 dberr_t Tablespace_dirs::scan(bool populate_fil_cache) {
11598   Scanned_files ibd_files;
11599   Scanned_files undo_files;
11600   uint16_t count = 0;
11601   bool print_msg = false;
11602   auto start_time = ut_time_monotonic();
11603 
11604   /* Should be trivial to parallelize the scan and ID check. */
11605   for (const auto &dir : m_dirs) {
11606     const auto real_path_dir = dir.root().abs_path();
11607 
11608     ut_a(Fil_path::is_separator(dir.path().back()));
11609 
11610     ib::info(ER_IB_MSG_379) << "Scanning '" << dir.path() << "'";
11611 
11612     /* Walk the sub-tree of dir. */
11613 
11614     Dir_Walker::walk(real_path_dir, true, [&](const std::string &path) {
11615       /* If it is a file and the suffix matches ".ibd"
11616       or the undo file name format then store it for
11617       determining the space ID. */
11618 
11619       ut_a(path.length() > real_path_dir.length());
11620       ut_a(Fil_path::get_file_type(path) != OS_FILE_TYPE_DIR);
11621 
11622       /* Make the filename relative to the directory that was scanned. */
11623       std::string file = path.substr(real_path_dir.length());
11624 
11625       if (file.size() <= 4) {
11626         return;
11627       }
11628 
11629       using value = Scanned_files::value_type;
11630 
11631       if (Fil_path::has_suffix(IBD, file.c_str())) {
11632         ibd_files.push_back(value{count, file});
11633 
11634       } else if (Fil_path::is_undo_tablespace_name(file)) {
11635         undo_files.push_back(value{count, file});
11636       }
11637 
11638       if (ut_time_monotonic() - start_time >= PRINT_INTERVAL_SECS) {
11639         ib::info(ER_IB_MSG_380)
11640             << "Files found so far: " << ibd_files.size() << " data files"
11641             << " and " << undo_files.size() << " undo files";
11642 
11643         start_time = ut_time_monotonic();
11644         print_msg = true;
11645       }
11646     });
11647 
11648     ++count;
11649   }
11650 
11651   /* Rename all old partition files. */
11652   //  fil_system->rename_partition_files(false);
11653 
11654   if (print_msg) {
11655     ib::info(ER_IB_MSG_381) << "Found " << ibd_files.size() << " '.ibd' and "
11656                             << undo_files.size() << " undo files";
11657   }
11658 
11659   Space_id_set unique;
11660   Space_id_set duplicates;
11661 
11662   /* Get the number of additional threads needed to scan the files. */
11663   size_t n_threads = fil_get_scan_threads(ibd_files.size());
11664 
11665   if (n_threads > 0) {
11666     ib::info(ER_IB_MSG_382)
11667         << "Using " << (n_threads + 1) << " threads to"
11668         << " scan " << ibd_files.size() << " tablespace files";
11669   }
11670 
11671   std::mutex m;
11672 
11673   using std::placeholders::_1;
11674   using std::placeholders::_2;
11675   using std::placeholders::_3;
11676   using std::placeholders::_4;
11677   using std::placeholders::_5;
11678   using std::placeholders::_6;
11679 
11680   std::function<void(const Const_iter &, const Const_iter &, size_t,
11681                      std::mutex *, Space_id_set *, Space_id_set *)>
11682       check = std::bind(&Tablespace_dirs::duplicate_check, this, _1, _2, _3, _4,
11683                         _5, _6);
11684 
11685   if (!populate_fil_cache) {
11686     par_for(PFS_NOT_INSTRUMENTED, ibd_files, n_threads, check, &m, &unique,
11687             &duplicates);
11688   }
11689 
11690   duplicate_check(undo_files.begin(), undo_files.end(), n_threads, &m, &unique,
11691                   &duplicates);
11692 
11693   ib::info(ER_IB_MSG_383) << "Completed space ID check of " << m_checked.load()
11694                           << " files.";
11695 
11696   dberr_t err;
11697 
11698   if (!duplicates.empty()) {
11699     ib::error(ER_IB_MSG_384)
11700         << "Multiple files found for the same tablespace ID:";
11701 
11702     print_duplicates(duplicates);
11703 
11704     err = DB_FAIL;
11705   } else {
11706     err = DB_SUCCESS;
11707   }
11708 
11709   if (err == DB_SUCCESS && populate_fil_cache) {
11710     bool result = true;
11711     std::function<void(const Const_iter &, const Const_iter &, size_t)> open =
11712         std::bind(&Tablespace_dirs::open_ibd, this, _1, _2, _3, result);
11713 
11714     par_for(PFS_NOT_INSTRUMENTED, ibd_files, n_threads, open);
11715 
11716     if (!result) err = DB_FAIL;
11717   }
11718 
11719   return (err);
11720 }
11721 
fil_set_scan_dir(const std::string & directory,bool is_undo_dir)11722 void fil_set_scan_dir(const std::string &directory, bool is_undo_dir) {
11723   fil_system->set_scan_dir(directory, is_undo_dir);
11724 }
11725 
fil_set_scan_dirs(const std::string & directories)11726 void fil_set_scan_dirs(const std::string &directories) {
11727   fil_system->set_scan_dirs(directories);
11728 }
11729 
11730 /** Discover tablespaces by reading the header from .ibd files.
11731 @param[in]  populate_fil_cache Whether to load tablespaces into fil cache
11732 @return DB_SUCCESS if all goes well */
fil_scan_for_tablespaces(bool populate_fil_cache)11733 dberr_t fil_scan_for_tablespaces(bool populate_fil_cache) {
11734   return (fil_system->scan(populate_fil_cache));
11735 }
11736 
11737 /** Open all known tablespaces. */
fil_open_ibds()11738 void fil_open_ibds() { fil_system->open_ibds(); }
11739 
11740 /** Check if a path is known to InnoDB meaning that it is in or under
11741 one of the four path settings scanned at startup for file discovery.
11742 @param[in]  path    Path to check
11743 @return true if path is known to InnoDB */
fil_path_is_known(const std::string & path)11744 bool fil_path_is_known(const std::string &path) {
11745   return (fil_system->check_path(path));
11746 }
11747 
11748 /** Get the list of directories that datafiles can reside in.
11749 @return the list of directories 'dir1;dir2;....;dirN' */
fil_get_dirs()11750 std::string fil_get_dirs() { return (fil_system->get_dirs()); }
11751 
11752 /** Free the data structures required for recovery. */
fil_free_scanned_files()11753 void fil_free_scanned_files() { fil_system->free_scanned_files(); }
11754 
11755 /** Update the tablespace name. Incase, the new name
11756 and old name are same, no update done.
11757 @param[in,out]	space		tablespace object on which name
11758                                 will be updated
11759 @param[in]	name		new name for tablespace */
fil_space_update_name(fil_space_t * space,const char * name)11760 void fil_space_update_name(fil_space_t *space, const char *name) {
11761   if (space == nullptr || name == nullptr || space->name == nullptr ||
11762       strcmp(space->name, name) == 0) {
11763     return;
11764   }
11765 
11766   dberr_t err = fil_rename_tablespace_by_id(space->id, space->name, name);
11767 
11768   if (err != DB_SUCCESS) {
11769     ib::warn(ER_IB_MSG_387) << "Tablespace rename '" << space->name << "' to"
11770                             << " '" << name << "' failed!";
11771   }
11772 }
11773 
11774 #ifndef UNIV_HOTBACKUP
is_valid_location(const char * space_name,space_id_t space_id,uint32_t fsp_flags,const std::string & path)11775 bool Fil_path::is_valid_location(const char *space_name, space_id_t space_id,
11776                                  uint32_t fsp_flags, const std::string &path) {
11777   ut_ad(!path.empty());
11778   ut_ad(space_name != nullptr);
11779 
11780   /* All files sent to this routine have been found by scanning known
11781   locations. */
11782   ib_file_suffix type = (fsp_is_undo_tablespace(space_id) ? IBU : IBD);
11783 
11784   if (type == IBD) {
11785     size_t dirname_len = dirname_length(path.c_str());
11786     Fil_path dirpath(path.c_str(), dirname_len, true);
11787 
11788     bool is_shared = fsp_is_shared_tablespace(fsp_flags);
11789     bool under_datadir = MySQL_datadir_path.is_ancestor(dirpath);
11790 
11791     if (is_shared) {
11792       if (under_datadir) {
11793         ib::error(ER_IB_MSG_GENERAL_TABLESPACE_UNDER_DATADIR, path.c_str());
11794         return (false);
11795       }
11796     } else {
11797       /* file-per-table */
11798       bool in_datadir =
11799           (under_datadir ? false : MySQL_datadir_path.is_same_as(dirpath));
11800 
11801       if (in_datadir) {
11802         ib::error(ER_IB_MSG_IMPLICIT_TABLESPACE_IN_DATADIR, path.c_str());
11803         return (false);
11804       }
11805 
11806       /* Make sure that the last directory of an implicit tablespace is a
11807       filesystem charset version of the schema name. */
11808       if (!is_valid_location_within_db(space_name, path)) {
11809         ib::error(ER_IB_MSG_INVALID_LOCATION_WRONG_DB, path.c_str(),
11810                   space_name);
11811         return (false);
11812       }
11813     }
11814   }
11815 
11816   return (true);
11817 }
11818 
is_valid_location_within_db(const char * space_name,const std::string & path)11819 bool Fil_path::is_valid_location_within_db(const char *space_name,
11820                                            const std::string &path) {
11821   /* Strip off the basename to reduce the path to a directory. */
11822   std::string dirpath{path};
11823   auto pos = dirpath.find_last_of(SEPARATOR);
11824   dirpath.resize(pos);
11825 
11826   /* Only implicit tablespaces are sent to this routine.
11827   They are always prefixed by `schema/`. */
11828   ut_ad(pos != std::string::npos);
11829 
11830   /* Get the subdir that the file is in. */
11831   pos = dirpath.find_last_of(SEPARATOR);
11832   std::string db_dir = (pos == std::string::npos)
11833                            ? dirpath
11834                            : dirpath.substr(pos + 1, dirpath.length());
11835 
11836   /* Convert to lowercase if necessary. */
11837   if (innobase_get_lower_case_table_names() == 2) {
11838     Fil_path::convert_to_lower_case(db_dir);
11839   }
11840 
11841   /* Make sure the db_dir matches the schema name.
11842   db_dir is in filesystem charset and space_name is usually in the
11843   system charset.
11844 
11845   The problem here is that the system charset version of a schema or
11846   table name may contain a '/' and the tablespace name we were sent
11847   is a combination of the two with '/' as a delimiter.
11848   For example `my/schema` + `my/table` == `my/schema/my/table`
11849 
11850   Search the space_name string backwards until we find the db name that
11851   matches the schema name from the path. */
11852 
11853   std::string name(space_name);
11854   pos = name.find_last_of(SEPARATOR);
11855   while (pos < std::string::npos) {
11856     name.resize(pos);
11857     std::string temp = name;
11858     if (temp == db_dir) {
11859       return (true);
11860     }
11861 
11862     /* Convert to filename charset and compare again. */
11863     Fil_path::convert_to_filename_charset(temp);
11864     if (temp == db_dir) {
11865       return (true);
11866     }
11867 
11868     /* Still no match, iterate through the next SEPARATOR. */
11869     pos = name.find_last_of(SEPARATOR);
11870 
11871     /* If end of string is hit, there is no match. */
11872     if (pos == std::string::npos) {
11873       return (false);
11874     }
11875   }
11876 
11877   return (true);
11878 }
11879 
11880 /** Convert filename to the file system charset format.
11881 @param[in,out]	name		Filename to convert */
convert_to_filename_charset(std::string & name)11882 void Fil_path::convert_to_filename_charset(std::string &name) {
11883   uint errors = 0;
11884   char old_name[MAX_TABLE_NAME_LEN + 20];
11885   char filename[MAX_TABLE_NAME_LEN + 20];
11886 
11887   strncpy(filename, name.c_str(), sizeof(filename) - 1);
11888   strncpy(old_name, filename, sizeof(old_name));
11889 
11890   innobase_convert_to_filename_charset(filename, old_name, MAX_TABLE_NAME_LEN);
11891 
11892   if (errors == 0) {
11893     name.assign(filename);
11894   }
11895 }
11896 
11897 /** Convert to lower case using the file system charset.
11898 @param[in,out]	path		Filepath to convert */
convert_to_lower_case(std::string & path)11899 void Fil_path::convert_to_lower_case(std::string &path) {
11900   char lc_path[MAX_TABLE_NAME_LEN + 20];
11901 
11902   ut_ad(path.length() < sizeof(lc_path) - 1);
11903 
11904   strncpy(lc_path, path.c_str(), sizeof(lc_path) - 1);
11905 
11906   innobase_casedn_path(lc_path);
11907 
11908   path.assign(lc_path);
11909 }
11910 
fil_checkpoint(lsn_t lwm)11911 void fil_checkpoint(lsn_t lwm) { fil_system->checkpoint(lwm); }
11912 
fil_count_deleted(space_id_t undo_num)11913 size_t fil_count_deleted(space_id_t undo_num) {
11914   return (fil_system->count_deleted(undo_num));
11915 }
11916 
fil_is_deleted(space_id_t space_id)11917 bool fil_is_deleted(space_id_t space_id) {
11918   ut_ad(fsp_is_undo_tablespace(space_id));
11919 
11920   return (fil_system->is_deleted(space_id));
11921 }
11922 
11923 #endif /* !UNIV_HOTBACKUP */
11924