1 /***********************************************************************
2 
3 Copyright (c) 1995, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2009, Percona Inc.
5 
6 Portions of this file contain modifications contributed and copyrighted
7 by Percona Inc.. Those modifications are
8 gratefully acknowledged and are described briefly in the InnoDB
9 documentation. The contributions by Percona Inc. are incorporated with
10 their permission, and subject to the conditions contained in the file
11 COPYING.Percona.
12 
13 This program is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License, version 2.0,
15 as published by the Free Software Foundation.
16 
17 This program is also distributed with certain software (including
18 but not limited to OpenSSL) that is licensed under separate terms,
19 as designated in a particular file or component or in included license
20 documentation.  The authors of MySQL hereby grant you an additional
21 permission to link the program and your derivative works with the
22 separately licensed software that they have included with MySQL.
23 
24 This program is distributed in the hope that it will be useful,
25 but WITHOUT ANY WARRANTY; without even the implied warranty of
26 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
27 GNU General Public License, version 2.0, for more details.
28 
29 You should have received a copy of the GNU General Public License along with
30 this program; if not, write to the Free Software Foundation, Inc.,
31 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
32 
33 ***********************************************************************/
34 
35 /**************************************************//**
36 @file include/os0file.h
37 The interface to the operating system file io
38 
39 Created 10/21/1995 Heikki Tuuri
40 *******************************************************/
41 
42 #ifndef os0file_h
43 #define os0file_h
44 
45 #include "univ.i"
46 
47 #ifndef _WIN32
48 #include <dirent.h>
49 #include <sys/stat.h>
50 #include <time.h>
51 #endif /* !_WIN32 */
52 
53 /** File node of a tablespace or the log data space */
54 struct fil_node_t;
55 
56 extern bool	os_has_said_disk_full;
57 
58 /** Number of pending read operations */
59 extern ulint	os_n_pending_reads;
60 /** Number of pending write operations */
61 extern ulint	os_n_pending_writes;
62 
63 /** File offset in bytes */
64 typedef ib_uint64_t os_offset_t;
65 
66 #ifdef _WIN32
67 
68 /**
69 Gets the operating system version. Currently works only on Windows.
70 @return OS_WIN95, OS_WIN31, OS_WINNT, OS_WIN2000, OS_WINXP, OS_WINVISTA,
71 OS_WIN7. */
72 
73 ulint
74 os_get_os_version();
75 
76 typedef HANDLE	os_file_dir_t;	/*!< directory stream */
77 
78 /** We define always WIN_ASYNC_IO, and check at run-time whether
79 the OS actually supports it: Win 95 does not, NT does. */
80 # define WIN_ASYNC_IO
81 
82 /** Use unbuffered I/O */
83 # define UNIV_NON_BUFFERED_IO
84 
85 /** File handle */
86 # define os_file_t	HANDLE
87 
/** Convert a C file descriptor to a native file handle.
The whole expansion is parenthesized so that the cast and the call stay
together when the macro is used inside a larger expression.
@param fd file descriptor
@return native file handle */
# define OS_FILE_FROM_FD(fd) ((HANDLE) _get_osfhandle(fd))
92 
93 #else /* _WIN32 */
94 
95 typedef DIR*	os_file_dir_t;	/*!< directory stream */
96 
97 /** File handle */
98 typedef int	os_file_t;
99 
/** Convert a C file descriptor to a native file handle.
Here the macro expands to the descriptor itself. The expansion is
parenthesized so that an expression argument keeps its value when the
macro is used inside a larger expression.
@param fd file descriptor
@return native file handle */
# define OS_FILE_FROM_FD(fd) (fd)
104 
105 #endif /* _WIN32 */
106 
107 /** Common file descriptor for file IO instrumentation with PFS
108 on windows and other platforms */
109 struct pfs_os_file_t
110 {
111 	os_file_t   m_file;
112 #ifdef UNIV_PFS_IO
113 	struct PSI_file *m_psi;
114 #endif
115 };
116 
117 static const os_file_t OS_FILE_CLOSED = os_file_t(~0);
118 
119 /** The next value should be smaller or equal to the smallest sector size used
120 on any disk. A log block is required to be a portion of disk which is written
121 so that if the start and the end of a block get written to disk, then the
122 whole block gets written. This should be true even in most cases of a crash:
123 if this fails for a log block, then it is equivalent to a media failure in the
124 log. */
125 
126 #define OS_FILE_LOG_BLOCK_SIZE		512
127 
/** Options for os_file_create_func @{ */
enum os_file_create_t {
	/** Open an existing file; it is an error if it does not exist */
	OS_FILE_OPEN = 51,

	/** Create a new file; it is an error if it already exists */
	OS_FILE_CREATE = 52,

	/** Create a new file; if one exists, overwrite the old file */
	OS_FILE_OVERWRITE = 53,

	/** Open a raw device or disk partition */
	OS_FILE_OPEN_RAW = 54,

	/** Create the directories */
	OS_FILE_CREATE_PATH = 55,

	/** Open with retry */
	OS_FILE_OPEN_RETRY = 56,

	/* Flags that can be combined with the above values. Please ensure
	that the above values stay below 128. */

	/** Do not exit on unknown errors */
	OS_FILE_ON_ERROR_NO_EXIT = 128,

	/** Do not print diagnostic messages to the log unless it is a
	fatal error; this flag is only used if ON_ERROR_NO_EXIT is set */
	OS_FILE_ON_ERROR_SILENT = 256
};
150 
151 static const ulint OS_FILE_READ_ONLY = 333;
152 static const ulint OS_FILE_READ_WRITE = 444;
153 
154 /** Used by MySQLBackup */
155 static const ulint OS_FILE_READ_ALLOW_DELETE = 555;
156 
157 /* Options for file_create */
158 static const ulint OS_FILE_AIO = 61;
159 static const ulint OS_FILE_NORMAL = 62;
160 /* @} */
161 
162 /** Types for file create @{ */
163 static const ulint OS_DATA_FILE = 100;
164 static const ulint OS_LOG_FILE = 101;
165 static const ulint OS_DATA_TEMP_FILE = 102;
166 /* @} */
167 
168 /** Error codes from os_file_get_last_error @{ */
169 static const ulint OS_FILE_NOT_FOUND = 71;
170 static const ulint OS_FILE_DISK_FULL = 72;
171 static const ulint OS_FILE_ALREADY_EXISTS = 73;
172 static const ulint OS_FILE_PATH_ERROR = 74;
173 
174 /** wait for OS aio resources to become available again */
175 static const ulint OS_FILE_AIO_RESOURCES_RESERVED = 75;
176 
177 static const ulint OS_FILE_SHARING_VIOLATION = 76;
178 static const ulint OS_FILE_ERROR_NOT_SPECIFIED = 77;
179 static const ulint OS_FILE_INSUFFICIENT_RESOURCE = 78;
180 static const ulint OS_FILE_AIO_INTERRUPTED = 79;
181 static const ulint OS_FILE_OPERATION_ABORTED = 80;
182 static const ulint OS_FILE_ACCESS_VIOLATION = 81;
183 static const ulint OS_FILE_ERROR_MAX = 100;
184 /* @} */
185 
186 /** Compression algorithm. */
187 struct Compression {
188 
189 	/** Algorithm types supported */
190 	enum Type {
191 		/* Note: During recovery we don't have the compression type
192 		because the .frm file has not been read yet. Therefore
193 		we write the recovered pages out without compression. */
194 
195 		/** No compression */
196 		NONE = 0,
197 
198 		/** Use ZLib */
199 		ZLIB = 1,
200 
201 		/** Use LZ4 faster variant, usually lower compression. */
202 		LZ4 = 2
203 	};
204 
205 	/** Compressed page meta-data */
206 	struct meta_t {
207 
208 		/** Version number */
209 		uint8_t		m_version;
210 
211 		/** Algorithm type */
212 		Type		m_algorithm;
213 
214 		/** Original page type */
215 		uint16_t	m_original_type;
216 
217 		/** Original page size, before compression */
218 		uint16_t	m_original_size;
219 
220 		/** Size after compression */
221 		uint16_t	m_compressed_size;
222 	};
223 
224 	/** Default constructor */
CompressionCompression225 	Compression() : m_type(NONE) { };
226 
227 	/** Specific constructor
228 	@param[in]	type		Algorithm type */
CompressionCompression229 	explicit Compression(Type type)
230 		:
231 		m_type(type)
232 	{
233 #ifdef UNIV_DEBUG
234 		switch (m_type) {
235 		case NONE:
236 		case ZLIB:
237 		case LZ4:
238 
239 		default:
240 			ut_error;
241 		}
242 #endif /* UNIV_DEBUG */
243 	}
244 
245 	/** Version of compressed page */
246 	static const uint8_t FIL_PAGE_VERSION_1 = 1;
247 	static const uint8_t FIL_PAGE_VERSION_2 = 2;
248 
249 	/** Check the page header type field.
250 	@param[in]	page		Page contents
251 	@return true if it is a compressed page */
252 	static bool is_compressed_page(const byte* page)
253 		MY_ATTRIBUTE((warn_unused_result));
254 
255 	/** Check the page header type field.
256 	@param[in]   page            Page contents
257 	@return true if it is a compressed and encrypted page */
258 	static bool is_compressed_encrypted_page(const byte *page)
259 		MY_ATTRIBUTE((warn_unused_result));
260 
261 	/** Check if the version on page is valid.
262 	@param[in]   version         version
263 	@return true if version is valid */
264 	static bool is_valid_page_version(uint8_t version);
265 
266         /** Check wether the compression algorithm is supported.
267         @param[in]      algorithm       Compression algorithm to check
268         @param[out]     type            The type that algorithm maps to
269         @return DB_SUCCESS or error code */
270 	static dberr_t check(const char* algorithm, Compression* type)
271 		MY_ATTRIBUTE((warn_unused_result));
272 
273         /** Validate the algorithm string.
274         @param[in]      algorithm       Compression algorithm to check
275         @return DB_SUCCESS or error code */
276 	static dberr_t validate(const char* algorithm)
277 		MY_ATTRIBUTE((warn_unused_result));
278 
279         /** Convert to a "string".
280         @param[in]      type            The compression type
281         @return the string representation */
282         static const char* to_string(Type type)
283 		MY_ATTRIBUTE((warn_unused_result));
284 
285         /** Convert the meta data to a std::string.
286         @param[in]      meta		Page Meta data
287         @return the string representation */
288         static std::string to_string(const meta_t& meta)
289 		MY_ATTRIBUTE((warn_unused_result));
290 
291 	/** Deserizlise the page header compression meta-data
292 	@param[in]	header		Pointer to the page header
293 	@param[out]	control		Deserialised data */
294 	static void deserialize_header(
295 		const byte*	page,
296 		meta_t*		control);
297 
298         /** Check if the string is "empty" or "none".
299         @param[in]      algorithm       Compression algorithm to check
300         @return true if no algorithm requested */
301 	static bool is_none(const char* algorithm)
302 		MY_ATTRIBUTE((warn_unused_result));
303 
304 	/** Decompress the page data contents. Page type must be
305 	FIL_PAGE_COMPRESSED, if not then the source contents are
306 	left unchanged and DB_SUCCESS is returned.
307 	@param[in]	dblwr_recover	true of double write recovery
308 					in progress
309 	@param[in,out]	src		Data read from disk, decompressed
310 					data will be copied to this page
311 	@param[in,out]	dst		Scratch area to use for decompression
312 	@param[in]	dst_len		Size of the scratch area in bytes
313 	@return DB_SUCCESS or error code */
314 	static dberr_t deserialize(
315 		bool		dblwr_recover,
316 		byte*		src,
317 		byte*		dst,
318 		ulint		dst_len)
319 		MY_ATTRIBUTE((warn_unused_result));
320 
321 	/** Compression type */
322 	Type		m_type;
323 };
324 
325 /** Encryption key length */
326 static const ulint ENCRYPTION_KEY_LEN = 32;
327 
328 /** Encryption magic bytes size */
329 static const ulint ENCRYPTION_MAGIC_SIZE = 3;
330 
331 /** Encryption magic bytes for 5.7.11, it's for checking the encryption information
332 version. */
333 static const char ENCRYPTION_KEY_MAGIC_V1[] = "lCA";
334 
335 /** Encryption magic bytes for 5.7.12+, it's for checking the encryption information
336 version. */
337 static const char ENCRYPTION_KEY_MAGIC_V2[] = "lCB";
338 
339 /** Encryption master key prifix */
340 static const char ENCRYPTION_MASTER_KEY_PRIFIX[] = "INNODBKey";
341 
342 /** Encryption master key prifix size */
343 static const ulint ENCRYPTION_MASTER_KEY_PRIFIX_LEN = 9;
344 
345 /** Encryption master key prifix size */
346 static const ulint ENCRYPTION_MASTER_KEY_NAME_MAX_LEN = 100;
347 
348 /** UUID of server instance, it's needed for composing master key name */
349 static const ulint ENCRYPTION_SERVER_UUID_LEN = 36;
350 
351 /** Encryption information total size for 5.7.11: magic number + master_key_id +
352 key + iv + checksum */
353 static const ulint ENCRYPTION_INFO_SIZE_V1 = (ENCRYPTION_MAGIC_SIZE \
354 					 + (ENCRYPTION_KEY_LEN * 2) \
355 					 + 2 * sizeof(ulint));
356 
357 /** Encryption information total size: magic number + master_key_id +
358 key + iv + server_uuid + checksum */
359 static const ulint ENCRYPTION_INFO_SIZE_V2 = (ENCRYPTION_MAGIC_SIZE \
360 					 + (ENCRYPTION_KEY_LEN * 2) \
361 					 + ENCRYPTION_SERVER_UUID_LEN \
362 					 + 2 * sizeof(ulint));
363 
364 class IORequest;
365 
366 /** Encryption algorithm. */
367 struct Encryption {
368 
369 	/** Algorithm types supported */
370 	enum Type {
371 
372 		/** No encryption */
373 		NONE = 0,
374 
375 		/** Use AES */
376 		AES = 1,
377 	};
378 
379 	/** Encryption information format version */
380 	enum Version {
381 
382 		/** Version in 5.7.11 */
383 		ENCRYPTION_VERSION_1 = 0,
384 
385 		/** Version in > 5.7.11 */
386 		ENCRYPTION_VERSION_2 = 1,
387 	};
388 
389 	/** Default constructor */
EncryptionEncryption390 	Encryption() : m_type(NONE) { };
391 
392 	/** Specific constructor
393 	@param[in]	type		Algorithm type */
EncryptionEncryption394 	explicit Encryption(Type type)
395 		:
396 		m_type(type)
397 	{
398 #ifdef UNIV_DEBUG
399 		switch (m_type) {
400 		case NONE:
401 		case AES:
402 
403 		default:
404 			ut_error;
405 		}
406 #endif /* UNIV_DEBUG */
407 	}
408 
409 	/** Copy constructor */
EncryptionEncryption410 	Encryption(const Encryption& other)
411 		:
412 		m_type(other.m_type),
413 		m_key(other.m_key),
414 		m_klen(other.m_klen),
415 		m_iv(other.m_iv)
416 	{ };
417 
418 	/** Check if page is encrypted page or not
419 	@param[in]	page	page which need to check
420 	@return true if it is a encrypted page */
421 	static bool is_encrypted_page(const byte* page)
422 		MY_ATTRIBUTE((warn_unused_result));
423 
424 	/** Check the encryption option and set it
425 	@param[in]	option		encryption option
426 	@param[in/out]	encryption	The encryption type
427 	@return DB_SUCCESS or DB_UNSUPPORTED */
428 	dberr_t set_algorithm(const char* option, Encryption* type)
429 		MY_ATTRIBUTE((warn_unused_result));
430 
431         /** Validate the algorithm string.
432         @param[in]      algorithm       Encryption algorithm to check
433         @return DB_SUCCESS or error code */
434 	static dberr_t validate(const char* algorithm)
435 		MY_ATTRIBUTE((warn_unused_result));
436 
437         /** Convert to a "string".
438         @param[in]      type            The encryption type
439         @return the string representation */
440         static const char* to_string(Type type)
441 		MY_ATTRIBUTE((warn_unused_result));
442 
443         /** Check if the string is "empty" or "none".
444         @param[in]      algorithm       Encryption algorithm to check
445         @return true if no algorithm requested */
446 	static bool is_none(const char* algorithm)
447 		MY_ATTRIBUTE((warn_unused_result));
448 
449         /** Generate random encryption value for key and iv.
450         @param[in,out]	value	Encryption value */
451 	static void random_value(byte* value);
452 
453 	/** Create new master key for key rotation.
454         @param[in,out]	master_key	master key */
455 	static void create_master_key(byte** master_key);
456 
457         /** Get master key by key id.
458         @param[in]	master_key_id	master key id
459 	@param[in]	srv_uuid	uuid of server instance
460         @param[in,out]	master_key	master key */
461 	static void get_master_key(ulint master_key_id,
462 				   char* srv_uuid,
463 				   byte** master_key);
464 
465         /** Get current master key and key id.
466         @param[in,out]	master_key_id	master key id
467         @param[in,out]	master_key	master key
468         @param[in,out]	version		encryption information version */
469 	static void get_master_key(ulint* master_key_id,
470 				   byte** master_key,
471 				   Encryption::Version*  version);
472 
473 	/** Encrypt the page data contents. Page type can't be
474 	FIL_PAGE_ENCRYPTED, FIL_PAGE_COMPRESSED_AND_ENCRYPTED,
475 	FIL_PAGE_ENCRYPTED_RTREE.
476 	@param[in]	type		IORequest
477 	@param[in,out]	src		page data which need to encrypt
478 	@param[in]	src_len		Size of the source in bytes
479 	@param[in,out]	dst		destination area
480 	@param[in,out]	dst_len		Size of the destination in bytes
481 	@return buffer data, dst_len will have the length of the data */
482 	byte* encrypt(
483 		const IORequest&	type,
484 		byte*			src,
485 		ulint			src_len,
486 		byte*			dst,
487 		ulint*			dst_len)
488 		MY_ATTRIBUTE((warn_unused_result));
489 
490 	/** Decrypt the page data contents. Page type must be
491 	FIL_PAGE_ENCRYPTED, FIL_PAGE_COMPRESSED_AND_ENCRYPTED,
492 	FIL_PAGE_ENCRYPTED_RTREE, if not then the source contents are
493 	left unchanged and DB_SUCCESS is returned.
494 	@param[in]	type		IORequest
495 	@param[in,out]	src		Data read from disk, decrypt
496 					data will be copied to this page
497 	@param[in]	src_len		source data length
498 	@param[in,out]	dst		Scratch area to use for decrypt
499 	@param[in]	dst_len		Size of the scratch area in bytes
500 	@return DB_SUCCESS or error code */
501 	dberr_t decrypt(
502 		const IORequest&	type,
503 		byte*			src,
504 		ulint			src_len,
505 		byte*			dst,
506 		ulint			dst_len)
507 		MY_ATTRIBUTE((warn_unused_result));
508 
509 	/** Encrypt type */
510 	Type			m_type;
511 
512 	/** Encrypt key */
513 	byte*			m_key;
514 
515 	/** Encrypt key length*/
516 	ulint			m_klen;
517 
518 	/** Encrypt initial vector */
519 	byte*			m_iv;
520 
521 	/** Current master key id */
522 	static ulint		master_key_id;
523 
524 	/** Current uuid of server instance */
525 	static char		uuid[ENCRYPTION_SERVER_UUID_LEN + 1];
526 };
527 
528 /** Types for AIO operations @{ */
529 
530 /** No transformations during read/write, write as is. */
531 #define IORequestRead		IORequest(IORequest::READ)
532 #define IORequestWrite		IORequest(IORequest::WRITE)
533 #define IORequestLogRead	IORequest(IORequest::LOG | IORequest::READ)
534 #define IORequestLogWrite	IORequest(IORequest::LOG | IORequest::WRITE)
535 /**
536 The IO Context that is passed down to the low level IO code */
537 class IORequest {
538 public:
539 	/** Flags passed in the request, they can be ORred together. */
540 	enum {
541 		READ = 1,
542 		WRITE = 2,
543 
544 		/** Double write buffer recovery. */
545 		DBLWR_RECOVER = 4,
546 
547 		/** Enumarations below can be ORed to READ/WRITE above*/
548 
549 		/** Data file */
550 		DATA_FILE = 8,
551 
552 		/** Log file request*/
553 		LOG = 16,
554 
555 		/** Disable partial read warnings */
556 		DISABLE_PARTIAL_IO_WARNINGS = 32,
557 
558 		/** Do not to wake i/o-handler threads, but the caller will do
559 		the waking explicitly later, in this way the caller can post
560 		several requests in a batch; NOTE that the batch must not be
561 		so big that it exhausts the slots in AIO arrays! NOTE that
562 		a simulated batch may introduce hidden chances of deadlocks,
563 		because I/Os are not actually handled until all
564 		have been posted: use with great caution! */
565 		DO_NOT_WAKE = 64,
566 
567 		/** Ignore failed reads of non-existent pages */
568 		IGNORE_MISSING = 128,
569 
570 		/** Use punch hole if available, only makes sense if
571 		compression algorithm != NONE. Ignored if not set */
572 		PUNCH_HOLE = 256,
573 
574 		/** Force raw read, do not try to compress/decompress.
575 		This can be used to force a read and write without any
576 		compression e.g., for redo log, merge sort temporary files
577 		and the truncate redo log. */
578 		NO_COMPRESSION = 512,
579 
580 		/** Row log used in online DDL */
581 		ROW_LOG = 1024
582 	};
583 
584 	/** Default constructor */
IORequest()585 	IORequest()
586 		:
587 		m_block_size(UNIV_SECTOR_SIZE),
588 		m_type(READ),
589 		m_compression(),
590 		m_encryption()
591 	{
592 		/* No op */
593 	}
594 
595 	/**
596 	@param[in]	type		Request type, can be a value that is
597 					ORed from the above enum */
IORequest(ulint type)598 	explicit IORequest(ulint type)
599 		:
600 		m_block_size(UNIV_SECTOR_SIZE),
601 		m_type(static_cast<uint16_t>(type)),
602 		m_compression(),
603 		m_encryption()
604 	{
605 		if (is_log() || is_row_log()) {
606 			disable_compression();
607 		}
608 
609 		if (!is_punch_hole_supported()) {
610 			clear_punch_hole();
611 		}
612 	}
613 
614 	/** Destructor */
~IORequest()615 	~IORequest() { }
616 
617 	/** @return true if ignore missing flag is set */
ignore_missing(ulint type)618 	static bool ignore_missing(ulint type)
619 		MY_ATTRIBUTE((warn_unused_result))
620 	{
621 		return((type & IGNORE_MISSING) == IGNORE_MISSING);
622 	}
623 
624 	/** @return true if it is a read request */
is_read()625 	bool is_read() const
626 		MY_ATTRIBUTE((warn_unused_result))
627 	{
628 		return((m_type & READ) == READ);
629 	}
630 
631 	/** @return true if it is a write request */
is_write()632 	bool is_write() const
633 		MY_ATTRIBUTE((warn_unused_result))
634 	{
635 		return((m_type & WRITE) == WRITE);
636 	}
637 
638 	/** @return true if it is a redo log write */
is_log()639 	bool is_log() const
640 		MY_ATTRIBUTE((warn_unused_result))
641 	{
642 		return((m_type & LOG) == LOG);
643 	}
644 
645 	/** @return true if it is a row log entry used in online DDL */
is_row_log()646 	bool is_row_log() const
647 		MY_ATTRIBUTE((warn_unused_result))
648 	{
649 		return((m_type & ROW_LOG) == ROW_LOG);
650 	}
651 
652 	/** @return true if the simulated AIO thread should be woken up */
is_wake()653 	bool is_wake() const
654 		MY_ATTRIBUTE((warn_unused_result))
655 	{
656 		return((m_type & DO_NOT_WAKE) == 0);
657 	}
658 
659 	/** @return true if partial read warning disabled */
is_partial_io_warning_disabled()660 	bool is_partial_io_warning_disabled() const
661 		MY_ATTRIBUTE((warn_unused_result))
662 	{
663 		return((m_type & DISABLE_PARTIAL_IO_WARNINGS)
664 		       == DISABLE_PARTIAL_IO_WARNINGS);
665 	}
666 
667 	/** Disable partial read warnings */
disable_partial_io_warnings()668 	void disable_partial_io_warnings()
669 	{
670 		m_type |= DISABLE_PARTIAL_IO_WARNINGS;
671 	}
672 
673 	/** @return true if missing files should be ignored */
ignore_missing()674 	bool ignore_missing() const
675 		MY_ATTRIBUTE((warn_unused_result))
676 	{
677 		return(ignore_missing(m_type));
678 	}
679 
680 	/** @return true if punch hole should be used */
punch_hole()681 	bool punch_hole() const
682 		MY_ATTRIBUTE((warn_unused_result))
683 	{
684 		return((m_type & PUNCH_HOLE) == PUNCH_HOLE);
685 	}
686 
687 	/** @return true if the read should be validated */
validate()688 	bool validate() const
689 		MY_ATTRIBUTE((warn_unused_result))
690 	{
691 		ut_a(is_read() ^ is_write());
692 
693 		return(!is_read() || !punch_hole());
694 	}
695 
696 	/** Set the punch hole flag */
set_punch_hole()697 	void set_punch_hole()
698 	{
699 		if (is_punch_hole_supported()) {
700 			m_type |= PUNCH_HOLE;
701 		}
702 	}
703 
704 	/** Clear the do not wake flag */
clear_do_not_wake()705 	void clear_do_not_wake()
706 	{
707 		m_type &= ~DO_NOT_WAKE;
708 	}
709 
710 	/** Clear the punch hole flag */
clear_punch_hole()711 	void clear_punch_hole()
712 	{
713 		m_type &= ~PUNCH_HOLE;
714 	}
715 
716 	/** @return the block size to use for IO */
block_size()717 	ulint block_size() const
718 		MY_ATTRIBUTE((warn_unused_result))
719 	{
720 		return(m_block_size);
721 	}
722 
723 	/** Set the block size for IO
724 	@param[in] block_size		Block size to set */
block_size(ulint block_size)725 	void block_size(ulint block_size)
726 	{
727 		m_block_size = static_cast<uint32_t>(block_size);
728 	}
729 
730 	/** Clear all compression related flags */
clear_compressed()731 	void clear_compressed()
732 	{
733 		clear_punch_hole();
734 
735 		m_compression.m_type  = Compression::NONE;
736 	}
737 
738 	/** Compare two requests
739 	@reutrn true if the are equal */
740 	bool operator==(const IORequest& rhs) const
741 	{
742 		return(m_type == rhs.m_type);
743 	}
744 
745 	/** Set compression algorithm
746 	@param[in] compression	The compression algorithm to use */
compression_algorithm(Compression::Type type)747 	void compression_algorithm(Compression::Type type)
748 	{
749 		if (type == Compression::NONE) {
750 			return;
751 		}
752 
753 		set_punch_hole();
754 
755 		m_compression.m_type = type;
756 	}
757 
758 	/** Get the compression algorithm.
759 	@return the compression algorithm */
compression_algorithm()760 	Compression compression_algorithm() const
761 		MY_ATTRIBUTE((warn_unused_result))
762 	{
763 		return(m_compression);
764 	}
765 
766 	/** @return true if the page should be compressed */
is_compressed()767 	bool is_compressed() const
768 		MY_ATTRIBUTE((warn_unused_result))
769 	{
770 		return(compression_algorithm().m_type != Compression::NONE);
771 	}
772 
773 	/** @return true if the page read should not be transformed. */
is_compression_enabled()774 	bool is_compression_enabled() const
775 		MY_ATTRIBUTE((warn_unused_result))
776 	{
777 		return((m_type & NO_COMPRESSION) == 0);
778 	}
779 
780 	/** Disable transformations. */
disable_compression()781 	void disable_compression()
782 	{
783 		m_type |= NO_COMPRESSION;
784 	}
785 
786 	/** Set encryption algorithm
787 	@param[in] type		The encryption algorithm to use */
encryption_algorithm(Encryption::Type type)788 	void encryption_algorithm(Encryption::Type type)
789 	{
790 		if (type == Encryption::NONE) {
791 			return;
792 		}
793 
794 		m_encryption.m_type = type;
795 	}
796 
797 	/** Set encryption key and iv
798 	@param[in] key		The encryption key to use
799 	@param[in] key_len	length of the encryption key
800 	@param[in] iv		The encryption iv to use */
encryption_key(byte * key,ulint key_len,byte * iv)801 	void encryption_key(byte* key,
802 			    ulint key_len,
803 			    byte* iv)
804 	{
805 		m_encryption.m_key = key;
806 		m_encryption.m_klen = key_len;
807 		m_encryption.m_iv = iv;
808 	}
809 
810 	/** Get the encryption algorithm.
811 	@return the encryption algorithm */
encryption_algorithm()812 	Encryption encryption_algorithm() const
813 		MY_ATTRIBUTE((warn_unused_result))
814 	{
815 		return(m_encryption);
816 	}
817 
818 	/** @return true if the page should be encrypted. */
is_encrypted()819 	bool is_encrypted() const
820 		MY_ATTRIBUTE((warn_unused_result))
821 	{
822 		return(m_encryption.m_type != Encryption::NONE);
823 	}
824 
825 	/** Clear all encryption related flags */
clear_encrypted()826 	void clear_encrypted()
827 	{
828 		m_encryption.m_key = NULL;
829 		m_encryption.m_klen = 0;
830 		m_encryption.m_iv = NULL;
831 		m_encryption.m_type = Encryption::NONE;
832 	}
833 
834 	/** Note that the IO is for double write recovery. */
dblwr_recover()835 	void dblwr_recover()
836 	{
837 		m_type |= DBLWR_RECOVER;
838 	}
839 
840 	/** @return true if the request is from the dblwr recovery */
is_dblwr_recover()841 	bool is_dblwr_recover() const
842 		MY_ATTRIBUTE((warn_unused_result))
843 	{
844 		return((m_type & DBLWR_RECOVER) == DBLWR_RECOVER);
845 	}
846 
847 	/** @return true if punch hole is supported */
is_punch_hole_supported()848 	static bool is_punch_hole_supported()
849 	{
850 
851 		/* In this debugging mode, we act as if punch hole is supported,
852 		and then skip any calls to actually punch a hole here.
853 		In this way, Transparent Page Compression is still being tested. */
854 		DBUG_EXECUTE_IF("ignore_punch_hole",
855 			return(true);
856 		);
857 
858 #if defined(HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE) || defined(_WIN32)
859 		return(true);
860 #else
861 		return(false);
862 #endif /* HAVE_FALLOC_PUNCH_HOLE_AND_KEEP_SIZE || _WIN32 */
863 	}
864 
865 private:
866 	/* File system best block size */
867 	uint32_t		m_block_size;
868 
869 	/** Request type bit flags */
870 	uint16_t		m_type;
871 
872 	/** Compression algorithm */
873 	Compression		m_compression;
874 
875 	/** Encryption algorithm */
876 	Encryption		m_encryption;
877 };
878 
879 /* @} */
880 
881 /** Sparse file size information. */
882 struct os_file_size_t {
883 	/** Total size of file in bytes */
884 	os_offset_t	m_total_size;
885 
886 	/** If it is a sparse file then this is the number of bytes
887 	actually allocated for the file. */
888 	os_offset_t	m_alloc_size;
889 };
890 
891 /** Win NT does not allow more than 64 */
892 static const ulint OS_AIO_N_PENDING_IOS_PER_THREAD = 32;
893 
894 /** Modes for aio operations @{ */
895 /** Normal asynchronous i/o not for ibuf pages or ibuf bitmap pages */
896 static const ulint OS_AIO_NORMAL = 21;
897 
898 /**  Asynchronous i/o for ibuf pages or ibuf bitmap pages */
899 static const ulint OS_AIO_IBUF = 22;
900 
901 /** Asynchronous i/o for the log */
902 static const ulint OS_AIO_LOG = 23;
903 
904 /** Asynchronous i/o where the calling thread will itself wait for
905 the i/o to complete, doing also the job of the i/o-handler thread;
906 can be used for any pages, ibuf or non-ibuf.  This is used to save
907 CPU time, as we can do with fewer thread switches. Plain synchronous
908 I/O is not as good, because it must serialize the file seek and read
909 or write, causing a bottleneck for parallelism. */
910 static const ulint OS_AIO_SYNC = 24;
911 /* @} */
912 
913 extern ulint	os_n_file_reads;
914 extern ulint	os_n_file_writes;
915 extern ulint	os_n_fsyncs;
916 
/* File types for directory entry data type */

enum os_file_type_t {
	/* type is not known */
	OS_FILE_TYPE_UNKNOWN = 0,

	/* regular file */
	OS_FILE_TYPE_FILE = 1,

	/* directory */
	OS_FILE_TYPE_DIR = 2,

	/* symbolic link */
	OS_FILE_TYPE_LINK = 3,

	/* block device */
	OS_FILE_TYPE_BLOCK = 4
};
926 
/* Maximum path string length in bytes when referring to tables within the
'./databasename/tablename.ibd' path format; we can allocate at least 2 buffers
of this size from the thread stack; that is why this should not be made much
bigger than 4000 bytes.  The maximum path length used by any storage engine
in the server must be at least this big. */
932 #define OS_FILE_MAX_PATH	4000
933 #if (FN_REFLEN_SE < OS_FILE_MAX_PATH)
934 # error "(FN_REFLEN_SE < OS_FILE_MAX_PATH)"
935 #endif
936 
937 /** Struct used in fetching information of a file in a directory */
938 struct os_file_stat_t {
939 	char		name[OS_FILE_MAX_PATH];	/*!< path to a file */
940 	os_file_type_t	type;			/*!< file type */
941 	os_offset_t	size;			/*!< file size in bytes */
942 	os_offset_t	alloc_size;		/*!< Allocated size for
943 						sparse files in bytes */
944 	size_t		block_size;		/*!< Block size to use for IO
945 						in bytes*/
946 	time_t		ctime;			/*!< creation time */
947 	time_t		mtime;			/*!< modification time */
948 	time_t		atime;			/*!< access time */
949 	bool		rw_perm;		/*!< true if can be opened
950 						in read-write mode. Only valid
951 						if type == OS_FILE_TYPE_FILE */
952 };
953 
954 #ifndef UNIV_HOTBACKUP
955 /** Create a temporary file. This function is like tmpfile(3), but
956 the temporary file is created in the given parameter path. If the path
957 is null then it will create the file in the mysql server configuration
958 parameter (--tmpdir).
959 @param[in]	path	location for creating temporary file
960 @return temporary file handle, or NULL on error */
961 FILE*
962 os_file_create_tmpfile(
963 	const char*	path);
964 #endif /* !UNIV_HOTBACKUP */
965 
/** The os_file_opendir() function opens a directory stream corresponding to the
directory named by the dirname argument. The directory stream is positioned
at the first entry. In both Unix and Windows we automatically skip the '.'
and '..' items at the start of the directory listing.

@param[in]	dirname		directory name; it must not contain a trailing
				'\' or '/'
@param[in]	is_fatal	true if we should treat an error as a fatal
				error; if we try to open symlinks then we do
				not wish a fatal error if it happens not to be
				a directory
@return directory stream (to be closed with os_file_closedir()),
	NULL if error */
os_file_dir_t
os_file_opendir(
	const char*	dirname,
	bool		is_fatal);
982 
983 /**
984 Closes a directory stream.
985 @param[in] dir	directory stream
986 @return 0 if success, -1 if failure */
987 int
988 os_file_closedir(
989 	os_file_dir_t	dir);
990 
/** Returns information about the next file in the directory. The '.' and
'..' entries of the directory are jumped over.
@param[in]	dirname		directory name or path
@param[in]	dir		directory stream
@param[out]	info		buffer where the info is returned
@return 0 if ok, -1 if error, 1 if at the end of the directory */
int
os_file_readdir_next_file(
	const char*	dirname,
	os_file_dir_t	dir,
	os_file_stat_t*	info);
1002 
1003 /**
1004 This function attempts to create a directory named pathname. The new directory
1005 gets default permissions. On Unix, the permissions are (0770 & ~umask). If the
1006 directory exists already, nothing is done and the call succeeds, unless the
1007 fail_if_exists arguments is true.
1008 
1009 @param[in]	pathname	directory name as null-terminated string
1010 @param[in]	fail_if_exists	if true, pre-existing directory is treated
1011 				as an error.
1012 @return true if call succeeds, false on error */
1013 bool
1014 os_file_create_directory(
1015 	const char*	pathname,
1016 	bool		fail_if_exists);
1017 
/** NOTE! Use the corresponding macro os_file_create_simple(), not directly
this function!
A simple function to open or create a file.
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	access_type	OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded, false if error
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
pfs_os_file_t
os_file_create_simple_func(
	const char*	name,
	ulint		create_mode,
	ulint		access_type,
	bool		read_only,
	bool*		success);
1036 
/** NOTE! Use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A simple function to open or create a file.
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	access_type	OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
				OS_FILE_READ_ALLOW_DELETE; the last option
				is used by a backup program reading the file
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
pfs_os_file_t
os_file_create_simple_no_error_handling_func(
	const char*	name,
	ulint		create_mode,
	ulint		access_type,
	bool		read_only,
	bool*		success)
	MY_ATTRIBUTE((warn_unused_result));
1057 
/** Tries to disable OS caching on an opened file descriptor.
@param[in]	fd		file descriptor to alter
@param[in]	file_name	file name, used in the diagnostic message
@param[in]	operation_name	"open" or "create"; used in the diagnostic
				message */
void
os_file_set_nocache(
	int		fd,
	const char*	file_name,
	const char*	operation_name);
1068 
/** NOTE! Use the corresponding macro os_file_create(), not directly
this function!
Opens an existing file or creates a new.
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	purpose		OS_FILE_AIO, if asynchronous, non-buffered I/O
				is desired, OS_FILE_NORMAL, if any normal file;
				NOTE that it also depends on type, os_aio_..
				and srv_.. variables whether we really use
				async I/O or unbuffered I/O: look in the
				function source code for the exact rules
@param[in]	type		OS_DATA_FILE or OS_LOG_FILE
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
pfs_os_file_t
os_file_create_func(
	const char*	name,
	ulint		create_mode,
	ulint		purpose,
	ulint		type,
	bool		read_only,
	bool*		success)
	MY_ATTRIBUTE((warn_unused_result));
1095 
/** Deletes a file. The file has to be closed before calling this.
This is the function the os_file_delete() macro resolves to, directly or
through its performance schema wrapper.
@param[in]	name		file path as a null-terminated string
@return true if success */
bool
os_file_delete_func(const char* name);
1101 
/** Deletes a file if it exists. The file has to be closed before calling this.
This is the function the os_file_delete_if_exists() macro resolves to,
directly or through its performance schema wrapper.
@param[in]	name		file path as a null-terminated string
@param[out]	exist		indicates whether the file pre-existed
@return true if success */
bool
os_file_delete_if_exists_func(const char* name, bool* exist);
1108 
/** NOTE! Use the corresponding macro os_file_rename(), not directly
this function!
Renames a file (can also move it to another directory). It is safest that the
file is closed before calling this function.
@param[in]	oldpath		old file path as a null-terminated string
@param[in]	newpath		new file path as a null-terminated string
@return true if success */
bool
os_file_rename_func(const char* oldpath, const char* newpath);
1118 
/** NOTE! Use the corresponding macro os_file_close(), not directly this
function!
Closes a file handle. In case of error, the error number can be retrieved
with os_file_get_last_error.
@param[in]	file		own: handle to a file
@return true if success */
bool
os_file_close_func(os_file_t file);
1127 
1128 #ifdef UNIV_PFS_IO
1129 
/* Keys to register InnoDB I/O with performance schema. They have the
mysql_pfs_key_t type that the "key" argument of the instrumented
os_file_*() wrappers expects. */
extern mysql_pfs_key_t	innodb_data_file_key;
extern mysql_pfs_key_t	innodb_log_file_key;
extern mysql_pfs_key_t	innodb_temp_file_key;
1134 
/* The following four pairs of macros are instrumentation hooks that
register various file I/O operations with performance schema.
1) register_pfs_file_open_begin() and register_pfs_file_open_end() are
used to register file creation, opening, closing and renaming.
2) register_pfs_file_rename_begin() and register_pfs_file_rename_end()
are used to register file renaming
3) register_pfs_file_io_begin() and register_pfs_file_io_end() are
used to register actual file read, write and flush
4) register_pfs_file_close_begin() and register_pfs_file_close_end()
are used to register file deletion operations */
/** Begins performance schema instrumentation of a file open-type operation.
Requests a file-name locker via get_thread_file_name_locker and stores it in
"locker"; when a locker was obtained, start_file_open_wait is called on it.
All macro arguments that are combined with an operator are parenthesized so
that arbitrary expressions can be passed safely.
@param	state		locker state passed to get_thread_file_name_locker
@param	locker		lvalue that receives the locker; NULL afterwards
			means nothing is being instrumented
@param	key		performance schema key; its m_value member is used
@param	op		file operation passed to the performance schema
@param	name		file name or path
@param	src_file	source file name of the call site
@param	src_line	source line of the call site (converted to uint) */
# define register_pfs_file_open_begin(state, locker, key, op, name,	\
				      src_file, src_line)		\
do {									\
	(locker) = PSI_FILE_CALL(get_thread_file_name_locker)(		\
		state, (key).m_value, op, name, &(locker));		\
	if ((locker) != NULL) {						\
		PSI_FILE_CALL(start_file_open_wait)(			\
			(locker), src_file,				\
			static_cast<uint>(src_line));			\
	}								\
} while (0)
1155 
/** Ends performance schema instrumentation of a file open-type operation.
When "locker" is not NULL, end_file_open_wait is called and its return
value is stored in the m_psi member of "file". The "file" and "locker"
arguments are parenthesized so that expressions such as *ptr work.
@param	locker	locker set up by register_pfs_file_open_begin(), or NULL
@param	file	lvalue with an m_psi member that receives the result
@param	result	value passed on to end_file_open_wait */
# define register_pfs_file_open_end(locker, file, result)		\
do {									\
	if ((locker) != NULL) {						\
		(file).m_psi = PSI_FILE_CALL(				\
		end_file_open_wait)(					\
			(locker), result);				\
	}								\
} while (0)
1164 
/** Begins performance schema instrumentation of a file rename. This simply
forwards all arguments to register_pfs_file_open_begin(), which already
converts src_line to uint, so no cast is repeated here. The definition
deliberately ends without a line continuation so that it cannot swallow
whatever line follows it. */
# define register_pfs_file_rename_begin(state, locker, key, op, name,	\
					src_file, src_line)		\
	register_pfs_file_open_begin(					\
		state, locker, key, op, name,				\
		src_file, src_line)
1170 
/** Ends performance schema instrumentation of a file rename: when "locker"
is not NULL, end_file_rename_wait is called with the locker, both paths and
the result; otherwise nothing happens. */
# define register_pfs_file_rename_end(locker, from, to, result)		\
do {									\
	if (locker == NULL) {						\
		break;							\
	}								\
	PSI_FILE_CALL(end_file_rename_wait)(				\
		locker, from, to, result);				\
} while (0)
1179 
/** Begins performance schema instrumentation of a file close-type
operation. Requests a file-name locker via get_thread_file_name_locker and
stores it in "locker"; when a locker was obtained, start_file_close_wait is
called on it. Arguments combined with an operator are parenthesized so that
arbitrary expressions can be passed safely.
@param	state		locker state passed to get_thread_file_name_locker
@param	locker		lvalue that receives the locker; NULL afterwards
			means nothing is being instrumented
@param	key		performance schema key; its m_value member is used
@param	op		file operation passed to the performance schema
@param	name		file name or path
@param	src_file	source file name of the call site
@param	src_line	source line of the call site (converted to uint) */
# define register_pfs_file_close_begin(state, locker, key, op, name,	\
				       src_file, src_line)		\
do {									\
	(locker) = PSI_FILE_CALL(get_thread_file_name_locker)(		\
		state, (key).m_value, op, name, &(locker));		\
	if ((locker) != NULL) {						\
		PSI_FILE_CALL(start_file_close_wait)(			\
			(locker), src_file,				\
			static_cast<uint>(src_line));			\
	}								\
} while (0)
1190 
/** Ends performance schema instrumentation of a file close-type operation:
when "locker" is not NULL, end_file_close_wait is called with the locker and
the result; otherwise nothing happens. */
# define register_pfs_file_close_end(locker, result)			\
do {									\
	if (locker == NULL) {						\
		break;							\
	}								\
	PSI_FILE_CALL(end_file_close_wait)(locker, result);		\
} while (0)
1198 
/** Begins performance schema instrumentation of an I/O operation on an
already opened file. Requests a stream locker for the m_psi member of "file"
via get_thread_file_stream_locker and stores it in "locker"; when a locker
was obtained, start_file_wait is called on it. Arguments combined with an
operator are parenthesized so that arbitrary expressions can be passed safely.
@param	state		locker state passed to get_thread_file_stream_locker
@param	locker		lvalue that receives the locker; NULL afterwards
			means nothing is being instrumented
@param	file		file handle object; its m_psi member is used
@param	count		byte count passed to start_file_wait
@param	op		file operation passed to the performance schema
@param	src_file	source file name of the call site
@param	src_line	source line of the call site (converted to uint) */
# define register_pfs_file_io_begin(state, locker, file, count, op,	\
				    src_file, src_line)			\
do {									\
	(locker) = PSI_FILE_CALL(get_thread_file_stream_locker)(	\
		state, (file).m_psi, op);				\
	if ((locker) != NULL) {						\
		PSI_FILE_CALL(start_file_wait)(				\
			(locker), count,				\
			src_file, static_cast<uint>(src_line));		\
	}								\
} while (0)
1210 
/** Ends performance schema instrumentation of an I/O operation: when
"locker" is not NULL, end_file_wait is called with the locker and the
count; otherwise nothing happens. */
# define register_pfs_file_io_end(locker, count)			\
do {									\
	if (locker == NULL) {						\
		break;							\
	}								\
	PSI_FILE_CALL(end_file_wait)(locker, count);			\
} while (0)
1217 
1218 /* Following macros/functions are file I/O APIs that would be performance
1219 schema instrumented if "UNIV_PFS_IO" is defined. They would point to
1220 wrapper functions with performance schema instrumentation in such case.
1221 
1222 os_file_create
1223 os_file_create_simple
1224 os_file_create_simple_no_error_handling
1225 os_file_close
1226 os_file_rename
1227 os_aio
1228 os_file_read
1229 os_file_read_no_error_handling
1230 os_file_read_no_error_handling_int_fd
1231 os_file_write
1232 
The wrapper functions have the prefix of "pfs_". */
1234 
/* Instrumented build: each I/O macro below forwards its arguments to the
matching pfs_*() wrapper and appends the call site (__FILE__, __LINE__). */

# define os_file_create(key, name, create_mode, purpose, type,		\
			read_only, success)				\
	pfs_os_file_create_func(					\
		key, name, create_mode, purpose, type,			\
		read_only, success, __FILE__, __LINE__)

# define os_file_create_simple(key, name, create_mode, access_type,	\
			       read_only, success)			\
	pfs_os_file_create_simple_func(					\
		key, name, create_mode, access_type,			\
		read_only, success, __FILE__, __LINE__)

# define os_file_create_simple_no_error_handling(			\
		key, name, create_mode, access_type, read_only,		\
		success)						\
	pfs_os_file_create_simple_no_error_handling_func(		\
		key, name, create_mode, access_type,			\
		read_only, success, __FILE__, __LINE__)

# define os_file_close_pfs(file)					\
	pfs_os_file_close_func(file, __FILE__, __LINE__)

# define os_aio(type, mode, name, file, buf, offset,			\
		n, read_only, m1, m2)					\
	pfs_os_aio_func(						\
		type, mode, name, file, buf, offset,			\
		n, read_only, m1, m2, __FILE__, __LINE__)

# define os_file_read_pfs(type, file, buf, offset, n)			\
	pfs_os_file_read_func(						\
		type, file, buf, offset, n, __FILE__, __LINE__)

# define os_file_read_no_error_handling_pfs(				\
		type, file, buf, offset, n, o)				\
	pfs_os_file_read_no_error_handling_func(			\
		type, file, buf, offset, n, o, __FILE__, __LINE__)

# define os_file_read_no_error_handling_int_fd(				\
		type, file, buf, offset, n, o)				\
	pfs_os_file_read_no_error_handling_int_fd_func(			\
		type, file, buf, offset, n, o, __FILE__, __LINE__)

# define os_file_write_pfs(type, name, file, buf, offset, n)		\
	pfs_os_file_write_func(						\
		type, name, file, buf, offset, n, __FILE__, __LINE__)

# define os_file_write_int_fd(type, name, file, buf, offset, n)	\
	pfs_os_file_write_int_fd_func(					\
		type, name, file, buf, offset, n, __FILE__, __LINE__)

# define os_file_flush_pfs(file)					\
	pfs_os_file_flush_func(file, __FILE__, __LINE__)

# define os_file_rename(key, oldpath, newpath)				\
	pfs_os_file_rename_func(					\
		key, oldpath, newpath, __FILE__, __LINE__)

# define os_file_delete(key, name)					\
	pfs_os_file_delete_func(key, name, __FILE__, __LINE__)

# define os_file_delete_if_exists(key, name, exist)			\
	pfs_os_file_delete_if_exists_func(				\
		key, name, exist, __FILE__, __LINE__)
1291 
1292 
1293 
/** NOTE! Please use the corresponding macro os_file_create_simple(),
not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple() which opens or creates a file.
@param[in]	key		Performance Schema Key
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	access_type	OS_FILE_READ_ONLY or OS_FILE_READ_WRITE
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_func(
	mysql_pfs_key_t key,
	const char*	name,
	ulint		create_mode,
	ulint		access_type,
	bool		read_only,
	bool*		success,
	const char*	src_file,
	ulint		src_line)
	MY_ATTRIBUTE((warn_unused_result));
1321 
/** NOTE! Please use the corresponding macro
os_file_create_simple_no_error_handling(), not directly this function!
A performance schema instrumented wrapper function for
os_file_create_simple_no_error_handling(). Add instrumentation to
monitor file creation/open.
@param[in]	key		Performance Schema Key
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	access_type	OS_FILE_READ_ONLY, OS_FILE_READ_WRITE, or
				OS_FILE_READ_ALLOW_DELETE; the last option is
				used by a backup program reading the file
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_simple_no_error_handling_func(
	mysql_pfs_key_t key,
	const char*	name,
	ulint		create_mode,
	ulint		access_type,
	bool		read_only,
	bool*		success,
	const char*	src_file,
	ulint		src_line)
	MY_ATTRIBUTE((warn_unused_result));
1352 
/** NOTE! Please use the corresponding macro os_file_create(), not directly
this function!
A performance schema wrapper function for os_file_create().
Add instrumentation to monitor file creation/open.
@param[in]	key		Performance Schema Key
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	create_mode	create mode
@param[in]	purpose		OS_FILE_AIO, if asynchronous, non-buffered I/O
				is desired, OS_FILE_NORMAL, if any normal file;
				NOTE that it also depends on type, os_aio_..
				and srv_.. variables whether we really use
				async I/O or unbuffered I/O: look in the
				function source code for the exact rules
@param[in]	type		OS_DATA_FILE or OS_LOG_FILE
@param[in]	read_only	if true read only mode checks are enforced
@param[out]	success		true if succeeded
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return own: handle to the file, not defined if error, error number
	can be retrieved with os_file_get_last_error */
UNIV_INLINE
pfs_os_file_t
pfs_os_file_create_func(
	mysql_pfs_key_t key,
	const char*	name,
	ulint		create_mode,
	ulint		purpose,
	ulint		type,
	bool		read_only,
	bool*		success,
	const char*	src_file,
	ulint		src_line)
	MY_ATTRIBUTE((warn_unused_result));
1386 
/** NOTE! Please use the corresponding macro os_file_close(), not directly
this function!
A performance schema instrumented wrapper function for os_file_close().
@param[in]	file		handle to a file
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return true if success */
UNIV_INLINE
bool
pfs_os_file_close_func(
	pfs_os_file_t	file,
	const char*	src_file,
	ulint		src_line);
1400 
/** NOTE! Please use the corresponding macro os_file_read(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_read() which requests a synchronous read operation.
@param[in,out]	type		IO request context
@param[in]	file		Open file handle
@param[out]	buf		buffer where to read
@param[in]	offset		file offset where to read
@param[in]	n		number of bytes to read
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was successful */
UNIV_INLINE
dberr_t
pfs_os_file_read_func(
	IORequest&	type,
	pfs_os_file_t	file,
	void*		buf,
	os_offset_t	offset,
	ulint		n,
	const char*	src_file,
	ulint		src_line);
1423 
/** NOTE! Please use the corresponding macro os_file_read_no_error_handling(),
not directly this function!
This is the performance schema instrumented wrapper function for
os_file_read_no_error_handling_func() which requests a synchronous
read operation.
@param[in,out]	type		IO request context
@param[in]	file		Open file handle
@param[out]	buf		buffer where to read
@param[in]	offset		file offset where to read
@param[in]	n		number of bytes to read
@param[out]	o		number of bytes actually read
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was successful */
UNIV_INLINE
dberr_t
pfs_os_file_read_no_error_handling_func(
	IORequest&	type,
	pfs_os_file_t	file,
	void*		buf,
	os_offset_t	offset,
	ulint		n,
	ulint*		o,
	const char*	src_file,
	ulint		src_line);
1449 
/** NOTE! Please use the corresponding macro
os_file_read_no_error_handling_int_fd(), not directly this function!
This is the performance schema instrumented wrapper function for
os_file_read_no_error_handling_int_fd_func() which requests a
synchronous read operation on files with int type descriptors.
@param[in,out]	type		IO request context
@param[in]	file		Open file descriptor (plain int)
@param[out]	buf		buffer where to read
@param[in]	offset		file offset where to read
@param[in]	n		number of bytes to read
@param[out]	o		number of bytes actually read
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was successful */

UNIV_INLINE
dberr_t
pfs_os_file_read_no_error_handling_int_fd_func(
        IORequest&      type,
        int             file,
        void*           buf,
        os_offset_t     offset,
        ulint           n,
        ulint*          o,
        const char*     src_file,
        ulint           src_line);
1476 
/** NOTE! Please use the corresponding macro os_aio(), not directly this
function!
Performance schema wrapper function of os_aio() which requests
an asynchronous I/O operation.
@param[in]	type		IO request context
@param[in]	mode		IO mode
@param[in]	name		Name of the file or path as NUL terminated
				string
@param[in]	file		Open file handle
@param[out]	buf		buffer where to read
@param[in]	offset		file offset where to read
@param[in]	n		number of bytes to read
@param[in]	read_only	if true read only mode checks are enforced
@param[in,out]	m1		Message for the AIO handler, (can be used to
				identify a completed AIO operation); ignored
				if mode is OS_AIO_SYNC
@param[in,out]	m2		message for the AIO handler (can be used to
				identify a completed AIO operation); ignored
				if mode is OS_AIO_SYNC
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was queued successfully, otherwise an
	error code (the return type is dberr_t, not a boolean) */
UNIV_INLINE
dberr_t
pfs_os_aio_func(
	IORequest&	type,
	ulint		mode,
	const char*	name,
	pfs_os_file_t	file,
	void*		buf,
	os_offset_t	offset,
	ulint		n,
	bool		read_only,
	fil_node_t*	m1,
	void*		m2,
	const char*	src_file,
	ulint		src_line);
1514 
/** NOTE! Please use the corresponding macro os_file_write(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation.
@param[in,out]	type		IO request context
@param[in]	name		Name of the file or path as NUL terminated
				string
@param[in]	file		Open file handle
@param[in]	buf		buffer from which to write
@param[in]	offset		file offset where to write
@param[in]	n		number of bytes to write
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was successful */
UNIV_INLINE
dberr_t
pfs_os_file_write_func(
	IORequest&	type,
	const char*	name,
	pfs_os_file_t	file,
	const void*	buf,
	os_offset_t	offset,
	ulint		n,
	const char*	src_file,
	ulint		src_line);
1540 
/** NOTE! Please use the corresponding macro os_file_write_int_fd(), not
directly this function!
This is the performance schema instrumented wrapper function for
os_file_write() which requests a synchronous write operation
on files with int type descriptors.
@param[in,out]	type		IO request context
@param[in]	name		Name of the file or path as NUL terminated
				string
@param[in]	file		Open file descriptor (plain int)
@param[in]	buf		buffer from which to write
@param[in]	offset		file offset where to write
@param[in]	n		number of bytes to write
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return DB_SUCCESS if request was successful */
UNIV_INLINE
dberr_t
pfs_os_file_write_int_fd_func(
        IORequest&      type,
        const char*     name,
        int		file,
        const void*     buf,
        os_offset_t     offset,
        ulint           n,
        const char*     src_file,
        ulint           src_line);
1567 
/** NOTE! Please use the corresponding macro os_file_flush(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_flush() which flushes the write buffers of a given file to the disk.
@param[in]	file		Open file handle
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return true if success */
UNIV_INLINE
bool
pfs_os_file_flush_func(
	pfs_os_file_t	file,
	const char*	src_file,
	ulint		src_line);
1583 
/** NOTE! Please use the corresponding macro os_file_rename(), not directly
this function!
This is the performance schema instrumented wrapper function for
os_file_rename()
@param[in]	key		Performance Schema Key
@param[in]	oldpath		old file path as a null-terminated string
@param[in]	newpath		new file path as a null-terminated string
@param[in]	src_file	name of the source file of the call site
@param[in]	src_line	source line of the call site
@return true if success */
UNIV_INLINE
bool
pfs_os_file_rename_func(
	mysql_pfs_key_t	key,
	const char*	oldpath,
	const char*	newpath,
	const char*	src_file,
	ulint		src_line);
1602 
1603 /**
1604 NOTE! Please use the corresponding macro os_file_delete(), not directly
1605 this function!
1606 This is the performance schema instrumented wrapper function for
1607 os_file_delete()
1608 @param[in]	key		Performance Schema Key
1609 @param[in]	name		old file path as a null-terminated string
1610 @param[in]	src_file	file name where func invoked
1611 @param[in]	src_line	line where the func invoked
1612 @return true if success */
1613 UNIV_INLINE
1614 bool
1615 pfs_os_file_delete_func(
1616 	mysql_pfs_key_t	key,
1617 	const char*	name,
1618 	const char*	src_file,
1619 	ulint		src_line);
1620 
1621 /**
1622 NOTE! Please use the corresponding macro os_file_delete_if_exists(), not
1623 directly this function!
1624 This is the performance schema instrumented wrapper function for
1625 os_file_delete_if_exists()
1626 @param[in]	key		Performance Schema Key
1627 @param[in]	name		old file path as a null-terminated string
1628 @param[in]	exist		indicate if file pre-exist
1629 @param[in]	src_file	file name where func invoked
1630 @param[in]	src_line	line where the func invoked
1631 @return true if success */
1632 UNIV_INLINE
1633 bool
1634 pfs_os_file_delete_if_exists_func(
1635 	mysql_pfs_key_t	key,
1636 	const char*	name,
1637 	bool*		exist,
1638 	const char*	src_file,
1639 	ulint		src_line);
1640 
1641 #else /* UNIV_PFS_IO */
1642 
1643 /* If UNIV_PFS_IO is not defined, these I/O APIs point
1644 to original un-instrumented file I/O APIs */
/* Each macro drops the performance schema key (where it has one) and
forwards the remaining arguments unchanged to the plain *_func(). */

# define os_file_create(key, name, create_mode, purpose, type,		\
			read_only, success)				\
	os_file_create_func(						\
		name, create_mode, purpose, type, read_only, success)

# define os_file_create_simple(key, name, create_mode, access_type,	\
			       read_only, success)			\
	os_file_create_simple_func(					\
		name, create_mode, access_type, read_only, success)

# define os_file_create_simple_no_error_handling(			\
		key, name, create_mode, access_type, read_only,		\
		success)						\
	os_file_create_simple_no_error_handling_func(			\
		name, create_mode, access_type, read_only, success)

# define os_file_close_pfs(file)					\
	os_file_close_func(file)

# define os_aio(type, mode, name, file, buf, offset,			\
		n, read_only, m1, m2)					\
	os_aio_func(							\
		type, mode, name, file, buf, offset,			\
		n, read_only, m1, m2)

# define os_file_read_pfs(type, file, buf, offset, n)			\
	os_file_read_func(type, file, buf, offset, n)

# define os_file_read_no_error_handling_pfs(				\
		type, file, buf, offset, n, o)				\
	os_file_read_no_error_handling_func(				\
		type, file, buf, offset, n, o)

# define os_file_read_no_error_handling_int_fd(				\
		type, file, buf, offset, n, o)				\
	os_file_read_no_error_handling_func(				\
		type, file, buf, offset, n, o)

# define os_file_write_pfs(type, name, file, buf, offset, n)		\
	os_file_write_func(type, name, file, buf, offset, n)

# define os_file_write_int_fd(type, name, file, buf, offset, n)	\
	os_file_write_func(type, name, file, buf, offset, n)

# define os_file_flush_pfs(file)					\
	os_file_flush_func(file)

# define os_file_rename(key, oldpath, newpath)				\
	os_file_rename_func(oldpath, newpath)

# define os_file_delete(key, name)					\
	os_file_delete_func(name)

# define os_file_delete_if_exists(key, name, exist)			\
	os_file_delete_if_exists_func(name, exist)
1691 
1692 #endif	/* UNIV_PFS_IO */
1693 
/* os_file_close(): without performance schema instrumentation only the
m_file member of the handle is handed on; with it, the handle is passed
as is. */
#ifndef UNIV_PFS_IO
	#define os_file_close(file) os_file_close_pfs((file).m_file)
#else
	#define os_file_close(file) os_file_close_pfs(file)
#endif
1699 
/* os_file_read(): without performance schema instrumentation only the
m_file member of the handle is handed on. The "file" argument is
parenthesized (as in os_file_close()) so that expressions such as *ptr
can be passed. */
#ifdef UNIV_PFS_IO
	#define os_file_read(type, file, buf, offset, n)                \
		os_file_read_pfs(type, file, buf, offset, n)
#else
	#define os_file_read(type, file, buf, offset, n)                \
		os_file_read_pfs(type, (file).m_file, buf, offset, n)
#endif
1707 
/* os_file_flush(): without performance schema instrumentation only the
m_file member of the handle is handed on. The "file" argument is
parenthesized (as in os_file_close()) so that expressions such as *ptr
can be passed. */
#ifdef UNIV_PFS_IO
	#define os_file_flush(file)	os_file_flush_pfs(file)
#else
	#define os_file_flush(file)	os_file_flush_pfs((file).m_file)
#endif
1713 
/* os_file_write(): without performance schema instrumentation only the
m_file member of the handle is handed on. The "file" argument is
parenthesized (as in os_file_close()) so that expressions such as *ptr
can be passed. */
#ifdef UNIV_PFS_IO
	#define os_file_write(type, name, file, buf, offset, n)         \
		os_file_write_pfs(type, name, file, buf, offset, n)
#else
	#define os_file_write(type, name, file, buf, offset, n)         \
		os_file_write_pfs(type, name, (file).m_file, buf, offset, n)
#endif
1721 
/* os_file_read_no_error_handling(): without performance schema
instrumentation only the m_file member of the handle is handed on. The
"file" argument is parenthesized (as in os_file_close()) so that
expressions such as *ptr can be passed. */
#ifdef UNIV_PFS_IO
	#define os_file_read_no_error_handling(type, file, buf, offset, n, o)  \
		 os_file_read_no_error_handling_pfs(type, file, buf, offset, n, o)
#else
	#define os_file_read_no_error_handling(type, file, buf, offset, n, o) \
		os_file_read_no_error_handling_pfs(			      \
			type, (file).m_file, buf, offset, n, o)
#endif
1730 
#ifdef UNIV_HOTBACKUP
/** Closes a file handle. Only declared in UNIV_HOTBACKUP builds.
@param[in]	file		handle to a file
@return true if success */
bool
os_file_close_no_error_handling(os_file_t file);
#endif /* UNIV_HOTBACKUP */
1738 
/** Gets a file size, given the name of the file.
@param[in]	filename	name of the file
@return file size if OK, else set m_total_size to ~0 and m_alloc_size
	to errno */
os_file_size_t
os_file_get_size(
	const char*	filename)
	MY_ATTRIBUTE((warn_unused_result));
1747 
/** Gets a file size, given a handle to the file.
@param[in]	file		handle to a file
@return file size, or (os_offset_t) -1 on failure */
os_offset_t
os_file_get_size(
	pfs_os_file_t	file)
	MY_ATTRIBUTE((warn_unused_result));
1755 
/** Write the specified number of zeros to a newly created file.
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	file		handle to a file
@param[in]	size		file size, i.e. the number of zeros to write
@param[in]	read_only	Enable read-only checks if true
@return true if success */
bool
os_file_set_size(
	const char*	name,
	pfs_os_file_t	file,
	os_offset_t	size,
	bool		read_only)
	MY_ATTRIBUTE((warn_unused_result));
1770 
/** Truncates a file at its current position.
@param[in,out]	file	file to be truncated
@return true if success */
bool
os_file_set_eof(
	FILE*		file);
1777 
/** Truncates a file to a specified size in bytes. Do nothing if the size
preserved is smaller or equal than current size of file.
NOTE(review): "smaller or equal" reads inverted for a truncation; one would
expect a no-op when the requested size is not smaller than the current file
size. Confirm against the implementation.
@param[in]	pathname	file path
@param[in]	file		file to be truncated
@param[in]	size		size preserved in bytes
@return true if success */
bool
os_file_truncate(
	const char*	pathname,
	pfs_os_file_t	file,
	os_offset_t	size);
1789 
/** NOTE! Use the corresponding macro os_file_flush(), not directly this
function!
Flushes the write buffers of a given file to the disk.
@param[in]	file		handle to a file
@return true if success */
bool
os_file_flush_func(
	os_file_t	file);
1798 
/** Retrieves the last error number if an error occurs in a file io function.
The number should be retrieved before any other OS calls (because they may
overwrite the error number). If the number is not known to this program,
the OS error number + 100 is returned.
@param[in]	report		true if we want an error message printed
				for all errors
@return error number, or OS error number + 100 */
ulint
os_file_get_last_error(
	bool		report);
1809 
1810 /** NOTE! Use the corresponding macro os_file_read(), not directly this
1811 function!
1812 Requests a synchronous read operation.
1813 @param[in]	type		IO request context
1814 @param[in]	file		Open file handle
1815 @param[out]	buf		buffer where to read
1816 @param[in]	offset		file offset where to read
1817 @param[in]	n		number of bytes to read
1818 @return DB_SUCCESS if request was successful */
1819 dberr_t
1820 os_file_read_func(
1821 	IORequest&	type,
1822 	os_file_t	file,
1823 	void*		buf,
1824 	os_offset_t	offset,
1825 	ulint		n)
1826 	MY_ATTRIBUTE((warn_unused_result));
1827 
1828 /** Rewind file to its start, read at most size - 1 bytes from it to str, and
1829 NUL-terminate str. All errors are silently ignored. This function is
1830 mostly meant to be used with temporary files.
1831 @param[in,out]	file		file to read from
1832 @param[in,out]	str		buffer where to read
1833 @param[in]	size		size of buffer */
1834 void
1835 os_file_read_string(
1836 	FILE*		file,
1837 	char*		str,
1838 	ulint		size);
1839 
1840 /** NOTE! Use the corresponding macro os_file_read_no_error_handling(),
1841 not directly this function!
Requests a synchronous positioned read operation. This function does not do
any error handling. In case of error it returns an error code.
1844 @param[in]	type		IO request context
1845 @param[in]	file		Open file handle
1846 @param[out]	buf		buffer where to read
1847 @param[in]	offset		file offset where to read
1848 @param[in]	n		number of bytes to read
1849 @param[out]	o		number of bytes actually read
1850 @return DB_SUCCESS or error code */
1851 dberr_t
1852 os_file_read_no_error_handling_func(
1853 	IORequest&	type,
1854 	os_file_t	file,
1855 	void*		buf,
1856 	os_offset_t	offset,
1857 	ulint		n,
1858 	ulint*		o)
1859 	MY_ATTRIBUTE((warn_unused_result));
1860 
1861 /** NOTE! Use the corresponding macro os_file_write(), not directly this
1862 function!
1863 Requests a synchronous write operation.
@param[in,out]	type		IO request context
@param[in]	name		name of the file or path as a null-terminated
				string
@param[in]	file		Open file handle
@param[in]	buf		buffer from which to write
@param[in]	offset		file offset where to write
@param[in]	n		number of bytes to write
1869 @return DB_SUCCESS if request was successful */
1870 dberr_t
1871 os_file_write_func(
1872 	IORequest&	type,
1873 	const char*	name,
1874 	os_file_t	file,
1875 	const void*	buf,
1876 	os_offset_t	offset,
1877 	ulint		n)
1878 	MY_ATTRIBUTE((warn_unused_result));
1879 
1880 /** Check the existence and type of the given file.
1881 @param[in]	path		pathname of the file
1882 @param[out]	exists		true if file exists
1883 @param[out]	type		type of the file (if it exists)
1884 @return true if call succeeded */
1885 bool
1886 os_file_status(
1887 	const char*	path,
1888 	bool*		exists,
1889 	os_file_type_t* type);
1890 
1891 /** This function returns a new path name after replacing the basename
1892 in an old path with a new basename.  The old_path is a full path
1893 name including the extension.  The tablename is in the normal
1894 form "databasename/tablename".  The new base name is found after
1895 the forward slash.  Both input strings are null terminated.
1896 
1897 This function allocates memory to be returned.  It is the callers
1898 responsibility to free the return value after it is no longer needed.
1899 
1900 @param[in]	old_path		pathname
1901 @param[in]	new_name		new file name
1902 @return own: new full pathname */
1903 char*
1904 os_file_make_new_pathname(
1905 	const char*	old_path,
1906 	const char*	new_name);
1907 
1908 /** This function reduces a null-terminated full remote path name into
1909 the path that is sent by MySQL for DATA DIRECTORY clause.  It replaces
1910 the 'databasename/tablename.ibd' found at the end of the path with just
1911 'tablename'.
1912 
1913 Since the result is always smaller than the path sent in, no new memory
1914 is allocated. The caller should allocate memory for the path sent in.
1915 This function manipulates that path in place.
1916 
1917 If the path format is not as expected, just return.  The result is used
1918 to inform a SHOW CREATE TABLE command.
1919 @param[in,out]	data_dir_path		Full path/data_dir_path */
1920 void
1921 os_file_make_data_dir_path(
1922 	char*	data_dir_path);
1923 
/** Create all missing subdirectories along the given path.
@param[in]	path		pathname along which subdirectories are created
@return DB_SUCCESS if OK, otherwise error code. */
1926 dberr_t
1927 os_file_create_subdirs_if_needed(
1928 	const char*	path);
1929 
1930 #ifdef UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR
1931 /* Test the function os_file_get_parent_dir. */
1932 void
1933 unit_test_os_file_get_parent_dir();
1934 #endif /* UNIV_ENABLE_UNIT_TEST_GET_PARENT_DIR */
1935 
1936 /** Initializes the asynchronous io system. Creates one array each for ibuf
1937 and log i/o. Also creates one array each for read and write where each
1938 array is divided logically into n_read_segs and n_write_segs
1939 respectively. The caller must create an i/o handler thread for each
1940 segment in these arrays. This function also creates the sync array.
No i/o handler thread needs to be created for that.
1942 @param[in]	n_read_segs	number of reader threads
1943 @param[in]	n_write_segs	number of writer threads
1944 @param[in]	n_slots_sync	number of slots in the sync aio array */
1945 
1946 bool
1947 os_aio_init(
1948 	ulint		n_read_segs,
1949 	ulint		n_write_segs,
1950 	ulint		n_slots_sync);
1951 
1952 /**
1953 Frees the asynchronous io system. */
1954 void
1955 os_aio_free();
1956 
1957 /**
1958 NOTE! Use the corresponding macro os_aio(), not directly this function!
1959 Requests an asynchronous i/o operation.
1960 @param[in]	type		IO request context
1961 @param[in]	mode		IO mode
1962 @param[in]	name		Name of the file or path as NUL terminated
1963 				string
1964 @param[in]	file		Open file handle
@param[in,out]	buf		buffer where to read or from which to write
@param[in]	offset		file offset where to read or write
@param[in]	n		number of bytes to read or write
1968 @param[in]	read_only	if true read only mode checks are enforced
1969 @param[in,out]	m1		Message for the AIO handler, (can be used to
1970 				identify a completed AIO operation); ignored
1971 				if mode is OS_AIO_SYNC
1972 @param[in,out]	m2		message for the AIO handler (can be used to
1973 				identify a completed AIO operation); ignored
1974 				if mode is OS_AIO_SYNC
1975 @return DB_SUCCESS or error code */
1976 dberr_t
1977 os_aio_func(
1978 	IORequest&	type,
1979 	ulint		mode,
1980 	const char*	name,
1981 	pfs_os_file_t	file,
1982 	void*		buf,
1983 	os_offset_t	offset,
1984 	ulint		n,
1985 	bool		read_only,
1986 	fil_node_t*	m1,
1987 	void*		m2);
1988 
1989 /** Wakes up all async i/o threads so that they know to exit themselves in
1990 shutdown. */
1991 void
1992 os_aio_wake_all_threads_at_shutdown();
1993 
1994 /** Waits until there are no pending writes in os_aio_write_array. There can
1995 be other, synchronous, pending writes. */
1996 void
1997 os_aio_wait_until_no_pending_writes();
1998 
1999 /** Wakes up simulated aio i/o-handler threads if they have something to do. */
2000 void
2001 os_aio_simulated_wake_handler_threads();
2002 
2003 /** This function can be called if one wants to post a batch of reads and
2004 prefers an i/o-handler thread to handle them all at once later. You must
2005 call os_aio_simulated_wake_handler_threads later to ensure the threads
2006 are not left sleeping! */
2007 void
2008 os_aio_simulated_put_read_threads_to_sleep();
2009 
2010 /** This is the generic AIO handler interface function.
Waits for an aio operation to complete. This function is used to wait for
the completed requests. The AIO array of pending requests is divided
2013 into segments. The thread specifies which segment or slot it wants to wait
2014 for. NOTE: this function will also take care of freeing the aio slot,
2015 therefore no other thread is allowed to do the freeing!
2016 @param[in]	segment		the number of the segment in the aio arrays to
2017 				wait for; segment 0 is the ibuf I/O thread,
2018 				segment 1 the log I/O thread, then follow the
2019 				non-ibuf read threads, and as the last are the
2020 				non-ibuf write threads; if this is
2021 				ULINT_UNDEFINED, then it means that sync AIO
2022 				is used, and this parameter is ignored
2023 @param[out]	m1		the messages passed with the AIO request;
2024 				note that also in the case where the AIO
2025 				operation failed, these output parameters
2026 				are valid and can be used to restart the
2027 				operation, for example
2028 @param[out]	m2		callback message
2029 @param[out]	type		OS_FILE_WRITE or ..._READ
2030 @return DB_SUCCESS or error code */
2031 dberr_t
2032 os_aio_handler(
2033 	ulint		segment,
2034 	fil_node_t**	m1,
2035 	void**		m2,
2036 	IORequest*	type);
2037 
2038 /** Prints info of the aio arrays.
@param[in,out]	file		file where to print */
2040 void
2041 os_aio_print(FILE* file);
2042 
2043 /** Refreshes the statistics used to print per-second averages. */
2044 void
2045 os_aio_refresh_stats();
2046 
2047 /** Checks that all slots in the system have been freed, that is, there are
2048 no pending io operations. */
2049 bool
2050 os_aio_all_slots_free();
2051 
2052 #ifdef UNIV_DEBUG
2053 
2054 /** Prints all pending IO
2055 @param[in]	file	file where to print */
2056 void
2057 os_aio_print_pending_io(FILE* file);
2058 
2059 #endif /* UNIV_DEBUG */
2060 
2061 /** This function returns information about the specified file
2062 @param[in]	path		pathname of the file
@param[out]	stat_info	information of a file in a directory
2064 @param[in]	check_rw_perm	for testing whether the file can be opened
2065 				in RW mode
2066 @param[in]	read_only	if true read only mode checks are enforced
2067 @return DB_SUCCESS if all OK */
2068 dberr_t
2069 os_file_get_status(
2070 	const char*	path,
2071 	os_file_stat_t* stat_info,
2072 	bool		check_rw_perm,
2073 	bool		read_only);
2074 
2075 #if !defined(UNIV_HOTBACKUP)
/** Returns one of the tmpdir paths.
@return temporary directory path */
2078 char *innobase_mysql_tmpdir(void);
2079 
2080 
2081 /** Creates a temporary file in the location specified by the parameter
2082 path. If the path is NULL then it will be created on --tmpdir location.
2083 This function is defined in ha_innodb.cc.
2084 @param[in]	path	location for creating temporary file
2085 @return temporary file descriptor, or < 0 on error */
2086 int
2087 innobase_mysql_tmpfile(
2088 	const char*	path);
2089 #endif /* !UNIV_HOTBACKUP */
2090 
2091 
2092 /** If it is a compressed page return the compressed page data + footer size
2093 @param[in]	buf		Buffer to check, must include header + 10 bytes
2094 @return ULINT_UNDEFINED if the page is not a compressed page or length
2095 	of the compressed data (including footer) if it is a compressed page */
2096 ulint
2097 os_file_compressed_page_size(const byte* buf);
2098 
2099 /** If it is a compressed page return the original page data + footer size
2100 @param[in]	buf		Buffer to check, must include header + 10 bytes
2101 @return ULINT_UNDEFINED if the page is not a compressed page or length
2102 	of the original data + footer if it is a compressed page */
2103 ulint
2104 os_file_original_page_size(const byte* buf);
2105 
2106 /** Set the file create umask
2107 @param[in]	umask		The umask to use for file creation. */
2108 void
2109 os_file_set_umask(ulint umask);
2110 
2111 /** Free storage space associated with a section of the file.
2112 @param[in]	fh		Open file handle
2113 @param[in]	off		Starting offset (SEEK_SET)
2114 @param[in]	len		Size of the hole
2115 @return DB_SUCCESS or error code */
2116 dberr_t
2117 os_file_punch_hole(
2118 	os_file_t	fh,
2119 	os_offset_t	off,
2120 	os_offset_t	len)
2121 	MY_ATTRIBUTE((warn_unused_result));
2122 
2123 /** Check if the file system supports sparse files.
2124 
2125 Warning: On POSIX systems we try and punch a hole from offset 0 to
2126 the system configured page size. This should only be called on an empty
2127 file.
2128 
2129 Note: On Windows we use the name and on Unices we use the file handle.
2130 
@param[in]	path		File name
2132 @param[in]	fh		File handle for the file - if opened
2133 @return true if the file system supports sparse files */
2134 bool
2135 os_is_sparse_file_supported(
2136 	const char*	path,
2137 	pfs_os_file_t	fh)
2138 	MY_ATTRIBUTE((warn_unused_result));
2139 
2140 /** Decompress the page data contents. Page type must be FIL_PAGE_COMPRESSED, if
2141 not then the source contents are left unchanged and DB_SUCCESS is returned.
@param[in]	dblwr_recover	true if double write recovery in progress
2143 @param[in,out]	src		Data read from disk, decompressed data will be
2144 				copied to this page
2145 @param[in,out]	dst		Scratch area to use for decompression
2146 @param[in]	dst_len		Size of the scratch area in bytes
2147 @return DB_SUCCESS or error code */
2148 
2149 dberr_t
2150 os_file_decompress_page(
2151 	bool		dblwr_recover,
2152 	byte*		src,
2153 	byte*		dst,
2154 	ulint		dst_len)
2155 	MY_ATTRIBUTE((warn_unused_result));
2156 
2157 /** Normalizes a directory path for the current OS:
2158 On Windows, we convert '/' to '\', else we convert '\' to '/'.
2159 @param[in,out] str A null-terminated directory and file path */
2160 void os_normalize_path(char*	str);
2161 
2162 /* Determine if a path is an absolute path or not.
2163 @param[in]	OS directory or file path to evaluate
2164 @retval true if an absolute path
2165 @retval false if a relative path */
2166 UNIV_INLINE
2167 bool
is_absolute_path(const char * path)2168 is_absolute_path(
2169 	const char*	path)
2170 {
2171 	if (path[0] == OS_PATH_SEPARATOR) {
2172 		return(true);
2173 	}
2174 
2175 #ifdef _WIN32
2176 	if (path[1] == ':' && path[2] == OS_PATH_SEPARATOR) {
2177 		return(true);
2178 	}
2179 #endif /* _WIN32 */
2180 
2181 	return(false);
2182 }
2183 
2184 #ifndef UNIV_NONINL
2185 #include "os0file.ic"
2186 #endif /* UNIV_NONINL */
2187 
2188 #endif /* os0file_h */
2189