1 /***************************************************************************** 2 3 Copyright (c) 1995, 2021, Oracle and/or its affiliates. 4 Copyright (c) 2016, Percona Inc. All Rights Reserved. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License, version 2.0, 8 as published by the Free Software Foundation. 9 10 This program is also distributed with certain software (including 11 but not limited to OpenSSL) that is licensed under separate terms, 12 as designated in a particular file or component or in included license 13 documentation. The authors of MySQL hereby grant you an additional 14 permission to link the program and your derivative works with the 15 separately licensed software that they have included with MySQL. 16 17 This program is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 GNU General Public License, version 2.0, for more details. 21 22 You should have received a copy of the GNU General Public License along with 23 this program; if not, write to the Free Software Foundation, Inc., 24 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 25 26 *****************************************************************************/ 27 28 /**************************************************//** 29 @file include/buf0dblwr.h 30 Doublewrite buffer module 31 32 Created 2011/12/19 Inaam Rana 33 *******************************************************/ 34 35 #ifndef buf0dblwr_h 36 #define buf0dblwr_h 37 38 #include "univ.i" 39 #include "buf0buf.h" 40 #include "ut0byte.h" 41 #include "log0log.h" 42 #include "buf0types.h" 43 #include "log0recv.h" 44 45 #ifndef UNIV_HOTBACKUP 46 47 /** Maximum doublewrite batch size. This cannot be set higher than 127, the 48 legacy innodb_doublewrite_batch_size maximum value, without decoupling the 49 legacy buffer sizing in the system tablespace from it. */ 50 enum { MAX_DOUBLEWRITE_BATCH_SIZE = 127 }; 51 52 /** Doublewrite system */ 53 extern buf_dblwr_t* buf_dblwr; 54 /** Set to TRUE when the doublewrite buffer is being created */ 55 extern ibool buf_dblwr_being_created; 56 57 /****************************************************************//** 58 Creates the doublewrite buffer to a new InnoDB installation. The header of the 59 doublewrite buffer is placed on the trx system header page. 60 @return true if successful, false if not. */ 61 MY_ATTRIBUTE((warn_unused_result)) 62 bool 63 buf_dblwr_create(void); 64 /*==================*/ 65 66 /****************************************************************//** 67 At a database startup initializes the doublewrite buffer memory structure if 68 we already have a doublewrite buffer created in the data files. If we are 69 upgrading to an InnoDB version which supports multiple tablespaces, then this 70 function performs the necessary update operations. If we are in a crash 71 recovery, this function loads the pages from double write buffer into memory. 72 @return DB_SUCCESS or error code */ 73 MY_ATTRIBUTE((warn_unused_result)) 74 dberr_t 75 buf_dblwr_init_or_load_pages( 76 pfs_os_file_t file, 77 const char* path); 78 79 /** Process and remove the double write buffer pages for all tablespaces. */ 80 void 81 buf_dblwr_process(void); 82 83 /****************************************************************//** 84 frees doublewrite buffer. */ 85 void 86 buf_dblwr_free(void); 87 /*================*/ 88 /********************************************************************//** 89 Updates the doublewrite buffer when an IO request is completed. */ 90 void 91 buf_dblwr_update( 92 /*=============*/ 93 const buf_page_t* bpage, /*!< in: buffer block descriptor */ 94 buf_flush_t flush_type);/*!< in: flush type */ 95 /****************************************************************//** 96 Determines if a page number is located inside the doublewrite buffer. 97 @return TRUE if the location is inside the two blocks of the 98 doublewrite buffer */ 99 ibool 100 buf_dblwr_page_inside( 101 /*==================*/ 102 ulint page_no); /*!< in: page number */ 103 /********************************************************************//** 104 Posts a buffer page for writing. If the doublewrite memory buffer is 105 full, calls buf_dblwr_flush_buffered_writes and waits for for free 106 space to appear. */ 107 void 108 buf_dblwr_add_to_batch( 109 /*====================*/ 110 buf_page_t* bpage, /*!< in: buffer block to write */ 111 buf_flush_t flush_type);/*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ 112 113 /********************************************************************//** 114 Flush a batch of writes to the datafiles that have already been 115 written to the dblwr buffer on disk. */ 116 void 117 buf_dblwr_sync_datafiles(); 118 119 /********************************************************************//** 120 Flushes possible buffered writes from the specified partition of the 121 doublewrite memory buffer to disk, and also wakes up the aio thread if 122 simulated aio is used. It is very important to call this function after a batch 123 of writes has been posted, and also when we may have to wait for a page latch! 124 Otherwise a deadlock of threads can occur. */ 125 void 126 buf_dblwr_flush_buffered_writes( 127 /*============================*/ 128 ulint dblwr_partition); /*!< in: doublewrite partition */ 129 /********************************************************************//** 130 Writes a page to the doublewrite buffer on disk, sync it, then write 131 the page to the datafile and sync the datafile. This function is used 132 for single page flushes. If all the buffers allocated for single page 133 flushes in the doublewrite buffer are in use we wait here for one to 134 become free. We are guaranteed that a slot will become free because any 135 thread that is using a slot must also release the slot before leaving 136 this function. */ 137 void 138 buf_dblwr_write_single_page( 139 /*========================*/ 140 buf_page_t* bpage, /*!< in: buffer block to write */ 141 bool sync); /*!< in: true if sync IO requested */ 142 143 /** Return the doublewrite partition number for a given buffer page and flush 144 type. 145 @return the doublewrite partition number */ 146 MY_ATTRIBUTE((warn_unused_result)) 147 UNIV_INLINE 148 ulint 149 buf_parallel_dblwr_partition(const buf_page_t* bpage, buf_flush_t flush_type); 150 151 /** Return the doublewrite partition number for a given buffer pool and flush 152 type. 153 @return the doublewrite partition number */ 154 MY_ATTRIBUTE((warn_unused_result)) 155 UNIV_INLINE 156 ulint 157 buf_parallel_dblwr_partition(const buf_pool_t* buf_pool, 158 buf_flush_t flush_type); 159 160 /** Initialize parallel doublewrite subsystem: create its data structure and 161 the disk file. 162 @return DB_SUCCESS or error code */ 163 MY_ATTRIBUTE((warn_unused_result)) 164 dberr_t 165 buf_parallel_dblwr_create(void); 166 167 /** Delete the parallel doublewrite file, if its path already has been 168 computed. It is up to the caller to ensure that this called at safe point */ 169 void 170 buf_parallel_dblwr_delete(void); 171 172 /** Cleanup parallel doublewrite memory structures and optionally close and 173 delete the doublewrite buffer file too. 174 @param delete_file whether to close and delete the buffer file too */ 175 void 176 buf_parallel_dblwr_free(bool delete_file); 177 178 /** Release any unused parallel doublewrite pages and free their underlying 179 buffer at the end of crash recovery */ 180 void 181 buf_parallel_dblwr_finish_recovery(void); 182 183 /** A single parallel doublewrite partition data structure */ 184 struct parallel_dblwr_shard_t { 185 /** First free position in write_buf measured in units of 186 UNIV_PAGE_SIZE */ 187 ulint first_free; 188 /** Number of pages posted to I/O in this doublewrite batch */ 189 ulint batch_size; 190 /** Raw heap pointer for write_buf */ 191 byte* write_buf_unaligned; 192 /** Write buffer used in writing to the doublewrite buffer, aligned 193 on UNIV_PAGE_SIZE */ 194 byte* write_buf; 195 /** Array to store pointers to the buffer blocks which have been cached 196 to write_buf */ 197 buf_page_t** buf_block_arr; 198 /** I/O for a doublewrite batch completion event */ 199 os_event_t batch_completed; 200 }; 201 202 /** Maximum possible number of doublewrite partitions */ 203 enum { MAX_DBLWR_SHARDS = MAX_BUFFER_POOLS * 2 }; 204 205 /** Parallel doublewrite buffer data structure */ 206 class parallel_dblwr_t { 207 public: 208 /** Parallel doublewrite buffer file handle */ 209 pfs_os_file_t file; 210 /** Whether the doublewrite buffer file needs flushing after each 211 write */ 212 bool needs_flush; 213 /** Path to the parallel doublewrite buffer */ 214 char* path; 215 /** Individual parallel doublewrite partitions */ 216 parallel_dblwr_shard_t shard[MAX_DBLWR_SHARDS]; 217 /** Buffer for reading in parallel doublewrite buffer pages 218 during crash recovery */ 219 byte* recovery_buf_unaligned; 220 221 /** Default constructor for the parallel doublewrite instance */ parallel_dblwr_t(void)222 parallel_dblwr_t(void) 223 : 224 path(NULL), 225 recovery_buf_unaligned(NULL) 226 { 227 file.set_closed(); 228 } 229 }; 230 231 /** The parallel doublewrite buffer */ 232 extern parallel_dblwr_t parallel_dblwr_buf; 233 234 /** Doublewrite control struct */ 235 struct buf_dblwr_t{ 236 ib_mutex_t mutex; /*!< mutex protecting write_buf */ 237 ulint block1; /*!< the page number of the first 238 doublewrite block (64 pages) */ 239 ulint block2; /*!< page number of the second block */ 240 ulint s_reserved;/*!< number of slots currently 241 reserved for single page flushes. */ 242 os_event_t s_event;/*!< event where threads wait for a 243 single page flush slot. */ 244 bool* in_use; /*!< flag used to indicate if a slot is 245 in use. Only used for single page 246 flushes. */ 247 byte* write_buf;/*!< write buffer used in writing to the 248 doublewrite buffer, aligned to an 249 address divisible by UNIV_PAGE_SIZE 250 (which is required by Windows aio) */ 251 byte* write_buf_unaligned;/*!< pointer to write_buf, 252 but unaligned */ 253 buf_page_t** buf_block_arr;/*!< array to store pointers to 254 the buffer blocks which have been 255 cached to write_buf */ 256 }; 257 258 259 #endif /* UNIV_HOTBACKUP */ 260 261 #ifndef UNIV_NONINL 262 #include "buf0dblwr.ic" 263 #endif 264 265 #endif 266