1/***************************************************************************** 2 3Copyright (c) 2005, 2020, Oracle and/or its affiliates. All Rights Reserved. 4Copyright (c) 2012, Facebook Inc. 5 6This program is free software; you can redistribute it and/or modify it under 7the terms of the GNU General Public License, version 2.0, as published by the 8Free Software Foundation. 9 10This program is also distributed with certain software (including but not 11limited to OpenSSL) that is licensed under separate terms, as designated in a 12particular file or component or in included license documentation. The authors 13of MySQL hereby grant you an additional permission to link the program and 14your derivative works with the separately licensed software that they have 15included with MySQL. 16 17This program is distributed in the hope that it will be useful, but WITHOUT 18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0, 20for more details. 21 22You should have received a copy of the GNU General Public License along with 23this program; if not, write to the Free Software Foundation, Inc., 2451 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 25 26*****************************************************************************/ 27 28/** @file include/page0zip.ic 29 Compressed page interface 30 31 Created June 2005 by Marko Makela 32 *******************************************************/ 33 34#ifdef UNIV_MATERIALIZE 35#undef UNIV_INLINE 36#define UNIV_INLINE 37#endif 38 39#include "mtr0log.h" 40#include "page0page.h" 41#include "page0zip.h" 42#include "srv0srv.h" 43 44/* The format of compressed pages is as follows. 45 46The header and trailer of the uncompressed pages, excluding the page 47directory in the trailer, are copied as is to the header and trailer 48of the compressed page. 49 50At the end of the compressed page, there is a dense page directory 51pointing to every user record contained on the page, including deleted 52records on the free list. The dense directory is indexed in the 53collation order, i.e., in the order in which the record list is 54linked on the uncompressed page. The infimum and supremum records are 55excluded. The two most significant bits of the entries are allocated 56for the delete-mark and an n_owned flag indicating the last record in 57a chain of records pointed to from the sparse page directory on the 58uncompressed page. 59 60The data between PAGE_ZIP_START and the last page directory entry will 61be written in compressed format, starting at offset PAGE_DATA. 62Infimum and supremum records are not stored. We exclude the 63REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered 64from the dense page directory stored at the end of the compressed 65page. 66 67The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and 68roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of 69externally stored columns are stored separately, in ascending order of 70heap_no and column index, starting backwards from the dense page 71directory. 72 73The compressed data stream may be followed by a modification log 74covering the compressed portion of the page, as follows. 75 76MODIFICATION LOG ENTRY FORMAT 77- write record: 78 - (heap_no - 1) << 1 (1..2 bytes) 79 - extra bytes backwards 80 - data bytes 81- clear record: 82 - (heap_no - 1) << 1 | 1 (1..2 bytes) 83 84The integer values are stored in a variable-length format: 85- 0xxxxxxx: 0..127 86- 1xxxxxxx xxxxxxxx: 0..32767 87 88The end of the modification log is marked by a 0 byte. 89 90In summary, the compressed page looks like this: 91 92(1) Uncompressed page header (PAGE_DATA bytes) 93(2) Compressed index information 94(3) Compressed page data 95(4) Page modification log (page_zip->m_start..page_zip->m_end) 96(5) Empty zero-filled space 97(6) BLOB pointers (on leaf pages) 98 - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column 99 - in descending collation order 100(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, 101 - indexed by heap_no 102 - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes 103 - REC_NODE_PTR_SIZE for non-leaf pages 104 - 0 otherwise 105(8) dense page directory, stored backwards 106 - n_dense = n_heap - 2 107 - existing records in ascending collation order 108 - deleted records (free list) in link order 109*/ 110 111/** Set the size of a compressed page in bytes. */ 112UNIV_INLINE 113void page_zip_set_size(page_zip_des_t *page_zip, /*!< in/out: compressed page */ 114 ulint size) /*!< in: size in bytes */ 115{ 116 if (size) { 117 int ssize; 118 119 ut_ad(ut_is_2pow(size)); 120 121 for (ssize = 1; size > (ulint)(512 << ssize); ssize++) { 122 } 123 124 page_zip->ssize = ssize; 125 } else { 126 page_zip->ssize = 0; 127 } 128 129 ut_ad(page_zip_get_size(page_zip) == size); 130} 131 132#ifndef UNIV_HOTBACKUP 133/** Determine if a record is so big that it needs to be stored externally. 134@param[in] rec_size length of the record in bytes 135@param[in] comp nonzero=compact format 136@param[in] n_fields number of fields in the record; ignored if 137tablespace is not compressed 138@param[in] page_size page size 139@return false if the entire record can be stored locally on the page */ 140UNIV_INLINE 141ibool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields, 142 const page_size_t &page_size) { 143 ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); 144 ut_ad(comp || !page_size.is_compressed()); 145 146 if (UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE && rec_size >= REC_MAX_DATA_SIZE) { 147 return (TRUE); 148 } 149 150 if (page_size.is_compressed()) { 151 ut_ad(comp); 152 /* On a compressed page, there is a two-byte entry in 153 the dense page directory for every record. But there 154 is no record header. There should be enough room for 155 one record on an empty leaf page. Subtract 1 byte for 156 the encoded heap number. Check also the available space 157 on the uncompressed page. */ 158 return (rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) >= 159 page_zip_empty_size(n_fields, page_size.physical()) || 160 rec_size >= page_get_free_space_of_empty(TRUE) / 2); 161 } 162 163 return (rec_size >= page_get_free_space_of_empty(comp) / 2); 164} 165#endif /* !UNIV_HOTBACKUP */ 166 167/** Determine if the length of the page trailer. 168 @return length of the page trailer, in bytes, not including the 169 terminating zero byte of the modification log */ 170UNIV_INLINE 171ibool page_zip_get_trailer_len( 172 const page_zip_des_t *page_zip, /*!< in: compressed page */ 173 bool is_clust) /*!< in: TRUE if clustered index */ 174{ 175 ulint uncompressed_size; 176 177 ut_ad(page_zip_simple_validate(page_zip)); 178 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 179 180 if (!page_is_leaf(page_zip->data)) { 181 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE; 182 ut_ad(!page_zip->n_blobs); 183 } else if (is_clust) { 184 uncompressed_size = 185 PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; 186 } else { 187 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; 188 ut_ad(!page_zip->n_blobs); 189 } 190 191 return ((page_dir_get_n_heap(page_zip->data) - 2) * uncompressed_size + 192 page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); 193} 194 195/** Determine how big record can be inserted without recompressing the page. 196 @return a positive number indicating the maximum size of a record 197 whose insertion is guaranteed to succeed, or zero or negative */ 198UNIV_INLINE 199lint page_zip_max_ins_size( 200 const page_zip_des_t *page_zip, /*!< in: compressed page */ 201 ibool is_clust) /*!< in: TRUE if clustered index */ 202{ 203 ulint trailer_len; 204 205 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 206 207 /* When a record is created, a pointer may be added to 208 the dense directory. 209 Likewise, space for the columns that will not be 210 compressed will be allocated from the page trailer. 211 Also the BLOB pointers will be allocated from there, but 212 we may as well count them in the length of the record. */ 213 214 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 215 216 return ((lint)page_zip_get_size(page_zip) - trailer_len - page_zip->m_end - 217 (REC_N_NEW_EXTRA_BYTES - 2)); 218} 219 220/** Determine if enough space is available in the modification log. 221 @return true if enough space is available */ 222UNIV_INLINE 223ibool page_zip_available( 224 const page_zip_des_t *page_zip, /*!< in: compressed page */ 225 bool is_clust, /*!< in: TRUE if clustered index */ 226 ulint length, /*!< in: combined size of the record */ 227 ulint create) /*!< in: nonzero=add the record to 228 the heap */ 229{ 230 ulint trailer_len; 231 232 ut_ad(length > REC_N_NEW_EXTRA_BYTES); 233 234 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 235 236 /* Subtract the fixed extra bytes and add the maximum 237 space needed for identifying the record (encoded heap_no). */ 238 length -= REC_N_NEW_EXTRA_BYTES - 2; 239 240 if (create > 0) { 241 /* When a record is created, a pointer may be added to 242 the dense directory. 243 Likewise, space for the columns that will not be 244 compressed will be allocated from the page trailer. 245 Also the BLOB pointers will be allocated from there, but 246 we may as well count them in the length of the record. */ 247 248 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 249 } 250 251 return (length + trailer_len + page_zip->m_end < page_zip_get_size(page_zip)); 252} 253 254/** Initialize a compressed page descriptor. */ 255UNIV_INLINE 256void page_zip_des_init(page_zip_des_t *page_zip) /*!< in/out: compressed page 257 descriptor */ 258{ 259 memset(page_zip, 0, sizeof *page_zip); 260} 261 262/** Write a log record of writing to the uncompressed header portion of a page. 263 */ 264void page_zip_write_header_log( 265 const byte *data, /*!< in: data on the uncompressed page */ 266 ulint length, /*!< in: length of the data */ 267 mtr_t *mtr); /*!< in: mini-transaction */ 268 269/** Write data to the uncompressed header portion of a page. The data must 270already have been written to the uncompressed page. 271@param[in,out] page_zip compressed page 272@param[in] str address on the uncompressed page 273@param[in] length length of the data 274@param[in] mtr mini-transaction, or NULL */ 275UNIV_INLINE 276void page_zip_write_header(page_zip_des_t *page_zip, const byte *str, 277 ulint length, mtr_t *mtr) { 278 ulint pos; 279 280 ut_ad(page_zip_simple_validate(page_zip)); 281 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 282 283 pos = page_offset(str); 284 285 ut_ad(pos < PAGE_DATA); 286 287 memcpy(page_zip->data + pos, str, length); 288 289 /* The following would fail in page_cur_insert_rec_zip(). */ 290 /* ut_ad(page_zip_validate(page_zip, str - pos)); */ 291 292 if (mtr) { 293#ifndef UNIV_HOTBACKUP 294 page_zip_write_header_log(str, length, mtr); 295#endif /* !UNIV_HOTBACKUP */ 296 } 297} 298 299/** Write a log record of compressing an index page without the data on the 300 * page. 301 */ 302UNIV_INLINE 303void page_zip_compress_write_log_no_data( 304 ulint level, /*!< in: compression level */ 305 const page_t *page, /*!< in: page that is compressed */ 306 dict_index_t *index, /*!< in: index */ 307 mtr_t *mtr) /*!< in: mtr */ 308{ 309#ifndef UNIV_HOTBACKUP 310 311 byte *log_ptr = nullptr; 312 if (!mlog_open_and_write_index(mtr, page, index, 313 MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1, log_ptr)) { 314 return; 315 } 316 317 mach_write_to_1(log_ptr, level); 318 mlog_close(mtr, log_ptr + 1); 319 320#endif /* !UNIV_HOTBACKUP */ 321} 322 323/** Parses a log record of compressing an index page without the data. 324 @return end of log record or NULL */ 325UNIV_INLINE 326byte *page_zip_parse_compress_no_data( 327 byte *ptr, /*!< in: buffer */ 328 byte *end_ptr, /*!< in: buffer end */ 329 page_t *page, /*!< in: uncompressed page */ 330 page_zip_des_t *page_zip, /*!< out: compressed page */ 331 dict_index_t *index) /*!< in: index */ 332{ 333 ulint level; 334 if (end_ptr == ptr) { 335 return (nullptr); 336 } 337 338 level = mach_read_from_1(ptr); 339 340 /* If page compression fails then there must be something wrong 341 because a compress log record is logged only if the compression 342 was successful. Crash in this case. */ 343 344 if (page && !page_zip_compress(page_zip, page, index, level, nullptr)) { 345 ut_error; 346 } 347 348 return (ptr + 1); 349} 350 351#ifndef UNIV_HOTBACKUP 352/** Reset the counters used for filling 353 INFORMATION_SCHEMA.innodb_cmp_per_index. */ 354UNIV_INLINE 355void page_zip_reset_stat_per_index() { 356 mutex_enter(&page_zip_stat_per_index_mutex); 357 358 page_zip_stat_per_index.erase(page_zip_stat_per_index.begin(), 359 page_zip_stat_per_index.end()); 360 361 mutex_exit(&page_zip_stat_per_index_mutex); 362} 363#endif /* !UNIV_HOTBACKUP */ 364 365/** Find the slot of the given record in the dense page directory. 366 @return dense directory slot, or NULL if record not found */ 367UNIV_INLINE 368byte *page_zip_dir_find_low(byte *slot, /*!< in: start of records */ 369 byte *end, /*!< in: end of records */ 370 ulint offset) /*!< in: offset of user record */ 371{ 372 ut_ad(slot <= end); 373 374 for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) { 375 if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK) == offset) { 376 return (slot); 377 } 378 } 379 380 return (nullptr); 381} 382 383/** Gets the size of the compressed page trailer (the dense page directory), 384 only including user records (excluding the free list). 385 @return length of dense page directory comprising existing records, in bytes */ 386UNIV_INLINE 387ulint page_zip_dir_user_size( 388 const page_zip_des_t *page_zip) /*!< in: compressed page */ 389{ 390 ulint size = PAGE_ZIP_DIR_SLOT_SIZE * page_get_n_recs(page_zip->data); 391 ut_ad(size <= page_zip_dir_size(page_zip)); 392 return (size); 393} 394 395/** Find the slot of the given free record in the dense page directory. 396 @return dense directory slot, or NULL if record not found */ 397UNIV_INLINE 398byte *page_zip_dir_find_free( 399 page_zip_des_t *page_zip, /*!< in: compressed page */ 400 ulint offset) /*!< in: offset of user record */ 401{ 402 byte *end = page_zip->data + page_zip_get_size(page_zip); 403 404 ut_ad(page_zip_simple_validate(page_zip)); 405 406 return (page_zip_dir_find_low(end - page_zip_dir_size(page_zip), 407 end - page_zip_dir_user_size(page_zip), 408 offset)); 409} 410 411/** Gets an offset to the compressed page trailer (the dense page directory), 412 including deleted records (the free list). 413 @return offset of the dense page directory */ 414UNIV_INLINE 415ulint page_zip_dir_start_offs( 416 const page_zip_des_t *page_zip, /*!< in: compressed page */ 417 ulint n_dense) /*!< in: directory size */ 418{ 419 ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip)); 420 421 return (page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE); 422} 423 424#ifdef UNIV_MATERIALIZE 425#undef UNIV_INLINE 426#define UNIV_INLINE UNIV_INLINE_ORIGINAL 427#endif 428