1/***************************************************************************** 2 3Copyright (c) 2005, 2021, Oracle and/or its affiliates. 4Copyright (c) 2012, Facebook Inc. 5 6This program is free software; you can redistribute it and/or modify 7it under the terms of the GNU General Public License, version 2.0, 8as published by the Free Software Foundation. 9 10This program is also distributed with certain software (including 11but not limited to OpenSSL) that is licensed under separate terms, 12as designated in a particular file or component or in included license 13documentation. The authors of MySQL hereby grant you an additional 14permission to link the program and your derivative works with the 15separately licensed software that they have included with MySQL. 16 17This program is distributed in the hope that it will be useful, 18but WITHOUT ANY WARRANTY; without even the implied warranty of 19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20GNU General Public License, version 2.0, for more details. 21 22You should have received a copy of the GNU General Public License along with 23this program; if not, write to the Free Software Foundation, Inc., 2451 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 25 26*****************************************************************************/ 27 28/**************************************************//** 29@file include/page0zip.ic 30Compressed page interface 31 32Created June 2005 by Marko Makela 33*******************************************************/ 34 35#ifdef UNIV_MATERIALIZE 36# undef UNIV_INLINE 37# define UNIV_INLINE 38#endif 39 40#include "page0zip.h" 41#include "mtr0log.h" 42#include "page0page.h" 43#include "srv0srv.h" 44 45/* The format of compressed pages is as follows. 46 47The header and trailer of the uncompressed pages, excluding the page 48directory in the trailer, are copied as is to the header and trailer 49of the compressed page. 
50 51At the end of the compressed page, there is a dense page directory 52pointing to every user record contained on the page, including deleted 53records on the free list. The dense directory is indexed in the 54collation order, i.e., in the order in which the record list is 55linked on the uncompressed page. The infimum and supremum records are 56excluded. The two most significant bits of the entries are allocated 57for the delete-mark and an n_owned flag indicating the last record in 58a chain of records pointed to from the sparse page directory on the 59uncompressed page. 60 61The data between PAGE_ZIP_START and the last page directory entry will 62be written in compressed format, starting at offset PAGE_DATA. 63Infimum and supremum records are not stored. We exclude the 64REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered 65from the dense page directory stored at the end of the compressed 66page. 67 68The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and 69roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of 70externally stored columns are stored separately, in ascending order of 71heap_no and column index, starting backwards from the dense page 72directory. 73 74The compressed data stream may be followed by a modification log 75covering the compressed portion of the page, as follows. 76 77MODIFICATION LOG ENTRY FORMAT 78- write record: 79 - (heap_no - 1) << 1 (1..2 bytes) 80 - extra bytes backwards 81 - data bytes 82- clear record: 83 - (heap_no - 1) << 1 | 1 (1..2 bytes) 84 85The integer values are stored in a variable-length format: 86- 0xxxxxxx: 0..127 87- 1xxxxxxx xxxxxxxx: 0..32767 88 89The end of the modification log is marked by a 0 byte. 
90 91In summary, the compressed page looks like this: 92 93(1) Uncompressed page header (PAGE_DATA bytes) 94(2) Compressed index information 95(3) Compressed page data 96(4) Page modification log (page_zip->m_start..page_zip->m_end) 97(5) Empty zero-filled space 98(6) BLOB pointers (on leaf pages) 99 - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column 100 - in descending collation order 101(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, 102 - indexed by heap_no 103 - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes 104 - REC_NODE_PTR_SIZE for non-leaf pages 105 - 0 otherwise 106(8) dense page directory, stored backwards 107 - n_dense = n_heap - 2 108 - existing records in ascending collation order 109 - deleted records (free list) in link order 110*/ 111 112/**********************************************************************//** 113Determine the size of a compressed page in bytes. 114@return size in bytes */ 115UNIV_INLINE 116ulint 117page_zip_get_size( 118/*==============*/ 119 const page_zip_des_t* page_zip) /*!< in: compressed page */ 120{ 121 ulint size; 122 123 if (!page_zip->ssize) { 124 return(0); 125 } 126 127 size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize; 128 129 ut_ad(size >= UNIV_ZIP_SIZE_MIN); 130 ut_ad(size <= UNIV_PAGE_SIZE); 131 132 return(size); 133} 134/**********************************************************************//** 135Set the size of a compressed page in bytes. 
*/ 136UNIV_INLINE 137void 138page_zip_set_size( 139/*==============*/ 140 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 141 ulint size) /*!< in: size in bytes */ 142{ 143 if (size) { 144 int ssize; 145 146 ut_ad(ut_is_2pow(size)); 147 148 for (ssize = 1; size > (ulint) (512 << ssize); ssize++) { 149 } 150 151 page_zip->ssize = ssize; 152 } else { 153 page_zip->ssize = 0; 154 } 155 156 ut_ad(page_zip_get_size(page_zip) == size); 157} 158 159#ifndef UNIV_HOTBACKUP 160/** Determine if a record is so big that it needs to be stored externally. 161@param[in] rec_size length of the record in bytes 162@param[in] comp nonzero=compact format 163@param[in] n_fields number of fields in the record; ignored if 164tablespace is not compressed 165@param[in] page_size page size 166@return FALSE if the entire record can be stored locally on the page */ 167UNIV_INLINE 168ibool 169page_zip_rec_needs_ext( 170 ulint rec_size, 171 ulint comp, 172 ulint n_fields, 173 const page_size_t& page_size) 174{ 175 ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); 176 ut_ad(comp || !page_size.is_compressed()); 177 178#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE 179 if (rec_size >= REC_MAX_DATA_SIZE) { 180 return(TRUE); 181 } 182#endif 183 184 if (page_size.is_compressed()) { 185 ut_ad(comp); 186 /* On a compressed page, there is a two-byte entry in 187 the dense page directory for every record. But there 188 is no record header. There should be enough room for 189 one record on an empty leaf page. Subtract 1 byte for 190 the encoded heap number. Check also the available space 191 on the uncompressed page. 
*/ 192 return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) 193 >= page_zip_empty_size(n_fields, page_size.physical()) 194 || rec_size >= page_get_free_space_of_empty(TRUE) / 2); 195 } 196 197 return(rec_size >= page_get_free_space_of_empty(comp) / 2); 198} 199#endif /* !UNIV_HOTBACKUP */ 200 201#ifdef UNIV_DEBUG 202/**********************************************************************//** 203Validate a compressed page descriptor. 204@return TRUE if ok */ 205UNIV_INLINE 206ibool 207page_zip_simple_validate( 208/*=====================*/ 209 const page_zip_des_t* page_zip)/*!< in: compressed page descriptor */ 210{ 211 ut_ad(page_zip); 212 ut_ad(page_zip->data); 213 ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX); 214 ut_ad(page_zip_get_size(page_zip) 215 > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); 216 ut_ad(page_zip->m_start <= page_zip->m_end); 217 ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); 218 ut_ad(page_zip->n_blobs 219 < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); 220 return(TRUE); 221} 222#endif /* UNIV_DEBUG */ 223 224/**********************************************************************//** 225Determine if the length of the page trailer. 
226@return length of the page trailer, in bytes, not including the 227terminating zero byte of the modification log */ 228UNIV_INLINE 229ibool 230page_zip_get_trailer_len( 231/*=====================*/ 232 const page_zip_des_t* page_zip,/*!< in: compressed page */ 233 ibool is_clust)/*!< in: TRUE if clustered index */ 234{ 235 ulint uncompressed_size; 236 237 ut_ad(page_zip_simple_validate(page_zip)); 238 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 239 240 if (!page_is_leaf(page_zip->data)) { 241 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 242 + REC_NODE_PTR_SIZE; 243 ut_ad(!page_zip->n_blobs); 244 } else if (is_clust) { 245 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 246 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; 247 } else { 248 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; 249 ut_ad(!page_zip->n_blobs); 250 } 251 252 return((page_dir_get_n_heap(page_zip->data) - 2) 253 * uncompressed_size 254 + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); 255} 256 257/**********************************************************************//** 258Determine how big record can be inserted without recompressing the page. 259@return a positive number indicating the maximum size of a record 260whose insertion is guaranteed to succeed, or zero or negative */ 261UNIV_INLINE 262lint 263page_zip_max_ins_size( 264/*==================*/ 265 const page_zip_des_t* page_zip,/*!< in: compressed page */ 266 ibool is_clust)/*!< in: TRUE if clustered index */ 267{ 268 ulint trailer_len; 269 270 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 271 272 /* When a record is created, a pointer may be added to 273 the dense directory. 274 Likewise, space for the columns that will not be 275 compressed will be allocated from the page trailer. 276 Also the BLOB pointers will be allocated from there, but 277 we may as well count them in the length of the record. 
*/ 278 279 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 280 281 return((lint) page_zip_get_size(page_zip) 282 - trailer_len - page_zip->m_end 283 - (REC_N_NEW_EXTRA_BYTES - 2)); 284} 285 286/**********************************************************************//** 287Determine if enough space is available in the modification log. 288@return TRUE if enough space is available */ 289UNIV_INLINE 290ibool 291page_zip_available( 292/*===============*/ 293 const page_zip_des_t* page_zip,/*!< in: compressed page */ 294 ibool is_clust,/*!< in: TRUE if clustered index */ 295 ulint length, /*!< in: combined size of the record */ 296 ulint create) /*!< in: nonzero=add the record to 297 the heap */ 298{ 299 ulint trailer_len; 300 301 ut_ad(length > REC_N_NEW_EXTRA_BYTES); 302 303 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 304 305 /* Subtract the fixed extra bytes and add the maximum 306 space needed for identifying the record (encoded heap_no). */ 307 length -= REC_N_NEW_EXTRA_BYTES - 2; 308 309 if (create > 0) { 310 /* When a record is created, a pointer may be added to 311 the dense directory. 312 Likewise, space for the columns that will not be 313 compressed will be allocated from the page trailer. 314 Also the BLOB pointers will be allocated from there, but 315 we may as well count them in the length of the record. */ 316 317 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 318 } 319 320 return(length + trailer_len + page_zip->m_end 321 < page_zip_get_size(page_zip)); 322} 323 324/**********************************************************************//** 325Initialize a compressed page descriptor. */ 326UNIV_INLINE 327void 328page_zip_des_init( 329/*==============*/ 330 page_zip_des_t* page_zip) /*!< in/out: compressed page 331 descriptor */ 332{ 333 memset(page_zip, 0, sizeof *page_zip); 334} 335 336/**********************************************************************//** 337Write a log record of writing to the uncompressed header portion of a page. 
*/ 338void 339page_zip_write_header_log( 340/*======================*/ 341 const byte* data,/*!< in: data on the uncompressed page */ 342 ulint length, /*!< in: length of the data */ 343 mtr_t* mtr); /*!< in: mini-transaction */ 344 345/**********************************************************************//** 346Write data to the uncompressed header portion of a page. The data must 347already have been written to the uncompressed page. 348However, the data portion of the uncompressed page may differ from 349the compressed page when a record is being inserted in 350page_cur_insert_rec_zip(). */ 351UNIV_INLINE 352void 353page_zip_write_header( 354/*==================*/ 355 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 356 const byte* str, /*!< in: address on the uncompressed page */ 357 ulint length, /*!< in: length of the data */ 358 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 359{ 360 ulint pos; 361 362 ut_ad(page_zip_simple_validate(page_zip)); 363 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 364 365 pos = page_offset(str); 366 367 ut_ad(pos < PAGE_DATA); 368 369 memcpy(page_zip->data + pos, str, length); 370 371 /* The following would fail in page_cur_insert_rec_zip(). */ 372 /* ut_ad(page_zip_validate(page_zip, str - pos)); */ 373 374 if (mtr) { 375#ifndef UNIV_HOTBACKUP 376 page_zip_write_header_log(str, length, mtr); 377#endif /* !UNIV_HOTBACKUP */ 378 } 379} 380 381/**********************************************************************//** 382Write a log record of compressing an index page without the data on the page. 
*/ 383UNIV_INLINE 384void 385page_zip_compress_write_log_no_data( 386/*================================*/ 387 ulint level, /*!< in: compression level */ 388 const page_t* page, /*!< in: page that is compressed */ 389 dict_index_t* index, /*!< in: index */ 390 mtr_t* mtr) /*!< in: mtr */ 391{ 392 byte* log_ptr = mlog_open_and_write_index( 393 mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1); 394 395 if (log_ptr) { 396 mach_write_to_1(log_ptr, level); 397 mlog_close(mtr, log_ptr + 1); 398 } 399} 400 401/**********************************************************************//** 402Parses a log record of compressing an index page without the data. 403@return end of log record or NULL */ 404UNIV_INLINE 405byte* 406page_zip_parse_compress_no_data( 407/*============================*/ 408 byte* ptr, /*!< in: buffer */ 409 byte* end_ptr, /*!< in: buffer end */ 410 page_t* page, /*!< in: uncompressed page */ 411 page_zip_des_t* page_zip, /*!< out: compressed page */ 412 dict_index_t* index) /*!< in: index */ 413{ 414 ulint level; 415 if (end_ptr == ptr) { 416 return(NULL); 417 } 418 419 level = mach_read_from_1(ptr); 420 421 /* If page compression fails then there must be something wrong 422 because a compress log record is logged only if the compression 423 was successful. Crash in this case. */ 424 425 if (page 426 && !page_zip_compress(page_zip, page, index, level, NULL, NULL)) { 427 ut_error; 428 } 429 430 return(ptr + 1); 431} 432 433/**********************************************************************//** 434Reset the counters used for filling 435INFORMATION_SCHEMA.innodb_cmp_per_index. 
*/
UNIV_INLINE
void
page_zip_reset_stat_per_index()
/*===========================*/
{
	/* All entries are removed while holding the mutex that guards
	the per-index statistics container. */
	mutex_enter(&page_zip_stat_per_index_mutex);

	page_zip_stat_per_index.erase(page_zip_stat_per_index.begin(),
				      page_zip_stat_per_index.end());

	mutex_exit(&page_zip_stat_per_index_mutex);
}

/* Restore the UNIV_INLINE definition that was overridden at the top of
this file when UNIV_MATERIALIZE is defined. */
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
#endif