1/***************************************************************************** 2 3Copyright (c) 2005, 2018, Oracle and/or its affiliates. All Rights Reserved. 4Copyright (c) 2012, Facebook Inc. 5 6This program is free software; you can redistribute it and/or modify 7it under the terms of the GNU General Public License, version 2.0, 8as published by the Free Software Foundation. 9 10This program is also distributed with certain software (including 11but not limited to OpenSSL) that is licensed under separate terms, 12as designated in a particular file or component or in included license 13documentation. The authors of MySQL hereby grant you an additional 14permission to link the program and your derivative works with the 15separately licensed software that they have included with MySQL. 16 17This program is distributed in the hope that it will be useful, 18but WITHOUT ANY WARRANTY; without even the implied warranty of 19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20GNU General Public License, version 2.0, for more details. 21 22You should have received a copy of the GNU General Public License along with 23this program; if not, write to the Free Software Foundation, Inc., 2451 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 25 26*****************************************************************************/ 27 28/**************************************************//** 29@file include/page0zip.ic 30Compressed page interface 31 32Created June 2005 by Marko Makela 33*******************************************************/ 34 35#ifdef UNIV_MATERIALIZE 36# undef UNIV_INLINE 37# define UNIV_INLINE 38#endif 39 40#include "page0zip.h" 41#include "mtr0log.h" 42#include "page0page.h" 43 44/* The format of compressed pages is as follows. 45 46The header and trailer of the uncompressed pages, excluding the page 47directory in the trailer, are copied as is to the header and trailer 48of the compressed page. 49 50At the end of the compressed page, there is a dense page directory 51pointing to every user record contained on the page, including deleted 52records on the free list. The dense directory is indexed in the 53collation order, i.e., in the order in which the record list is 54linked on the uncompressed page. The infimum and supremum records are 55excluded. The two most significant bits of the entries are allocated 56for the delete-mark and an n_owned flag indicating the last record in 57a chain of records pointed to from the sparse page directory on the 58uncompressed page. 59 60The data between PAGE_ZIP_START and the last page directory entry will 61be written in compressed format, starting at offset PAGE_DATA. 62Infimum and supremum records are not stored. We exclude the 63REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered 64from the dense page directory stored at the end of the compressed 65page. 66 67The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and 68roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of 69externally stored columns are stored separately, in ascending order of 70heap_no and column index, starting backwards from the dense page 71directory. 72 73The compressed data stream may be followed by a modification log 74covering the compressed portion of the page, as follows. 75 76MODIFICATION LOG ENTRY FORMAT 77- write record: 78 - (heap_no - 1) << 1 (1..2 bytes) 79 - extra bytes backwards 80 - data bytes 81- clear record: 82 - (heap_no - 1) << 1 | 1 (1..2 bytes) 83 84The integer values are stored in a variable-length format: 85- 0xxxxxxx: 0..127 86- 1xxxxxxx xxxxxxxx: 0..32767 87 88The end of the modification log is marked by a 0 byte. 89 90In summary, the compressed page looks like this: 91 92(1) Uncompressed page header (PAGE_DATA bytes) 93(2) Compressed index information 94(3) Compressed page data 95(4) Page modification log (page_zip->m_start..page_zip->m_end) 96(5) Empty zero-filled space 97(6) BLOB pointers (on leaf pages) 98 - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column 99 - in descending collation order 100(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, 101 - indexed by heap_no 102 - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes 103 - REC_NODE_PTR_SIZE for non-leaf pages 104 - 0 otherwise 105(8) dense page directory, stored backwards 106 - n_dense = n_heap - 2 107 - existing records in ascending collation order 108 - deleted records (free list) in link order 109*/ 110 111/** Start offset of the area that will be compressed */ 112#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END 113/** Size of an compressed page directory entry */ 114#define PAGE_ZIP_DIR_SLOT_SIZE 2 115/** Mask of record offsets */ 116#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff 117/** 'owned' flag */ 118#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 119/** 'deleted' flag */ 120#define PAGE_ZIP_DIR_SLOT_DEL 0x8000 121 122/**********************************************************************//** 123Determine the size of a compressed page in bytes. 124@return size in bytes */ 125UNIV_INLINE 126ulint 127page_zip_get_size( 128/*==============*/ 129 const page_zip_des_t* page_zip) /*!< in: compressed page */ 130{ 131 ulint size; 132 133 if (!page_zip->ssize) { 134 return(0); 135 } 136 137 size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize; 138 139 ut_ad(size >= UNIV_ZIP_SIZE_MIN); 140 ut_ad(size <= UNIV_PAGE_SIZE); 141 142 return(size); 143} 144/**********************************************************************//** 145Set the size of a compressed page in bytes. */ 146UNIV_INLINE 147void 148page_zip_set_size( 149/*==============*/ 150 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 151 ulint size) /*!< in: size in bytes */ 152{ 153 if (size) { 154 int ssize; 155 156 ut_ad(ut_is_2pow(size)); 157 158 for (ssize = 1; size > (ulint) (512 << ssize); ssize++) { 159 } 160 161 page_zip->ssize = ssize; 162 } else { 163 page_zip->ssize = 0; 164 } 165 166 ut_ad(page_zip_get_size(page_zip) == size); 167} 168 169#ifndef UNIV_HOTBACKUP 170/**********************************************************************//** 171Determine if a record is so big that it needs to be stored externally. 172@return FALSE if the entire record can be stored locally on the page */ 173UNIV_INLINE 174ibool 175page_zip_rec_needs_ext( 176/*===================*/ 177 ulint rec_size, /*!< in: length of the record in bytes */ 178 ulint comp, /*!< in: nonzero=compact format */ 179 ulint n_fields, /*!< in: number of fields in the record; 180 ignored if zip_size == 0 */ 181 ulint zip_size) /*!< in: compressed page size in bytes, or 0 */ 182{ 183 ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES)); 184 ut_ad(ut_is_2pow(zip_size)); 185 ut_ad(comp || !zip_size); 186 187#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE 188 if (rec_size >= REC_MAX_DATA_SIZE) { 189 return(TRUE); 190 } 191#endif 192 193 if (zip_size) { 194 ut_ad(comp); 195 /* On a compressed page, there is a two-byte entry in 196 the dense page directory for every record. But there 197 is no record header. There should be enough room for 198 one record on an empty leaf page. Subtract 1 byte for 199 the encoded heap number. Check also the available space 200 on the uncompressed page. */ 201 return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) 202 >= page_zip_empty_size(n_fields, zip_size) 203 || rec_size >= page_get_free_space_of_empty(TRUE) / 2); 204 } 205 206 return(rec_size >= page_get_free_space_of_empty(comp) / 2); 207} 208#endif /* !UNIV_HOTBACKUP */ 209 210#ifdef UNIV_DEBUG 211/**********************************************************************//** 212Validate a compressed page descriptor. 213@return TRUE if ok */ 214UNIV_INLINE 215ibool 216page_zip_simple_validate( 217/*=====================*/ 218 const page_zip_des_t* page_zip)/*!< in: compressed page descriptor */ 219{ 220 ut_ad(page_zip); 221 ut_ad(page_zip->data); 222 ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX); 223 ut_ad(page_zip_get_size(page_zip) 224 > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); 225 ut_ad(page_zip->m_start <= page_zip->m_end); 226 ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); 227 ut_ad(page_zip->n_blobs 228 < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); 229 return(TRUE); 230} 231#endif /* UNIV_DEBUG */ 232 233/**********************************************************************//** 234Determine if the length of the page trailer. 235@return length of the page trailer, in bytes, not including the 236terminating zero byte of the modification log */ 237UNIV_INLINE 238ibool 239page_zip_get_trailer_len( 240/*=====================*/ 241 const page_zip_des_t* page_zip,/*!< in: compressed page */ 242 ibool is_clust)/*!< in: TRUE if clustered index */ 243{ 244 ulint uncompressed_size; 245 246 ut_ad(page_zip_simple_validate(page_zip)); 247 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 248 249 if (!page_is_leaf(page_zip->data)) { 250 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 251 + REC_NODE_PTR_SIZE; 252 ut_ad(!page_zip->n_blobs); 253 } else if (is_clust) { 254 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 255 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; 256 } else { 257 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; 258 ut_ad(!page_zip->n_blobs); 259 } 260 261 return((page_dir_get_n_heap(page_zip->data) - 2) 262 * uncompressed_size 263 + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); 264} 265 266/**********************************************************************//** 267Determine how big record can be inserted without recompressing the page. 268@return a positive number indicating the maximum size of a record 269whose insertion is guaranteed to succeed, or zero or negative */ 270UNIV_INLINE 271lint 272page_zip_max_ins_size( 273/*==================*/ 274 const page_zip_des_t* page_zip,/*!< in: compressed page */ 275 ibool is_clust)/*!< in: TRUE if clustered index */ 276{ 277 ulint trailer_len; 278 279 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 280 281 /* When a record is created, a pointer may be added to 282 the dense directory. 283 Likewise, space for the columns that will not be 284 compressed will be allocated from the page trailer. 285 Also the BLOB pointers will be allocated from there, but 286 we may as well count them in the length of the record. */ 287 288 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 289 290 return((lint) page_zip_get_size(page_zip) 291 - trailer_len - page_zip->m_end 292 - (REC_N_NEW_EXTRA_BYTES - 2)); 293} 294 295/**********************************************************************//** 296Determine if enough space is available in the modification log. 297@return TRUE if enough space is available */ 298UNIV_INLINE 299ibool 300page_zip_available( 301/*===============*/ 302 const page_zip_des_t* page_zip,/*!< in: compressed page */ 303 ibool is_clust,/*!< in: TRUE if clustered index */ 304 ulint length, /*!< in: combined size of the record */ 305 ulint create) /*!< in: nonzero=add the record to 306 the heap */ 307{ 308 ulint trailer_len; 309 310 ut_ad(length > REC_N_NEW_EXTRA_BYTES); 311 312 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 313 314 /* Subtract the fixed extra bytes and add the maximum 315 space needed for identifying the record (encoded heap_no). */ 316 length -= REC_N_NEW_EXTRA_BYTES - 2; 317 318 if (create > 0) { 319 /* When a record is created, a pointer may be added to 320 the dense directory. 321 Likewise, space for the columns that will not be 322 compressed will be allocated from the page trailer. 323 Also the BLOB pointers will be allocated from there, but 324 we may as well count them in the length of the record. */ 325 326 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 327 } 328 329 return(length + trailer_len + page_zip->m_end 330 < page_zip_get_size(page_zip)); 331} 332 333/**********************************************************************//** 334Initialize a compressed page descriptor. */ 335UNIV_INLINE 336void 337page_zip_des_init( 338/*==============*/ 339 page_zip_des_t* page_zip) /*!< in/out: compressed page 340 descriptor */ 341{ 342 memset(page_zip, 0, sizeof *page_zip); 343} 344 345/**********************************************************************//** 346Write a log record of writing to the uncompressed header portion of a page. */ 347UNIV_INTERN 348void 349page_zip_write_header_log( 350/*======================*/ 351 const byte* data,/*!< in: data on the uncompressed page */ 352 ulint length, /*!< in: length of the data */ 353 mtr_t* mtr); /*!< in: mini-transaction */ 354 355/**********************************************************************//** 356Write data to the uncompressed header portion of a page. The data must 357already have been written to the uncompressed page. 358However, the data portion of the uncompressed page may differ from 359the compressed page when a record is being inserted in 360page_cur_insert_rec_zip(). */ 361UNIV_INLINE 362void 363page_zip_write_header( 364/*==================*/ 365 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 366 const byte* str, /*!< in: address on the uncompressed page */ 367 ulint length, /*!< in: length of the data */ 368 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 369{ 370 ulint pos; 371 372 ut_ad(PAGE_ZIP_MATCH(str, page_zip)); 373 ut_ad(page_zip_simple_validate(page_zip)); 374 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 375 376 pos = page_offset(str); 377 378 ut_ad(pos < PAGE_DATA); 379 380 memcpy(page_zip->data + pos, str, length); 381 382 /* The following would fail in page_cur_insert_rec_zip(). */ 383 /* ut_ad(page_zip_validate(page_zip, str - pos)); */ 384 385 if (mtr) { 386#ifndef UNIV_HOTBACKUP 387 page_zip_write_header_log(str, length, mtr); 388#endif /* !UNIV_HOTBACKUP */ 389 } 390} 391 392/**********************************************************************//** 393Write a log record of compressing an index page without the data on the page. */ 394UNIV_INLINE 395void 396page_zip_compress_write_log_no_data( 397/*================================*/ 398 ulint level, /*!< in: compression level */ 399 const page_t* page, /*!< in: page that is compressed */ 400 dict_index_t* index, /*!< in: index */ 401 mtr_t* mtr) /*!< in: mtr */ 402{ 403 byte* log_ptr = mlog_open_and_write_index( 404 mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1); 405 406 if (log_ptr) { 407 mach_write_to_1(log_ptr, level); 408 mlog_close(mtr, log_ptr + 1); 409 } 410} 411 412/**********************************************************************//** 413Parses a log record of compressing an index page without the data. 414@return end of log record or NULL */ 415UNIV_INLINE 416byte* 417page_zip_parse_compress_no_data( 418/*============================*/ 419 byte* ptr, /*!< in: buffer */ 420 byte* end_ptr, /*!< in: buffer end */ 421 page_t* page, /*!< in: uncompressed page */ 422 page_zip_des_t* page_zip, /*!< out: compressed page */ 423 dict_index_t* index) /*!< in: index */ 424{ 425 ulint level; 426 if (end_ptr == ptr) { 427 return(NULL); 428 } 429 430 level = mach_read_from_1(ptr); 431 432 /* If page compression fails then there must be something wrong 433 because a compress log record is logged only if the compression 434 was successful. Crash in this case. */ 435 436 if (page 437 && !page_zip_compress(page_zip, page, index, level, NULL)) { 438 ut_error; 439 } 440 441 return(ptr + 1); 442} 443 444/**********************************************************************//** 445Reset the counters used for filling 446INFORMATION_SCHEMA.innodb_cmp_per_index. */ 447UNIV_INLINE 448void 449page_zip_reset_stat_per_index() 450/*===========================*/ 451{ 452 mutex_enter(&page_zip_stat_per_index_mutex); 453 454 page_zip_stat_per_index.erase( 455 page_zip_stat_per_index.begin(), 456 page_zip_stat_per_index.end()); 457 458 mutex_exit(&page_zip_stat_per_index_mutex); 459} 460 461#ifdef UNIV_MATERIALIZE 462# undef UNIV_INLINE 463# define UNIV_INLINE UNIV_INLINE_ORIGINAL 464#endif 465