1 /***************************************************************************** 2 3 Copyright (c) 2005, 2021, Oracle and/or its affiliates. 4 Copyright (c) 2012, Facebook Inc. 5 6 This program is free software; you can redistribute it and/or modify 7 it under the terms of the GNU General Public License, version 2.0, 8 as published by the Free Software Foundation. 9 10 This program is also distributed with certain software (including 11 but not limited to OpenSSL) that is licensed under separate terms, 12 as designated in a particular file or component or in included license 13 documentation. The authors of MySQL hereby grant you an additional 14 permission to link the program and your derivative works with the 15 separately licensed software that they have included with MySQL. 16 17 This program is distributed in the hope that it will be useful, 18 but WITHOUT ANY WARRANTY; without even the implied warranty of 19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 20 GNU General Public License, version 2.0, for more details. 21 22 You should have received a copy of the GNU General Public License along with 23 this program; if not, write to the Free Software Foundation, Inc., 24 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA 25 26 *****************************************************************************/ 27 28 /**************************************************//** 29 @file include/page0zip.h 30 Compressed page interface 31 32 Created June 2005 by Marko Makela 33 *******************************************************/ 34 35 #ifndef page0zip_h 36 #define page0zip_h 37 38 #ifdef UNIV_MATERIALIZE 39 # undef UNIV_INLINE 40 # define UNIV_INLINE 41 #endif 42 43 #ifdef UNIV_INNOCHECKSUM 44 #include "univ.i" 45 #include "buf0buf.h" 46 #include "ut0crc32.h" 47 #include "buf0checksum.h" 48 #include "mach0data.h" 49 #include "zlib.h" 50 #endif /* UNIV_INNOCHECKSUM */ 51 52 #ifndef UNIV_INNOCHECKSUM 53 #include "mtr0types.h" 54 #include "page0types.h" 55 #endif /* !UNIV_INNOCHECKSUM */ 56 57 #include "buf0types.h" 58 59 #ifndef UNIV_INNOCHECKSUM 60 #include "dict0types.h" 61 #include "srv0srv.h" 62 #include "trx0types.h" 63 #include "mem0mem.h" 64 65 /* Compression level to be used by zlib. Settable by user. */ 66 extern uint page_zip_level; 67 68 /* Default compression level. */ 69 #define DEFAULT_COMPRESSION_LEVEL 6 70 /** Start offset of the area that will be compressed */ 71 #define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END 72 /** Size of an compressed page directory entry */ 73 #define PAGE_ZIP_DIR_SLOT_SIZE 2 74 /** Predefine the sum of DIR_SLOT, TRX_ID & ROLL_PTR */ 75 #define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE \ 76 (PAGE_ZIP_DIR_SLOT_SIZE \ 77 + DATA_TRX_ID_LEN \ 78 + DATA_ROLL_PTR_LEN) 79 /** Mask of record offsets */ 80 #define PAGE_ZIP_DIR_SLOT_MASK 0x3fff 81 /** 'owned' flag */ 82 #define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 83 /** 'deleted' flag */ 84 #define PAGE_ZIP_DIR_SLOT_DEL 0x8000 85 86 /* Whether or not to log compressed page images to avoid possible 87 compression algorithm changes in zlib. */ 88 extern my_bool page_zip_log_pages; 89 90 /**********************************************************************//** 91 Determine the size of a compressed page in bytes. 92 @return size in bytes */ 93 UNIV_INLINE 94 ulint 95 page_zip_get_size( 96 /*==============*/ 97 const page_zip_des_t* page_zip) /*!< in: compressed page */ 98 MY_ATTRIBUTE((warn_unused_result)); 99 /**********************************************************************//** 100 Set the size of a compressed page in bytes. */ 101 UNIV_INLINE 102 void 103 page_zip_set_size( 104 /*==============*/ 105 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 106 ulint size); /*!< in: size in bytes */ 107 108 #ifndef UNIV_HOTBACKUP 109 /** Determine if a record is so big that it needs to be stored externally. 110 @param[in] rec_size length of the record in bytes 111 @param[in] comp nonzero=compact format 112 @param[in] n_fields number of fields in the record; ignored if 113 tablespace is not compressed 114 @param[in] page_size page size 115 @return FALSE if the entire record can be stored locally on the page */ 116 UNIV_INLINE 117 ibool 118 page_zip_rec_needs_ext( 119 ulint rec_size, 120 ulint comp, 121 ulint n_fields, 122 const page_size_t& page_size) 123 MY_ATTRIBUTE((warn_unused_result)); 124 125 /**********************************************************************//** 126 Determine the guaranteed free space on an empty page. 127 @return minimum payload size on the page */ 128 ulint 129 page_zip_empty_size( 130 /*================*/ 131 ulint n_fields, /*!< in: number of columns in the index */ 132 ulint zip_size) /*!< in: compressed page size in bytes */ 133 MY_ATTRIBUTE((const)); 134 135 /** Check whether a tuple is too big for compressed table 136 @param[in] index dict index object 137 @param[in] entry entry for the index 138 @return true if it's too big, otherwise false */ 139 bool 140 page_zip_is_too_big( 141 const dict_index_t* index, 142 const dtuple_t* entry); 143 #endif /* !UNIV_HOTBACKUP */ 144 145 /**********************************************************************//** 146 Initialize a compressed page descriptor. */ 147 UNIV_INLINE 148 void 149 page_zip_des_init( 150 /*==============*/ 151 page_zip_des_t* page_zip); /*!< in/out: compressed page 152 descriptor */ 153 154 /**********************************************************************//** 155 Configure the zlib allocator to use the given memory heap. */ 156 void 157 page_zip_set_alloc( 158 /*===============*/ 159 void* stream, /*!< in/out: zlib stream */ 160 mem_heap_t* heap); /*!< in: memory heap to use */ 161 162 /**********************************************************************//** 163 Compress a page. 164 @return TRUE on success, FALSE on failure; page_zip will be left 165 intact on failure. */ 166 ibool 167 page_zip_compress( 168 /*==============*/ 169 page_zip_des_t* page_zip, /*!< in: size; out: data, 170 n_blobs, m_start, m_end, 171 m_nonempty */ 172 const page_t* page, /*!< in: uncompressed page */ 173 dict_index_t* index, /*!< in: index of the B-tree 174 node */ 175 ulint level, /*!< in: commpression level */ 176 const redo_page_compress_t* page_comp_info, 177 /*!< in: used for applying 178 TRUNCATE log 179 record during recovery */ 180 mtr_t* mtr) /*!< in/out: mini-transaction, 181 or NULL */ 182 MY_ATTRIBUTE((warn_unused_result)); 183 184 /**********************************************************************//** 185 Write the index information for the compressed page. 186 @return used size of buf */ 187 ulint 188 page_zip_fields_encode( 189 /*===================*/ 190 ulint n, /*!< in: number of fields 191 to compress */ 192 const dict_index_t* index, /*!< in: index comprising 193 at least n fields */ 194 ulint trx_id_pos, 195 /*!< in: position of the trx_id column 196 in the index, or ULINT_UNDEFINED if 197 this is a non-leaf page */ 198 byte* buf); /*!< out: buffer of (n + 1) * 2 bytes */ 199 200 /**********************************************************************//** 201 Decompress a page. This function should tolerate errors on the compressed 202 page. Instead of letting assertions fail, it will return FALSE if an 203 inconsistency is detected. 204 @return TRUE on success, FALSE on failure */ 205 ibool 206 page_zip_decompress( 207 /*================*/ 208 page_zip_des_t* page_zip,/*!< in: data, ssize; 209 out: m_start, m_end, m_nonempty, n_blobs */ 210 page_t* page, /*!< out: uncompressed page, may be trashed */ 211 ibool all) /*!< in: TRUE=decompress the whole page; 212 FALSE=verify but do not copy some 213 page header fields that should not change 214 after page creation */ 215 MY_ATTRIBUTE((nonnull(1,2))); 216 217 #ifdef UNIV_DEBUG 218 /**********************************************************************//** 219 Validate a compressed page descriptor. 220 @return TRUE if ok */ 221 UNIV_INLINE 222 ibool 223 page_zip_simple_validate( 224 /*=====================*/ 225 const page_zip_des_t* page_zip); /*!< in: compressed page 226 descriptor */ 227 #endif /* UNIV_DEBUG */ 228 229 #ifdef UNIV_ZIP_DEBUG 230 /**********************************************************************//** 231 Check that the compressed and decompressed pages match. 232 @return TRUE if valid, FALSE if not */ 233 ibool 234 page_zip_validate_low( 235 /*==================*/ 236 const page_zip_des_t* page_zip,/*!< in: compressed page */ 237 const page_t* page, /*!< in: uncompressed page */ 238 const dict_index_t* index, /*!< in: index of the page, if known */ 239 ibool sloppy) /*!< in: FALSE=strict, 240 TRUE=ignore the MIN_REC_FLAG */ 241 MY_ATTRIBUTE((nonnull(1,2))); 242 /**********************************************************************//** 243 Check that the compressed and decompressed pages match. */ 244 ibool 245 page_zip_validate( 246 /*==============*/ 247 const page_zip_des_t* page_zip,/*!< in: compressed page */ 248 const page_t* page, /*!< in: uncompressed page */ 249 const dict_index_t* index) /*!< in: index of the page, if known */ 250 MY_ATTRIBUTE((nonnull(1,2))); 251 #endif /* UNIV_ZIP_DEBUG */ 252 253 /**********************************************************************//** 254 Determine how big record can be inserted without recompressing the page. 255 @return a positive number indicating the maximum size of a record 256 whose insertion is guaranteed to succeed, or zero or negative */ 257 UNIV_INLINE 258 lint 259 page_zip_max_ins_size( 260 /*==================*/ 261 const page_zip_des_t* page_zip,/*!< in: compressed page */ 262 ibool is_clust)/*!< in: TRUE if clustered index */ 263 MY_ATTRIBUTE((warn_unused_result)); 264 265 /**********************************************************************//** 266 Determine if enough space is available in the modification log. 267 @return TRUE if page_zip_write_rec() will succeed */ 268 UNIV_INLINE 269 ibool 270 page_zip_available( 271 /*===============*/ 272 const page_zip_des_t* page_zip,/*!< in: compressed page */ 273 ibool is_clust,/*!< in: TRUE if clustered index */ 274 ulint length, /*!< in: combined size of the record */ 275 ulint create) /*!< in: nonzero=add the record to 276 the heap */ 277 MY_ATTRIBUTE((warn_unused_result)); 278 279 /**********************************************************************//** 280 Write data to the uncompressed header portion of a page. The data must 281 already have been written to the uncompressed page. */ 282 UNIV_INLINE 283 void 284 page_zip_write_header( 285 /*==================*/ 286 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 287 const byte* str, /*!< in: address on the uncompressed page */ 288 ulint length, /*!< in: length of the data */ 289 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 290 MY_ATTRIBUTE((nonnull(1,2))); 291 292 /**********************************************************************//** 293 Write an entire record on the compressed page. The data must already 294 have been written to the uncompressed page. */ 295 void 296 page_zip_write_rec( 297 /*===============*/ 298 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 299 const byte* rec, /*!< in: record being written */ 300 dict_index_t* index, /*!< in: the index the record belongs to */ 301 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ 302 ulint create) /*!< in: nonzero=insert, zero=update */ 303 MY_ATTRIBUTE((nonnull)); 304 305 /***********************************************************//** 306 Parses a log record of writing a BLOB pointer of a record. 307 @return end of log record or NULL */ 308 byte* 309 page_zip_parse_write_blob_ptr( 310 /*==========================*/ 311 byte* ptr, /*!< in: redo log buffer */ 312 byte* end_ptr,/*!< in: redo log buffer end */ 313 page_t* page, /*!< in/out: uncompressed page */ 314 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 315 316 /**********************************************************************//** 317 Write a BLOB pointer of a record on the leaf page of a clustered index. 318 The information must already have been updated on the uncompressed page. */ 319 void 320 page_zip_write_blob_ptr( 321 /*====================*/ 322 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 323 const byte* rec, /*!< in/out: record whose data is being 324 written */ 325 dict_index_t* index, /*!< in: index of the page */ 326 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ 327 ulint n, /*!< in: column index */ 328 mtr_t* mtr); /*!< in: mini-transaction handle, 329 or NULL if no logging is needed */ 330 331 /***********************************************************//** 332 Parses a log record of writing the node pointer of a record. 333 @return end of log record or NULL */ 334 byte* 335 page_zip_parse_write_node_ptr( 336 /*==========================*/ 337 byte* ptr, /*!< in: redo log buffer */ 338 byte* end_ptr,/*!< in: redo log buffer end */ 339 page_t* page, /*!< in/out: uncompressed page */ 340 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 341 342 /**********************************************************************//** 343 Write the node pointer of a record on a non-leaf compressed page. */ 344 void 345 page_zip_write_node_ptr( 346 /*====================*/ 347 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 348 byte* rec, /*!< in/out: record */ 349 ulint size, /*!< in: data size of rec */ 350 ulint ptr, /*!< in: node pointer */ 351 mtr_t* mtr); /*!< in: mini-transaction, or NULL */ 352 353 /**********************************************************************//** 354 Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */ 355 void 356 page_zip_write_trx_id_and_roll_ptr( 357 /*===============================*/ 358 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 359 byte* rec, /*!< in/out: record */ 360 const ulint* offsets,/*!< in: rec_get_offsets(rec, index) */ 361 ulint trx_id_col,/*!< in: column number of TRX_ID in rec */ 362 trx_id_t trx_id, /*!< in: transaction identifier */ 363 roll_ptr_t roll_ptr)/*!< in: roll_ptr */ 364 MY_ATTRIBUTE((nonnull)); 365 366 /**********************************************************************//** 367 Write the "deleted" flag of a record on a compressed page. The flag must 368 already have been written on the uncompressed page. */ 369 void 370 page_zip_rec_set_deleted( 371 /*=====================*/ 372 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 373 const byte* rec, /*!< in: record on the uncompressed page */ 374 ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ 375 MY_ATTRIBUTE((nonnull)); 376 377 /**********************************************************************//** 378 Write the "owned" flag of a record on a compressed page. The n_owned field 379 must already have been written on the uncompressed page. */ 380 void 381 page_zip_rec_set_owned( 382 /*===================*/ 383 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 384 const byte* rec, /*!< in: record on the uncompressed page */ 385 ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ 386 MY_ATTRIBUTE((nonnull)); 387 388 /**********************************************************************//** 389 Insert a record to the dense page directory. */ 390 void 391 page_zip_dir_insert( 392 /*================*/ 393 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 394 const byte* prev_rec,/*!< in: record after which to insert */ 395 const byte* free_rec,/*!< in: record from which rec was 396 allocated, or NULL */ 397 byte* rec); /*!< in: record to insert */ 398 399 /**********************************************************************//** 400 Shift the dense page directory and the array of BLOB pointers 401 when a record is deleted. */ 402 void 403 page_zip_dir_delete( 404 /*================*/ 405 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 406 byte* rec, /*!< in: deleted record */ 407 const dict_index_t* index, /*!< in: index of rec */ 408 const ulint* offsets, /*!< in: rec_get_offsets(rec) */ 409 const byte* free) /*!< in: previous start of 410 the free list */ 411 MY_ATTRIBUTE((nonnull(1,2,3,4))); 412 413 /**********************************************************************//** 414 Add a slot to the dense page directory. */ 415 void 416 page_zip_dir_add_slot( 417 /*==================*/ 418 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 419 ulint is_clustered) /*!< in: nonzero for clustered index, 420 zero for others */ 421 MY_ATTRIBUTE((nonnull)); 422 423 /***********************************************************//** 424 Parses a log record of writing to the header of a page. 425 @return end of log record or NULL */ 426 byte* 427 page_zip_parse_write_header( 428 /*========================*/ 429 byte* ptr, /*!< in: redo log buffer */ 430 byte* end_ptr,/*!< in: redo log buffer end */ 431 page_t* page, /*!< in/out: uncompressed page */ 432 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 433 434 /**********************************************************************//** 435 Write data to the uncompressed header portion of a page. The data must 436 already have been written to the uncompressed page. 437 However, the data portion of the uncompressed page may differ from 438 the compressed page when a record is being inserted in 439 page_cur_insert_rec_low(). */ 440 UNIV_INLINE 441 void 442 page_zip_write_header( 443 /*==================*/ 444 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 445 const byte* str, /*!< in: address on the uncompressed page */ 446 ulint length, /*!< in: length of the data */ 447 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 448 MY_ATTRIBUTE((nonnull(1,2))); 449 450 /**********************************************************************//** 451 Reorganize and compress a page. This is a low-level operation for 452 compressed pages, to be used when page_zip_compress() fails. 453 On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. 454 The function btr_page_reorganize() should be preferred whenever possible. 455 IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a 456 non-clustered index, the caller must update the insert buffer free 457 bits in the same mini-transaction in such a way that the modification 458 will be redo-logged. 459 @return TRUE on success, FALSE on failure; page_zip will be left 460 intact on failure, but page will be overwritten. */ 461 ibool 462 page_zip_reorganize( 463 /*================*/ 464 buf_block_t* block, /*!< in/out: page with compressed page; 465 on the compressed page, in: size; 466 out: data, n_blobs, 467 m_start, m_end, m_nonempty */ 468 dict_index_t* index, /*!< in: index of the B-tree node */ 469 mtr_t* mtr); /*!< in: mini-transaction */ 470 #ifndef UNIV_HOTBACKUP 471 /**********************************************************************//** 472 Copy the records of a page byte for byte. Do not copy the page header 473 or trailer, except those B-tree header fields that are directly 474 related to the storage of records. Also copy PAGE_MAX_TRX_ID. 475 NOTE: The caller must update the lock table and the adaptive hash index. */ 476 void 477 page_zip_copy_recs( 478 /*===============*/ 479 page_zip_des_t* page_zip, /*!< out: copy of src_zip 480 (n_blobs, m_start, m_end, 481 m_nonempty, data[0..size-1]) */ 482 page_t* page, /*!< out: copy of src */ 483 const page_zip_des_t* src_zip, /*!< in: compressed page */ 484 const page_t* src, /*!< in: page */ 485 dict_index_t* index, /*!< in: index of the B-tree */ 486 mtr_t* mtr); /*!< in: mini-transaction */ 487 #endif /* !UNIV_HOTBACKUP */ 488 489 /**********************************************************************//** 490 Parses a log record of compressing an index page. 491 @return end of log record or NULL */ 492 byte* 493 page_zip_parse_compress( 494 /*====================*/ 495 byte* ptr, /*!< in: buffer */ 496 byte* end_ptr, /*!< in: buffer end */ 497 page_t* page, /*!< out: uncompressed page */ 498 page_zip_des_t* page_zip) /*!< out: compressed page */ 499 MY_ATTRIBUTE((warn_unused_result)); 500 501 #endif /* !UNIV_INNOCHECKSUM */ 502 503 /** Calculate the compressed page checksum. 504 @param[in] data compressed page 505 @param[in] size size of compressed page 506 @param[in] algo algorithm to use 507 @param[in] use_legacy_big_endian only used if algo is 508 SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true 509 then use big endian byteorder when converting byte strings to integers. 510 @return page checksum */ 511 uint32_t 512 page_zip_calc_checksum( 513 const void* data, 514 ulint size, 515 srv_checksum_algorithm_t algo, 516 bool use_legacy_big_endian = false); 517 518 /**********************************************************************//** 519 Verify a compressed page's checksum. 520 @return TRUE if the stored checksum is valid according to the value of 521 innodb_checksum_algorithm */ 522 ibool 523 page_zip_verify_checksum( 524 /*=====================*/ 525 const void* data, /*!< in: compressed page */ 526 ulint size /*!< in: size of compressed page */ 527 #ifdef UNIV_INNOCHECKSUM 528 /* these variables are used only for innochecksum tool. */ 529 ,uintmax_t page_no, /*!< in: page number of 530 given read_buf */ 531 bool strict_check, /*!< in: true if strict-check 532 option is enable */ 533 bool is_log_enabled, /*!< in: true if log option is 534 enable */ 535 FILE* log_file /*!< in: file pointer to 536 log_file */ 537 #endif /* UNIV_INNOCHECKSUM */ 538 ); 539 540 #ifndef UNIV_INNOCHECKSUM 541 /**********************************************************************//** 542 Write a log record of compressing an index page without the data on the page. */ 543 UNIV_INLINE 544 void 545 page_zip_compress_write_log_no_data( 546 /*================================*/ 547 ulint level, /*!< in: compression level */ 548 const page_t* page, /*!< in: page that is compressed */ 549 dict_index_t* index, /*!< in: index */ 550 mtr_t* mtr); /*!< in: mtr */ 551 /**********************************************************************//** 552 Parses a log record of compressing an index page without the data. 553 @return end of log record or NULL */ 554 UNIV_INLINE 555 byte* 556 page_zip_parse_compress_no_data( 557 /*============================*/ 558 byte* ptr, /*!< in: buffer */ 559 byte* end_ptr, /*!< in: buffer end */ 560 page_t* page, /*!< in: uncompressed page */ 561 page_zip_des_t* page_zip, /*!< out: compressed page */ 562 dict_index_t* index) /*!< in: index */ 563 MY_ATTRIBUTE((nonnull(1,2))); 564 565 /**********************************************************************//** 566 Reset the counters used for filling 567 INFORMATION_SCHEMA.innodb_cmp_per_index. */ 568 UNIV_INLINE 569 void 570 page_zip_reset_stat_per_index(); 571 /*===========================*/ 572 573 #ifdef UNIV_MATERIALIZE 574 # undef UNIV_INLINE 575 # define UNIV_INLINE UNIV_INLINE_ORIGINAL 576 #endif 577 578 #ifndef UNIV_NONINL 579 # include "page0zip.ic" 580 #endif 581 #endif /* !UNIV_INNOCHECKSUM */ 582 583 #endif /* page0zip_h */ 584