1 /***************************************************************************** 2 3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved. 4 Copyright (c) 2012, Facebook Inc. 5 Copyright (c) 2017, 2020, MariaDB Corporation. 6 7 This program is free software; you can redistribute it and/or modify it under 8 the terms of the GNU General Public License as published by the Free Software 9 Foundation; version 2 of the License. 10 11 This program is distributed in the hope that it will be useful, but WITHOUT 12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 14 15 You should have received a copy of the GNU General Public License along with 16 this program; if not, write to the Free Software Foundation, Inc., 17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA 18 19 *****************************************************************************/ 20 21 /**************************************************//** 22 @file include/page0zip.h 23 Compressed page interface 24 25 Created June 2005 by Marko Makela 26 *******************************************************/ 27 28 #ifndef page0zip_h 29 #define page0zip_h 30 31 #include "buf0types.h" 32 33 #ifndef UNIV_INNOCHECKSUM 34 #include "mtr0types.h" 35 #include "page0types.h" 36 #include "dict0types.h" 37 #include "srv0srv.h" 38 #include "trx0types.h" 39 #include "mem0mem.h" 40 41 /* Compression level to be used by zlib. Settable by user. */ 42 extern uint page_zip_level; 43 44 /* Default compression level. */ 45 #define DEFAULT_COMPRESSION_LEVEL 6 46 /** Start offset of the area that will be compressed */ 47 #define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END 48 /** Size of an compressed page directory entry */ 49 #define PAGE_ZIP_DIR_SLOT_SIZE 2 50 /** Predefine the sum of DIR_SLOT, TRX_ID & ROLL_PTR */ 51 #define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE \ 52 (PAGE_ZIP_DIR_SLOT_SIZE \ 53 + DATA_TRX_ID_LEN \ 54 + DATA_ROLL_PTR_LEN) 55 /** Mask of record offsets */ 56 #define PAGE_ZIP_DIR_SLOT_MASK 0x3fffU 57 /** 'owned' flag */ 58 #define PAGE_ZIP_DIR_SLOT_OWNED 0x4000U 59 /** 'deleted' flag */ 60 #define PAGE_ZIP_DIR_SLOT_DEL 0x8000U 61 62 /* Whether or not to log compressed page images to avoid possible 63 compression algorithm changes in zlib. */ 64 extern my_bool page_zip_log_pages; 65 66 /**********************************************************************//** 67 Determine the size of a compressed page in bytes. 68 @return size in bytes */ 69 UNIV_INLINE 70 ulint 71 page_zip_get_size( 72 /*==============*/ 73 const page_zip_des_t* page_zip) /*!< in: compressed page */ 74 MY_ATTRIBUTE((warn_unused_result)); 75 /**********************************************************************//** 76 Set the size of a compressed page in bytes. */ 77 UNIV_INLINE 78 void 79 page_zip_set_size( 80 /*==============*/ 81 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 82 ulint size); /*!< in: size in bytes */ 83 84 /** Determine if a record is so big that it needs to be stored externally. 85 @param[in] rec_size length of the record in bytes 86 @param[in] comp nonzero=compact format 87 @param[in] n_fields number of fields in the record; ignored if 88 tablespace is not compressed 89 @param[in] zip_size ROW_FORMAT=COMPRESSED page size, or 0 90 @return false if the entire record can be stored locally on the page */ 91 inline bool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields, 92 ulint zip_size) 93 MY_ATTRIBUTE((warn_unused_result)); 94 95 /**********************************************************************//** 96 Determine the guaranteed free space on an empty page. 97 @return minimum payload size on the page */ 98 ulint 99 page_zip_empty_size( 100 /*================*/ 101 ulint n_fields, /*!< in: number of columns in the index */ 102 ulint zip_size) /*!< in: compressed page size in bytes */ 103 MY_ATTRIBUTE((const)); 104 105 /** Check whether a tuple is too big for compressed table 106 @param[in] index dict index object 107 @param[in] entry entry for the index 108 @return true if it's too big, otherwise false */ 109 bool 110 page_zip_is_too_big( 111 const dict_index_t* index, 112 const dtuple_t* entry); 113 114 /**********************************************************************//** 115 Initialize a compressed page descriptor. */ 116 UNIV_INLINE 117 void 118 page_zip_des_init( 119 /*==============*/ 120 page_zip_des_t* page_zip); /*!< in/out: compressed page 121 descriptor */ 122 123 /**********************************************************************//** 124 Configure the zlib allocator to use the given memory heap. */ 125 void 126 page_zip_set_alloc( 127 /*===============*/ 128 void* stream, /*!< in/out: zlib stream */ 129 mem_heap_t* heap); /*!< in: memory heap to use */ 130 131 /**********************************************************************//** 132 Compress a page. 133 @return TRUE on success, FALSE on failure; page_zip will be left 134 intact on failure. */ 135 ibool 136 page_zip_compress( 137 /*==============*/ 138 page_zip_des_t* page_zip, /*!< in: size; out: data, 139 n_blobs, m_start, m_end, 140 m_nonempty */ 141 const page_t* page, /*!< in: uncompressed page */ 142 dict_index_t* index, /*!< in: index of the B-tree 143 node */ 144 ulint level, /*!< in: commpression level */ 145 mtr_t* mtr); /*!< in/out: mini-transaction, 146 or NULL */ 147 148 /**********************************************************************//** 149 Write the index information for the compressed page. 150 @return used size of buf */ 151 ulint 152 page_zip_fields_encode( 153 /*===================*/ 154 ulint n, /*!< in: number of fields 155 to compress */ 156 const dict_index_t* index, /*!< in: index comprising 157 at least n fields */ 158 ulint trx_id_pos, 159 /*!< in: position of the trx_id column 160 in the index, or ULINT_UNDEFINED if 161 this is a non-leaf page */ 162 byte* buf); /*!< out: buffer of (n + 1) * 2 bytes */ 163 164 /**********************************************************************//** 165 Decompress a page. This function should tolerate errors on the compressed 166 page. Instead of letting assertions fail, it will return FALSE if an 167 inconsistency is detected. 168 @return TRUE on success, FALSE on failure */ 169 ibool 170 page_zip_decompress( 171 /*================*/ 172 page_zip_des_t* page_zip,/*!< in: data, ssize; 173 out: m_start, m_end, m_nonempty, n_blobs */ 174 page_t* page, /*!< out: uncompressed page, may be trashed */ 175 ibool all) /*!< in: TRUE=decompress the whole page; 176 FALSE=verify but do not copy some 177 page header fields that should not change 178 after page creation */ 179 MY_ATTRIBUTE((nonnull(1,2))); 180 181 #ifdef UNIV_DEBUG 182 /**********************************************************************//** 183 Validate a compressed page descriptor. 184 @return TRUE if ok */ 185 UNIV_INLINE 186 ibool 187 page_zip_simple_validate( 188 /*=====================*/ 189 const page_zip_des_t* page_zip); /*!< in: compressed page 190 descriptor */ 191 #endif /* UNIV_DEBUG */ 192 193 #ifdef UNIV_ZIP_DEBUG 194 /**********************************************************************//** 195 Check that the compressed and decompressed pages match. 196 @return TRUE if valid, FALSE if not */ 197 ibool 198 page_zip_validate_low( 199 /*==================*/ 200 const page_zip_des_t* page_zip,/*!< in: compressed page */ 201 const page_t* page, /*!< in: uncompressed page */ 202 const dict_index_t* index, /*!< in: index of the page, if known */ 203 ibool sloppy) /*!< in: FALSE=strict, 204 TRUE=ignore the MIN_REC_FLAG */ 205 MY_ATTRIBUTE((nonnull(1,2))); 206 /**********************************************************************//** 207 Check that the compressed and decompressed pages match. */ 208 ibool 209 page_zip_validate( 210 /*==============*/ 211 const page_zip_des_t* page_zip,/*!< in: compressed page */ 212 const page_t* page, /*!< in: uncompressed page */ 213 const dict_index_t* index) /*!< in: index of the page, if known */ 214 MY_ATTRIBUTE((nonnull(1,2))); 215 #endif /* UNIV_ZIP_DEBUG */ 216 217 /**********************************************************************//** 218 Determine how big record can be inserted without recompressing the page. 219 @return a positive number indicating the maximum size of a record 220 whose insertion is guaranteed to succeed, or zero or negative */ 221 UNIV_INLINE 222 lint 223 page_zip_max_ins_size( 224 /*==================*/ 225 const page_zip_des_t* page_zip,/*!< in: compressed page */ 226 ibool is_clust)/*!< in: TRUE if clustered index */ 227 MY_ATTRIBUTE((warn_unused_result)); 228 229 /**********************************************************************//** 230 Determine if enough space is available in the modification log. 231 @return TRUE if page_zip_write_rec() will succeed */ 232 UNIV_INLINE 233 ibool 234 page_zip_available( 235 /*===============*/ 236 const page_zip_des_t* page_zip,/*!< in: compressed page */ 237 ibool is_clust,/*!< in: TRUE if clustered index */ 238 ulint length, /*!< in: combined size of the record */ 239 ulint create) /*!< in: nonzero=add the record to 240 the heap */ 241 MY_ATTRIBUTE((warn_unused_result)); 242 243 /**********************************************************************//** 244 Write data to the uncompressed header portion of a page. The data must 245 already have been written to the uncompressed page. */ 246 UNIV_INLINE 247 void 248 page_zip_write_header( 249 /*==================*/ 250 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 251 const byte* str, /*!< in: address on the uncompressed page */ 252 ulint length, /*!< in: length of the data */ 253 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 254 MY_ATTRIBUTE((nonnull(1,2))); 255 256 /**********************************************************************//** 257 Write an entire record on the compressed page. The data must already 258 have been written to the uncompressed page. */ 259 void 260 page_zip_write_rec( 261 /*===============*/ 262 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 263 const byte* rec, /*!< in: record being written */ 264 dict_index_t* index, /*!< in: the index the record belongs to */ 265 const rec_offs* offsets,/*!< in: rec_get_offsets(rec, index) */ 266 ulint create) /*!< in: nonzero=insert, zero=update */ 267 MY_ATTRIBUTE((nonnull)); 268 269 /***********************************************************//** 270 Parses a log record of writing a BLOB pointer of a record. 271 @return end of log record or NULL */ 272 byte* 273 page_zip_parse_write_blob_ptr( 274 /*==========================*/ 275 byte* ptr, /*!< in: redo log buffer */ 276 byte* end_ptr,/*!< in: redo log buffer end */ 277 page_t* page, /*!< in/out: uncompressed page */ 278 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 279 280 /**********************************************************************//** 281 Write a BLOB pointer of a record on the leaf page of a clustered index. 282 The information must already have been updated on the uncompressed page. */ 283 void 284 page_zip_write_blob_ptr( 285 /*====================*/ 286 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 287 const byte* rec, /*!< in/out: record whose data is being 288 written */ 289 dict_index_t* index, /*!< in: index of the page */ 290 const rec_offs* offsets,/*!< in: rec_get_offsets(rec, index) */ 291 ulint n, /*!< in: column index */ 292 mtr_t* mtr); /*!< in: mini-transaction handle, 293 or NULL if no logging is needed */ 294 295 /***********************************************************//** 296 Parses a log record of writing the node pointer of a record. 297 @return end of log record or NULL */ 298 byte* 299 page_zip_parse_write_node_ptr( 300 /*==========================*/ 301 byte* ptr, /*!< in: redo log buffer */ 302 byte* end_ptr,/*!< in: redo log buffer end */ 303 page_t* page, /*!< in/out: uncompressed page */ 304 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 305 306 /**********************************************************************//** 307 Write the node pointer of a record on a non-leaf compressed page. */ 308 void 309 page_zip_write_node_ptr( 310 /*====================*/ 311 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 312 byte* rec, /*!< in/out: record */ 313 ulint size, /*!< in: data size of rec */ 314 ulint ptr, /*!< in: node pointer */ 315 mtr_t* mtr); /*!< in: mini-transaction, or NULL */ 316 317 /** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record. 318 @param[in,out] page_zip compressed page 319 @param[in,out] rec record 320 @param[in] offsets rec_get_offsets(rec, index) 321 @param[in] trx_id_field field number of DB_TRX_ID (number of PK fields) 322 @param[in] trx_id DB_TRX_ID value (transaction identifier) 323 @param[in] roll_ptr DB_ROLL_PTR value (undo log pointer) 324 @param[in,out] mtr mini-transaction, or NULL to skip logging */ 325 void 326 page_zip_write_trx_id_and_roll_ptr( 327 page_zip_des_t* page_zip, 328 byte* rec, 329 const rec_offs* offsets, 330 ulint trx_id_col, 331 trx_id_t trx_id, 332 roll_ptr_t roll_ptr, 333 mtr_t* mtr = NULL) 334 MY_ATTRIBUTE((nonnull(1,2,3))); 335 336 /** Parse a MLOG_ZIP_WRITE_TRX_ID record. 337 @param[in] ptr redo log buffer 338 @param[in] end_ptr end of redo log buffer 339 @param[in,out] page uncompressed page 340 @param[in,out] page_zip compressed page 341 @return end of log record 342 @retval NULL if the log record is incomplete */ 343 byte* 344 page_zip_parse_write_trx_id( 345 byte* ptr, 346 byte* end_ptr, 347 page_t* page, 348 page_zip_des_t* page_zip) 349 MY_ATTRIBUTE((nonnull(1,2), warn_unused_result)); 350 /**********************************************************************//** 351 Write the "deleted" flag of a record on a compressed page. The flag must 352 already have been written on the uncompressed page. */ 353 void 354 page_zip_rec_set_deleted( 355 /*=====================*/ 356 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 357 const byte* rec, /*!< in: record on the uncompressed page */ 358 ulint flag) /*!< in: the deleted flag (nonzero=TRUE) */ 359 MY_ATTRIBUTE((nonnull)); 360 361 /**********************************************************************//** 362 Write the "owned" flag of a record on a compressed page. The n_owned field 363 must already have been written on the uncompressed page. */ 364 void 365 page_zip_rec_set_owned( 366 /*===================*/ 367 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 368 const byte* rec, /*!< in: record on the uncompressed page */ 369 ulint flag) /*!< in: the owned flag (nonzero=TRUE) */ 370 MY_ATTRIBUTE((nonnull)); 371 372 /**********************************************************************//** 373 Insert a record to the dense page directory. */ 374 void 375 page_zip_dir_insert( 376 /*================*/ 377 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 378 const byte* prev_rec,/*!< in: record after which to insert */ 379 const byte* free_rec,/*!< in: record from which rec was 380 allocated, or NULL */ 381 byte* rec); /*!< in: record to insert */ 382 383 /**********************************************************************//** 384 Shift the dense page directory and the array of BLOB pointers 385 when a record is deleted. */ 386 void 387 page_zip_dir_delete( 388 /*================*/ 389 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 390 byte* rec, /*!< in: deleted record */ 391 const dict_index_t* index, /*!< in: index of rec */ 392 const rec_offs* offsets, /*!< in: rec_get_offsets(rec) */ 393 const byte* free) /*!< in: previous start of 394 the free list */ 395 MY_ATTRIBUTE((nonnull(1,2,3,4))); 396 397 /**********************************************************************//** 398 Add a slot to the dense page directory. */ 399 void 400 page_zip_dir_add_slot( 401 /*==================*/ 402 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 403 ulint is_clustered) /*!< in: nonzero for clustered index, 404 zero for others */ 405 MY_ATTRIBUTE((nonnull)); 406 407 /***********************************************************//** 408 Parses a log record of writing to the header of a page. 409 @return end of log record or NULL */ 410 byte* 411 page_zip_parse_write_header( 412 /*========================*/ 413 byte* ptr, /*!< in: redo log buffer */ 414 byte* end_ptr,/*!< in: redo log buffer end */ 415 page_t* page, /*!< in/out: uncompressed page */ 416 page_zip_des_t* page_zip);/*!< in/out: compressed page */ 417 418 /**********************************************************************//** 419 Write data to the uncompressed header portion of a page. The data must 420 already have been written to the uncompressed page. 421 However, the data portion of the uncompressed page may differ from 422 the compressed page when a record is being inserted in 423 page_cur_insert_rec_low(). */ 424 UNIV_INLINE 425 void 426 page_zip_write_header( 427 /*==================*/ 428 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 429 const byte* str, /*!< in: address on the uncompressed page */ 430 ulint length, /*!< in: length of the data */ 431 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 432 MY_ATTRIBUTE((nonnull(1,2))); 433 434 /**********************************************************************//** 435 Reorganize and compress a page. This is a low-level operation for 436 compressed pages, to be used when page_zip_compress() fails. 437 On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written. 438 The function btr_page_reorganize() should be preferred whenever possible. 439 IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a 440 non-clustered index, the caller must update the insert buffer free 441 bits in the same mini-transaction in such a way that the modification 442 will be redo-logged. 443 @return TRUE on success, FALSE on failure; page_zip will be left 444 intact on failure, but page will be overwritten. */ 445 ibool 446 page_zip_reorganize( 447 /*================*/ 448 buf_block_t* block, /*!< in/out: page with compressed page; 449 on the compressed page, in: size; 450 out: data, n_blobs, 451 m_start, m_end, m_nonempty */ 452 dict_index_t* index, /*!< in: index of the B-tree node */ 453 mtr_t* mtr) /*!< in: mini-transaction */ 454 MY_ATTRIBUTE((nonnull)); 455 456 /**********************************************************************//** 457 Copy the records of a page byte for byte. Do not copy the page header 458 or trailer, except those B-tree header fields that are directly 459 related to the storage of records. Also copy PAGE_MAX_TRX_ID. 460 NOTE: The caller must update the lock table and the adaptive hash index. */ 461 void 462 page_zip_copy_recs( 463 /*===============*/ 464 page_zip_des_t* page_zip, /*!< out: copy of src_zip 465 (n_blobs, m_start, m_end, 466 m_nonempty, data[0..size-1]) */ 467 page_t* page, /*!< out: copy of src */ 468 const page_zip_des_t* src_zip, /*!< in: compressed page */ 469 const page_t* src, /*!< in: page */ 470 dict_index_t* index, /*!< in: index of the B-tree */ 471 mtr_t* mtr); /*!< in: mini-transaction */ 472 473 /** Parse and optionally apply MLOG_ZIP_PAGE_COMPRESS. 474 @param[in] ptr log record 475 @param[in] end_ptr end of log 476 @param[in,out] block ROW_FORMAT=COMPRESSED block, or NULL for parsing only 477 @return end of log record 478 @retval NULL if the log record is incomplete */ 479 byte* page_zip_parse_compress(const byte* ptr, const byte* end_ptr, 480 buf_block_t* block); 481 482 #endif /* !UNIV_INNOCHECKSUM */ 483 484 /** Calculate the compressed page checksum. 485 @param[in] data compressed page 486 @param[in] size size of compressed page 487 @param[in] algo algorithm to use 488 @return page checksum */ 489 uint32_t 490 page_zip_calc_checksum( 491 const void* data, 492 ulint size, 493 srv_checksum_algorithm_t algo); 494 495 /** Validate the checksum on a ROW_FORMAT=COMPRESSED page. 496 @param data ROW_FORMAT=COMPRESSED page 497 @param size size of the page, in bytes 498 @return whether the stored checksum matches innodb_checksum_algorithm */ 499 bool page_zip_verify_checksum(const byte *data, size_t size); 500 501 #ifndef UNIV_INNOCHECKSUM 502 /**********************************************************************//** 503 Write a log record of compressing an index page without the data on the page. */ 504 UNIV_INLINE 505 void 506 page_zip_compress_write_log_no_data( 507 /*================================*/ 508 ulint level, /*!< in: compression level */ 509 const page_t* page, /*!< in: page that is compressed */ 510 dict_index_t* index, /*!< in: index */ 511 mtr_t* mtr); /*!< in: mtr */ 512 /**********************************************************************//** 513 Parses a log record of compressing an index page without the data. 514 @return end of log record or NULL */ 515 UNIV_INLINE 516 byte* 517 page_zip_parse_compress_no_data( 518 /*============================*/ 519 byte* ptr, /*!< in: buffer */ 520 byte* end_ptr, /*!< in: buffer end */ 521 page_t* page, /*!< in: uncompressed page */ 522 page_zip_des_t* page_zip, /*!< out: compressed page */ 523 dict_index_t* index) /*!< in: index */ 524 MY_ATTRIBUTE((nonnull(1,2))); 525 526 /**********************************************************************//** 527 Reset the counters used for filling 528 INFORMATION_SCHEMA.innodb_cmp_per_index. */ 529 UNIV_INLINE 530 void 531 page_zip_reset_stat_per_index(); 532 /*===========================*/ 533 534 #include "page0zip.inl" 535 #endif /* !UNIV_INNOCHECKSUM */ 536 537 #endif /* page0zip_h */ 538