1/***************************************************************************** 2 3Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved. 4 5This program is free software; you can redistribute it and/or modify it under 6the terms of the GNU General Public License as published by the Free Software 7Foundation; version 2 of the License. 8 9This program is distributed in the hope that it will be useful, but WITHOUT 10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS 11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. 12 13You should have received a copy of the GNU General Public License along with 14this program; if not, write to the Free Software Foundation, Inc., 1551 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA 16 17*****************************************************************************/ 18 19/**************************************************//** 20@file include/page0zip.ic 21Compressed page interface 22 23Created June 2005 by Marko Makela 24*******************************************************/ 25 26#ifdef UNIV_MATERIALIZE 27# undef UNIV_INLINE 28# define UNIV_INLINE 29#endif 30 31#include "page0zip.h" 32#include "page0page.h" 33 34/* The format of compressed pages is as follows. 35 36The header and trailer of the uncompressed pages, excluding the page 37directory in the trailer, are copied as is to the header and trailer 38of the compressed page. 39 40At the end of the compressed page, there is a dense page directory 41pointing to every user record contained on the page, including deleted 42records on the free list. The dense directory is indexed in the 43collation order, i.e., in the order in which the record list is 44linked on the uncompressed page. The infimum and supremum records are 45excluded. The two most significant bits of the entries are allocated 46for the delete-mark and an n_owned flag indicating the last record in 47a chain of records pointed to from the sparse page directory on the 48uncompressed page. 49 50The data between PAGE_ZIP_START and the last page directory entry will 51be written in compressed format, starting at offset PAGE_DATA. 52Infimum and supremum records are not stored. We exclude the 53REC_N_NEW_EXTRA_BYTES in every record header. These can be recovered 54from the dense page directory stored at the end of the compressed 55page. 56 57The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and 58roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of 59externally stored columns are stored separately, in ascending order of 60heap_no and column index, starting backwards from the dense page 61directory. 62 63The compressed data stream may be followed by a modification log 64covering the compressed portion of the page, as follows. 65 66MODIFICATION LOG ENTRY FORMAT 67- write record: 68 - (heap_no - 1) << 1 (1..2 bytes) 69 - extra bytes backwards 70 - data bytes 71- clear record: 72 - (heap_no - 1) << 1 | 1 (1..2 bytes) 73 74The integer values are stored in a variable-length format: 75- 0xxxxxxx: 0..127 76- 1xxxxxxx xxxxxxxx: 0..32767 77 78The end of the modification log is marked by a 0 byte. 79 80In summary, the compressed page looks like this: 81 82(1) Uncompressed page header (PAGE_DATA bytes) 83(2) Compressed index information 84(3) Compressed page data 85(4) Page modification log (page_zip->m_start..page_zip->m_end) 86(5) Empty zero-filled space 87(6) BLOB pointers (on leaf pages) 88 - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column 89 - in descending collation order 90(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes, 91 - indexed by heap_no 92 - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes 93 - REC_NODE_PTR_SIZE for non-leaf pages 94 - 0 otherwise 95(8) dense page directory, stored backwards 96 - n_dense = n_heap - 2 97 - existing records in ascending collation order 98 - deleted records (free list) in link order 99*/ 100 101/** Start offset of the area that will be compressed */ 102#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END 103/** Size of an compressed page directory entry */ 104#define PAGE_ZIP_DIR_SLOT_SIZE 2 105/** Mask of record offsets */ 106#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff 107/** 'owned' flag */ 108#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000 109/** 'deleted' flag */ 110#define PAGE_ZIP_DIR_SLOT_DEL 0x8000 111 112/**********************************************************************//** 113Determine the size of a compressed page in bytes. 114@return size in bytes */ 115UNIV_INLINE 116ulint 117page_zip_get_size( 118/*==============*/ 119 const page_zip_des_t* page_zip) /*!< in: compressed page */ 120{ 121 ulint size; 122 123 if (UNIV_UNLIKELY(!page_zip->ssize)) { 124 return(0); 125 } 126 127 size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize; 128 129 ut_ad(size >= PAGE_ZIP_MIN_SIZE); 130 ut_ad(size <= UNIV_PAGE_SIZE); 131 132 return(size); 133} 134/**********************************************************************//** 135Set the size of a compressed page in bytes. */ 136UNIV_INLINE 137void 138page_zip_set_size( 139/*==============*/ 140 page_zip_des_t* page_zip, /*!< in/out: compressed page */ 141 ulint size) /*!< in: size in bytes */ 142{ 143 if (size) { 144 int ssize; 145 146 ut_ad(ut_is_2pow(size)); 147 148 for (ssize = 1; size > (ulint) (512 << ssize); ssize++) { 149 } 150 151 page_zip->ssize = ssize; 152 } else { 153 page_zip->ssize = 0; 154 } 155 156 ut_ad(page_zip_get_size(page_zip) == size); 157} 158 159#ifndef UNIV_HOTBACKUP 160/**********************************************************************//** 161Determine if a record is so big that it needs to be stored externally. 162@return FALSE if the entire record can be stored locally on the page */ 163UNIV_INLINE 164ibool 165page_zip_rec_needs_ext( 166/*===================*/ 167 ulint rec_size, /*!< in: length of the record in bytes */ 168 ulint comp, /*!< in: nonzero=compact format */ 169 ulint n_fields, /*!< in: number of fields in the record; 170 ignored if zip_size == 0 */ 171 ulint zip_size) /*!< in: compressed page size in bytes, or 0 */ 172{ 173 ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES); 174 ut_ad(ut_is_2pow(zip_size)); 175 ut_ad(comp || !zip_size); 176 177#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE 178 if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) { 179 return(TRUE); 180 } 181#endif 182 183 if (UNIV_UNLIKELY(zip_size)) { 184 ut_ad(comp); 185 /* On a compressed page, there is a two-byte entry in 186 the dense page directory for every record. But there 187 is no record header. There should be enough room for 188 one record on an empty leaf page. Subtract 1 byte for 189 the encoded heap number. Check also the available space 190 on the uncompressed page. */ 191 return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) 192 >= page_zip_empty_size(n_fields, zip_size) 193 || rec_size >= page_get_free_space_of_empty(TRUE) / 2); 194 } 195 196 return(rec_size >= page_get_free_space_of_empty(comp) / 2); 197} 198#endif /* !UNIV_HOTBACKUP */ 199 200#ifdef UNIV_DEBUG 201/**********************************************************************//** 202Validate a compressed page descriptor. 203@return TRUE if ok */ 204UNIV_INLINE 205ibool 206page_zip_simple_validate( 207/*=====================*/ 208 const page_zip_des_t* page_zip)/*!< in: compressed page descriptor */ 209{ 210 ut_ad(page_zip); 211 ut_ad(page_zip->data); 212 ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE); 213 ut_ad(page_zip_get_size(page_zip) 214 > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE); 215 ut_ad(page_zip->m_start <= page_zip->m_end); 216 ut_ad(page_zip->m_end < page_zip_get_size(page_zip)); 217 ut_ad(page_zip->n_blobs 218 < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE); 219 return(TRUE); 220} 221#endif /* UNIV_DEBUG */ 222 223/**********************************************************************//** 224Determine if the length of the page trailer. 225@return length of the page trailer, in bytes, not including the 226terminating zero byte of the modification log */ 227UNIV_INLINE 228ibool 229page_zip_get_trailer_len( 230/*=====================*/ 231 const page_zip_des_t* page_zip,/*!< in: compressed page */ 232 ibool is_clust)/*!< in: TRUE if clustered index */ 233{ 234 ulint uncompressed_size; 235 236 ut_ad(page_zip_simple_validate(page_zip)); 237 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 238 239 if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) { 240 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 241 + REC_NODE_PTR_SIZE; 242 ut_ad(!page_zip->n_blobs); 243 } else if (UNIV_UNLIKELY(is_clust)) { 244 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE 245 + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN; 246 } else { 247 uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE; 248 ut_ad(!page_zip->n_blobs); 249 } 250 251 return((page_dir_get_n_heap(page_zip->data) - 2) 252 * uncompressed_size 253 + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE); 254} 255 256/**********************************************************************//** 257Determine how big record can be inserted without recompressing the page. 258@return a positive number indicating the maximum size of a record 259whose insertion is guaranteed to succeed, or zero or negative */ 260UNIV_INLINE 261lint 262page_zip_max_ins_size( 263/*==================*/ 264 const page_zip_des_t* page_zip,/*!< in: compressed page */ 265 ibool is_clust)/*!< in: TRUE if clustered index */ 266{ 267 ulint trailer_len; 268 269 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 270 271 /* When a record is created, a pointer may be added to 272 the dense directory. 273 Likewise, space for the columns that will not be 274 compressed will be allocated from the page trailer. 275 Also the BLOB pointers will be allocated from there, but 276 we may as well count them in the length of the record. */ 277 278 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 279 280 return((lint) page_zip_get_size(page_zip) 281 - trailer_len - page_zip->m_end 282 - (REC_N_NEW_EXTRA_BYTES - 2)); 283} 284 285/**********************************************************************//** 286Determine if enough space is available in the modification log. 287@return TRUE if enough space is available */ 288UNIV_INLINE 289ibool 290page_zip_available( 291/*===============*/ 292 const page_zip_des_t* page_zip,/*!< in: compressed page */ 293 ibool is_clust,/*!< in: TRUE if clustered index */ 294 ulint length, /*!< in: combined size of the record */ 295 ulint create) /*!< in: nonzero=add the record to 296 the heap */ 297{ 298 ulint trailer_len; 299 300 ut_ad(length > REC_N_NEW_EXTRA_BYTES); 301 302 trailer_len = page_zip_get_trailer_len(page_zip, is_clust); 303 304 /* Subtract the fixed extra bytes and add the maximum 305 space needed for identifying the record (encoded heap_no). */ 306 length -= REC_N_NEW_EXTRA_BYTES - 2; 307 308 if (UNIV_UNLIKELY(create)) { 309 /* When a record is created, a pointer may be added to 310 the dense directory. 311 Likewise, space for the columns that will not be 312 compressed will be allocated from the page trailer. 313 Also the BLOB pointers will be allocated from there, but 314 we may as well count them in the length of the record. */ 315 316 trailer_len += PAGE_ZIP_DIR_SLOT_SIZE; 317 } 318 319 return(UNIV_LIKELY(length 320 + trailer_len 321 + page_zip->m_end 322 < page_zip_get_size(page_zip))); 323} 324 325/**********************************************************************//** 326Initialize a compressed page descriptor. */ 327UNIV_INLINE 328void 329page_zip_des_init( 330/*==============*/ 331 page_zip_des_t* page_zip) /*!< in/out: compressed page 332 descriptor */ 333{ 334 memset(page_zip, 0, sizeof *page_zip); 335} 336 337/**********************************************************************//** 338Write a log record of writing to the uncompressed header portion of a page. */ 339UNIV_INTERN 340void 341page_zip_write_header_log( 342/*======================*/ 343 const byte* data,/*!< in: data on the uncompressed page */ 344 ulint length, /*!< in: length of the data */ 345 mtr_t* mtr); /*!< in: mini-transaction */ 346 347/**********************************************************************//** 348Write data to the uncompressed header portion of a page. The data must 349already have been written to the uncompressed page. 350However, the data portion of the uncompressed page may differ from 351the compressed page when a record is being inserted in 352page_cur_insert_rec_zip(). */ 353UNIV_INLINE 354void 355page_zip_write_header( 356/*==================*/ 357 page_zip_des_t* page_zip,/*!< in/out: compressed page */ 358 const byte* str, /*!< in: address on the uncompressed page */ 359 ulint length, /*!< in: length of the data */ 360 mtr_t* mtr) /*!< in: mini-transaction, or NULL */ 361{ 362 ulint pos; 363 364 ut_ad(PAGE_ZIP_MATCH(str, page_zip)); 365 ut_ad(page_zip_simple_validate(page_zip)); 366 UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip)); 367 368 pos = page_offset(str); 369 370 ut_ad(pos < PAGE_DATA); 371 372 memcpy(page_zip->data + pos, str, length); 373 374 /* The following would fail in page_cur_insert_rec_zip(). */ 375 /* ut_ad(page_zip_validate(page_zip, str - pos)); */ 376 377 if (UNIV_LIKELY_NULL(mtr)) { 378#ifndef UNIV_HOTBACKUP 379 page_zip_write_header_log(str, length, mtr); 380#endif /* !UNIV_HOTBACKUP */ 381 } 382} 383 384#ifdef UNIV_MATERIALIZE 385# undef UNIV_INLINE 386# define UNIV_INLINE UNIV_INLINE_ORIGINAL 387#endif 388