1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2012, Facebook Inc.
5 
6 This program is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License, version 2.0, as published by the
8 Free Software Foundation.
9 
10 This program is also distributed with certain software (including but not
11 limited to OpenSSL) that is licensed under separate terms, as designated in a
12 particular file or component or in included license documentation. The authors
13 of MySQL hereby grant you an additional permission to link the program and
14 your derivative works with the separately licensed software that they have
15 included with MySQL.
16 
17 This program is distributed in the hope that it will be useful, but WITHOUT
18 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20 for more details.
21 
22 You should have received a copy of the GNU General Public License along with
23 this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
25 
26 *****************************************************************************/
27 
28 /** @file include/page0zip.h
29  Compressed page interface
30 
31  Created June 2005 by Marko Makela
32  *******************************************************/
33 
34 #ifndef page0zip_h
35 #define page0zip_h
36 
/* When UNIV_MATERIALIZE is defined, redefine UNIV_INLINE as empty for the
declarations in this header. It is set back to UNIV_INLINE_ORIGINAL further
down, before page0zip.ic is included. */
#ifdef UNIV_MATERIALIZE
#undef UNIV_INLINE
#define UNIV_INLINE
#endif
41 
42 #include <sys/types.h>
43 #include <zlib.h>
44 
45 #include "buf0buf.h"
46 #include "buf0checksum.h"
47 #include "buf0types.h"
48 #include "dict0types.h"
49 #include "mach0data.h"
50 #include "mem0mem.h"
51 #include "mtr0types.h"
52 #include "page/zipdecompress.h"
53 #include "page0types.h"
54 #include "srv0srv.h"
55 #include "trx0types.h"
56 #include "univ.i"
57 #include "ut0crc32.h"
58 
/** Compression level to be used by zlib. Settable by user. */
extern uint page_zip_level;
61 
/** Default compression level. */
#define DEFAULT_COMPRESSION_LEVEL 6

/** Start offset of the area that will be compressed */
#define PAGE_ZIP_START PAGE_NEW_SUPREMUM_END

/** Size of a compressed page directory entry */
#define PAGE_ZIP_DIR_SLOT_SIZE 2

/** Predefined sum of the sizes of DIR_SLOT, TRX_ID and ROLL_PTR */
#define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE \
  (PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN)

/** Mask of record offsets (the low 14 bits) */
#define PAGE_ZIP_DIR_SLOT_MASK 0x3fff

/** 'owned' flag (bit 14) */
#define PAGE_ZIP_DIR_SLOT_OWNED 0x4000

/** 'deleted' flag (bit 15) */
#define PAGE_ZIP_DIR_SLOT_DEL 0x8000
77 
/** Whether or not to log compressed page images to avoid possible
compression algorithm changes in zlib. */
extern bool page_zip_log_pages;
81 
82 /** Set the size of a compressed page in bytes.
83 @param[in,out]	page_zip	compressed page
84 @param[in]	size		size in bytes */
85 UNIV_INLINE
86 void page_zip_set_size(page_zip_des_t *page_zip, ulint size);
87 
88 /** Determine if a record is so big that it needs to be stored externally.
89 @param[in]	rec_size	length of the record in bytes
90 @param[in]	comp		nonzero=compact format
91 @param[in]	n_fields	number of fields in the record; ignored if
92 tablespace is not compressed
93 @param[in]	page_size	page size
94 @return false if the entire record can be stored locally on the page */
95 UNIV_INLINE
96 ibool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields,
97                              const page_size_t &page_size)
98     MY_ATTRIBUTE((warn_unused_result));
99 
100 /** Determine the guaranteed free space on an empty page.
101 @param[in]  n_fields  number of columns in the index
102 @param[in]  zip_size  compressed page size in bytes
103 @return minimum payload size on the page */
104 ulint page_zip_empty_size(ulint n_fields, ulint zip_size);
105 
106 #ifndef UNIV_HOTBACKUP
107 /** Check whether a tuple is too big for compressed table
108 @param[in]	index	dict index object
109 @param[in]	entry	entry for the index
110 @return	true if it's too big, otherwise false */
111 bool page_zip_is_too_big(const dict_index_t *index, const dtuple_t *entry);
112 #endif /* !UNIV_HOTBACKUP */
113 
114 /** Initialize a compressed page descriptor. */
115 UNIV_INLINE
116 void page_zip_des_init(page_zip_des_t *page_zip); /*!< in/out: compressed page
117                                                   descriptor */
118 
119 /** Configure the zlib allocator to use the given memory heap. */
120 void page_zip_set_alloc(void *stream,      /*!< in/out: zlib stream */
121                         mem_heap_t *heap); /*!< in: memory heap to use */
122 
123 /** Compress a page.
124  @return true on success, false on failure; page_zip will be left
125  intact on failure. */
126 ibool page_zip_compress(page_zip_des_t *page_zip, /*!< in: size; out: data,
127                                                   n_blobs, m_start, m_end,
128                                                   m_nonempty */
129                         const page_t *page,       /*!< in: uncompressed page */
130                         dict_index_t *index,      /*!< in: index tree */
131                         ulint level,              /*!< in: commpression level */
132                         mtr_t *mtr);              /*!< in/out: mini-transaction,
133                                                   or NULL */
134 
135 /** Write the index information for the compressed page.
136  @return used size of buf */
137 ulint page_zip_fields_encode(
138     ulint n,                   /*!< in: number of fields
139                                to compress */
140     const dict_index_t *index, /*!< in: index comprising
141                                at least n fields */
142     ulint trx_id_pos,
143     /*!< in: position of the trx_id column
144     in the index, or ULINT_UNDEFINED if
145     this is a non-leaf page */
146     byte *buf); /*!< out: buffer of (n + 1) * 2 bytes */
147 
148 /** Decompress a page.  This function should tolerate errors on the compressed
149  page.  Instead of letting assertions fail, it will return FALSE if an
150  inconsistency is detected.
151  @return true on success, false on failure */
152 ibool page_zip_decompress(
153     page_zip_des_t *page_zip, /*!< in: data, ssize;
154                              out: m_start, m_end, m_nonempty, n_blobs */
155     page_t *page,             /*!< out: uncompressed page, may be trashed */
156     ibool all);               /*!< in: TRUE=decompress the whole page;
157                               FALSE=verify but do not copy some
158                               page header fields that should not change
159                               after page creation */
160 
#ifdef UNIV_ZIP_DEBUG
/** Check that the compressed and decompressed pages match.
@param[in]  page_zip  compressed page
@param[in]  page      uncompressed page
@param[in]  index     index of the page, if known
@param[in]  sloppy    FALSE=strict, TRUE=ignore the MIN_REC_FLAG
@return true if valid, false if not */
ibool page_zip_validate_low(const page_zip_des_t *page_zip,
                            const page_t *page, const dict_index_t *index,
                            ibool sloppy);

/** Check that the compressed and decompressed pages match.
NOTE(review): the return value is not documented here; presumably it follows
the same contract as page_zip_validate_low() -- confirm against the
definition.
@param[in]  page_zip  compressed page
@param[in]  page      uncompressed page
@param[in]  index     index of the page, if known */
ibool page_zip_validate(const page_zip_des_t *page_zip, const page_t *page,
                        const dict_index_t *index);
#endif /* UNIV_ZIP_DEBUG */
176 
177 /** Determine how big record can be inserted without recompressing the page.
178  @return a positive number indicating the maximum size of a record
179  whose insertion is guaranteed to succeed, or zero or negative */
180 UNIV_INLINE
181 lint page_zip_max_ins_size(
182     const page_zip_des_t *page_zip, /*!< in: compressed page */
183     ibool is_clust)                 /*!< in: TRUE if clustered index */
184     MY_ATTRIBUTE((warn_unused_result));
185 
186 /** Determine if enough space is available in the modification log.
187  @return true if page_zip_write_rec() will succeed */
188 UNIV_INLINE
189 ibool page_zip_available(
190     const page_zip_des_t *page_zip, /*!< in: compressed page */
191     bool is_clust,                  /*!< in: TRUE if clustered index */
192     ulint length,                   /*!< in: combined size of the record */
193     ulint create)                   /*!< in: nonzero=add the record to
194                                     the heap */
195     MY_ATTRIBUTE((warn_unused_result));
196 
197 /** Write data to the uncompressed header portion of a page. The data must
198 already have been written to the uncompressed page.
199 @param[in,out]	page_zip	compressed page
200 @param[in]	str		address on the uncompressed page
201 @param[in]	length		length of the data
202 @param[in]	mtr		mini-transaction, or NULL */
203 UNIV_INLINE
204 void page_zip_write_header(page_zip_des_t *page_zip, const byte *str,
205                            ulint length, mtr_t *mtr);
206 
207 /** Write an entire record on the compressed page.  The data must already
208  have been written to the uncompressed page. */
209 void page_zip_write_rec(
210     page_zip_des_t *page_zip,  /*!< in/out: compressed page */
211     const byte *rec,           /*!< in: record being written */
212     const dict_index_t *index, /*!< in: the index the record belongs to */
213     const ulint *offsets,      /*!< in: rec_get_offsets(rec, index) */
214     ulint create);             /*!< in: nonzero=insert, zero=update */
215 
216 /** Parses a log record of writing a BLOB pointer of a record.
217  @return end of log record or NULL */
218 byte *page_zip_parse_write_blob_ptr(
219     byte *ptr,                 /*!< in: redo log buffer */
220     byte *end_ptr,             /*!< in: redo log buffer end */
221     page_t *page,              /*!< in/out: uncompressed page */
222     page_zip_des_t *page_zip); /*!< in/out: compressed page */
223 
224 /** Write a BLOB pointer of a record on the leaf page of a clustered index.
225  The information must already have been updated on the uncompressed page. */
226 void page_zip_write_blob_ptr(
227     page_zip_des_t *page_zip,  /*!< in/out: compressed page */
228     const byte *rec,           /*!< in/out: record whose data is being
229                                written */
230     const dict_index_t *index, /*!< in: index of the page */
231     const ulint *offsets,      /*!< in: rec_get_offsets(rec, index) */
232     ulint n,                   /*!< in: column index */
233     mtr_t *mtr);               /*!< in: mini-transaction handle,
234                        or NULL if no logging is needed */
235 
236 /** Parses a log record of writing the node pointer of a record.
237  @return end of log record or NULL */
238 byte *page_zip_parse_write_node_ptr(
239     byte *ptr,                 /*!< in: redo log buffer */
240     byte *end_ptr,             /*!< in: redo log buffer end */
241     page_t *page,              /*!< in/out: uncompressed page */
242     page_zip_des_t *page_zip); /*!< in/out: compressed page */
243 
244 /** Write the node pointer of a record on a non-leaf compressed page. */
245 void page_zip_write_node_ptr(
246     page_zip_des_t *page_zip, /*!< in/out: compressed page */
247     byte *rec,                /*!< in/out: record */
248     ulint size,               /*!< in: data size of rec */
249     ulint ptr,                /*!< in: node pointer */
250     mtr_t *mtr);              /*!< in: mini-transaction, or NULL */
251 
252 /** Write the trx_id and roll_ptr of a record on a B-tree leaf node page. */
253 void page_zip_write_trx_id_and_roll_ptr(
254     page_zip_des_t *page_zip, /*!< in/out: compressed page */
255     byte *rec,                /*!< in/out: record */
256     const ulint *offsets,     /*!< in: rec_get_offsets(rec, index) */
257     ulint trx_id_col,         /*!< in: column number of TRX_ID in rec */
258     trx_id_t trx_id,          /*!< in: transaction identifier */
259     roll_ptr_t roll_ptr);     /*!< in: roll_ptr */
260 
261 /** Write the "deleted" flag of a record on a compressed page.  The flag must
262  already have been written on the uncompressed page. */
263 void page_zip_rec_set_deleted(
264     page_zip_des_t *page_zip, /*!< in/out: compressed page */
265     const byte *rec,          /*!< in: record on the uncompressed page */
266     ulint flag);              /*!< in: the deleted flag (nonzero=TRUE) */
267 
268 /** Write the "owned" flag of a record on a compressed page.  The n_owned field
269  must already have been written on the uncompressed page. */
270 void page_zip_rec_set_owned(
271     page_zip_des_t *page_zip, /*!< in/out: compressed page */
272     const byte *rec,          /*!< in: record on the uncompressed page */
273     ulint flag);              /*!< in: the owned flag (nonzero=TRUE) */
274 
275 /** Insert a record to the dense page directory. */
276 void page_zip_dir_insert(
277     page_zip_des_t *page_zip, /*!< in/out: compressed page */
278     const byte *prev_rec,     /*!< in: record after which to insert */
279     const byte *free_rec,     /*!< in: record from which rec was
280                              allocated, or NULL */
281     byte *rec);               /*!< in: record to insert */
282 
283 /** Shift the dense page directory and the array of BLOB pointers when a record
284 is deleted.
285 @param[in,out]	page_zip	compressed page
286 @param[in]	rec		deleted record
287 @param[in]	index		index of rec
288 @param[in]	offsets		rec_get_offsets(rec)
289 @param[in]	free		previous start of the free list */
290 void page_zip_dir_delete(page_zip_des_t *page_zip, byte *rec,
291                          const dict_index_t *index, const ulint *offsets,
292                          const byte *free);
293 
294 /** Add a slot to the dense page directory. */
295 void page_zip_dir_add_slot(
296     page_zip_des_t *page_zip, /*!< in/out: compressed page */
297     bool is_clustered);       /*!< in: nonzero for clustered index,
298                               zero for others */
299 
300 /** Parses a log record of writing to the header of a page.
301  @return end of log record or NULL */
302 byte *page_zip_parse_write_header(
303     byte *ptr,                 /*!< in: redo log buffer */
304     byte *end_ptr,             /*!< in: redo log buffer end */
305     page_t *page,              /*!< in/out: uncompressed page */
306     page_zip_des_t *page_zip); /*!< in/out: compressed page */
307 
308 /** Write data to the uncompressed header portion of a page.  The data must
309 already have been written to the uncompressed page.
310 However, the data portion of the uncompressed page may differ from the
311 compressed page when a record is being inserted in page_cur_insert_rec_low().
312 @param[in,out]  page_zip        compressed page
313 @param[in]      str             address on the uncompressed page
314 @param[in]      length          length of the data
315 @param[in]      mtr             mini-transaction, or NULL */
316 UNIV_INLINE
317 void page_zip_write_header(page_zip_des_t *page_zip, const byte *str,
318                            ulint length, mtr_t *mtr);
319 
320 /** Reorganize and compress a page.  This is a low-level operation for
321  compressed pages, to be used when page_zip_compress() fails.
322  On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
323  The function btr_page_reorganize() should be preferred whenever possible.
324  IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
325  non-clustered index, the caller must update the insert buffer free
326  bits in the same mini-transaction in such a way that the modification
327  will be redo-logged.
328  @return true on success, false on failure; page_zip will be left
329  intact on failure, but page will be overwritten. */
330 ibool page_zip_reorganize(
331     buf_block_t *block,  /*!< in/out: page with compressed page;
332                          on the compressed page, in: size;
333                          out: data, n_blobs,
334                          m_start, m_end, m_nonempty */
335     dict_index_t *index, /*!< in: index of the B-tree node */
336     mtr_t *mtr);         /*!< in: mini-transaction */
337 /** Copy the records of a page byte for byte.  Do not copy the page header
338  or trailer, except those B-tree header fields that are directly
339  related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
340  NOTE: The caller must update the lock table and the adaptive hash index. */
341 void page_zip_copy_recs(
342     page_zip_des_t *page_zip,      /*!< out: copy of src_zip
343                                    (n_blobs, m_start, m_end,
344                                    m_nonempty, data[0..size-1]) */
345     page_t *page,                  /*!< out: copy of src */
346     const page_zip_des_t *src_zip, /*!< in: compressed page */
347     const page_t *src,             /*!< in: page */
348     dict_index_t *index,           /*!< in: index of the B-tree */
349     mtr_t *mtr);                   /*!< in: mini-transaction */
350 #ifndef UNIV_HOTBACKUP
351 #endif /* !UNIV_HOTBACKUP */
352 
353 /** Parses a log record of compressing an index page.
354  @return end of log record or NULL */
355 byte *page_zip_parse_compress(
356     byte *ptr,                 /*!< in: buffer */
357     byte *end_ptr,             /*!< in: buffer end */
358     page_t *page,              /*!< out: uncompressed page */
359     page_zip_des_t *page_zip); /*!< out: compressed page */
360 
361 /** Write a log record of compressing an index page without the data on the
362 page.
363 @param[in]	level	compression level
364 @param[in]	page	page that is compressed
365 @param[in]	index	index
366 @param[in]	mtr	mtr */
367 UNIV_INLINE
368 void page_zip_compress_write_log_no_data(ulint level, const page_t *page,
369                                          dict_index_t *index, mtr_t *mtr);
370 
371 /** Parses a log record of compressing an index page without the data.
372 @param[in]	ptr		buffer
373 @param[in]	end_ptr		buffer end
374 @param[in]	page		uncompressed page
375 @param[out]	page_zip	compressed page
376 @param[in]	index		index
377 @return end of log record or NULL */
378 UNIV_INLINE
379 byte *page_zip_parse_compress_no_data(byte *ptr, byte *end_ptr, page_t *page,
380                                       page_zip_des_t *page_zip,
381                                       dict_index_t *index);
382 
383 #ifndef UNIV_HOTBACKUP
384 /** Reset the counters used for filling
385  INFORMATION_SCHEMA.innodb_cmp_per_index. */
386 UNIV_INLINE
387 void page_zip_reset_stat_per_index();
388 
389 #ifdef UNIV_MATERIALIZE
390 #undef UNIV_INLINE
391 #define UNIV_INLINE UNIV_INLINE_ORIGINAL
392 #endif
393 #endif /* !UNIV_HOTBACKUP */
394 
395 #include "page0zip.ic"
396 
397 #endif /* page0zip_h */
398