1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2021, Oracle and/or its affiliates.
4 Copyright (c) 2012, Facebook Inc.
5 
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License, version 2.0,
8 as published by the Free Software Foundation.
9 
10 This program is also distributed with certain software (including
11 but not limited to OpenSSL) that is licensed under separate terms,
12 as designated in a particular file or component or in included license
13 documentation.  The authors of MySQL hereby grant you an additional
14 permission to link the program and your derivative works with the
15 separately licensed software that they have included with MySQL.
16 
17 This program is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20 GNU General Public License, version 2.0, for more details.
21 
22 You should have received a copy of the GNU General Public License along with
23 this program; if not, write to the Free Software Foundation, Inc.,
24 51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
25 
26 *****************************************************************************/
27 
28 /**************************************************//**
29 @file include/page0zip.h
30 Compressed page interface
31 
32 Created June 2005 by Marko Makela
33 *******************************************************/
34 
35 #ifndef page0zip_h
36 #define page0zip_h
37 
38 #ifdef UNIV_MATERIALIZE
39 # undef UNIV_INLINE
40 # define UNIV_INLINE
41 #endif
42 
43 #ifdef UNIV_INNOCHECKSUM
44 #include "univ.i"
45 #include "buf0buf.h"
46 #include "ut0crc32.h"
47 #include "buf0checksum.h"
48 #include "mach0data.h"
49 #include "zlib.h"
50 #endif /* UNIV_INNOCHECKSUM */
51 
52 #ifndef UNIV_INNOCHECKSUM
53 #include "mtr0types.h"
54 #include "page0types.h"
55 #endif /* !UNIV_INNOCHECKSUM */
56 
57 #include "buf0types.h"
58 
59 #ifndef UNIV_INNOCHECKSUM
60 #include "dict0types.h"
61 #include "srv0srv.h"
62 #include "trx0types.h"
63 #include "mem0mem.h"
64 
65 /* Compression level to be used by zlib. Settable by the user. */
66 extern uint	page_zip_level;
67 
68 /* Default compression level (zlib documents level 6 as its own default). */
69 #define DEFAULT_COMPRESSION_LEVEL	6
70 /** Start offset of the area that will be compressed */
71 #define PAGE_ZIP_START			PAGE_NEW_SUPREMUM_END
72 /** Size of a compressed page directory entry, in bytes */
73 #define PAGE_ZIP_DIR_SLOT_SIZE		2
74 /** Sum of PAGE_ZIP_DIR_SLOT_SIZE, DATA_TRX_ID_LEN and DATA_ROLL_PTR_LEN */
75 #define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE		\
76 		(PAGE_ZIP_DIR_SLOT_SIZE		\
77 		+ DATA_TRX_ID_LEN		\
78 		+ DATA_ROLL_PTR_LEN)
79 /** Mask of record offsets (the low 14 bits) */
80 #define PAGE_ZIP_DIR_SLOT_MASK		0x3fff
81 /** 'owned' flag (bit 14) */
82 #define PAGE_ZIP_DIR_SLOT_OWNED		0x4000
83 /** 'deleted' flag (bit 15) */
84 #define PAGE_ZIP_DIR_SLOT_DEL		0x8000
85 
86 /* Whether or not to log compressed page images, to guard against possible
87 compression algorithm changes in zlib. */
88 extern my_bool	page_zip_log_pages;
89 
90 /**********************************************************************//**
91 Determine the size, in bytes, of the compressed page that page_zip describes.
92 @return size in bytes; declared warn_unused_result, so do not discard it */
93 UNIV_INLINE
94 ulint
95 page_zip_get_size(
96 /*==============*/
97 	const page_zip_des_t*	page_zip)	/*!< in: compressed page descriptor */
98 	MY_ATTRIBUTE((warn_unused_result));
99 /**********************************************************************//**
100 Set the size, in bytes, of a compressed page in its descriptor. */
101 UNIV_INLINE
102 void
103 page_zip_set_size(
104 /*==============*/
105 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page descriptor */
106 	ulint		size);		/*!< in: size in bytes */
107 
108 #ifndef UNIV_HOTBACKUP
109 /** Determine if a record is so big that it needs to be stored externally.
110 @param[in]	rec_size	length of the record in bytes
111 @param[in]	comp		nonzero=compact format
112 @param[in]	n_fields	number of fields in the record; ignored if
113 the tablespace is not compressed
114 @param[in]	page_size	page size, passed by const reference
115 @return TRUE if the record needs external storage, FALSE if the entire record can be stored locally on the page */
116 UNIV_INLINE
117 ibool
118 page_zip_rec_needs_ext(
119 	ulint			rec_size,
120 	ulint			comp,
121 	ulint			n_fields,
122 	const page_size_t&	page_size)
123 	MY_ATTRIBUTE((warn_unused_result));
124 
125 /**********************************************************************//**
126 Determine the guaranteed free space on an empty compressed page.
127 @return minimum payload size on the page; declared with the const attribute, i.e. a function of its arguments only */
128 ulint
129 page_zip_empty_size(
130 /*================*/
131 	ulint	n_fields,	/*!< in: number of columns in the index */
132 	ulint	zip_size)	/*!< in: compressed page size in bytes */
133 	MY_ATTRIBUTE((const));
134 
135 /** Check whether a tuple is too big for a compressed table.
136 @param[in]	index	dictionary index object
137 @param[in]	entry	tuple (entry for the index) to check
138 @return	true if the entry is too big, otherwise false */
139 bool
140 page_zip_is_too_big(
141 	const dict_index_t*	index,
142 	const dtuple_t*		entry);
143 #endif /* !UNIV_HOTBACKUP */
144 
145 /**********************************************************************//**
146 Initialize a compressed page descriptor in place. */
147 UNIV_INLINE
148 void
149 page_zip_des_init(
150 /*==============*/
151 	page_zip_des_t*	page_zip);	/*!< in/out: compressed page
152 					descriptor to initialize */
153 
154 /**********************************************************************//**
155 Configure the allocator of a zlib stream to use the given memory heap. */
156 void
157 page_zip_set_alloc(
158 /*===============*/
159 	void*		stream,		/*!< in/out: zlib stream, passed as void*; NOTE(review): presumably a z_stream -- confirm */
160 	mem_heap_t*	heap);		/*!< in: memory heap to use */
161 
162 /**********************************************************************//**
163 Compress the uncompressed page into the compressed page page_zip.
164 @return TRUE on success, FALSE on failure; page_zip will be left
165 intact on failure.  The result must be used (warn_unused_result). */
166 ibool
167 page_zip_compress(
168 /*==============*/
169 	page_zip_des_t*		page_zip,	/*!< in: size; out: data,
170 						n_blobs, m_start, m_end,
171 						m_nonempty */
172 	const page_t*		page,		/*!< in: uncompressed page */
173 	dict_index_t*		index,		/*!< in: index of the B-tree
174 						node */
175 	ulint			level,		/*!< in: compression level */
176 	const redo_page_compress_t* page_comp_info,
177 						/*!< in: used for applying
178 						a TRUNCATE log
179 						record during recovery */
180 	mtr_t*			mtr)		/*!< in/out: mini-transaction,
181 						or NULL */
182 	MY_ATTRIBUTE((warn_unused_result));
183 
184 /**********************************************************************//**
185 Write the index information for the compressed page into buf.
186 @return used size of buf */
187 ulint
188 page_zip_fields_encode(
189 /*===================*/
190 	ulint			n,	/*!< in: number of fields
191 					to compress */
192 	const dict_index_t*	index,	/*!< in: index comprising
193 					at least n fields */
194 	ulint			trx_id_pos,
195 					/*!< in: position of the trx_id column
196 					in the index, or ULINT_UNDEFINED if
197 					this is a non-leaf page */
198 	byte*			buf);	/*!< out: caller-provided buffer of (n + 1) * 2 bytes */
199 
200 /**********************************************************************//**
201 Decompress a page.  This function should tolerate errors on the compressed
202 page.  Instead of letting assertions fail, it will return FALSE if an
203 inconsistency is detected.  page_zip and page are declared nonnull.
204 @return TRUE on success, FALSE on failure */
205 ibool
206 page_zip_decompress(
207 /*================*/
208 	page_zip_des_t*	page_zip,/*!< in: data, ssize;
209 				out: m_start, m_end, m_nonempty, n_blobs */
210 	page_t*		page,	/*!< out: uncompressed page, may be trashed */
211 	ibool		all)	/*!< in: TRUE=decompress the whole page;
212 				FALSE=verify but do not copy some
213 				page header fields that should not change
214 				after page creation */
215 	MY_ATTRIBUTE((nonnull(1,2)));
216 
217 #ifdef UNIV_DEBUG
218 /**********************************************************************//**
219 Validate a compressed page descriptor.  Declared only if UNIV_DEBUG is defined.
220 @return TRUE if the descriptor is ok */
221 UNIV_INLINE
222 ibool
223 page_zip_simple_validate(
224 /*=====================*/
225 	const page_zip_des_t*	page_zip);	/*!< in: compressed page
226 						descriptor */
227 #endif /* UNIV_DEBUG */
228 
229 #ifdef UNIV_ZIP_DEBUG
230 /**********************************************************************//**
231 Check that the compressed and decompressed pages match.  Declared only if UNIV_ZIP_DEBUG is defined.
232 @return TRUE if valid, FALSE if not */
233 ibool
234 page_zip_validate_low(
235 /*==================*/
236 	const page_zip_des_t*	page_zip,/*!< in: compressed page (declared nonnull) */
237 	const page_t*		page,	/*!< in: uncompressed page (declared nonnull) */
238 	const dict_index_t*	index,	/*!< in: index of the page, if known; not covered by the nonnull attribute */
239 	ibool			sloppy)	/*!< in: FALSE=strict,
240 					TRUE=ignore the MIN_REC_FLAG */
241 	MY_ATTRIBUTE((nonnull(1,2)));
242 /**********************************************************************//**
243 Check that the compressed and decompressed pages match.  NOTE(review): return value undocumented; presumably TRUE if valid, as for page_zip_validate_low() -- confirm. */
244 ibool
245 page_zip_validate(
246 /*==============*/
247 	const page_zip_des_t*	page_zip,/*!< in: compressed page (declared nonnull) */
248 	const page_t*		page,	/*!< in: uncompressed page (declared nonnull) */
249 	const dict_index_t*	index)	/*!< in: index of the page, if known; not covered by the nonnull attribute */
250 	MY_ATTRIBUTE((nonnull(1,2)));
251 #endif /* UNIV_ZIP_DEBUG */
252 
253 /**********************************************************************//**
254 Determine how big a record can be inserted without recompressing the page.
255 @return a positive number indicating the maximum size of a record
256 whose insertion is guaranteed to succeed, or a zero or negative value otherwise */
257 UNIV_INLINE
258 lint
259 page_zip_max_ins_size(
260 /*==================*/
261 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
262 	ibool			is_clust)/*!< in: TRUE if clustered index */
263 	MY_ATTRIBUTE((warn_unused_result));
264 
265 /**********************************************************************//**
266 Determine if enough space is available in the modification log of page_zip.
267 @return TRUE if page_zip_write_rec() will succeed, FALSE otherwise */
268 UNIV_INLINE
269 ibool
270 page_zip_available(
271 /*===============*/
272 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
273 	ibool			is_clust,/*!< in: TRUE if clustered index */
274 	ulint			length,	/*!< in: combined size of the record */
275 	ulint			create)	/*!< in: nonzero=add the record to
276 					the heap */
277 	MY_ATTRIBUTE((warn_unused_result));
278 
279 /**********************************************************************//**
280 Write data to the uncompressed header portion of a page.  The data must
281 already have been written to the uncompressed page.  (The same prototype is repeated later in this header.) */
282 UNIV_INLINE
283 void
284 page_zip_write_header(
285 /*==================*/
286 	page_zip_des_t*	page_zip,/*!< in/out: compressed page (declared nonnull) */
287 	const byte*	str,	/*!< in: address on the uncompressed page (declared nonnull) */
288 	ulint		length,	/*!< in: length of the data */
289 	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
290 	MY_ATTRIBUTE((nonnull(1,2)));
291 
292 /**********************************************************************//**
293 Write an entire record on the compressed page.  The data must already
294 have been written to the uncompressed page.  All pointer arguments are declared nonnull. */
295 void
296 page_zip_write_rec(
297 /*===============*/
298 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
299 	const byte*	rec,	/*!< in: record being written */
300 	dict_index_t*	index,	/*!< in: the index the record belongs to */
301 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
302 	ulint		create)	/*!< in: nonzero=insert, zero=update */
303 	MY_ATTRIBUTE((nonnull));
304 
305 /***********************************************************//**
306 Parse a redo log record of writing a BLOB pointer of a record.
307 @return end of log record or NULL */
308 byte*
309 page_zip_parse_write_blob_ptr(
310 /*==========================*/
311 	byte*		ptr,	/*!< in: redo log buffer */
312 	byte*		end_ptr,/*!< in: redo log buffer end */
313 	page_t*		page,	/*!< in/out: uncompressed page */
314 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
315 
316 /**********************************************************************//**
317 Write a BLOB pointer of a record on the leaf page of a clustered index.
318 The information must already have been updated on the uncompressed page. */
319 void
320 page_zip_write_blob_ptr(
321 /*====================*/
322 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
323 	const byte*	rec,	/*!< in: record whose data is being
324 				written (const here, so only read through this pointer) */
325 	dict_index_t*	index,	/*!< in: index of the page */
326 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
327 	ulint		n,	/*!< in: column index */
328 	mtr_t*		mtr);	/*!< in: mini-transaction handle,
329 				or NULL if no logging is needed */
330 
331 /***********************************************************//**
332 Parse a redo log record of writing the node pointer of a record.
333 @return end of log record or NULL */
334 byte*
335 page_zip_parse_write_node_ptr(
336 /*==========================*/
337 	byte*		ptr,	/*!< in: redo log buffer */
338 	byte*		end_ptr,/*!< in: redo log buffer end */
339 	page_t*		page,	/*!< in/out: uncompressed page */
340 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
341 
342 /**********************************************************************//**
343 Write the node pointer of a record on a non-leaf compressed page. */
344 void
345 page_zip_write_node_ptr(
346 /*====================*/
347 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
348 	byte*		rec,	/*!< in/out: record */
349 	ulint		size,	/*!< in: data size of rec */
350 	ulint		ptr,	/*!< in: node pointer value to write */
351 	mtr_t*		mtr);	/*!< in: mini-transaction, or NULL */
352 
353 /**********************************************************************//**
354 Write the trx_id and roll_ptr of a record on a B-tree leaf node page.  All pointer arguments are declared nonnull. */
355 void
356 page_zip_write_trx_id_and_roll_ptr(
357 /*===============================*/
358 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
359 	byte*		rec,	/*!< in/out: record */
360 	const ulint*	offsets,/*!< in: rec_get_offsets(rec, index) */
361 	ulint		trx_id_col,/*!< in: column number of TRX_ID in rec */
362 	trx_id_t	trx_id,	/*!< in: transaction identifier */
363 	roll_ptr_t	roll_ptr)/*!< in: roll_ptr value to write */
364 	MY_ATTRIBUTE((nonnull));
365 
366 /**********************************************************************//**
367 Write the "deleted" flag of a record on a compressed page.  The flag must
368 already have been written on the uncompressed page.  Pointer arguments are declared nonnull. */
369 void
370 page_zip_rec_set_deleted(
371 /*=====================*/
372 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
373 	const byte*	rec,	/*!< in: record on the uncompressed page */
374 	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
375 	MY_ATTRIBUTE((nonnull));
376 
377 /**********************************************************************//**
378 Write the "owned" flag of a record on a compressed page.  The n_owned field
379 must already have been written on the uncompressed page.  Pointer arguments are declared nonnull. */
380 void
381 page_zip_rec_set_owned(
382 /*===================*/
383 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
384 	const byte*	rec,	/*!< in: record on the uncompressed page */
385 	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
386 	MY_ATTRIBUTE((nonnull));
387 
388 /**********************************************************************//**
389 Insert a record into the dense page directory. */
390 void
391 page_zip_dir_insert(
392 /*================*/
393 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
394 	const byte*	prev_rec,/*!< in: record after which to insert */
395 	const byte*	free_rec,/*!< in: record from which rec was
396 				allocated, or NULL */
397 	byte*		rec);	/*!< in: record to insert */
398 
399 /**********************************************************************//**
400 Shift the dense page directory and the array of BLOB pointers
401 when a record is deleted.  The first four arguments are declared nonnull. */
402 void
403 page_zip_dir_delete(
404 /*================*/
405 	page_zip_des_t*		page_zip,	/*!< in/out: compressed page */
406 	byte*			rec,		/*!< in: deleted record */
407 	const dict_index_t*	index,		/*!< in: index of rec */
408 	const ulint*		offsets,	/*!< in: rec_get_offsets(rec, index) */
409 	const byte*		free)		/*!< in: previous start of
410 						the free list; not covered by the nonnull attribute */
411 	MY_ATTRIBUTE((nonnull(1,2,3,4)));
412 
413 /**********************************************************************//**
414 Add a slot to the dense page directory of a compressed page. */
415 void
416 page_zip_dir_add_slot(
417 /*==================*/
418 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page (declared nonnull) */
419 	ulint		is_clustered)	/*!< in: nonzero for clustered index,
420 					zero for others */
421 	MY_ATTRIBUTE((nonnull));
422 
423 /***********************************************************//**
424 Parse a redo log record of writing to the header of a page.
425 @return end of log record or NULL */
426 byte*
427 page_zip_parse_write_header(
428 /*========================*/
429 	byte*		ptr,	/*!< in: redo log buffer */
430 	byte*		end_ptr,/*!< in: redo log buffer end */
431 	page_t*		page,	/*!< in/out: uncompressed page */
432 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
433 
434 /**********************************************************************//**
435 Write data to the uncompressed header portion of a page.  The data must
436 already have been written to the uncompressed page.
437 However, the data portion of the uncompressed page may differ from
438 the compressed page when a record is being inserted in
439 page_cur_insert_rec_low().  NOTE(review): this repeats a prototype already declared earlier in this header; consider keeping only one. */
440 UNIV_INLINE
441 void
442 page_zip_write_header(
443 /*==================*/
444 	page_zip_des_t*	page_zip,/*!< in/out: compressed page (declared nonnull) */
445 	const byte*	str,	/*!< in: address on the uncompressed page (declared nonnull) */
446 	ulint		length,	/*!< in: length of the data */
447 	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
448 	MY_ATTRIBUTE((nonnull(1,2)));
449 
450 /**********************************************************************//**
451 Reorganize and compress a page.  This is a low-level operation for
452 compressed pages, to be used when page_zip_compress() fails.
453 On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
454 The function btr_page_reorganize() should be preferred whenever possible.
455 IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
456 non-clustered index, the caller must update the insert buffer free
457 bits in the same mini-transaction in such a way that the modification
458 will be redo-logged.
459 @return TRUE on success, FALSE on failure; page_zip will be left
460 intact on failure, but the uncompressed page will be overwritten. */
461 ibool
462 page_zip_reorganize(
463 /*================*/
464 	buf_block_t*	block,	/*!< in/out: page with compressed page;
465 				on the compressed page, in: size;
466 				out: data, n_blobs,
467 				m_start, m_end, m_nonempty */
468 	dict_index_t*	index,	/*!< in: index of the B-tree node */
469 	mtr_t*		mtr);	/*!< in: mini-transaction */
470 #ifndef UNIV_HOTBACKUP
471 /**********************************************************************//**
472 Copy the records of a page byte for byte.  Do not copy the page header
473 or trailer, except those B-tree header fields that are directly
474 related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
475 NOTE: The caller must update the lock table and the adaptive hash index.  Declared only if UNIV_HOTBACKUP is not defined. */
476 void
477 page_zip_copy_recs(
478 /*===============*/
479 	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
480 						(n_blobs, m_start, m_end,
481 						m_nonempty, data[0..size-1]) */
482 	page_t*			page,		/*!< out: copy of src */
483 	const page_zip_des_t*	src_zip,	/*!< in: compressed page to copy from */
484 	const page_t*		src,		/*!< in: page to copy from */
485 	dict_index_t*		index,		/*!< in: index of the B-tree */
486 	mtr_t*			mtr);		/*!< in: mini-transaction */
487 #endif /* !UNIV_HOTBACKUP */
488 
489 /**********************************************************************//**
490 Parse a log record of compressing an index page.
491 @return end of log record or NULL; the result must be used (warn_unused_result) */
492 byte*
493 page_zip_parse_compress(
494 /*====================*/
495 	byte*		ptr,		/*!< in: buffer holding the log record */
496 	byte*		end_ptr,	/*!< in: buffer end */
497 	page_t*		page,		/*!< out: uncompressed page */
498 	page_zip_des_t*	page_zip)	/*!< out: compressed page */
499 	MY_ATTRIBUTE((warn_unused_result));
500 
501 #endif /* !UNIV_INNOCHECKSUM */
502 
503 /** Calculate the compressed page checksum.
504 @param[in]	data			compressed page
505 @param[in]	size			size of compressed page
506 @param[in]	algo			checksum algorithm to use
507 @param[in]	use_legacy_big_endian	defaults to false; only used if algo is
508 SRV_CHECKSUM_ALGORITHM_CRC32 or SRV_CHECKSUM_ALGORITHM_STRICT_CRC32 - if true
509 then use big endian byte order when converting byte strings to integers.
510 @return page checksum */
511 uint32_t
512 page_zip_calc_checksum(
513 	const void*			data,
514 	ulint				size,
515 	srv_checksum_algorithm_t	algo,
516 	bool				use_legacy_big_endian = false);
517 
518 /**********************************************************************//**
519 Verify the checksum of a compressed page.
520 @return TRUE if the stored checksum is valid according to the value of
521 innodb_checksum_algorithm */
522 ibool
523 page_zip_verify_checksum(
524 /*=====================*/
525 	const void*	data,	/*!< in: compressed page */
526 	ulint		size	/*!< in: size of compressed page */
527 #ifdef UNIV_INNOCHECKSUM
528 	/* These parameters exist only when building the innochecksum tool. */
529 	,uintmax_t	page_no,	/*!< in: page number of
530 					given read_buf */
531 	bool		strict_check,	/*!< in: true if the strict-check
532 					option is enabled */
533 	bool		is_log_enabled, /*!< in: true if the log option is
534 					enabled */
535 	FILE*		log_file	/*!< in: file pointer to
536 					log_file */
537 #endif /* UNIV_INNOCHECKSUM */
538 );
539 
540 #ifndef UNIV_INNOCHECKSUM
541 /**********************************************************************//**
542 Write a log record of compressing an index page, without the data on the page. */
543 UNIV_INLINE
544 void
545 page_zip_compress_write_log_no_data(
546 /*================================*/
547 	ulint		level,	/*!< in: compression level */
548 	const page_t*	page,	/*!< in: page that is compressed */
549 	dict_index_t*	index,	/*!< in: index */
550 	mtr_t*		mtr);	/*!< in: mini-transaction */
551 /**********************************************************************//**
552 Parse a log record of compressing an index page without the data.
553 @return end of log record or NULL */
554 UNIV_INLINE
555 byte*
556 page_zip_parse_compress_no_data(
557 /*============================*/
558 	byte*		ptr,		/*!< in: buffer (declared nonnull) */
559 	byte*		end_ptr,	/*!< in: buffer end (declared nonnull) */
560 	page_t*		page,		/*!< in: uncompressed page */
561 	page_zip_des_t*	page_zip,	/*!< out: compressed page */
562 	dict_index_t*	index)		/*!< in: index */
563 	MY_ATTRIBUTE((nonnull(1,2)));
564 
565 /**********************************************************************//**
566 Reset the counters that are used for filling
567 INFORMATION_SCHEMA.innodb_cmp_per_index.  Takes no arguments and returns nothing. */
568 UNIV_INLINE
569 void
570 page_zip_reset_stat_per_index();
571 /*===========================*/
572 
573 #ifdef UNIV_MATERIALIZE
574 # undef UNIV_INLINE
575 # define UNIV_INLINE	UNIV_INLINE_ORIGINAL
576 #endif
577 
578 #ifndef UNIV_NONINL
579 # include "page0zip.ic"
580 #endif
581 #endif /* !UNIV_INNOCHECKSUM */
582 
583 #endif /* page0zip_h */
584