1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2012, Facebook Inc.
5 Copyright (c) 2017, 2020, MariaDB Corporation.
6 
7 This program is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free Software
9 Foundation; version 2 of the License.
10 
11 This program is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
18 
19 *****************************************************************************/
20 
21 /**************************************************//**
22 @file include/page0zip.h
23 Compressed page interface
24 
25 Created June 2005 by Marko Makela
26 *******************************************************/
27 
28 #ifndef page0zip_h
29 #define page0zip_h
30 
31 #include "buf0types.h"
32 
33 #ifndef UNIV_INNOCHECKSUM
34 #include "mtr0types.h"
35 #include "page0types.h"
36 #include "dict0types.h"
37 #include "srv0srv.h"
38 #include "trx0types.h"
39 #include "mem0mem.h"
40 
41 /* Compression level to be used by zlib. Settable by user. */
42 extern uint	page_zip_level;
43 
44 /* Default compression level. */
45 #define DEFAULT_COMPRESSION_LEVEL	6
46 /** Start offset of the area that will be compressed */
47 #define PAGE_ZIP_START			PAGE_NEW_SUPREMUM_END
48 /** Size of a compressed page directory entry */
49 #define PAGE_ZIP_DIR_SLOT_SIZE		2
50 /** Sum of the sizes of a directory slot, DB_TRX_ID and DB_ROLL_PTR */
51 #define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE		\
52 		(PAGE_ZIP_DIR_SLOT_SIZE		\
53 		+ DATA_TRX_ID_LEN		\
54 		+ DATA_ROLL_PTR_LEN)
55 /** Mask of record offsets (the low 14 bits) */
56 #define PAGE_ZIP_DIR_SLOT_MASK		0x3fffU
57 /** 'owned' flag (bit 14) */
58 #define PAGE_ZIP_DIR_SLOT_OWNED		0x4000U
59 /** 'deleted' flag (bit 15) */
60 #define PAGE_ZIP_DIR_SLOT_DEL		0x8000U
61 
62 /* Whether to log compressed page images, to guard against possible
63 compression algorithm changes in zlib. */
64 extern my_bool	page_zip_log_pages;
65 
66 /**********************************************************************//**
67 Determine the size of a compressed page in bytes.
68 @return size in bytes (declared warn_unused_result: do not discard it) */
69 UNIV_INLINE
70 ulint
71 page_zip_get_size(
72 /*==============*/
73 	const page_zip_des_t*	page_zip)	/*!< in: compressed page descriptor */
74 	MY_ATTRIBUTE((warn_unused_result));
75 /**********************************************************************//**
76 Set the size, in bytes, of a compressed page. */
77 UNIV_INLINE
78 void
79 page_zip_set_size(
80 /*==============*/
81 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page descriptor */
82 	ulint		size);		/*!< in: new size in bytes */
83 
84 /** Determine if a record is so big that it needs to be stored externally.
85 @param[in]	rec_size	length of the record in bytes
86 @param[in]	comp		nonzero=compact format
87 @param[in]	n_fields	number of fields in the record; ignored if
88 tablespace is not compressed
89 @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
90 @return true if the record needs external storage, false if the entire record can be stored locally on the page */
91 inline bool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields,
92 				   ulint zip_size)
93 	MY_ATTRIBUTE((warn_unused_result));
94 
95 /**********************************************************************//**
96 Determine the guaranteed free space on an empty page.
97 @return minimum payload size on the page (the function is declared with the 'const' attribute) */
98 ulint
99 page_zip_empty_size(
100 /*================*/
101 	ulint	n_fields,	/*!< in: number of columns in the index */
102 	ulint	zip_size)	/*!< in: compressed page size in bytes */
103 	MY_ATTRIBUTE((const));
104 
105 /** Check whether a tuple is too big for a compressed table.
106 @param[in]	index	dict index object (dict_index_t)
107 @param[in]	entry	tuple (entry) for the index
108 @return	true if the entry is too big, otherwise false */
109 bool
110 page_zip_is_too_big(
111 	const dict_index_t*	index,
112 	const dtuple_t*		entry);
113 
114 /**********************************************************************//**
115 Initialize a compressed page descriptor. */
116 UNIV_INLINE
117 void
118 page_zip_des_init(
119 /*==============*/
120 	page_zip_des_t*	page_zip);	/*!< in/out: compressed page
121 					descriptor to initialize */
122 
123 /**********************************************************************//**
124 Configure the zlib allocator to use the given memory heap. */
125 void
126 page_zip_set_alloc(
127 /*===============*/
128 	void*		stream,		/*!< in/out: zlib stream, passed as an untyped pointer */
129 	mem_heap_t*	heap);		/*!< in: memory heap to use */
130 
131 /**********************************************************************//**
132 Compress a page.
133 @return TRUE on success, FALSE on failure; page_zip will be left
134 intact on failure. */
135 ibool
136 page_zip_compress(
137 /*==============*/
138 	page_zip_des_t*		page_zip,	/*!< in: size; out: data,
139 						n_blobs, m_start, m_end,
140 						m_nonempty */
141 	const page_t*		page,		/*!< in: uncompressed page */
142 	dict_index_t*		index,		/*!< in: index of the B-tree
143 						node */
144 	ulint			level,		/*!< in: compression level */
145 	mtr_t*			mtr);		/*!< in/out: mini-transaction,
146 						or NULL */
147 
148 /**********************************************************************//**
149 Write the index information for the compressed page.
150 @return used size of buf (buf must provide (n + 1) * 2 bytes) */
151 ulint
152 page_zip_fields_encode(
153 /*===================*/
154 	ulint			n,	/*!< in: number of fields
155 					to compress */
156 	const dict_index_t*	index,	/*!< in: index comprising
157 					at least n fields */
158 	ulint			trx_id_pos,
159 					/*!< in: position of the trx_id column
160 					in the index, or ULINT_UNDEFINED if
161 					this is a non-leaf page */
162 	byte*			buf);	/*!< out: buffer of (n + 1) * 2 bytes */
163 
164 /**********************************************************************//**
165 Decompress a page.  This function should tolerate errors on the compressed
166 page.  Instead of letting assertions fail, it will return FALSE if an
167 inconsistency is detected.  page_zip and page are declared nonnull.
168 @return TRUE on success, FALSE on failure */
169 ibool
170 page_zip_decompress(
171 /*================*/
172 	page_zip_des_t*	page_zip,/*!< in: data, ssize;
173 				out: m_start, m_end, m_nonempty, n_blobs */
174 	page_t*		page,	/*!< out: uncompressed page, may be trashed */
175 	ibool		all)	/*!< in: TRUE=decompress the whole page;
176 				FALSE=verify but do not copy some
177 				page header fields that should not change
178 				after page creation */
179 	MY_ATTRIBUTE((nonnull(1,2)));
180 
181 #ifdef UNIV_DEBUG
182 /**********************************************************************//**
183 Validate a compressed page descriptor.  Declared only if UNIV_DEBUG is defined.
184 @return TRUE if ok */
185 UNIV_INLINE
186 ibool
187 page_zip_simple_validate(
188 /*=====================*/
189 	const page_zip_des_t*	page_zip);	/*!< in: compressed page
190 						descriptor */
191 #endif /* UNIV_DEBUG */
192 
193 #ifdef UNIV_ZIP_DEBUG
194 /**********************************************************************//**
195 Check that the compressed and decompressed pages match.
196 @return TRUE if valid, FALSE if not */
197 ibool
198 page_zip_validate_low(
199 /*==================*/
200 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
201 	const page_t*		page,	/*!< in: uncompressed page */
202 	const dict_index_t*	index,	/*!< in: index of the page, if known */
203 	ibool			sloppy)	/*!< in: FALSE=strict,
204 					TRUE=ignore the MIN_REC_FLAG */
205 	MY_ATTRIBUTE((nonnull(1,2)));
206 /**********************************************************************//**
207 Check that the compressed and decompressed pages match.
@return presumably TRUE if valid, FALSE if not, as for
page_zip_validate_low() -- NOTE(review): confirm against the definition */
208 ibool
209 page_zip_validate(
210 /*==============*/
211 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
212 	const page_t*		page,	/*!< in: uncompressed page */
213 	const dict_index_t*	index)	/*!< in: index of the page, if known */
214 	MY_ATTRIBUTE((nonnull(1,2)));
215 #endif /* UNIV_ZIP_DEBUG */
216 
217 /**********************************************************************//**
218 Determine how big a record can be inserted without recompressing the page.
219 @return a positive number indicating the maximum size of a record
220 whose insertion is guaranteed to succeed, or zero or negative */
221 UNIV_INLINE
222 lint
223 page_zip_max_ins_size(
224 /*==================*/
225 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
226 	ibool			is_clust)/*!< in: TRUE if clustered index */
227 	MY_ATTRIBUTE((warn_unused_result));
228 
229 /**********************************************************************//**
230 Determine if enough space is available in the modification log.
231 @return TRUE if page_zip_write_rec() will succeed, FALSE otherwise */
232 UNIV_INLINE
233 ibool
234 page_zip_available(
235 /*===============*/
236 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
237 	ibool			is_clust,/*!< in: TRUE if clustered index */
238 	ulint			length,	/*!< in: combined size of the record */
239 	ulint			create)	/*!< in: nonzero=add the record to
240 					the heap */
241 	MY_ATTRIBUTE((warn_unused_result));
242 
243 /**********************************************************************//**
244 Write data to the uncompressed header portion of a page.  The data must
245 already have been written to the uncompressed page.
NOTE(review): the same prototype is declared a second time further down in
this header, with a longer comment; one of the two looks redundant. */
246 UNIV_INLINE
247 void
248 page_zip_write_header(
249 /*==================*/
250 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
251 	const byte*	str,	/*!< in: address on the uncompressed page */
252 	ulint		length,	/*!< in: length of the data */
253 	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
254 	MY_ATTRIBUTE((nonnull(1,2)));
255 
256 /**********************************************************************//**
257 Write an entire record on the compressed page.  The data must already
258 have been written to the uncompressed page.  All pointer parameters are declared nonnull. */
259 void
260 page_zip_write_rec(
261 /*===============*/
262 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
263 	const byte*	rec,	/*!< in: record being written */
264 	dict_index_t*	index,	/*!< in: the index the record belongs to */
265 	const rec_offs*	offsets,/*!< in: rec_get_offsets(rec, index) */
266 	ulint		create)	/*!< in: nonzero=insert, zero=update */
267 	MY_ATTRIBUTE((nonnull));
268 
269 /***********************************************************//**
270 Parse a redo log record of writing a BLOB pointer of a record.
271 @return end of log record or NULL */
272 byte*
273 page_zip_parse_write_blob_ptr(
274 /*==========================*/
275 	byte*		ptr,	/*!< in: redo log buffer */
276 	byte*		end_ptr,/*!< in: redo log buffer end */
277 	page_t*		page,	/*!< in/out: uncompressed page */
278 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
279 
280 /**********************************************************************//**
281 Write a BLOB pointer of a record on the leaf page of a clustered index.
282 The information must already have been updated on the uncompressed page. */
283 void
284 page_zip_write_blob_ptr(
285 /*====================*/
286 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
287 	const byte*	rec,	/*!< in: record whose data is being
288 				written (the pointer is const-qualified) */
289 	dict_index_t*	index,	/*!< in: index of the page */
290 	const rec_offs*	offsets,/*!< in: rec_get_offsets(rec, index) */
291 	ulint		n,	/*!< in: column index */
292 	mtr_t*		mtr);	/*!< in: mini-transaction handle,
293 				or NULL if no logging is needed */
294 
295 /***********************************************************//**
296 Parse a redo log record of writing the node pointer of a record.
297 @return end of log record or NULL */
298 byte*
299 page_zip_parse_write_node_ptr(
300 /*==========================*/
301 	byte*		ptr,	/*!< in: redo log buffer */
302 	byte*		end_ptr,/*!< in: redo log buffer end */
303 	page_t*		page,	/*!< in/out: uncompressed page */
304 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
305 
306 /**********************************************************************//**
307 Write the node pointer of a record on a non-leaf compressed page. */
308 void
309 page_zip_write_node_ptr(
310 /*====================*/
311 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
312 	byte*		rec,	/*!< in/out: record */
313 	ulint		size,	/*!< in: data size of rec */
314 	ulint		ptr,	/*!< in: node pointer value to write */
315 	mtr_t*		mtr);	/*!< in: mini-transaction, or NULL */
316 
317 /** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
318 @param[in,out]	page_zip	compressed page
319 @param[in,out]	rec		record
320 @param[in]	offsets		rec_get_offsets(rec, index)
321 @param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
322 @param[in]	trx_id		DB_TRX_ID value (transaction identifier)
323 @param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
324 @param[in,out]	mtr		mini-transaction, or NULL (the default) to skip logging */
325 void
326 page_zip_write_trx_id_and_roll_ptr(
327 	page_zip_des_t*	page_zip,
328 	byte*		rec,
329 	const rec_offs*	offsets,
330 	ulint		trx_id_col,
331 	trx_id_t	trx_id,
332 	roll_ptr_t	roll_ptr,
333 	mtr_t*		mtr = NULL)
334 	MY_ATTRIBUTE((nonnull(1,2,3)));
335 
336 /** Parse a MLOG_ZIP_WRITE_TRX_ID record.
337 @param[in]	ptr		redo log buffer (declared nonnull)
338 @param[in]	end_ptr		end of redo log buffer (declared nonnull)
339 @param[in,out]	page		uncompressed page (not covered by the nonnull attribute)
340 @param[in,out]	page_zip	compressed page (not covered by the nonnull attribute)
341 @return end of log record
342 @retval	NULL	if the log record is incomplete */
343 byte*
344 page_zip_parse_write_trx_id(
345 	byte*		ptr,
346 	byte*		end_ptr,
347 	page_t*		page,
348 	page_zip_des_t*	page_zip)
349 	MY_ATTRIBUTE((nonnull(1,2), warn_unused_result));
350 /**********************************************************************//**
351 Write the "deleted" flag of a record on a compressed page.  The flag must
352 already have been written on the uncompressed page.  Both pointers are declared nonnull. */
353 void
354 page_zip_rec_set_deleted(
355 /*=====================*/
356 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
357 	const byte*	rec,	/*!< in: record on the uncompressed page */
358 	ulint		flag)	/*!< in: the deleted flag (nonzero=TRUE) */
359 	MY_ATTRIBUTE((nonnull));
360 
361 /**********************************************************************//**
362 Write the "owned" flag of a record on a compressed page.  The n_owned field
363 must already have been written on the uncompressed page.  Both pointers are declared nonnull. */
364 void
365 page_zip_rec_set_owned(
366 /*===================*/
367 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
368 	const byte*	rec,	/*!< in: record on the uncompressed page */
369 	ulint		flag)	/*!< in: the owned flag (nonzero=TRUE) */
370 	MY_ATTRIBUTE((nonnull));
371 
372 /**********************************************************************//**
373 Insert a record into the dense page directory. */
374 void
375 page_zip_dir_insert(
376 /*================*/
377 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
378 	const byte*	prev_rec,/*!< in: record after which to insert */
379 	const byte*	free_rec,/*!< in: record from which rec was
380 				allocated, or NULL */
381 	byte*		rec);	/*!< in: record to insert */
382 
383 /**********************************************************************//**
384 Shift the dense page directory and the array of BLOB pointers
385 when a record is deleted. */
386 void
387 page_zip_dir_delete(
388 /*================*/
389 	page_zip_des_t*		page_zip,	/*!< in/out: compressed page */
390 	byte*			rec,		/*!< in: deleted record */
391 	const dict_index_t*	index,		/*!< in: index of rec */
392 	const rec_offs*		offsets,	/*!< in: rec_get_offsets(rec, index) */
393 	const byte*		free)		/*!< in: previous start of
394 						the free list (not covered by the nonnull attribute) */
395 	MY_ATTRIBUTE((nonnull(1,2,3,4)));
396 
397 /**********************************************************************//**
398 Add a slot to the dense page directory. */
399 void
400 page_zip_dir_add_slot(
401 /*==================*/
402 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page (declared nonnull) */
403 	ulint		is_clustered)	/*!< in: nonzero for clustered index,
404 					zero for others */
405 	MY_ATTRIBUTE((nonnull));
406 
407 /***********************************************************//**
408 Parse a redo log record of writing to the header of a page.
409 @return end of log record or NULL */
410 byte*
411 page_zip_parse_write_header(
412 /*========================*/
413 	byte*		ptr,	/*!< in: redo log buffer */
414 	byte*		end_ptr,/*!< in: redo log buffer end */
415 	page_t*		page,	/*!< in/out: uncompressed page */
416 	page_zip_des_t*	page_zip);/*!< in/out: compressed page */
417 
418 /**********************************************************************//**
419 Write data to the uncompressed header portion of a page.  The data must
420 already have been written to the uncompressed page.
421 However, the data portion of the uncompressed page may differ from
422 the compressed page when a record is being inserted in
423 page_cur_insert_rec_low().
NOTE(review): the same prototype is also declared earlier in this header,
with a shorter comment; one of the two looks redundant. */
424 UNIV_INLINE
425 void
426 page_zip_write_header(
427 /*==================*/
428 	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
429 	const byte*	str,	/*!< in: address on the uncompressed page */
430 	ulint		length,	/*!< in: length of the data */
431 	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
432 	MY_ATTRIBUTE((nonnull(1,2)));
433 
434 /**********************************************************************//**
435 Reorganize and compress a page.  This is a low-level operation for
436 compressed pages, to be used when page_zip_compress() fails.
437 On success, a redo log entry MLOG_ZIP_PAGE_COMPRESS will be written.
438 The function btr_page_reorganize() should be preferred whenever possible.
439 IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
440 non-clustered index, the caller must update the insert buffer free
441 bits in the same mini-transaction in such a way that the modification
442 will be redo-logged.  All three parameters are declared nonnull.
443 @return TRUE on success, FALSE on failure; page_zip will be left
444 intact on failure, but page will be overwritten. */
445 ibool
446 page_zip_reorganize(
447 /*================*/
448 	buf_block_t*	block,	/*!< in/out: page with compressed page;
449 				on the compressed page, in: size;
450 				out: data, n_blobs,
451 				m_start, m_end, m_nonempty */
452 	dict_index_t*	index,	/*!< in: index of the B-tree node */
453 	mtr_t*		mtr)	/*!< in: mini-transaction */
454 	MY_ATTRIBUTE((nonnull));
455 
456 /**********************************************************************//**
457 Copy the records of a page byte for byte.  Do not copy the page header
458 or trailer, except those B-tree header fields that are directly
459 related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
460 NOTE: The caller must update the lock table and the adaptive hash index. */
461 void
462 page_zip_copy_recs(
463 /*===============*/
464 	page_zip_des_t*		page_zip,	/*!< out: copy of src_zip
465 						(n_blobs, m_start, m_end,
466 						m_nonempty, data[0..size-1]) */
467 	page_t*			page,		/*!< out: copy of src */
468 	const page_zip_des_t*	src_zip,	/*!< in: source compressed page */
469 	const page_t*		src,		/*!< in: source page */
470 	dict_index_t*		index,		/*!< in: index of the B-tree */
471 	mtr_t*			mtr);		/*!< in: mini-transaction */
472 
473 /** Parse and optionally apply MLOG_ZIP_PAGE_COMPRESS.
474 @param[in]	ptr	log record
475 @param[in]	end_ptr	end of log
476 @param[in,out]	block	ROW_FORMAT=COMPRESSED block to apply the record to, or NULL for parsing only
477 @return	end of log record
478 @retval	NULL	if the log record is incomplete */
479 byte* page_zip_parse_compress(const byte* ptr, const byte* end_ptr,
480 			      buf_block_t* block);
481 
482 #endif /* !UNIV_INNOCHECKSUM */
483 
484 /** Calculate the compressed page checksum.
485 @param[in]	data			compressed page
486 @param[in]	size			size of compressed page
487 @param[in]	algo			checksum algorithm to use
488 @return page checksum computed with algo */
489 uint32_t
490 page_zip_calc_checksum(
491 	const void*			data,
492 	ulint				size,
493 	srv_checksum_algorithm_t	algo);
494 
495 /** Validate the checksum on a ROW_FORMAT=COMPRESSED page.
496 @param[in] data    ROW_FORMAT=COMPRESSED page
497 @param[in] size    size of the page, in bytes
498 @return whether the stored checksum matches innodb_checksum_algorithm */
499 bool page_zip_verify_checksum(const byte *data, size_t size);
500 
501 #ifndef UNIV_INNOCHECKSUM
502 /**********************************************************************//**
503 Write a log record of compressing an index page, without the data on the page. */
504 UNIV_INLINE
505 void
506 page_zip_compress_write_log_no_data(
507 /*================================*/
508 	ulint		level,	/*!< in: compression level */
509 	const page_t*	page,	/*!< in: page that is compressed */
510 	dict_index_t*	index,	/*!< in: index */
511 	mtr_t*		mtr);	/*!< in: mini-transaction */
512 /**********************************************************************//**
513 Parse a log record of compressing an index page without the data.
514 @return end of log record or NULL */
515 UNIV_INLINE
516 byte*
517 page_zip_parse_compress_no_data(
518 /*============================*/
519 	byte*		ptr,		/*!< in: buffer (declared nonnull) */
520 	byte*		end_ptr,	/*!< in: buffer end (declared nonnull) */
521 	page_t*		page,		/*!< in: uncompressed page */
522 	page_zip_des_t*	page_zip,	/*!< out: compressed page */
523 	dict_index_t*	index)		/*!< in: index */
524 	MY_ATTRIBUTE((nonnull(1,2)));
525 
526 /**********************************************************************//**
527 Reset the counters used for filling
528 INFORMATION_SCHEMA.innodb_cmp_per_index.  Takes no arguments and returns nothing. */
529 UNIV_INLINE
530 void
531 page_zip_reset_stat_per_index();
532 /*===========================*/
533 
534 #include "page0zip.inl"
535 #endif /* !UNIV_INNOCHECKSUM */
536 
537 #endif /* page0zip_h */
538