1 /*****************************************************************************
2 
3 Copyright (c) 2005, 2016, Oracle and/or its affiliates. All Rights Reserved.
4 Copyright (c) 2012, Facebook Inc.
5 Copyright (c) 2017, 2020, MariaDB Corporation.
6 
7 This program is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free Software
9 Foundation; version 2 of the License.
10 
11 This program is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
13 FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
14 
15 You should have received a copy of the GNU General Public License along with
16 this program; if not, write to the Free Software Foundation, Inc.,
17 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335 USA
18 
19 *****************************************************************************/
20 
21 /**************************************************//**
22 @file include/page0zip.h
23 Compressed page interface
24 
25 Created June 2005 by Marko Makela
26 *******************************************************/
27 
28 #ifndef page0zip_h
29 #define page0zip_h
30 
31 #include "buf0types.h"
32 
33 #ifndef UNIV_INNOCHECKSUM
34 #include "mtr0types.h"
35 #include "page0types.h"
36 #include "dict0types.h"
37 #include "srv0srv.h"
38 #include "trx0types.h"
39 #include "mem0mem.h"
40 
41 /** Compression level to be used by zlib. Settable by the user. */
42 extern uint	page_zip_level;
43 
44 /** Default compression level. */
45 #define DEFAULT_COMPRESSION_LEVEL	6
46 /** Start offset of the area that will be compressed */
47 #define PAGE_ZIP_START			PAGE_NEW_SUPREMUM_END
48 /** Size of a compressed page directory entry */
49 #define PAGE_ZIP_DIR_SLOT_SIZE		2
50 /** Predefined sum of PAGE_ZIP_DIR_SLOT_SIZE, DATA_TRX_ID_LEN and DATA_ROLL_PTR_LEN */
51 #define PAGE_ZIP_CLUST_LEAF_SLOT_SIZE		\
52 		(PAGE_ZIP_DIR_SLOT_SIZE		\
53 		+ DATA_TRX_ID_LEN		\
54 		+ DATA_ROLL_PTR_LEN)
55 /** Mask of the record offset bits in a directory slot */
56 #define PAGE_ZIP_DIR_SLOT_MASK		0x3fffU
57 /** 'owned' flag bit of a directory slot */
58 #define PAGE_ZIP_DIR_SLOT_OWNED		0x4000U
59 /** 'deleted' flag bit of a directory slot */
60 #define PAGE_ZIP_DIR_SLOT_DEL		0x8000U
61 
62 /**********************************************************************//**
63 Determine the size of a compressed page in bytes.
64 @return size of the compressed page in bytes */
65 UNIV_INLINE
66 ulint
67 page_zip_get_size(
68 /*==============*/
69 	const page_zip_des_t*	page_zip)	/*!< in: compressed page descriptor */
70 	MY_ATTRIBUTE((warn_unused_result));
71 /**********************************************************************//**
72 Set the size of a compressed page in bytes. */
73 UNIV_INLINE
74 void
75 page_zip_set_size(
76 /*==============*/
77 	page_zip_des_t*	page_zip,	/*!< in/out: compressed page descriptor */
78 	ulint		size);		/*!< in: new size in bytes */
79 
80 /** Determine if a record is so big that it needs to be stored externally.
81 @param[in]	rec_size	length of the record in bytes
82 @param[in]	comp		nonzero=compact format
83 @param[in]	n_fields	number of fields in the record; ignored if
84 tablespace is not compressed
85 @param[in]	zip_size	ROW_FORMAT=COMPRESSED page size, or 0
86 @return false if the entire record can be stored locally on the page; true if it needs to be stored externally */
87 inline bool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields,
88 				   ulint zip_size)
89 	MY_ATTRIBUTE((warn_unused_result));
90 
91 /**********************************************************************//**
92 Determine the guaranteed free space on an empty compressed page.
93 @return minimum payload size on the page */
94 ulint
95 page_zip_empty_size(
96 /*================*/
97 	ulint	n_fields,	/*!< in: number of columns in the index */
98 	ulint	zip_size)	/*!< in: compressed page size in bytes */
99 	MY_ATTRIBUTE((const));
100 
101 /** Check whether a tuple is too big for a compressed table.
102 @param[in]	index	dict index object
103 @param[in]	entry	entry for the index
104 @return	true if the entry is too big, otherwise false */
105 bool
106 page_zip_is_too_big(
107 	const dict_index_t*	index,
108 	const dtuple_t*		entry);
109 
110 /**********************************************************************//**
111 Initialize a compressed page descriptor. */
112 UNIV_INLINE
113 void
114 page_zip_des_init(
115 /*==============*/
116 	page_zip_des_t*	page_zip);	/*!< in/out: compressed page
117 					descriptor to initialize */
118 
119 /**********************************************************************//**
120 Configure the zlib allocator to use the given memory heap. */
121 void
122 page_zip_set_alloc(
123 /*===============*/
124 	void*		stream,		/*!< in/out: zlib stream (passed as an untyped pointer) */
125 	mem_heap_t*	heap);		/*!< in: memory heap to use */
126 
127 /** Attempt to compress a ROW_FORMAT=COMPRESSED page.
128 @retval true on success
129 @retval false on failure; block->page.zip will be left intact. */
130 bool
131 page_zip_compress(
132 	buf_block_t*		block,	/*!< in/out: buffer block */
133 	dict_index_t*		index,	/*!< in: index of the B-tree node */
134 	ulint			level,	/*!< in: compression level */
135 	mtr_t*			mtr)	/*!< in/out: mini-transaction */
136 	MY_ATTRIBUTE((nonnull));
137 
138 /**********************************************************************//**
139 Write the index information for the compressed page.
140 @return used size of buf, in bytes */
141 ulint
142 page_zip_fields_encode(
143 /*===================*/
144 	ulint			n,	/*!< in: number of fields
145 					to compress */
146 	const dict_index_t*	index,	/*!< in: index comprising
147 					at least n fields */
148 	ulint			trx_id_pos,
149 					/*!< in: position of the trx_id column
150 					in the index, or ULINT_UNDEFINED if
151 					this is a non-leaf page */
152 	byte*			buf);	/*!< out: buffer of (n + 1) * 2 bytes */
153 
154 /**********************************************************************//**
155 Decompress a page.  This function should tolerate errors on the compressed
156 page.  Instead of letting assertions fail, it will return FALSE if an
157 inconsistency is detected.
158 @return TRUE on success, FALSE if an inconsistency was detected */
159 ibool
160 page_zip_decompress(
161 /*================*/
162 	page_zip_des_t*	page_zip,/*!< in: data, ssize;
163 				out: m_start, m_end, m_nonempty, n_blobs */
164 	page_t*		page,	/*!< out: uncompressed page, may be trashed */
165 	ibool		all)	/*!< in: TRUE=decompress the whole page;
166 				FALSE=verify but do not copy some
167 				page header fields that should not change
168 				after page creation */
169 	MY_ATTRIBUTE((nonnull(1,2)));
170 
171 #ifdef UNIV_DEBUG
172 /**********************************************************************//**
173 Validate a compressed page descriptor (declared only when UNIV_DEBUG is defined).
174 @return TRUE if ok */
175 UNIV_INLINE
176 ibool
177 page_zip_simple_validate(
178 /*=====================*/
179 	const page_zip_des_t*	page_zip);	/*!< in: compressed page
180 						descriptor */
181 #endif /* UNIV_DEBUG */
182 
183 #ifdef UNIV_ZIP_DEBUG
184 /**********************************************************************//**
185 Check that the compressed and decompressed pages match (declared only when UNIV_ZIP_DEBUG is defined).
186 @return TRUE if valid, FALSE if not */
187 ibool
188 page_zip_validate_low(
189 /*==================*/
190 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
191 	const page_t*		page,	/*!< in: uncompressed page */
192 	const dict_index_t*	index,	/*!< in: index of the page, if known */
193 	ibool			sloppy)	/*!< in: FALSE=strict,
194 					TRUE=ignore the MIN_REC_FLAG */
195 	MY_ATTRIBUTE((nonnull(1,2)));
196 /**********************************************************************//**
197 Check that the compressed and decompressed pages match. @return presumably TRUE if valid, FALSE if not, as for page_zip_validate_low() (TODO confirm) */
198 ibool
199 page_zip_validate(
200 /*==============*/
201 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
202 	const page_t*		page,	/*!< in: uncompressed page */
203 	const dict_index_t*	index)	/*!< in: index of the page, if known */
204 	MY_ATTRIBUTE((nonnull(1,2)));
205 #endif /* UNIV_ZIP_DEBUG */
206 
207 /**********************************************************************//**
208 Determine how big a record can be inserted without recompressing the page.
209 @return a positive number indicating the maximum size of a record
210 whose insertion is guaranteed to succeed, or zero or negative */
211 UNIV_INLINE
212 lint
213 page_zip_max_ins_size(
214 /*==================*/
215 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
216 	ibool			is_clust)/*!< in: TRUE if clustered index */
217 	MY_ATTRIBUTE((warn_unused_result));
218 
219 /**********************************************************************//**
220 Determine if enough space is available in the modification log.
221 @return TRUE if enough space is available, i.e. page_zip_write_rec() will succeed */
222 UNIV_INLINE
223 ibool
224 page_zip_available(
225 /*===============*/
226 	const page_zip_des_t*	page_zip,/*!< in: compressed page */
227 	ibool			is_clust,/*!< in: TRUE if clustered index */
228 	ulint			length,	/*!< in: combined size of the record */
229 	ulint			create)	/*!< in: nonzero=add the record to
230 					the heap */
231 	MY_ATTRIBUTE((warn_unused_result));
232 
233 /** Write an entire record to the ROW_FORMAT=COMPRESSED page.
234 The data must already have been written to the uncompressed page. All pointer parameters are declared nonnull.
235 @param[in,out]	block		ROW_FORMAT=COMPRESSED page
236 @param[in]	rec		record in the uncompressed page
237 @param[in]	index		the index that the page belongs to
238 @param[in]	offsets		rec_get_offsets(rec, index)
239 @param[in]	create		nonzero=insert, zero=update
240 @param[in,out]	mtr		mini-transaction */
241 void page_zip_write_rec(buf_block_t *block, const byte *rec,
242                         const dict_index_t *index, const rec_offs *offsets,
243                         ulint create, mtr_t *mtr)
244   MY_ATTRIBUTE((nonnull));
245 
246 /**********************************************************************//**
247 Write a BLOB pointer of a record on the leaf page of a clustered index.
248 The information must already have been updated on the uncompressed page. */
249 void
250 page_zip_write_blob_ptr(
251 /*====================*/
252 	buf_block_t*	block,	/*!< in/out: ROW_FORMAT=COMPRESSED page */
253 	const byte*	rec,	/*!< in: record whose data is being
254 				written (const; not modified through this pointer) */
255 	dict_index_t*	index,	/*!< in: index of the page */
256 	const rec_offs*	offsets,/*!< in: rec_get_offsets(rec, index) */
257 	ulint		n,	/*!< in: column index */
258 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
259 	MY_ATTRIBUTE((nonnull));
260 
261 /**********************************************************************//**
262 Write the node pointer of a record on a non-leaf compressed page. */
263 void
264 page_zip_write_node_ptr(
265 /*====================*/
266 	buf_block_t*	block,	/*!< in/out: block of the compressed page */
267 	byte*		rec,	/*!< in/out: record */
268 	ulint		size,	/*!< in: data size of rec */
269 	ulint		ptr,	/*!< in: node pointer */
270 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
271 	MY_ATTRIBUTE((nonnull));
272 
273 /** Write the DB_TRX_ID,DB_ROLL_PTR into a clustered index leaf page record.
274 @param[in,out]	block		ROW_FORMAT=COMPRESSED page
275 @param[in,out]	rec		record
276 @param[in]	offsets		rec_get_offsets(rec, index)
277 @param[in]	trx_id_col	field number of DB_TRX_ID (number of PK fields)
278 @param[in]	trx_id		DB_TRX_ID value (transaction identifier)
279 @param[in]	roll_ptr	DB_ROLL_PTR value (undo log pointer)
280 @param[in,out]	mtr		mini-transaction */
281 void
282 page_zip_write_trx_id_and_roll_ptr(
283 	buf_block_t*	block,
284 	byte*		rec,
285 	const rec_offs*	offsets,
286 	ulint		trx_id_col,
287 	trx_id_t	trx_id,
288 	roll_ptr_t	roll_ptr,
289 	mtr_t*		mtr)
290 	MY_ATTRIBUTE((nonnull));
291 
292 /** Modify the delete-mark flag of a ROW_FORMAT=COMPRESSED record.
293 @param[in,out]  block   buffer block
294 @param[in,out]  rec     record on a physical index page
295 @param[in]      flag    the new value of the delete-mark flag
296 @param[in,out]  mtr     mini-transaction  */
297 void page_zip_rec_set_deleted(buf_block_t *block, rec_t *rec, bool flag,
298                               mtr_t *mtr)
299   MY_ATTRIBUTE((nonnull));
300 
301 /**********************************************************************//**
302 Insert a record into the dense page directory. */
303 void
304 page_zip_dir_insert(
305 /*================*/
306 	page_cur_t*	cursor,	/*!< in/out: page cursor */
307 	uint16_t	free_rec,/*!< in: record from which rec was
308 				allocated, or 0 */
309 	byte*		rec,	/*!< in: record to insert */
310 	mtr_t*		mtr)	/*!< in/out: mini-transaction */
311 	MY_ATTRIBUTE((nonnull(1,3,4)));
312 
313 /** Shift the dense page directory and the array of BLOB pointers
314 when a record is deleted.
315 @param[in,out]  block   index page
316 @param[in,out]  rec     record being deleted
317 @param[in]      index   the index that the page belongs to
318 @param[in]      offsets rec_get_offsets(rec, index)
319 @param[in]	free	previous start of the free list (not listed in nonnull; presumably may be NULL - confirm)
320 @param[in,out]  mtr     mini-transaction */
321 void page_zip_dir_delete(buf_block_t *block, byte *rec,
322                          const dict_index_t *index, const rec_offs *offsets,
323                          const byte *free, mtr_t *mtr)
324   MY_ATTRIBUTE((nonnull(1,2,3,4,6)));
325 
326 /**********************************************************************//**
327 Reorganize and compress a page.  This is a low-level operation for
328 compressed pages, to be used when page_zip_compress() fails.
329 On success, redo log will be written.
330 The function btr_page_reorganize() should be preferred whenever possible.
331 IMPORTANT: if page_zip_reorganize() is invoked on a leaf page of a
332 non-clustered index, the caller must update the insert buffer free
333 bits in the same mini-transaction in such a way that the modification
334 will be redo-logged.
335 @retval true on success
336 @retval false on failure; the block_zip will be left intact */
337 bool
338 page_zip_reorganize(
339 	buf_block_t*	block,	/*!< in/out: page with compressed page;
340 				on the compressed page, in: size;
341 				out: data, n_blobs,
342 				m_start, m_end, m_nonempty */
343 	dict_index_t*	index,	/*!< in: index of the B-tree node */
344 	ulint		z_level,/*!< in: compression level */
345 	mtr_t*		mtr,	/*!< in: mini-transaction */
346 	bool		restore = false)/*!< in: whether to restore on failure (default: false) */
347 	MY_ATTRIBUTE((nonnull));
348 
349 /**********************************************************************//**
350 Copy the records of a page byte for byte.  Do not copy the page header
351 or trailer, except those B-tree header fields that are directly
352 related to the storage of records.  Also copy PAGE_MAX_TRX_ID.
353 NOTE: The caller must update the lock table and the adaptive hash index. */
354 void
355 page_zip_copy_recs(
356 	buf_block_t*		block,		/*!< in/out: buffer block */
357 	const page_zip_des_t*	src_zip,	/*!< in: compressed page (source) */
358 	const page_t*		src,		/*!< in: page (source) */
359 	dict_index_t*		index,		/*!< in: index of the B-tree */
360 	mtr_t*			mtr);		/*!< in: mini-transaction */
361 #endif /* !UNIV_INNOCHECKSUM */
362 
363 /** Calculate the compressed page checksum.
364 @param[in]	data			compressed page
365 @param[in]	size			size of compressed page
366 @param[in]	algo			algorithm to use
367 @return page checksum calculated with algo */
368 uint32_t
369 page_zip_calc_checksum(
370 	const void*			data,
371 	ulint				size,
372 	srv_checksum_algorithm_t	algo);
373 
374 /** Validate the checksum on a ROW_FORMAT=COMPRESSED page.
375 @param[in] data    ROW_FORMAT=COMPRESSED page
376 @param[in] size    size of the page, in bytes
377 @return whether the stored checksum matches innodb_checksum_algorithm */
378 bool page_zip_verify_checksum(const byte *data, size_t size);
379 
380 #ifndef UNIV_INNOCHECKSUM
381 /**********************************************************************//**
382 Reset the counters that are used for filling
383 INFORMATION_SCHEMA.innodb_cmp_per_index. */
384 UNIV_INLINE
385 void
386 page_zip_reset_stat_per_index();
387 /*===========================*/
388 
389 #include "page0zip.inl"
390 #endif /* !UNIV_INNOCHECKSUM */
391 
392 #endif /* page0zip_h */
393