1/*****************************************************************************
2
3Copyright (c) 2005, 2021, Oracle and/or its affiliates.
4Copyright (c) 2012, Facebook Inc.
5
6This program is free software; you can redistribute it and/or modify
7it under the terms of the GNU General Public License, version 2.0,
8as published by the Free Software Foundation.
9
10This program is also distributed with certain software (including
11but not limited to OpenSSL) that is licensed under separate terms,
12as designated in a particular file or component or in included license
13documentation.  The authors of MySQL hereby grant you an additional
14permission to link the program and your derivative works with the
15separately licensed software that they have included with MySQL.
16
17This program is distributed in the hope that it will be useful,
18but WITHOUT ANY WARRANTY; without even the implied warranty of
19MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20GNU General Public License, version 2.0, for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
25
26*****************************************************************************/
27
28/**************************************************//**
29@file include/page0zip.ic
30Compressed page interface
31
32Created June 2005 by Marko Makela
33*******************************************************/
34
/* When UNIV_MATERIALIZE is defined, UNIV_INLINE is redefined to expand
to nothing, so the definitions in this file that are marked UNIV_INLINE
are compiled without that specifier. */
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE
#endif
39
40#include "page0zip.h"
41#include "mtr0log.h"
42#include "page0page.h"
43#include "srv0srv.h"
44
45/* The format of compressed pages is as follows.
46
47The header and trailer of the uncompressed pages, excluding the page
48directory in the trailer, are copied as is to the header and trailer
49of the compressed page.
50
51At the end of the compressed page, there is a dense page directory
52pointing to every user record contained on the page, including deleted
53records on the free list.  The dense directory is indexed in the
54collation order, i.e., in the order in which the record list is
55linked on the uncompressed page.  The infimum and supremum records are
56excluded.  The two most significant bits of the entries are allocated
57for the delete-mark and an n_owned flag indicating the last record in
58a chain of records pointed to from the sparse page directory on the
59uncompressed page.
60
61The data between PAGE_ZIP_START and the last page directory entry will
62be written in compressed format, starting at offset PAGE_DATA.
63Infimum and supremum records are not stored.  We exclude the
64REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
65from the dense page directory stored at the end of the compressed
66page.
67
68The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
69roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
70externally stored columns are stored separately, in ascending order of
71heap_no and column index, starting backwards from the dense page
72directory.
73
74The compressed data stream may be followed by a modification log
75covering the compressed portion of the page, as follows.
76
77MODIFICATION LOG ENTRY FORMAT
78- write record:
79  - (heap_no - 1) << 1 (1..2 bytes)
80  - extra bytes backwards
81  - data bytes
82- clear record:
83  - (heap_no - 1) << 1 | 1 (1..2 bytes)
84
85The integer values are stored in a variable-length format:
86- 0xxxxxxx: 0..127
87- 1xxxxxxx xxxxxxxx: 0..32767
88
89The end of the modification log is marked by a 0 byte.
90
91In summary, the compressed page looks like this:
92
93(1) Uncompressed page header (PAGE_DATA bytes)
94(2) Compressed index information
95(3) Compressed page data
96(4) Page modification log (page_zip->m_start..page_zip->m_end)
97(5) Empty zero-filled space
98(6) BLOB pointers (on leaf pages)
99  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
100  - in descending collation order
101(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
102  - indexed by heap_no
103  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
104  - REC_NODE_PTR_SIZE for non-leaf pages
105  - 0 otherwise
106(8) dense page directory, stored backwards
107  - n_dense = n_heap - 2
108  - existing records in ascending collation order
109  - deleted records (free list) in link order
110*/
111
112/**********************************************************************//**
113Determine the size of a compressed page in bytes.
114@return size in bytes */
115UNIV_INLINE
116ulint
117page_zip_get_size(
118/*==============*/
119	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
120{
121	ulint	size;
122
123	if (!page_zip->ssize) {
124		return(0);
125	}
126
127	size = (UNIV_ZIP_SIZE_MIN >> 1) << page_zip->ssize;
128
129	ut_ad(size >= UNIV_ZIP_SIZE_MIN);
130	ut_ad(size <= UNIV_PAGE_SIZE);
131
132	return(size);
133}
134/**********************************************************************//**
135Set the size of a compressed page in bytes. */
136UNIV_INLINE
137void
138page_zip_set_size(
139/*==============*/
140	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
141	ulint		size)		/*!< in: size in bytes */
142{
143	if (size) {
144		int	ssize;
145
146		ut_ad(ut_is_2pow(size));
147
148		for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
149		}
150
151		page_zip->ssize = ssize;
152	} else {
153		page_zip->ssize = 0;
154	}
155
156	ut_ad(page_zip_get_size(page_zip) == size);
157}
158
159#ifndef UNIV_HOTBACKUP
160/** Determine if a record is so big that it needs to be stored externally.
161@param[in]	rec_size	length of the record in bytes
162@param[in]	comp		nonzero=compact format
163@param[in]	n_fields	number of fields in the record; ignored if
164tablespace is not compressed
165@param[in]	page_size	page size
166@return FALSE if the entire record can be stored locally on the page */
167UNIV_INLINE
168ibool
169page_zip_rec_needs_ext(
170	ulint			rec_size,
171	ulint			comp,
172	ulint			n_fields,
173	const page_size_t&	page_size)
174{
175	ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
176	ut_ad(comp || !page_size.is_compressed());
177
178#if UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE
179	if (rec_size >= REC_MAX_DATA_SIZE) {
180		return(TRUE);
181	}
182#endif
183
184	if (page_size.is_compressed()) {
185		ut_ad(comp);
186		/* On a compressed page, there is a two-byte entry in
187		the dense page directory for every record.  But there
188		is no record header.  There should be enough room for
189		one record on an empty leaf page.  Subtract 1 byte for
190		the encoded heap number.  Check also the available space
191		on the uncompressed page. */
192		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
193		       >= page_zip_empty_size(n_fields, page_size.physical())
194		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
195	}
196
197	return(rec_size >= page_get_free_space_of_empty(comp) / 2);
198}
199#endif /* !UNIV_HOTBACKUP */
200
201#ifdef UNIV_DEBUG
202/**********************************************************************//**
203Validate a compressed page descriptor.
204@return TRUE if ok */
205UNIV_INLINE
206ibool
207page_zip_simple_validate(
208/*=====================*/
209	const page_zip_des_t*	page_zip)/*!< in: compressed page descriptor */
210{
211	ut_ad(page_zip);
212	ut_ad(page_zip->data);
213	ut_ad(page_zip->ssize <= PAGE_ZIP_SSIZE_MAX);
214	ut_ad(page_zip_get_size(page_zip)
215	      > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
216	ut_ad(page_zip->m_start <= page_zip->m_end);
217	ut_ad(page_zip->m_end < page_zip_get_size(page_zip));
218	ut_ad(page_zip->n_blobs
219	      < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE);
220	return(TRUE);
221}
222#endif /* UNIV_DEBUG */
223
224/**********************************************************************//**
225Determine if the length of the page trailer.
226@return length of the page trailer, in bytes, not including the
227terminating zero byte of the modification log */
228UNIV_INLINE
229ibool
230page_zip_get_trailer_len(
231/*=====================*/
232	const page_zip_des_t*	page_zip,/*!< in: compressed page */
233	ibool			is_clust)/*!< in: TRUE if clustered index */
234{
235	ulint	uncompressed_size;
236
237	ut_ad(page_zip_simple_validate(page_zip));
238	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
239
240	if (!page_is_leaf(page_zip->data)) {
241		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
242			+ REC_NODE_PTR_SIZE;
243		ut_ad(!page_zip->n_blobs);
244	} else if (is_clust) {
245		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
246			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
247	} else {
248		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
249		ut_ad(!page_zip->n_blobs);
250	}
251
252	return((page_dir_get_n_heap(page_zip->data) - 2)
253	       * uncompressed_size
254	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
255}
256
257/**********************************************************************//**
258Determine how big record can be inserted without recompressing the page.
259@return a positive number indicating the maximum size of a record
260whose insertion is guaranteed to succeed, or zero or negative */
261UNIV_INLINE
262lint
263page_zip_max_ins_size(
264/*==================*/
265	const page_zip_des_t*	page_zip,/*!< in: compressed page */
266	ibool			is_clust)/*!< in: TRUE if clustered index */
267{
268	ulint	trailer_len;
269
270	trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
271
272	/* When a record is created, a pointer may be added to
273	the dense directory.
274	Likewise, space for the columns that will not be
275	compressed will be allocated from the page trailer.
276	Also the BLOB pointers will be allocated from there, but
277	we may as well count them in the length of the record. */
278
279	trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
280
281	return((lint) page_zip_get_size(page_zip)
282	       - trailer_len - page_zip->m_end
283	       - (REC_N_NEW_EXTRA_BYTES - 2));
284}
285
286/**********************************************************************//**
287Determine if enough space is available in the modification log.
288@return TRUE if enough space is available */
289UNIV_INLINE
290ibool
291page_zip_available(
292/*===============*/
293	const page_zip_des_t*	page_zip,/*!< in: compressed page */
294	ibool			is_clust,/*!< in: TRUE if clustered index */
295	ulint			length,	/*!< in: combined size of the record */
296	ulint			create)	/*!< in: nonzero=add the record to
297					the heap */
298{
299	ulint	trailer_len;
300
301	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
302
303	trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
304
305	/* Subtract the fixed extra bytes and add the maximum
306	space needed for identifying the record (encoded heap_no). */
307	length -= REC_N_NEW_EXTRA_BYTES - 2;
308
309	if (create > 0) {
310		/* When a record is created, a pointer may be added to
311		the dense directory.
312		Likewise, space for the columns that will not be
313		compressed will be allocated from the page trailer.
314		Also the BLOB pointers will be allocated from there, but
315		we may as well count them in the length of the record. */
316
317		trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
318	}
319
320	return(length + trailer_len + page_zip->m_end
321	       < page_zip_get_size(page_zip));
322}
323
324/**********************************************************************//**
325Initialize a compressed page descriptor. */
326UNIV_INLINE
327void
328page_zip_des_init(
329/*==============*/
330	page_zip_des_t*	page_zip)	/*!< in/out: compressed page
331					descriptor */
332{
333	memset(page_zip, 0, sizeof *page_zip);
334}
335
/**********************************************************************//**
Write a log record of writing to the uncompressed header portion of a page.
Only the prototype appears here; page_zip_write_header() in this file
calls this function when it is given a non-NULL mini-transaction and
UNIV_HOTBACKUP is not defined. */
void
page_zip_write_header_log(
/*======================*/
	const byte*	data,/*!< in: data on the uncompressed page */
	ulint		length,	/*!< in: length of the data */
	mtr_t*		mtr);	/*!< in: mini-transaction */
344
345/**********************************************************************//**
346Write data to the uncompressed header portion of a page.  The data must
347already have been written to the uncompressed page.
348However, the data portion of the uncompressed page may differ from
349the compressed page when a record is being inserted in
350page_cur_insert_rec_zip(). */
351UNIV_INLINE
352void
353page_zip_write_header(
354/*==================*/
355	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
356	const byte*	str,	/*!< in: address on the uncompressed page */
357	ulint		length,	/*!< in: length of the data */
358	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
359{
360	ulint	pos;
361
362	ut_ad(page_zip_simple_validate(page_zip));
363	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
364
365	pos = page_offset(str);
366
367	ut_ad(pos < PAGE_DATA);
368
369	memcpy(page_zip->data + pos, str, length);
370
371	/* The following would fail in page_cur_insert_rec_zip(). */
372	/* ut_ad(page_zip_validate(page_zip, str - pos)); */
373
374	if (mtr) {
375#ifndef UNIV_HOTBACKUP
376		page_zip_write_header_log(str, length, mtr);
377#endif /* !UNIV_HOTBACKUP */
378	}
379}
380
381/**********************************************************************//**
382Write a log record of compressing an index page without the data on the page. */
383UNIV_INLINE
384void
385page_zip_compress_write_log_no_data(
386/*================================*/
387	ulint		level,	/*!< in: compression level */
388	const page_t*	page,	/*!< in: page that is compressed */
389	dict_index_t*	index,	/*!< in: index */
390	mtr_t*		mtr)	/*!< in: mtr */
391{
392	byte* log_ptr = mlog_open_and_write_index(
393		mtr, page, index, MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1);
394
395	if (log_ptr) {
396		mach_write_to_1(log_ptr, level);
397		mlog_close(mtr, log_ptr + 1);
398	}
399}
400
401/**********************************************************************//**
402Parses a log record of compressing an index page without the data.
403@return end of log record or NULL */
404UNIV_INLINE
405byte*
406page_zip_parse_compress_no_data(
407/*============================*/
408	byte*		ptr,		/*!< in: buffer */
409	byte*		end_ptr,	/*!< in: buffer end */
410	page_t*		page,		/*!< in: uncompressed page */
411	page_zip_des_t*	page_zip,	/*!< out: compressed page */
412	dict_index_t*	index)		/*!< in: index */
413{
414	ulint	level;
415	if (end_ptr == ptr) {
416		return(NULL);
417	}
418
419	level = mach_read_from_1(ptr);
420
421	/* If page compression fails then there must be something wrong
422	because a compress log record is logged only if the compression
423	was successful. Crash in this case. */
424
425	if (page
426	    && !page_zip_compress(page_zip, page, index, level, NULL, NULL)) {
427		ut_error;
428	}
429
430	return(ptr + 1);
431}
432
433/**********************************************************************//**
434Reset the counters used for filling
435INFORMATION_SCHEMA.innodb_cmp_per_index. */
436UNIV_INLINE
437void
438page_zip_reset_stat_per_index()
439/*===========================*/
440{
441	mutex_enter(&page_zip_stat_per_index_mutex);
442
443	page_zip_stat_per_index.erase(
444		page_zip_stat_per_index.begin(),
445		page_zip_stat_per_index.end());
446
447	mutex_exit(&page_zip_stat_per_index_mutex);
448}
449
/* Under UNIV_MATERIALIZE, set UNIV_INLINE back to UNIV_INLINE_ORIGINAL,
undoing the empty redefinition made at the top of this file. */
#ifdef UNIV_MATERIALIZE
# undef UNIV_INLINE
# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
#endif
454