1/*****************************************************************************
2
3Copyright (c) 2005, 2013, Oracle and/or its affiliates. All Rights Reserved.
4
5This program is free software; you can redistribute it and/or modify it under
6the terms of the GNU General Public License as published by the Free Software
7Foundation; version 2 of the License.
8
9This program is distributed in the hope that it will be useful, but WITHOUT
10ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
11FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
12
13You should have received a copy of the GNU General Public License along with
14this program; if not, write to the Free Software Foundation, Inc.,
1551 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
16
17*****************************************************************************/
18
19/**************************************************//**
20@file include/page0zip.ic
21Compressed page interface
22
23Created June 2005 by Marko Makela
24*******************************************************/
25
26#ifdef UNIV_MATERIALIZE
27# undef UNIV_INLINE
28# define UNIV_INLINE
29#endif
30
31#include "page0zip.h"
32#include "page0page.h"
33
34/* The format of compressed pages is as follows.
35
36The header and trailer of the uncompressed pages, excluding the page
37directory in the trailer, are copied as is to the header and trailer
38of the compressed page.
39
40At the end of the compressed page, there is a dense page directory
41pointing to every user record contained on the page, including deleted
42records on the free list.  The dense directory is indexed in the
43collation order, i.e., in the order in which the record list is
44linked on the uncompressed page.  The infimum and supremum records are
45excluded.  The two most significant bits of the entries are allocated
46for the delete-mark and an n_owned flag indicating the last record in
47a chain of records pointed to from the sparse page directory on the
48uncompressed page.
49
50The data between PAGE_ZIP_START and the last page directory entry will
51be written in compressed format, starting at offset PAGE_DATA.
52Infimum and supremum records are not stored.  We exclude the
53REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
54from the dense page directory stored at the end of the compressed
55page.
56
57The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
58roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
59externally stored columns are stored separately, in ascending order of
60heap_no and column index, starting backwards from the dense page
61directory.
62
63The compressed data stream may be followed by a modification log
64covering the compressed portion of the page, as follows.
65
66MODIFICATION LOG ENTRY FORMAT
67- write record:
68  - (heap_no - 1) << 1 (1..2 bytes)
69  - extra bytes backwards
70  - data bytes
71- clear record:
72  - (heap_no - 1) << 1 | 1 (1..2 bytes)
73
74The integer values are stored in a variable-length format:
75- 0xxxxxxx: 0..127
76- 1xxxxxxx xxxxxxxx: 0..32767
77
78The end of the modification log is marked by a 0 byte.
79
80In summary, the compressed page looks like this:
81
82(1) Uncompressed page header (PAGE_DATA bytes)
83(2) Compressed index information
84(3) Compressed page data
85(4) Page modification log (page_zip->m_start..page_zip->m_end)
86(5) Empty zero-filled space
87(6) BLOB pointers (on leaf pages)
88  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
89  - in descending collation order
90(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
91  - indexed by heap_no
92  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
93  - REC_NODE_PTR_SIZE for non-leaf pages
94  - 0 otherwise
95(8) dense page directory, stored backwards
96  - n_dense = n_heap - 2
97  - existing records in ascending collation order
98  - deleted records (free list) in link order
99*/
100
101/** Start offset of the area that will be compressed */
102#define PAGE_ZIP_START		PAGE_NEW_SUPREMUM_END
103/** Size of an compressed page directory entry */
104#define PAGE_ZIP_DIR_SLOT_SIZE	2
105/** Mask of record offsets */
106#define PAGE_ZIP_DIR_SLOT_MASK	0x3fff
107/** 'owned' flag */
108#define PAGE_ZIP_DIR_SLOT_OWNED	0x4000
109/** 'deleted' flag */
110#define PAGE_ZIP_DIR_SLOT_DEL	0x8000
111
112/**********************************************************************//**
113Determine the size of a compressed page in bytes.
114@return	size in bytes */
115UNIV_INLINE
116ulint
117page_zip_get_size(
118/*==============*/
119	const page_zip_des_t*	page_zip)	/*!< in: compressed page */
120{
121	ulint	size;
122
123	if (UNIV_UNLIKELY(!page_zip->ssize)) {
124		return(0);
125	}
126
127	size = (PAGE_ZIP_MIN_SIZE >> 1) << page_zip->ssize;
128
129	ut_ad(size >= PAGE_ZIP_MIN_SIZE);
130	ut_ad(size <= UNIV_PAGE_SIZE);
131
132	return(size);
133}
134/**********************************************************************//**
135Set the size of a compressed page in bytes. */
136UNIV_INLINE
137void
138page_zip_set_size(
139/*==============*/
140	page_zip_des_t*	page_zip,	/*!< in/out: compressed page */
141	ulint		size)		/*!< in: size in bytes */
142{
143	if (size) {
144		int	ssize;
145
146		ut_ad(ut_is_2pow(size));
147
148		for (ssize = 1; size > (ulint) (512 << ssize); ssize++) {
149		}
150
151		page_zip->ssize = ssize;
152	} else {
153		page_zip->ssize = 0;
154	}
155
156	ut_ad(page_zip_get_size(page_zip) == size);
157}
158
159#ifndef UNIV_HOTBACKUP
160/**********************************************************************//**
161Determine if a record is so big that it needs to be stored externally.
162@return	FALSE if the entire record can be stored locally on the page */
163UNIV_INLINE
164ibool
165page_zip_rec_needs_ext(
166/*===================*/
167	ulint	rec_size,	/*!< in: length of the record in bytes */
168	ulint	comp,		/*!< in: nonzero=compact format */
169	ulint	n_fields,	/*!< in: number of fields in the record;
170				ignored if zip_size == 0 */
171	ulint	zip_size)	/*!< in: compressed page size in bytes, or 0 */
172{
173	ut_ad(rec_size > comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES);
174	ut_ad(ut_is_2pow(zip_size));
175	ut_ad(comp || !zip_size);
176
177#if UNIV_PAGE_SIZE > REC_MAX_DATA_SIZE
178	if (UNIV_UNLIKELY(rec_size >= REC_MAX_DATA_SIZE)) {
179		return(TRUE);
180	}
181#endif
182
183	if (UNIV_UNLIKELY(zip_size)) {
184		ut_ad(comp);
185		/* On a compressed page, there is a two-byte entry in
186		the dense page directory for every record.  But there
187		is no record header.  There should be enough room for
188		one record on an empty leaf page.  Subtract 1 byte for
189		the encoded heap number.  Check also the available space
190		on the uncompressed page. */
191		return(rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1)
192		       >= page_zip_empty_size(n_fields, zip_size)
193		       || rec_size >= page_get_free_space_of_empty(TRUE) / 2);
194	}
195
196	return(rec_size >= page_get_free_space_of_empty(comp) / 2);
197}
198#endif /* !UNIV_HOTBACKUP */
199
200#ifdef UNIV_DEBUG
201/**********************************************************************//**
202Validate a compressed page descriptor.
203@return	TRUE if ok */
204UNIV_INLINE
205ibool
206page_zip_simple_validate(
207/*=====================*/
208	const page_zip_des_t*	page_zip)/*!< in: compressed page descriptor */
209{
210	ut_ad(page_zip);
211	ut_ad(page_zip->data);
212	ut_ad(page_zip->ssize < PAGE_ZIP_NUM_SSIZE);
213	ut_ad(page_zip_get_size(page_zip)
214	      > PAGE_DATA + PAGE_ZIP_DIR_SLOT_SIZE);
215	ut_ad(page_zip->m_start <= page_zip->m_end);
216	ut_ad(page_zip->m_end < page_zip_get_size(page_zip));
217	ut_ad(page_zip->n_blobs
218	      < page_zip_get_size(page_zip) / BTR_EXTERN_FIELD_REF_SIZE);
219	return(TRUE);
220}
221#endif /* UNIV_DEBUG */
222
223/**********************************************************************//**
224Determine if the length of the page trailer.
225@return length of the page trailer, in bytes, not including the
226terminating zero byte of the modification log */
227UNIV_INLINE
228ibool
229page_zip_get_trailer_len(
230/*=====================*/
231	const page_zip_des_t*	page_zip,/*!< in: compressed page */
232	ibool			is_clust)/*!< in: TRUE if clustered index */
233{
234	ulint	uncompressed_size;
235
236	ut_ad(page_zip_simple_validate(page_zip));
237	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
238
239	if (UNIV_UNLIKELY(!page_is_leaf(page_zip->data))) {
240		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
241			+ REC_NODE_PTR_SIZE;
242		ut_ad(!page_zip->n_blobs);
243	} else if (UNIV_UNLIKELY(is_clust)) {
244		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE
245			+ DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
246	} else {
247		uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
248		ut_ad(!page_zip->n_blobs);
249	}
250
251	return((page_dir_get_n_heap(page_zip->data) - 2)
252	       * uncompressed_size
253	       + page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
254}
255
256/**********************************************************************//**
257Determine how big record can be inserted without recompressing the page.
258@return a positive number indicating the maximum size of a record
259whose insertion is guaranteed to succeed, or zero or negative */
260UNIV_INLINE
261lint
262page_zip_max_ins_size(
263/*==================*/
264	const page_zip_des_t*	page_zip,/*!< in: compressed page */
265	ibool			is_clust)/*!< in: TRUE if clustered index */
266{
267	ulint	trailer_len;
268
269	trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
270
271	/* When a record is created, a pointer may be added to
272	the dense directory.
273	Likewise, space for the columns that will not be
274	compressed will be allocated from the page trailer.
275	Also the BLOB pointers will be allocated from there, but
276	we may as well count them in the length of the record. */
277
278	trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
279
280	return((lint) page_zip_get_size(page_zip)
281	       - trailer_len - page_zip->m_end
282	       - (REC_N_NEW_EXTRA_BYTES - 2));
283}
284
285/**********************************************************************//**
286Determine if enough space is available in the modification log.
287@return	TRUE if enough space is available */
288UNIV_INLINE
289ibool
290page_zip_available(
291/*===============*/
292	const page_zip_des_t*	page_zip,/*!< in: compressed page */
293	ibool			is_clust,/*!< in: TRUE if clustered index */
294	ulint			length,	/*!< in: combined size of the record */
295	ulint			create)	/*!< in: nonzero=add the record to
296					the heap */
297{
298	ulint	trailer_len;
299
300	ut_ad(length > REC_N_NEW_EXTRA_BYTES);
301
302	trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
303
304	/* Subtract the fixed extra bytes and add the maximum
305	space needed for identifying the record (encoded heap_no). */
306	length -= REC_N_NEW_EXTRA_BYTES - 2;
307
308	if (UNIV_UNLIKELY(create)) {
309		/* When a record is created, a pointer may be added to
310		the dense directory.
311		Likewise, space for the columns that will not be
312		compressed will be allocated from the page trailer.
313		Also the BLOB pointers will be allocated from there, but
314		we may as well count them in the length of the record. */
315
316		trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
317	}
318
319	return(UNIV_LIKELY(length
320			   + trailer_len
321			   + page_zip->m_end
322			   < page_zip_get_size(page_zip)));
323}
324
325/**********************************************************************//**
326Initialize a compressed page descriptor. */
327UNIV_INLINE
328void
329page_zip_des_init(
330/*==============*/
331	page_zip_des_t*	page_zip)	/*!< in/out: compressed page
332					descriptor */
333{
334	memset(page_zip, 0, sizeof *page_zip);
335}
336
337/**********************************************************************//**
338Write a log record of writing to the uncompressed header portion of a page. */
339UNIV_INTERN
340void
341page_zip_write_header_log(
342/*======================*/
343	const byte*	data,/*!< in: data on the uncompressed page */
344	ulint		length,	/*!< in: length of the data */
345	mtr_t*		mtr);	/*!< in: mini-transaction */
346
347/**********************************************************************//**
348Write data to the uncompressed header portion of a page.  The data must
349already have been written to the uncompressed page.
350However, the data portion of the uncompressed page may differ from
351the compressed page when a record is being inserted in
352page_cur_insert_rec_zip(). */
353UNIV_INLINE
354void
355page_zip_write_header(
356/*==================*/
357	page_zip_des_t*	page_zip,/*!< in/out: compressed page */
358	const byte*	str,	/*!< in: address on the uncompressed page */
359	ulint		length,	/*!< in: length of the data */
360	mtr_t*		mtr)	/*!< in: mini-transaction, or NULL */
361{
362	ulint	pos;
363
364	ut_ad(PAGE_ZIP_MATCH(str, page_zip));
365	ut_ad(page_zip_simple_validate(page_zip));
366	UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
367
368	pos = page_offset(str);
369
370	ut_ad(pos < PAGE_DATA);
371
372	memcpy(page_zip->data + pos, str, length);
373
374	/* The following would fail in page_cur_insert_rec_zip(). */
375	/* ut_ad(page_zip_validate(page_zip, str - pos)); */
376
377	if (UNIV_LIKELY_NULL(mtr)) {
378#ifndef UNIV_HOTBACKUP
379		page_zip_write_header_log(str, length, mtr);
380#endif /* !UNIV_HOTBACKUP */
381	}
382}
383
384#ifdef UNIV_MATERIALIZE
385# undef UNIV_INLINE
386# define UNIV_INLINE	UNIV_INLINE_ORIGINAL
387#endif
388