1/*****************************************************************************
2
3Copyright (c) 2005, 2020, Oracle and/or its affiliates. All Rights Reserved.
4Copyright (c) 2012, Facebook Inc.
5
6This program is free software; you can redistribute it and/or modify it under
7the terms of the GNU General Public License, version 2.0, as published by the
8Free Software Foundation.
9
10This program is also distributed with certain software (including but not
11limited to OpenSSL) that is licensed under separate terms, as designated in a
12particular file or component or in included license documentation. The authors
13of MySQL hereby grant you an additional permission to link the program and
14your derivative works with the separately licensed software that they have
15included with MySQL.
16
17This program is distributed in the hope that it will be useful, but WITHOUT
18ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
19FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
20for more details.
21
22You should have received a copy of the GNU General Public License along with
23this program; if not, write to the Free Software Foundation, Inc.,
2451 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
25
26*****************************************************************************/
27
28/** @file include/page0zip.ic
29 Compressed page interface
30
31 Created June 2005 by Marko Makela
32 *******************************************************/
33
34#ifdef UNIV_MATERIALIZE
35#undef UNIV_INLINE
36#define UNIV_INLINE
37#endif
38
39#include "mtr0log.h"
40#include "page0page.h"
41#include "page0zip.h"
42#include "srv0srv.h"
43
44/* The format of compressed pages is as follows.
45
46The header and trailer of the uncompressed pages, excluding the page
47directory in the trailer, are copied as is to the header and trailer
48of the compressed page.
49
50At the end of the compressed page, there is a dense page directory
51pointing to every user record contained on the page, including deleted
52records on the free list.  The dense directory is indexed in the
53collation order, i.e., in the order in which the record list is
54linked on the uncompressed page.  The infimum and supremum records are
55excluded.  The two most significant bits of the entries are allocated
56for the delete-mark and an n_owned flag indicating the last record in
57a chain of records pointed to from the sparse page directory on the
58uncompressed page.
59
60The data between PAGE_ZIP_START and the last page directory entry will
61be written in compressed format, starting at offset PAGE_DATA.
62Infimum and supremum records are not stored.  We exclude the
63REC_N_NEW_EXTRA_BYTES in every record header.  These can be recovered
64from the dense page directory stored at the end of the compressed
65page.
66
67The fields node_ptr (in non-leaf B-tree nodes; level>0), trx_id and
68roll_ptr (in leaf B-tree nodes; level=0), and BLOB pointers of
69externally stored columns are stored separately, in ascending order of
70heap_no and column index, starting backwards from the dense page
71directory.
72
73The compressed data stream may be followed by a modification log
74covering the compressed portion of the page, as follows.
75
76MODIFICATION LOG ENTRY FORMAT
77- write record:
78  - (heap_no - 1) << 1 (1..2 bytes)
79  - extra bytes backwards
80  - data bytes
81- clear record:
82  - (heap_no - 1) << 1 | 1 (1..2 bytes)
83
84The integer values are stored in a variable-length format:
85- 0xxxxxxx: 0..127
86- 1xxxxxxx xxxxxxxx: 0..32767
87
88The end of the modification log is marked by a 0 byte.
89
90In summary, the compressed page looks like this:
91
92(1) Uncompressed page header (PAGE_DATA bytes)
93(2) Compressed index information
94(3) Compressed page data
95(4) Page modification log (page_zip->m_start..page_zip->m_end)
96(5) Empty zero-filled space
97(6) BLOB pointers (on leaf pages)
98  - BTR_EXTERN_FIELD_REF_SIZE for each externally stored column
99  - in descending collation order
100(7) Uncompressed columns of user records, n_dense * uncompressed_size bytes,
101  - indexed by heap_no
102  - DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN for leaf pages of clustered indexes
103  - REC_NODE_PTR_SIZE for non-leaf pages
104  - 0 otherwise
105(8) dense page directory, stored backwards
106  - n_dense = n_heap - 2
107  - existing records in ascending collation order
108  - deleted records (free list) in link order
109*/
110
111/** Set the size of a compressed page in bytes. */
112UNIV_INLINE
113void page_zip_set_size(page_zip_des_t *page_zip, /*!< in/out: compressed page */
114                       ulint size)               /*!< in: size in bytes */
115{
116  if (size) {
117    int ssize;
118
119    ut_ad(ut_is_2pow(size));
120
121    for (ssize = 1; size > (ulint)(512 << ssize); ssize++) {
122    }
123
124    page_zip->ssize = ssize;
125  } else {
126    page_zip->ssize = 0;
127  }
128
129  ut_ad(page_zip_get_size(page_zip) == size);
130}
131
132#ifndef UNIV_HOTBACKUP
133/** Determine if a record is so big that it needs to be stored externally.
134@param[in]	rec_size	length of the record in bytes
135@param[in]	comp		nonzero=compact format
136@param[in]	n_fields	number of fields in the record; ignored if
137tablespace is not compressed
138@param[in]	page_size	page size
139@return false if the entire record can be stored locally on the page */
140UNIV_INLINE
141ibool page_zip_rec_needs_ext(ulint rec_size, ulint comp, ulint n_fields,
142                             const page_size_t &page_size) {
143  ut_ad(rec_size > (comp ? REC_N_NEW_EXTRA_BYTES : REC_N_OLD_EXTRA_BYTES));
144  ut_ad(comp || !page_size.is_compressed());
145
146  if (UNIV_PAGE_SIZE_MAX > REC_MAX_DATA_SIZE && rec_size >= REC_MAX_DATA_SIZE) {
147    return (TRUE);
148  }
149
150  if (page_size.is_compressed()) {
151    ut_ad(comp);
152    /* On a compressed page, there is a two-byte entry in
153    the dense page directory for every record.  But there
154    is no record header.  There should be enough room for
155    one record on an empty leaf page.  Subtract 1 byte for
156    the encoded heap number.  Check also the available space
157    on the uncompressed page. */
158    return (rec_size - (REC_N_NEW_EXTRA_BYTES - 2 - 1) >=
159                page_zip_empty_size(n_fields, page_size.physical()) ||
160            rec_size >= page_get_free_space_of_empty(TRUE) / 2);
161  }
162
163  return (rec_size >= page_get_free_space_of_empty(comp) / 2);
164}
165#endif /* !UNIV_HOTBACKUP */
166
167/** Determine if the length of the page trailer.
168 @return length of the page trailer, in bytes, not including the
169 terminating zero byte of the modification log */
170UNIV_INLINE
171ibool page_zip_get_trailer_len(
172    const page_zip_des_t *page_zip, /*!< in: compressed page */
173    bool is_clust)                  /*!< in: TRUE if clustered index */
174{
175  ulint uncompressed_size;
176
177  ut_ad(page_zip_simple_validate(page_zip));
178  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
179
180  if (!page_is_leaf(page_zip->data)) {
181    uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE + REC_NODE_PTR_SIZE;
182    ut_ad(!page_zip->n_blobs);
183  } else if (is_clust) {
184    uncompressed_size =
185        PAGE_ZIP_DIR_SLOT_SIZE + DATA_TRX_ID_LEN + DATA_ROLL_PTR_LEN;
186  } else {
187    uncompressed_size = PAGE_ZIP_DIR_SLOT_SIZE;
188    ut_ad(!page_zip->n_blobs);
189  }
190
191  return ((page_dir_get_n_heap(page_zip->data) - 2) * uncompressed_size +
192          page_zip->n_blobs * BTR_EXTERN_FIELD_REF_SIZE);
193}
194
195/** Determine how big record can be inserted without recompressing the page.
196 @return a positive number indicating the maximum size of a record
197 whose insertion is guaranteed to succeed, or zero or negative */
198UNIV_INLINE
199lint page_zip_max_ins_size(
200    const page_zip_des_t *page_zip, /*!< in: compressed page */
201    ibool is_clust)                 /*!< in: TRUE if clustered index */
202{
203  ulint trailer_len;
204
205  trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
206
207  /* When a record is created, a pointer may be added to
208  the dense directory.
209  Likewise, space for the columns that will not be
210  compressed will be allocated from the page trailer.
211  Also the BLOB pointers will be allocated from there, but
212  we may as well count them in the length of the record. */
213
214  trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
215
216  return ((lint)page_zip_get_size(page_zip) - trailer_len - page_zip->m_end -
217          (REC_N_NEW_EXTRA_BYTES - 2));
218}
219
220/** Determine if enough space is available in the modification log.
221 @return true if enough space is available */
222UNIV_INLINE
223ibool page_zip_available(
224    const page_zip_des_t *page_zip, /*!< in: compressed page */
225    bool is_clust,                  /*!< in: TRUE if clustered index */
226    ulint length,                   /*!< in: combined size of the record */
227    ulint create)                   /*!< in: nonzero=add the record to
228                                    the heap */
229{
230  ulint trailer_len;
231
232  ut_ad(length > REC_N_NEW_EXTRA_BYTES);
233
234  trailer_len = page_zip_get_trailer_len(page_zip, is_clust);
235
236  /* Subtract the fixed extra bytes and add the maximum
237  space needed for identifying the record (encoded heap_no). */
238  length -= REC_N_NEW_EXTRA_BYTES - 2;
239
240  if (create > 0) {
241    /* When a record is created, a pointer may be added to
242    the dense directory.
243    Likewise, space for the columns that will not be
244    compressed will be allocated from the page trailer.
245    Also the BLOB pointers will be allocated from there, but
246    we may as well count them in the length of the record. */
247
248    trailer_len += PAGE_ZIP_DIR_SLOT_SIZE;
249  }
250
251  return (length + trailer_len + page_zip->m_end < page_zip_get_size(page_zip));
252}
253
254/** Initialize a compressed page descriptor. */
255UNIV_INLINE
256void page_zip_des_init(page_zip_des_t *page_zip) /*!< in/out: compressed page
257                                                 descriptor */
258{
259  memset(page_zip, 0, sizeof *page_zip);
260}
261
262/** Write a log record of writing to the uncompressed header portion of a page.
263 */
264void page_zip_write_header_log(
265    const byte *data, /*!< in: data on the uncompressed page */
266    ulint length,     /*!< in: length of the data */
267    mtr_t *mtr);      /*!< in: mini-transaction */
268
269/** Write data to the uncompressed header portion of a page.  The data must
270already have been written to the uncompressed page.
271@param[in,out]	page_zip	compressed page
272@param[in]	str		address on the uncompressed page
273@param[in]	length		length of the data
274@param[in]	mtr		mini-transaction, or NULL */
275UNIV_INLINE
276void page_zip_write_header(page_zip_des_t *page_zip, const byte *str,
277                           ulint length, mtr_t *mtr) {
278  ulint pos;
279
280  ut_ad(page_zip_simple_validate(page_zip));
281  UNIV_MEM_ASSERT_RW(page_zip->data, page_zip_get_size(page_zip));
282
283  pos = page_offset(str);
284
285  ut_ad(pos < PAGE_DATA);
286
287  memcpy(page_zip->data + pos, str, length);
288
289  /* The following would fail in page_cur_insert_rec_zip(). */
290  /* ut_ad(page_zip_validate(page_zip, str - pos)); */
291
292  if (mtr) {
293#ifndef UNIV_HOTBACKUP
294    page_zip_write_header_log(str, length, mtr);
295#endif /* !UNIV_HOTBACKUP */
296  }
297}
298
299/** Write a log record of compressing an index page without the data on the
300 * page.
301 */
302UNIV_INLINE
303void page_zip_compress_write_log_no_data(
304    ulint level,         /*!< in: compression level */
305    const page_t *page,  /*!< in: page that is compressed */
306    dict_index_t *index, /*!< in: index */
307    mtr_t *mtr)          /*!< in: mtr */
308{
309#ifndef UNIV_HOTBACKUP
310
311  byte *log_ptr = nullptr;
312  if (!mlog_open_and_write_index(mtr, page, index,
313                                 MLOG_ZIP_PAGE_COMPRESS_NO_DATA, 1, log_ptr)) {
314    return;
315  }
316
317  mach_write_to_1(log_ptr, level);
318  mlog_close(mtr, log_ptr + 1);
319
320#endif /* !UNIV_HOTBACKUP */
321}
322
323/** Parses a log record of compressing an index page without the data.
324 @return end of log record or NULL */
325UNIV_INLINE
326byte *page_zip_parse_compress_no_data(
327    byte *ptr,                /*!< in: buffer */
328    byte *end_ptr,            /*!< in: buffer end */
329    page_t *page,             /*!< in: uncompressed page */
330    page_zip_des_t *page_zip, /*!< out: compressed page */
331    dict_index_t *index)      /*!< in: index */
332{
333  ulint level;
334  if (end_ptr == ptr) {
335    return (nullptr);
336  }
337
338  level = mach_read_from_1(ptr);
339
340  /* If page compression fails then there must be something wrong
341  because a compress log record is logged only if the compression
342  was successful. Crash in this case. */
343
344  if (page && !page_zip_compress(page_zip, page, index, level, nullptr)) {
345    ut_error;
346  }
347
348  return (ptr + 1);
349}
350
351#ifndef UNIV_HOTBACKUP
352/** Reset the counters used for filling
353 INFORMATION_SCHEMA.innodb_cmp_per_index. */
354UNIV_INLINE
355void page_zip_reset_stat_per_index() {
356  mutex_enter(&page_zip_stat_per_index_mutex);
357
358  page_zip_stat_per_index.erase(page_zip_stat_per_index.begin(),
359                                page_zip_stat_per_index.end());
360
361  mutex_exit(&page_zip_stat_per_index_mutex);
362}
363#endif /* !UNIV_HOTBACKUP */
364
365/** Find the slot of the given record in the dense page directory.
366 @return dense directory slot, or NULL if record not found */
367UNIV_INLINE
368byte *page_zip_dir_find_low(byte *slot,   /*!< in: start of records */
369                            byte *end,    /*!< in: end of records */
370                            ulint offset) /*!< in: offset of user record */
371{
372  ut_ad(slot <= end);
373
374  for (; slot < end; slot += PAGE_ZIP_DIR_SLOT_SIZE) {
375    if ((mach_read_from_2(slot) & PAGE_ZIP_DIR_SLOT_MASK) == offset) {
376      return (slot);
377    }
378  }
379
380  return (nullptr);
381}
382
383/** Gets the size of the compressed page trailer (the dense page directory),
384 only including user records (excluding the free list).
385 @return length of dense page directory comprising existing records, in bytes */
386UNIV_INLINE
387ulint page_zip_dir_user_size(
388    const page_zip_des_t *page_zip) /*!< in: compressed page */
389{
390  ulint size = PAGE_ZIP_DIR_SLOT_SIZE * page_get_n_recs(page_zip->data);
391  ut_ad(size <= page_zip_dir_size(page_zip));
392  return (size);
393}
394
395/** Find the slot of the given free record in the dense page directory.
396 @return dense directory slot, or NULL if record not found */
397UNIV_INLINE
398byte *page_zip_dir_find_free(
399    page_zip_des_t *page_zip, /*!< in: compressed page */
400    ulint offset)             /*!< in: offset of user record */
401{
402  byte *end = page_zip->data + page_zip_get_size(page_zip);
403
404  ut_ad(page_zip_simple_validate(page_zip));
405
406  return (page_zip_dir_find_low(end - page_zip_dir_size(page_zip),
407                                end - page_zip_dir_user_size(page_zip),
408                                offset));
409}
410
411/** Gets an offset to the compressed page trailer (the dense page directory),
412 including deleted records (the free list).
413 @return offset of the dense page directory */
414UNIV_INLINE
415ulint page_zip_dir_start_offs(
416    const page_zip_des_t *page_zip, /*!< in: compressed page */
417    ulint n_dense)                  /*!< in: directory size */
418{
419  ut_ad(n_dense * PAGE_ZIP_DIR_SLOT_SIZE < page_zip_get_size(page_zip));
420
421  return (page_zip_get_size(page_zip) - n_dense * PAGE_ZIP_DIR_SLOT_SIZE);
422}
423
424#ifdef UNIV_MATERIALIZE
425#undef UNIV_INLINE
426#define UNIV_INLINE UNIV_INLINE_ORIGINAL
427#endif
428