1 /*****************************************************************************
2
3 Copyright (c) 2015, 2020, Oracle and/or its affiliates. All Rights Reserved.
4
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
24
25 *****************************************************************************/
26 #ifndef lob0lob_h
27 #define lob0lob_h
28
29 #include <my_dbug.h>
30 #include "btr0pcur.h"
31 #include "dict0mem.h"
32 #include "page0page.h"
33 #include "row0log.h"
34 #include "univ.i"
35
36 /* Uncomment the following line to generate debug data, useful to analyze
37 LOB issues. */
38 /* #define LOB_DEBUG */
39 /* #define ZLOB_DEBUG */
40
41 struct upd_t;
42 typedef std::map<page_no_t, buf_block_t *> BlockCache;
43
/**
@file
@brief Implements the large objects (LOB) module.

InnoDB supports large objects (LOB). Previously, LOBs were called
externally stored fields. A large object contains a singly linked list of
database pages, aka LOB pages. A reference to the first LOB page is stored
along with the clustered index record. This reference is called the LOB
reference (lob::ref_t). A single clustered index record can have many LOB
references. Secondary indexes cannot have LOB references.

There are two types of LOB - compressed and uncompressed.

The main operations implemented for LOB are - INSERT, DELETE and FETCH. To
carry out these main operations the following classes are provided.

Inserter     - for inserting uncompressed LOB data.
zInserter    - for inserting compressed LOB data.
BaseInserter - a base class containing common state and functions useful for
               both Inserter and zInserter. Inserter and zInserter derive
               from this base class.
Reader       - for reading uncompressed LOB data.
zReader      - for reading compressed LOB data.
Deleter      - for deleting both compressed and uncompressed LOB data.

For each main operation, the context information is identified separately.
They are as follows:

InsertContext - context information for doing insert of LOB.
DeleteContext - context information for doing delete of LOB.
ReadContext   - context information for doing fetch of LOB.

*/
77
/** Provides the large objects (LOB) module. Previously, LOBs were called
externally stored fields. */
80 namespace lob {
81
/** The maximum size possible for an LOB */
const ulint MAX_SIZE = UINT32_MAX;

/** The compressed LOB is stored as a collection of zlib streams. The
uncompressed LOB is divided into chunks of size Z_CHUNK_SIZE and each of
these chunks is compressed individually and stored as compressed LOB
data. KB128 is 128 * 1024 bytes; Z_CHUNK_SIZE is an alias for it. */
#define KB128 (128 * 1024)
#define Z_CHUNK_SIZE KB128
91
/** The reference in a field for which data is stored on a different page.
The reference is at the end of the 'locally' stored part of the field.
'Locally' means storage in the index record.
We store locally a long enough prefix of each column so that we can determine
the ordering parts of each index record without looking into the externally
stored part.

The constants below are byte offsets into that reference. */
/*-------------------------------------- @{ */

/** Space identifier where stored. */
const ulint BTR_EXTERN_SPACE_ID = 0;

/** page number where stored */
const ulint BTR_EXTERN_PAGE_NO = 4;

/** offset of BLOB header on that page */
const ulint BTR_EXTERN_OFFSET = 8;

/** Version number of LOB (LOB in new format). Shares the same position in
the reference as BTR_EXTERN_OFFSET. */
const ulint BTR_EXTERN_VERSION = BTR_EXTERN_OFFSET;

/** 8 bytes containing the length of the externally stored part of the LOB.
The 2 highest bits are reserved to the flags below.
NOTE(review): three flag bits (owner, inherited, being-modified) are defined
for the first byte of this field, so "2 highest bits" looks outdated -
confirm. */
const ulint BTR_EXTERN_LEN = 12;

/*-------------------------------------- @} */
117
/** The most significant bit of BTR_EXTERN_LEN (i.e., the most
significant bit of the byte at smallest address) is set to 1 if this
field does not 'own' the externally stored field; only the owner field
is allowed to free the field in purge! */
const ulint BTR_EXTERN_OWNER_FLAG = 128UL;

/** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
second most significant bit of the byte at smallest address) is 1 then
it means that the externally stored field was inherited from an
earlier version of the row. In rollback we are not allowed to free an
inherited external field. */
const ulint BTR_EXTERN_INHERITED_FLAG = 64UL;

/** If the 3rd most significant bit of BTR_EXTERN_LEN (i.e., the third
most significant bit of the byte at smallest address) is 1, then it
means that the externally stored field is currently being modified.
This is mainly used by the READ UNCOMMITTED transaction to avoid returning
inconsistent blob data. */
const ulint BTR_EXTERN_BEING_MODIFIED_FLAG = 32UL;
136
/** The structure of uncompressed LOB page header. The LOB_HDR_* constants
are byte offsets within that header. */

/** Offset within header of LOB length on this page. */
const ulint LOB_HDR_PART_LEN = 0;

/** Offset within header of next BLOB part page no.
FIL_NULL if none */
const ulint LOB_HDR_NEXT_PAGE_NO = 4;

/** Size of an uncompressed LOB page header, in bytes */
const ulint LOB_HDR_SIZE = 8;

/** Start of the data on an LOB page; equal to FIL_PAGE_DATA. */
const uint ZLOB_PAGE_DATA = FIL_PAGE_DATA;
151
152 /** In memory representation of the LOB reference. */
153 struct ref_mem_t {
154 /** Space Identifier of the clustered index. */
155 space_id_t m_space_id;
156
157 /** Page number of first LOB page. */
158 page_no_t m_page_no;
159
160 /** Offset within m_page_no where LOB begins. */
161 ulint m_offset;
162
163 /** Length of LOB */
164 ulint m_length;
165
166 /** Whether the LOB is null. */
167 bool m_null;
168
169 /** Whether the clustered index record owns this LOB. */
170 bool m_owner;
171
172 /** Whether the clustered index record inherited this LOB from
173 another clustered index record. */
174 bool m_inherit;
175
176 /** Whether the LOB is partially updated. */
177 bool m_partial;
178
179 /** Whether the blob is being modified. */
180 bool m_being_modified;
181
182 /** Check if the LOB has already been purged.
183 @return true if LOB has been purged, false otherwise. */
is_purgedref_mem_t184 bool is_purged() const {
185 return ((m_page_no == FIL_NULL) && (m_length == 0));
186 }
187 };
188
/** Reference pattern of FIELD_REF_SIZE bytes, defined elsewhere.
NOTE(review): presumably all zeroes except the "being modified" bit - confirm
against the definition. */
extern const byte field_ref_almost_zero[FIELD_REF_SIZE];
190
191 /** The struct 'lob::ref_t' represents an external field reference. The
192 reference in a field for which data is stored on a different page. The
193 reference is at the end of the 'locally' stored part of the field. 'Locally'
194 means storage in the index record. We store locally a long enough prefix of
195 each column so that we can determine the ordering parts of each index record
196 without looking into the externally stored part. */
197 struct ref_t {
198 private:
199 /** If the LOB size is equal to or above this limit (in physical page
200 size terms), then the LOB is big enough to be partially updated. Only
201 in this case LOB index needs to be built. */
202 static const ulint LOB_BIG_THRESHOLD_SIZE = 2;
203
204 public:
205 /** If the total number of bytes modified in an LOB, in an update
206 operation, is less than or equal to this threshold LOB_SMALL_CHANGE_THRESHOLD,
207 then it is considered as a small change. For small changes to LOB,
208 the changes are undo logged like any other update operation. */
209 static const ulint LOB_SMALL_CHANGE_THRESHOLD = 100;
210
211 /** Constructor.
212 @param[in] ptr Pointer to the external field reference. */
ref_tref_t213 explicit ref_t(byte *ptr) : m_ref(ptr) {}
214
215 /** For compressed LOB, if the length is less than or equal to Z_CHUNK_SIZE
216 then use the older single z stream format to store the LOB. */
use_single_z_streamref_t217 bool use_single_z_stream() const { return (length() <= Z_CHUNK_SIZE); }
218
219 /** For compressed LOB, if the length is less than or equal to Z_CHUNK_SIZE
220 then use the older single z stream format to store the LOB. */
use_single_z_streamref_t221 static bool use_single_z_stream(ulint len) { return (len <= Z_CHUNK_SIZE); }
222
223 /** Check if this LOB is big enough to do partial update.
224 @param[in] page_size the page size
225 @param[in] lob_length the size of BLOB in bytes.
226 @return true if LOB is big enough, false otherwise. */
is_bigref_t227 static bool is_big(const page_size_t &page_size, const ulint lob_length) {
228 /* Disable a performance optimization */
229 return (true);
230
231 const ulint limit = page_size.physical() * LOB_BIG_THRESHOLD_SIZE;
232 return (lob_length >= limit);
233 }
234
235 /** Check if this LOB is big enough to do partial update.
236 @param[in] page_size the page size
237 @return true if LOB is big enough, false otherwise. */
is_bigref_t238 bool is_big(const page_size_t &page_size) const {
239 /* Disable a performance optimization */
240 return (true);
241
242 const ulint limit = page_size.physical() * LOB_BIG_THRESHOLD_SIZE;
243 const ulint lob_length = length();
244 return (lob_length >= limit);
245 }
246
247 /** Parse the LOB reference object and copy data into the given
248 ref_mem_t object.
249 @param[out] obj LOB reference memory object. */
parseref_t250 void parse(ref_mem_t &obj) const {
251 obj.m_space_id = space_id();
252 obj.m_page_no = page_no();
253 obj.m_offset = offset();
254 obj.m_length = length();
255 obj.m_null = is_null();
256 obj.m_owner = is_owner();
257 obj.m_inherit = is_inherited();
258 obj.m_being_modified = is_being_modified();
259 }
260
261 /** Copy the LOB reference into the given memory location.
262 @param[out] field_ref write LOB reference in this
263 location.*/
copyref_t264 void copy(byte *field_ref) const { memcpy(field_ref, m_ref, SIZE); }
265
266 /** Check whether the stored external field reference is equal to the
267 given field reference.
268 @param[in] ptr supplied external field reference. */
is_equalref_t269 bool is_equal(const byte *ptr) const { return (m_ref == ptr); }
270
271 /** Set the external field reference to the given memory location.
272 @param[in] ptr the new external field reference. */
set_refref_t273 void set_ref(byte *ptr) { m_ref = ptr; }
274
275 /** Set the external field reference to null.
276 @param[in,out] mtr the mini-transaction. */
set_nullref_t277 void set_null(mtr_t *mtr) {
278 mlog_write_string(m_ref, field_ref_zero, FIELD_REF_SIZE, mtr);
279 }
280
281 /** Check if the field reference is made of zeroes except the being_modified
282 bit.
283 @return true if field reference is made of zeroes, false otherwise. */
is_null_relaxedref_t284 bool is_null_relaxed() const {
285 return (is_null() || memcmp(field_ref_almost_zero, m_ref, SIZE) == 0);
286 }
287
288 /** Check if the field reference is made of zeroes.
289 @return true if field reference is made of zeroes, false otherwise. */
is_nullref_t290 bool is_null() const { return (memcmp(field_ref_zero, m_ref, SIZE) == 0); }
291
292 #ifdef UNIV_DEBUG
293 /** Check if the LOB reference is null (all zeroes) except the "is being
294 modified" bit.
295 @param[in] ref the LOB reference.
296 @return true if the LOB reference is null (all zeros) except the "is being
297 modified" bit, false otherwise. */
is_null_relaxedref_t298 static bool is_null_relaxed(const byte *ref) {
299 return (is_null(ref) || memcmp(field_ref_almost_zero, ref, SIZE) == 0);
300 }
301
302 /** Check if the LOB reference is null (all zeroes).
303 @param[in] ref the LOB reference.
304 @return true if the LOB reference is null (all zeros), false otherwise. */
is_nullref_t305 static bool is_null(const byte *ref) {
306 return (memcmp(field_ref_zero, ref, SIZE) == 0);
307 }
308 #endif /* UNIV_DEBUG */
309
310 /** Set the ownership flag in the blob reference.
311 @param[in] owner whether to own or disown. if owner, unset
312 the owner flag.
313 @param[in] mtr the mini-transaction or NULL.*/
set_ownerref_t314 void set_owner(bool owner, mtr_t *mtr) {
315 ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
316
317 if (owner) {
318 /* owns the blob */
319 byte_val &= ~BTR_EXTERN_OWNER_FLAG;
320 } else {
321 byte_val |= BTR_EXTERN_OWNER_FLAG;
322 }
323
324 mlog_write_ulint(m_ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
325 }
326
327 /** Set the being_modified flag in the field reference.
328 @param[in,out] ref the LOB reference
329 @param[in] modifying true, if blob is being modified.
330 @param[in] mtr the mini-transaction context.*/
set_being_modifiedref_t331 static void set_being_modified(byte *ref, bool modifying, mtr_t *mtr) {
332 ulint byte_val = mach_read_from_1(ref + BTR_EXTERN_LEN);
333
334 if (modifying) {
335 byte_val |= BTR_EXTERN_BEING_MODIFIED_FLAG;
336 } else {
337 byte_val &= ~BTR_EXTERN_BEING_MODIFIED_FLAG;
338 }
339
340 mlog_write_ulint(ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
341 }
342
343 /** Set the being_modified flag in the field reference.
344 @param[in] modifying true, if blob is being modified.
345 @param[in] mtr the mini-transaction context.*/
set_being_modifiedref_t346 void set_being_modified(bool modifying, mtr_t *mtr) {
347 set_being_modified(m_ref, modifying, mtr);
348 }
349
350 /** Check if the current blob is being modified
351 @param[in] field_ref blob field reference
352 @return true if it is being modified, false otherwise. */
is_being_modifiedref_t353 bool static is_being_modified(const byte *field_ref) {
354 const ulint byte_val = mach_read_from_1(field_ref + BTR_EXTERN_LEN);
355 return (byte_val & BTR_EXTERN_BEING_MODIFIED_FLAG);
356 }
357
358 /** Check if the current blob is being modified
359 @return true if it is being modified, false otherwise. */
is_being_modifiedref_t360 bool is_being_modified() const { return (is_being_modified(m_ref)); }
361
362 /** Set the inherited flag in the field reference.
363 @param[in] inherited true, if inherited.
364 @param[in] mtr the mini transaction context.*/
set_inheritedref_t365 void set_inherited(bool inherited, mtr_t *mtr) {
366 ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
367
368 if (inherited) {
369 byte_val |= BTR_EXTERN_INHERITED_FLAG;
370 } else {
371 byte_val &= ~BTR_EXTERN_INHERITED_FLAG;
372 }
373
374 mlog_write_ulint(m_ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
375 }
376
377 /** Check if the current row is the owner of the blob.
378 @return true if owner, false otherwise. */
is_ownerref_t379 bool is_owner() const {
380 ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
381 return (!(byte_val & BTR_EXTERN_OWNER_FLAG));
382 }
383
384 /** Check if the current row inherited the blob from parent row.
385 @return true if inherited, false otherwise. */
is_inheritedref_t386 bool is_inherited() const {
387 const ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
388 return (byte_val & BTR_EXTERN_INHERITED_FLAG);
389 }
390
391 #ifdef UNIV_DEBUG
392 /** Read the space id from the given blob reference.
393 @param[in] ref the blob reference.
394 @return the space id */
space_idref_t395 static space_id_t space_id(const byte *ref) {
396 return (mach_read_from_4(ref));
397 }
398
399 /** Read the page no from the blob reference.
400 @return the page no */
page_noref_t401 static page_no_t page_no(const byte *ref) {
402 return (mach_read_from_4(ref + BTR_EXTERN_PAGE_NO));
403 }
404 #endif /* UNIV_DEBUG */
405
406 /** Read the space id from the blob reference.
407 @return the space id */
space_idref_t408 space_id_t space_id() const { return (mach_read_from_4(m_ref)); }
409
410 /** Read the page number from the blob reference.
411 @return the page number */
page_noref_t412 page_no_t page_no() const {
413 return (mach_read_from_4(m_ref + BTR_EXTERN_PAGE_NO));
414 }
415
416 /** Read the offset of blob header from the blob reference.
417 @return the offset of the blob header */
offsetref_t418 ulint offset() const { return (mach_read_from_4(m_ref + BTR_EXTERN_OFFSET)); }
419
420 /** Read the LOB version from the blob reference.
421 @return the LOB version number. */
versionref_t422 uint32_t version() const {
423 return (mach_read_from_4(m_ref + BTR_EXTERN_VERSION));
424 }
425
426 /** Read the length from the blob reference.
427 @return length of the blob */
lengthref_t428 ulint length() const {
429 return (mach_read_from_4(m_ref + BTR_EXTERN_LEN + 4));
430 }
431
432 /** Update the information stored in the external field reference.
433 @param[in] space_id the space identifier.
434 @param[in] page_no the page number.
435 @param[in] offset the offset within the page_no
436 @param[in] mtr the mini trx or NULL. */
updateref_t437 void update(space_id_t space_id, ulint page_no, ulint offset, mtr_t *mtr) {
438 set_space_id(space_id, mtr);
439 set_page_no(page_no, mtr);
440 set_offset(offset, mtr);
441 }
442
443 /** Set the space_id in the external field reference.
444 @param[in] space_id the space identifier.
445 @param[in] mtr mini-trx or NULL. */
set_space_idref_t446 void set_space_id(const space_id_t space_id, mtr_t *mtr) {
447 mlog_write_ulint(m_ref + BTR_EXTERN_SPACE_ID, space_id, MLOG_4BYTES, mtr);
448 }
449
450 /** Set the page number in the external field reference.
451 @param[in] page_no the page number.
452 @param[in] mtr mini-trx or NULL. */
set_page_noref_t453 void set_page_no(const ulint page_no, mtr_t *mtr) {
454 mlog_write_ulint(m_ref + BTR_EXTERN_PAGE_NO, page_no, MLOG_4BYTES, mtr);
455 }
456
457 /** Set the offset information in the external field reference.
458 @param[in] offset the offset.
459 @param[in] mtr mini-trx or NULL. */
set_offsetref_t460 void set_offset(const ulint offset, mtr_t *mtr) {
461 mlog_write_ulint(m_ref + BTR_EXTERN_OFFSET, offset, MLOG_4BYTES, mtr);
462 }
463
464 /** Set the length of blob in the external field reference.
465 @param[in] len the blob length .
466 @param[in] mtr mini-trx or NULL. */
set_lengthref_t467 void set_length(const ulint len, mtr_t *mtr) {
468 ut_ad(len <= MAX_SIZE);
469 mlog_write_ulint(m_ref + BTR_EXTERN_LEN + 4, len, MLOG_4BYTES, mtr);
470 }
471
472 /** Get the start of a page containing this blob reference.
473 @return start of the page */
page_alignref_t474 page_t *page_align() const { return (::page_align(m_ref)); }
475
476 #ifdef UNIV_DEBUG
477 /** Check if the given mtr has necessary latches to update this LOB
478 reference.
479 @param[in] mtr the mini transaction that needs to
480 be checked.
481 @return true if valid, false otherwise. */
validateref_t482 bool validate(mtr_t *mtr) {
483 ut_ad(m_ref != nullptr);
484 ut_ad(mtr != nullptr);
485
486 if (mtr->get_log_mode() == MTR_LOG_NO_REDO) {
487 return (true);
488 }
489
490 buf_block_t *block = mtr->memo_contains_page_flagged(
491 m_ref, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX);
492 ut_ad(block != nullptr);
493 return (true);
494 }
495
496 /** Check if the space_id in the LOB reference is equal to the
497 space_id of the index to which it belongs.
498 @param[in] index the index to which LOB belongs.
499 @return true if space is valid in LOB reference, false otherwise. */
500 bool check_space_id(dict_index_t *index) const;
501 #endif /* UNIV_DEBUG */
502
503 /** Check if the LOB can be partially updated. This is done by loading
504 the first page of LOB and looking at the flags.
505 @param[in] index the index to which LOB belongs.
506 @return true if LOB is partially updatable, false otherwise.*/
507 bool is_lob_partially_updatable(const dict_index_t *index) const;
508
509 /** Load the first page of the LOB and mark it as not partially
510 updatable anymore.
511 @param[in] trx the current transaction
512 @param[in] mtr the mini transaction context.
513 @param[in] index the index dictionary object.
514 @param[in] page_size the page size information. */
515 void mark_not_partially_updatable(trx_t *trx, mtr_t *mtr, dict_index_t *index,
516 const page_size_t &page_size);
517
518 /** Load the first page of LOB and read its page type.
519 @param[in] index the index object.
520 @param[in] page_size the page size of LOB.
521 @param[out] is_partially_updatable is the LOB partially updatable.
522 @return the page type of first page of LOB.*/
523 ulint get_lob_page_info(const dict_index_t *index,
524 const page_size_t &page_size,
525 bool &is_partially_updatable) const;
526
527 /** Print this LOB reference into the given output stream.
528 @param[in] out the output stream.
529 @return the output stream. */
530 std::ostream &print(std::ostream &out) const;
531
532 /** The size of an LOB reference object (in bytes) */
533 static const uint SIZE = BTR_EXTERN_FIELD_REF_SIZE;
534
535 private:
536 /** Pointing to a memory of size BTR_EXTERN_FIELD_REF_SIZE */
537 byte *m_ref;
538 };
539
540 /** Overload the global output stream operator to easily print the
541 lob::ref_t object into the output stream.
542 @param[in,out] out the output stream.
543 @param[in] obj the lob::ref_t object to be printed
544 @return the output stream. */
545 inline std::ostream &operator<<(std::ostream &out, const ref_t &obj) {
546 return (obj.print(out));
547 }
548
/** LOB operation code for btr_store_big_rec_extern_fields(). The numeric
values are spelled out; they are the same ones implicit numbering yields. */
enum opcode {

  /** Store off-page columns for a freshly inserted record */
  OPCODE_INSERT = 0,

  /** Store off-page columns for an insert by update */
  OPCODE_INSERT_UPDATE = 1,

  /** Store off-page columns for an update */
  OPCODE_UPDATE = 2,

  /** Store off-page columns for a freshly inserted record by bulk */
  OPCODE_INSERT_BULK = 3,

  /** The operation code is unknown or not important. */
  OPCODE_UNKNOWN = 4
};
567
/** Stores the fields in big_rec_vec to the tablespace and puts pointers to
them in rec.  The extern flags in rec will have to be set beforehand. The
fields are stored on pages allocated from leaf node file segment of the index
tree.

TODO: If the allocation extends the tablespace, it will not be redo logged, in
any mini-transaction.  Tablespace extension should be redo-logged, so that
recovery will not fail when the big_rec was written to the extended portion of
the file, in case the file was somehow truncated in the crash.

@param[in]	trx		current transaction
@param[in,out]	pcur		a persistent cursor. if btr_mtr is restarted,
                                then this can be repositioned.
@param[in]	upd		update vector
@param[in,out]	offsets		rec_get_offsets() on pcur. the "external
                                storage" flags in offsets will correctly
                                correspond to rec when this function returns
@param[in]	big_rec_vec	vector containing fields to be stored
                                externally
@param[in,out]	btr_mtr		mtr containing the latches to the clustered
                                index. can be committed and restarted.
@param[in]	op		operation code
@return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
dberr_t btr_store_big_rec_extern_fields(trx_t *trx, btr_pcur_t *pcur,
                                        const upd_t *upd, ulint *offsets,
                                        const big_rec_t *big_rec_vec,
                                        mtr_t *btr_mtr, opcode op)
    MY_ATTRIBUTE((warn_unused_result));
597
/** Copies an externally stored field of a record to mem heap.
@param[in]	trx		the current transaction.
@param[in]	index		the clustered index
@param[in]	rec		record in a clustered index; must be
                                protected by a lock or a page latch
@param[in]	offsets		array returned by rec_get_offsets()
@param[in]	page_size	BLOB page size
@param[in]	no		field number
@param[out]	len		length of the field
@param[out]	lob_version	version of lob that has been copied */
#ifdef UNIV_DEBUG
/**
@param[in]	is_sdi		true for SDI Indexes (this parameter exists
                                only in UNIV_DEBUG builds) */
#endif /* UNIV_DEBUG */
/**
@param[in,out]	heap		mem heap
@return the field copied to heap, or NULL if the field is incomplete */
byte *btr_rec_copy_externally_stored_field_func(
    trx_t *trx, const dict_index_t *index, const rec_t *rec,
    const ulint *offsets, const page_size_t &page_size, ulint no, ulint *len,
    size_t *lob_version,
#ifdef UNIV_DEBUG
    bool is_sdi,
#endif /* UNIV_DEBUG */
    mem_heap_t *heap);
623
/* Wrapper macro around btr_rec_copy_externally_stored_field_func(). Callers
always pass is_sdi; it is forwarded in UNIV_DEBUG builds and dropped
otherwise, matching the conditional parameter of the function. */
#ifdef UNIV_DEBUG
#define btr_rec_copy_externally_stored_field(                         \
    trx, index, rec, offsets, page_size, no, len, ver, is_sdi, heap)  \
  btr_rec_copy_externally_stored_field_func(                          \
      trx, index, rec, offsets, page_size, no, len, ver, is_sdi, heap)
#else /* UNIV_DEBUG */
#define btr_rec_copy_externally_stored_field(                         \
    trx, index, rec, offsets, page_size, no, len, ver, is_sdi, heap)  \
  btr_rec_copy_externally_stored_field_func(trx, index, rec, offsets, \
                                            page_size, no, len, ver, heap)
#endif /* UNIV_DEBUG */
635
/** Gets the offset of the pointer to the externally stored part of a field.
The result is meant to be added to the record pointer, as the
btr_rec_get_field_ref() macro does.
@param[in]	offsets		array returned by rec_get_offsets()
@param[in]	n		index of the external field
@return offset of the pointer to the externally stored part */
ulint btr_rec_get_field_ref_offs(const ulint *offsets, ulint n);
641
/** Gets a pointer to the externally stored part of a field. Expands to rec
plus the offset computed by lob::btr_rec_get_field_ref_offs().
@param rec record
@param offsets rec_get_offsets(rec)
@param n index of the externally stored field
@return pointer to the externally stored part */
#define btr_rec_get_field_ref(rec, offsets, n) \
  ((rec) + lob::btr_rec_get_field_ref_offs(offsets, n))
649
/** Deallocate a buffer block that was reserved for a BLOB part.
@param[in]	index	index
@param[in]	block	buffer block
@param[in]	all	true=remove also the compressed page
                        if there is one
@param[in]	mtr	mini-transaction to commit */
void blob_free(dict_index_t *index, buf_block_t *block, bool all, mtr_t *mtr);
657
658 /** The B-tree context under which the LOB operation is done. */
659 class BtrContext {
660 public:
/** Default Constructor. Every pointer member is set to nullptr, the
operation code to OPCODE_UNKNOWN and the B-tree page number to FIL_NULL. */
BtrContext()
    : m_mtr(nullptr),
      m_pcur(nullptr),
      m_index(nullptr),
      m_rec(nullptr),
      m_offsets(nullptr),
      m_block(nullptr),
      m_op(OPCODE_UNKNOWN),
      m_btr_page_no(FIL_NULL) {}
671
/** Constructor. The operation code is set to OPCODE_UNKNOWN and the B-tree
page number to FIL_NULL.
@param[in]	mtr	mini-transaction, stored in m_mtr
@param[in]	pcur	persistent cursor, or nullptr
@param[in]	index	index, stored in m_index
@param[in]	rec	record, stored in m_rec
@param[in]	offsets	record offsets, stored in m_offsets
@param[in]	block	buffer block, or nullptr */
BtrContext(mtr_t *mtr, btr_pcur_t *pcur, dict_index_t *index, rec_t *rec,
           ulint *offsets, buf_block_t *block)
    : m_mtr(mtr),
      m_pcur(pcur),
      m_index(index),
      m_rec(rec),
      m_offsets(offsets),
      m_block(block),
      m_op(OPCODE_UNKNOWN),
      m_btr_page_no(FIL_NULL) {
  /* Debug-only consistency checks; each is skipped when the pointer it
  depends on is null. */
  ut_ad(m_pcur == nullptr || rec_offs_validate());
  /* When both are given, the record must lie on the block's frame. */
  ut_ad(m_block == nullptr || m_rec == nullptr ||
        m_block->frame == page_align(m_rec));
  /* When a cursor is given, it must be positioned on the record. */
  ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
}
688
/** Constructor taking an explicit operation code. The B-tree page number is
set to FIL_NULL.
@param[in]	mtr	mini-transaction, stored in m_mtr
@param[in]	pcur	persistent cursor, or nullptr
@param[in]	index	index, stored in m_index
@param[in]	rec	record, stored in m_rec
@param[in]	offsets	record offsets, stored in m_offsets
@param[in]	block	buffer block
@param[in]	op	operation code, stored in m_op */
BtrContext(mtr_t *mtr, btr_pcur_t *pcur, dict_index_t *index, rec_t *rec,
           ulint *offsets, buf_block_t *block, opcode op)
    : m_mtr(mtr),
      m_pcur(pcur),
      m_index(index),
      m_rec(rec),
      m_offsets(offsets),
      m_block(block),
      m_op(op),
      m_btr_page_no(FIL_NULL) {
  ut_ad(m_pcur == nullptr || rec_offs_validate());
  /* NOTE(review): unlike the constructor without an operation code, this
  assertion dereferences m_block without a null check - confirm that callers
  never pass a null block or record here. */
  ut_ad(m_block->frame == page_align(m_rec));
  ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
}
704
/** Copy Constructor. Performs a member-wise copy; the pointer members are
copied as pointers, so both objects refer to the same mtr, cursor, index,
record, offsets and block.
@param[in]	other	the context to copy from */
BtrContext(const BtrContext &other)
    : m_mtr(other.m_mtr),
      m_pcur(other.m_pcur),
      m_index(other.m_index),
      m_rec(other.m_rec),
      m_offsets(other.m_offsets),
      m_block(other.m_block),
      m_op(other.m_op),
      m_btr_page_no(other.m_btr_page_no) {}
715
/** Marks non-updated off-page fields as disowned by this record.
The ownership must be transferred to the updated record which is
inserted elsewhere in the index tree. In purge only the owner of
externally stored field is allowed to free the field.
@param[in]	update	update vector. */
void disown_inherited_fields(const upd_t *update);
722
723 /** Sets the ownership bit of an externally stored field in a record.
724 @param[in] i field number
725 @param[in] val value to set */
set_ownership_of_extern_field(ulint i,ibool val)726 void set_ownership_of_extern_field(ulint i, ibool val) {
727 byte *data;
728 ulint local_len;
729
730 data =
731 const_cast<byte *>(rec_get_nth_field(m_rec, m_offsets, i, &local_len));
732 ut_ad(rec_offs_nth_extern(m_offsets, i));
733 ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
734
735 local_len -= BTR_EXTERN_FIELD_REF_SIZE;
736
737 ref_t ref(data + local_len);
738
739 ut_a(val || ref.is_owner());
740
741 page_zip_des_t *page_zip = get_page_zip();
742
743 if (page_zip) {
744 ref.set_owner(val, nullptr);
745 page_zip_write_blob_ptr(page_zip, m_rec, m_index, m_offsets, i, m_mtr);
746 } else {
747 ref.set_owner(val, m_mtr);
748 }
749 }
750
751 /** Marks all extern fields in a record as owned by the record.
752 This function should be called if the delete mark of a record is
753 removed: a not delete marked record always owns all its extern
754 fields.*/
unmark_extern_fields()755 void unmark_extern_fields() {
756 ut_ad(!rec_offs_comp(m_offsets) || !rec_get_node_ptr_flag(m_rec));
757
758 ulint n = rec_offs_n_fields(m_offsets);
759
760 if (!rec_offs_any_extern(m_offsets)) {
761 return;
762 }
763
764 for (ulint i = 0; i < n; i++) {
765 if (rec_offs_nth_extern(m_offsets, i)) {
766 set_ownership_of_extern_field(i, TRUE);
767 }
768 }
769 }
770
/** Frees the externally stored fields for a record.
@param[in]	trx_id		transaction identifier whose LOB is
                                being freed.
@param[in]	undo_no		undo number within a transaction whose
                                LOB is being freed.
@param[in]	rollback	performing rollback?
@param[in]	rec_type	undo record type.*/
void free_externally_stored_fields(trx_id_t trx_id, undo_no_t undo_no,
                                   bool rollback, ulint rec_type);
780
/** Frees the externally stored fields for a record, if the field
is mentioned in the update vector.
@param[in]	trx_id		the transaction identifier.
@param[in]	undo_no		undo number within a transaction whose
                                LOB is being freed.
@param[in]	update		update vector
@param[in]	rollback	performing rollback? */
void free_updated_extern_fields(trx_id_t trx_id, undo_no_t undo_no,
                                const upd_t *update, bool rollback);
790
791 /** Gets the compressed page descriptor
792 @return the compressed page descriptor. */
get_page_zip()793 page_zip_des_t *get_page_zip() const {
794 return (buf_block_get_page_zip(m_block));
795 }
796
797 /** Get the page number of clustered index block.
798 @return the page number. */
get_page_no()799 page_no_t get_page_no() const {
800 return (page_get_page_no(buf_block_get_frame(m_block)));
801 }
802
803 /** Get the record offset within page of the clustered index record.
804 @return the record offset. */
get_rec_offset()805 ulint get_rec_offset() const { return (page_offset(m_rec)); }
806
807 /** Check if there is a need to recalculate the context information.
808 @return true if there is a need to recalculate, false otherwise. */
need_recalc()809 bool need_recalc() const {
810 return ((m_pcur != nullptr) && (m_rec != btr_pcur_get_rec(m_pcur)));
811 }
812
/** Get the clustered index record pointer. In debug builds it is asserted
that the persistent cursor, if any, still points at this record.
@return clustered index record pointer. */
rec_t *rec() const {
  ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
  return (m_rec);
}
819
820 /** Get the LOB reference for the given field number.
821 @param[in] field_no field number.
822 @return LOB reference (aka external field reference).*/
get_field_ref(ulint field_no)823 byte *get_field_ref(ulint field_no) const {
824 return (btr_rec_get_field_ref(m_rec, get_offsets(), field_no));
825 }
826
827 #ifdef UNIV_DEBUG
828 /** Validate the current BLOB context object. The BLOB context object
829 is valid if the necessary latches are being held by the
830 mini-transaction of the B-tree (btr mtr). Does not return if the
831 validation fails.
832 @return true if valid */
validate()833 bool validate() const {
834 rec_offs_make_valid(rec(), index(), m_offsets);
835
836 ut_ad(m_mtr->memo_contains_page_flagged(
837 m_rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) ||
838 table()->is_intrinsic());
839
840 ut_ad(mtr_memo_contains_flagged(m_mtr, dict_index_get_lock(index()),
841 MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK) ||
842 table()->is_intrinsic());
843
844 return (true);
845 }
846
847 /** Check to see if all pointers to externally stored columns in
848 the record must be valid.
849 @return true if all blob references are valid.
850 @return will not return if any blob reference is invalid. */
are_all_blobrefs_valid()851 bool are_all_blobrefs_valid() const {
852 for (ulint i = 0; i < rec_offs_n_fields(m_offsets); i++) {
853 if (!rec_offs_nth_extern(m_offsets, i)) {
854 continue;
855 }
856
857 byte *field_ref = btr_rec_get_field_ref(rec(), m_offsets, i);
858
859 ref_t blobref(field_ref);
860
861 /* The pointer must not be zero if the operation
862 succeeded. */
863 ut_a(!blobref.is_null());
864
865 /* The column must not be disowned by this record. */
866 ut_a(blobref.is_owner());
867 }
868
869 return (true);
870 }
871 #endif /* UNIV_DEBUG */
872
873 /** Determine whether current operation is a bulk insert operation.
874 @return true, if bulk insert operation, false otherwise. */
is_bulk()875 bool is_bulk() const { return (m_op == OPCODE_INSERT_BULK); }
876
877 /** Get the beginning of the B-tree clustered index page frame
878 that contains the current clustered index record (m_rec).
879 @return the page frame containing the clust rec. */
rec_frame()880 const page_t *rec_frame() const {
881 ut_ad(m_block->frame == page_align(m_rec));
882 return (m_block->frame);
883 }
884
885 /** Commit the mini transaction that is holding the latches
886 of the clustered index record block. */
commit_btr_mtr()887 void commit_btr_mtr() { m_mtr->commit(); }
888
889 /** Start the mini transaction that will be holding the latches
890 of the clustered index record block. */
start_btr_mtr()891 void start_btr_mtr() {
892 mtr_log_t log_mode = m_mtr->get_log_mode();
893 m_mtr->start();
894 m_mtr->set_log_mode(log_mode);
895 }
896
897 /** Get the page number of clustered index record.
898 @return page number of clustered index record. */
get_btr_page_no()899 page_no_t get_btr_page_no() const {
900 return (page_get_page_no(buf_block_get_frame(m_block)));
901 }
902
903 #ifndef UNIV_HOTBACKUP
904
905 /** Increment the buffer fix count of the clustered index record
906 block. */
rec_block_fix()907 void rec_block_fix() {
908 m_rec_offset = page_offset(m_rec);
909 m_btr_page_no = page_get_page_no(buf_block_get_frame(m_block));
910 buf_block_buf_fix_inc(m_block, __FILE__, __LINE__);
911 }
912
913 /** Decrement the buffer fix count of the clustered index record
914 block. */
rec_block_unfix()915 void rec_block_unfix() {
916 space_id_t space_id = space();
917 page_id_t page_id(space_id, m_btr_page_no);
918 page_size_t page_size(dict_table_page_size(table()));
919 page_cur_t *page_cur = &m_pcur->m_btr_cur.page_cur;
920
921 mtr_x_lock(dict_index_get_lock(index()), m_mtr);
922
923 page_cur->block =
924 btr_block_get(page_id, page_size, RW_X_LATCH, index(), m_mtr);
925
926 page_cur->rec = buf_block_get_frame(page_cur->block) + m_rec_offset;
927
928 buf_block_buf_fix_dec(page_cur->block);
929
930 recalc();
931 }
932 #endif /* !UNIV_HOTBACKUP */
933
934 /** Restore the position of the persistent cursor. */
restore_position()935 void restore_position() {
936 ut_ad(m_pcur->m_rel_pos == BTR_PCUR_ON);
937 bool ret = btr_pcur_restore_position(m_pcur->m_latch_mode, m_pcur, m_mtr);
938
939 ut_a(ret);
940
941 recalc();
942 }
943
944 /** Get the index object.
945 @return index object */
index()946 dict_index_t *index() const { return (m_index); }
947
948 /** Get the table object.
949 @return table object or NULL. */
table()950 dict_table_t *table() const {
951 dict_table_t *result = nullptr;
952
953 if (m_pcur != nullptr && m_pcur->index() != nullptr) {
954 result = m_pcur->index()->table;
955 }
956
957 return (result);
958 }
959
960 /** Get the space id.
961 @return space id. */
space()962 space_id_t space() const { return (index()->space); }
963
964 /** Obtain the page size of the underlying table.
965 @return page size of the underlying table. */
page_size()966 const page_size_t page_size() const {
967 return (dict_table_page_size(table()));
968 }
969
970 /** Determine the extent size (in pages) for the underlying table
971 @return extent size in pages */
pages_in_extent()972 page_no_t pages_in_extent() const {
973 return (dict_table_extent_size(table()));
974 }
975
976 /** Check if there is enough space in the redo log file. The btr
977 mini transaction will be restarted. */
check_redolog()978 void check_redolog() {
979 is_bulk() ? check_redolog_bulk() : check_redolog_normal();
980 }
981
982 /** The btr mini transaction will be restarted. */
restart_mtr()983 void restart_mtr() { is_bulk() ? restart_mtr_bulk() : restart_mtr_normal(); }
984
985 /** Mark the nth field as externally stored.
986 @param[in] field_no the field number. */
make_nth_extern(ulint field_no)987 void make_nth_extern(ulint field_no) {
988 rec_offs_make_nth_extern(m_offsets, field_no);
989 }
990
991 /** Get the log mode of the btr mtr.
992 @return the log mode. */
get_log_mode()993 mtr_log_t get_log_mode() { return (m_mtr->get_log_mode()); }
994
995 /** Get flush observer
996 @return flush observer */
get_flush_observer()997 FlushObserver *get_flush_observer() const {
998 return (m_mtr->get_flush_observer());
999 }
1000
1001 /** Get the record offsets array.
1002 @return the record offsets array. */
get_offsets()1003 ulint *get_offsets() const { return (m_offsets); }
1004
1005 /** Validate the record offsets array.
1006 @return true if validation succeeds, false otherwise. */
rec_offs_validate()1007 bool rec_offs_validate() const {
1008 if (m_rec != nullptr) {
1009 ut_ad(::rec_offs_validate(m_rec, m_index, m_offsets));
1010 }
1011 return (true);
1012 }
1013
1014 /** Get the associated mini-transaction.
1015 @return the mini transaction. */
get_mtr()1016 mtr_t *get_mtr() { return (m_mtr); }
1017
1018 /** Get the pointer to the clustered record block.
1019 @return pointer to the clustered rec block. */
block()1020 buf_block_t *block() const { return (m_block); }
1021
1022 /** Save the position of the persistent cursor. */
store_position()1023 void store_position() { btr_pcur_store_position(m_pcur, m_mtr); }
1024
/** Check if there is enough space in log file. Commit and re-start the
mini transaction. check_redolog() selects this variant when the operation
is not a bulk insert. */
void check_redolog_normal();
1028
/** When bulk load is being done, check if there is enough space in redo
log file. check_redolog() selects this variant when is_bulk() is true. */
void check_redolog_bulk();
1032
/** Commit and re-start the mini transaction. restart_mtr() selects this
variant when the operation is not a bulk insert. */
void restart_mtr_normal();
1035
/** When bulk load is being done, commit and re-start the mini transaction.
restart_mtr() selects this variant when is_bulk() is true. */
void restart_mtr_bulk();
1038
1039 /** Recalculate some of the members after restoring the persistent
1040 cursor. */
recalc()1041 void recalc() {
1042 m_block = btr_pcur_get_block(m_pcur);
1043 m_rec = btr_pcur_get_rec(m_pcur);
1044 m_btr_page_no = page_get_page_no(buf_block_get_frame(m_block));
1045 m_rec_offset = page_offset(m_rec);
1046
1047 rec_offs_make_valid(rec(), index(), m_offsets);
1048 }
1049
1050 /** Write a blob reference of a field into a clustered index record
1051 in a compressed leaf page. The information must already have been
1052 updated on the uncompressed page.
1053 @param[in] field_no the blob field number
1054 @param[in] mtr the mini transaction to update
1055 blob page. */
zblob_write_blobref(ulint field_no,mtr_t * mtr)1056 void zblob_write_blobref(ulint field_no, mtr_t *mtr) {
1057 page_zip_write_blob_ptr(get_page_zip(), m_rec, index(), m_offsets, field_no,
1058 mtr);
1059 }
1060
/** The btr mini-transaction; committed and restarted through
commit_btr_mtr() and start_btr_mtr(). */
mtr_t *m_mtr;

/** Persistent cursor; may be nullptr (need_recalc(), rec() and table()
check for that). */
btr_pcur_t *m_pcur;

/** Index object returned by index(). */
dict_index_t *m_index;

/** Clustered index record pointer returned by rec(). */
rec_t *m_rec;

/** Record offsets array returned by get_offsets(). */
ulint *m_offsets;

/** Clustered index record block returned by block(). */
buf_block_t *m_block;

/** Operation code; OPCODE_INSERT_BULK identifies a bulk insert (see
is_bulk()). */
opcode m_op;

/** Record offset within the page. */
ulint m_rec_offset;

/** Page number where the clust rec is present. */
page_no_t m_btr_page_no;
1074 };
1075
1076 /** The context for a LOB operation. It contains the necessary information
1077 to carry out a LOB operation. */
1078 struct InsertContext : public BtrContext {
1079 /** Constructor
1080 @param[in] btr_ctx b-tree context for lob operation.
1081 @param[in] big_rec_vec array of blobs */
InsertContextInsertContext1082 InsertContext(const BtrContext &btr_ctx, const big_rec_t *big_rec_vec)
1083 : BtrContext(btr_ctx), m_big_rec_vec(big_rec_vec) {}
1084
1085 /** Get the vector containing fields to be stored externally.
1086 @return the big record vector */
get_big_rec_vecInsertContext1087 const big_rec_t *get_big_rec_vec() { return (m_big_rec_vec); }
1088
1089 /** Get the size of vector containing fields to be stored externally.
1090 @return the big record vector size */
get_big_rec_vec_sizeInsertContext1091 ulint get_big_rec_vec_size() { return (m_big_rec_vec->n_fields); }
1092
1093 /** The B-tree Context */
1094 // const BtrContext m_btr_ctx;
1095
1096 /** vector containing fields to be stored externally */
1097 const big_rec_t *m_big_rec_vec;
1098 };
1099
1100 /** Information about data stored in one BLOB page. */
1101 struct blob_page_info_t {
1102 /** Constructor.
1103 @param[in] page_no the BLOB page number.
1104 @param[in] bytes amount of uncompressed BLOB data
1105 in BLOB page in bytes.
1106 @param[in] zbytes amount of compressed BLOB data
1107 in BLOB page in bytes. */
blob_page_info_tblob_page_info_t1108 blob_page_info_t(page_no_t page_no, uint bytes, uint zbytes)
1109 : m_page_no(page_no), m_bytes(bytes), m_zbytes(zbytes) {}
1110
1111 /** Re-initialize the current object. */
resetblob_page_info_t1112 void reset() {
1113 m_page_no = 0;
1114 m_bytes = 0;
1115 m_zbytes = 0;
1116 }
1117
1118 /** Print this blob_page_into_t object into the given output stream.
1119 @param[in] out the output stream.
1120 @return the output stream. */
1121 std::ostream &print(std::ostream &out) const;
1122
1123 /** Set the compressed data size in bytes.
1124 @param[in] bytes the new compressed data size. */
set_compressed_sizeblob_page_info_t1125 void set_compressed_size(uint bytes) { m_zbytes = bytes; }
1126
1127 /** Set the uncompressed data size in bytes.
1128 @param[in] bytes the new uncompressed data size. */
set_uncompressed_sizeblob_page_info_t1129 void set_uncompressed_size(uint bytes) { m_bytes = bytes; }
1130
1131 /** Set the page number.
1132 @param[in] page_no the page number */
set_page_noblob_page_info_t1133 void set_page_no(page_no_t page_no) { m_page_no = page_no; }
1134
1135 private:
1136 /** The BLOB page number */
1137 page_no_t m_page_no;
1138
1139 /** Amount of uncompressed data (in bytes) in the BLOB page. */
1140 uint m_bytes;
1141
1142 /** Amount of compressed data (in bytes) in the BLOB page. */
1143 uint m_zbytes;
1144 };
1145
1146 inline std::ostream &operator<<(std::ostream &out,
1147 const blob_page_info_t &obj) {
1148 return (obj.print(out));
1149 }
1150
1151 /** The in-memory blob directory. Each blob contains a sequence of pages.
1152 This directory contains a list of those pages along with their metadata. */
1153 struct blob_dir_t {
1154 typedef std::vector<blob_page_info_t>::const_iterator const_iterator;
1155
1156 /** Print this blob directory into the given output stream.
1157 @param[in] out the output stream.
1158 @return the output stream. */
1159 std::ostream &print(std::ostream &out) const;
1160
1161 /** Clear the contents of this blob directory. */
clearblob_dir_t1162 void clear() { m_pages.clear(); }
1163
1164 /** Append the given blob page information.
1165 @param[in] page the blob page information to be added.
1166 @return DB_SUCCESS on success, error code on failure. */
addblob_dir_t1167 dberr_t add(const blob_page_info_t &page) {
1168 m_pages.push_back(page);
1169 return (DB_SUCCESS);
1170 }
1171
1172 /** A vector of blob pages along with its metadata. */
1173 std::vector<blob_page_info_t> m_pages;
1174 };
1175
1176 /** Overloading the global output operator to print the blob_dir_t
1177 object into an output stream.
1178 @param[in,out] out the output stream.
1179 @param[in] obj the object to be printed.
1180 @return the output stream. */
1181 inline std::ostream &operator<<(std::ostream &out, const blob_dir_t &obj) {
1182 return (obj.print(out));
1183 }
1184
1185 /** The context information for reading a single BLOB */
1186 struct ReadContext {
1187 /** Constructor
1188 @param[in] page_size page size information.
1189 @param[in] data 'internally' stored part of the field
1190 containing also the reference to the
1191 external part; must be protected by
1192 a lock or a page latch.
1193 @param[in] prefix_len length of BLOB data stored inline in
1194 the clustered index record, including
1195 the blob reference.
1196 @param[out] buf the output buffer.
1197 @param[in] len the output buffer length. */
1198 #ifdef UNIV_DEBUG
1199 /**
1200 @param[in] is_sdi true for SDI Indexes. */
1201 #endif /* UNIV_DEBUG */
ReadContextReadContext1202 ReadContext(const page_size_t &page_size, const byte *data, ulint prefix_len,
1203 byte *buf, ulint len
1204 #ifdef UNIV_DEBUG
1205 ,
1206 bool is_sdi
1207 #endif /* UNIV_DEBUG */
1208 )
1209 : m_page_size(page_size),
1210 m_data(data),
1211 m_local_len(prefix_len),
1212 m_blobref(const_cast<byte *>(data) + prefix_len -
1213 BTR_EXTERN_FIELD_REF_SIZE),
1214 m_buf(buf),
1215 m_len(len),
1216 m_lob_version(0)
1217 #ifdef UNIV_DEBUG
1218 ,
1219 m_is_sdi(is_sdi)
1220 #endif /* UNIV_DEBUG */
1221 {
1222 read_blobref();
1223 }
1224
1225 /** Read the space_id, page_no and offset information from the BLOB
1226 reference object and update the member variables. */
read_blobrefReadContext1227 void read_blobref() {
1228 m_space_id = m_blobref.space_id();
1229 m_page_no = m_blobref.page_no();
1230 m_offset = m_blobref.offset();
1231 }
1232
1233 /** Check if the BLOB reference is valid. For this particular check,
1234 if the length of the BLOB is greater than 0, it is considered
1235 valid.
1236 @return true if valid. */
is_valid_blobReadContext1237 bool is_valid_blob() const { return (m_blobref.length() > 0); }
1238
indexReadContext1239 dict_index_t *index() { return (m_index); }
1240
1241 /** The page size information. */
1242 const page_size_t &m_page_size;
1243
1244 /** The 'internally' stored part of the field containing also the
1245 reference to the external part; must be protected by a lock or a page
1246 latch */
1247 const byte *m_data;
1248
1249 /** Length (in bytes) of BLOB prefix stored inline in clustered
1250 index record. */
1251 ulint m_local_len;
1252
1253 /** The blob reference of the blob that is being read. */
1254 const ref_t m_blobref;
1255
1256 /** Buffer into which data is read. */
1257 byte *m_buf;
1258
1259 /** Length of the buffer m_buf. */
1260 ulint m_len;
1261
1262 /** The identifier of the space in which blob is available. */
1263 space_id_t m_space_id;
1264
1265 /** The page number obtained from the blob reference. */
1266 page_no_t m_page_no;
1267
1268 /** The offset information obtained from the blob reference. */
1269 ulint m_offset;
1270
1271 dict_index_t *m_index;
1272
1273 ulint m_lob_version;
1274
1275 #ifdef UNIV_DEBUG
1276 /** Is it a space dictionary index (SDI)?
1277 @return true if SDI, false otherwise. */
is_sdiReadContext1278 bool is_sdi() const { return (m_is_sdi); }
1279
1280 /** Is it a tablespace dictionary index (SDI)? */
1281 const bool m_is_sdi;
1282
1283 /** Assert that current trx is using isolation level read uncommitted.
1284 @return true if transaction is using read uncommitted, false otherwise. */
1285 bool assert_read_uncommitted() const;
1286 #endif /* UNIV_DEBUG */
1287
1288 /** The transaction that is reading. */
1289 trx_t *m_trx = nullptr;
1290 };
1291
1292 /** Fetch compressed BLOB */
1293 struct zReader {
1294 /** Constructor. */
zReaderzReader1295 explicit zReader(const ReadContext &ctx) : m_rctx(ctx) {}
1296
1297 /** Fetch the BLOB.
1298 @return DB_SUCCESS on success. */
1299 dberr_t fetch();
1300
1301 /** Fetch one BLOB page.
1302 @return DB_SUCCESS on success. */
1303 dberr_t fetch_page();
1304
1305 /** Get the length of data that has been read.
1306 @return the length of data that has been read. */
lengthzReader1307 ulint length() const { return (m_stream.total_out); }
1308
1309 private:
1310 /** Do setup of the zlib stream.
1311 @return code returned by zlib. */
1312 int setup_zstream();
1313
1314 #ifdef UNIV_DEBUG
1315 /** Assert that the local prefix is empty. For compressed row format,
1316 there is no local prefix stored. This function doesn't return if the
1317 local prefix is non-empty.
1318 @return true if local prefix is empty*/
1319 bool assert_empty_local_prefix();
1320 #endif /* UNIV_DEBUG */
1321
1322 ReadContext m_rctx;
1323
1324 /** Bytes yet to be read. */
1325 ulint m_remaining;
1326
1327 /** The zlib stream used to uncompress while fetching blob. */
1328 z_stream m_stream;
1329
1330 /** The memory heap that will be used by zlib allocator. */
1331 mem_heap_t *m_heap;
1332
1333 /* There is no latch on m_bpage directly. Instead,
1334 m_bpage is protected by the B-tree page latch that
1335 is being held on the clustered index record, or,
1336 in row_merge_copy_blobs(), by an exclusive table lock. */
1337 buf_page_t *m_bpage;
1338
1339 #ifdef UNIV_DEBUG
1340 /** The expected page type. */
1341 ulint m_page_type_ex;
1342 #endif /* UNIV_DEBUG */
1343 };
1344
1345 /** Fetch uncompressed BLOB */
1346 struct Reader {
1347 /** Constructor. */
ReaderReader1348 Reader(const ReadContext &ctx)
1349 : m_rctx(ctx), m_cur_block(nullptr), m_copied_len(0) {}
1350
1351 /** Fetch the complete or prefix of the uncompressed LOB data.
1352 @return bytes of LOB data fetched. */
1353 ulint fetch();
1354
1355 /** Fetch one BLOB page. */
1356 void fetch_page();
1357
1358 ReadContext m_rctx;
1359
1360 /** Buffer block of the current BLOB page */
1361 buf_block_t *m_cur_block;
1362
1363 /** Total bytes of LOB data that has been copied from multiple
1364 LOB pages. This is a cumulative value. When this value reaches
1365 m_rctx.m_len, then the read operation is completed. */
1366 ulint m_copied_len;
1367 };
1368
1369 /** The context information when the delete operation on LOB is
1370 taking place. */
1371 struct DeleteContext : public BtrContext {
1372 /** Constructor. */
DeleteContextDeleteContext1373 DeleteContext(const BtrContext &btr, byte *field_ref, ulint field_no,
1374 bool rollback)
1375 : BtrContext(btr),
1376 m_blobref(field_ref),
1377 m_field_no(field_no),
1378 m_rollback(rollback),
1379 m_page_size(table() == nullptr ? get_page_size()
1380 : dict_table_page_size(table())) {
1381 m_blobref.parse(m_blobref_mem);
1382 }
1383
is_ref_validDeleteContext1384 bool is_ref_valid() const {
1385 return (m_blobref_mem.m_page_no == m_blobref.page_no());
1386 }
1387
1388 /** Determine if it is compressed page format.
1389 @return true if compressed. */
is_compressedDeleteContext1390 bool is_compressed() const { return (m_page_size.is_compressed()); }
1391
1392 /** Check if tablespace supports atomic blobs.
1393 @return true if tablespace has atomic blobs. */
has_atomic_blobsDeleteContext1394 bool has_atomic_blobs() const {
1395 space_id_t space_id = m_blobref.space_id();
1396 uint32_t flags = fil_space_get_flags(space_id);
1397 return (DICT_TF_HAS_ATOMIC_BLOBS(flags));
1398 }
1399
is_delete_markedDeleteContext1400 bool is_delete_marked() const {
1401 rec_t *clust_rec = rec();
1402 if (clust_rec == nullptr) {
1403 return (true);
1404 }
1405 return (rec_get_deleted_flag(clust_rec, page_rec_is_comp(clust_rec)));
1406 }
1407
1408 #ifdef UNIV_DEBUG
1409 /** Validate the LOB reference object.
1410 @return true if valid, false otherwise. */
validate_blobrefDeleteContext1411 bool validate_blobref() const {
1412 rec_t *clust_rec = rec();
1413 if (clust_rec != nullptr) {
1414 const byte *v2 =
1415 btr_rec_get_field_ref(clust_rec, get_offsets(), m_field_no);
1416
1417 ut_ad(m_blobref.is_equal(v2));
1418 }
1419 return (true);
1420 }
1421 #endif /* UNIV_DEBUG */
1422
1423 /** Acquire an x-latch on the index page containing the clustered
1424 index record, in the given mini transaction context.
1425 @param[in] mtr the mini-transaction context. */
1426 void x_latch_rec_page(mtr_t *mtr);
1427
1428 /** the BLOB reference or external field reference. */
1429 ref_t m_blobref;
1430
1431 /** field number of externally stored column; ignored if rec == NULL */
1432 ulint m_field_no;
1433
1434 /** Is this operation part of rollback? */
1435 bool m_rollback;
1436
1437 page_size_t m_page_size;
1438
1439 private:
1440 /** Memory copy of the original LOB reference. */
1441 ref_mem_t m_blobref_mem;
1442
1443 /** Obtain the page size from the tablespace flags.
1444 @return the page size. */
get_page_sizeDeleteContext1445 page_size_t get_page_size() const {
1446 bool found;
1447 space_id_t space_id = m_blobref.space_id();
1448 const page_size_t &tmp = fil_space_get_page_size(space_id, &found);
1449 ut_ad(found);
1450 return (tmp);
1451 }
1452 };
1453
1454 /** Determine if an operation on off-page columns is an update.
1455 @param[in] op type of BLOB operation.
1456 @return true if op != OPCODE_INSERT */
btr_lob_op_is_update(opcode op)1457 inline bool btr_lob_op_is_update(opcode op) {
1458 switch (op) {
1459 case OPCODE_INSERT:
1460 case OPCODE_INSERT_BULK:
1461 return (false);
1462 case OPCODE_INSERT_UPDATE:
1463 case OPCODE_UPDATE:
1464 return (true);
1465 case OPCODE_UNKNOWN:
1466 break;
1467 }
1468
1469 ut_ad(0);
1470 return (FALSE);
1471 }
1472
/* Wrapper macros around btr_copy_externally_stored_field_prefix_func() and
btr_copy_externally_stored_field_func(). Both macros always accept an is_sdi
argument; it is forwarded to the function only when UNIV_DEBUG is defined and
is dropped otherwise, matching the conditional is_sdi parameter of the two
functions. */
#ifdef UNIV_DEBUG
#define btr_copy_externally_stored_field_prefix(              \
    trx, index, buf, len, page_size, data, is_sdi, local_len) \
  btr_copy_externally_stored_field_prefix_func(               \
      trx, index, buf, len, page_size, data, is_sdi, local_len)

#define btr_copy_externally_stored_field(trx, index, len, ver, data,         \
                                         page_size, local_len, is_sdi, heap) \
  btr_copy_externally_stored_field_func(trx, index, len, ver, data, page_size, \
                                        local_len, is_sdi, heap)

#else /* UNIV_DEBUG */
#define btr_copy_externally_stored_field_prefix(                     \
    trx, index, buf, len, page_size, data, is_sdi, local_len)        \
  btr_copy_externally_stored_field_prefix_func(trx, index, buf, len, \
                                               page_size, data, local_len)

#define btr_copy_externally_stored_field(trx, index, len, ver, data,         \
                                         page_size, local_len, is_sdi, heap) \
  btr_copy_externally_stored_field_func(trx, index, len, ver, data, page_size, \
                                        local_len, heap)
#endif /* UNIV_DEBUG */
1495
/** Copies the prefix of an externally stored field of a record.
The clustered index record must be protected by a lock or a page latch.
The btr_copy_externally_stored_field_prefix() macro expands to a call of
this function and passes is_sdi only when UNIV_DEBUG is defined.
@param[in]	trx		the current transaction object if available
or nullptr.
@param[in]	index		the clust index in which lob is read.
@param[out]	buf		the field, or a prefix of it
@param[in]	len		length of buf, in bytes
@param[in]	page_size	BLOB page size
@param[in]	data		'internally' stored part of the field
                                containing also the reference to the external
                                part; must be protected by a lock or a page
                                latch. */
#ifdef UNIV_DEBUG
/**
@param[in]	is_sdi		true for SDI indexes */
#endif /* UNIV_DEBUG */
/**
@param[in]	local_len	length of data, in bytes
@return the length of the copied field, or 0 if the column was being
or has been deleted */
ulint btr_copy_externally_stored_field_prefix_func(trx_t *trx,
                                                   const dict_index_t *index,
                                                   byte *buf, ulint len,
                                                   const page_size_t &page_size,
                                                   const byte *data,
#ifdef UNIV_DEBUG
                                                   bool is_sdi,
#endif /* UNIV_DEBUG */
                                                   ulint local_len);
1525
/** Copies an externally stored field of a record to mem heap.
The clustered index record must be protected by a lock or a page latch.
The btr_copy_externally_stored_field() macro expands to a call of this
function and passes is_sdi only when UNIV_DEBUG is defined.
@param[in]	trx		the transaction object (NOTE(review):
                                presumably may be nullptr, as for the
                                prefix variant - confirm).
@param[in]	index		the clust index in which lob is read.
@param[out]	len		length of the whole field
@param[out]	lob_version	lob version that has been read.
@param[in]	data		'internally' stored part of the field
                                containing also the reference to the external
                                part; must be protected by a lock or a page
                                latch.
@param[in]	page_size	BLOB page size
@param[in]	local_len	length of data */
#ifdef UNIV_DEBUG
/**
@param[in]	is_sdi		true for SDI Indexes */
#endif /* UNIV_DEBUG */
/**
@param[in,out]	heap		mem heap
@return the whole field copied to heap */
byte *btr_copy_externally_stored_field_func(
    trx_t *trx, const dict_index_t *index, ulint *len, size_t *lob_version,
    const byte *data, const page_size_t &page_size, ulint local_len,
#ifdef UNIV_DEBUG
    bool is_sdi,
#endif /* UNIV_DEBUG */
    mem_heap_t *heap);
1551
/** Gets the externally stored size of a record, in units of a database page.
@param[in]	rec	record
@param[in]	offsets	array returned by rec_get_offsets()
@return size of the externally stored part, in units of a database page */
ulint btr_rec_get_externally_stored_len(const rec_t *rec, const ulint *offsets);
1557
/** Purge a LOB (either compressed or uncompressed).
@param[in]	ctx		the delete operation context information.
@param[in]	index		clustered index in which LOB is present
@param[in]	trxid		the transaction that is being purged.
@param[in]	undo_no		during rollback to savepoint, purge only up to
                                this undo number.
@param[in]	rec_type	undo record type.
@param[in]	uf		the update vector for the field. */
void purge(lob::DeleteContext *ctx, dict_index_t *index, trx_id_t trxid,
           undo_no_t undo_no, ulint rec_type, const upd_field_t *uf);
1568
/** Update a portion of the given LOB.
NOTE(review): z_update() has an identical signature and description;
presumably this is the variant for uncompressed LOBs - confirm.
@param[in]	ctx		update operation context information.
@param[in]	trx		the transaction that is doing the modification.
@param[in]	index		the clustered index containing the LOB.
@param[in]	upd		update vector
@param[in]	field_no	the LOB field number
@param[in]	blobref		LOB reference stored in clust record.
@return DB_SUCCESS on success, error code on failure. */
dberr_t update(InsertContext &ctx, trx_t *trx, dict_index_t *index,
               const upd_t *upd, ulint field_no, ref_t blobref);
1579
/** Update a portion of the given LOB.
NOTE(review): update() has an identical signature and description;
presumably this is the variant for compressed LOBs - confirm.
@param[in]	ctx		update operation context information.
@param[in]	trx		the transaction that is doing the modification.
@param[in]	index		the clustered index containing the LOB.
@param[in]	upd		update vector
@param[in]	field_no	the LOB field number
@param[in]	blobref		LOB reference stored in clust record.
@return DB_SUCCESS on success, error code on failure. */
dberr_t z_update(InsertContext &ctx, trx_t *trx, dict_index_t *index,
                 const upd_t *upd, ulint field_no, ref_t blobref);
1590
/** Print information about the given LOB.
@param[in]	trx	the current transaction.
@param[in]	index	the clust index that contains the LOB.
@param[in,out]	out	the output stream into which LOB info is printed.
@param[in]	ref	the LOB reference
@param[in]	fatal	if true assert at end of function. */
void print(trx_t *trx, dict_index_t *index, std::ostream &out, ref_t ref,
           bool fatal);
1599
/** Import the given LOB. Update the creator trx id and the modifier trx
id to the given import trx id.
NOTE(review): import() has an identical signature and description;
presumably this is the variant for compressed LOBs - confirm.
@param[in]	index	clustered index containing the lob.
@param[in]	field_ref	the lob reference.
@param[in]	trx_id	the import trx id. */
void z_import(const dict_index_t *index, byte *field_ref, trx_id_t trx_id);
1606
/** Import the given LOB. Update the creator trx id and the modifier trx
id to the given import trx id.
NOTE(review): z_import() has an identical signature and description;
presumably this is the variant for uncompressed LOBs - confirm.
@param[in]	index	clustered index containing the lob.
@param[in]	field_ref	the lob reference.
@param[in]	trx_id	the import trx id. */
void import(const dict_index_t *index, byte *field_ref, trx_id_t trx_id);
1613
1614 #ifdef UNIV_DEBUG
/** Check if all the LOB references in the given clustered index record have
a valid space_id in them.
@param[in]	index	the index to which the LOB belongs.
@param[in]	rec	the clust_rec in which the LOB references are checked.
@param[in]	offsets	the field offsets of the given rec.
@return true if LOB references have valid space_id, false otherwise. */
bool rec_check_lobref_space_id(dict_index_t *index, const rec_t *rec,
                               const ulint *offsets);
1623 #endif /* UNIV_DEBUG */
1624
/** Mark a LOB as no longer partially updatable.
@param[in]	trx	the current transaction.
@param[in]	index	the clustered index to which the LOB belongs.
@param[in]	update	the update vector.
@param[in]	mtr	the mini transaction context.
@return DB_SUCCESS on success, error code on failure. */
dberr_t mark_not_partially_updatable(trx_t *trx, dict_index_t *index,
                                     const upd_t *update, mtr_t *mtr);
1633
1634 } // namespace lob
1635
1636 #endif /* lob0lob_h */
1637