1 /*****************************************************************************
2 
3 Copyright (c) 2015, 2020, Oracle and/or its affiliates. All Rights Reserved.
4 
5 This program is free software; you can redistribute it and/or modify it under
6 the terms of the GNU General Public License, version 2.0, as published by the
7 Free Software Foundation.
8 
9 This program is also distributed with certain software (including but not
10 limited to OpenSSL) that is licensed under separate terms, as designated in a
11 particular file or component or in included license documentation. The authors
12 of MySQL hereby grant you an additional permission to link the program and
13 your derivative works with the separately licensed software that they have
14 included with MySQL.
15 
16 This program is distributed in the hope that it will be useful, but WITHOUT
17 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
18 FOR A PARTICULAR PURPOSE. See the GNU General Public License, version 2.0,
19 for more details.
20 
21 You should have received a copy of the GNU General Public License along with
22 this program; if not, write to the Free Software Foundation, Inc.,
23 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
24 
25 *****************************************************************************/
26 #ifndef lob0lob_h
27 #define lob0lob_h
28 
29 #include <my_dbug.h>
30 #include "btr0pcur.h"
31 #include "dict0mem.h"
32 #include "page0page.h"
33 #include "row0log.h"
34 #include "univ.i"
35 
36 /* Uncomment the following line to generate debug data, useful to analyze
37 LOB issues. */
38 /* #define LOB_DEBUG */
39 /* #define ZLOB_DEBUG */
40 
41 struct upd_t;
42 typedef std::map<page_no_t, buf_block_t *> BlockCache;
43 
44 /**
45 @file
46 @brief Implements the large objects (LOB) module.
47 
InnoDB supports large objects (LOB).  Previously, an LOB was referred to as an
externally stored field. A large object contains a singly linked list of
50 database pages, aka LOB pages.  A reference to the first LOB page is stored
51 along with the clustered index record.  This reference is called the LOB
52 reference (lob::ref_t). A single clustered index record can have many LOB
53 references.  Secondary indexes cannot have LOB references.
54 
55 There are two types of LOB - compressed and uncompressed.
56 
57 The main operations implemented for LOB are - INSERT, DELETE and FETCH.  To
58 carry out these main operations the following classes are provided.
59 
60 Inserter     - for inserting uncompressed LOB data.
61 zInserter    - for inserting compressed LOB data.
62 BaseInserter - a base class containing common state and functions useful for
63                both Inserter and zInserter.  Inserter and zInserter derives
64                from this base class.
65 Reader       - for reading uncompressed LOB data.
66 zReader      - for reading compressed LOB data.
67 Deleter      - for deleting both compressed and uncompressed LOB data.
68 
69 For each main operation, the context information is identified separately.
70 They are as follows:
71 
InsertContext - context information for doing insert of LOB.
DeleteContext - context information for doing delete of LOB.
ReadContext   - context information for doing fetch of LOB.
75 
76 */
77 
/** Provides the large objects (LOB) module.  Previously, an LOB was referred
to as an externally stored field. */
80 namespace lob {
81 
82 /** The maximum size possible for an LOB */
83 const ulint MAX_SIZE = UINT32_MAX;
84 
85 /** The compressed LOB is stored as a collection of zlib streams.  The
86  * uncompressed LOB is divided into chunks of size Z_CHUNK_SIZE and each of
87  * these chunks are compressed individually and stored as compressed LOB.
88 data. */
89 #define KB128 (128 * 1024)
90 #define Z_CHUNK_SIZE KB128
91 
92 /** The reference in a field for which data is stored on a different page.
93 The reference is at the end of the 'locally' stored part of the field.
94 'Locally' means storage in the index record.
95 We store locally a long enough prefix of each column so that we can determine
96 the ordering parts of each index record without looking into the externally
97 stored part. */
98 /*-------------------------------------- @{ */
99 
100 /** Space identifier where stored. */
101 const ulint BTR_EXTERN_SPACE_ID = 0;
102 
103 /** page number where stored */
104 const ulint BTR_EXTERN_PAGE_NO = 4;
105 
106 /** offset of BLOB header on that page */
107 const ulint BTR_EXTERN_OFFSET = 8;
108 
109 /** Version number of LOB (LOB in new format)*/
110 const ulint BTR_EXTERN_VERSION = BTR_EXTERN_OFFSET;
111 
112 /** 8 bytes containing the length of the externally stored part of the LOB.
113 The 2 highest bits are reserved to the flags below. */
114 const ulint BTR_EXTERN_LEN = 12;
115 
116 /*-------------------------------------- @} */
117 
118 /** The most significant bit of BTR_EXTERN_LEN (i.e., the most
119 significant bit of the byte at smallest address) is set to 1 if this
120 field does not 'own' the externally stored field; only the owner field
121 is allowed to free the field in purge! */
122 const ulint BTR_EXTERN_OWNER_FLAG = 128UL;
123 
124 /** If the second most significant bit of BTR_EXTERN_LEN (i.e., the
125 second most significant bit of the byte at smallest address) is 1 then
126 it means that the externally stored field was inherited from an
127 earlier version of the row.  In rollback we are not allowed to free an
128 inherited external field. */
129 const ulint BTR_EXTERN_INHERITED_FLAG = 64UL;
130 
131 /** If the 3rd most significant bit of BTR_EXTERN_LEN is 1, then it
132 means that the externally stored field is currently being modified.
133 This is mainly used by the READ UNCOMMITTED transaction to avoid returning
134 inconsistent blob data. */
135 const ulint BTR_EXTERN_BEING_MODIFIED_FLAG = 32UL;
136 
137 /** The structure of uncompressed LOB page header */
138 
139 /** Offset within header of LOB length on this page. */
140 const ulint LOB_HDR_PART_LEN = 0;
141 
142 /** Offset within header of next BLOB part page no.
143 FIL_NULL if none */
144 const ulint LOB_HDR_NEXT_PAGE_NO = 4;
145 
146 /** Size of an uncompressed LOB page header, in bytes */
147 const ulint LOB_HDR_SIZE = 8;
148 
149 /** Start of the data on an LOB page */
150 const uint ZLOB_PAGE_DATA = FIL_PAGE_DATA;
151 
152 /** In memory representation of the LOB reference. */
153 struct ref_mem_t {
154   /** Space Identifier of the clustered index. */
155   space_id_t m_space_id;
156 
157   /** Page number of first LOB page. */
158   page_no_t m_page_no;
159 
160   /** Offset within m_page_no where LOB begins. */
161   ulint m_offset;
162 
163   /** Length of LOB */
164   ulint m_length;
165 
166   /** Whether the LOB is null. */
167   bool m_null;
168 
169   /** Whether the clustered index record owns this LOB. */
170   bool m_owner;
171 
172   /** Whether the clustered index record inherited this LOB from
173   another clustered index record. */
174   bool m_inherit;
175 
176   /** Whether the LOB is partially updated. */
177   bool m_partial;
178 
179   /** Whether the blob is being modified. */
180   bool m_being_modified;
181 
182   /** Check if the LOB has already been purged.
183   @return true if LOB has been purged, false otherwise. */
is_purgedref_mem_t184   bool is_purged() const {
185     return ((m_page_no == FIL_NULL) && (m_length == 0));
186   }
187 };
188 
189 extern const byte field_ref_almost_zero[FIELD_REF_SIZE];
190 
191 /** The struct 'lob::ref_t' represents an external field reference. The
192 reference in a field for which data is stored on a different page.  The
193 reference is at the end of the 'locally' stored part of the field.  'Locally'
194 means storage in the index record. We store locally a long enough prefix of
195 each column so that we can determine the ordering parts of each index record
196 without looking into the externally stored part. */
197 struct ref_t {
198  private:
199   /** If the LOB size is equal to or above this limit (in physical page
200   size terms), then the LOB is big enough to be partially updated.  Only
201   in this case LOB index needs to be built. */
202   static const ulint LOB_BIG_THRESHOLD_SIZE = 2;
203 
204  public:
205   /** If the total number of bytes modified in an LOB, in an update
206   operation, is less than or equal to this threshold LOB_SMALL_CHANGE_THRESHOLD,
207   then it is considered as a small change.  For small changes to LOB,
208   the changes are undo logged like any other update operation. */
209   static const ulint LOB_SMALL_CHANGE_THRESHOLD = 100;
210 
211   /** Constructor.
212   @param[in]	ptr	Pointer to the external field reference. */
ref_tref_t213   explicit ref_t(byte *ptr) : m_ref(ptr) {}
214 
215   /** For compressed LOB, if the length is less than or equal to Z_CHUNK_SIZE
216   then use the older single z stream format to store the LOB.  */
use_single_z_streamref_t217   bool use_single_z_stream() const { return (length() <= Z_CHUNK_SIZE); }
218 
219   /** For compressed LOB, if the length is less than or equal to Z_CHUNK_SIZE
220   then use the older single z stream format to store the LOB.  */
use_single_z_streamref_t221   static bool use_single_z_stream(ulint len) { return (len <= Z_CHUNK_SIZE); }
222 
223   /** Check if this LOB is big enough to do partial update.
224   @param[in]	page_size	the page size
225   @param[in]	lob_length	the size of BLOB in bytes.
226   @return true if LOB is big enough, false otherwise. */
is_bigref_t227   static bool is_big(const page_size_t &page_size, const ulint lob_length) {
228     /* Disable a performance optimization */
229     return (true);
230 
231     const ulint limit = page_size.physical() * LOB_BIG_THRESHOLD_SIZE;
232     return (lob_length >= limit);
233   }
234 
235   /** Check if this LOB is big enough to do partial update.
236   @param[in]	page_size	the page size
237   @return true if LOB is big enough, false otherwise. */
is_bigref_t238   bool is_big(const page_size_t &page_size) const {
239     /* Disable a performance optimization */
240     return (true);
241 
242     const ulint limit = page_size.physical() * LOB_BIG_THRESHOLD_SIZE;
243     const ulint lob_length = length();
244     return (lob_length >= limit);
245   }
246 
247   /** Parse the LOB reference object and copy data into the given
248   ref_mem_t object.
249   @param[out]	obj	LOB reference memory object. */
parseref_t250   void parse(ref_mem_t &obj) const {
251     obj.m_space_id = space_id();
252     obj.m_page_no = page_no();
253     obj.m_offset = offset();
254     obj.m_length = length();
255     obj.m_null = is_null();
256     obj.m_owner = is_owner();
257     obj.m_inherit = is_inherited();
258     obj.m_being_modified = is_being_modified();
259   }
260 
261   /** Copy the LOB reference into the given memory location.
262   @param[out]	field_ref	write LOB reference in this
263                                   location.*/
copyref_t264   void copy(byte *field_ref) const { memcpy(field_ref, m_ref, SIZE); }
265 
266   /** Check whether the stored external field reference is equal to the
267   given field reference.
268   @param[in]	ptr	supplied external field reference. */
is_equalref_t269   bool is_equal(const byte *ptr) const { return (m_ref == ptr); }
270 
271   /** Set the external field reference to the given memory location.
272   @param[in]	ptr	the new external field reference. */
set_refref_t273   void set_ref(byte *ptr) { m_ref = ptr; }
274 
275   /** Set the external field reference to null.
276   @param[in,out]	mtr	the mini-transaction. */
set_nullref_t277   void set_null(mtr_t *mtr) {
278     mlog_write_string(m_ref, field_ref_zero, FIELD_REF_SIZE, mtr);
279   }
280 
281   /** Check if the field reference is made of zeroes except the being_modified
282   bit.
283   @return true if field reference is made of zeroes, false otherwise. */
is_null_relaxedref_t284   bool is_null_relaxed() const {
285     return (is_null() || memcmp(field_ref_almost_zero, m_ref, SIZE) == 0);
286   }
287 
288   /** Check if the field reference is made of zeroes.
289   @return true if field reference is made of zeroes, false otherwise. */
is_nullref_t290   bool is_null() const { return (memcmp(field_ref_zero, m_ref, SIZE) == 0); }
291 
292 #ifdef UNIV_DEBUG
293   /** Check if the LOB reference is null (all zeroes) except the "is being
294   modified" bit.
295   @param[in]    ref   the LOB reference.
296   @return true if the LOB reference is null (all zeros) except the "is being
297   modified" bit, false otherwise. */
is_null_relaxedref_t298   static bool is_null_relaxed(const byte *ref) {
299     return (is_null(ref) || memcmp(field_ref_almost_zero, ref, SIZE) == 0);
300   }
301 
302   /** Check if the LOB reference is null (all zeroes).
303   @param[in]    ref   the LOB reference.
304   @return true if the LOB reference is null (all zeros), false otherwise. */
is_nullref_t305   static bool is_null(const byte *ref) {
306     return (memcmp(field_ref_zero, ref, SIZE) == 0);
307   }
308 #endif /* UNIV_DEBUG */
309 
310   /** Set the ownership flag in the blob reference.
311   @param[in]	owner	whether to own or disown.  if owner, unset
312                           the owner flag.
313   @param[in]	mtr	the mini-transaction or NULL.*/
set_ownerref_t314   void set_owner(bool owner, mtr_t *mtr) {
315     ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
316 
317     if (owner) {
318       /* owns the blob */
319       byte_val &= ~BTR_EXTERN_OWNER_FLAG;
320     } else {
321       byte_val |= BTR_EXTERN_OWNER_FLAG;
322     }
323 
324     mlog_write_ulint(m_ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
325   }
326 
327   /** Set the being_modified flag in the field reference.
328   @param[in,out]	ref	the LOB reference
329   @param[in]	modifying	true, if blob is being modified.
330   @param[in]	mtr	the mini-transaction context.*/
set_being_modifiedref_t331   static void set_being_modified(byte *ref, bool modifying, mtr_t *mtr) {
332     ulint byte_val = mach_read_from_1(ref + BTR_EXTERN_LEN);
333 
334     if (modifying) {
335       byte_val |= BTR_EXTERN_BEING_MODIFIED_FLAG;
336     } else {
337       byte_val &= ~BTR_EXTERN_BEING_MODIFIED_FLAG;
338     }
339 
340     mlog_write_ulint(ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
341   }
342 
343   /** Set the being_modified flag in the field reference.
344   @param[in]	modifying	true, if blob is being modified.
345   @param[in]	mtr	the mini-transaction context.*/
set_being_modifiedref_t346   void set_being_modified(bool modifying, mtr_t *mtr) {
347     set_being_modified(m_ref, modifying, mtr);
348   }
349 
350   /** Check if the current blob is being modified
351   @param[in]	field_ref	blob field reference
352   @return true if it is being modified, false otherwise. */
is_being_modifiedref_t353   bool static is_being_modified(const byte *field_ref) {
354     const ulint byte_val = mach_read_from_1(field_ref + BTR_EXTERN_LEN);
355     return (byte_val & BTR_EXTERN_BEING_MODIFIED_FLAG);
356   }
357 
358   /** Check if the current blob is being modified
359   @return true if it is being modified, false otherwise. */
is_being_modifiedref_t360   bool is_being_modified() const { return (is_being_modified(m_ref)); }
361 
362   /** Set the inherited flag in the field reference.
363   @param[in]	inherited	true, if inherited.
364   @param[in]	mtr		the mini transaction context.*/
set_inheritedref_t365   void set_inherited(bool inherited, mtr_t *mtr) {
366     ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
367 
368     if (inherited) {
369       byte_val |= BTR_EXTERN_INHERITED_FLAG;
370     } else {
371       byte_val &= ~BTR_EXTERN_INHERITED_FLAG;
372     }
373 
374     mlog_write_ulint(m_ref + BTR_EXTERN_LEN, byte_val, MLOG_1BYTE, mtr);
375   }
376 
377   /** Check if the current row is the owner of the blob.
378   @return true if owner, false otherwise. */
is_ownerref_t379   bool is_owner() const {
380     ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
381     return (!(byte_val & BTR_EXTERN_OWNER_FLAG));
382   }
383 
384   /** Check if the current row inherited the blob from parent row.
385   @return true if inherited, false otherwise. */
is_inheritedref_t386   bool is_inherited() const {
387     const ulint byte_val = mach_read_from_1(m_ref + BTR_EXTERN_LEN);
388     return (byte_val & BTR_EXTERN_INHERITED_FLAG);
389   }
390 
391 #ifdef UNIV_DEBUG
392   /** Read the space id from the given blob reference.
393   @param[in]   ref   the blob reference.
394   @return the space id */
space_idref_t395   static space_id_t space_id(const byte *ref) {
396     return (mach_read_from_4(ref));
397   }
398 
399   /** Read the page no from the blob reference.
400   @return the page no */
page_noref_t401   static page_no_t page_no(const byte *ref) {
402     return (mach_read_from_4(ref + BTR_EXTERN_PAGE_NO));
403   }
404 #endif /* UNIV_DEBUG */
405 
406   /** Read the space id from the blob reference.
407   @return the space id */
space_idref_t408   space_id_t space_id() const { return (mach_read_from_4(m_ref)); }
409 
410   /** Read the page number from the blob reference.
411   @return the page number */
page_noref_t412   page_no_t page_no() const {
413     return (mach_read_from_4(m_ref + BTR_EXTERN_PAGE_NO));
414   }
415 
416   /** Read the offset of blob header from the blob reference.
417   @return the offset of the blob header */
offsetref_t418   ulint offset() const { return (mach_read_from_4(m_ref + BTR_EXTERN_OFFSET)); }
419 
420   /** Read the LOB version from the blob reference.
421   @return the LOB version number. */
versionref_t422   uint32_t version() const {
423     return (mach_read_from_4(m_ref + BTR_EXTERN_VERSION));
424   }
425 
426   /** Read the length from the blob reference.
427   @return length of the blob */
lengthref_t428   ulint length() const {
429     return (mach_read_from_4(m_ref + BTR_EXTERN_LEN + 4));
430   }
431 
432   /** Update the information stored in the external field reference.
433   @param[in]	space_id	the space identifier.
434   @param[in]	page_no		the page number.
435   @param[in]	offset		the offset within the page_no
436   @param[in]	mtr		the mini trx or NULL. */
updateref_t437   void update(space_id_t space_id, ulint page_no, ulint offset, mtr_t *mtr) {
438     set_space_id(space_id, mtr);
439     set_page_no(page_no, mtr);
440     set_offset(offset, mtr);
441   }
442 
443   /** Set the space_id in the external field reference.
444   @param[in]	space_id	the space identifier.
445   @param[in]	mtr		mini-trx or NULL. */
set_space_idref_t446   void set_space_id(const space_id_t space_id, mtr_t *mtr) {
447     mlog_write_ulint(m_ref + BTR_EXTERN_SPACE_ID, space_id, MLOG_4BYTES, mtr);
448   }
449 
450   /** Set the page number in the external field reference.
451   @param[in]	page_no	the page number.
452   @param[in]	mtr	mini-trx or NULL. */
set_page_noref_t453   void set_page_no(const ulint page_no, mtr_t *mtr) {
454     mlog_write_ulint(m_ref + BTR_EXTERN_PAGE_NO, page_no, MLOG_4BYTES, mtr);
455   }
456 
457   /** Set the offset information in the external field reference.
458   @param[in]	offset	the offset.
459   @param[in]	mtr	mini-trx or NULL. */
set_offsetref_t460   void set_offset(const ulint offset, mtr_t *mtr) {
461     mlog_write_ulint(m_ref + BTR_EXTERN_OFFSET, offset, MLOG_4BYTES, mtr);
462   }
463 
464   /** Set the length of blob in the external field reference.
465   @param[in]	len	the blob length .
466   @param[in]	mtr	mini-trx or NULL. */
set_lengthref_t467   void set_length(const ulint len, mtr_t *mtr) {
468     ut_ad(len <= MAX_SIZE);
469     mlog_write_ulint(m_ref + BTR_EXTERN_LEN + 4, len, MLOG_4BYTES, mtr);
470   }
471 
472   /** Get the start of a page containing this blob reference.
473   @return start of the page */
page_alignref_t474   page_t *page_align() const { return (::page_align(m_ref)); }
475 
476 #ifdef UNIV_DEBUG
477   /** Check if the given mtr has necessary latches to update this LOB
478   reference.
479   @param[in]	mtr	the mini transaction that needs to
480                           be checked.
481   @return true if valid, false otherwise. */
validateref_t482   bool validate(mtr_t *mtr) {
483     ut_ad(m_ref != nullptr);
484     ut_ad(mtr != nullptr);
485 
486     if (mtr->get_log_mode() == MTR_LOG_NO_REDO) {
487       return (true);
488     }
489 
490     buf_block_t *block = mtr->memo_contains_page_flagged(
491         m_ref, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX);
492     ut_ad(block != nullptr);
493     return (true);
494   }
495 
496   /** Check if the space_id in the LOB reference is equal to the
497   space_id of the index to which it belongs.
498   @param[in]  index  the index to which LOB belongs.
499   @return true if space is valid in LOB reference, false otherwise. */
500   bool check_space_id(dict_index_t *index) const;
501 #endif /* UNIV_DEBUG */
502 
503   /** Check if the LOB can be partially updated. This is done by loading
504   the first page of LOB and looking at the flags.
505   @param[in]	index	the index to which LOB belongs.
506   @return true if LOB is partially updatable, false otherwise.*/
507   bool is_lob_partially_updatable(const dict_index_t *index) const;
508 
509   /** Load the first page of the LOB and mark it as not partially
510   updatable anymore.
511   @param[in]	trx		the current transaction
512   @param[in]	mtr		the mini transaction context.
513   @param[in]	index		the index dictionary object.
514   @param[in]	page_size	the page size information. */
515   void mark_not_partially_updatable(trx_t *trx, mtr_t *mtr, dict_index_t *index,
516                                     const page_size_t &page_size);
517 
518   /** Load the first page of LOB and read its page type.
519   @param[in]	index			the index object.
520   @param[in]	page_size		the page size of LOB.
521   @param[out]	is_partially_updatable	is the LOB partially updatable.
522   @return the page type of first page of LOB.*/
523   ulint get_lob_page_info(const dict_index_t *index,
524                           const page_size_t &page_size,
525                           bool &is_partially_updatable) const;
526 
527   /** Print this LOB reference into the given output stream.
528   @param[in]	out	the output stream.
529   @return the output stream. */
530   std::ostream &print(std::ostream &out) const;
531 
532   /** The size of an LOB reference object (in bytes) */
533   static const uint SIZE = BTR_EXTERN_FIELD_REF_SIZE;
534 
535  private:
536   /** Pointing to a memory of size BTR_EXTERN_FIELD_REF_SIZE */
537   byte *m_ref;
538 };
539 
540 /** Overload the global output stream operator to easily print the
541 lob::ref_t object into the output stream.
542 @param[in,out]	out		the output stream.
543 @param[in]	obj		the lob::ref_t object to be printed
544 @return the output stream. */
545 inline std::ostream &operator<<(std::ostream &out, const ref_t &obj) {
546   return (obj.print(out));
547 }
548 
549 /** LOB operation code for btr_store_big_rec_extern_fields(). */
enum opcode {
  /** Store off-page columns for a freshly inserted record. */
  OPCODE_INSERT = 0,

  /** Store off-page columns for an insert by update. */
  OPCODE_INSERT_UPDATE = 1,

  /** Store off-page columns for an update. */
  OPCODE_UPDATE = 2,

  /** Store off-page columns for a freshly inserted record by bulk. */
  OPCODE_INSERT_BULK = 3,

  /** The operation code is unknown or not important. */
  OPCODE_UNKNOWN = 4
};
567 
568 /** Stores the fields in big_rec_vec to the tablespace and puts pointers to
569 them in rec.  The extern flags in rec will have to be set beforehand. The
570 fields are stored on pages allocated from leaf node file segment of the index
571 tree.
572 
573 TODO: If the allocation extends the tablespace, it will not be redo logged, in
574 any mini-transaction.  Tablespace extension should be redo-logged, so that
575 recovery will not fail when the big_rec was written to the extended portion of
576 the file, in case the file was somehow truncated in the crash.
577 
578 @param[in]	trx		current transaction
579 @param[in,out]	pcur		a persistent cursor. if btr_mtr is restarted,
580                                 then this can be repositioned.
581 @param[in]	upd		update vector
@param[in,out]	offsets		rec_get_offsets() on pcur. the "external
                                storage" flags in offsets will correctly
                                correspond to rec when this function returns
586 @param[in]	big_rec_vec	vector containing fields to be stored
587                                 externally
588 @param[in,out]	btr_mtr		mtr containing the latches to the clustered
589                                 index. can be committed and restarted.
590 @param[in]	op		operation code
591 @return DB_SUCCESS or DB_OUT_OF_FILE_SPACE */
592 dberr_t btr_store_big_rec_extern_fields(trx_t *trx, btr_pcur_t *pcur,
593                                         const upd_t *upd, ulint *offsets,
594                                         const big_rec_t *big_rec_vec,
595                                         mtr_t *btr_mtr, opcode op)
596     MY_ATTRIBUTE((warn_unused_result));
597 
598 /** Copies an externally stored field of a record to mem heap.
599 @param[in]	trx		the current transaction.
600 @param[in]	index		the clustered index
601 @param[in]	rec		record in a clustered index; must be
602                                 protected by a lock or a page latch
603 @param[in]	offsets		array returned by rec_get_offsets()
604 @param[in]	page_size	BLOB page size
605 @param[in]	no		field number
606 @param[out]	len		length of the field
607 @param[out]	lob_version	version of lob that has been copied */
608 #ifdef UNIV_DEBUG
609 /**
610 @param[in]	is_sdi		true for SDI Indexes */
611 #endif /* UNIV_DEBUG */
612 /**
613 @param[in,out]	heap		mem heap
614 @return the field copied to heap, or NULL if the field is incomplete */
615 byte *btr_rec_copy_externally_stored_field_func(
616     trx_t *trx, const dict_index_t *index, const rec_t *rec,
617     const ulint *offsets, const page_size_t &page_size, ulint no, ulint *len,
618     size_t *lob_version,
619 #ifdef UNIV_DEBUG
620     bool is_sdi,
621 #endif /* UNIV_DEBUG */
622     mem_heap_t *heap);
623 
#ifdef UNIV_DEBUG
/* Debug builds: every argument, including is_sdi, is forwarded. */
#define btr_rec_copy_externally_stored_field(trx, index, rec, offsets, \
                                             page_size, no, len, ver,  \
                                             is_sdi, heap)             \
  btr_rec_copy_externally_stored_field_func(                           \
      trx, index, rec, offsets, page_size, no, len, ver, is_sdi, heap)
#else /* UNIV_DEBUG */
/* Non-debug builds: the callee has no is_sdi parameter, so the macro accepts
the argument and drops it. */
#define btr_rec_copy_externally_stored_field(trx, index, rec, offsets, \
                                             page_size, no, len, ver,  \
                                             is_sdi, heap)             \
  btr_rec_copy_externally_stored_field_func(trx, index, rec, offsets,  \
                                            page_size, no, len, ver, heap)
#endif /* UNIV_DEBUG */
635 
636 /** Gets the offset of the pointer to the externally stored part of a field.
637 @param[in]	offsets		array returned by rec_get_offsets()
638 @param[in]	n		index of the external field
639 @return offset of the pointer to the externally stored part */
640 ulint btr_rec_get_field_ref_offs(const ulint *offsets, ulint n);
641 
642 /** Gets a pointer to the externally stored part of a field.
643 @param rec record
644 @param offsets rec_get_offsets(rec)
645 @param n index of the externally stored field
646 @return pointer to the externally stored part */
#define btr_rec_get_field_ref(rec, offsets, n) \
  ((rec) + lob::btr_rec_get_field_ref_offs((offsets), (n)))
649 
650 /** Deallocate a buffer block that was reserved for a BLOB part.
651 @param[in]	index	index
652 @param[in]	block	buffer block
653 @param[in]	all	TRUE=remove also the compressed page
654                         if there is one
655 @param[in]	mtr	mini-transaction to commit */
656 void blob_free(dict_index_t *index, buf_block_t *block, bool all, mtr_t *mtr);
657 
658 /** The B-tree context under which the LOB operation is done. */
659 class BtrContext {
660  public:
661   /** Default Constructor */
BtrContext()662   BtrContext()
663       : m_mtr(nullptr),
664         m_pcur(nullptr),
665         m_index(nullptr),
666         m_rec(nullptr),
667         m_offsets(nullptr),
668         m_block(nullptr),
669         m_op(OPCODE_UNKNOWN),
670         m_btr_page_no(FIL_NULL) {}
671 
672   /** Constructor **/
BtrContext(mtr_t * mtr,btr_pcur_t * pcur,dict_index_t * index,rec_t * rec,ulint * offsets,buf_block_t * block)673   BtrContext(mtr_t *mtr, btr_pcur_t *pcur, dict_index_t *index, rec_t *rec,
674              ulint *offsets, buf_block_t *block)
675       : m_mtr(mtr),
676         m_pcur(pcur),
677         m_index(index),
678         m_rec(rec),
679         m_offsets(offsets),
680         m_block(block),
681         m_op(OPCODE_UNKNOWN),
682         m_btr_page_no(FIL_NULL) {
683     ut_ad(m_pcur == nullptr || rec_offs_validate());
684     ut_ad(m_block == nullptr || m_rec == nullptr ||
685           m_block->frame == page_align(m_rec));
686     ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
687   }
688 
689   /** Constructor **/
BtrContext(mtr_t * mtr,btr_pcur_t * pcur,dict_index_t * index,rec_t * rec,ulint * offsets,buf_block_t * block,opcode op)690   BtrContext(mtr_t *mtr, btr_pcur_t *pcur, dict_index_t *index, rec_t *rec,
691              ulint *offsets, buf_block_t *block, opcode op)
692       : m_mtr(mtr),
693         m_pcur(pcur),
694         m_index(index),
695         m_rec(rec),
696         m_offsets(offsets),
697         m_block(block),
698         m_op(op),
699         m_btr_page_no(FIL_NULL) {
700     ut_ad(m_pcur == nullptr || rec_offs_validate());
701     ut_ad(m_block->frame == page_align(m_rec));
702     ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
703   }
704 
705   /** Copy Constructor **/
BtrContext(const BtrContext & other)706   BtrContext(const BtrContext &other)
707       : m_mtr(other.m_mtr),
708         m_pcur(other.m_pcur),
709         m_index(other.m_index),
710         m_rec(other.m_rec),
711         m_offsets(other.m_offsets),
712         m_block(other.m_block),
713         m_op(other.m_op),
714         m_btr_page_no(other.m_btr_page_no) {}
715 
716   /** Marks non-updated off-page fields as disowned by this record.
717   The ownership must be transferred to the updated record which is
718   inserted elsewhere in the index tree. In purge only the owner of
719   externally stored field is allowed to free the field.
720   @param[in]	update		update vector. */
721   void disown_inherited_fields(const upd_t *update);
722 
723   /** Sets the ownership bit of an externally stored field in a record.
724   @param[in]		i		field number
725   @param[in]		val		value to set */
set_ownership_of_extern_field(ulint i,ibool val)726   void set_ownership_of_extern_field(ulint i, ibool val) {
727     byte *data;
728     ulint local_len;
729 
730     data =
731         const_cast<byte *>(rec_get_nth_field(m_rec, m_offsets, i, &local_len));
732     ut_ad(rec_offs_nth_extern(m_offsets, i));
733     ut_a(local_len >= BTR_EXTERN_FIELD_REF_SIZE);
734 
735     local_len -= BTR_EXTERN_FIELD_REF_SIZE;
736 
737     ref_t ref(data + local_len);
738 
739     ut_a(val || ref.is_owner());
740 
741     page_zip_des_t *page_zip = get_page_zip();
742 
743     if (page_zip) {
744       ref.set_owner(val, nullptr);
745       page_zip_write_blob_ptr(page_zip, m_rec, m_index, m_offsets, i, m_mtr);
746     } else {
747       ref.set_owner(val, m_mtr);
748     }
749   }
750 
751   /** Marks all extern fields in a record as owned by the record.
752   This function should be called if the delete mark of a record is
753   removed: a not delete marked record always owns all its extern
754   fields.*/
unmark_extern_fields()755   void unmark_extern_fields() {
756     ut_ad(!rec_offs_comp(m_offsets) || !rec_get_node_ptr_flag(m_rec));
757 
758     ulint n = rec_offs_n_fields(m_offsets);
759 
760     if (!rec_offs_any_extern(m_offsets)) {
761       return;
762     }
763 
764     for (ulint i = 0; i < n; i++) {
765       if (rec_offs_nth_extern(m_offsets, i)) {
766         set_ownership_of_extern_field(i, TRUE);
767       }
768     }
769   }
770 
771   /** Frees the externally stored fields for a record.
772   @param[in]	trx_id		transaction identifier whose LOB is
773                                   being freed.
774   @param[in]	undo_no		undo number within a transaction whose
775                                   LOB is being freed.
776   @param[in]	rollback	performing rollback?
777   @param[in]	rec_type	undo record type.*/
778   void free_externally_stored_fields(trx_id_t trx_id, undo_no_t undo_no,
779                                      bool rollback, ulint rec_type);
780 
781   /** Frees the externally stored fields for a record, if the field
782   is mentioned in the update vector.
783   @param[in]	trx_id		the transaction identifier.
784   @param[in]	undo_no		undo number within a transaction whose
785                                   LOB is being freed.
786   @param[in]	update		update vector
787   @param[in]	rollback	performing rollback? */
788   void free_updated_extern_fields(trx_id_t trx_id, undo_no_t undo_no,
789                                   const upd_t *update, bool rollback);
790 
791   /** Gets the compressed page descriptor
792   @return the compressed page descriptor. */
get_page_zip()793   page_zip_des_t *get_page_zip() const {
794     return (buf_block_get_page_zip(m_block));
795   }
796 
797   /** Get the page number of clustered index block.
798   @return the page number. */
get_page_no()799   page_no_t get_page_no() const {
800     return (page_get_page_no(buf_block_get_frame(m_block)));
801   }
802 
803   /** Get the record offset within page of the clustered index record.
804   @return the record offset. */
get_rec_offset()805   ulint get_rec_offset() const { return (page_offset(m_rec)); }
806 
807   /** Check if there is a need to recalculate the context information.
808   @return true if there is a need to recalculate, false otherwise. */
need_recalc()809   bool need_recalc() const {
810     return ((m_pcur != nullptr) && (m_rec != btr_pcur_get_rec(m_pcur)));
811   }
812 
813   /** Get the clustered index record pointer.
814   @return clustered index record pointer. */
rec()815   rec_t *rec() const {
816     ut_ad(m_pcur == nullptr || m_rec == btr_pcur_get_rec(m_pcur));
817     return (m_rec);
818   }
819 
820   /** Get the LOB reference for the given field number.
821   @param[in]	field_no	field number.
822   @return LOB reference (aka external field reference).*/
get_field_ref(ulint field_no)823   byte *get_field_ref(ulint field_no) const {
824     return (btr_rec_get_field_ref(m_rec, get_offsets(), field_no));
825   }
826 
827 #ifdef UNIV_DEBUG
828   /** Validate the current BLOB context object.  The BLOB context object
829   is valid if the necessary latches are being held by the
830   mini-transaction of the B-tree (btr mtr).  Does not return if the
831   validation fails.
832   @return true if valid */
validate()833   bool validate() const {
834     rec_offs_make_valid(rec(), index(), m_offsets);
835 
836     ut_ad(m_mtr->memo_contains_page_flagged(
837               m_rec, MTR_MEMO_PAGE_X_FIX | MTR_MEMO_PAGE_SX_FIX) ||
838           table()->is_intrinsic());
839 
840     ut_ad(mtr_memo_contains_flagged(m_mtr, dict_index_get_lock(index()),
841                                     MTR_MEMO_SX_LOCK | MTR_MEMO_X_LOCK) ||
842           table()->is_intrinsic());
843 
844     return (true);
845   }
846 
847   /** Check to see if all pointers to externally stored columns in
848   the record must be valid.
849   @return true if all blob references are valid.
850   @return will not return if any blob reference is invalid. */
are_all_blobrefs_valid()851   bool are_all_blobrefs_valid() const {
852     for (ulint i = 0; i < rec_offs_n_fields(m_offsets); i++) {
853       if (!rec_offs_nth_extern(m_offsets, i)) {
854         continue;
855       }
856 
857       byte *field_ref = btr_rec_get_field_ref(rec(), m_offsets, i);
858 
859       ref_t blobref(field_ref);
860 
861       /* The pointer must not be zero if the operation
862       succeeded. */
863       ut_a(!blobref.is_null());
864 
865       /* The column must not be disowned by this record. */
866       ut_a(blobref.is_owner());
867     }
868 
869     return (true);
870   }
871 #endif /* UNIV_DEBUG */
872 
873   /** Determine whether current operation is a bulk insert operation.
874   @return true, if bulk insert operation, false otherwise. */
is_bulk()875   bool is_bulk() const { return (m_op == OPCODE_INSERT_BULK); }
876 
877   /** Get the beginning of the B-tree clustered index page frame
878   that contains the current clustered index record (m_rec).
879   @return the page frame containing the clust rec. */
rec_frame()880   const page_t *rec_frame() const {
881     ut_ad(m_block->frame == page_align(m_rec));
882     return (m_block->frame);
883   }
884 
885   /** Commit the mini transaction that is holding the latches
886   of the clustered index record block. */
commit_btr_mtr()887   void commit_btr_mtr() { m_mtr->commit(); }
888 
889   /** Start the mini transaction that will be holding the latches
890   of the clustered index record block. */
start_btr_mtr()891   void start_btr_mtr() {
892     mtr_log_t log_mode = m_mtr->get_log_mode();
893     m_mtr->start();
894     m_mtr->set_log_mode(log_mode);
895   }
896 
897   /** Get the page number of clustered index record.
898   @return page number of clustered index record. */
get_btr_page_no()899   page_no_t get_btr_page_no() const {
900     return (page_get_page_no(buf_block_get_frame(m_block)));
901   }
902 
903 #ifndef UNIV_HOTBACKUP
904 
905   /** Increment the buffer fix count of the clustered index record
906   block. */
rec_block_fix()907   void rec_block_fix() {
908     m_rec_offset = page_offset(m_rec);
909     m_btr_page_no = page_get_page_no(buf_block_get_frame(m_block));
910     buf_block_buf_fix_inc(m_block, __FILE__, __LINE__);
911   }
912 
913   /** Decrement the buffer fix count of the clustered index record
914   block. */
rec_block_unfix()915   void rec_block_unfix() {
916     space_id_t space_id = space();
917     page_id_t page_id(space_id, m_btr_page_no);
918     page_size_t page_size(dict_table_page_size(table()));
919     page_cur_t *page_cur = &m_pcur->m_btr_cur.page_cur;
920 
921     mtr_x_lock(dict_index_get_lock(index()), m_mtr);
922 
923     page_cur->block =
924         btr_block_get(page_id, page_size, RW_X_LATCH, index(), m_mtr);
925 
926     page_cur->rec = buf_block_get_frame(page_cur->block) + m_rec_offset;
927 
928     buf_block_buf_fix_dec(page_cur->block);
929 
930     recalc();
931   }
932 #endif /* !UNIV_HOTBACKUP */
933 
934   /** Restore the position of the persistent cursor. */
restore_position()935   void restore_position() {
936     ut_ad(m_pcur->m_rel_pos == BTR_PCUR_ON);
937     bool ret = btr_pcur_restore_position(m_pcur->m_latch_mode, m_pcur, m_mtr);
938 
939     ut_a(ret);
940 
941     recalc();
942   }
943 
944   /** Get the index object.
945   @return index object */
index()946   dict_index_t *index() const { return (m_index); }
947 
948   /** Get the table object.
949   @return table object or NULL. */
table()950   dict_table_t *table() const {
951     dict_table_t *result = nullptr;
952 
953     if (m_pcur != nullptr && m_pcur->index() != nullptr) {
954       result = m_pcur->index()->table;
955     }
956 
957     return (result);
958   }
959 
960   /** Get the space id.
961   @return space id. */
space()962   space_id_t space() const { return (index()->space); }
963 
964   /** Obtain the page size of the underlying table.
965   @return page size of the underlying table. */
page_size()966   const page_size_t page_size() const {
967     return (dict_table_page_size(table()));
968   }
969 
970   /** Determine the extent size (in pages) for the underlying table
971   @return extent size in pages */
pages_in_extent()972   page_no_t pages_in_extent() const {
973     return (dict_table_extent_size(table()));
974   }
975 
976   /** Check if there is enough space in the redo log file.  The btr
977   mini transaction will be restarted. */
check_redolog()978   void check_redolog() {
979     is_bulk() ? check_redolog_bulk() : check_redolog_normal();
980   }
981 
982   /** The btr mini transaction will be restarted. */
restart_mtr()983   void restart_mtr() { is_bulk() ? restart_mtr_bulk() : restart_mtr_normal(); }
984 
985   /** Mark the nth field as externally stored.
986   @param[in]	field_no	the field number. */
make_nth_extern(ulint field_no)987   void make_nth_extern(ulint field_no) {
988     rec_offs_make_nth_extern(m_offsets, field_no);
989   }
990 
991   /** Get the log mode of the btr mtr.
992   @return the log mode. */
get_log_mode()993   mtr_log_t get_log_mode() { return (m_mtr->get_log_mode()); }
994 
995   /** Get flush observer
996   @return flush observer */
get_flush_observer()997   FlushObserver *get_flush_observer() const {
998     return (m_mtr->get_flush_observer());
999   }
1000 
1001   /** Get the record offsets array.
1002   @return the record offsets array. */
get_offsets()1003   ulint *get_offsets() const { return (m_offsets); }
1004 
1005   /** Validate the record offsets array.
1006   @return true if validation succeeds, false otherwise. */
rec_offs_validate()1007   bool rec_offs_validate() const {
1008     if (m_rec != nullptr) {
1009       ut_ad(::rec_offs_validate(m_rec, m_index, m_offsets));
1010     }
1011     return (true);
1012   }
1013 
1014   /** Get the associated mini-transaction.
1015   @return the mini transaction. */
get_mtr()1016   mtr_t *get_mtr() { return (m_mtr); }
1017 
1018   /** Get the pointer to the clustered record block.
1019   @return pointer to the clustered rec block. */
block()1020   buf_block_t *block() const { return (m_block); }
1021 
1022   /** Save the position of the persistent cursor. */
store_position()1023   void store_position() { btr_pcur_store_position(m_pcur, m_mtr); }
1024 
1025   /** Check if there is enough space in log file. Commit and re-start the
1026   mini transaction. */
1027   void check_redolog_normal();
1028 
1029   /** When bulk load is being done, check if there is enough space in redo
1030   log file. */
1031   void check_redolog_bulk();
1032 
1033   /** Commit and re-start the mini transaction. */
1034   void restart_mtr_normal();
1035 
1036   /** When bulk load is being done, Commit and re-start the mini transaction. */
1037   void restart_mtr_bulk();
1038 
1039   /** Recalculate some of the members after restoring the persistent
1040   cursor. */
recalc()1041   void recalc() {
1042     m_block = btr_pcur_get_block(m_pcur);
1043     m_rec = btr_pcur_get_rec(m_pcur);
1044     m_btr_page_no = page_get_page_no(buf_block_get_frame(m_block));
1045     m_rec_offset = page_offset(m_rec);
1046 
1047     rec_offs_make_valid(rec(), index(), m_offsets);
1048   }
1049 
1050   /** Write a blob reference of a field into a clustered index record
1051   in a compressed leaf page. The information must already have been
1052   updated on the uncompressed page.
1053   @param[in]	field_no	the blob field number
1054   @param[in]	mtr		the mini transaction to update
1055                                   blob page. */
zblob_write_blobref(ulint field_no,mtr_t * mtr)1056   void zblob_write_blobref(ulint field_no, mtr_t *mtr) {
1057     page_zip_write_blob_ptr(get_page_zip(), m_rec, index(), m_offsets, field_no,
1058                             mtr);
1059   }
1060 
  /** The B-tree mini-transaction (btr mtr); committed and restarted through
  commit_btr_mtr() and start_btr_mtr(). */
  mtr_t *m_mtr;
  /** The persistent cursor; may be nullptr (see need_recalc() and table()). */
  btr_pcur_t *m_pcur;
  /** The index object returned by index(). */
  dict_index_t *m_index;
  /** The clustered index record pointer returned by rec(). */
  rec_t *m_rec;
  /** The record offsets array returned by get_offsets(). */
  ulint *m_offsets;
  /** The block containing the clustered index record. */
  buf_block_t *m_block;
  /** The operation code; compared against OPCODE_INSERT_BULK by is_bulk(). */
  opcode m_op;

  /** Record offset within the page. */
  ulint m_rec_offset;

  /** Page number where the clust rec is present. */
  page_no_t m_btr_page_no;
1074 };
1075 
1076 /** The context for a LOB operation.  It contains the necessary information
1077 to carry out a LOB operation. */
1078 struct InsertContext : public BtrContext {
1079   /** Constructor
1080   @param[in]	btr_ctx		b-tree context for lob operation.
1081   @param[in]	big_rec_vec	array of blobs */
InsertContextInsertContext1082   InsertContext(const BtrContext &btr_ctx, const big_rec_t *big_rec_vec)
1083       : BtrContext(btr_ctx), m_big_rec_vec(big_rec_vec) {}
1084 
1085   /** Get the vector containing fields to be stored externally.
1086   @return the big record vector */
get_big_rec_vecInsertContext1087   const big_rec_t *get_big_rec_vec() { return (m_big_rec_vec); }
1088 
1089   /** Get the size of vector containing fields to be stored externally.
1090   @return the big record vector size */
get_big_rec_vec_sizeInsertContext1091   ulint get_big_rec_vec_size() { return (m_big_rec_vec->n_fields); }
1092 
1093   /** The B-tree Context */
1094   // const BtrContext	m_btr_ctx;
1095 
1096   /** vector containing fields to be stored externally */
1097   const big_rec_t *m_big_rec_vec;
1098 };
1099 
1100 /** Information about data stored in one BLOB page. */
1101 struct blob_page_info_t {
1102   /** Constructor.
1103   @param[in]	page_no		the BLOB page number.
1104   @param[in]	bytes		amount of uncompressed BLOB data
1105                                   in BLOB page in bytes.
1106   @param[in]	zbytes		amount of compressed BLOB data
1107                                   in BLOB page in bytes. */
blob_page_info_tblob_page_info_t1108   blob_page_info_t(page_no_t page_no, uint bytes, uint zbytes)
1109       : m_page_no(page_no), m_bytes(bytes), m_zbytes(zbytes) {}
1110 
1111   /** Re-initialize the current object. */
resetblob_page_info_t1112   void reset() {
1113     m_page_no = 0;
1114     m_bytes = 0;
1115     m_zbytes = 0;
1116   }
1117 
1118   /** Print this blob_page_into_t object into the given output stream.
1119   @param[in]	out	the output stream.
1120   @return the output stream. */
1121   std::ostream &print(std::ostream &out) const;
1122 
1123   /** Set the compressed data size in bytes.
1124   @param[in]	bytes	the new compressed data size. */
set_compressed_sizeblob_page_info_t1125   void set_compressed_size(uint bytes) { m_zbytes = bytes; }
1126 
1127   /** Set the uncompressed data size in bytes.
1128   @param[in]	bytes	the new uncompressed data size. */
set_uncompressed_sizeblob_page_info_t1129   void set_uncompressed_size(uint bytes) { m_bytes = bytes; }
1130 
1131   /** Set the page number.
1132   @param[in]	page_no		the page number */
set_page_noblob_page_info_t1133   void set_page_no(page_no_t page_no) { m_page_no = page_no; }
1134 
1135  private:
1136   /** The BLOB page number */
1137   page_no_t m_page_no;
1138 
1139   /** Amount of uncompressed data (in bytes) in the BLOB page. */
1140   uint m_bytes;
1141 
1142   /** Amount of compressed data (in bytes) in the BLOB page. */
1143   uint m_zbytes;
1144 };
1145 
1146 inline std::ostream &operator<<(std::ostream &out,
1147                                 const blob_page_info_t &obj) {
1148   return (obj.print(out));
1149 }
1150 
1151 /** The in-memory blob directory.  Each blob contains a sequence of pages.
1152 This directory contains a list of those pages along with their metadata. */
1153 struct blob_dir_t {
1154   typedef std::vector<blob_page_info_t>::const_iterator const_iterator;
1155 
1156   /** Print this blob directory into the given output stream.
1157   @param[in]	out	the output stream.
1158   @return the output stream. */
1159   std::ostream &print(std::ostream &out) const;
1160 
1161   /** Clear the contents of this blob directory. */
clearblob_dir_t1162   void clear() { m_pages.clear(); }
1163 
1164   /** Append the given blob page information.
1165   @param[in]	page	the blob page information to be added.
1166   @return DB_SUCCESS on success, error code on failure. */
addblob_dir_t1167   dberr_t add(const blob_page_info_t &page) {
1168     m_pages.push_back(page);
1169     return (DB_SUCCESS);
1170   }
1171 
1172   /** A vector of blob pages along with its metadata. */
1173   std::vector<blob_page_info_t> m_pages;
1174 };
1175 
1176 /** Overloading the global output operator to print the blob_dir_t
1177 object into an output stream.
1178 @param[in,out]	out	the output stream.
1179 @param[in]	obj	the object to be printed.
1180 @return the output stream. */
1181 inline std::ostream &operator<<(std::ostream &out, const blob_dir_t &obj) {
1182   return (obj.print(out));
1183 }
1184 
1185 /** The context information for reading a single BLOB */
1186 struct ReadContext {
1187   /** Constructor
1188   @param[in]	page_size	page size information.
1189   @param[in]	data		'internally' stored part of the field
1190                                   containing also the reference to the
1191                                   external part; must be protected by
1192                                   a lock or a page latch.
1193   @param[in]	prefix_len	length of BLOB data stored inline in
1194                                   the clustered index record, including
1195                                   the blob reference.
1196   @param[out]	buf		the output buffer.
1197   @param[in]	len		the output buffer length. */
1198 #ifdef UNIV_DEBUG
1199   /**
1200   @param[in]	is_sdi		true for SDI Indexes. */
1201 #endif /* UNIV_DEBUG */
ReadContextReadContext1202   ReadContext(const page_size_t &page_size, const byte *data, ulint prefix_len,
1203               byte *buf, ulint len
1204 #ifdef UNIV_DEBUG
1205               ,
1206               bool is_sdi
1207 #endif /* UNIV_DEBUG */
1208               )
1209       : m_page_size(page_size),
1210         m_data(data),
1211         m_local_len(prefix_len),
1212         m_blobref(const_cast<byte *>(data) + prefix_len -
1213                   BTR_EXTERN_FIELD_REF_SIZE),
1214         m_buf(buf),
1215         m_len(len),
1216         m_lob_version(0)
1217 #ifdef UNIV_DEBUG
1218         ,
1219         m_is_sdi(is_sdi)
1220 #endif /* UNIV_DEBUG */
1221   {
1222     read_blobref();
1223   }
1224 
1225   /** Read the space_id, page_no and offset information from the BLOB
1226   reference object and update the member variables. */
read_blobrefReadContext1227   void read_blobref() {
1228     m_space_id = m_blobref.space_id();
1229     m_page_no = m_blobref.page_no();
1230     m_offset = m_blobref.offset();
1231   }
1232 
1233   /** Check if the BLOB reference is valid.  For this particular check,
1234   if the length of the BLOB is greater than 0, it is considered
1235   valid.
1236   @return true if valid. */
is_valid_blobReadContext1237   bool is_valid_blob() const { return (m_blobref.length() > 0); }
1238 
indexReadContext1239   dict_index_t *index() { return (m_index); }
1240 
1241   /** The page size information. */
1242   const page_size_t &m_page_size;
1243 
1244   /** The 'internally' stored part of the field containing also the
1245   reference to the external part; must be protected by a lock or a page
1246   latch */
1247   const byte *m_data;
1248 
1249   /** Length (in bytes) of BLOB prefix stored inline in clustered
1250   index record. */
1251   ulint m_local_len;
1252 
1253   /** The blob reference of the blob that is being read. */
1254   const ref_t m_blobref;
1255 
1256   /** Buffer into which data is read. */
1257   byte *m_buf;
1258 
1259   /** Length of the buffer m_buf. */
1260   ulint m_len;
1261 
1262   /** The identifier of the space in which blob is available. */
1263   space_id_t m_space_id;
1264 
1265   /** The page number obtained from the blob reference. */
1266   page_no_t m_page_no;
1267 
1268   /** The offset information obtained from the blob reference. */
1269   ulint m_offset;
1270 
1271   dict_index_t *m_index;
1272 
1273   ulint m_lob_version;
1274 
1275 #ifdef UNIV_DEBUG
1276   /** Is it a space dictionary index (SDI)?
1277   @return true if SDI, false otherwise. */
is_sdiReadContext1278   bool is_sdi() const { return (m_is_sdi); }
1279 
1280   /** Is it a tablespace dictionary index (SDI)? */
1281   const bool m_is_sdi;
1282 
1283   /** Assert that current trx is using isolation level read uncommitted.
1284   @return true if transaction is using read uncommitted, false otherwise. */
1285   bool assert_read_uncommitted() const;
1286 #endif /* UNIV_DEBUG */
1287 
1288   /** The transaction that is reading. */
1289   trx_t *m_trx = nullptr;
1290 };
1291 
1292 /** Fetch compressed BLOB */
1293 struct zReader {
1294   /** Constructor. */
zReaderzReader1295   explicit zReader(const ReadContext &ctx) : m_rctx(ctx) {}
1296 
1297   /** Fetch the BLOB.
1298   @return DB_SUCCESS on success. */
1299   dberr_t fetch();
1300 
1301   /** Fetch one BLOB page.
1302   @return DB_SUCCESS on success. */
1303   dberr_t fetch_page();
1304 
1305   /** Get the length of data that has been read.
1306   @return the length of data that has been read. */
lengthzReader1307   ulint length() const { return (m_stream.total_out); }
1308 
1309  private:
1310   /** Do setup of the zlib stream.
1311   @return code returned by zlib. */
1312   int setup_zstream();
1313 
1314 #ifdef UNIV_DEBUG
1315   /** Assert that the local prefix is empty.  For compressed row format,
1316   there is no local prefix stored.  This function doesn't return if the
1317   local prefix is non-empty.
1318   @return true if local prefix is empty*/
1319   bool assert_empty_local_prefix();
1320 #endif /* UNIV_DEBUG */
1321 
1322   ReadContext m_rctx;
1323 
1324   /** Bytes yet to be read. */
1325   ulint m_remaining;
1326 
1327   /** The zlib stream used to uncompress while fetching blob. */
1328   z_stream m_stream;
1329 
1330   /** The memory heap that will be used by zlib allocator. */
1331   mem_heap_t *m_heap;
1332 
1333   /* There is no latch on m_bpage directly.  Instead,
1334   m_bpage is protected by the B-tree page latch that
1335   is being held on the clustered index record, or,
1336   in row_merge_copy_blobs(), by an exclusive table lock. */
1337   buf_page_t *m_bpage;
1338 
1339 #ifdef UNIV_DEBUG
1340   /** The expected page type. */
1341   ulint m_page_type_ex;
1342 #endif /* UNIV_DEBUG */
1343 };
1344 
1345 /** Fetch uncompressed BLOB */
1346 struct Reader {
1347   /** Constructor. */
ReaderReader1348   Reader(const ReadContext &ctx)
1349       : m_rctx(ctx), m_cur_block(nullptr), m_copied_len(0) {}
1350 
1351   /** Fetch the complete or prefix of the uncompressed LOB data.
1352   @return bytes of LOB data fetched. */
1353   ulint fetch();
1354 
1355   /** Fetch one BLOB page. */
1356   void fetch_page();
1357 
1358   ReadContext m_rctx;
1359 
1360   /** Buffer block of the current BLOB page */
1361   buf_block_t *m_cur_block;
1362 
1363   /** Total bytes of LOB data that has been copied from multiple
1364   LOB pages. This is a cumulative value.  When this value reaches
1365   m_rctx.m_len, then the read operation is completed. */
1366   ulint m_copied_len;
1367 };
1368 
1369 /** The context information when the delete operation on LOB is
1370 taking place. */
1371 struct DeleteContext : public BtrContext {
1372   /** Constructor. */
DeleteContextDeleteContext1373   DeleteContext(const BtrContext &btr, byte *field_ref, ulint field_no,
1374                 bool rollback)
1375       : BtrContext(btr),
1376         m_blobref(field_ref),
1377         m_field_no(field_no),
1378         m_rollback(rollback),
1379         m_page_size(table() == nullptr ? get_page_size()
1380                                        : dict_table_page_size(table())) {
1381     m_blobref.parse(m_blobref_mem);
1382   }
1383 
is_ref_validDeleteContext1384   bool is_ref_valid() const {
1385     return (m_blobref_mem.m_page_no == m_blobref.page_no());
1386   }
1387 
1388   /** Determine if it is compressed page format.
1389   @return true if compressed. */
is_compressedDeleteContext1390   bool is_compressed() const { return (m_page_size.is_compressed()); }
1391 
1392   /** Check if tablespace supports atomic blobs.
1393   @return true if tablespace has atomic blobs. */
has_atomic_blobsDeleteContext1394   bool has_atomic_blobs() const {
1395     space_id_t space_id = m_blobref.space_id();
1396     uint32_t flags = fil_space_get_flags(space_id);
1397     return (DICT_TF_HAS_ATOMIC_BLOBS(flags));
1398   }
1399 
is_delete_markedDeleteContext1400   bool is_delete_marked() const {
1401     rec_t *clust_rec = rec();
1402     if (clust_rec == nullptr) {
1403       return (true);
1404     }
1405     return (rec_get_deleted_flag(clust_rec, page_rec_is_comp(clust_rec)));
1406   }
1407 
1408 #ifdef UNIV_DEBUG
1409   /** Validate the LOB reference object.
1410   @return true if valid, false otherwise. */
validate_blobrefDeleteContext1411   bool validate_blobref() const {
1412     rec_t *clust_rec = rec();
1413     if (clust_rec != nullptr) {
1414       const byte *v2 =
1415           btr_rec_get_field_ref(clust_rec, get_offsets(), m_field_no);
1416 
1417       ut_ad(m_blobref.is_equal(v2));
1418     }
1419     return (true);
1420   }
1421 #endif /* UNIV_DEBUG */
1422 
1423   /** Acquire an x-latch on the index page containing the clustered
1424   index record, in the given mini transaction context.
1425   @param[in]	mtr	the mini-transaction context. */
1426   void x_latch_rec_page(mtr_t *mtr);
1427 
1428   /** the BLOB reference or external field reference. */
1429   ref_t m_blobref;
1430 
1431   /** field number of externally stored column; ignored if rec == NULL */
1432   ulint m_field_no;
1433 
1434   /** Is this operation part of rollback? */
1435   bool m_rollback;
1436 
1437   page_size_t m_page_size;
1438 
1439  private:
1440   /** Memory copy of the original LOB reference. */
1441   ref_mem_t m_blobref_mem;
1442 
1443   /** Obtain the page size from the tablespace flags.
1444   @return the page size. */
get_page_sizeDeleteContext1445   page_size_t get_page_size() const {
1446     bool found;
1447     space_id_t space_id = m_blobref.space_id();
1448     const page_size_t &tmp = fil_space_get_page_size(space_id, &found);
1449     ut_ad(found);
1450     return (tmp);
1451   }
1452 };
1453 
1454 /** Determine if an operation on off-page columns is an update.
1455 @param[in]	op	type of BLOB operation.
1456 @return true if op != OPCODE_INSERT */
btr_lob_op_is_update(opcode op)1457 inline bool btr_lob_op_is_update(opcode op) {
1458   switch (op) {
1459     case OPCODE_INSERT:
1460     case OPCODE_INSERT_BULK:
1461       return (false);
1462     case OPCODE_INSERT_UPDATE:
1463     case OPCODE_UPDATE:
1464       return (true);
1465     case OPCODE_UNKNOWN:
1466       break;
1467   }
1468 
1469   ut_ad(0);
1470   return (FALSE);
1471 }
1472 
/* Wrapper macros for the two LOB copy functions.  Callers always pass the
is_sdi argument.  In debug builds it is forwarded to the _func variants; in
non-debug builds it is dropped, because the _func declarations only have the
is_sdi parameter under UNIV_DEBUG. */
#ifdef UNIV_DEBUG
/* Debug build: forward all arguments, including is_sdi. */
#define btr_copy_externally_stored_field_prefix(              \
    trx, index, buf, len, page_size, data, is_sdi, local_len) \
  btr_copy_externally_stored_field_prefix_func(               \
      trx, index, buf, len, page_size, data, is_sdi, local_len)

#define btr_copy_externally_stored_field(trx, index, len, ver, data,           \
                                         page_size, local_len, is_sdi, heap)   \
  btr_copy_externally_stored_field_func(trx, index, len, ver, data, page_size, \
                                        local_len, is_sdi, heap)

#else /* UNIV_DEBUG */
/* Non-debug build: is_sdi is accepted by the macro but not forwarded. */
#define btr_copy_externally_stored_field_prefix(                     \
    trx, index, buf, len, page_size, data, is_sdi, local_len)        \
  btr_copy_externally_stored_field_prefix_func(trx, index, buf, len, \
                                               page_size, data, local_len)

#define btr_copy_externally_stored_field(trx, index, len, ver, data,           \
                                         page_size, local_len, is_sdi, heap)   \
  btr_copy_externally_stored_field_func(trx, index, len, ver, data, page_size, \
                                        local_len, heap)
#endif /* UNIV_DEBUG */
1495 
1496 /** Copies the prefix of an externally stored field of a record.
1497 The clustered index record must be protected by a lock or a page latch.
1498 @param[in]	trx		the current transaction object if available
1499 or nullptr.
1500 @param[in]	index		the clust index in which lob is read.
1501 @param[out]	buf		the field, or a prefix of it
1502 @param[in]	len		length of buf, in bytes
1503 @param[in]	page_size	BLOB page size
1504 @param[in]	data		'internally' stored part of the field
1505                                 containing also the reference to the external
1506                                 part; must be protected by a lock or a page
1507                                 latch. */
1508 #ifdef UNIV_DEBUG
1509 /**
1510 @param[in]	is_sdi		true for SDI indexes */
1511 #endif /* UNIV_DEBUG */
1512 /**
1513 @param[in]	local_len	length of data, in bytes
1514 @return the length of the copied field, or 0 if the column was being
1515 or has been deleted */
1516 ulint btr_copy_externally_stored_field_prefix_func(trx_t *trx,
1517                                                    const dict_index_t *index,
1518                                                    byte *buf, ulint len,
1519                                                    const page_size_t &page_size,
1520                                                    const byte *data,
1521 #ifdef UNIV_DEBUG
1522                                                    bool is_sdi,
1523 #endif /* UNIV_DEBUG */
1524                                                    ulint local_len);
1525 
/** Copies an externally stored field of a record to mem heap.
The clustered index record must be protected by a lock or a page latch.
@param[in]	trx		the transaction object passed in by the
                                caller.  NOTE(review): presumably may be
                                nullptr when no transaction is available, as
                                documented for the prefix variant - confirm.
@param[in]	index		the clust index in which lob is read.
@param[out]	len		length of the whole field
@param[out]	lob_version	lob version that has been read.
@param[in]	data		'internally' stored part of the field
                                containing also the reference to the external
                                part; must be protected by a lock or a page
                                latch.
@param[in]	page_size	BLOB page size
@param[in]	local_len	length of data */
#ifdef UNIV_DEBUG
/**
@param[in]	is_sdi		true for SDI Indexes */
#endif /* UNIV_DEBUG */
/**
@param[in,out]	heap		mem heap
@return the whole field copied to heap */
byte *btr_copy_externally_stored_field_func(
    trx_t *trx, const dict_index_t *index, ulint *len, size_t *lob_version,
    const byte *data, const page_size_t &page_size, ulint local_len,
#ifdef UNIV_DEBUG
    bool is_sdi,
#endif /* UNIV_DEBUG */
    mem_heap_t *heap);
1551 
1552 /** Gets the externally stored size of a record, in units of a database page.
1553 @param[in]	rec	record
1554 @param[in]	offsets	array returned by rec_get_offsets()
1555 @return externally stored part, in units of a database page */
1556 ulint btr_rec_get_externally_stored_len(const rec_t *rec, const ulint *offsets);
1557 
1558 /** Purge an LOB (either of compressed or uncompressed).
1559 @param[in]	ctx		the delete operation context information.
1560 @param[in]	index		clustered index in which LOB is present
1561 @param[in]	trxid		the transaction that is being purged.
@param[in]	undo_no		during rollback to savepoint, purge only up to
1563                                 this undo number.
1564 @param[in]	rec_type	undo record type.
1565 @param[in]	uf		the update vector for the field. */
1566 void purge(lob::DeleteContext *ctx, dict_index_t *index, trx_id_t trxid,
1567            undo_no_t undo_no, ulint rec_type, const upd_field_t *uf);
1568 
1569 /** Update a portion of the given LOB (see also z_update(), which has the same signature).
1570 @param[in]	ctx		update operation context information.
1571 @param[in]	trx		the transaction that is doing the modification.
1572 @param[in]	index		the clustered index containing the LOB.
1573 @param[in]	upd		update vector.
1574 @param[in]	field_no	the LOB field number.
1575 @param[in]	blobref		LOB reference stored in the clustered index record.
1576 @return DB_SUCCESS on success, error code on failure. */
1577 dberr_t update(InsertContext &ctx, trx_t *trx, dict_index_t *index,
1578                const upd_t *upd, ulint field_no, ref_t blobref);
1579 
1580 /** Update a portion of the given LOB; z_ counterpart of update() (presumably for compressed LOBs -- TODO confirm).
1581 @param[in]	ctx		update operation context information.
1582 @param[in]	trx		the transaction that is doing the modification.
1583 @param[in]	index		the clustered index containing the LOB.
1584 @param[in]	upd		update vector.
1585 @param[in]	field_no	the LOB field number.
1586 @param[in]	blobref		LOB reference stored in the clustered index record.
1587 @return DB_SUCCESS on success, error code on failure. */
1588 dberr_t z_update(InsertContext &ctx, trx_t *trx, dict_index_t *index,
1589                  const upd_t *upd, ulint field_no, ref_t blobref);
1590 
1591 /** Print information about the given LOB.
1592 @param[in]  trx  the current transaction.
1593 @param[in]  index  the clustered index that contains the LOB.
1594 @param[in,out]  out  the output stream into which the LOB info is printed.
1595 @param[in]  ref    the LOB reference.
1596 @param[in]  fatal  if true, assert at the end of the function. */
1597 void print(trx_t *trx, dict_index_t *index, std::ostream &out, ref_t ref,
1598            bool fatal);
1599 
1600 /** Import the given LOB; z_ counterpart of import() (presumably for compressed LOBs -- TODO confirm).
1601 Update the creator trx id and the modifier trx id to the given import trx id.
1602 @param[in]	index	clustered index containing the LOB.
1603 @param[in]	field_ref	the LOB reference.
1604 @param[in]	trx_id		the import trx id. */
1605 void z_import(const dict_index_t *index, byte *field_ref, trx_id_t trx_id);
1606 
1607 /** Import the given LOB (see also z_import(), which has the same signature).
1608 Update the creator trx id and the modifier trx id to the given import trx id.
1609 @param[in]	index	clustered index containing the LOB.
1610 @param[in]	field_ref	the LOB reference.
1611 @param[in]	trx_id		the import trx id. */
1612 void import(const dict_index_t *index, byte *field_ref, trx_id_t trx_id);
1613 
1614 #ifdef UNIV_DEBUG
1615 /** Check if all the LOB references in the given clustered index record have
1616 a valid space_id in them. Declared only in UNIV_DEBUG builds.
1617 @param[in]    index   the index to which the LOB belongs.
1618 @param[in]    rec     the clust_rec in which the LOB references are checked.
1619 @param[in]    offsets the field offsets of the given rec.
1620 @return true if LOB references have valid space_id, false otherwise. */
1621 bool rec_check_lobref_space_id(dict_index_t *index, const rec_t *rec,
1622                                const ulint *offsets);
1623 #endif /* UNIV_DEBUG */
1624 
1625 /** Mark an LOB as no longer partially updatable.
1626 @param[in]  trx  the current transaction.
1627 @param[in]  index  the clustered index to which the LOB belongs.
1628 @param[in]  update  the update vector.
1629 @param[in]  mtr     the mini-transaction context.
1630 @return DB_SUCCESS on success, error code on failure. */
1631 dberr_t mark_not_partially_updatable(trx_t *trx, dict_index_t *index,
1632                                      const upd_t *update, mtr_t *mtr);
1633 
1634 }  // namespace lob
1635 
1636 #endif /* lob0lob_h */
1637