1 /*-------------------------------------------------------------------------
2  *
3  * detoast.c
4  *	  Retrieve compressed or external variable size attributes.
5  *
6  * Copyright (c) 2000-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/access/common/detoast.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/detoast.h"
17 #include "access/table.h"
18 #include "access/tableam.h"
19 #include "access/toast_internals.h"
20 #include "common/int.h"
21 #include "common/pg_lzcompress.h"
22 #include "utils/expandeddatum.h"
23 #include "utils/rel.h"
24 
25 static struct varlena *toast_fetch_datum(struct varlena *attr);
26 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
27 											   int32 sliceoffset,
28 											   int32 slicelength);
29 static struct varlena *toast_decompress_datum(struct varlena *attr);
30 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
31 
32 /* ----------
33  * detoast_external_attr -
34  *
35  *	Public entry point to get back a toasted value from
36  *	external source (possibly still in compressed format).
37  *
38  * This will return a datum that contains all the data internally, ie, not
39  * relying on external storage or memory, but it can still be compressed or
40  * have a short header.  Note some callers assume that if the input is an
41  * EXTERNAL datum, the result will be a pfree'able chunk.
42  * ----------
43  */
44 struct varlena *
detoast_external_attr(struct varlena * attr)45 detoast_external_attr(struct varlena *attr)
46 {
47 	struct varlena *result;
48 
49 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
50 	{
51 		/*
52 		 * This is an external stored plain value
53 		 */
54 		result = toast_fetch_datum(attr);
55 	}
56 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
57 	{
58 		/*
59 		 * This is an indirect pointer --- dereference it
60 		 */
61 		struct varatt_indirect redirect;
62 
63 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
64 		attr = (struct varlena *) redirect.pointer;
65 
66 		/* nested indirect Datums aren't allowed */
67 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
68 
69 		/* recurse if value is still external in some other way */
70 		if (VARATT_IS_EXTERNAL(attr))
71 			return detoast_external_attr(attr);
72 
73 		/*
74 		 * Copy into the caller's memory context, in case caller tries to
75 		 * pfree the result.
76 		 */
77 		result = (struct varlena *) palloc(VARSIZE_ANY(attr));
78 		memcpy(result, attr, VARSIZE_ANY(attr));
79 	}
80 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
81 	{
82 		/*
83 		 * This is an expanded-object pointer --- get flat format
84 		 */
85 		ExpandedObjectHeader *eoh;
86 		Size		resultsize;
87 
88 		eoh = DatumGetEOHP(PointerGetDatum(attr));
89 		resultsize = EOH_get_flat_size(eoh);
90 		result = (struct varlena *) palloc(resultsize);
91 		EOH_flatten_into(eoh, (void *) result, resultsize);
92 	}
93 	else
94 	{
95 		/*
96 		 * This is a plain value inside of the main tuple - why am I called?
97 		 */
98 		result = attr;
99 	}
100 
101 	return result;
102 }
103 
104 
105 /* ----------
106  * detoast_attr -
107  *
108  *	Public entry point to get back a toasted value from compression
109  *	or external storage.  The result is always non-extended varlena form.
110  *
111  * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
112  * datum, the result will be a pfree'able chunk.
113  * ----------
114  */
115 struct varlena *
detoast_attr(struct varlena * attr)116 detoast_attr(struct varlena *attr)
117 {
118 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
119 	{
120 		/*
121 		 * This is an externally stored datum --- fetch it back from there
122 		 */
123 		attr = toast_fetch_datum(attr);
124 		/* If it's compressed, decompress it */
125 		if (VARATT_IS_COMPRESSED(attr))
126 		{
127 			struct varlena *tmp = attr;
128 
129 			attr = toast_decompress_datum(tmp);
130 			pfree(tmp);
131 		}
132 	}
133 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
134 	{
135 		/*
136 		 * This is an indirect pointer --- dereference it
137 		 */
138 		struct varatt_indirect redirect;
139 
140 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
141 		attr = (struct varlena *) redirect.pointer;
142 
143 		/* nested indirect Datums aren't allowed */
144 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
145 
146 		/* recurse in case value is still extended in some other way */
147 		attr = detoast_attr(attr);
148 
149 		/* if it isn't, we'd better copy it */
150 		if (attr == (struct varlena *) redirect.pointer)
151 		{
152 			struct varlena *result;
153 
154 			result = (struct varlena *) palloc(VARSIZE_ANY(attr));
155 			memcpy(result, attr, VARSIZE_ANY(attr));
156 			attr = result;
157 		}
158 	}
159 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
160 	{
161 		/*
162 		 * This is an expanded-object pointer --- get flat format
163 		 */
164 		attr = detoast_external_attr(attr);
165 		/* flatteners are not allowed to produce compressed/short output */
166 		Assert(!VARATT_IS_EXTENDED(attr));
167 	}
168 	else if (VARATT_IS_COMPRESSED(attr))
169 	{
170 		/*
171 		 * This is a compressed value inside of the main tuple
172 		 */
173 		attr = toast_decompress_datum(attr);
174 	}
175 	else if (VARATT_IS_SHORT(attr))
176 	{
177 		/*
178 		 * This is a short-header varlena --- convert to 4-byte header format
179 		 */
180 		Size		data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
181 		Size		new_size = data_size + VARHDRSZ;
182 		struct varlena *new_attr;
183 
184 		new_attr = (struct varlena *) palloc(new_size);
185 		SET_VARSIZE(new_attr, new_size);
186 		memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
187 		attr = new_attr;
188 	}
189 
190 	return attr;
191 }
192 
193 
194 /* ----------
195  * detoast_attr_slice -
196  *
197  *		Public entry point to get back part of a toasted value
198  *		from compression or external storage.
199  *
200  * sliceoffset is where to start (zero or more)
201  * If slicelength < 0, return everything beyond sliceoffset
202  * ----------
203  */
204 struct varlena *
detoast_attr_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)205 detoast_attr_slice(struct varlena *attr,
206 				   int32 sliceoffset, int32 slicelength)
207 {
208 	struct varlena *preslice;
209 	struct varlena *result;
210 	char	   *attrdata;
211 	int32		slicelimit;
212 	int32		attrsize;
213 
214 	if (sliceoffset < 0)
215 		elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
216 
217 	/*
218 	 * Compute slicelimit = offset + length, or -1 if we must fetch all of the
219 	 * value.  In case of integer overflow, we must fetch all.
220 	 */
221 	if (slicelength < 0)
222 		slicelimit = -1;
223 	else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
224 		slicelength = slicelimit = -1;
225 
226 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
227 	{
228 		struct varatt_external toast_pointer;
229 
230 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
231 
232 		/* fast path for non-compressed external datums */
233 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
234 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
235 
236 		/*
237 		 * For compressed values, we need to fetch enough slices to decompress
238 		 * at least the requested part (when a prefix is requested).
239 		 * Otherwise, just fetch all slices.
240 		 */
241 		if (slicelimit >= 0)
242 		{
243 			int32		max_size = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
244 
245 			/*
246 			 * Determine maximum amount of compressed data needed for a prefix
247 			 * of a given length (after decompression).
248 			 *
249 			 * At least for now, if it's LZ4 data, we'll have to fetch the
250 			 * whole thing, because there doesn't seem to be an API call to
251 			 * determine how much compressed data we need to be sure of being
252 			 * able to decompress the required slice.
253 			 */
254 			if (VARATT_EXTERNAL_GET_COMPRESS_METHOD(toast_pointer) ==
255 				TOAST_PGLZ_COMPRESSION_ID)
256 				max_size = pglz_maximum_compressed_size(slicelimit, max_size);
257 
258 			/*
259 			 * Fetch enough compressed slices (compressed marker will get set
260 			 * automatically).
261 			 */
262 			preslice = toast_fetch_datum_slice(attr, 0, max_size);
263 		}
264 		else
265 			preslice = toast_fetch_datum(attr);
266 	}
267 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
268 	{
269 		struct varatt_indirect redirect;
270 
271 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
272 
273 		/* nested indirect Datums aren't allowed */
274 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
275 
276 		return detoast_attr_slice(redirect.pointer,
277 								  sliceoffset, slicelength);
278 	}
279 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
280 	{
281 		/* pass it off to detoast_external_attr to flatten */
282 		preslice = detoast_external_attr(attr);
283 	}
284 	else
285 		preslice = attr;
286 
287 	Assert(!VARATT_IS_EXTERNAL(preslice));
288 
289 	if (VARATT_IS_COMPRESSED(preslice))
290 	{
291 		struct varlena *tmp = preslice;
292 
293 		/* Decompress enough to encompass the slice and the offset */
294 		if (slicelimit >= 0)
295 			preslice = toast_decompress_datum_slice(tmp, slicelimit);
296 		else
297 			preslice = toast_decompress_datum(tmp);
298 
299 		if (tmp != attr)
300 			pfree(tmp);
301 	}
302 
303 	if (VARATT_IS_SHORT(preslice))
304 	{
305 		attrdata = VARDATA_SHORT(preslice);
306 		attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
307 	}
308 	else
309 	{
310 		attrdata = VARDATA(preslice);
311 		attrsize = VARSIZE(preslice) - VARHDRSZ;
312 	}
313 
314 	/* slicing of datum for compressed cases and plain value */
315 
316 	if (sliceoffset >= attrsize)
317 	{
318 		sliceoffset = 0;
319 		slicelength = 0;
320 	}
321 	else if (slicelength < 0 || slicelimit > attrsize)
322 		slicelength = attrsize - sliceoffset;
323 
324 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
325 	SET_VARSIZE(result, slicelength + VARHDRSZ);
326 
327 	memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
328 
329 	if (preslice != attr)
330 		pfree(preslice);
331 
332 	return result;
333 }
334 
335 /* ----------
336  * toast_fetch_datum -
337  *
338  *	Reconstruct an in memory Datum from the chunks saved
339  *	in the toast relation
340  * ----------
341  */
342 static struct varlena *
toast_fetch_datum(struct varlena * attr)343 toast_fetch_datum(struct varlena *attr)
344 {
345 	Relation	toastrel;
346 	struct varlena *result;
347 	struct varatt_external toast_pointer;
348 	int32		attrsize;
349 
350 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
351 		elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
352 
353 	/* Must copy to access aligned fields */
354 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
355 
356 	attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
357 
358 	result = (struct varlena *) palloc(attrsize + VARHDRSZ);
359 
360 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
361 		SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
362 	else
363 		SET_VARSIZE(result, attrsize + VARHDRSZ);
364 
365 	if (attrsize == 0)
366 		return result;			/* Probably shouldn't happen, but just in
367 								 * case. */
368 
369 	/*
370 	 * Open the toast relation and its indexes
371 	 */
372 	toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
373 
374 	/* Fetch all chunks */
375 	table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
376 									 attrsize, 0, attrsize, result);
377 
378 	/* Close toast table */
379 	table_close(toastrel, AccessShareLock);
380 
381 	return result;
382 }
383 
384 /* ----------
385  * toast_fetch_datum_slice -
386  *
387  *	Reconstruct a segment of a Datum from the chunks saved
388  *	in the toast relation
389  *
390  *	Note that this function supports non-compressed external datums
391  *	and compressed external datums (in which case the requested slice
392  *	has to be a prefix, i.e. sliceoffset has to be 0).
393  * ----------
394  */
395 static struct varlena *
toast_fetch_datum_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)396 toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
397 						int32 slicelength)
398 {
399 	Relation	toastrel;
400 	struct varlena *result;
401 	struct varatt_external toast_pointer;
402 	int32		attrsize;
403 
404 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
405 		elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
406 
407 	/* Must copy to access aligned fields */
408 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
409 
410 	/*
411 	 * It's nonsense to fetch slices of a compressed datum unless when it's a
412 	 * prefix -- this isn't lo_* we can't return a compressed datum which is
413 	 * meaningful to toast later.
414 	 */
415 	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
416 
417 	attrsize = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
418 
419 	if (sliceoffset >= attrsize)
420 	{
421 		sliceoffset = 0;
422 		slicelength = 0;
423 	}
424 
425 	/*
426 	 * When fetching a prefix of a compressed external datum, account for the
427 	 * space required by va_tcinfo, which is stored at the beginning as an
428 	 * int32 value.
429 	 */
430 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
431 		slicelength = slicelength + sizeof(int32);
432 
433 	/*
434 	 * Adjust length request if needed.  (Note: our sole caller,
435 	 * detoast_attr_slice, protects us against sliceoffset + slicelength
436 	 * overflowing.)
437 	 */
438 	if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
439 		slicelength = attrsize - sliceoffset;
440 
441 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
442 
443 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
444 		SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
445 	else
446 		SET_VARSIZE(result, slicelength + VARHDRSZ);
447 
448 	if (slicelength == 0)
449 		return result;			/* Can save a lot of work at this point! */
450 
451 	/* Open the toast relation */
452 	toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
453 
454 	/* Fetch all chunks */
455 	table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
456 									 attrsize, sliceoffset, slicelength,
457 									 result);
458 
459 	/* Close toast table */
460 	table_close(toastrel, AccessShareLock);
461 
462 	return result;
463 }
464 
465 /* ----------
466  * toast_decompress_datum -
467  *
468  * Decompress a compressed version of a varlena datum
469  */
470 static struct varlena *
toast_decompress_datum(struct varlena * attr)471 toast_decompress_datum(struct varlena *attr)
472 {
473 	ToastCompressionId cmid;
474 
475 	Assert(VARATT_IS_COMPRESSED(attr));
476 
477 	/*
478 	 * Fetch the compression method id stored in the compression header and
479 	 * decompress the data using the appropriate decompression routine.
480 	 */
481 	cmid = TOAST_COMPRESS_METHOD(attr);
482 	switch (cmid)
483 	{
484 		case TOAST_PGLZ_COMPRESSION_ID:
485 			return pglz_decompress_datum(attr);
486 		case TOAST_LZ4_COMPRESSION_ID:
487 			return lz4_decompress_datum(attr);
488 		default:
489 			elog(ERROR, "invalid compression method id %d", cmid);
490 			return NULL;		/* keep compiler quiet */
491 	}
492 }
493 
494 
495 /* ----------
496  * toast_decompress_datum_slice -
497  *
498  * Decompress the front of a compressed version of a varlena datum.
499  * offset handling happens in detoast_attr_slice.
500  * Here we just decompress a slice from the front.
501  */
502 static struct varlena *
toast_decompress_datum_slice(struct varlena * attr,int32 slicelength)503 toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
504 {
505 	ToastCompressionId cmid;
506 
507 	Assert(VARATT_IS_COMPRESSED(attr));
508 
509 	/*
510 	 * Some callers may pass a slicelength that's more than the actual
511 	 * decompressed size.  If so, just decompress normally.  This avoids
512 	 * possibly allocating a larger-than-necessary result object, and may be
513 	 * faster and/or more robust as well.  Notably, some versions of liblz4
514 	 * have been seen to give wrong results if passed an output size that is
515 	 * more than the data's true decompressed size.
516 	 */
517 	if ((uint32) slicelength >= TOAST_COMPRESS_EXTSIZE(attr))
518 		return toast_decompress_datum(attr);
519 
520 	/*
521 	 * Fetch the compression method id stored in the compression header and
522 	 * decompress the data slice using the appropriate decompression routine.
523 	 */
524 	cmid = TOAST_COMPRESS_METHOD(attr);
525 	switch (cmid)
526 	{
527 		case TOAST_PGLZ_COMPRESSION_ID:
528 			return pglz_decompress_datum_slice(attr, slicelength);
529 		case TOAST_LZ4_COMPRESSION_ID:
530 			return lz4_decompress_datum_slice(attr, slicelength);
531 		default:
532 			elog(ERROR, "invalid compression method id %d", cmid);
533 			return NULL;		/* keep compiler quiet */
534 	}
535 }
536 
537 /* ----------
538  * toast_raw_datum_size -
539  *
540  *	Return the raw (detoasted) size of a varlena datum
541  *	(including the VARHDRSZ header)
542  * ----------
543  */
544 Size
toast_raw_datum_size(Datum value)545 toast_raw_datum_size(Datum value)
546 {
547 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
548 	Size		result;
549 
550 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
551 	{
552 		/* va_rawsize is the size of the original datum -- including header */
553 		struct varatt_external toast_pointer;
554 
555 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
556 		result = toast_pointer.va_rawsize;
557 	}
558 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
559 	{
560 		struct varatt_indirect toast_pointer;
561 
562 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
563 
564 		/* nested indirect Datums aren't allowed */
565 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
566 
567 		return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
568 	}
569 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
570 	{
571 		result = EOH_get_flat_size(DatumGetEOHP(value));
572 	}
573 	else if (VARATT_IS_COMPRESSED(attr))
574 	{
575 		/* here, va_rawsize is just the payload size */
576 		result = VARDATA_COMPRESSED_GET_EXTSIZE(attr) + VARHDRSZ;
577 	}
578 	else if (VARATT_IS_SHORT(attr))
579 	{
580 		/*
581 		 * we have to normalize the header length to VARHDRSZ or else the
582 		 * callers of this function will be confused.
583 		 */
584 		result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
585 	}
586 	else
587 	{
588 		/* plain untoasted datum */
589 		result = VARSIZE(attr);
590 	}
591 	return result;
592 }
593 
594 /* ----------
595  * toast_datum_size
596  *
597  *	Return the physical storage size (possibly compressed) of a varlena datum
598  * ----------
599  */
600 Size
toast_datum_size(Datum value)601 toast_datum_size(Datum value)
602 {
603 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
604 	Size		result;
605 
606 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
607 	{
608 		/*
609 		 * Attribute is stored externally - return the extsize whether
610 		 * compressed or not.  We do not count the size of the toast pointer
611 		 * ... should we?
612 		 */
613 		struct varatt_external toast_pointer;
614 
615 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
616 		result = VARATT_EXTERNAL_GET_EXTSIZE(toast_pointer);
617 	}
618 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
619 	{
620 		struct varatt_indirect toast_pointer;
621 
622 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
623 
624 		/* nested indirect Datums aren't allowed */
625 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
626 
627 		return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
628 	}
629 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
630 	{
631 		result = EOH_get_flat_size(DatumGetEOHP(value));
632 	}
633 	else if (VARATT_IS_SHORT(attr))
634 	{
635 		result = VARSIZE_SHORT(attr);
636 	}
637 	else
638 	{
639 		/*
640 		 * Attribute is stored inline either compressed or not, just calculate
641 		 * the size of the datum in either case.
642 		 */
643 		result = VARSIZE(attr);
644 	}
645 	return result;
646 }
647