1 /*-------------------------------------------------------------------------
2  *
3  * detoast.c
4  *	  Retrieve compressed or external variable size attributes.
5  *
6  * Copyright (c) 2000-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/access/common/detoast.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/detoast.h"
17 #include "access/table.h"
18 #include "access/tableam.h"
19 #include "access/toast_internals.h"
20 #include "common/int.h"
21 #include "common/pg_lzcompress.h"
22 #include "utils/expandeddatum.h"
23 #include "utils/rel.h"
24 
25 static struct varlena *toast_fetch_datum(struct varlena *attr);
26 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
27 											   int32 sliceoffset,
28 											   int32 slicelength);
29 static struct varlena *toast_decompress_datum(struct varlena *attr);
30 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
31 
32 /* ----------
33  * detoast_external_attr -
34  *
35  *	Public entry point to get back a toasted value from
36  *	external source (possibly still in compressed format).
37  *
38  * This will return a datum that contains all the data internally, ie, not
39  * relying on external storage or memory, but it can still be compressed or
40  * have a short header.  Note some callers assume that if the input is an
41  * EXTERNAL datum, the result will be a pfree'able chunk.
42  * ----------
43  */
44 struct varlena *
detoast_external_attr(struct varlena * attr)45 detoast_external_attr(struct varlena *attr)
46 {
47 	struct varlena *result;
48 
49 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
50 	{
51 		/*
52 		 * This is an external stored plain value
53 		 */
54 		result = toast_fetch_datum(attr);
55 	}
56 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
57 	{
58 		/*
59 		 * This is an indirect pointer --- dereference it
60 		 */
61 		struct varatt_indirect redirect;
62 
63 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
64 		attr = (struct varlena *) redirect.pointer;
65 
66 		/* nested indirect Datums aren't allowed */
67 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
68 
69 		/* recurse if value is still external in some other way */
70 		if (VARATT_IS_EXTERNAL(attr))
71 			return detoast_external_attr(attr);
72 
73 		/*
74 		 * Copy into the caller's memory context, in case caller tries to
75 		 * pfree the result.
76 		 */
77 		result = (struct varlena *) palloc(VARSIZE_ANY(attr));
78 		memcpy(result, attr, VARSIZE_ANY(attr));
79 	}
80 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
81 	{
82 		/*
83 		 * This is an expanded-object pointer --- get flat format
84 		 */
85 		ExpandedObjectHeader *eoh;
86 		Size		resultsize;
87 
88 		eoh = DatumGetEOHP(PointerGetDatum(attr));
89 		resultsize = EOH_get_flat_size(eoh);
90 		result = (struct varlena *) palloc(resultsize);
91 		EOH_flatten_into(eoh, (void *) result, resultsize);
92 	}
93 	else
94 	{
95 		/*
96 		 * This is a plain value inside of the main tuple - why am I called?
97 		 */
98 		result = attr;
99 	}
100 
101 	return result;
102 }
103 
104 
105 /* ----------
106  * detoast_attr -
107  *
108  *	Public entry point to get back a toasted value from compression
109  *	or external storage.  The result is always non-extended varlena form.
110  *
111  * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
112  * datum, the result will be a pfree'able chunk.
113  * ----------
114  */
115 struct varlena *
detoast_attr(struct varlena * attr)116 detoast_attr(struct varlena *attr)
117 {
118 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
119 	{
120 		/*
121 		 * This is an externally stored datum --- fetch it back from there
122 		 */
123 		attr = toast_fetch_datum(attr);
124 		/* If it's compressed, decompress it */
125 		if (VARATT_IS_COMPRESSED(attr))
126 		{
127 			struct varlena *tmp = attr;
128 
129 			attr = toast_decompress_datum(tmp);
130 			pfree(tmp);
131 		}
132 	}
133 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
134 	{
135 		/*
136 		 * This is an indirect pointer --- dereference it
137 		 */
138 		struct varatt_indirect redirect;
139 
140 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
141 		attr = (struct varlena *) redirect.pointer;
142 
143 		/* nested indirect Datums aren't allowed */
144 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
145 
146 		/* recurse in case value is still extended in some other way */
147 		attr = detoast_attr(attr);
148 
149 		/* if it isn't, we'd better copy it */
150 		if (attr == (struct varlena *) redirect.pointer)
151 		{
152 			struct varlena *result;
153 
154 			result = (struct varlena *) palloc(VARSIZE_ANY(attr));
155 			memcpy(result, attr, VARSIZE_ANY(attr));
156 			attr = result;
157 		}
158 	}
159 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
160 	{
161 		/*
162 		 * This is an expanded-object pointer --- get flat format
163 		 */
164 		attr = detoast_external_attr(attr);
165 		/* flatteners are not allowed to produce compressed/short output */
166 		Assert(!VARATT_IS_EXTENDED(attr));
167 	}
168 	else if (VARATT_IS_COMPRESSED(attr))
169 	{
170 		/*
171 		 * This is a compressed value inside of the main tuple
172 		 */
173 		attr = toast_decompress_datum(attr);
174 	}
175 	else if (VARATT_IS_SHORT(attr))
176 	{
177 		/*
178 		 * This is a short-header varlena --- convert to 4-byte header format
179 		 */
180 		Size		data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
181 		Size		new_size = data_size + VARHDRSZ;
182 		struct varlena *new_attr;
183 
184 		new_attr = (struct varlena *) palloc(new_size);
185 		SET_VARSIZE(new_attr, new_size);
186 		memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
187 		attr = new_attr;
188 	}
189 
190 	return attr;
191 }
192 
193 
194 /* ----------
195  * detoast_attr_slice -
196  *
197  *		Public entry point to get back part of a toasted value
198  *		from compression or external storage.
199  *
200  * sliceoffset is where to start (zero or more)
201  * If slicelength < 0, return everything beyond sliceoffset
202  * ----------
203  */
204 struct varlena *
detoast_attr_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)205 detoast_attr_slice(struct varlena *attr,
206 				   int32 sliceoffset, int32 slicelength)
207 {
208 	struct varlena *preslice;
209 	struct varlena *result;
210 	char	   *attrdata;
211 	int32		slicelimit;
212 	int32		attrsize;
213 
214 	if (sliceoffset < 0)
215 		elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
216 
217 	/*
218 	 * Compute slicelimit = offset + length, or -1 if we must fetch all of the
219 	 * value.  In case of integer overflow, we must fetch all.
220 	 */
221 	if (slicelength < 0)
222 		slicelimit = -1;
223 	else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
224 		slicelength = slicelimit = -1;
225 
226 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
227 	{
228 		struct varatt_external toast_pointer;
229 
230 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
231 
232 		/* fast path for non-compressed external datums */
233 		if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
234 			return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
235 
236 		/*
237 		 * For compressed values, we need to fetch enough slices to decompress
238 		 * at least the requested part (when a prefix is requested).
239 		 * Otherwise, just fetch all slices.
240 		 */
241 		if (slicelimit >= 0)
242 		{
243 			int32		max_size;
244 
245 			/*
246 			 * Determine maximum amount of compressed data needed for a prefix
247 			 * of a given length (after decompression).
248 			 */
249 			max_size = pglz_maximum_compressed_size(slicelimit,
250 													toast_pointer.va_extsize);
251 
252 			/*
253 			 * Fetch enough compressed slices (compressed marker will get set
254 			 * automatically).
255 			 */
256 			preslice = toast_fetch_datum_slice(attr, 0, max_size);
257 		}
258 		else
259 			preslice = toast_fetch_datum(attr);
260 	}
261 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
262 	{
263 		struct varatt_indirect redirect;
264 
265 		VARATT_EXTERNAL_GET_POINTER(redirect, attr);
266 
267 		/* nested indirect Datums aren't allowed */
268 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
269 
270 		return detoast_attr_slice(redirect.pointer,
271 								  sliceoffset, slicelength);
272 	}
273 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
274 	{
275 		/* pass it off to detoast_external_attr to flatten */
276 		preslice = detoast_external_attr(attr);
277 	}
278 	else
279 		preslice = attr;
280 
281 	Assert(!VARATT_IS_EXTERNAL(preslice));
282 
283 	if (VARATT_IS_COMPRESSED(preslice))
284 	{
285 		struct varlena *tmp = preslice;
286 
287 		/* Decompress enough to encompass the slice and the offset */
288 		if (slicelimit >= 0)
289 			preslice = toast_decompress_datum_slice(tmp, slicelimit);
290 		else
291 			preslice = toast_decompress_datum(tmp);
292 
293 		if (tmp != attr)
294 			pfree(tmp);
295 	}
296 
297 	if (VARATT_IS_SHORT(preslice))
298 	{
299 		attrdata = VARDATA_SHORT(preslice);
300 		attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
301 	}
302 	else
303 	{
304 		attrdata = VARDATA(preslice);
305 		attrsize = VARSIZE(preslice) - VARHDRSZ;
306 	}
307 
308 	/* slicing of datum for compressed cases and plain value */
309 
310 	if (sliceoffset >= attrsize)
311 	{
312 		sliceoffset = 0;
313 		slicelength = 0;
314 	}
315 	else if (slicelength < 0 || slicelimit > attrsize)
316 		slicelength = attrsize - sliceoffset;
317 
318 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
319 	SET_VARSIZE(result, slicelength + VARHDRSZ);
320 
321 	memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
322 
323 	if (preslice != attr)
324 		pfree(preslice);
325 
326 	return result;
327 }
328 
329 /* ----------
330  * toast_fetch_datum -
331  *
332  *	Reconstruct an in memory Datum from the chunks saved
333  *	in the toast relation
334  * ----------
335  */
336 static struct varlena *
toast_fetch_datum(struct varlena * attr)337 toast_fetch_datum(struct varlena *attr)
338 {
339 	Relation	toastrel;
340 	struct varlena *result;
341 	struct varatt_external toast_pointer;
342 	int32		attrsize;
343 
344 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
345 		elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
346 
347 	/* Must copy to access aligned fields */
348 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
349 
350 	attrsize = toast_pointer.va_extsize;
351 
352 	result = (struct varlena *) palloc(attrsize + VARHDRSZ);
353 
354 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
355 		SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
356 	else
357 		SET_VARSIZE(result, attrsize + VARHDRSZ);
358 
359 	if (attrsize == 0)
360 		return result;			/* Probably shouldn't happen, but just in
361 								 * case. */
362 
363 	/*
364 	 * Open the toast relation and its indexes
365 	 */
366 	toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
367 
368 	/* Fetch all chunks */
369 	table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
370 									 attrsize, 0, attrsize, result);
371 
372 	/* Close toast table */
373 	table_close(toastrel, AccessShareLock);
374 
375 	return result;
376 }
377 
378 /* ----------
379  * toast_fetch_datum_slice -
380  *
381  *	Reconstruct a segment of a Datum from the chunks saved
382  *	in the toast relation
383  *
384  *	Note that this function supports non-compressed external datums
385  *	and compressed external datums (in which case the requested slice
386  *	has to be a prefix, i.e. sliceoffset has to be 0).
387  * ----------
388  */
389 static struct varlena *
toast_fetch_datum_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)390 toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
391 						int32 slicelength)
392 {
393 	Relation	toastrel;
394 	struct varlena *result;
395 	struct varatt_external toast_pointer;
396 	int32		attrsize;
397 
398 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
399 		elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
400 
401 	/* Must copy to access aligned fields */
402 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
403 
404 	/*
405 	 * It's nonsense to fetch slices of a compressed datum unless when it's a
406 	 * prefix -- this isn't lo_* we can't return a compressed datum which is
407 	 * meaningful to toast later.
408 	 */
409 	Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
410 
411 	attrsize = toast_pointer.va_extsize;
412 
413 	if (sliceoffset >= attrsize)
414 	{
415 		sliceoffset = 0;
416 		slicelength = 0;
417 	}
418 
419 	/*
420 	 * When fetching a prefix of a compressed external datum, account for the
421 	 * rawsize tracking amount of raw data, which is stored at the beginning
422 	 * as an int32 value).
423 	 */
424 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
425 		slicelength = slicelength + sizeof(int32);
426 
427 	/*
428 	 * Adjust length request if needed.  (Note: our sole caller,
429 	 * detoast_attr_slice, protects us against sliceoffset + slicelength
430 	 * overflowing.)
431 	 */
432 	if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
433 		slicelength = attrsize - sliceoffset;
434 
435 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
436 
437 	if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
438 		SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
439 	else
440 		SET_VARSIZE(result, slicelength + VARHDRSZ);
441 
442 	if (slicelength == 0)
443 		return result;			/* Can save a lot of work at this point! */
444 
445 	/* Open the toast relation */
446 	toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
447 
448 	/* Fetch all chunks */
449 	table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
450 									 attrsize, sliceoffset, slicelength,
451 									 result);
452 
453 	/* Close toast table */
454 	table_close(toastrel, AccessShareLock);
455 
456 	return result;
457 }
458 
459 /* ----------
460  * toast_decompress_datum -
461  *
462  * Decompress a compressed version of a varlena datum
463  */
464 static struct varlena *
toast_decompress_datum(struct varlena * attr)465 toast_decompress_datum(struct varlena *attr)
466 {
467 	struct varlena *result;
468 
469 	Assert(VARATT_IS_COMPRESSED(attr));
470 
471 	result = (struct varlena *)
472 		palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
473 	SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
474 
475 	if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
476 						TOAST_COMPRESS_SIZE(attr),
477 						VARDATA(result),
478 						TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
479 		elog(ERROR, "compressed data is corrupted");
480 
481 	return result;
482 }
483 
484 
485 /* ----------
486  * toast_decompress_datum_slice -
487  *
488  * Decompress the front of a compressed version of a varlena datum.
489  * offset handling happens in detoast_attr_slice.
490  * Here we just decompress a slice from the front.
491  */
492 static struct varlena *
toast_decompress_datum_slice(struct varlena * attr,int32 slicelength)493 toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
494 {
495 	struct varlena *result;
496 	int32		rawsize;
497 
498 	Assert(VARATT_IS_COMPRESSED(attr));
499 
500 	result = (struct varlena *) palloc(slicelength + VARHDRSZ);
501 
502 	rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
503 							  VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
504 							  VARDATA(result),
505 							  slicelength, false);
506 	if (rawsize < 0)
507 		elog(ERROR, "compressed data is corrupted");
508 
509 	SET_VARSIZE(result, rawsize + VARHDRSZ);
510 	return result;
511 }
512 
513 /* ----------
514  * toast_raw_datum_size -
515  *
516  *	Return the raw (detoasted) size of a varlena datum
517  *	(including the VARHDRSZ header)
518  * ----------
519  */
520 Size
toast_raw_datum_size(Datum value)521 toast_raw_datum_size(Datum value)
522 {
523 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
524 	Size		result;
525 
526 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
527 	{
528 		/* va_rawsize is the size of the original datum -- including header */
529 		struct varatt_external toast_pointer;
530 
531 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
532 		result = toast_pointer.va_rawsize;
533 	}
534 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
535 	{
536 		struct varatt_indirect toast_pointer;
537 
538 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
539 
540 		/* nested indirect Datums aren't allowed */
541 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
542 
543 		return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
544 	}
545 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
546 	{
547 		result = EOH_get_flat_size(DatumGetEOHP(value));
548 	}
549 	else if (VARATT_IS_COMPRESSED(attr))
550 	{
551 		/* here, va_rawsize is just the payload size */
552 		result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
553 	}
554 	else if (VARATT_IS_SHORT(attr))
555 	{
556 		/*
557 		 * we have to normalize the header length to VARHDRSZ or else the
558 		 * callers of this function will be confused.
559 		 */
560 		result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
561 	}
562 	else
563 	{
564 		/* plain untoasted datum */
565 		result = VARSIZE(attr);
566 	}
567 	return result;
568 }
569 
570 /* ----------
571  * toast_datum_size
572  *
573  *	Return the physical storage size (possibly compressed) of a varlena datum
574  * ----------
575  */
576 Size
toast_datum_size(Datum value)577 toast_datum_size(Datum value)
578 {
579 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
580 	Size		result;
581 
582 	if (VARATT_IS_EXTERNAL_ONDISK(attr))
583 	{
584 		/*
585 		 * Attribute is stored externally - return the extsize whether
586 		 * compressed or not.  We do not count the size of the toast pointer
587 		 * ... should we?
588 		 */
589 		struct varatt_external toast_pointer;
590 
591 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
592 		result = toast_pointer.va_extsize;
593 	}
594 	else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
595 	{
596 		struct varatt_indirect toast_pointer;
597 
598 		VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
599 
600 		/* nested indirect Datums aren't allowed */
601 		Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
602 
603 		return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
604 	}
605 	else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
606 	{
607 		result = EOH_get_flat_size(DatumGetEOHP(value));
608 	}
609 	else if (VARATT_IS_SHORT(attr))
610 	{
611 		result = VARSIZE_SHORT(attr);
612 	}
613 	else
614 	{
615 		/*
616 		 * Attribute is stored inline either compressed or not, just calculate
617 		 * the size of the datum in either case.
618 		 */
619 		result = VARSIZE(attr);
620 	}
621 	return result;
622 }
623