1 /*-------------------------------------------------------------------------
2 *
3 * detoast.c
4 * Retrieve compressed or external variable size attributes.
5 *
6 * Copyright (c) 2000-2020, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/access/common/detoast.c
10 *
11 *-------------------------------------------------------------------------
12 */
13
14 #include "postgres.h"
15
16 #include "access/detoast.h"
17 #include "access/table.h"
18 #include "access/tableam.h"
19 #include "access/toast_internals.h"
20 #include "common/int.h"
21 #include "common/pg_lzcompress.h"
22 #include "utils/expandeddatum.h"
23 #include "utils/rel.h"
24
25 static struct varlena *toast_fetch_datum(struct varlena *attr);
26 static struct varlena *toast_fetch_datum_slice(struct varlena *attr,
27 int32 sliceoffset,
28 int32 slicelength);
29 static struct varlena *toast_decompress_datum(struct varlena *attr);
30 static struct varlena *toast_decompress_datum_slice(struct varlena *attr, int32 slicelength);
31
32 /* ----------
33 * detoast_external_attr -
34 *
35 * Public entry point to get back a toasted value from
36 * external source (possibly still in compressed format).
37 *
38 * This will return a datum that contains all the data internally, ie, not
39 * relying on external storage or memory, but it can still be compressed or
40 * have a short header. Note some callers assume that if the input is an
41 * EXTERNAL datum, the result will be a pfree'able chunk.
42 * ----------
43 */
44 struct varlena *
detoast_external_attr(struct varlena * attr)45 detoast_external_attr(struct varlena *attr)
46 {
47 struct varlena *result;
48
49 if (VARATT_IS_EXTERNAL_ONDISK(attr))
50 {
51 /*
52 * This is an external stored plain value
53 */
54 result = toast_fetch_datum(attr);
55 }
56 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
57 {
58 /*
59 * This is an indirect pointer --- dereference it
60 */
61 struct varatt_indirect redirect;
62
63 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
64 attr = (struct varlena *) redirect.pointer;
65
66 /* nested indirect Datums aren't allowed */
67 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
68
69 /* recurse if value is still external in some other way */
70 if (VARATT_IS_EXTERNAL(attr))
71 return detoast_external_attr(attr);
72
73 /*
74 * Copy into the caller's memory context, in case caller tries to
75 * pfree the result.
76 */
77 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
78 memcpy(result, attr, VARSIZE_ANY(attr));
79 }
80 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
81 {
82 /*
83 * This is an expanded-object pointer --- get flat format
84 */
85 ExpandedObjectHeader *eoh;
86 Size resultsize;
87
88 eoh = DatumGetEOHP(PointerGetDatum(attr));
89 resultsize = EOH_get_flat_size(eoh);
90 result = (struct varlena *) palloc(resultsize);
91 EOH_flatten_into(eoh, (void *) result, resultsize);
92 }
93 else
94 {
95 /*
96 * This is a plain value inside of the main tuple - why am I called?
97 */
98 result = attr;
99 }
100
101 return result;
102 }
103
104
105 /* ----------
106 * detoast_attr -
107 *
108 * Public entry point to get back a toasted value from compression
109 * or external storage. The result is always non-extended varlena form.
110 *
111 * Note some callers assume that if the input is an EXTERNAL or COMPRESSED
112 * datum, the result will be a pfree'able chunk.
113 * ----------
114 */
115 struct varlena *
detoast_attr(struct varlena * attr)116 detoast_attr(struct varlena *attr)
117 {
118 if (VARATT_IS_EXTERNAL_ONDISK(attr))
119 {
120 /*
121 * This is an externally stored datum --- fetch it back from there
122 */
123 attr = toast_fetch_datum(attr);
124 /* If it's compressed, decompress it */
125 if (VARATT_IS_COMPRESSED(attr))
126 {
127 struct varlena *tmp = attr;
128
129 attr = toast_decompress_datum(tmp);
130 pfree(tmp);
131 }
132 }
133 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
134 {
135 /*
136 * This is an indirect pointer --- dereference it
137 */
138 struct varatt_indirect redirect;
139
140 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
141 attr = (struct varlena *) redirect.pointer;
142
143 /* nested indirect Datums aren't allowed */
144 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
145
146 /* recurse in case value is still extended in some other way */
147 attr = detoast_attr(attr);
148
149 /* if it isn't, we'd better copy it */
150 if (attr == (struct varlena *) redirect.pointer)
151 {
152 struct varlena *result;
153
154 result = (struct varlena *) palloc(VARSIZE_ANY(attr));
155 memcpy(result, attr, VARSIZE_ANY(attr));
156 attr = result;
157 }
158 }
159 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
160 {
161 /*
162 * This is an expanded-object pointer --- get flat format
163 */
164 attr = detoast_external_attr(attr);
165 /* flatteners are not allowed to produce compressed/short output */
166 Assert(!VARATT_IS_EXTENDED(attr));
167 }
168 else if (VARATT_IS_COMPRESSED(attr))
169 {
170 /*
171 * This is a compressed value inside of the main tuple
172 */
173 attr = toast_decompress_datum(attr);
174 }
175 else if (VARATT_IS_SHORT(attr))
176 {
177 /*
178 * This is a short-header varlena --- convert to 4-byte header format
179 */
180 Size data_size = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT;
181 Size new_size = data_size + VARHDRSZ;
182 struct varlena *new_attr;
183
184 new_attr = (struct varlena *) palloc(new_size);
185 SET_VARSIZE(new_attr, new_size);
186 memcpy(VARDATA(new_attr), VARDATA_SHORT(attr), data_size);
187 attr = new_attr;
188 }
189
190 return attr;
191 }
192
193
194 /* ----------
195 * detoast_attr_slice -
196 *
197 * Public entry point to get back part of a toasted value
198 * from compression or external storage.
199 *
200 * sliceoffset is where to start (zero or more)
201 * If slicelength < 0, return everything beyond sliceoffset
202 * ----------
203 */
204 struct varlena *
detoast_attr_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)205 detoast_attr_slice(struct varlena *attr,
206 int32 sliceoffset, int32 slicelength)
207 {
208 struct varlena *preslice;
209 struct varlena *result;
210 char *attrdata;
211 int32 slicelimit;
212 int32 attrsize;
213
214 if (sliceoffset < 0)
215 elog(ERROR, "invalid sliceoffset: %d", sliceoffset);
216
217 /*
218 * Compute slicelimit = offset + length, or -1 if we must fetch all of the
219 * value. In case of integer overflow, we must fetch all.
220 */
221 if (slicelength < 0)
222 slicelimit = -1;
223 else if (pg_add_s32_overflow(sliceoffset, slicelength, &slicelimit))
224 slicelength = slicelimit = -1;
225
226 if (VARATT_IS_EXTERNAL_ONDISK(attr))
227 {
228 struct varatt_external toast_pointer;
229
230 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
231
232 /* fast path for non-compressed external datums */
233 if (!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
234 return toast_fetch_datum_slice(attr, sliceoffset, slicelength);
235
236 /*
237 * For compressed values, we need to fetch enough slices to decompress
238 * at least the requested part (when a prefix is requested).
239 * Otherwise, just fetch all slices.
240 */
241 if (slicelimit >= 0)
242 {
243 int32 max_size;
244
245 /*
246 * Determine maximum amount of compressed data needed for a prefix
247 * of a given length (after decompression).
248 */
249 max_size = pglz_maximum_compressed_size(slicelimit,
250 toast_pointer.va_extsize);
251
252 /*
253 * Fetch enough compressed slices (compressed marker will get set
254 * automatically).
255 */
256 preslice = toast_fetch_datum_slice(attr, 0, max_size);
257 }
258 else
259 preslice = toast_fetch_datum(attr);
260 }
261 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
262 {
263 struct varatt_indirect redirect;
264
265 VARATT_EXTERNAL_GET_POINTER(redirect, attr);
266
267 /* nested indirect Datums aren't allowed */
268 Assert(!VARATT_IS_EXTERNAL_INDIRECT(redirect.pointer));
269
270 return detoast_attr_slice(redirect.pointer,
271 sliceoffset, slicelength);
272 }
273 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
274 {
275 /* pass it off to detoast_external_attr to flatten */
276 preslice = detoast_external_attr(attr);
277 }
278 else
279 preslice = attr;
280
281 Assert(!VARATT_IS_EXTERNAL(preslice));
282
283 if (VARATT_IS_COMPRESSED(preslice))
284 {
285 struct varlena *tmp = preslice;
286
287 /* Decompress enough to encompass the slice and the offset */
288 if (slicelimit >= 0)
289 preslice = toast_decompress_datum_slice(tmp, slicelimit);
290 else
291 preslice = toast_decompress_datum(tmp);
292
293 if (tmp != attr)
294 pfree(tmp);
295 }
296
297 if (VARATT_IS_SHORT(preslice))
298 {
299 attrdata = VARDATA_SHORT(preslice);
300 attrsize = VARSIZE_SHORT(preslice) - VARHDRSZ_SHORT;
301 }
302 else
303 {
304 attrdata = VARDATA(preslice);
305 attrsize = VARSIZE(preslice) - VARHDRSZ;
306 }
307
308 /* slicing of datum for compressed cases and plain value */
309
310 if (sliceoffset >= attrsize)
311 {
312 sliceoffset = 0;
313 slicelength = 0;
314 }
315 else if (slicelength < 0 || slicelimit > attrsize)
316 slicelength = attrsize - sliceoffset;
317
318 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
319 SET_VARSIZE(result, slicelength + VARHDRSZ);
320
321 memcpy(VARDATA(result), attrdata + sliceoffset, slicelength);
322
323 if (preslice != attr)
324 pfree(preslice);
325
326 return result;
327 }
328
329 /* ----------
330 * toast_fetch_datum -
331 *
332 * Reconstruct an in memory Datum from the chunks saved
333 * in the toast relation
334 * ----------
335 */
336 static struct varlena *
toast_fetch_datum(struct varlena * attr)337 toast_fetch_datum(struct varlena *attr)
338 {
339 Relation toastrel;
340 struct varlena *result;
341 struct varatt_external toast_pointer;
342 int32 attrsize;
343
344 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
345 elog(ERROR, "toast_fetch_datum shouldn't be called for non-ondisk datums");
346
347 /* Must copy to access aligned fields */
348 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
349
350 attrsize = toast_pointer.va_extsize;
351
352 result = (struct varlena *) palloc(attrsize + VARHDRSZ);
353
354 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
355 SET_VARSIZE_COMPRESSED(result, attrsize + VARHDRSZ);
356 else
357 SET_VARSIZE(result, attrsize + VARHDRSZ);
358
359 if (attrsize == 0)
360 return result; /* Probably shouldn't happen, but just in
361 * case. */
362
363 /*
364 * Open the toast relation and its indexes
365 */
366 toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
367
368 /* Fetch all chunks */
369 table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
370 attrsize, 0, attrsize, result);
371
372 /* Close toast table */
373 table_close(toastrel, AccessShareLock);
374
375 return result;
376 }
377
378 /* ----------
379 * toast_fetch_datum_slice -
380 *
381 * Reconstruct a segment of a Datum from the chunks saved
382 * in the toast relation
383 *
384 * Note that this function supports non-compressed external datums
385 * and compressed external datums (in which case the requested slice
386 * has to be a prefix, i.e. sliceoffset has to be 0).
387 * ----------
388 */
389 static struct varlena *
toast_fetch_datum_slice(struct varlena * attr,int32 sliceoffset,int32 slicelength)390 toast_fetch_datum_slice(struct varlena *attr, int32 sliceoffset,
391 int32 slicelength)
392 {
393 Relation toastrel;
394 struct varlena *result;
395 struct varatt_external toast_pointer;
396 int32 attrsize;
397
398 if (!VARATT_IS_EXTERNAL_ONDISK(attr))
399 elog(ERROR, "toast_fetch_datum_slice shouldn't be called for non-ondisk datums");
400
401 /* Must copy to access aligned fields */
402 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
403
404 /*
405 * It's nonsense to fetch slices of a compressed datum unless when it's a
406 * prefix -- this isn't lo_* we can't return a compressed datum which is
407 * meaningful to toast later.
408 */
409 Assert(!VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) || 0 == sliceoffset);
410
411 attrsize = toast_pointer.va_extsize;
412
413 if (sliceoffset >= attrsize)
414 {
415 sliceoffset = 0;
416 slicelength = 0;
417 }
418
419 /*
420 * When fetching a prefix of a compressed external datum, account for the
421 * rawsize tracking amount of raw data, which is stored at the beginning
422 * as an int32 value).
423 */
424 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer) && slicelength > 0)
425 slicelength = slicelength + sizeof(int32);
426
427 /*
428 * Adjust length request if needed. (Note: our sole caller,
429 * detoast_attr_slice, protects us against sliceoffset + slicelength
430 * overflowing.)
431 */
432 if (((sliceoffset + slicelength) > attrsize) || slicelength < 0)
433 slicelength = attrsize - sliceoffset;
434
435 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
436
437 if (VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer))
438 SET_VARSIZE_COMPRESSED(result, slicelength + VARHDRSZ);
439 else
440 SET_VARSIZE(result, slicelength + VARHDRSZ);
441
442 if (slicelength == 0)
443 return result; /* Can save a lot of work at this point! */
444
445 /* Open the toast relation */
446 toastrel = table_open(toast_pointer.va_toastrelid, AccessShareLock);
447
448 /* Fetch all chunks */
449 table_relation_fetch_toast_slice(toastrel, toast_pointer.va_valueid,
450 attrsize, sliceoffset, slicelength,
451 result);
452
453 /* Close toast table */
454 table_close(toastrel, AccessShareLock);
455
456 return result;
457 }
458
459 /* ----------
460 * toast_decompress_datum -
461 *
462 * Decompress a compressed version of a varlena datum
463 */
464 static struct varlena *
toast_decompress_datum(struct varlena * attr)465 toast_decompress_datum(struct varlena *attr)
466 {
467 struct varlena *result;
468
469 Assert(VARATT_IS_COMPRESSED(attr));
470
471 result = (struct varlena *)
472 palloc(TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
473 SET_VARSIZE(result, TOAST_COMPRESS_RAWSIZE(attr) + VARHDRSZ);
474
475 if (pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
476 TOAST_COMPRESS_SIZE(attr),
477 VARDATA(result),
478 TOAST_COMPRESS_RAWSIZE(attr), true) < 0)
479 elog(ERROR, "compressed data is corrupted");
480
481 return result;
482 }
483
484
485 /* ----------
486 * toast_decompress_datum_slice -
487 *
488 * Decompress the front of a compressed version of a varlena datum.
489 * offset handling happens in detoast_attr_slice.
490 * Here we just decompress a slice from the front.
491 */
492 static struct varlena *
toast_decompress_datum_slice(struct varlena * attr,int32 slicelength)493 toast_decompress_datum_slice(struct varlena *attr, int32 slicelength)
494 {
495 struct varlena *result;
496 int32 rawsize;
497
498 Assert(VARATT_IS_COMPRESSED(attr));
499
500 result = (struct varlena *) palloc(slicelength + VARHDRSZ);
501
502 rawsize = pglz_decompress(TOAST_COMPRESS_RAWDATA(attr),
503 VARSIZE(attr) - TOAST_COMPRESS_HDRSZ,
504 VARDATA(result),
505 slicelength, false);
506 if (rawsize < 0)
507 elog(ERROR, "compressed data is corrupted");
508
509 SET_VARSIZE(result, rawsize + VARHDRSZ);
510 return result;
511 }
512
513 /* ----------
514 * toast_raw_datum_size -
515 *
516 * Return the raw (detoasted) size of a varlena datum
517 * (including the VARHDRSZ header)
518 * ----------
519 */
520 Size
toast_raw_datum_size(Datum value)521 toast_raw_datum_size(Datum value)
522 {
523 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
524 Size result;
525
526 if (VARATT_IS_EXTERNAL_ONDISK(attr))
527 {
528 /* va_rawsize is the size of the original datum -- including header */
529 struct varatt_external toast_pointer;
530
531 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
532 result = toast_pointer.va_rawsize;
533 }
534 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
535 {
536 struct varatt_indirect toast_pointer;
537
538 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
539
540 /* nested indirect Datums aren't allowed */
541 Assert(!VARATT_IS_EXTERNAL_INDIRECT(toast_pointer.pointer));
542
543 return toast_raw_datum_size(PointerGetDatum(toast_pointer.pointer));
544 }
545 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
546 {
547 result = EOH_get_flat_size(DatumGetEOHP(value));
548 }
549 else if (VARATT_IS_COMPRESSED(attr))
550 {
551 /* here, va_rawsize is just the payload size */
552 result = VARRAWSIZE_4B_C(attr) + VARHDRSZ;
553 }
554 else if (VARATT_IS_SHORT(attr))
555 {
556 /*
557 * we have to normalize the header length to VARHDRSZ or else the
558 * callers of this function will be confused.
559 */
560 result = VARSIZE_SHORT(attr) - VARHDRSZ_SHORT + VARHDRSZ;
561 }
562 else
563 {
564 /* plain untoasted datum */
565 result = VARSIZE(attr);
566 }
567 return result;
568 }
569
570 /* ----------
571 * toast_datum_size
572 *
573 * Return the physical storage size (possibly compressed) of a varlena datum
574 * ----------
575 */
576 Size
toast_datum_size(Datum value)577 toast_datum_size(Datum value)
578 {
579 struct varlena *attr = (struct varlena *) DatumGetPointer(value);
580 Size result;
581
582 if (VARATT_IS_EXTERNAL_ONDISK(attr))
583 {
584 /*
585 * Attribute is stored externally - return the extsize whether
586 * compressed or not. We do not count the size of the toast pointer
587 * ... should we?
588 */
589 struct varatt_external toast_pointer;
590
591 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
592 result = toast_pointer.va_extsize;
593 }
594 else if (VARATT_IS_EXTERNAL_INDIRECT(attr))
595 {
596 struct varatt_indirect toast_pointer;
597
598 VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
599
600 /* nested indirect Datums aren't allowed */
601 Assert(!VARATT_IS_EXTERNAL_INDIRECT(attr));
602
603 return toast_datum_size(PointerGetDatum(toast_pointer.pointer));
604 }
605 else if (VARATT_IS_EXTERNAL_EXPANDED(attr))
606 {
607 result = EOH_get_flat_size(DatumGetEOHP(value));
608 }
609 else if (VARATT_IS_SHORT(attr))
610 {
611 result = VARSIZE_SHORT(attr);
612 }
613 else
614 {
615 /*
616 * Attribute is stored inline either compressed or not, just calculate
617 * the size of the datum in either case.
618 */
619 result = VARSIZE(attr);
620 }
621 return result;
622 }
623