1 /*-------------------------------------------------------------------------
2  *
3  * toast_internals.c
4  *	  Functions for internal use by the TOAST system.
5  *
6  * Copyright (c) 2000-2021, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/access/common/toast_internals.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 
14 #include "postgres.h"
15 
16 #include "access/detoast.h"
17 #include "access/genam.h"
18 #include "access/heapam.h"
19 #include "access/heaptoast.h"
20 #include "access/table.h"
21 #include "access/toast_internals.h"
22 #include "access/xact.h"
23 #include "catalog/catalog.h"
24 #include "common/pg_lzcompress.h"
25 #include "miscadmin.h"
26 #include "utils/fmgroids.h"
27 #include "utils/rel.h"
28 #include "utils/snapmgr.h"
29 
30 static bool toastrel_valueid_exists(Relation toastrel, Oid valueid);
31 static bool toastid_valueid_exists(Oid toastrelid, Oid valueid);
32 
33 /* ----------
34  * toast_compress_datum -
35  *
36  *	Create a compressed version of a varlena datum
37  *
38  *	If we fail (ie, compressed result is actually bigger than original)
39  *	then return NULL.  We must not use compressed data if it'd expand
40  *	the tuple!
41  *
42  *	We use VAR{SIZE,DATA}_ANY so we can handle short varlenas here without
43  *	copying them.  But we can't handle external or compressed datums.
44  * ----------
45  */
46 Datum
47 toast_compress_datum(Datum value, char cmethod)
48 {
49 	struct varlena *tmp = NULL;
50 	int32		valsize;
51 	ToastCompressionId cmid = TOAST_INVALID_COMPRESSION_ID;
52 
53 	Assert(!VARATT_IS_EXTERNAL(DatumGetPointer(value)));
54 	Assert(!VARATT_IS_COMPRESSED(DatumGetPointer(value)));
55 
56 	valsize = VARSIZE_ANY_EXHDR(DatumGetPointer(value));
57 
58 	/* If the compression method is not valid, use the current default */
59 	if (!CompressionMethodIsValid(cmethod))
60 		cmethod = default_toast_compression;
61 
62 	/*
63 	 * Call appropriate compression routine for the compression method.
64 	 */
65 	switch (cmethod)
66 	{
67 		case TOAST_PGLZ_COMPRESSION:
68 			tmp = pglz_compress_datum((const struct varlena *) value);
69 			cmid = TOAST_PGLZ_COMPRESSION_ID;
70 			break;
71 		case TOAST_LZ4_COMPRESSION:
72 			tmp = lz4_compress_datum((const struct varlena *) value);
73 			cmid = TOAST_LZ4_COMPRESSION_ID;
74 			break;
75 		default:
76 			elog(ERROR, "invalid compression method %c", cmethod);
77 	}
78 
79 	if (tmp == NULL)
80 		return PointerGetDatum(NULL);
81 
82 	/*
83 	 * We recheck the actual size even if compression reports success, because
84 	 * it might be satisfied with having saved as little as one byte in the
85 	 * compressed data --- which could turn into a net loss once you consider
86 	 * header and alignment padding.  Worst case, the compressed format might
87 	 * require three padding bytes (plus header, which is included in
88 	 * VARSIZE(tmp)), whereas the uncompressed format would take only one
89 	 * header byte and no padding if the value is short enough.  So we insist
90 	 * on a savings of more than 2 bytes to ensure we have a gain.
91 	 */
92 	if (VARSIZE(tmp) < valsize - 2)
93 	{
94 		/* successful compression */
95 		Assert(cmid != TOAST_INVALID_COMPRESSION_ID);
96 		TOAST_COMPRESS_SET_SIZE_AND_COMPRESS_METHOD(tmp, valsize, cmid);
97 		return PointerGetDatum(tmp);
98 	}
99 	else
100 	{
101 		/* incompressible data */
102 		pfree(tmp);
103 		return PointerGetDatum(NULL);
104 	}
105 }
106 
107 /* ----------
108  * toast_save_datum -
109  *
110  *	Save one single datum into the secondary relation and return
111  *	a Datum reference for it.
112  *
113  * rel: the main relation we're working with (not the toast rel!)
114  * value: datum to be pushed to toast storage
115  * oldexternal: if not NULL, toast pointer previously representing the datum
116  * options: options to be passed to heap_insert() for toast rows
117  * ----------
118  */
119 Datum
120 toast_save_datum(Relation rel, Datum value,
121 				 struct varlena *oldexternal, int options)
122 {
123 	Relation	toastrel;
124 	Relation   *toastidxs;
125 	HeapTuple	toasttup;
SyncScanShmemSize(void)126 	TupleDesc	toasttupDesc;
127 	Datum		t_values[3];
128 	bool		t_isnull[3];
129 	CommandId	mycid = GetCurrentCommandId(true);
130 	struct varlena *result;
131 	struct varatt_external toast_pointer;
132 	union
133 	{
134 		struct varlena hdr;
SyncScanShmemInit(void)135 		/* this is to make the union big enough for a chunk: */
136 		char		data[TOAST_MAX_CHUNK_SIZE + VARHDRSZ];
137 		/* ensure union is aligned well enough: */
138 		int32		align_it;
139 	}			chunk_data;
140 	int32		chunk_size;
141 	int32		chunk_seq = 0;
142 	char	   *data_p;
143 	int32		data_todo;
144 	Pointer		dval = DatumGetPointer(value);
145 	int			num_indexes;
146 	int			validIndex;
147 
148 	Assert(!VARATT_IS_EXTERNAL(value));
149 
150 	/*
151 	 * Open the toast relation and its indexes.  We can use the index to check
152 	 * uniqueness of the OID we assign to the toasted item, even though it has
153 	 * additional columns besides OID.
154 	 */
155 	toastrel = table_open(rel->rd_rel->reltoastrelid, RowExclusiveLock);
156 	toasttupDesc = toastrel->rd_att;
157 
158 	/* Open all the toast indexes and look for the valid one */
159 	validIndex = toast_open_indexes(toastrel,
160 									RowExclusiveLock,
161 									&toastidxs,
162 									&num_indexes);
163 
164 	/*
165 	 * Get the data pointer and length, and compute va_rawsize and va_extinfo.
166 	 *
167 	 * va_rawsize is the size of the equivalent fully uncompressed datum, so
168 	 * we have to adjust for short headers.
169 	 *
170 	 * va_extinfo stored the actual size of the data payload in the toast
171 	 * records and the compression method in first 2 bits if data is
172 	 * compressed.
173 	 */
174 	if (VARATT_IS_SHORT(dval))
175 	{
176 		data_p = VARDATA_SHORT(dval);
177 		data_todo = VARSIZE_SHORT(dval) - VARHDRSZ_SHORT;
178 		toast_pointer.va_rawsize = data_todo + VARHDRSZ;	/* as if not short */
179 		toast_pointer.va_extinfo = data_todo;
180 	}
181 	else if (VARATT_IS_COMPRESSED(dval))
182 	{
183 		data_p = VARDATA(dval);
184 		data_todo = VARSIZE(dval) - VARHDRSZ;
185 		/* rawsize in a compressed datum is just the size of the payload */
186 		toast_pointer.va_rawsize = VARDATA_COMPRESSED_GET_EXTSIZE(dval) + VARHDRSZ;
187 
188 		/* set external size and compression method */
189 		VARATT_EXTERNAL_SET_SIZE_AND_COMPRESS_METHOD(toast_pointer, data_todo,
190 													 VARDATA_COMPRESSED_GET_COMPRESS_METHOD(dval));
ss_search(RelFileNode relfilenode,BlockNumber location,bool set)191 		/* Assert that the numbers look like it's compressed */
192 		Assert(VARATT_EXTERNAL_IS_COMPRESSED(toast_pointer));
193 	}
194 	else
195 	{
196 		data_p = VARDATA(dval);
197 		data_todo = VARSIZE(dval) - VARHDRSZ;
198 		toast_pointer.va_rawsize = VARSIZE(dval);
199 		toast_pointer.va_extinfo = data_todo;
200 	}
201 
202 	/*
203 	 * Insert the correct table OID into the result TOAST pointer.
204 	 *
205 	 * Normally this is the actual OID of the target toast table, but during
206 	 * table-rewriting operations such as CLUSTER, we have to insert the OID
207 	 * of the table's real permanent toast table instead.  rd_toastoid is set
208 	 * if we have to substitute such an OID.
209 	 */
210 	if (OidIsValid(rel->rd_toastoid))
211 		toast_pointer.va_toastrelid = rel->rd_toastoid;
212 	else
213 		toast_pointer.va_toastrelid = RelationGetRelid(toastrel);
214 
215 	/*
216 	 * Choose an OID to use as the value ID for this toast value.
217 	 *
218 	 * Normally we just choose an unused OID within the toast table.  But
219 	 * during table-rewriting operations where we are preserving an existing
220 	 * toast table OID, we want to preserve toast value OIDs too.  So, if
221 	 * rd_toastoid is set and we had a prior external value from that same
222 	 * toast table, re-use its value ID.  If we didn't have a prior external
223 	 * value (which is a corner case, but possible if the table's attstorage
224 	 * options have been changed), we have to pick a value ID that doesn't
225 	 * conflict with either new or existing toast value OIDs.
226 	 */
227 	if (!OidIsValid(rel->rd_toastoid))
228 	{
229 		/* normal case: just choose an unused OID */
230 		toast_pointer.va_valueid =
231 			GetNewOidWithIndex(toastrel,
232 							   RelationGetRelid(toastidxs[validIndex]),
233 							   (AttrNumber) 1);
234 	}
235 	else
236 	{
237 		/* rewrite case: check to see if value was in old toast table */
238 		toast_pointer.va_valueid = InvalidOid;
239 		if (oldexternal != NULL)
240 		{
241 			struct varatt_external old_toast_pointer;
242 
243 			Assert(VARATT_IS_EXTERNAL_ONDISK(oldexternal));
244 			/* Must copy to access aligned fields */
245 			VARATT_EXTERNAL_GET_POINTER(old_toast_pointer, oldexternal);
246 			if (old_toast_pointer.va_toastrelid == rel->rd_toastoid)
247 			{
248 				/* This value came from the old toast table; reuse its OID */
249 				toast_pointer.va_valueid = old_toast_pointer.va_valueid;
250 
251 				/*
252 				 * There is a corner case here: the table rewrite might have
ss_get_location(Relation rel,BlockNumber relnblocks)253 				 * to copy both live and recently-dead versions of a row, and
254 				 * those versions could easily reference the same toast value.
255 				 * When we copy the second or later version of such a row,
256 				 * reusing the OID will mean we select an OID that's already
257 				 * in the new toast table.  Check for that, and if so, just
258 				 * fall through without writing the data again.
259 				 *
260 				 * While annoying and ugly-looking, this is a good thing
261 				 * because it ensures that we wind up with only one copy of
262 				 * the toast value when there is only one copy in the old
263 				 * toast table.  Before we detected this case, we'd have made
264 				 * multiple copies, wasting space; and what's worse, the
265 				 * copies belonging to already-deleted heap tuples would not
266 				 * be reclaimed by VACUUM.
267 				 */
268 				if (toastrel_valueid_exists(toastrel,
269 											toast_pointer.va_valueid))
270 				{
271 					/* Match, so short-circuit the data storage loop below */
272 					data_todo = 0;
273 				}
274 			}
275 		}
276 		if (toast_pointer.va_valueid == InvalidOid)
277 		{
278 			/*
279 			 * new value; must choose an OID that doesn't conflict in either
280 			 * old or new toast table
281 			 */
282 			do
283 			{
284 				toast_pointer.va_valueid =
285 					GetNewOidWithIndex(toastrel,
286 									   RelationGetRelid(toastidxs[validIndex]),
287 									   (AttrNumber) 1);
ss_report_location(Relation rel,BlockNumber location)288 			} while (toastid_valueid_exists(rel->rd_toastoid,
289 											toast_pointer.va_valueid));
290 		}
291 	}
292 
293 	/*
294 	 * Initialize constant parts of the tuple data
295 	 */
296 	t_values[0] = ObjectIdGetDatum(toast_pointer.va_valueid);
297 	t_values[2] = PointerGetDatum(&chunk_data);
298 	t_isnull[0] = false;
299 	t_isnull[1] = false;
300 	t_isnull[2] = false;
301 
302 	/*
303 	 * Split up the item into chunks
304 	 */
305 	while (data_todo > 0)
306 	{
307 		int			i;
308 
309 		CHECK_FOR_INTERRUPTS();
310 
311 		/*
312 		 * Calculate the size of this chunk
313 		 */
314 		chunk_size = Min(TOAST_MAX_CHUNK_SIZE, data_todo);
315 
316 		/*
317 		 * Build a tuple and store it
318 		 */
319 		t_values[1] = Int32GetDatum(chunk_seq++);
320 		SET_VARSIZE(&chunk_data, chunk_size + VARHDRSZ);
321 		memcpy(VARDATA(&chunk_data), data_p, chunk_size);
322 		toasttup = heap_form_tuple(toasttupDesc, t_values, t_isnull);
323 
324 		heap_insert(toastrel, toasttup, mycid, options, NULL);
325 
326 		/*
327 		 * Create the index entry.  We cheat a little here by not using
328 		 * FormIndexDatum: this relies on the knowledge that the index columns
329 		 * are the same as the initial columns of the table for all the
330 		 * indexes.  We also cheat by not providing an IndexInfo: this is okay
331 		 * for now because btree doesn't need one, but we might have to be
332 		 * more honest someday.
333 		 *
334 		 * Note also that there had better not be any user-created index on
335 		 * the TOAST table, since we don't bother to update anything else.
336 		 */
337 		for (i = 0; i < num_indexes; i++)
338 		{
339 			/* Only index relations marked as ready can be updated */
340 			if (toastidxs[i]->rd_index->indisready)
341 				index_insert(toastidxs[i], t_values, t_isnull,
342 							 &(toasttup->t_self),
343 							 toastrel,
344 							 toastidxs[i]->rd_index->indisunique ?
345 							 UNIQUE_CHECK_YES : UNIQUE_CHECK_NO,
346 							 false, NULL);
347 		}
348 
349 		/*
350 		 * Free memory
351 		 */
352 		heap_freetuple(toasttup);
353 
354 		/*
355 		 * Move on to next chunk
356 		 */
357 		data_todo -= chunk_size;
358 		data_p += chunk_size;
359 	}
360 
361 	/*
362 	 * Done - close toast relation and its indexes
363 	 */
364 	toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
365 	table_close(toastrel, RowExclusiveLock);
366 
367 	/*
368 	 * Create the TOAST pointer value that we'll return
369 	 */
370 	result = (struct varlena *) palloc(TOAST_POINTER_SIZE);
371 	SET_VARTAG_EXTERNAL(result, VARTAG_ONDISK);
372 	memcpy(VARDATA_EXTERNAL(result), &toast_pointer, sizeof(toast_pointer));
373 
374 	return PointerGetDatum(result);
375 }
376 
377 /* ----------
378  * toast_delete_datum -
379  *
380  *	Delete a single external stored value.
381  * ----------
382  */
383 void
384 toast_delete_datum(Relation rel, Datum value, bool is_speculative)
385 {
386 	struct varlena *attr = (struct varlena *) DatumGetPointer(value);
387 	struct varatt_external toast_pointer;
388 	Relation	toastrel;
389 	Relation   *toastidxs;
390 	ScanKeyData toastkey;
391 	SysScanDesc toastscan;
392 	HeapTuple	toasttup;
393 	int			num_indexes;
394 	int			validIndex;
395 	SnapshotData SnapshotToast;
396 
397 	if (!VARATT_IS_EXTERNAL_ONDISK(attr))
398 		return;
399 
400 	/* Must copy to access aligned fields */
401 	VARATT_EXTERNAL_GET_POINTER(toast_pointer, attr);
402 
403 	/*
404 	 * Open the toast relation and its indexes
405 	 */
406 	toastrel = table_open(toast_pointer.va_toastrelid, RowExclusiveLock);
407 
408 	/* Fetch valid relation used for process */
409 	validIndex = toast_open_indexes(toastrel,
410 									RowExclusiveLock,
411 									&toastidxs,
412 									&num_indexes);
413 
414 	/*
415 	 * Setup a scan key to find chunks with matching va_valueid
416 	 */
417 	ScanKeyInit(&toastkey,
418 				(AttrNumber) 1,
419 				BTEqualStrategyNumber, F_OIDEQ,
420 				ObjectIdGetDatum(toast_pointer.va_valueid));
421 
422 	/*
423 	 * Find all the chunks.  (We don't actually care whether we see them in
424 	 * sequence or not, but since we've already locked the index we might as
425 	 * well use systable_beginscan_ordered.)
426 	 */
427 	init_toast_snapshot(&SnapshotToast);
428 	toastscan = systable_beginscan_ordered(toastrel, toastidxs[validIndex],
429 										   &SnapshotToast, 1, &toastkey);
430 	while ((toasttup = systable_getnext_ordered(toastscan, ForwardScanDirection)) != NULL)
431 	{
432 		/*
433 		 * Have a chunk, delete it
434 		 */
435 		if (is_speculative)
436 			heap_abort_speculative(toastrel, &toasttup->t_self);
437 		else
438 			simple_heap_delete(toastrel, &toasttup->t_self);
439 	}
440 
441 	/*
442 	 * End scan and close relations
443 	 */
444 	systable_endscan_ordered(toastscan);
445 	toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
446 	table_close(toastrel, RowExclusiveLock);
447 }
448 
449 /* ----------
450  * toastrel_valueid_exists -
451  *
452  *	Test whether a toast value with the given ID exists in the toast relation.
453  *	For safety, we consider a value to exist if there are either live or dead
454  *	toast rows with that ID; see notes for GetNewOidWithIndex().
455  * ----------
456  */
457 static bool
458 toastrel_valueid_exists(Relation toastrel, Oid valueid)
459 {
460 	bool		result = false;
461 	ScanKeyData toastkey;
462 	SysScanDesc toastscan;
463 	int			num_indexes;
464 	int			validIndex;
465 	Relation   *toastidxs;
466 
467 	/* Fetch a valid index relation */
468 	validIndex = toast_open_indexes(toastrel,
469 									RowExclusiveLock,
470 									&toastidxs,
471 									&num_indexes);
472 
473 	/*
474 	 * Setup a scan key to find chunks with matching va_valueid
475 	 */
476 	ScanKeyInit(&toastkey,
477 				(AttrNumber) 1,
478 				BTEqualStrategyNumber, F_OIDEQ,
479 				ObjectIdGetDatum(valueid));
480 
481 	/*
482 	 * Is there any such chunk?
483 	 */
484 	toastscan = systable_beginscan(toastrel,
485 								   RelationGetRelid(toastidxs[validIndex]),
486 								   true, SnapshotAny, 1, &toastkey);
487 
488 	if (systable_getnext(toastscan) != NULL)
489 		result = true;
490 
491 	systable_endscan(toastscan);
492 
493 	/* Clean up */
494 	toast_close_indexes(toastidxs, num_indexes, RowExclusiveLock);
495 
496 	return result;
497 }
498 
499 /* ----------
500  * toastid_valueid_exists -
501  *
502  *	As above, but work from toast rel's OID not an open relation
503  * ----------
504  */
505 static bool
506 toastid_valueid_exists(Oid toastrelid, Oid valueid)
507 {
508 	bool		result;
509 	Relation	toastrel;
510 
511 	toastrel = table_open(toastrelid, AccessShareLock);
512 
513 	result = toastrel_valueid_exists(toastrel, valueid);
514 
515 	table_close(toastrel, AccessShareLock);
516 
517 	return result;
518 }
519 
520 /* ----------
521  * toast_get_valid_index
522  *
523  *	Get OID of valid index associated to given toast relation. A toast
524  *	relation can have only one valid index at the same time.
525  */
526 Oid
527 toast_get_valid_index(Oid toastoid, LOCKMODE lock)
528 {
529 	int			num_indexes;
530 	int			validIndex;
531 	Oid			validIndexOid;
532 	Relation   *toastidxs;
533 	Relation	toastrel;
534 
535 	/* Open the toast relation */
536 	toastrel = table_open(toastoid, lock);
537 
538 	/* Look for the valid index of the toast relation */
539 	validIndex = toast_open_indexes(toastrel,
540 									lock,
541 									&toastidxs,
542 									&num_indexes);
543 	validIndexOid = RelationGetRelid(toastidxs[validIndex]);
544 
545 	/* Close the toast relation and all its indexes */
546 	toast_close_indexes(toastidxs, num_indexes, NoLock);
547 	table_close(toastrel, NoLock);
548 
549 	return validIndexOid;
550 }
551 
552 /* ----------
553  * toast_open_indexes
554  *
555  *	Get an array of the indexes associated to the given toast relation
556  *	and return as well the position of the valid index used by the toast
557  *	relation in this array. It is the responsibility of the caller of this
558  *	function to close the indexes as well as free them.
559  */
560 int
561 toast_open_indexes(Relation toastrel,
562 				   LOCKMODE lock,
563 				   Relation **toastidxs,
564 				   int *num_indexes)
565 {
566 	int			i = 0;
567 	int			res = 0;
568 	bool		found = false;
569 	List	   *indexlist;
570 	ListCell   *lc;
571 
572 	/* Get index list of the toast relation */
573 	indexlist = RelationGetIndexList(toastrel);
574 	Assert(indexlist != NIL);
575 
576 	*num_indexes = list_length(indexlist);
577 
578 	/* Open all the index relations */
579 	*toastidxs = (Relation *) palloc(*num_indexes * sizeof(Relation));
580 	foreach(lc, indexlist)
581 		(*toastidxs)[i++] = index_open(lfirst_oid(lc), lock);
582 
583 	/* Fetch the first valid index in list */
584 	for (i = 0; i < *num_indexes; i++)
585 	{
586 		Relation	toastidx = (*toastidxs)[i];
587 
588 		if (toastidx->rd_index->indisvalid)
589 		{
590 			res = i;
591 			found = true;
592 			break;
593 		}
594 	}
595 
596 	/*
597 	 * Free index list, not necessary anymore as relations are opened and a
598 	 * valid index has been found.
599 	 */
600 	list_free(indexlist);
601 
602 	/*
603 	 * The toast relation should have one valid index, so something is going
604 	 * wrong if there is nothing.
605 	 */
606 	if (!found)
607 		elog(ERROR, "no valid index found for toast relation with Oid %u",
608 			 RelationGetRelid(toastrel));
609 
610 	return res;
611 }
612 
613 /* ----------
614  * toast_close_indexes
615  *
616  *	Close an array of indexes for a toast relation and free it. This should
617  *	be called for a set of indexes opened previously with toast_open_indexes.
618  */
619 void
620 toast_close_indexes(Relation *toastidxs, int num_indexes, LOCKMODE lock)
621 {
622 	int			i;
623 
624 	/* Close relations and clean up things */
625 	for (i = 0; i < num_indexes; i++)
626 		index_close(toastidxs[i], lock);
627 	pfree(toastidxs);
628 }
629 
630 /* ----------
631  * init_toast_snapshot
632  *
633  *	Initialize an appropriate TOAST snapshot.  We must use an MVCC snapshot
634  *	to initialize the TOAST snapshot; since we don't know which one to use,
635  *	just use the oldest one.  This is safe: at worst, we will get a "snapshot
636  *	too old" error that might have been avoided otherwise.
637  */
638 void
639 init_toast_snapshot(Snapshot toast_snapshot)
640 {
641 	Snapshot	snapshot = GetOldestSnapshot();
642 
643 	/*
644 	 * GetOldestSnapshot returns NULL if the session has no active snapshots.
645 	 * We can get that if, for example, a procedure fetches a toasted value
646 	 * into a local variable, commits, and then tries to detoast the value.
647 	 * Such coding is unsafe, because once we commit there is nothing to
648 	 * prevent the toast data from being deleted.  Detoasting *must* happen in
649 	 * the same transaction that originally fetched the toast pointer.  Hence,
650 	 * rather than trying to band-aid over the problem, throw an error.  (This
651 	 * is not very much protection, because in many scenarios the procedure
652 	 * would have already created a new transaction snapshot, preventing us
653 	 * from detecting the problem.  But it's better than nothing, and for sure
654 	 * we shouldn't expend code on masking the problem more.)
655 	 */
656 	if (snapshot == NULL)
657 		elog(ERROR, "cannot fetch toast data without an active snapshot");
658 
659 	InitToastSnapshot(*toast_snapshot, snapshot->lsn, snapshot->whenTaken);
660 }
661