/*-------------------------------------------------------------------------
 *
 * indextuple.c
 *	   This file contains index tuple accessor and mutator routines,
 *	   as well as various tuple utilities.
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *	  src/backend/access/common/indextuple.c
 *
 *-------------------------------------------------------------------------
 */

#include "postgres.h"

#include "access/detoast.h"
#include "access/heaptoast.h"
#include "access/htup_details.h"
#include "access/itup.h"
#include "access/toast_internals.h"

/*
 * This enables de-toasting of index entries.  Needed until VACUUM is
 * smart enough to rebuild indexes from scratch.
 */
#define TOAST_INDEX_HACK

/* ----------------------------------------------------------------
 *				  index_ tuple interface routines
 * ----------------------------------------------------------------
 */

/* ----------------
 *		index_form_tuple
 *
 *		This shouldn't leak any memory; otherwise, callers such as
 *		tuplesort_putindextuplevalues() will be very unhappy.
 *
 *		This shouldn't perform external table access provided caller
 *		does not pass values that are stored EXTERNAL.
 * ----------------
 */
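/*
 * A typical call looks roughly like the sketch below ("indexDescriptor" and
 * "heapTuple" are stand-ins for whatever the caller actually has at hand):
 *
 *		Datum		values[INDEX_MAX_KEYS];
 *		bool		isnull[INDEX_MAX_KEYS];
 *		IndexTuple	itup;
 *
 *		values[0] = Int32GetDatum(42);
 *		isnull[0] = false;
 *		itup = index_form_tuple(indexDescriptor, values, isnull);
 *		itup->t_tid = heapTuple->t_self;
 *
 * The result is palloc'd in the current memory context; it is up to the
 * caller to pfree() it (or reset the context) once it is no longer needed.
 */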
IndexTuple
index_form_tuple(TupleDesc tupleDescriptor,
				 Datum *values,
				 bool *isnull)
{
	char	   *tp;				/* tuple pointer */
	IndexTuple	tuple;			/* return tuple */
	Size		size,
				data_size,
				hoff;
	int			i;
	unsigned short infomask = 0;
	bool		hasnull = false;
	uint16		tupmask = 0;
	int			numberOfAttributes = tupleDescriptor->natts;

#ifdef TOAST_INDEX_HACK
	Datum		untoasted_values[INDEX_MAX_KEYS];
	bool		untoasted_free[INDEX_MAX_KEYS];
#endif

	if (numberOfAttributes > INDEX_MAX_KEYS)
		ereport(ERROR,
				(errcode(ERRCODE_TOO_MANY_COLUMNS),
				 errmsg("number of index columns (%d) exceeds limit (%d)",
						numberOfAttributes, INDEX_MAX_KEYS)));

#ifdef TOAST_INDEX_HACK
	for (i = 0; i < numberOfAttributes; i++)
	{
		Form_pg_attribute att = TupleDescAttr(tupleDescriptor, i);

		untoasted_values[i] = values[i];
		untoasted_free[i] = false;

		/* Do nothing if value is NULL or not of varlena type */
		if (isnull[i] || att->attlen != -1)
			continue;

		/*
		 * If value is stored EXTERNAL, must fetch it so we are not depending
		 * on outside storage.  This should be improved someday.
		 */
		if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i])))
		{
			untoasted_values[i] =
				PointerGetDatum(detoast_external_attr((struct varlena *)
													  DatumGetPointer(values[i])));
			untoasted_free[i] = true;
		}

		/*
		 * If value is above size target, and is of a compressible datatype,
		 * try to compress it in-line.
		 */
		if (!VARATT_IS_EXTENDED(DatumGetPointer(untoasted_values[i])) &&
			VARSIZE(DatumGetPointer(untoasted_values[i])) > TOAST_INDEX_TARGET &&
			(att->attstorage == TYPSTORAGE_EXTENDED ||
			 att->attstorage == TYPSTORAGE_MAIN))
		{
			Datum		cvalue;

			cvalue = toast_compress_datum(untoasted_values[i],
										  att->attcompression);

			if (DatumGetPointer(cvalue) != NULL)
			{
				/* successful compression */
				if (untoasted_free[i])
					pfree(DatumGetPointer(untoasted_values[i]));
				untoasted_values[i] = cvalue;
				untoasted_free[i] = true;
			}
		}
	}
#endif

	for (i = 0; i < numberOfAttributes; i++)
	{
		if (isnull[i])
		{
			hasnull = true;
			break;
		}
	}

	if (hasnull)
		infomask |= INDEX_NULL_MASK;

	hoff = IndexInfoFindDataOffset(infomask);
#ifdef TOAST_INDEX_HACK
	data_size = heap_compute_data_size(tupleDescriptor,
									   untoasted_values, isnull);
#else
	data_size = heap_compute_data_size(tupleDescriptor,
									   values, isnull);
#endif
	size = hoff + data_size;
	size = MAXALIGN(size);		/* be conservative */

	tp = (char *) palloc0(size);
	tuple = (IndexTuple) tp;

	heap_fill_tuple(tupleDescriptor,
#ifdef TOAST_INDEX_HACK
					untoasted_values,
#else
					values,
#endif
					isnull,
					(char *) tp + hoff,
					data_size,
					&tupmask,
					(hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL));

#ifdef TOAST_INDEX_HACK
	for (i = 0; i < numberOfAttributes; i++)
	{
		if (untoasted_free[i])
			pfree(DatumGetPointer(untoasted_values[i]));
	}
#endif

	/*
	 * We do this because heap_fill_tuple wants to initialize a "tupmask"
	 * which is used for HeapTuples, but we want an indextuple infomask. The
	 * only relevant info is the "has variable attributes" field. We have
	 * already set the hasnull bit above.
	 */
	if (tupmask & HEAP_HASVARWIDTH)
		infomask |= INDEX_VAR_MASK;

	/* Also assert we got rid of external attributes */
#ifdef TOAST_INDEX_HACK
	Assert((tupmask & HEAP_HASEXTERNAL) == 0);
#endif

	/*
	 * Here we make sure that the size will fit in the field reserved for it
	 * in t_info.
	 */
	if ((size & INDEX_SIZE_MASK) != size)
		ereport(ERROR,
				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
				 errmsg("index row requires %zu bytes, maximum size is %zu",
						size, (Size) INDEX_SIZE_MASK)));

	infomask |= size;

	/*
	 * initialize metadata
	 */
	tuple->t_info = infomask;
	return tuple;
}

/* ----------------
 *		nocache_index_getattr
 *
 *		This gets called from index_getattr() macro, and only in cases
 *		where we can't use cacheoffset and the value is not null.
 *
 *		This caches attribute offsets in the attribute descriptor.
 *
 *		An alternative way to speed things up would be to cache offsets
 *		with the tuple, but that seems more difficult unless you take
 *		the storage hit of actually putting those offsets into the
 *		tuple you send to disk.  Yuck.
 *
 *		This scheme will be slightly slower than that, but should
 *		perform well for queries which hit large #'s of tuples.  After
 *		you cache the offsets once, examining all the other tuples using
 *		the same attribute descriptor will go much quicker. -cim 5/4/91
 * ----------------
 */
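/*
 * For reference, the normal caller-side entry point is the index_getattr()
 * macro; a sketch of its use ("itup" and "itupdesc" are placeholders for
 * the caller's own tuple and descriptor):
 *
 *		bool		isnull;
 *		Datum		datum;
 *
 *		datum = index_getattr(itup, 1, itupdesc, &isnull);
 *
 * The macro handles attribute 1, cached offsets, and null attributes by
 * itself, and only falls back on this routine for the remaining cases.
 */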
Datum
nocache_index_getattr(IndexTuple tup,
					  int attnum,
					  TupleDesc tupleDesc)
{
	char	   *tp;				/* ptr to data part of tuple */
	bits8	   *bp = NULL;		/* ptr to null bitmap in tuple */
	bool		slow = false;	/* do we have to walk attrs? */
	int			data_off;		/* tuple data offset */
	int			off;			/* current offset within data */

	/* ----------------
	 *	 Three cases:
	 *
	 *	 1: No nulls and no variable-width attributes.
	 *	 2: Has a null or a var-width AFTER att.
	 *	 3: Has nulls or var-widths BEFORE att.
	 * ----------------
	 */

	data_off = IndexInfoFindDataOffset(tup->t_info);

	attnum--;

	if (IndexTupleHasNulls(tup))
	{
		/*
		 * there's a null somewhere in the tuple
		 *
		 * check to see if desired att is null
		 */

		/* XXX "knows" t_bits are just after fixed tuple header! */
		bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

		/*
		 * Now check to see if any preceding bits are null...
		 */
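		/*
		 * For example, with attnum = 10 (it's zero-based by now): byte = 1
		 * and finalbit = 2, so the mask (1 << 2) - 1 = 0x03 covers the bits
		 * for attributes 8 and 9 in bp[1], and bp[0] must be all ones (no
		 * nulls among attributes 0-7) for the fast path to stay usable.
		 */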
		{
			int			byte = attnum >> 3;
			int			finalbit = attnum & 0x07;

			/* check for nulls "before" final bit of last byte */
			if ((~bp[byte]) & ((1 << finalbit) - 1))
				slow = true;
			else
			{
				/* check for nulls in any "earlier" bytes */
				int			i;

				for (i = 0; i < byte; i++)
				{
					if (bp[i] != 0xFF)
					{
						slow = true;
						break;
					}
				}
			}
		}
	}

	tp = (char *) tup + data_off;

	if (!slow)
	{
		Form_pg_attribute att;

		/*
		 * If we get here, there are no nulls up to and including the target
		 * attribute.  If we have a cached offset, we can use it.
		 */
		att = TupleDescAttr(tupleDesc, attnum);
		if (att->attcacheoff >= 0)
			return fetchatt(att, tp + att->attcacheoff);

		/*
		 * Otherwise, check for non-fixed-length attrs up to and including
		 * target.  If there aren't any, it's safe to cheaply initialize the
		 * cached offsets for these attrs.
		 */
		if (IndexTupleHasVarwidths(tup))
		{
			int			j;

			for (j = 0; j <= attnum; j++)
			{
				if (TupleDescAttr(tupleDesc, j)->attlen <= 0)
				{
					slow = true;
					break;
				}
			}
		}
	}

	if (!slow)
	{
		int			natts = tupleDesc->natts;
		int			j = 1;

		/*
		 * If we get here, we have a tuple with no nulls or var-widths up to
		 * and including the target attribute, so we can use the cached offset
		 * ... only we don't have it yet, or we'd not have got here.  Since
		 * it's cheap to compute offsets for fixed-width columns, we take the
		 * opportunity to initialize the cached offsets for *all* the leading
		 * fixed-width columns, in hope of avoiding future visits to this
		 * routine.
		 */
		TupleDescAttr(tupleDesc, 0)->attcacheoff = 0;

		/* we might have set some offsets in the slow path previously */
		while (j < natts && TupleDescAttr(tupleDesc, j)->attcacheoff > 0)
			j++;

		off = TupleDescAttr(tupleDesc, j - 1)->attcacheoff +
			TupleDescAttr(tupleDesc, j - 1)->attlen;

		for (; j < natts; j++)
		{
			Form_pg_attribute att = TupleDescAttr(tupleDesc, j);

			if (att->attlen <= 0)
				break;

			off = att_align_nominal(off, att->attalign);

			att->attcacheoff = off;

			off += att->attlen;
		}

		Assert(j > attnum);

		off = TupleDescAttr(tupleDesc, attnum)->attcacheoff;
	}
	else
	{
		bool		usecache = true;
		int			i;

		/*
		 * Now we know that we have to walk the tuple CAREFULLY.  But we still
		 * might be able to cache some offsets for next time.
		 *
		 * Note - This loop is a little tricky.  For each non-null attribute,
		 * we have to first account for alignment padding before the attr,
		 * then advance over the attr based on its length.  Nulls have no
		 * storage and no alignment padding either.  We can use/set
		 * attcacheoff until we reach either a null or a var-width attribute.
		 */
		off = 0;
		for (i = 0;; i++)		/* loop exit is at "break" */
		{
			Form_pg_attribute att = TupleDescAttr(tupleDesc, i);

			if (IndexTupleHasNulls(tup) && att_isnull(i, bp))
			{
				usecache = false;
				continue;		/* this cannot be the target att */
			}

			/* If we know the next offset, we can skip the rest */
			if (usecache && att->attcacheoff >= 0)
				off = att->attcacheoff;
			else if (att->attlen == -1)
			{
				/*
				 * We can only cache the offset for a varlena attribute if the
				 * offset is already suitably aligned, so that there would be
				 * no pad bytes in any case: then the offset will be valid for
				 * either an aligned or unaligned value.
				 */
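				/*
				 * For example, for a text column with 'i' alignment: if off
				 * is 8, both a 4-byte-header datum and an unpadded
				 * short-header datum start at 8, so 8 can be cached; if off
				 * is 9, a 4-byte-header datum would be padded to 12 while a
				 * short-header datum stays at 9, so no one offset is valid
				 * for every tuple.
				 */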
				if (usecache &&
					off == att_align_nominal(off, att->attalign))
					att->attcacheoff = off;
				else
				{
					off = att_align_pointer(off, att->attalign, -1,
											tp + off);
					usecache = false;
				}
			}
			else
			{
				/* not varlena, so safe to use att_align_nominal */
				off = att_align_nominal(off, att->attalign);

				if (usecache)
					att->attcacheoff = off;
			}

			if (i == attnum)
				break;

			off = att_addlength_pointer(off, att->attlen, tp + off);

			if (usecache && att->attlen <= 0)
				usecache = false;
		}
	}

	return fetchatt(TupleDescAttr(tupleDesc, attnum), tp + off);
}

/*
 * Convert an index tuple into Datum/isnull arrays.
 *
 * The caller must allocate sufficient storage for the output arrays.
 * (INDEX_MAX_KEYS entries should be enough.)
 *
 * This is nearly the same as heap_deform_tuple(), but for IndexTuples.
 * One difference is that the tuple should never have any missing columns.
 */
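/*
 * For example ("itup" and "itupdesc" are placeholders for the caller's own
 * tuple and descriptor):
 *
 *		Datum		values[INDEX_MAX_KEYS];
 *		bool		isnull[INDEX_MAX_KEYS];
 *
 *		index_deform_tuple(itup, itupdesc, values, isnull);
 */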
void
index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor,
				   Datum *values, bool *isnull)
{
	char	   *tp;				/* ptr to tuple data */
	bits8	   *bp;				/* ptr to null bitmap in tuple */

	/* XXX "knows" t_bits are just after fixed tuple header! */
	bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData));

	tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info);

	index_deform_tuple_internal(tupleDescriptor, values, isnull,
								tp, bp, IndexTupleHasNulls(tup));
}

/*
 * Convert an index tuple into Datum/isnull arrays,
 * without assuming any specific layout of the index tuple header.
 *
 * Caller must supply pointer to data area, pointer to nulls bitmap
 * (which can be NULL if !hasnulls), and hasnulls flag.
 */
void
index_deform_tuple_internal(TupleDesc tupleDescriptor,
							Datum *values, bool *isnull,
							char *tp, bits8 *bp, int hasnulls)
{
	int			natts = tupleDescriptor->natts; /* number of atts to extract */
	int			attnum;
	int			off = 0;		/* offset in tuple data */
	bool		slow = false;	/* can we use/set attcacheoff? */

	/* Assert to protect callers who allocate fixed-size arrays */
	Assert(natts <= INDEX_MAX_KEYS);

	for (attnum = 0; attnum < natts; attnum++)
	{
		Form_pg_attribute thisatt = TupleDescAttr(tupleDescriptor, attnum);

		if (hasnulls && att_isnull(attnum, bp))
		{
			values[attnum] = (Datum) 0;
			isnull[attnum] = true;
			slow = true;		/* can't use attcacheoff anymore */
			continue;
		}

		isnull[attnum] = false;

		if (!slow && thisatt->attcacheoff >= 0)
			off = thisatt->attcacheoff;
		else if (thisatt->attlen == -1)
		{
			/*
			 * We can only cache the offset for a varlena attribute if the
			 * offset is already suitably aligned, so that there would be no
			 * pad bytes in any case: then the offset will be valid for either
			 * an aligned or unaligned value.
			 */
			if (!slow &&
				off == att_align_nominal(off, thisatt->attalign))
				thisatt->attcacheoff = off;
			else
			{
				off = att_align_pointer(off, thisatt->attalign, -1,
										tp + off);
				slow = true;
			}
		}
		else
		{
			/* not varlena, so safe to use att_align_nominal */
			off = att_align_nominal(off, thisatt->attalign);

			if (!slow)
				thisatt->attcacheoff = off;
		}

		values[attnum] = fetchatt(thisatt, tp + off);

		off = att_addlength_pointer(off, thisatt->attlen, tp + off);

		if (thisatt->attlen <= 0)
			slow = true;		/* can't use attcacheoff anymore */
	}
}

/*
 * Create a palloc'd copy of an index tuple.
 */
IndexTuple
CopyIndexTuple(IndexTuple source)
{
	IndexTuple	result;
	Size		size;

	size = IndexTupleSize(source);
	result = (IndexTuple) palloc(size);
	memcpy(result, source, size);
	return result;
}

/*
 * Create a palloc'd copy of an index tuple, leaving only the first
 * leavenatts attributes remaining.
 *
 * Truncation is guaranteed to result in an index tuple that is no
 * larger than the original.  It is safe to use the IndexTuple with
 * the original tuple descriptor, but caller must avoid actually
 * accessing truncated attributes from returned tuple!  In practice
 * this means that index_getattr() must be called with special care,
 * and that the truncated tuple should only ever be accessed by code
 * under caller's direct control.
 *
 * It's safe to call this function with a buffer lock held, since it
 * never performs external table access.  If it ever became possible
 * for index tuples to contain EXTERNAL TOAST values, then this would
 * have to be revisited.
 */
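/*
 * For example, a sketch of truncating away all but the first keepnatts
 * attributes ("itupdesc", "lastleft" and "keepnatts" are placeholders for
 * the caller's own values):
 *
 *		IndexTuple	pivot;
 *
 *		pivot = index_truncate_tuple(itupdesc, lastleft, keepnatts);
 *
 * The result is a freshly palloc'd tuple; the source tuple is not modified.
 */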
IndexTuple
index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source,
					 int leavenatts)
{
	TupleDesc	truncdesc;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	IndexTuple	truncated;

	Assert(leavenatts <= sourceDescriptor->natts);

	/* Easy case: no truncation actually required */
	if (leavenatts == sourceDescriptor->natts)
		return CopyIndexTuple(source);

	/* Create temporary descriptor to scribble on */
	truncdesc = palloc(TupleDescSize(sourceDescriptor));
	TupleDescCopy(truncdesc, sourceDescriptor);
	truncdesc->natts = leavenatts;

	/* Deform, form copy of tuple with fewer attributes */
	index_deform_tuple(source, truncdesc, values, isnull);
	truncated = index_form_tuple(truncdesc, values, isnull);
	truncated->t_tid = source->t_tid;
	Assert(IndexTupleSize(truncated) <= IndexTupleSize(source));

	/*
	 * Cannot leak memory here: TupleDescCopy() doesn't allocate any inner
	 * structure, so a plain pfree() cleans up all allocated memory.
	 */
	pfree(truncdesc);

	return truncated;
}