1 /*-------------------------------------------------------------------------
2  *
3  * spgutils.c
4  *	  various support functions for SP-GiST
5  *
6  *
7  * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * IDENTIFICATION
11  *			src/backend/access/spgist/spgutils.c
12  *
13  *-------------------------------------------------------------------------
14  */
15 
16 #include "postgres.h"
17 
18 #include "access/reloptions.h"
19 #include "access/spgist_private.h"
20 #include "access/transam.h"
21 #include "access/xact.h"
22 #include "storage/bufmgr.h"
23 #include "storage/indexfsm.h"
24 #include "storage/lmgr.h"
25 #include "utils/builtins.h"
26 #include "utils/index_selfuncs.h"
27 #include "utils/lsyscache.h"
28 
29 
30 /*
31  * SP-GiST handler function: return IndexAmRoutine with access method parameters
32  * and callbacks.
33  */
34 Datum
spghandler(PG_FUNCTION_ARGS)35 spghandler(PG_FUNCTION_ARGS)
36 {
37 	IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
38 
39 	amroutine->amstrategies = 0;
40 	amroutine->amsupport = SPGISTNProc;
41 	amroutine->amcanorder = false;
42 	amroutine->amcanorderbyop = false;
43 	amroutine->amcanbackward = false;
44 	amroutine->amcanunique = false;
45 	amroutine->amcanmulticol = false;
46 	amroutine->amoptionalkey = true;
47 	amroutine->amsearcharray = false;
48 	amroutine->amsearchnulls = true;
49 	amroutine->amstorage = false;
50 	amroutine->amclusterable = false;
51 	amroutine->ampredlocks = false;
52 	amroutine->amcanparallel = false;
53 	amroutine->amcaninclude = false;
54 	amroutine->amkeytype = InvalidOid;
55 
56 	amroutine->ambuild = spgbuild;
57 	amroutine->ambuildempty = spgbuildempty;
58 	amroutine->aminsert = spginsert;
59 	amroutine->ambulkdelete = spgbulkdelete;
60 	amroutine->amvacuumcleanup = spgvacuumcleanup;
61 	amroutine->amcanreturn = spgcanreturn;
62 	amroutine->amcostestimate = spgcostestimate;
63 	amroutine->amoptions = spgoptions;
64 	amroutine->amproperty = NULL;
65 	amroutine->amvalidate = spgvalidate;
66 	amroutine->ambeginscan = spgbeginscan;
67 	amroutine->amrescan = spgrescan;
68 	amroutine->amgettuple = spggettuple;
69 	amroutine->amgetbitmap = spggetbitmap;
70 	amroutine->amendscan = spgendscan;
71 	amroutine->ammarkpos = NULL;
72 	amroutine->amrestrpos = NULL;
73 	amroutine->amestimateparallelscan = NULL;
74 	amroutine->aminitparallelscan = NULL;
75 	amroutine->amparallelrescan = NULL;
76 
77 	PG_RETURN_POINTER(amroutine);
78 }
79 
80 /* Fill in a SpGistTypeDesc struct with info about the specified data type */
81 static void
fillTypeDesc(SpGistTypeDesc * desc,Oid type)82 fillTypeDesc(SpGistTypeDesc *desc, Oid type)
83 {
84 	desc->type = type;
85 	get_typlenbyval(type, &desc->attlen, &desc->attbyval);
86 }
87 
88 /*
89  * Fetch local cache of AM-specific info about the index, initializing it
90  * if necessary
91  */
92 SpGistCache *
spgGetCache(Relation index)93 spgGetCache(Relation index)
94 {
95 	SpGistCache *cache;
96 
97 	if (index->rd_amcache == NULL)
98 	{
99 		Oid			atttype;
100 		spgConfigIn in;
101 		FmgrInfo   *procinfo;
102 		Buffer		metabuffer;
103 		SpGistMetaPageData *metadata;
104 
105 		cache = MemoryContextAllocZero(index->rd_indexcxt,
106 									   sizeof(SpGistCache));
107 
108 		/* SPGiST doesn't support multi-column indexes */
109 		Assert(index->rd_att->natts == 1);
110 
111 		/*
112 		 * Get the actual data type of the indexed column from the index
113 		 * tupdesc.  We pass this to the opclass config function so that
114 		 * polymorphic opclasses are possible.
115 		 */
116 		atttype = TupleDescAttr(index->rd_att, 0)->atttypid;
117 
118 		/* Call the config function to get config info for the opclass */
119 		in.attType = atttype;
120 
121 		procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
122 		FunctionCall2Coll(procinfo,
123 						  index->rd_indcollation[0],
124 						  PointerGetDatum(&in),
125 						  PointerGetDatum(&cache->config));
126 
127 		/* Get the information we need about each relevant datatype */
128 		fillTypeDesc(&cache->attType, atttype);
129 
130 		if (OidIsValid(cache->config.leafType) &&
131 			cache->config.leafType != atttype)
132 		{
133 			if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
134 				ereport(ERROR,
135 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
136 						 errmsg("compress method must be defined when leaf type is different from input type")));
137 
138 			fillTypeDesc(&cache->attLeafType, cache->config.leafType);
139 		}
140 		else
141 		{
142 			cache->attLeafType = cache->attType;
143 		}
144 
145 		fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
146 		fillTypeDesc(&cache->attLabelType, cache->config.labelType);
147 
148 		/* Last, get the lastUsedPages data from the metapage */
149 		metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
150 		LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
151 
152 		metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
153 
154 		if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
155 			elog(ERROR, "index \"%s\" is not an SP-GiST index",
156 				 RelationGetRelationName(index));
157 
158 		cache->lastUsedPages = metadata->lastUsedPages;
159 
160 		UnlockReleaseBuffer(metabuffer);
161 
162 		index->rd_amcache = (void *) cache;
163 	}
164 	else
165 	{
166 		/* assume it's up to date */
167 		cache = (SpGistCache *) index->rd_amcache;
168 	}
169 
170 	return cache;
171 }
172 
173 /* Initialize SpGistState for working with the given index */
174 void
initSpGistState(SpGistState * state,Relation index)175 initSpGistState(SpGistState *state, Relation index)
176 {
177 	SpGistCache *cache;
178 
179 	/* Get cached static information about index */
180 	cache = spgGetCache(index);
181 
182 	state->config = cache->config;
183 	state->attType = cache->attType;
184 	state->attLeafType = cache->attLeafType;
185 	state->attPrefixType = cache->attPrefixType;
186 	state->attLabelType = cache->attLabelType;
187 
188 	/* Make workspace for constructing dead tuples */
189 	state->deadTupleStorage = palloc0(SGDTSIZE);
190 
191 	/* Set XID to use in redirection tuples */
192 	state->myXid = GetTopTransactionIdIfAny();
193 
194 	/* Assume we're not in an index build (spgbuild will override) */
195 	state->isBuild = false;
196 }
197 
198 /*
199  * Allocate a new page (either by recycling, or by extending the index file).
200  *
201  * The returned buffer is already pinned and exclusive-locked.
202  * Caller is responsible for initializing the page by calling SpGistInitBuffer.
203  */
204 Buffer
SpGistNewBuffer(Relation index)205 SpGistNewBuffer(Relation index)
206 {
207 	Buffer		buffer;
208 	bool		needLock;
209 
210 	/* First, try to get a page from FSM */
211 	for (;;)
212 	{
213 		BlockNumber blkno = GetFreeIndexPage(index);
214 
215 		if (blkno == InvalidBlockNumber)
216 			break;				/* nothing known to FSM */
217 
218 		/*
219 		 * The fixed pages shouldn't ever be listed in FSM, but just in case
220 		 * one is, ignore it.
221 		 */
222 		if (SpGistBlockIsFixed(blkno))
223 			continue;
224 
225 		buffer = ReadBuffer(index, blkno);
226 
227 		/*
228 		 * We have to guard against the possibility that someone else already
229 		 * recycled this page; the buffer may be locked if so.
230 		 */
231 		if (ConditionalLockBuffer(buffer))
232 		{
233 			Page		page = BufferGetPage(buffer);
234 
235 			if (PageIsNew(page))
236 				return buffer;	/* OK to use, if never initialized */
237 
238 			if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
239 				return buffer;	/* OK to use */
240 
241 			LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
242 		}
243 
244 		/* Can't use it, so release buffer and try again */
245 		ReleaseBuffer(buffer);
246 	}
247 
248 	/* Must extend the file */
249 	needLock = !RELATION_IS_LOCAL(index);
250 	if (needLock)
251 		LockRelationForExtension(index, ExclusiveLock);
252 
253 	buffer = ReadBuffer(index, P_NEW);
254 	LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
255 
256 	if (needLock)
257 		UnlockRelationForExtension(index, ExclusiveLock);
258 
259 	return buffer;
260 }
261 
262 /*
263  * Update index metapage's lastUsedPages info from local cache, if possible
264  *
265  * Updating meta page isn't critical for index working, so
266  * 1 use ConditionalLockBuffer to improve concurrency
267  * 2 don't WAL-log metabuffer changes to decrease WAL traffic
268  */
269 void
SpGistUpdateMetaPage(Relation index)270 SpGistUpdateMetaPage(Relation index)
271 {
272 	SpGistCache *cache = (SpGistCache *) index->rd_amcache;
273 
274 	if (cache != NULL)
275 	{
276 		Buffer		metabuffer;
277 
278 		metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
279 
280 		if (ConditionalLockBuffer(metabuffer))
281 		{
282 			Page		metapage = BufferGetPage(metabuffer);
283 			SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage);
284 
285 			metadata->lastUsedPages = cache->lastUsedPages;
286 
287 			/*
288 			 * Set pd_lower just past the end of the metadata.  This is
289 			 * essential, because without doing so, metadata will be lost if
290 			 * xlog.c compresses the page.  (We must do this here because
291 			 * pre-v11 versions of PG did not set the metapage's pd_lower
292 			 * correctly, so a pg_upgraded index might contain the wrong
293 			 * value.)
294 			 */
295 			((PageHeader) metapage)->pd_lower =
296 				((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage;
297 
298 			MarkBufferDirty(metabuffer);
299 			UnlockReleaseBuffer(metabuffer);
300 		}
301 		else
302 		{
303 			ReleaseBuffer(metabuffer);
304 		}
305 	}
306 }
307 
/*
 * Select the proper element of the lastUsedPages cache for the given flags.
 * Masking the flags with SPGIST_CACHED_PAGES is just for paranoia's sake.
 */
#define GET_LUP(c, f)  (&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
311 
312 /*
313  * Allocate and initialize a new buffer of the type and parity specified by
314  * flags.  The returned buffer is already pinned and exclusive-locked.
315  *
316  * When requesting an inner page, if we get one with the wrong parity,
317  * we just release the buffer and try again.  We will get a different page
318  * because GetFreeIndexPage will have marked the page used in FSM.  The page
319  * is entered in our local lastUsedPages cache, so there's some hope of
320  * making use of it later in this session, but otherwise we rely on VACUUM
321  * to eventually re-enter the page in FSM, making it available for recycling.
322  * Note that such a page does not get marked dirty here, so unless it's used
323  * fairly soon, the buffer will just get discarded and the page will remain
324  * as it was on disk.
325  *
326  * When we return a buffer to the caller, the page is *not* entered into
327  * the lastUsedPages cache; we expect the caller will do so after it's taken
328  * whatever space it will use.  This is because after the caller has used up
329  * some space, the page might have less space than whatever was cached already
330  * so we'd rather not trash the old cache entry.
331  */
332 static Buffer
allocNewBuffer(Relation index,int flags)333 allocNewBuffer(Relation index, int flags)
334 {
335 	SpGistCache *cache = spgGetCache(index);
336 	uint16		pageflags = 0;
337 
338 	if (GBUF_REQ_LEAF(flags))
339 		pageflags |= SPGIST_LEAF;
340 	if (GBUF_REQ_NULLS(flags))
341 		pageflags |= SPGIST_NULLS;
342 
343 	for (;;)
344 	{
345 		Buffer		buffer;
346 
347 		buffer = SpGistNewBuffer(index);
348 		SpGistInitBuffer(buffer, pageflags);
349 
350 		if (pageflags & SPGIST_LEAF)
351 		{
352 			/* Leaf pages have no parity concerns, so just use it */
353 			return buffer;
354 		}
355 		else
356 		{
357 			BlockNumber blkno = BufferGetBlockNumber(buffer);
358 			int			blkFlags = GBUF_INNER_PARITY(blkno);
359 
360 			if ((flags & GBUF_PARITY_MASK) == blkFlags)
361 			{
362 				/* Page has right parity, use it */
363 				return buffer;
364 			}
365 			else
366 			{
367 				/* Page has wrong parity, record it in cache and try again */
368 				if (pageflags & SPGIST_NULLS)
369 					blkFlags |= GBUF_NULLS;
370 				cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
371 				cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
372 					PageGetExactFreeSpace(BufferGetPage(buffer));
373 				UnlockReleaseBuffer(buffer);
374 			}
375 		}
376 	}
377 }
378 
/*
 * Get a buffer of the type and parity specified by flags, having at least
 * as much free space as indicated by needSpace.  We use the lastUsedPages
 * cache to assign the same buffer previously requested when possible.
 * The returned buffer is already pinned and exclusive-locked.
 *
 * *isNew is set true if the page was initialized here, false if it was
 * already valid.
 */
Buffer
SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
{
	SpGistCache *cache = spgGetCache(index);
	SpGistLastUsedPage *lup;

	/* Bail out if even an empty page wouldn't meet the demand */
	if (needSpace > SPGIST_PAGE_CAPACITY)
		elog(ERROR, "desired SPGiST tuple size is too big");

	/*
	 * If possible, increase the space request to include relation's
	 * fillfactor.  This ensures that when we add unrelated tuples to a page,
	 * we try to keep 100-fillfactor% available for adding tuples that are
	 * related to the ones already on it.  But fillfactor mustn't cause an
	 * error for requests that would otherwise be legal.
	 */
	needSpace += RelationGetTargetPageFreeSpace(index,
												SPGIST_DEFAULT_FILLFACTOR);
	needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);

	/* Get the cache entry for this flags setting */
	lup = GET_LUP(cache, flags);

	/* If we have nothing cached, just turn it over to allocNewBuffer */
	if (lup->blkno == InvalidBlockNumber)
	{
		*isNew = true;
		return allocNewBuffer(index, flags);
	}

	/* fixed pages should never be in cache */
	Assert(!SpGistBlockIsFixed(lup->blkno));

	/* If cached freeSpace isn't enough, don't bother looking at the page */
	if (lup->freeSpace >= needSpace)
	{
		Buffer		buffer;
		Page		page;

		buffer = ReadBuffer(index, lup->blkno);

		if (!ConditionalLockBuffer(buffer))
		{
			/*
			 * buffer is locked by another process, so return a new buffer
			 */
			ReleaseBuffer(buffer);
			*isNew = true;
			return allocNewBuffer(index, flags);
		}

		page = BufferGetPage(buffer);

		if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
		{
			/* OK to initialize the page */
			uint16		pageflags = 0;

			if (GBUF_REQ_LEAF(flags))
				pageflags |= SPGIST_LEAF;
			if (GBUF_REQ_NULLS(flags))
				pageflags |= SPGIST_NULLS;
			SpGistInitBuffer(buffer, pageflags);
			/* Pre-deduct needSpace: caller is expected to consume it */
			lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
			*isNew = true;
			return buffer;
		}

		/*
		 * Check that page is of right type and has enough space.  We must
		 * recheck this since our cache isn't necessarily up to date.
		 */
		if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
			(GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
		{
			int			freeSpace = PageGetExactFreeSpace(page);

			if (freeSpace >= needSpace)
			{
				/* Success, update freespace info and return the buffer */
				lup->freeSpace = freeSpace - needSpace;
				*isNew = false;
				return buffer;
			}
		}

		/*
		 * fallback to allocation of new buffer
		 */
		UnlockReleaseBuffer(buffer);
	}

	/* No success with cache, so return a new buffer */
	*isNew = true;
	return allocNewBuffer(index, flags);
}
485 
486 /*
487  * Update lastUsedPages cache when done modifying a page.
488  *
489  * We update the appropriate cache entry if it already contained this page
490  * (its freeSpace is likely obsolete), or if this page has more space than
491  * whatever we had cached.
492  */
493 void
SpGistSetLastUsedPage(Relation index,Buffer buffer)494 SpGistSetLastUsedPage(Relation index, Buffer buffer)
495 {
496 	SpGistCache *cache = spgGetCache(index);
497 	SpGistLastUsedPage *lup;
498 	int			freeSpace;
499 	Page		page = BufferGetPage(buffer);
500 	BlockNumber blkno = BufferGetBlockNumber(buffer);
501 	int			flags;
502 
503 	/* Never enter fixed pages (root pages) in cache, though */
504 	if (SpGistBlockIsFixed(blkno))
505 		return;
506 
507 	if (SpGistPageIsLeaf(page))
508 		flags = GBUF_LEAF;
509 	else
510 		flags = GBUF_INNER_PARITY(blkno);
511 	if (SpGistPageStoresNulls(page))
512 		flags |= GBUF_NULLS;
513 
514 	lup = GET_LUP(cache, flags);
515 
516 	freeSpace = PageGetExactFreeSpace(page);
517 	if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
518 		lup->freeSpace < freeSpace)
519 	{
520 		lup->blkno = blkno;
521 		lup->freeSpace = freeSpace;
522 	}
523 }
524 
525 /*
526  * Initialize an SPGiST page to empty, with specified flags
527  */
528 void
SpGistInitPage(Page page,uint16 f)529 SpGistInitPage(Page page, uint16 f)
530 {
531 	SpGistPageOpaque opaque;
532 
533 	PageInit(page, BLCKSZ, MAXALIGN(sizeof(SpGistPageOpaqueData)));
534 	opaque = SpGistPageGetOpaque(page);
535 	memset(opaque, 0, sizeof(SpGistPageOpaqueData));
536 	opaque->flags = f;
537 	opaque->spgist_page_id = SPGIST_PAGE_ID;
538 }
539 
540 /*
541  * Initialize a buffer's page to empty, with specified flags
542  */
543 void
SpGistInitBuffer(Buffer b,uint16 f)544 SpGistInitBuffer(Buffer b, uint16 f)
545 {
546 	Assert(BufferGetPageSize(b) == BLCKSZ);
547 	SpGistInitPage(BufferGetPage(b), f);
548 }
549 
550 /*
551  * Initialize metadata page
552  */
553 void
SpGistInitMetapage(Page page)554 SpGistInitMetapage(Page page)
555 {
556 	SpGistMetaPageData *metadata;
557 	int			i;
558 
559 	SpGistInitPage(page, SPGIST_META);
560 	metadata = SpGistPageGetMeta(page);
561 	memset(metadata, 0, sizeof(SpGistMetaPageData));
562 	metadata->magicNumber = SPGIST_MAGIC_NUMBER;
563 
564 	/* initialize last-used-page cache to empty */
565 	for (i = 0; i < SPGIST_CACHED_PAGES; i++)
566 		metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
567 
568 	/*
569 	 * Set pd_lower just past the end of the metadata.  This is essential,
570 	 * because without doing so, metadata will be lost if xlog.c compresses
571 	 * the page.
572 	 */
573 	((PageHeader) page)->pd_lower =
574 		((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page;
575 }
576 
577 /*
578  * reloptions processing for SPGiST
579  */
580 bytea *
spgoptions(Datum reloptions,bool validate)581 spgoptions(Datum reloptions, bool validate)
582 {
583 	return default_reloptions(reloptions, validate, RELOPT_KIND_SPGIST);
584 }
585 
586 /*
587  * Get the space needed to store a non-null datum of the indicated type.
588  * Note the result is already rounded up to a MAXALIGN boundary.
589  * Also, we follow the SPGiST convention that pass-by-val types are
590  * just stored in their Datum representation (compare memcpyDatum).
591  */
592 unsigned int
SpGistGetTypeSize(SpGistTypeDesc * att,Datum datum)593 SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum)
594 {
595 	unsigned int size;
596 
597 	if (att->attbyval)
598 		size = sizeof(Datum);
599 	else if (att->attlen > 0)
600 		size = att->attlen;
601 	else
602 		size = VARSIZE_ANY(datum);
603 
604 	return MAXALIGN(size);
605 }
606 
607 /*
608  * Copy the given non-null datum to *target
609  */
610 static void
memcpyDatum(void * target,SpGistTypeDesc * att,Datum datum)611 memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
612 {
613 	unsigned int size;
614 
615 	if (att->attbyval)
616 	{
617 		memcpy(target, &datum, sizeof(Datum));
618 	}
619 	else
620 	{
621 		size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
622 		memcpy(target, DatumGetPointer(datum), size);
623 	}
624 }
625 
626 /*
627  * Construct a leaf tuple containing the given heap TID and datum value
628  */
629 SpGistLeafTuple
spgFormLeafTuple(SpGistState * state,ItemPointer heapPtr,Datum datum,bool isnull)630 spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
631 				 Datum datum, bool isnull)
632 {
633 	SpGistLeafTuple tup;
634 	unsigned int size;
635 
636 	/* compute space needed (note result is already maxaligned) */
637 	size = SGLTHDRSZ;
638 	if (!isnull)
639 		size += SpGistGetTypeSize(&state->attLeafType, datum);
640 
641 	/*
642 	 * Ensure that we can replace the tuple with a dead tuple later.  This
643 	 * test is unnecessary when !isnull, but let's be safe.
644 	 */
645 	if (size < SGDTSIZE)
646 		size = SGDTSIZE;
647 
648 	/* OK, form the tuple */
649 	tup = (SpGistLeafTuple) palloc0(size);
650 
651 	tup->size = size;
652 	tup->nextOffset = InvalidOffsetNumber;
653 	tup->heapPtr = *heapPtr;
654 	if (!isnull)
655 		memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
656 
657 	return tup;
658 }
659 
660 /*
661  * Construct a node (to go into an inner tuple) containing the given label
662  *
663  * Note that the node's downlink is just set invalid here.  Caller will fill
664  * it in later.
665  */
666 SpGistNodeTuple
spgFormNodeTuple(SpGistState * state,Datum label,bool isnull)667 spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
668 {
669 	SpGistNodeTuple tup;
670 	unsigned int size;
671 	unsigned short infomask = 0;
672 
673 	/* compute space needed (note result is already maxaligned) */
674 	size = SGNTHDRSZ;
675 	if (!isnull)
676 		size += SpGistGetTypeSize(&state->attLabelType, label);
677 
678 	/*
679 	 * Here we make sure that the size will fit in the field reserved for it
680 	 * in t_info.
681 	 */
682 	if ((size & INDEX_SIZE_MASK) != size)
683 		ereport(ERROR,
684 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
685 				 errmsg("index row requires %zu bytes, maximum size is %zu",
686 						(Size) size, (Size) INDEX_SIZE_MASK)));
687 
688 	tup = (SpGistNodeTuple) palloc0(size);
689 
690 	if (isnull)
691 		infomask |= INDEX_NULL_MASK;
692 	/* we don't bother setting the INDEX_VAR_MASK bit */
693 	infomask |= size;
694 	tup->t_info = infomask;
695 
696 	/* The TID field will be filled in later */
697 	ItemPointerSetInvalid(&tup->t_tid);
698 
699 	if (!isnull)
700 		memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
701 
702 	return tup;
703 }
704 
705 /*
706  * Construct an inner tuple containing the given prefix and node array
707  */
708 SpGistInnerTuple
spgFormInnerTuple(SpGistState * state,bool hasPrefix,Datum prefix,int nNodes,SpGistNodeTuple * nodes)709 spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
710 				  int nNodes, SpGistNodeTuple *nodes)
711 {
712 	SpGistInnerTuple tup;
713 	unsigned int size;
714 	unsigned int prefixSize;
715 	int			i;
716 	char	   *ptr;
717 
718 	/* Compute size needed */
719 	if (hasPrefix)
720 		prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
721 	else
722 		prefixSize = 0;
723 
724 	size = SGITHDRSZ + prefixSize;
725 
726 	/* Note: we rely on node tuple sizes to be maxaligned already */
727 	for (i = 0; i < nNodes; i++)
728 		size += IndexTupleSize(nodes[i]);
729 
730 	/*
731 	 * Ensure that we can replace the tuple with a dead tuple later.  This
732 	 * test is unnecessary given current tuple layouts, but let's be safe.
733 	 */
734 	if (size < SGDTSIZE)
735 		size = SGDTSIZE;
736 
737 	/*
738 	 * Inner tuple should be small enough to fit on a page
739 	 */
740 	if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
741 		ereport(ERROR,
742 				(errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
743 				 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
744 						(Size) size,
745 						SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
746 				 errhint("Values larger than a buffer page cannot be indexed.")));
747 
748 	/*
749 	 * Check for overflow of header fields --- probably can't fail if the
750 	 * above succeeded, but let's be paranoid
751 	 */
752 	if (size > SGITMAXSIZE ||
753 		prefixSize > SGITMAXPREFIXSIZE ||
754 		nNodes > SGITMAXNNODES)
755 		elog(ERROR, "SPGiST inner tuple header field is too small");
756 
757 	/* OK, form the tuple */
758 	tup = (SpGistInnerTuple) palloc0(size);
759 
760 	tup->nNodes = nNodes;
761 	tup->prefixSize = prefixSize;
762 	tup->size = size;
763 
764 	if (hasPrefix)
765 		memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
766 
767 	ptr = (char *) SGITNODEPTR(tup);
768 
769 	for (i = 0; i < nNodes; i++)
770 	{
771 		SpGistNodeTuple node = nodes[i];
772 
773 		memcpy(ptr, node, IndexTupleSize(node));
774 		ptr += IndexTupleSize(node);
775 	}
776 
777 	return tup;
778 }
779 
780 /*
781  * Construct a "dead" tuple to replace a tuple being deleted.
782  *
783  * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
784  * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
785  * the xid field is filled in automatically.
786  *
787  * This is called in critical sections, so we don't use palloc; the tuple
788  * is built in preallocated storage.  It should be copied before another
789  * call with different parameters can occur.
790  */
791 SpGistDeadTuple
spgFormDeadTuple(SpGistState * state,int tupstate,BlockNumber blkno,OffsetNumber offnum)792 spgFormDeadTuple(SpGistState *state, int tupstate,
793 				 BlockNumber blkno, OffsetNumber offnum)
794 {
795 	SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;
796 
797 	tuple->tupstate = tupstate;
798 	tuple->size = SGDTSIZE;
799 	tuple->nextOffset = InvalidOffsetNumber;
800 
801 	if (tupstate == SPGIST_REDIRECT)
802 	{
803 		ItemPointerSet(&tuple->pointer, blkno, offnum);
804 		Assert(TransactionIdIsValid(state->myXid));
805 		tuple->xid = state->myXid;
806 	}
807 	else
808 	{
809 		ItemPointerSetInvalid(&tuple->pointer);
810 		tuple->xid = InvalidTransactionId;
811 	}
812 
813 	return tuple;
814 }
815 
816 /*
817  * Extract the label datums of the nodes within innerTuple
818  *
819  * Returns NULL if label datums are NULLs
820  */
821 Datum *
spgExtractNodeLabels(SpGistState * state,SpGistInnerTuple innerTuple)822 spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
823 {
824 	Datum	   *nodeLabels;
825 	int			i;
826 	SpGistNodeTuple node;
827 
828 	/* Either all the labels must be NULL, or none. */
829 	node = SGITNODEPTR(innerTuple);
830 	if (IndexTupleHasNulls(node))
831 	{
832 		SGITITERATE(innerTuple, i, node)
833 		{
834 			if (!IndexTupleHasNulls(node))
835 				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
836 		}
837 		/* They're all null, so just return NULL */
838 		return NULL;
839 	}
840 	else
841 	{
842 		nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
843 		SGITITERATE(innerTuple, i, node)
844 		{
845 			if (IndexTupleHasNulls(node))
846 				elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
847 			nodeLabels[i] = SGNTDATUM(node, state);
848 		}
849 		return nodeLabels;
850 	}
851 }
852 
/*
 * Add a new item to the page, replacing a PLACEHOLDER item if possible.
 * Return the location it's inserted at, or InvalidOffsetNumber on failure.
 *
 * If startOffset isn't NULL, we start searching for placeholders at
 * *startOffset, and update that to the next place to search.  This is just
 * an optimization for repeated insertions.
 *
 * If errorOK is false, we throw error when there's not enough room,
 * rather than returning InvalidOffsetNumber.
 */
OffsetNumber
SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
					 OffsetNumber *startOffset, bool errorOK)
{
	SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
	OffsetNumber i,
				maxoff,
				offnum;

	/*
	 * Only try placeholder replacement if the page claims to have one and
	 * the reclaimed SGDTSIZE bytes would make the item fit.
	 */
	if (opaque->nPlaceholder > 0 &&
		PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
	{
		/* Try to replace a placeholder */
		maxoff = PageGetMaxOffsetNumber(page);
		offnum = InvalidOffsetNumber;

		/* Outer loop allows one retry from the top if the hint was stale */
		for (;;)
		{
			/* Start at caller's hint if given, else at the first item */
			if (startOffset && *startOffset != InvalidOffsetNumber)
				i = *startOffset;
			else
				i = FirstOffsetNumber;
			for (; i <= maxoff; i++)
			{
				SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
																   PageGetItemId(page, i));

				if (it->tupstate == SPGIST_PLACEHOLDER)
				{
					offnum = i;
					break;
				}
			}

			/* Done if we found a placeholder */
			if (offnum != InvalidOffsetNumber)
				break;

			if (startOffset && *startOffset != InvalidOffsetNumber)
			{
				/* Hint was no good, re-search from beginning */
				*startOffset = InvalidOffsetNumber;
				continue;
			}

			/* Hmm, no placeholder found?  Fix the counter and fall through */
			opaque->nPlaceholder = 0;
			break;
		}

		if (offnum != InvalidOffsetNumber)
		{
			/* Replace the placeholder tuple */
			PageIndexTupleDelete(page, offnum);

			offnum = PageAddItem(page, item, size, offnum, false, false);

			/*
			 * We should not have failed given the size check at the top of
			 * the function, but test anyway.  If we did fail, we must PANIC
			 * because we've already deleted the placeholder tuple, and
			 * there's no other way to keep the damage from getting to disk.
			 */
			if (offnum != InvalidOffsetNumber)
			{
				Assert(opaque->nPlaceholder > 0);
				opaque->nPlaceholder--;
				/* Advance the hint past the slot we just consumed */
				if (startOffset)
					*startOffset = offnum + 1;
			}
			else
				elog(PANIC, "failed to add item of size %u to SPGiST index page",
					 (int) size);

			return offnum;
		}
	}

	/* No luck in replacing a placeholder, so just add it to the page */
	offnum = PageAddItem(page, item, size,
						 InvalidOffsetNumber, false, false);

	if (offnum == InvalidOffsetNumber && !errorOK)
		elog(ERROR, "failed to add item of size %u to SPGiST index page",
			 (int) size);

	return offnum;
}
952