1 /*-------------------------------------------------------------------------
2 *
3 * spgutils.c
4 * various support functions for SP-GiST
5 *
6 *
7 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/spgist/spgutils.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16 #include "postgres.h"
17
18 #include "access/reloptions.h"
19 #include "access/spgist_private.h"
20 #include "access/transam.h"
21 #include "access/xact.h"
22 #include "storage/bufmgr.h"
23 #include "storage/indexfsm.h"
24 #include "storage/lmgr.h"
25 #include "utils/builtins.h"
26 #include "utils/index_selfuncs.h"
27 #include "utils/lsyscache.h"
28
29
30 /*
31 * SP-GiST handler function: return IndexAmRoutine with access method parameters
32 * and callbacks.
33 */
34 Datum
spghandler(PG_FUNCTION_ARGS)35 spghandler(PG_FUNCTION_ARGS)
36 {
37 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
38
39 amroutine->amstrategies = 0;
40 amroutine->amsupport = SPGISTNProc;
41 amroutine->amcanorder = false;
42 amroutine->amcanorderbyop = false;
43 amroutine->amcanbackward = false;
44 amroutine->amcanunique = false;
45 amroutine->amcanmulticol = false;
46 amroutine->amoptionalkey = true;
47 amroutine->amsearcharray = false;
48 amroutine->amsearchnulls = true;
49 amroutine->amstorage = false;
50 amroutine->amclusterable = false;
51 amroutine->ampredlocks = false;
52 amroutine->amcanparallel = false;
53 amroutine->amcaninclude = false;
54 amroutine->amkeytype = InvalidOid;
55
56 amroutine->ambuild = spgbuild;
57 amroutine->ambuildempty = spgbuildempty;
58 amroutine->aminsert = spginsert;
59 amroutine->ambulkdelete = spgbulkdelete;
60 amroutine->amvacuumcleanup = spgvacuumcleanup;
61 amroutine->amcanreturn = spgcanreturn;
62 amroutine->amcostestimate = spgcostestimate;
63 amroutine->amoptions = spgoptions;
64 amroutine->amproperty = NULL;
65 amroutine->amvalidate = spgvalidate;
66 amroutine->ambeginscan = spgbeginscan;
67 amroutine->amrescan = spgrescan;
68 amroutine->amgettuple = spggettuple;
69 amroutine->amgetbitmap = spggetbitmap;
70 amroutine->amendscan = spgendscan;
71 amroutine->ammarkpos = NULL;
72 amroutine->amrestrpos = NULL;
73 amroutine->amestimateparallelscan = NULL;
74 amroutine->aminitparallelscan = NULL;
75 amroutine->amparallelrescan = NULL;
76
77 PG_RETURN_POINTER(amroutine);
78 }
79
80 /* Fill in a SpGistTypeDesc struct with info about the specified data type */
81 static void
fillTypeDesc(SpGistTypeDesc * desc,Oid type)82 fillTypeDesc(SpGistTypeDesc *desc, Oid type)
83 {
84 desc->type = type;
85 get_typlenbyval(type, &desc->attlen, &desc->attbyval);
86 }
87
88 /*
89 * Fetch local cache of AM-specific info about the index, initializing it
90 * if necessary
91 */
92 SpGistCache *
spgGetCache(Relation index)93 spgGetCache(Relation index)
94 {
95 SpGistCache *cache;
96
97 if (index->rd_amcache == NULL)
98 {
99 Oid atttype;
100 spgConfigIn in;
101 FmgrInfo *procinfo;
102 Buffer metabuffer;
103 SpGistMetaPageData *metadata;
104
105 cache = MemoryContextAllocZero(index->rd_indexcxt,
106 sizeof(SpGistCache));
107
108 /* SPGiST doesn't support multi-column indexes */
109 Assert(index->rd_att->natts == 1);
110
111 /*
112 * Get the actual data type of the indexed column from the index
113 * tupdesc. We pass this to the opclass config function so that
114 * polymorphic opclasses are possible.
115 */
116 atttype = TupleDescAttr(index->rd_att, 0)->atttypid;
117
118 /* Call the config function to get config info for the opclass */
119 in.attType = atttype;
120
121 procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
122 FunctionCall2Coll(procinfo,
123 index->rd_indcollation[0],
124 PointerGetDatum(&in),
125 PointerGetDatum(&cache->config));
126
127 /* Get the information we need about each relevant datatype */
128 fillTypeDesc(&cache->attType, atttype);
129
130 if (OidIsValid(cache->config.leafType) &&
131 cache->config.leafType != atttype)
132 {
133 if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC)))
134 ereport(ERROR,
135 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
136 errmsg("compress method must be defined when leaf type is different from input type")));
137
138 fillTypeDesc(&cache->attLeafType, cache->config.leafType);
139 }
140 else
141 {
142 cache->attLeafType = cache->attType;
143 }
144
145 fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
146 fillTypeDesc(&cache->attLabelType, cache->config.labelType);
147
148 /* Last, get the lastUsedPages data from the metapage */
149 metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
150 LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
151
152 metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
153
154 if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
155 elog(ERROR, "index \"%s\" is not an SP-GiST index",
156 RelationGetRelationName(index));
157
158 cache->lastUsedPages = metadata->lastUsedPages;
159
160 UnlockReleaseBuffer(metabuffer);
161
162 index->rd_amcache = (void *) cache;
163 }
164 else
165 {
166 /* assume it's up to date */
167 cache = (SpGistCache *) index->rd_amcache;
168 }
169
170 return cache;
171 }
172
173 /* Initialize SpGistState for working with the given index */
174 void
initSpGistState(SpGistState * state,Relation index)175 initSpGistState(SpGistState *state, Relation index)
176 {
177 SpGistCache *cache;
178
179 /* Get cached static information about index */
180 cache = spgGetCache(index);
181
182 state->config = cache->config;
183 state->attType = cache->attType;
184 state->attLeafType = cache->attLeafType;
185 state->attPrefixType = cache->attPrefixType;
186 state->attLabelType = cache->attLabelType;
187
188 /* Make workspace for constructing dead tuples */
189 state->deadTupleStorage = palloc0(SGDTSIZE);
190
191 /* Set XID to use in redirection tuples */
192 state->myXid = GetTopTransactionIdIfAny();
193
194 /* Assume we're not in an index build (spgbuild will override) */
195 state->isBuild = false;
196 }
197
198 /*
199 * Allocate a new page (either by recycling, or by extending the index file).
200 *
201 * The returned buffer is already pinned and exclusive-locked.
202 * Caller is responsible for initializing the page by calling SpGistInitBuffer.
203 */
204 Buffer
SpGistNewBuffer(Relation index)205 SpGistNewBuffer(Relation index)
206 {
207 Buffer buffer;
208 bool needLock;
209
210 /* First, try to get a page from FSM */
211 for (;;)
212 {
213 BlockNumber blkno = GetFreeIndexPage(index);
214
215 if (blkno == InvalidBlockNumber)
216 break; /* nothing known to FSM */
217
218 /*
219 * The fixed pages shouldn't ever be listed in FSM, but just in case
220 * one is, ignore it.
221 */
222 if (SpGistBlockIsFixed(blkno))
223 continue;
224
225 buffer = ReadBuffer(index, blkno);
226
227 /*
228 * We have to guard against the possibility that someone else already
229 * recycled this page; the buffer may be locked if so.
230 */
231 if (ConditionalLockBuffer(buffer))
232 {
233 Page page = BufferGetPage(buffer);
234
235 if (PageIsNew(page))
236 return buffer; /* OK to use, if never initialized */
237
238 if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
239 return buffer; /* OK to use */
240
241 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
242 }
243
244 /* Can't use it, so release buffer and try again */
245 ReleaseBuffer(buffer);
246 }
247
248 /* Must extend the file */
249 needLock = !RELATION_IS_LOCAL(index);
250 if (needLock)
251 LockRelationForExtension(index, ExclusiveLock);
252
253 buffer = ReadBuffer(index, P_NEW);
254 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
255
256 if (needLock)
257 UnlockRelationForExtension(index, ExclusiveLock);
258
259 return buffer;
260 }
261
262 /*
263 * Update index metapage's lastUsedPages info from local cache, if possible
264 *
265 * Updating meta page isn't critical for index working, so
266 * 1 use ConditionalLockBuffer to improve concurrency
267 * 2 don't WAL-log metabuffer changes to decrease WAL traffic
268 */
269 void
SpGistUpdateMetaPage(Relation index)270 SpGistUpdateMetaPage(Relation index)
271 {
272 SpGistCache *cache = (SpGistCache *) index->rd_amcache;
273
274 if (cache != NULL)
275 {
276 Buffer metabuffer;
277
278 metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
279
280 if (ConditionalLockBuffer(metabuffer))
281 {
282 Page metapage = BufferGetPage(metabuffer);
283 SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage);
284
285 metadata->lastUsedPages = cache->lastUsedPages;
286
287 /*
288 * Set pd_lower just past the end of the metadata. This is
289 * essential, because without doing so, metadata will be lost if
290 * xlog.c compresses the page. (We must do this here because
291 * pre-v11 versions of PG did not set the metapage's pd_lower
292 * correctly, so a pg_upgraded index might contain the wrong
293 * value.)
294 */
295 ((PageHeader) metapage)->pd_lower =
296 ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage;
297
298 MarkBufferDirty(metabuffer);
299 UnlockReleaseBuffer(metabuffer);
300 }
301 else
302 {
303 ReleaseBuffer(metabuffer);
304 }
305 }
306 }
307
/*
 * GET_LUP
 *		Address of the lastUsedPages cache entry selected by flags "f" in
 *		cache "c".
 *
 * The flags value is reduced modulo SPGIST_CACHED_PAGES before being used as
 * the array index; that reduction is just for paranoia's sake.
 */
#define GET_LUP(c, f) \
	(&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
311
312 /*
313 * Allocate and initialize a new buffer of the type and parity specified by
314 * flags. The returned buffer is already pinned and exclusive-locked.
315 *
316 * When requesting an inner page, if we get one with the wrong parity,
317 * we just release the buffer and try again. We will get a different page
318 * because GetFreeIndexPage will have marked the page used in FSM. The page
319 * is entered in our local lastUsedPages cache, so there's some hope of
320 * making use of it later in this session, but otherwise we rely on VACUUM
321 * to eventually re-enter the page in FSM, making it available for recycling.
322 * Note that such a page does not get marked dirty here, so unless it's used
323 * fairly soon, the buffer will just get discarded and the page will remain
324 * as it was on disk.
325 *
326 * When we return a buffer to the caller, the page is *not* entered into
327 * the lastUsedPages cache; we expect the caller will do so after it's taken
328 * whatever space it will use. This is because after the caller has used up
329 * some space, the page might have less space than whatever was cached already
330 * so we'd rather not trash the old cache entry.
331 */
332 static Buffer
allocNewBuffer(Relation index,int flags)333 allocNewBuffer(Relation index, int flags)
334 {
335 SpGistCache *cache = spgGetCache(index);
336 uint16 pageflags = 0;
337
338 if (GBUF_REQ_LEAF(flags))
339 pageflags |= SPGIST_LEAF;
340 if (GBUF_REQ_NULLS(flags))
341 pageflags |= SPGIST_NULLS;
342
343 for (;;)
344 {
345 Buffer buffer;
346
347 buffer = SpGistNewBuffer(index);
348 SpGistInitBuffer(buffer, pageflags);
349
350 if (pageflags & SPGIST_LEAF)
351 {
352 /* Leaf pages have no parity concerns, so just use it */
353 return buffer;
354 }
355 else
356 {
357 BlockNumber blkno = BufferGetBlockNumber(buffer);
358 int blkFlags = GBUF_INNER_PARITY(blkno);
359
360 if ((flags & GBUF_PARITY_MASK) == blkFlags)
361 {
362 /* Page has right parity, use it */
363 return buffer;
364 }
365 else
366 {
367 /* Page has wrong parity, record it in cache and try again */
368 if (pageflags & SPGIST_NULLS)
369 blkFlags |= GBUF_NULLS;
370 cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
371 cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
372 PageGetExactFreeSpace(BufferGetPage(buffer));
373 UnlockReleaseBuffer(buffer);
374 }
375 }
376 }
377 }
378
379 /*
380 * Get a buffer of the type and parity specified by flags, having at least
381 * as much free space as indicated by needSpace. We use the lastUsedPages
382 * cache to assign the same buffer previously requested when possible.
383 * The returned buffer is already pinned and exclusive-locked.
384 *
385 * *isNew is set true if the page was initialized here, false if it was
386 * already valid.
387 */
388 Buffer
SpGistGetBuffer(Relation index,int flags,int needSpace,bool * isNew)389 SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
390 {
391 SpGistCache *cache = spgGetCache(index);
392 SpGistLastUsedPage *lup;
393
394 /* Bail out if even an empty page wouldn't meet the demand */
395 if (needSpace > SPGIST_PAGE_CAPACITY)
396 elog(ERROR, "desired SPGiST tuple size is too big");
397
398 /*
399 * If possible, increase the space request to include relation's
400 * fillfactor. This ensures that when we add unrelated tuples to a page,
401 * we try to keep 100-fillfactor% available for adding tuples that are
402 * related to the ones already on it. But fillfactor mustn't cause an
403 * error for requests that would otherwise be legal.
404 */
405 needSpace += RelationGetTargetPageFreeSpace(index,
406 SPGIST_DEFAULT_FILLFACTOR);
407 needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);
408
409 /* Get the cache entry for this flags setting */
410 lup = GET_LUP(cache, flags);
411
412 /* If we have nothing cached, just turn it over to allocNewBuffer */
413 if (lup->blkno == InvalidBlockNumber)
414 {
415 *isNew = true;
416 return allocNewBuffer(index, flags);
417 }
418
419 /* fixed pages should never be in cache */
420 Assert(!SpGistBlockIsFixed(lup->blkno));
421
422 /* If cached freeSpace isn't enough, don't bother looking at the page */
423 if (lup->freeSpace >= needSpace)
424 {
425 Buffer buffer;
426 Page page;
427
428 buffer = ReadBuffer(index, lup->blkno);
429
430 if (!ConditionalLockBuffer(buffer))
431 {
432 /*
433 * buffer is locked by another process, so return a new buffer
434 */
435 ReleaseBuffer(buffer);
436 *isNew = true;
437 return allocNewBuffer(index, flags);
438 }
439
440 page = BufferGetPage(buffer);
441
442 if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
443 {
444 /* OK to initialize the page */
445 uint16 pageflags = 0;
446
447 if (GBUF_REQ_LEAF(flags))
448 pageflags |= SPGIST_LEAF;
449 if (GBUF_REQ_NULLS(flags))
450 pageflags |= SPGIST_NULLS;
451 SpGistInitBuffer(buffer, pageflags);
452 lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
453 *isNew = true;
454 return buffer;
455 }
456
457 /*
458 * Check that page is of right type and has enough space. We must
459 * recheck this since our cache isn't necessarily up to date.
460 */
461 if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
462 (GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
463 {
464 int freeSpace = PageGetExactFreeSpace(page);
465
466 if (freeSpace >= needSpace)
467 {
468 /* Success, update freespace info and return the buffer */
469 lup->freeSpace = freeSpace - needSpace;
470 *isNew = false;
471 return buffer;
472 }
473 }
474
475 /*
476 * fallback to allocation of new buffer
477 */
478 UnlockReleaseBuffer(buffer);
479 }
480
481 /* No success with cache, so return a new buffer */
482 *isNew = true;
483 return allocNewBuffer(index, flags);
484 }
485
486 /*
487 * Update lastUsedPages cache when done modifying a page.
488 *
489 * We update the appropriate cache entry if it already contained this page
490 * (its freeSpace is likely obsolete), or if this page has more space than
491 * whatever we had cached.
492 */
493 void
SpGistSetLastUsedPage(Relation index,Buffer buffer)494 SpGistSetLastUsedPage(Relation index, Buffer buffer)
495 {
496 SpGistCache *cache = spgGetCache(index);
497 SpGistLastUsedPage *lup;
498 int freeSpace;
499 Page page = BufferGetPage(buffer);
500 BlockNumber blkno = BufferGetBlockNumber(buffer);
501 int flags;
502
503 /* Never enter fixed pages (root pages) in cache, though */
504 if (SpGistBlockIsFixed(blkno))
505 return;
506
507 if (SpGistPageIsLeaf(page))
508 flags = GBUF_LEAF;
509 else
510 flags = GBUF_INNER_PARITY(blkno);
511 if (SpGistPageStoresNulls(page))
512 flags |= GBUF_NULLS;
513
514 lup = GET_LUP(cache, flags);
515
516 freeSpace = PageGetExactFreeSpace(page);
517 if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
518 lup->freeSpace < freeSpace)
519 {
520 lup->blkno = blkno;
521 lup->freeSpace = freeSpace;
522 }
523 }
524
525 /*
526 * Initialize an SPGiST page to empty, with specified flags
527 */
528 void
SpGistInitPage(Page page,uint16 f)529 SpGistInitPage(Page page, uint16 f)
530 {
531 SpGistPageOpaque opaque;
532
533 PageInit(page, BLCKSZ, MAXALIGN(sizeof(SpGistPageOpaqueData)));
534 opaque = SpGistPageGetOpaque(page);
535 memset(opaque, 0, sizeof(SpGistPageOpaqueData));
536 opaque->flags = f;
537 opaque->spgist_page_id = SPGIST_PAGE_ID;
538 }
539
540 /*
541 * Initialize a buffer's page to empty, with specified flags
542 */
543 void
SpGistInitBuffer(Buffer b,uint16 f)544 SpGistInitBuffer(Buffer b, uint16 f)
545 {
546 Assert(BufferGetPageSize(b) == BLCKSZ);
547 SpGistInitPage(BufferGetPage(b), f);
548 }
549
550 /*
551 * Initialize metadata page
552 */
553 void
SpGistInitMetapage(Page page)554 SpGistInitMetapage(Page page)
555 {
556 SpGistMetaPageData *metadata;
557 int i;
558
559 SpGistInitPage(page, SPGIST_META);
560 metadata = SpGistPageGetMeta(page);
561 memset(metadata, 0, sizeof(SpGistMetaPageData));
562 metadata->magicNumber = SPGIST_MAGIC_NUMBER;
563
564 /* initialize last-used-page cache to empty */
565 for (i = 0; i < SPGIST_CACHED_PAGES; i++)
566 metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
567
568 /*
569 * Set pd_lower just past the end of the metadata. This is essential,
570 * because without doing so, metadata will be lost if xlog.c compresses
571 * the page.
572 */
573 ((PageHeader) page)->pd_lower =
574 ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page;
575 }
576
577 /*
578 * reloptions processing for SPGiST
579 */
580 bytea *
spgoptions(Datum reloptions,bool validate)581 spgoptions(Datum reloptions, bool validate)
582 {
583 return default_reloptions(reloptions, validate, RELOPT_KIND_SPGIST);
584 }
585
586 /*
587 * Get the space needed to store a non-null datum of the indicated type.
588 * Note the result is already rounded up to a MAXALIGN boundary.
589 * Also, we follow the SPGiST convention that pass-by-val types are
590 * just stored in their Datum representation (compare memcpyDatum).
591 */
592 unsigned int
SpGistGetTypeSize(SpGistTypeDesc * att,Datum datum)593 SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum)
594 {
595 unsigned int size;
596
597 if (att->attbyval)
598 size = sizeof(Datum);
599 else if (att->attlen > 0)
600 size = att->attlen;
601 else
602 size = VARSIZE_ANY(datum);
603
604 return MAXALIGN(size);
605 }
606
607 /*
608 * Copy the given non-null datum to *target
609 */
610 static void
memcpyDatum(void * target,SpGistTypeDesc * att,Datum datum)611 memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
612 {
613 unsigned int size;
614
615 if (att->attbyval)
616 {
617 memcpy(target, &datum, sizeof(Datum));
618 }
619 else
620 {
621 size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
622 memcpy(target, DatumGetPointer(datum), size);
623 }
624 }
625
626 /*
627 * Construct a leaf tuple containing the given heap TID and datum value
628 */
629 SpGistLeafTuple
spgFormLeafTuple(SpGistState * state,ItemPointer heapPtr,Datum datum,bool isnull)630 spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
631 Datum datum, bool isnull)
632 {
633 SpGistLeafTuple tup;
634 unsigned int size;
635
636 /* compute space needed (note result is already maxaligned) */
637 size = SGLTHDRSZ;
638 if (!isnull)
639 size += SpGistGetTypeSize(&state->attLeafType, datum);
640
641 /*
642 * Ensure that we can replace the tuple with a dead tuple later. This
643 * test is unnecessary when !isnull, but let's be safe.
644 */
645 if (size < SGDTSIZE)
646 size = SGDTSIZE;
647
648 /* OK, form the tuple */
649 tup = (SpGistLeafTuple) palloc0(size);
650
651 tup->size = size;
652 tup->nextOffset = InvalidOffsetNumber;
653 tup->heapPtr = *heapPtr;
654 if (!isnull)
655 memcpyDatum(SGLTDATAPTR(tup), &state->attLeafType, datum);
656
657 return tup;
658 }
659
660 /*
661 * Construct a node (to go into an inner tuple) containing the given label
662 *
663 * Note that the node's downlink is just set invalid here. Caller will fill
664 * it in later.
665 */
666 SpGistNodeTuple
spgFormNodeTuple(SpGistState * state,Datum label,bool isnull)667 spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
668 {
669 SpGistNodeTuple tup;
670 unsigned int size;
671 unsigned short infomask = 0;
672
673 /* compute space needed (note result is already maxaligned) */
674 size = SGNTHDRSZ;
675 if (!isnull)
676 size += SpGistGetTypeSize(&state->attLabelType, label);
677
678 /*
679 * Here we make sure that the size will fit in the field reserved for it
680 * in t_info.
681 */
682 if ((size & INDEX_SIZE_MASK) != size)
683 ereport(ERROR,
684 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
685 errmsg("index row requires %zu bytes, maximum size is %zu",
686 (Size) size, (Size) INDEX_SIZE_MASK)));
687
688 tup = (SpGistNodeTuple) palloc0(size);
689
690 if (isnull)
691 infomask |= INDEX_NULL_MASK;
692 /* we don't bother setting the INDEX_VAR_MASK bit */
693 infomask |= size;
694 tup->t_info = infomask;
695
696 /* The TID field will be filled in later */
697 ItemPointerSetInvalid(&tup->t_tid);
698
699 if (!isnull)
700 memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
701
702 return tup;
703 }
704
705 /*
706 * Construct an inner tuple containing the given prefix and node array
707 */
708 SpGistInnerTuple
spgFormInnerTuple(SpGistState * state,bool hasPrefix,Datum prefix,int nNodes,SpGistNodeTuple * nodes)709 spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
710 int nNodes, SpGistNodeTuple *nodes)
711 {
712 SpGistInnerTuple tup;
713 unsigned int size;
714 unsigned int prefixSize;
715 int i;
716 char *ptr;
717
718 /* Compute size needed */
719 if (hasPrefix)
720 prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
721 else
722 prefixSize = 0;
723
724 size = SGITHDRSZ + prefixSize;
725
726 /* Note: we rely on node tuple sizes to be maxaligned already */
727 for (i = 0; i < nNodes; i++)
728 size += IndexTupleSize(nodes[i]);
729
730 /*
731 * Ensure that we can replace the tuple with a dead tuple later. This
732 * test is unnecessary given current tuple layouts, but let's be safe.
733 */
734 if (size < SGDTSIZE)
735 size = SGDTSIZE;
736
737 /*
738 * Inner tuple should be small enough to fit on a page
739 */
740 if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
741 ereport(ERROR,
742 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
743 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
744 (Size) size,
745 SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
746 errhint("Values larger than a buffer page cannot be indexed.")));
747
748 /*
749 * Check for overflow of header fields --- probably can't fail if the
750 * above succeeded, but let's be paranoid
751 */
752 if (size > SGITMAXSIZE ||
753 prefixSize > SGITMAXPREFIXSIZE ||
754 nNodes > SGITMAXNNODES)
755 elog(ERROR, "SPGiST inner tuple header field is too small");
756
757 /* OK, form the tuple */
758 tup = (SpGistInnerTuple) palloc0(size);
759
760 tup->nNodes = nNodes;
761 tup->prefixSize = prefixSize;
762 tup->size = size;
763
764 if (hasPrefix)
765 memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
766
767 ptr = (char *) SGITNODEPTR(tup);
768
769 for (i = 0; i < nNodes; i++)
770 {
771 SpGistNodeTuple node = nodes[i];
772
773 memcpy(ptr, node, IndexTupleSize(node));
774 ptr += IndexTupleSize(node);
775 }
776
777 return tup;
778 }
779
780 /*
781 * Construct a "dead" tuple to replace a tuple being deleted.
782 *
783 * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
784 * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
785 * the xid field is filled in automatically.
786 *
787 * This is called in critical sections, so we don't use palloc; the tuple
788 * is built in preallocated storage. It should be copied before another
789 * call with different parameters can occur.
790 */
791 SpGistDeadTuple
spgFormDeadTuple(SpGistState * state,int tupstate,BlockNumber blkno,OffsetNumber offnum)792 spgFormDeadTuple(SpGistState *state, int tupstate,
793 BlockNumber blkno, OffsetNumber offnum)
794 {
795 SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;
796
797 tuple->tupstate = tupstate;
798 tuple->size = SGDTSIZE;
799 tuple->nextOffset = InvalidOffsetNumber;
800
801 if (tupstate == SPGIST_REDIRECT)
802 {
803 ItemPointerSet(&tuple->pointer, blkno, offnum);
804 Assert(TransactionIdIsValid(state->myXid));
805 tuple->xid = state->myXid;
806 }
807 else
808 {
809 ItemPointerSetInvalid(&tuple->pointer);
810 tuple->xid = InvalidTransactionId;
811 }
812
813 return tuple;
814 }
815
816 /*
817 * Extract the label datums of the nodes within innerTuple
818 *
819 * Returns NULL if label datums are NULLs
820 */
821 Datum *
spgExtractNodeLabels(SpGistState * state,SpGistInnerTuple innerTuple)822 spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
823 {
824 Datum *nodeLabels;
825 int i;
826 SpGistNodeTuple node;
827
828 /* Either all the labels must be NULL, or none. */
829 node = SGITNODEPTR(innerTuple);
830 if (IndexTupleHasNulls(node))
831 {
832 SGITITERATE(innerTuple, i, node)
833 {
834 if (!IndexTupleHasNulls(node))
835 elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
836 }
837 /* They're all null, so just return NULL */
838 return NULL;
839 }
840 else
841 {
842 nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
843 SGITITERATE(innerTuple, i, node)
844 {
845 if (IndexTupleHasNulls(node))
846 elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
847 nodeLabels[i] = SGNTDATUM(node, state);
848 }
849 return nodeLabels;
850 }
851 }
852
853 /*
854 * Add a new item to the page, replacing a PLACEHOLDER item if possible.
855 * Return the location it's inserted at, or InvalidOffsetNumber on failure.
856 *
857 * If startOffset isn't NULL, we start searching for placeholders at
858 * *startOffset, and update that to the next place to search. This is just
859 * an optimization for repeated insertions.
860 *
861 * If errorOK is false, we throw error when there's not enough room,
862 * rather than returning InvalidOffsetNumber.
863 */
864 OffsetNumber
SpGistPageAddNewItem(SpGistState * state,Page page,Item item,Size size,OffsetNumber * startOffset,bool errorOK)865 SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
866 OffsetNumber *startOffset, bool errorOK)
867 {
868 SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
869 OffsetNumber i,
870 maxoff,
871 offnum;
872
873 if (opaque->nPlaceholder > 0 &&
874 PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
875 {
876 /* Try to replace a placeholder */
877 maxoff = PageGetMaxOffsetNumber(page);
878 offnum = InvalidOffsetNumber;
879
880 for (;;)
881 {
882 if (startOffset && *startOffset != InvalidOffsetNumber)
883 i = *startOffset;
884 else
885 i = FirstOffsetNumber;
886 for (; i <= maxoff; i++)
887 {
888 SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
889 PageGetItemId(page, i));
890
891 if (it->tupstate == SPGIST_PLACEHOLDER)
892 {
893 offnum = i;
894 break;
895 }
896 }
897
898 /* Done if we found a placeholder */
899 if (offnum != InvalidOffsetNumber)
900 break;
901
902 if (startOffset && *startOffset != InvalidOffsetNumber)
903 {
904 /* Hint was no good, re-search from beginning */
905 *startOffset = InvalidOffsetNumber;
906 continue;
907 }
908
909 /* Hmm, no placeholder found? */
910 opaque->nPlaceholder = 0;
911 break;
912 }
913
914 if (offnum != InvalidOffsetNumber)
915 {
916 /* Replace the placeholder tuple */
917 PageIndexTupleDelete(page, offnum);
918
919 offnum = PageAddItem(page, item, size, offnum, false, false);
920
921 /*
922 * We should not have failed given the size check at the top of
923 * the function, but test anyway. If we did fail, we must PANIC
924 * because we've already deleted the placeholder tuple, and
925 * there's no other way to keep the damage from getting to disk.
926 */
927 if (offnum != InvalidOffsetNumber)
928 {
929 Assert(opaque->nPlaceholder > 0);
930 opaque->nPlaceholder--;
931 if (startOffset)
932 *startOffset = offnum + 1;
933 }
934 else
935 elog(PANIC, "failed to add item of size %u to SPGiST index page",
936 (int) size);
937
938 return offnum;
939 }
940 }
941
942 /* No luck in replacing a placeholder, so just add it to the page */
943 offnum = PageAddItem(page, item, size,
944 InvalidOffsetNumber, false, false);
945
946 if (offnum == InvalidOffsetNumber && !errorOK)
947 elog(ERROR, "failed to add item of size %u to SPGiST index page",
948 (int) size);
949
950 return offnum;
951 }
952