1 /*-------------------------------------------------------------------------
2 *
3 * spgutils.c
4 * various support functions for SP-GiST
5 *
6 *
7 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
8 * Portions Copyright (c) 1994, Regents of the University of California
9 *
10 * IDENTIFICATION
11 * src/backend/access/spgist/spgutils.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16 #include "postgres.h"
17
18 #include "access/reloptions.h"
19 #include "access/spgist_private.h"
20 #include "access/transam.h"
21 #include "access/xact.h"
22 #include "storage/bufmgr.h"
23 #include "storage/indexfsm.h"
24 #include "storage/lmgr.h"
25 #include "utils/builtins.h"
26 #include "utils/index_selfuncs.h"
27 #include "utils/lsyscache.h"
28
29
30 /*
31 * SP-GiST handler function: return IndexAmRoutine with access method parameters
32 * and callbacks.
33 */
34 Datum
spghandler(PG_FUNCTION_ARGS)35 spghandler(PG_FUNCTION_ARGS)
36 {
37 IndexAmRoutine *amroutine = makeNode(IndexAmRoutine);
38
39 amroutine->amstrategies = 0;
40 amroutine->amsupport = SPGISTNProc;
41 amroutine->amcanorder = false;
42 amroutine->amcanorderbyop = false;
43 amroutine->amcanbackward = false;
44 amroutine->amcanunique = false;
45 amroutine->amcanmulticol = false;
46 amroutine->amoptionalkey = true;
47 amroutine->amsearcharray = false;
48 amroutine->amsearchnulls = true;
49 amroutine->amstorage = false;
50 amroutine->amclusterable = false;
51 amroutine->ampredlocks = false;
52 amroutine->amcanparallel = false;
53 amroutine->amkeytype = InvalidOid;
54
55 amroutine->ambuild = spgbuild;
56 amroutine->ambuildempty = spgbuildempty;
57 amroutine->aminsert = spginsert;
58 amroutine->ambulkdelete = spgbulkdelete;
59 amroutine->amvacuumcleanup = spgvacuumcleanup;
60 amroutine->amcanreturn = spgcanreturn;
61 amroutine->amcostestimate = spgcostestimate;
62 amroutine->amoptions = spgoptions;
63 amroutine->amproperty = NULL;
64 amroutine->amvalidate = spgvalidate;
65 amroutine->ambeginscan = spgbeginscan;
66 amroutine->amrescan = spgrescan;
67 amroutine->amgettuple = spggettuple;
68 amroutine->amgetbitmap = spggetbitmap;
69 amroutine->amendscan = spgendscan;
70 amroutine->ammarkpos = NULL;
71 amroutine->amrestrpos = NULL;
72 amroutine->amestimateparallelscan = NULL;
73 amroutine->aminitparallelscan = NULL;
74 amroutine->amparallelrescan = NULL;
75
76 PG_RETURN_POINTER(amroutine);
77 }
78
79 /* Fill in a SpGistTypeDesc struct with info about the specified data type */
80 static void
fillTypeDesc(SpGistTypeDesc * desc,Oid type)81 fillTypeDesc(SpGistTypeDesc *desc, Oid type)
82 {
83 desc->type = type;
84 get_typlenbyval(type, &desc->attlen, &desc->attbyval);
85 }
86
87 /*
88 * Fetch local cache of AM-specific info about the index, initializing it
89 * if necessary
90 */
91 SpGistCache *
spgGetCache(Relation index)92 spgGetCache(Relation index)
93 {
94 SpGistCache *cache;
95
96 if (index->rd_amcache == NULL)
97 {
98 Oid atttype;
99 spgConfigIn in;
100 FmgrInfo *procinfo;
101 Buffer metabuffer;
102 SpGistMetaPageData *metadata;
103
104 cache = MemoryContextAllocZero(index->rd_indexcxt,
105 sizeof(SpGistCache));
106
107 /* SPGiST doesn't support multi-column indexes */
108 Assert(index->rd_att->natts == 1);
109
110 /*
111 * Get the actual data type of the indexed column from the index
112 * tupdesc. We pass this to the opclass config function so that
113 * polymorphic opclasses are possible.
114 */
115 atttype = index->rd_att->attrs[0]->atttypid;
116
117 /* Call the config function to get config info for the opclass */
118 in.attType = atttype;
119
120 procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC);
121 FunctionCall2Coll(procinfo,
122 index->rd_indcollation[0],
123 PointerGetDatum(&in),
124 PointerGetDatum(&cache->config));
125
126 /* Get the information we need about each relevant datatype */
127 fillTypeDesc(&cache->attType, atttype);
128 fillTypeDesc(&cache->attPrefixType, cache->config.prefixType);
129 fillTypeDesc(&cache->attLabelType, cache->config.labelType);
130
131 /* Last, get the lastUsedPages data from the metapage */
132 metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
133 LockBuffer(metabuffer, BUFFER_LOCK_SHARE);
134
135 metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
136
137 if (metadata->magicNumber != SPGIST_MAGIC_NUMBER)
138 elog(ERROR, "index \"%s\" is not an SP-GiST index",
139 RelationGetRelationName(index));
140
141 cache->lastUsedPages = metadata->lastUsedPages;
142
143 UnlockReleaseBuffer(metabuffer);
144
145 index->rd_amcache = (void *) cache;
146 }
147 else
148 {
149 /* assume it's up to date */
150 cache = (SpGistCache *) index->rd_amcache;
151 }
152
153 return cache;
154 }
155
156 /* Initialize SpGistState for working with the given index */
157 void
initSpGistState(SpGistState * state,Relation index)158 initSpGistState(SpGistState *state, Relation index)
159 {
160 SpGistCache *cache;
161
162 /* Get cached static information about index */
163 cache = spgGetCache(index);
164
165 state->config = cache->config;
166 state->attType = cache->attType;
167 state->attPrefixType = cache->attPrefixType;
168 state->attLabelType = cache->attLabelType;
169
170 /* Make workspace for constructing dead tuples */
171 state->deadTupleStorage = palloc0(SGDTSIZE);
172
173 /* Set XID to use in redirection tuples */
174 state->myXid = GetTopTransactionIdIfAny();
175
176 /* Assume we're not in an index build (spgbuild will override) */
177 state->isBuild = false;
178 }
179
180 /*
181 * Allocate a new page (either by recycling, or by extending the index file).
182 *
183 * The returned buffer is already pinned and exclusive-locked.
184 * Caller is responsible for initializing the page by calling SpGistInitBuffer.
185 */
186 Buffer
SpGistNewBuffer(Relation index)187 SpGistNewBuffer(Relation index)
188 {
189 Buffer buffer;
190 bool needLock;
191
192 /* First, try to get a page from FSM */
193 for (;;)
194 {
195 BlockNumber blkno = GetFreeIndexPage(index);
196
197 if (blkno == InvalidBlockNumber)
198 break; /* nothing known to FSM */
199
200 /*
201 * The fixed pages shouldn't ever be listed in FSM, but just in case
202 * one is, ignore it.
203 */
204 if (SpGistBlockIsFixed(blkno))
205 continue;
206
207 buffer = ReadBuffer(index, blkno);
208
209 /*
210 * We have to guard against the possibility that someone else already
211 * recycled this page; the buffer may be locked if so.
212 */
213 if (ConditionalLockBuffer(buffer))
214 {
215 Page page = BufferGetPage(buffer);
216
217 if (PageIsNew(page))
218 return buffer; /* OK to use, if never initialized */
219
220 if (SpGistPageIsDeleted(page) || PageIsEmpty(page))
221 return buffer; /* OK to use */
222
223 LockBuffer(buffer, BUFFER_LOCK_UNLOCK);
224 }
225
226 /* Can't use it, so release buffer and try again */
227 ReleaseBuffer(buffer);
228 }
229
230 /* Must extend the file */
231 needLock = !RELATION_IS_LOCAL(index);
232 if (needLock)
233 LockRelationForExtension(index, ExclusiveLock);
234
235 buffer = ReadBuffer(index, P_NEW);
236 LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE);
237
238 if (needLock)
239 UnlockRelationForExtension(index, ExclusiveLock);
240
241 return buffer;
242 }
243
244 /*
245 * Update index metapage's lastUsedPages info from local cache, if possible
246 *
247 * Updating meta page isn't critical for index working, so
248 * 1 use ConditionalLockBuffer to improve concurrency
249 * 2 don't WAL-log metabuffer changes to decrease WAL traffic
250 */
251 void
SpGistUpdateMetaPage(Relation index)252 SpGistUpdateMetaPage(Relation index)
253 {
254 SpGistCache *cache = (SpGistCache *) index->rd_amcache;
255
256 if (cache != NULL)
257 {
258 Buffer metabuffer;
259 SpGistMetaPageData *metadata;
260
261 metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO);
262
263 if (ConditionalLockBuffer(metabuffer))
264 {
265 metadata = SpGistPageGetMeta(BufferGetPage(metabuffer));
266 metadata->lastUsedPages = cache->lastUsedPages;
267
268 MarkBufferDirty(metabuffer);
269 UnlockReleaseBuffer(metabuffer);
270 }
271 else
272 {
273 ReleaseBuffer(metabuffer);
274 }
275 }
276 }
277
/*
 * GET_LUP(c, f)
 *		Yield a pointer to the element of cache c's lastUsedPages array that
 *		corresponds to flags f.
 *
 * Reducing the flags value modulo SPGIST_CACHED_PAGES is just for paranoia's
 * sake: it keeps the array subscript in range whatever f contains.
 */
#define GET_LUP(c, f) \
	(&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES])
281
282 /*
283 * Allocate and initialize a new buffer of the type and parity specified by
284 * flags. The returned buffer is already pinned and exclusive-locked.
285 *
286 * When requesting an inner page, if we get one with the wrong parity,
287 * we just release the buffer and try again. We will get a different page
288 * because GetFreeIndexPage will have marked the page used in FSM. The page
289 * is entered in our local lastUsedPages cache, so there's some hope of
290 * making use of it later in this session, but otherwise we rely on VACUUM
291 * to eventually re-enter the page in FSM, making it available for recycling.
292 * Note that such a page does not get marked dirty here, so unless it's used
293 * fairly soon, the buffer will just get discarded and the page will remain
294 * as it was on disk.
295 *
296 * When we return a buffer to the caller, the page is *not* entered into
297 * the lastUsedPages cache; we expect the caller will do so after it's taken
298 * whatever space it will use. This is because after the caller has used up
299 * some space, the page might have less space than whatever was cached already
300 * so we'd rather not trash the old cache entry.
301 */
302 static Buffer
allocNewBuffer(Relation index,int flags)303 allocNewBuffer(Relation index, int flags)
304 {
305 SpGistCache *cache = spgGetCache(index);
306 uint16 pageflags = 0;
307
308 if (GBUF_REQ_LEAF(flags))
309 pageflags |= SPGIST_LEAF;
310 if (GBUF_REQ_NULLS(flags))
311 pageflags |= SPGIST_NULLS;
312
313 for (;;)
314 {
315 Buffer buffer;
316
317 buffer = SpGistNewBuffer(index);
318 SpGistInitBuffer(buffer, pageflags);
319
320 if (pageflags & SPGIST_LEAF)
321 {
322 /* Leaf pages have no parity concerns, so just use it */
323 return buffer;
324 }
325 else
326 {
327 BlockNumber blkno = BufferGetBlockNumber(buffer);
328 int blkFlags = GBUF_INNER_PARITY(blkno);
329
330 if ((flags & GBUF_PARITY_MASK) == blkFlags)
331 {
332 /* Page has right parity, use it */
333 return buffer;
334 }
335 else
336 {
337 /* Page has wrong parity, record it in cache and try again */
338 if (pageflags & SPGIST_NULLS)
339 blkFlags |= GBUF_NULLS;
340 cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno;
341 cache->lastUsedPages.cachedPage[blkFlags].freeSpace =
342 PageGetExactFreeSpace(BufferGetPage(buffer));
343 UnlockReleaseBuffer(buffer);
344 }
345 }
346 }
347 }
348
349 /*
350 * Get a buffer of the type and parity specified by flags, having at least
351 * as much free space as indicated by needSpace. We use the lastUsedPages
352 * cache to assign the same buffer previously requested when possible.
353 * The returned buffer is already pinned and exclusive-locked.
354 *
355 * *isNew is set true if the page was initialized here, false if it was
356 * already valid.
357 */
358 Buffer
SpGistGetBuffer(Relation index,int flags,int needSpace,bool * isNew)359 SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew)
360 {
361 SpGistCache *cache = spgGetCache(index);
362 SpGistLastUsedPage *lup;
363
364 /* Bail out if even an empty page wouldn't meet the demand */
365 if (needSpace > SPGIST_PAGE_CAPACITY)
366 elog(ERROR, "desired SPGiST tuple size is too big");
367
368 /*
369 * If possible, increase the space request to include relation's
370 * fillfactor. This ensures that when we add unrelated tuples to a page,
371 * we try to keep 100-fillfactor% available for adding tuples that are
372 * related to the ones already on it. But fillfactor mustn't cause an
373 * error for requests that would otherwise be legal.
374 */
375 needSpace += RelationGetTargetPageFreeSpace(index,
376 SPGIST_DEFAULT_FILLFACTOR);
377 needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY);
378
379 /* Get the cache entry for this flags setting */
380 lup = GET_LUP(cache, flags);
381
382 /* If we have nothing cached, just turn it over to allocNewBuffer */
383 if (lup->blkno == InvalidBlockNumber)
384 {
385 *isNew = true;
386 return allocNewBuffer(index, flags);
387 }
388
389 /* fixed pages should never be in cache */
390 Assert(!SpGistBlockIsFixed(lup->blkno));
391
392 /* If cached freeSpace isn't enough, don't bother looking at the page */
393 if (lup->freeSpace >= needSpace)
394 {
395 Buffer buffer;
396 Page page;
397
398 buffer = ReadBuffer(index, lup->blkno);
399
400 if (!ConditionalLockBuffer(buffer))
401 {
402 /*
403 * buffer is locked by another process, so return a new buffer
404 */
405 ReleaseBuffer(buffer);
406 *isNew = true;
407 return allocNewBuffer(index, flags);
408 }
409
410 page = BufferGetPage(buffer);
411
412 if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page))
413 {
414 /* OK to initialize the page */
415 uint16 pageflags = 0;
416
417 if (GBUF_REQ_LEAF(flags))
418 pageflags |= SPGIST_LEAF;
419 if (GBUF_REQ_NULLS(flags))
420 pageflags |= SPGIST_NULLS;
421 SpGistInitBuffer(buffer, pageflags);
422 lup->freeSpace = PageGetExactFreeSpace(page) - needSpace;
423 *isNew = true;
424 return buffer;
425 }
426
427 /*
428 * Check that page is of right type and has enough space. We must
429 * recheck this since our cache isn't necessarily up to date.
430 */
431 if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) &&
432 (GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page)))
433 {
434 int freeSpace = PageGetExactFreeSpace(page);
435
436 if (freeSpace >= needSpace)
437 {
438 /* Success, update freespace info and return the buffer */
439 lup->freeSpace = freeSpace - needSpace;
440 *isNew = false;
441 return buffer;
442 }
443 }
444
445 /*
446 * fallback to allocation of new buffer
447 */
448 UnlockReleaseBuffer(buffer);
449 }
450
451 /* No success with cache, so return a new buffer */
452 *isNew = true;
453 return allocNewBuffer(index, flags);
454 }
455
456 /*
457 * Update lastUsedPages cache when done modifying a page.
458 *
459 * We update the appropriate cache entry if it already contained this page
460 * (its freeSpace is likely obsolete), or if this page has more space than
461 * whatever we had cached.
462 */
463 void
SpGistSetLastUsedPage(Relation index,Buffer buffer)464 SpGistSetLastUsedPage(Relation index, Buffer buffer)
465 {
466 SpGistCache *cache = spgGetCache(index);
467 SpGistLastUsedPage *lup;
468 int freeSpace;
469 Page page = BufferGetPage(buffer);
470 BlockNumber blkno = BufferGetBlockNumber(buffer);
471 int flags;
472
473 /* Never enter fixed pages (root pages) in cache, though */
474 if (SpGistBlockIsFixed(blkno))
475 return;
476
477 if (SpGistPageIsLeaf(page))
478 flags = GBUF_LEAF;
479 else
480 flags = GBUF_INNER_PARITY(blkno);
481 if (SpGistPageStoresNulls(page))
482 flags |= GBUF_NULLS;
483
484 lup = GET_LUP(cache, flags);
485
486 freeSpace = PageGetExactFreeSpace(page);
487 if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno ||
488 lup->freeSpace < freeSpace)
489 {
490 lup->blkno = blkno;
491 lup->freeSpace = freeSpace;
492 }
493 }
494
495 /*
496 * Initialize an SPGiST page to empty, with specified flags
497 */
498 void
SpGistInitPage(Page page,uint16 f)499 SpGistInitPage(Page page, uint16 f)
500 {
501 SpGistPageOpaque opaque;
502
503 PageInit(page, BLCKSZ, MAXALIGN(sizeof(SpGistPageOpaqueData)));
504 opaque = SpGistPageGetOpaque(page);
505 memset(opaque, 0, sizeof(SpGistPageOpaqueData));
506 opaque->flags = f;
507 opaque->spgist_page_id = SPGIST_PAGE_ID;
508 }
509
510 /*
511 * Initialize a buffer's page to empty, with specified flags
512 */
513 void
SpGistInitBuffer(Buffer b,uint16 f)514 SpGistInitBuffer(Buffer b, uint16 f)
515 {
516 Assert(BufferGetPageSize(b) == BLCKSZ);
517 SpGistInitPage(BufferGetPage(b), f);
518 }
519
520 /*
521 * Initialize metadata page
522 */
523 void
SpGistInitMetapage(Page page)524 SpGistInitMetapage(Page page)
525 {
526 SpGistMetaPageData *metadata;
527 int i;
528
529 SpGistInitPage(page, SPGIST_META);
530 metadata = SpGistPageGetMeta(page);
531 memset(metadata, 0, sizeof(SpGistMetaPageData));
532 metadata->magicNumber = SPGIST_MAGIC_NUMBER;
533
534 /* initialize last-used-page cache to empty */
535 for (i = 0; i < SPGIST_CACHED_PAGES; i++)
536 metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber;
537 }
538
539 /*
540 * reloptions processing for SPGiST
541 */
542 bytea *
spgoptions(Datum reloptions,bool validate)543 spgoptions(Datum reloptions, bool validate)
544 {
545 return default_reloptions(reloptions, validate, RELOPT_KIND_SPGIST);
546 }
547
548 /*
549 * Get the space needed to store a non-null datum of the indicated type.
550 * Note the result is already rounded up to a MAXALIGN boundary.
551 * Also, we follow the SPGiST convention that pass-by-val types are
552 * just stored in their Datum representation (compare memcpyDatum).
553 */
554 unsigned int
SpGistGetTypeSize(SpGistTypeDesc * att,Datum datum)555 SpGistGetTypeSize(SpGistTypeDesc *att, Datum datum)
556 {
557 unsigned int size;
558
559 if (att->attbyval)
560 size = sizeof(Datum);
561 else if (att->attlen > 0)
562 size = att->attlen;
563 else
564 size = VARSIZE_ANY(datum);
565
566 return MAXALIGN(size);
567 }
568
569 /*
570 * Copy the given non-null datum to *target
571 */
572 static void
memcpyDatum(void * target,SpGistTypeDesc * att,Datum datum)573 memcpyDatum(void *target, SpGistTypeDesc *att, Datum datum)
574 {
575 unsigned int size;
576
577 if (att->attbyval)
578 {
579 memcpy(target, &datum, sizeof(Datum));
580 }
581 else
582 {
583 size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum);
584 memcpy(target, DatumGetPointer(datum), size);
585 }
586 }
587
588 /*
589 * Construct a leaf tuple containing the given heap TID and datum value
590 */
591 SpGistLeafTuple
spgFormLeafTuple(SpGistState * state,ItemPointer heapPtr,Datum datum,bool isnull)592 spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr,
593 Datum datum, bool isnull)
594 {
595 SpGistLeafTuple tup;
596 unsigned int size;
597
598 /* compute space needed (note result is already maxaligned) */
599 size = SGLTHDRSZ;
600 if (!isnull)
601 size += SpGistGetTypeSize(&state->attType, datum);
602
603 /*
604 * Ensure that we can replace the tuple with a dead tuple later. This
605 * test is unnecessary when !isnull, but let's be safe.
606 */
607 if (size < SGDTSIZE)
608 size = SGDTSIZE;
609
610 /* OK, form the tuple */
611 tup = (SpGistLeafTuple) palloc0(size);
612
613 tup->size = size;
614 tup->nextOffset = InvalidOffsetNumber;
615 tup->heapPtr = *heapPtr;
616 if (!isnull)
617 memcpyDatum(SGLTDATAPTR(tup), &state->attType, datum);
618
619 return tup;
620 }
621
622 /*
623 * Construct a node (to go into an inner tuple) containing the given label
624 *
625 * Note that the node's downlink is just set invalid here. Caller will fill
626 * it in later.
627 */
628 SpGistNodeTuple
spgFormNodeTuple(SpGistState * state,Datum label,bool isnull)629 spgFormNodeTuple(SpGistState *state, Datum label, bool isnull)
630 {
631 SpGistNodeTuple tup;
632 unsigned int size;
633 unsigned short infomask = 0;
634
635 /* compute space needed (note result is already maxaligned) */
636 size = SGNTHDRSZ;
637 if (!isnull)
638 size += SpGistGetTypeSize(&state->attLabelType, label);
639
640 /*
641 * Here we make sure that the size will fit in the field reserved for it
642 * in t_info.
643 */
644 if ((size & INDEX_SIZE_MASK) != size)
645 ereport(ERROR,
646 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
647 errmsg("index row requires %zu bytes, maximum size is %zu",
648 (Size) size, (Size) INDEX_SIZE_MASK)));
649
650 tup = (SpGistNodeTuple) palloc0(size);
651
652 if (isnull)
653 infomask |= INDEX_NULL_MASK;
654 /* we don't bother setting the INDEX_VAR_MASK bit */
655 infomask |= size;
656 tup->t_info = infomask;
657
658 /* The TID field will be filled in later */
659 ItemPointerSetInvalid(&tup->t_tid);
660
661 if (!isnull)
662 memcpyDatum(SGNTDATAPTR(tup), &state->attLabelType, label);
663
664 return tup;
665 }
666
667 /*
668 * Construct an inner tuple containing the given prefix and node array
669 */
670 SpGistInnerTuple
spgFormInnerTuple(SpGistState * state,bool hasPrefix,Datum prefix,int nNodes,SpGistNodeTuple * nodes)671 spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix,
672 int nNodes, SpGistNodeTuple *nodes)
673 {
674 SpGistInnerTuple tup;
675 unsigned int size;
676 unsigned int prefixSize;
677 int i;
678 char *ptr;
679
680 /* Compute size needed */
681 if (hasPrefix)
682 prefixSize = SpGistGetTypeSize(&state->attPrefixType, prefix);
683 else
684 prefixSize = 0;
685
686 size = SGITHDRSZ + prefixSize;
687
688 /* Note: we rely on node tuple sizes to be maxaligned already */
689 for (i = 0; i < nNodes; i++)
690 size += IndexTupleSize(nodes[i]);
691
692 /*
693 * Ensure that we can replace the tuple with a dead tuple later. This
694 * test is unnecessary given current tuple layouts, but let's be safe.
695 */
696 if (size < SGDTSIZE)
697 size = SGDTSIZE;
698
699 /*
700 * Inner tuple should be small enough to fit on a page
701 */
702 if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData))
703 ereport(ERROR,
704 (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED),
705 errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu",
706 (Size) size,
707 SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)),
708 errhint("Values larger than a buffer page cannot be indexed.")));
709
710 /*
711 * Check for overflow of header fields --- probably can't fail if the
712 * above succeeded, but let's be paranoid
713 */
714 if (size > SGITMAXSIZE ||
715 prefixSize > SGITMAXPREFIXSIZE ||
716 nNodes > SGITMAXNNODES)
717 elog(ERROR, "SPGiST inner tuple header field is too small");
718
719 /* OK, form the tuple */
720 tup = (SpGistInnerTuple) palloc0(size);
721
722 tup->nNodes = nNodes;
723 tup->prefixSize = prefixSize;
724 tup->size = size;
725
726 if (hasPrefix)
727 memcpyDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix);
728
729 ptr = (char *) SGITNODEPTR(tup);
730
731 for (i = 0; i < nNodes; i++)
732 {
733 SpGistNodeTuple node = nodes[i];
734
735 memcpy(ptr, node, IndexTupleSize(node));
736 ptr += IndexTupleSize(node);
737 }
738
739 return tup;
740 }
741
742 /*
743 * Construct a "dead" tuple to replace a tuple being deleted.
744 *
745 * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER.
746 * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and
747 * the xid field is filled in automatically.
748 *
749 * This is called in critical sections, so we don't use palloc; the tuple
750 * is built in preallocated storage. It should be copied before another
751 * call with different parameters can occur.
752 */
753 SpGistDeadTuple
spgFormDeadTuple(SpGistState * state,int tupstate,BlockNumber blkno,OffsetNumber offnum)754 spgFormDeadTuple(SpGistState *state, int tupstate,
755 BlockNumber blkno, OffsetNumber offnum)
756 {
757 SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage;
758
759 tuple->tupstate = tupstate;
760 tuple->size = SGDTSIZE;
761 tuple->nextOffset = InvalidOffsetNumber;
762
763 if (tupstate == SPGIST_REDIRECT)
764 {
765 ItemPointerSet(&tuple->pointer, blkno, offnum);
766 Assert(TransactionIdIsValid(state->myXid));
767 tuple->xid = state->myXid;
768 }
769 else
770 {
771 ItemPointerSetInvalid(&tuple->pointer);
772 tuple->xid = InvalidTransactionId;
773 }
774
775 return tuple;
776 }
777
778 /*
779 * Extract the label datums of the nodes within innerTuple
780 *
781 * Returns NULL if label datums are NULLs
782 */
783 Datum *
spgExtractNodeLabels(SpGistState * state,SpGistInnerTuple innerTuple)784 spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple)
785 {
786 Datum *nodeLabels;
787 int i;
788 SpGistNodeTuple node;
789
790 /* Either all the labels must be NULL, or none. */
791 node = SGITNODEPTR(innerTuple);
792 if (IndexTupleHasNulls(node))
793 {
794 SGITITERATE(innerTuple, i, node)
795 {
796 if (!IndexTupleHasNulls(node))
797 elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
798 }
799 /* They're all null, so just return NULL */
800 return NULL;
801 }
802 else
803 {
804 nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes);
805 SGITITERATE(innerTuple, i, node)
806 {
807 if (IndexTupleHasNulls(node))
808 elog(ERROR, "some but not all node labels are null in SPGiST inner tuple");
809 nodeLabels[i] = SGNTDATUM(node, state);
810 }
811 return nodeLabels;
812 }
813 }
814
815 /*
816 * Add a new item to the page, replacing a PLACEHOLDER item if possible.
817 * Return the location it's inserted at, or InvalidOffsetNumber on failure.
818 *
819 * If startOffset isn't NULL, we start searching for placeholders at
820 * *startOffset, and update that to the next place to search. This is just
821 * an optimization for repeated insertions.
822 *
823 * If errorOK is false, we throw error when there's not enough room,
824 * rather than returning InvalidOffsetNumber.
825 */
826 OffsetNumber
SpGistPageAddNewItem(SpGistState * state,Page page,Item item,Size size,OffsetNumber * startOffset,bool errorOK)827 SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size,
828 OffsetNumber *startOffset, bool errorOK)
829 {
830 SpGistPageOpaque opaque = SpGistPageGetOpaque(page);
831 OffsetNumber i,
832 maxoff,
833 offnum;
834
835 if (opaque->nPlaceholder > 0 &&
836 PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size))
837 {
838 /* Try to replace a placeholder */
839 maxoff = PageGetMaxOffsetNumber(page);
840 offnum = InvalidOffsetNumber;
841
842 for (;;)
843 {
844 if (startOffset && *startOffset != InvalidOffsetNumber)
845 i = *startOffset;
846 else
847 i = FirstOffsetNumber;
848 for (; i <= maxoff; i++)
849 {
850 SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page,
851 PageGetItemId(page, i));
852
853 if (it->tupstate == SPGIST_PLACEHOLDER)
854 {
855 offnum = i;
856 break;
857 }
858 }
859
860 /* Done if we found a placeholder */
861 if (offnum != InvalidOffsetNumber)
862 break;
863
864 if (startOffset && *startOffset != InvalidOffsetNumber)
865 {
866 /* Hint was no good, re-search from beginning */
867 *startOffset = InvalidOffsetNumber;
868 continue;
869 }
870
871 /* Hmm, no placeholder found? */
872 opaque->nPlaceholder = 0;
873 break;
874 }
875
876 if (offnum != InvalidOffsetNumber)
877 {
878 /* Replace the placeholder tuple */
879 PageIndexTupleDelete(page, offnum);
880
881 offnum = PageAddItem(page, item, size, offnum, false, false);
882
883 /*
884 * We should not have failed given the size check at the top of
885 * the function, but test anyway. If we did fail, we must PANIC
886 * because we've already deleted the placeholder tuple, and
887 * there's no other way to keep the damage from getting to disk.
888 */
889 if (offnum != InvalidOffsetNumber)
890 {
891 Assert(opaque->nPlaceholder > 0);
892 opaque->nPlaceholder--;
893 if (startOffset)
894 *startOffset = offnum + 1;
895 }
896 else
897 elog(PANIC, "failed to add item of size %u to SPGiST index page",
898 (int) size);
899
900 return offnum;
901 }
902 }
903
904 /* No luck in replacing a placeholder, so just add it to the page */
905 offnum = PageAddItem(page, item, size,
906 InvalidOffsetNumber, false, false);
907
908 if (offnum == InvalidOffsetNumber && !errorOK)
909 elog(ERROR, "failed to add item of size %u to SPGiST index page",
910 (int) size);
911
912 return offnum;
913 }
914