1 /*--------------------------------------------------------------------------
2  * gin_private.h
3  *	  header file for postgres inverted index access method implementation.
4  *
5  *	Copyright (c) 2006-2016, PostgreSQL Global Development Group
6  *
7  *	src/include/access/gin_private.h
8  *--------------------------------------------------------------------------
9  */
10 #ifndef GIN_PRIVATE_H
11 #define GIN_PRIVATE_H
12 
13 #include "access/amapi.h"
14 #include "access/gin.h"
15 #include "access/itup.h"
16 #include "access/transam.h"
17 #include "fmgr.h"
18 #include "storage/bufmgr.h"
19 #include "lib/rbtree.h"
20 
21 
22 /*
23  * Page opaque data in an inverted index page.
24  *
25  * Note: GIN does not include a page ID word as do the other index types.
26  * This is OK because the opaque data is only 8 bytes and so can be reliably
27  * distinguished by size.  Revisit this if the size ever increases.
28  * Further note: as of 9.2, SP-GiST also uses 8-byte special space, as does
29  * BRIN as of 9.5.  This is still OK, as long as GIN isn't using all of the
30  * high-order bits in its flags word, because that way the flags word cannot
31  * match the page IDs used by SP-GiST and BRIN.
32  */
33 typedef struct GinPageOpaqueData
34 {
35 	BlockNumber rightlink;		/* next page if any */
36 	OffsetNumber maxoff;		/* number of PostingItems on GIN_DATA &
37 								 * ~GIN_LEAF page. On GIN_LIST page, number of
38 								 * heap tuples. */
39 	uint16		flags;			/* see bit definitions below */
40 } GinPageOpaqueData;
41 
42 typedef GinPageOpaqueData *GinPageOpaque;
43 
44 #define GIN_DATA		  (1 << 0)
45 #define GIN_LEAF		  (1 << 1)
46 #define GIN_DELETED		  (1 << 2)
47 #define GIN_META		  (1 << 3)
48 #define GIN_LIST		  (1 << 4)
49 #define GIN_LIST_FULLROW  (1 << 5)		/* makes sense only on GIN_LIST page */
50 #define GIN_INCOMPLETE_SPLIT (1 << 6)	/* page was split, but parent not
51 										 * updated */
52 #define GIN_COMPRESSED	  (1 << 7)
53 
54 /* Page numbers of fixed-location pages */
55 #define GIN_METAPAGE_BLKNO	(0)
56 #define GIN_ROOT_BLKNO		(1)
57 
58 typedef struct GinMetaPageData
59 {
60 	/*
61 	 * Pointers to head and tail of pending list, which consists of GIN_LIST
62 	 * pages.  These store fast-inserted entries that haven't yet been moved
63 	 * into the regular GIN structure.
64 	 */
65 	BlockNumber head;
66 	BlockNumber tail;
67 
68 	/*
69 	 * Free space in bytes in the pending list's tail page.
70 	 */
71 	uint32		tailFreeSize;
72 
73 	/*
74 	 * We store both number of pages and number of heap tuples that are in the
75 	 * pending list.
76 	 */
77 	BlockNumber nPendingPages;
78 	int64		nPendingHeapTuples;
79 
80 	/*
81 	 * Statistics for planner use (accurate as of last VACUUM)
82 	 */
83 	BlockNumber nTotalPages;
84 	BlockNumber nEntryPages;
85 	BlockNumber nDataPages;
86 	int64		nEntries;
87 
88 	/*
89 	 * GIN version number (ideally this should have been at the front, but too
90 	 * late now.  Don't move it!)
91 	 *
92 	 * Currently 2 (for indexes initialized in 9.4 or later)
93 	 *
94 	 * Version 1 (indexes initialized in version 9.1, 9.2 or 9.3), is
95 	 * compatible, but may contain uncompressed posting tree (leaf) pages and
96 	 * posting lists. They will be converted to compressed format when
97 	 * modified.
98 	 *
99 	 * Version 0 (indexes initialized in 9.0 or before) is compatible but may
100 	 * be missing null entries, including both null keys and placeholders.
101 	 * Reject full-index-scan attempts on such indexes.
102 	 */
103 	int32		ginVersion;
104 } GinMetaPageData;
105 
106 #define GIN_CURRENT_VERSION		2
107 
108 #define GinPageGetMeta(p) \
109 	((GinMetaPageData *) PageGetContents(p))
110 
111 /*
112  * Macros for accessing a GIN index page's opaque data
113  */
114 #define GinPageGetOpaque(page) ( (GinPageOpaque) PageGetSpecialPointer(page) )
115 
116 #define GinPageIsLeaf(page)    ( (GinPageGetOpaque(page)->flags & GIN_LEAF) != 0 )
117 #define GinPageSetLeaf(page)   ( GinPageGetOpaque(page)->flags |= GIN_LEAF )
118 #define GinPageSetNonLeaf(page)    ( GinPageGetOpaque(page)->flags &= ~GIN_LEAF )
119 #define GinPageIsData(page)    ( (GinPageGetOpaque(page)->flags & GIN_DATA) != 0 )
120 #define GinPageSetData(page)   ( GinPageGetOpaque(page)->flags |= GIN_DATA )
121 #define GinPageIsList(page)    ( (GinPageGetOpaque(page)->flags & GIN_LIST) != 0 )
122 #define GinPageSetList(page)   ( GinPageGetOpaque(page)->flags |= GIN_LIST )
123 #define GinPageHasFullRow(page)    ( (GinPageGetOpaque(page)->flags & GIN_LIST_FULLROW) != 0 )
124 #define GinPageSetFullRow(page)   ( GinPageGetOpaque(page)->flags |= GIN_LIST_FULLROW )
125 #define GinPageIsCompressed(page)	 ( (GinPageGetOpaque(page)->flags & GIN_COMPRESSED) != 0 )
126 #define GinPageSetCompressed(page)	 ( GinPageGetOpaque(page)->flags |= GIN_COMPRESSED )
127 
128 #define GinPageIsDeleted(page) ( (GinPageGetOpaque(page)->flags & GIN_DELETED) != 0 )
129 #define GinPageSetDeleted(page)    ( GinPageGetOpaque(page)->flags |= GIN_DELETED)
130 #define GinPageSetNonDeleted(page) ( GinPageGetOpaque(page)->flags &= ~GIN_DELETED)
131 #define GinPageIsIncompleteSplit(page) ( (GinPageGetOpaque(page)->flags & GIN_INCOMPLETE_SPLIT) != 0 )
132 
133 #define GinPageRightMost(page) ( GinPageGetOpaque(page)->rightlink == InvalidBlockNumber)
134 
135 /*
136  * We should reclaim deleted page only once every transaction started before
137  * its deletion is over.
138  */
139 #define GinPageGetDeleteXid(page) ( ((PageHeader) (page))->pd_prune_xid )
140 #define GinPageSetDeleteXid(page, xid) ( ((PageHeader) (page))->pd_prune_xid = xid)
141 #define GinPageIsRecyclable(page) ( PageIsNew(page) || (GinPageIsDeleted(page) \
142 	&& TransactionIdPrecedes(GinPageGetDeleteXid(page), RecentGlobalXmin)))
143 
144 /*
145  * We use our own ItemPointerGet(BlockNumber|OffsetNumber)
146  * to avoid Asserts, since sometimes the ip_posid isn't "valid"
147  */
148 #define GinItemPointerGetBlockNumber(pointer) \
149 	BlockIdGetBlockNumber(&(pointer)->ip_blkid)
150 
151 #define GinItemPointerGetOffsetNumber(pointer) \
152 	((pointer)->ip_posid)
153 
154 /*
155  * Special-case item pointer values needed by the GIN search logic.
156  *	MIN: sorts less than any valid item pointer
157  *	MAX: sorts greater than any valid item pointer
158  *	LOSSY PAGE: indicates a whole heap page, sorts after normal item
159  *				pointers for that page
160  * Note that these are all distinguishable from an "invalid" item pointer
161  * (which is InvalidBlockNumber/0) as well as from all normal item
162  * pointers (which have item numbers in the range 1..MaxHeapTuplesPerPage).
163  */
164 #define ItemPointerSetMin(p)  \
165 	ItemPointerSet((p), (BlockNumber)0, (OffsetNumber)0)
166 #define ItemPointerIsMin(p)  \
167 	(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0 && \
168 	 GinItemPointerGetBlockNumber(p) == (BlockNumber)0)
169 #define ItemPointerSetMax(p)  \
170 	ItemPointerSet((p), InvalidBlockNumber, (OffsetNumber)0xffff)
171 #define ItemPointerIsMax(p)  \
172 	(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
173 	 GinItemPointerGetBlockNumber(p) == InvalidBlockNumber)
174 #define ItemPointerSetLossyPage(p, b)  \
175 	ItemPointerSet((p), (b), (OffsetNumber)0xffff)
176 #define ItemPointerIsLossyPage(p)  \
177 	(GinItemPointerGetOffsetNumber(p) == (OffsetNumber)0xffff && \
178 	 GinItemPointerGetBlockNumber(p) != InvalidBlockNumber)
179 
180 /*
181  * Posting item in a non-leaf posting-tree page
182  */
183 typedef struct
184 {
185 	/* We use BlockIdData not BlockNumber to avoid padding space wastage */
186 	BlockIdData child_blkno;
187 	ItemPointerData key;
188 } PostingItem;
189 
190 #define PostingItemGetBlockNumber(pointer) \
191 	BlockIdGetBlockNumber(&(pointer)->child_blkno)
192 
193 #define PostingItemSetBlockNumber(pointer, blockNumber) \
194 	BlockIdSet(&((pointer)->child_blkno), (blockNumber))
195 
196 /*
197  * Category codes to distinguish placeholder nulls from ordinary NULL keys.
198  * Note that the datatype size and the first two code values are chosen to be
199  * compatible with the usual usage of bool isNull flags.
200  *
201  * GIN_CAT_EMPTY_QUERY is never stored in the index; and notice that it is
202  * chosen to sort before not after regular key values.
203  */
204 typedef signed char GinNullCategory;
205 
206 #define GIN_CAT_NORM_KEY		0		/* normal, non-null key value */
207 #define GIN_CAT_NULL_KEY		1		/* null key value */
208 #define GIN_CAT_EMPTY_ITEM		2		/* placeholder for zero-key item */
209 #define GIN_CAT_NULL_ITEM		3		/* placeholder for null item */
210 #define GIN_CAT_EMPTY_QUERY		(-1)	/* placeholder for full-scan query */
211 
212 /*
213  * Access macros for null category byte in entry tuples
214  */
215 #define GinCategoryOffset(itup,ginstate) \
216 	(IndexInfoFindDataOffset((itup)->t_info) + \
217 	 ((ginstate)->oneCol ? 0 : sizeof(int16)))
218 #define GinGetNullCategory(itup,ginstate) \
219 	(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))))
220 #define GinSetNullCategory(itup,ginstate,c) \
221 	(*((GinNullCategory *) ((char*)(itup) + GinCategoryOffset(itup,ginstate))) = (c))
222 
223 /*
224  * Access macros for leaf-page entry tuples (see discussion in README)
225  */
226 #define GinGetNPosting(itup)	GinItemPointerGetOffsetNumber(&(itup)->t_tid)
227 #define GinSetNPosting(itup,n)	ItemPointerSetOffsetNumber(&(itup)->t_tid,n)
228 #define GIN_TREE_POSTING		((OffsetNumber)0xffff)
229 #define GinIsPostingTree(itup)	(GinGetNPosting(itup) == GIN_TREE_POSTING)
230 #define GinSetPostingTree(itup, blkno)	( GinSetNPosting((itup),GIN_TREE_POSTING), ItemPointerSetBlockNumber(&(itup)->t_tid, blkno) )
231 #define GinGetPostingTree(itup) GinItemPointerGetBlockNumber(&(itup)->t_tid)
232 
233 #define GIN_ITUP_COMPRESSED		(1U << 31)
234 #define GinGetPostingOffset(itup)	(GinItemPointerGetBlockNumber(&(itup)->t_tid) & (~GIN_ITUP_COMPRESSED))
235 #define GinSetPostingOffset(itup,n) ItemPointerSetBlockNumber(&(itup)->t_tid,(n)|GIN_ITUP_COMPRESSED)
236 #define GinGetPosting(itup)			((Pointer) ((char*)(itup) + GinGetPostingOffset(itup)))
237 #define GinItupIsCompressed(itup)	((GinItemPointerGetBlockNumber(&(itup)->t_tid) & GIN_ITUP_COMPRESSED) != 0)
238 
239 /*
240  * Maximum size of an item on entry tree page. Make sure that we fit at least
241  * three items on each page. (On regular B-tree indexes, we must fit at least
242  * three items: two data items and the "high key". In GIN entry tree, we don't
243  * currently store the high key explicitly, we just use the rightmost item on
244  * the page, so it would actually be enough to fit two items.)
245  */
246 #define GinMaxItemSize \
247 	Min(INDEX_SIZE_MASK, \
248 		MAXALIGN_DOWN(((BLCKSZ - \
249 						MAXALIGN(SizeOfPageHeaderData + 3 * sizeof(ItemIdData)) - \
250 						MAXALIGN(sizeof(GinPageOpaqueData))) / 3)))
251 
252 /*
253  * Access macros for non-leaf entry tuples
254  */
255 #define GinGetDownlink(itup)	GinItemPointerGetBlockNumber(&(itup)->t_tid)
256 #define GinSetDownlink(itup,blkno)	ItemPointerSet(&(itup)->t_tid, blkno, InvalidOffsetNumber)
257 
258 
259 /*
260  * Data (posting tree) pages
261  *
262  * Posting tree pages don't store regular tuples. Non-leaf pages contain
263  * PostingItems, which are pairs of ItemPointers and child block numbers.
264  * Leaf pages contain GinPostingLists and an uncompressed array of item
265  * pointers.
266  *
267  * In a leaf page, the compressed posting lists are stored after the regular
268  * page header, one after each other. Although we don't store regular tuples,
269  * pd_lower is used to indicate the end of the posting lists. After that, free
270  * space follows.  This layout is compatible with the "standard" heap and
271  * index page layout described in bufpage.h, so that we can e.g set buffer_std
272  * when writing WAL records.
273  *
274  * In the special space is the GinPageOpaque struct.
275  */
276 #define GinDataLeafPageGetPostingList(page) \
277 	(GinPostingList *) ((PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData))))
278 #define GinDataLeafPageGetPostingListSize(page) \
279 	(((PageHeader) page)->pd_lower - MAXALIGN(SizeOfPageHeaderData) - MAXALIGN(sizeof(ItemPointerData)))
280 
281 #define GinDataLeafPageIsEmpty(page) \
282 	(GinPageIsCompressed(page) ? (GinDataLeafPageGetPostingListSize(page) == 0) : (GinPageGetOpaque(page)->maxoff < FirstOffsetNumber))
283 
284 #define GinDataLeafPageGetFreeSpace(page) PageGetExactFreeSpace(page)
285 
286 #define GinDataPageGetRightBound(page)	((ItemPointer) PageGetContents(page))
287 /*
288  * Pointer to the data portion of a posting tree page. For internal pages,
289  * that's the beginning of the array of PostingItems. For compressed leaf
290  * pages, the first compressed posting list. For uncompressed (pre-9.4) leaf
291  * pages, it's the beginning of the ItemPointer array.
292  */
293 #define GinDataPageGetData(page)	\
294 	(PageGetContents(page) + MAXALIGN(sizeof(ItemPointerData)))
295 /* non-leaf pages contain PostingItems */
296 #define GinDataPageGetPostingItem(page, i)	\
297 	((PostingItem *) (GinDataPageGetData(page) + ((i)-1) * sizeof(PostingItem)))
298 
299 /*
300  * Note: there is no GinDataPageGetDataSize macro, because before version
301  * 9.4, we didn't set pd_lower on data pages. There can be pages in the index
302  * that were binary-upgraded from earlier versions and still have an invalid
303  * pd_lower, so we cannot trust it in general. Compressed posting tree leaf
304  * pages are new in 9.4, however, so we can trust them; see
305  * GinDataLeafPageGetPostingListSize.
306  */
307 #define GinDataPageSetDataSize(page, size) \
308 	{ \
309 		Assert(size <= GinDataPageMaxDataSize); \
310 		((PageHeader) page)->pd_lower = (size) + MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(ItemPointerData)); \
311 	}
312 
313 #define GinNonLeafDataPageGetFreeSpace(page)	\
314 	(GinDataPageMaxDataSize - \
315 	 GinPageGetOpaque(page)->maxoff * sizeof(PostingItem))
316 
317 #define GinDataPageMaxDataSize	\
318 	(BLCKSZ - MAXALIGN(SizeOfPageHeaderData) \
319 	 - MAXALIGN(sizeof(ItemPointerData)) \
320 	 - MAXALIGN(sizeof(GinPageOpaqueData)))
321 
322 /*
323  * List pages
324  */
325 #define GinListPageSize  \
326 	( BLCKSZ - SizeOfPageHeaderData - MAXALIGN(sizeof(GinPageOpaqueData)) )
327 
328 /*
329  * Storage type for GIN's reloptions
330  */
331 typedef struct GinOptions
332 {
333 	int32		vl_len_;		/* varlena header (do not touch directly!) */
334 	bool		useFastUpdate;	/* use fast updates? */
335 	int			pendingListCleanupSize; /* maximum size of pending list */
336 } GinOptions;
337 
338 #define GIN_DEFAULT_USE_FASTUPDATE	true
339 #define GinGetUseFastUpdate(relation) \
340 	((relation)->rd_options ? \
341 	 ((GinOptions *) (relation)->rd_options)->useFastUpdate : GIN_DEFAULT_USE_FASTUPDATE)
342 #define GinGetPendingListCleanupSize(relation) \
343 	((relation)->rd_options && \
344 	 ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize != -1 ? \
345 	 ((GinOptions *) (relation)->rd_options)->pendingListCleanupSize : \
346 	 gin_pending_list_limit)
347 
348 
349 /* Macros for buffer lock/unlock operations */
350 #define GIN_UNLOCK	BUFFER_LOCK_UNLOCK
351 #define GIN_SHARE	BUFFER_LOCK_SHARE
352 #define GIN_EXCLUSIVE  BUFFER_LOCK_EXCLUSIVE
353 
354 
355 /*
356  * GinState: working data structure describing the index being worked on
357  */
358 typedef struct GinState
359 {
360 	Relation	index;
361 	bool		oneCol;			/* true if single-column index */
362 
363 	/*
364 	 * origTupDesc is the nominal tuple descriptor of the index, ie, the i'th
365 	 * attribute shows the key type (not the input data type!) of the i'th
366 	 * index column.  In a single-column index this describes the actual leaf
367 	 * index tuples.  In a multi-column index, the actual leaf tuples contain
368 	 * a smallint column number followed by a key datum of the appropriate
369 	 * type for that column.  We set up tupdesc[i] to describe the actual
370 	 * rowtype of the index tuples for the i'th column, ie, (int2, keytype).
371 	 * Note that in any case, leaf tuples contain more data than is known to
372 	 * the TupleDesc; see access/gin/README for details.
373 	 */
374 	TupleDesc	origTupdesc;
375 	TupleDesc	tupdesc[INDEX_MAX_KEYS];
376 
377 	/*
378 	 * Per-index-column opclass support functions
379 	 */
380 	FmgrInfo	compareFn[INDEX_MAX_KEYS];
381 	FmgrInfo	extractValueFn[INDEX_MAX_KEYS];
382 	FmgrInfo	extractQueryFn[INDEX_MAX_KEYS];
383 	FmgrInfo	consistentFn[INDEX_MAX_KEYS];
384 	FmgrInfo	triConsistentFn[INDEX_MAX_KEYS];
385 	FmgrInfo	comparePartialFn[INDEX_MAX_KEYS];		/* optional method */
386 	/* canPartialMatch[i] is true if comparePartialFn[i] is valid */
387 	bool		canPartialMatch[INDEX_MAX_KEYS];
388 	/* Collations to pass to the support functions */
389 	Oid			supportCollation[INDEX_MAX_KEYS];
390 } GinState;
391 
392 
393 /*
394  * A compressed posting list.
395  *
396  * Note: This requires 2-byte alignment.
397  */
398 typedef struct
399 {
400 	ItemPointerData first;		/* first item in this posting list (unpacked) */
401 	uint16		nbytes;			/* number of bytes that follow */
402 	unsigned char bytes[FLEXIBLE_ARRAY_MEMBER]; /* varbyte encoded items */
403 } GinPostingList;
404 
405 #define SizeOfGinPostingList(plist) (offsetof(GinPostingList, bytes) + SHORTALIGN((plist)->nbytes) )
406 #define GinNextPostingListSegment(cur) ((GinPostingList *) (((char *) (cur)) + SizeOfGinPostingList((cur))))
407 
408 
409 /* XLog stuff */
410 
411 #define XLOG_GIN_CREATE_INDEX  0x00
412 
413 #define XLOG_GIN_CREATE_PTREE  0x10
414 
415 typedef struct ginxlogCreatePostingTree
416 {
417 	uint32		size;
418 	/* A compressed posting list follows */
419 } ginxlogCreatePostingTree;
420 
421 /*
422  * The format of the insertion record varies depending on the page type.
423  * ginxlogInsert is the common part between all variants.
424  *
425  * Backup Blk 0: target page
426  * Backup Blk 1: left child, if this insertion finishes an incomplete split
427  */
428 
429 #define XLOG_GIN_INSERT  0x20
430 
431 typedef struct
432 {
433 	uint16		flags;			/* GIN_INSERT_ISLEAF and/or GIN_INSERT_ISDATA */
434 
435 	/*
436 	 * FOLLOWS:
437 	 *
438 	 * 1. if not leaf page, block numbers of the left and right child pages
439 	 * whose split this insertion finishes, as BlockIdData[2] (beware of
440 	 * adding fields in this struct that would make them not 16-bit aligned)
441 	 *
442 	 * 2. a ginxlogInsertEntry or ginxlogRecompressDataLeaf struct, depending
443 	 * on tree type.
444 	 *
445 	 * NB: the below structs are only 16-bit aligned when appended to a
446 	 * ginxlogInsert struct! Beware of adding fields to them that require
447 	 * stricter alignment.
448 	 */
449 } ginxlogInsert;
450 
451 typedef struct
452 {
453 	OffsetNumber offset;
454 	bool		isDelete;
455 	IndexTupleData tuple;		/* variable length */
456 } ginxlogInsertEntry;
457 
458 
459 typedef struct
460 {
461 	uint16		nactions;
462 
463 	/* Variable number of 'actions' follow */
464 } ginxlogRecompressDataLeaf;
465 
466 /*
467  * Note: this struct is currently not used in code, and only acts as
468  * documentation. The WAL record format is as specified here, but the code
469  * uses straight access through a Pointer and memcpy to read/write these.
470  */
471 typedef struct
472 {
473 	uint8		segno;			/* segment this action applies to */
474 	char		type;			/* action type (see below) */
475 
476 	/*
477 	 * Action-specific data follows. For INSERT and REPLACE actions that is a
478 	 * GinPostingList struct. For ADDITEMS, a uint16 for the number of items
479 	 * added, followed by the items themselves as ItemPointers. DELETE actions
480 	 * have no further data.
481 	 */
482 }	ginxlogSegmentAction;
483 
484 /* Action types */
485 #define GIN_SEGMENT_UNMODIFIED	0		/* no action (not used in WAL records) */
486 #define GIN_SEGMENT_DELETE		1		/* a whole segment is removed */
487 #define GIN_SEGMENT_INSERT		2		/* a whole segment is added */
488 #define GIN_SEGMENT_REPLACE		3		/* a segment is replaced */
489 #define GIN_SEGMENT_ADDITEMS	4		/* items are added to existing segment */
490 
491 typedef struct
492 {
493 	OffsetNumber offset;
494 	PostingItem newitem;
495 } ginxlogInsertDataInternal;
496 
497 /*
498  * Backup Blk 0: new left page (= original page, if not root split)
499  * Backup Blk 1: new right page
500  * Backup Blk 2: original page / new root page, if root split
501  * Backup Blk 3: left child, if this insertion completes an earlier split
502  */
503 #define XLOG_GIN_SPLIT	0x30
504 
505 typedef struct ginxlogSplit
506 {
507 	RelFileNode node;
508 	BlockNumber rrlink;			/* right link, or root's blocknumber if root
509 								 * split */
510 	BlockNumber leftChildBlkno; /* valid on a non-leaf split */
511 	BlockNumber rightChildBlkno;
512 	uint16		flags;			/* see below */
513 } ginxlogSplit;
514 
515 /*
516  * Flags used in ginxlogInsert and ginxlogSplit records
517  */
518 #define GIN_INSERT_ISDATA	0x01	/* for both insert and split records */
519 #define GIN_INSERT_ISLEAF	0x02	/* ditto */
520 #define GIN_SPLIT_ROOT		0x04	/* only for split records */
521 
522 /*
523  * Vacuum simply WAL-logs the whole page, when anything is modified. This
524  * is functionally identical to heap_newpage records, but is kept separate for
525  * debugging purposes. (When inspecting the WAL stream, it's easier to see
526  * what's going on when GIN vacuum records are marked as such, not as heap
527  * records.) This is currently only used for entry tree leaf pages.
528  */
529 #define XLOG_GIN_VACUUM_PAGE	0x40
530 
531 /*
532  * Vacuuming posting tree leaf page is WAL-logged like recompression caused
533  * by insertion.
534  */
535 #define XLOG_GIN_VACUUM_DATA_LEAF_PAGE	0x90
536 
537 typedef struct ginxlogVacuumDataLeafPage
538 {
539 	ginxlogRecompressDataLeaf data;
540 } ginxlogVacuumDataLeafPage;
541 
542 /*
543  * Backup Blk 0: deleted page
544  * Backup Blk 1: parent
545  * Backup Blk 2: left sibling
546  */
547 #define XLOG_GIN_DELETE_PAGE	0x50
548 
549 typedef struct ginxlogDeletePage
550 {
551 	OffsetNumber parentOffset;
552 	BlockNumber rightLink;
553 	TransactionId deleteXid;	/* last Xid which could see this page in scan */
554 } ginxlogDeletePage;
555 
556 /*
557  * Previous version of ginxlogDeletePage struct, which didn't have deleteXid
558  * field.  Used for size comparison (see ginRedoDeletePage()).
559  */
560 typedef struct ginxlogDeletePageOld
561 {
562 	OffsetNumber parentOffset;
563 	BlockNumber rightLink;
564 } ginxlogDeletePageOld;
565 
566 #define XLOG_GIN_UPDATE_META_PAGE 0x60
567 
568 /*
569  * Backup Blk 0: metapage
570  * Backup Blk 1: tail page
571  */
572 typedef struct ginxlogUpdateMeta
573 {
574 	RelFileNode node;
575 	GinMetaPageData metadata;
576 	BlockNumber prevTail;
577 	BlockNumber newRightlink;
578 	int32		ntuples;		/* if ntuples > 0 then metadata.tail was
579 								 * updated with that many tuples; else new sub
580 								 * list was inserted */
581 	/* array of inserted tuples follows */
582 } ginxlogUpdateMeta;
583 
584 #define XLOG_GIN_INSERT_LISTPAGE  0x70
585 
586 typedef struct ginxlogInsertListPage
587 {
588 	BlockNumber rightlink;
589 	int32		ntuples;
590 	/* array of inserted tuples follows */
591 } ginxlogInsertListPage;
592 
593 /*
594  * Backup Blk 0: metapage
595  * Backup Blk 1 to (ndeleted + 1): deleted pages
596  */
597 
598 #define XLOG_GIN_DELETE_LISTPAGE  0x80
599 
600 /*
601  * The WAL record for deleting list pages must contain a block reference to
602  * all the deleted pages, so the number of pages that can be deleted in one
603  * record is limited by XLR_MAX_BLOCK_ID. (block_id 0 is used for the
604  * metapage.)
605  */
606 #define GIN_NDELETE_AT_ONCE Min(16, XLR_MAX_BLOCK_ID - 1)
607 typedef struct ginxlogDeleteListPages
608 {
609 	GinMetaPageData metadata;
610 	int32		ndeleted;
611 } ginxlogDeleteListPages;
612 
613 
614 /* ginutil.c */
615 extern Datum ginhandler(PG_FUNCTION_ARGS);
616 extern bytea *ginoptions(Datum reloptions, bool validate);
617 extern void initGinState(GinState *state, Relation index);
618 extern Buffer GinNewBuffer(Relation index);
619 extern void GinInitBuffer(Buffer b, uint32 f);
620 extern void GinInitPage(Page page, uint32 f, Size pageSize);
621 extern void GinInitMetabuffer(Buffer b);
622 extern int ginCompareEntries(GinState *ginstate, OffsetNumber attnum,
623 				  Datum a, GinNullCategory categorya,
624 				  Datum b, GinNullCategory categoryb);
625 extern int ginCompareAttEntries(GinState *ginstate,
626 					 OffsetNumber attnuma, Datum a, GinNullCategory categorya,
627 				   OffsetNumber attnumb, Datum b, GinNullCategory categoryb);
628 extern Datum *ginExtractEntries(GinState *ginstate, OffsetNumber attnum,
629 				  Datum value, bool isNull,
630 				  int32 *nentries, GinNullCategory **categories);
631 
632 extern OffsetNumber gintuple_get_attrnum(GinState *ginstate, IndexTuple tuple);
633 extern Datum gintuple_get_key(GinState *ginstate, IndexTuple tuple,
634 				 GinNullCategory *category);
635 
636 /* gininsert.c */
637 extern IndexBuildResult *ginbuild(Relation heap, Relation index,
638 		 struct IndexInfo *indexInfo);
639 extern void ginbuildempty(Relation index);
640 extern bool gininsert(Relation index, Datum *values, bool *isnull,
641 		  ItemPointer ht_ctid, Relation heapRel,
642 		  IndexUniqueCheck checkUnique);
643 extern void ginEntryInsert(GinState *ginstate,
644 			   OffsetNumber attnum, Datum key, GinNullCategory category,
645 			   ItemPointerData *items, uint32 nitem,
646 			   GinStatsData *buildStats);
647 
648 /* ginbtree.c */
649 
650 typedef struct GinBtreeStack
651 {
652 	BlockNumber blkno;
653 	Buffer		buffer;
654 	OffsetNumber off;
655 	ItemPointerData iptr;
656 	/* predictNumber contains predicted number of pages on current level */
657 	uint32		predictNumber;
658 	struct GinBtreeStack *parent;
659 } GinBtreeStack;
660 
661 typedef struct GinBtreeData *GinBtree;
662 
663 /* Return codes for GinBtreeData.beginPlaceToPage method */
664 typedef enum
665 {
666 	GPTP_NO_WORK,
667 	GPTP_INSERT,
668 	GPTP_SPLIT
669 } GinPlaceToPageRC;
670 
671 typedef struct GinBtreeData
672 {
673 	/* search methods */
674 	BlockNumber (*findChildPage) (GinBtree, GinBtreeStack *);
675 	BlockNumber (*getLeftMostChild) (GinBtree, Page);
676 	bool		(*isMoveRight) (GinBtree, Page);
677 	bool		(*findItem) (GinBtree, GinBtreeStack *);
678 
679 	/* insert methods */
680 	OffsetNumber (*findChildPtr) (GinBtree, Page, BlockNumber, OffsetNumber);
681 	GinPlaceToPageRC (*beginPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void **, Page *, Page *);
682 	void		(*execPlaceToPage) (GinBtree, Buffer, GinBtreeStack *, void *, BlockNumber, void *);
683 	void	   *(*prepareDownlink) (GinBtree, Buffer);
684 	void		(*fillRoot) (GinBtree, Page, BlockNumber, Page, BlockNumber, Page);
685 
686 	bool		isData;
687 
688 	Relation	index;
689 	BlockNumber rootBlkno;
690 	GinState   *ginstate;		/* not valid in a data scan */
691 	bool		fullScan;
692 	bool		isBuild;
693 
694 	/* Search key for Entry tree */
695 	OffsetNumber entryAttnum;
696 	Datum		entryKey;
697 	GinNullCategory entryCategory;
698 
699 	/* Search key for data tree (posting tree) */
700 	ItemPointerData itemptr;
701 } GinBtreeData;
702 
703 /* This represents a tuple to be inserted to entry tree. */
704 typedef struct
705 {
706 	IndexTuple	entry;			/* tuple to insert */
707 	bool		isDelete;		/* delete old tuple at same offset? */
708 } GinBtreeEntryInsertData;
709 
710 /*
711  * This represents an itempointer, or many itempointers, to be inserted to
712  * a data (posting tree) leaf page
713  */
714 typedef struct
715 {
716 	ItemPointerData *items;
717 	uint32		nitem;
718 	uint32		curitem;
719 } GinBtreeDataLeafInsertData;
720 
721 /*
722  * For internal data (posting tree) pages, the insertion payload is a
723  * PostingItem
724  */
725 
726 extern GinBtreeStack *ginFindLeafPage(GinBtree btree, bool searchMode, Snapshot snapshot);
727 extern Buffer ginStepRight(Buffer buffer, Relation index, int lockmode);
728 extern void freeGinBtreeStack(GinBtreeStack *stack);
729 extern void ginInsertValue(GinBtree btree, GinBtreeStack *stack,
730 			   void *insertdata, GinStatsData *buildStats);
731 
732 /* ginentrypage.c */
733 extern IndexTuple GinFormTuple(GinState *ginstate,
734 			 OffsetNumber attnum, Datum key, GinNullCategory category,
735 			 Pointer data, Size dataSize, int nipd, bool errorTooBig);
736 extern void ginPrepareEntryScan(GinBtree btree, OffsetNumber attnum,
737 					Datum key, GinNullCategory category,
738 					GinState *ginstate);
739 extern void ginEntryFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
740 extern ItemPointer ginReadTuple(GinState *ginstate, OffsetNumber attnum,
741 			 IndexTuple itup, int *nitems);
742 
743 /* gindatapage.c */
744 extern ItemPointer GinDataLeafPageGetItems(Page page, int *nitems, ItemPointerData advancePast);
745 extern int	GinDataLeafPageGetItemsToTbm(Page page, TIDBitmap *tbm);
746 extern BlockNumber createPostingTree(Relation index,
747 				  ItemPointerData *items, uint32 nitems,
748 				  GinStatsData *buildStats);
749 extern void GinDataPageAddPostingItem(Page page, PostingItem *data, OffsetNumber offset);
750 extern void GinPageDeletePostingItem(Page page, OffsetNumber offset);
751 extern void ginInsertItemPointers(Relation index, BlockNumber rootBlkno,
752 					  ItemPointerData *items, uint32 nitem,
753 					  GinStatsData *buildStats);
754 extern GinBtreeStack *ginScanBeginPostingTree(GinBtree btree, Relation index, BlockNumber rootBlkno, Snapshot snapshot);
755 extern void ginDataFillRoot(GinBtree btree, Page root, BlockNumber lblkno, Page lpage, BlockNumber rblkno, Page rpage);
756 extern void ginPrepareDataScan(GinBtree btree, Relation index, BlockNumber rootBlkno);
757 
758 /*
759  * This is declared in ginvacuum.c, but is passed between ginVacuumItemPointers
760  * and ginVacuumPostingTreeLeaf and as an opaque struct, so we need a forward
761  * declaration for it.
762  */
763 typedef struct GinVacuumState GinVacuumState;
764 
765 extern void ginVacuumPostingTreeLeaf(Relation rel, Buffer buf, GinVacuumState *gvs);
766 
767 /* ginscan.c */
768 
769 /*
770  * GinScanKeyData describes a single GIN index qualifier expression.
771  *
772  * From each qual expression, we extract one or more specific index search
773  * conditions, which are represented by GinScanEntryData.  It's quite
774  * possible for identical search conditions to be requested by more than
775  * one qual expression, in which case we merge such conditions to have just
776  * one unique GinScanEntry --- this is particularly important for efficiency
777  * when dealing with full-index-scan entries.  So there can be multiple
778  * GinScanKeyData.scanEntry pointers to the same GinScanEntryData.
779  *
780  * In each GinScanKeyData, nentries is the true number of entries, while
781  * nuserentries is the number that extractQueryFn returned (which is what
782  * we report to consistentFn).  The "user" entries must come first.
783  */
784 typedef struct GinScanKeyData *GinScanKey;
785 
786 typedef struct GinScanEntryData *GinScanEntry;
787 
788 typedef struct GinScanKeyData
789 {
790 	/* Real number of entries in scanEntry[] (always > 0) */
791 	uint32		nentries;
792 	/* Number of entries that extractQueryFn and consistentFn know about */
793 	uint32		nuserentries;
794 
795 	/* array of GinScanEntry pointers, one per extracted search condition */
796 	GinScanEntry *scanEntry;
797 
798 	/*
799 	 * At least one of the entries in requiredEntries must be present for a
800 	 * tuple to match the overall qual.
801 	 *
802 	 * additionalEntries contains entries that are needed by the consistent
803 	 * function to decide if an item matches, but are not sufficient to
804 	 * satisfy the qual without entries from requiredEntries.
805 	 */
806 	GinScanEntry *requiredEntries;
807 	int			nrequired;
808 	GinScanEntry *additionalEntries;
809 	int			nadditional;
810 
811 	/* array of check flags, reported to consistentFn */
812 	bool	   *entryRes;
813 	bool		(*boolConsistentFn) (GinScanKey key);
814 	GinTernaryValue (*triConsistentFn) (GinScanKey key);
815 	FmgrInfo   *consistentFmgrInfo;
816 	FmgrInfo   *triConsistentFmgrInfo;
817 	Oid			collation;
818 
819 	/* other data needed for calling consistentFn */
820 	Datum		query;
821 	/* NB: these three arrays have only nuserentries elements! */
822 	Datum	   *queryValues;
823 	GinNullCategory *queryCategories;
824 	Pointer    *extra_data;
825 	StrategyNumber strategy;
826 	int32		searchMode;
827 	OffsetNumber attnum;
828 
829 	/*
830 	 * Match status data.  curItem is the TID most recently tested (could be a
831 	 * lossy-page pointer).  curItemMatches is TRUE if it passes the
832 	 * consistentFn test; if so, recheckCurItem is the recheck flag.
833 	 * isFinished means that all the input entry streams are finished, so this
834 	 * key cannot succeed for any later TIDs.
835 	 */
836 	ItemPointerData curItem;
837 	bool		curItemMatches;
838 	bool		recheckCurItem;
839 	bool		isFinished;
840 }	GinScanKeyData;
841 
842 typedef struct GinScanEntryData
843 {
844 	/* query key and other information from extractQueryFn */
845 	Datum		queryKey;
846 	GinNullCategory queryCategory;
847 	bool		isPartialMatch;
848 	Pointer		extra_data;
849 	StrategyNumber strategy;
850 	int32		searchMode;
851 	OffsetNumber attnum;
852 
853 	/* Current page in posting tree */
854 	Buffer		buffer;
855 
856 	/* current ItemPointer to heap */
857 	ItemPointerData curItem;
858 
859 	/* for a partial-match or full-scan query, we accumulate all TIDs here */
860 	TIDBitmap  *matchBitmap;
861 	TBMIterator *matchIterator;
862 	TBMIterateResult *matchResult;
863 
864 	/* used for Posting list and one page in Posting tree */
865 	ItemPointerData *list;
866 	int			nlist;
867 	OffsetNumber offset;
868 
869 	bool		isFinished;
870 	bool		reduceResult;
871 	uint32		predictNumberResult;
872 	GinBtreeData btree;
873 }	GinScanEntryData;
874 
875 typedef struct GinScanOpaqueData
876 {
877 	MemoryContext tempCtx;
878 	GinState	ginstate;
879 
880 	GinScanKey	keys;			/* one per scan qualifier expr */
881 	uint32		nkeys;
882 
883 	GinScanEntry *entries;		/* one per index search condition */
884 	uint32		totalentries;
885 	uint32		allocentries;	/* allocated length of entries[] */
886 
887 	MemoryContext keyCtx;		/* used to hold key and entry data */
888 
889 	bool		isVoidRes;		/* true if query is unsatisfiable */
890 } GinScanOpaqueData;
891 
892 typedef GinScanOpaqueData *GinScanOpaque;
893 
894 extern IndexScanDesc ginbeginscan(Relation rel, int nkeys, int norderbys);
895 extern void ginendscan(IndexScanDesc scan);
896 extern void ginrescan(IndexScanDesc scan, ScanKey key, int nscankeys,
897 		  ScanKey orderbys, int norderbys);
898 extern void ginNewScanKey(IndexScanDesc scan);
899 extern void ginFreeScanKeys(GinScanOpaque so);
900 
901 /* ginget.c */
902 extern int64 gingetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
903 
904 /* ginfast.c */
905 extern Datum gin_clean_pending_list(PG_FUNCTION_ARGS);
906 
907 /* ginlogic.c */
908 extern void ginInitConsistentFunction(GinState *ginstate, GinScanKey key);
909 
910 /* ginvacuum.c */
911 extern IndexBulkDeleteResult *ginbulkdelete(IndexVacuumInfo *info,
912 			  IndexBulkDeleteResult *stats,
913 			  IndexBulkDeleteCallback callback,
914 			  void *callback_state);
915 extern IndexBulkDeleteResult *ginvacuumcleanup(IndexVacuumInfo *info,
916 				 IndexBulkDeleteResult *stats);
917 extern ItemPointer ginVacuumItemPointers(GinVacuumState *gvs,
918 					  ItemPointerData *items, int nitem, int *nremaining);
919 
920 /* ginvalidate.c */
921 extern bool ginvalidate(Oid opclassoid);
922 
923 /* ginbulk.c */
924 typedef struct GinEntryAccumulator
925 {
926 	RBNode		rbnode;
927 	Datum		key;
928 	GinNullCategory category;
929 	OffsetNumber attnum;
930 	bool		shouldSort;
931 	ItemPointerData *list;
932 	uint32		maxcount;		/* allocated size of list[] */
933 	uint32		count;			/* current number of list[] entries */
934 } GinEntryAccumulator;
935 
936 typedef struct
937 {
938 	GinState   *ginstate;
939 	Size		allocatedMemory;
940 	GinEntryAccumulator *entryallocator;
941 	uint32		eas_used;
942 	RBTree	   *tree;
943 } BuildAccumulator;
944 
945 extern void ginInitBA(BuildAccumulator *accum);
946 extern void ginInsertBAEntries(BuildAccumulator *accum,
947 				   ItemPointer heapptr, OffsetNumber attnum,
948 				   Datum *entries, GinNullCategory *categories,
949 				   int32 nentries);
950 extern void ginBeginBAScan(BuildAccumulator *accum);
951 extern ItemPointerData *ginGetBAEntry(BuildAccumulator *accum,
952 			  OffsetNumber *attnum, Datum *key, GinNullCategory *category,
953 			  uint32 *n);
954 
955 /* ginfast.c */
956 
957 typedef struct GinTupleCollector
958 {
959 	IndexTuple *tuples;
960 	uint32		ntuples;
961 	uint32		lentuples;
962 	uint32		sumsize;
963 } GinTupleCollector;
964 
965 extern void ginHeapTupleFastInsert(GinState *ginstate,
966 					   GinTupleCollector *collector);
967 extern void ginHeapTupleFastCollect(GinState *ginstate,
968 						GinTupleCollector *collector,
969 						OffsetNumber attnum, Datum value, bool isNull,
970 						ItemPointer ht_ctid);
971 extern void ginInsertCleanup(GinState *ginstate, bool full_clean,
972 				 bool fill_fsm, bool forceCleanup, IndexBulkDeleteResult *stats);
973 
974 /* ginpostinglist.c */
975 
976 extern GinPostingList *ginCompressPostingList(const ItemPointer ptrs, int nptrs,
977 					   int maxsize, int *nwritten);
978 extern int	ginPostingListDecodeAllSegmentsToTbm(GinPostingList *ptr, int totalsize, TIDBitmap *tbm);
979 
980 extern ItemPointer ginPostingListDecodeAllSegments(GinPostingList *ptr, int len, int *ndecoded);
981 extern ItemPointer ginPostingListDecode(GinPostingList *ptr, int *ndecoded);
982 extern ItemPointer ginMergeItemPointers(ItemPointerData *a, uint32 na,
983 					 ItemPointerData *b, uint32 nb,
984 					 int *nmerged);
985 
986 /*
987  * Merging the results of several gin scans compares item pointers a lot,
988  * so we want this to be inlined.
989  */
990 static inline int
ginCompareItemPointers(ItemPointer a,ItemPointer b)991 ginCompareItemPointers(ItemPointer a, ItemPointer b)
992 {
993 	uint64		ia = (uint64) a->ip_blkid.bi_hi << 32 | (uint64) a->ip_blkid.bi_lo << 16 | a->ip_posid;
994 	uint64		ib = (uint64) b->ip_blkid.bi_hi << 32 | (uint64) b->ip_blkid.bi_lo << 16 | b->ip_posid;
995 
996 	if (ia == ib)
997 		return 0;
998 	else if (ia > ib)
999 		return 1;
1000 	else
1001 		return -1;
1002 }
1003 
1004 #endif   /* GIN_PRIVATE_H */
1005