/*-------------------------------------------------------------------------
 *
 * nbtree.h
 *	  header file for postgres btree access method implementation.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/nbtree.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef NBTREE_H
#define NBTREE_H

#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "access/xlogreader.h"
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/shm_toc.h"

/* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
typedef uint16 BTCycleId;

/*
 *	BTPageOpaqueData -- At the end of every page, we store a pointer
 *	to both siblings in the tree.  This is used to do forward/backward
 *	index scans.  The next-page link is also critical for recovery when
 *	a search has navigated to the wrong page due to concurrent page splits
 *	or deletions; see src/backend/access/nbtree/README for more info.
 *
 *	In addition, we store the page's btree level (counting upwards from
 *	zero at a leaf page) as well as some flag bits indicating the page type
 *	and status.  If the page is deleted, we replace the level with the
 *	next-transaction-ID value indicating when it is safe to reclaim the page.
 *
 *	We also store a "vacuum cycle ID".  When a page is split while VACUUM is
 *	processing the index, a nonzero value associated with the VACUUM run is
 *	stored into both halves of the split page.  (If VACUUM is not running,
 *	both pages receive zero cycleids.)	This allows VACUUM to detect whether
 *	a page was split since it started, with a small probability of false match
 *	if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs
 *	ago.  Also, during a split, the BTP_SPLIT_END flag is cleared in the left
 *	(original) page, and set in the right page, but only if the next page
 *	to its right has a different cycleid.
 *
 *	NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
 *	instead.
 */

typedef struct BTPageOpaqueData
{
	BlockNumber btpo_prev;		/* left sibling, or P_NONE if leftmost */
	BlockNumber btpo_next;		/* right sibling, or P_NONE if rightmost */
	union
	{
		uint32		level;		/* tree level --- zero for leaf pages */
		TransactionId xact;		/* next transaction ID, if deleted */
	}			btpo;
	uint16		btpo_flags;		/* flag bits, see below */
	BTCycleId	btpo_cycleid;	/* vacuum cycle ID of latest split */
} BTPageOpaqueData;

typedef BTPageOpaqueData *BTPageOpaque;
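
/*
 * The opaque data lives in the page's special space; callers reach it by
 * casting, as in this illustrative sketch (using the status macros defined
 * below; assumes the caller holds at least a shared lock on the buffer):
 *
 *		Page		page = BufferGetPage(buf);
 *		BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 *
 *		if (P_ISLEAF(opaque))
 *			... leaf page: opaque->btpo.level is zero here ...
 */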

/* Bits defined in btpo_flags */
#define BTP_LEAF		(1 << 0)	/* leaf page, i.e. not internal page */
#define BTP_ROOT		(1 << 1)	/* root page (has no parent) */
#define BTP_DELETED		(1 << 2)	/* page has been deleted from tree */
#define BTP_META		(1 << 3)	/* meta-page */
#define BTP_HALF_DEAD	(1 << 4)	/* empty, but still in tree */
#define BTP_SPLIT_END	(1 << 5)	/* rightmost page of split group */
#define BTP_HAS_GARBAGE (1 << 6)	/* page has LP_DEAD tuples */
#define BTP_INCOMPLETE_SPLIT (1 << 7)	/* right sibling's downlink is missing */

/*
 * The max allowed value of a cycle ID is a bit less than 64K.  This is
 * for convenience of pg_filedump and similar utilities: we want to use
 * the last 2 bytes of special space as an index type indicator, and
 * restricting cycle ID lets btree use that space for vacuum cycle IDs
 * while still allowing index type to be identified.
 */
#define MAX_BT_CYCLE_ID		0xFF7F


/*
 * The Meta page is always the first page in the btree index.
 * Its primary purpose is to point to the location of the btree root page.
 * We also point to the "fast" root, which is the current effective root;
 * see README for discussion.
 */

typedef struct BTMetaPageData
{
	uint32		btm_magic;		/* should contain BTREE_MAGIC */
	uint32		btm_version;	/* nbtree version (always <= BTREE_VERSION) */
	BlockNumber btm_root;		/* current root location */
	uint32		btm_level;		/* tree level of the root page */
	BlockNumber btm_fastroot;	/* current "fast" root location */
	uint32		btm_fastlevel;	/* tree level of the "fast" root page */
	/* remaining fields only valid when btm_version is BTREE_VERSION */
	TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
										 * pages */
	float8		btm_last_cleanup_num_heap_tuples;	/* number of heap tuples
													 * during last cleanup */
} BTMetaPageData;

#define BTPageGetMeta(p) \
	((BTMetaPageData *) PageGetContents(p))

#define BTREE_METAPAGE	0		/* first page is meta */
#define BTREE_MAGIC		0x053162	/* magic number of btree pages */
#define BTREE_VERSION	3		/* current version number */
#define BTREE_MIN_VERSION	2	/* minimal supported version number */
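
/*
 * Reading the metapage follows this shape (illustrative sketch only, with
 * error handling abbreviated; _bt_getroot() is the main real consumer):
 *
 *		Buffer		metabuf = _bt_getbuf(rel, BTREE_METAPAGE, BT_READ);
 *		Page		metapg = BufferGetPage(metabuf);
 *		BTMetaPageData *metad = BTPageGetMeta(metapg);
 *
 *		if (metad->btm_magic != BTREE_MAGIC ||
 *			metad->btm_version < BTREE_MIN_VERSION)
 *			elog(ERROR, "index is not a btree, or has wrong version");
 *		... use metad->btm_fastroot and metad->btm_fastlevel ...
 *		_bt_relbuf(rel, metabuf);
 */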

/*
 * Maximum size of a btree index entry, including its tuple header.
 *
 * We actually need to be able to fit three items on every page,
 * so restrict any one item to 1/3 the per-page available space.
 */
#define BTMaxItemSize(page) \
	MAXALIGN_DOWN((PageGetPageSize(page) - \
				   MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
				   MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
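
/*
 * Insertion code enforces this limit roughly as follows (a sketch of the
 * check nbtinsert.c performs; the real code issues a detailed ereport):
 *
 *		Size		itemsz = MAXALIGN(IndexTupleSize(itup));
 *
 *		if (itemsz > BTMaxItemSize(page))
 *			elog(ERROR, "index row size %zu exceeds maximum for index",
 *				 itemsz);
 */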

/*
 * The leaf-page fillfactor defaults to 90% but is user-adjustable.
 * For pages above the leaf level, we use a fixed 70% fillfactor.
 * The fillfactor is applied during index build and when splitting
 * a rightmost page; when splitting non-rightmost pages we try to
 * divide the data equally.
 */
#define BTREE_MIN_FILLFACTOR		10
#define BTREE_DEFAULT_FILLFACTOR	90
#define BTREE_NONLEAF_FILLFACTOR	70

/*
 *	In general, the btree code tries to localize its knowledge about
 *	page layout to a couple of routines.  However, we need a special
 *	value to indicate "no page number" in those places where we expect
 *	page numbers.  We can use zero for this because we never need to
 *	make a pointer to the metadata page.
 */

#define P_NONE			0

/*
 * Macros to test whether a page is leftmost or rightmost on its tree level,
 * as well as other state info kept in the opaque data.
 */
#define P_LEFTMOST(opaque)		((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque)		((opaque)->btpo_next == P_NONE)
#define P_ISLEAF(opaque)		(((opaque)->btpo_flags & BTP_LEAF) != 0)
#define P_ISROOT(opaque)		(((opaque)->btpo_flags & BTP_ROOT) != 0)
#define P_ISDELETED(opaque)		(((opaque)->btpo_flags & BTP_DELETED) != 0)
#define P_ISMETA(opaque)		(((opaque)->btpo_flags & BTP_META) != 0)
#define P_ISHALFDEAD(opaque)	(((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
#define P_IGNORE(opaque)		(((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
#define P_HAS_GARBAGE(opaque)	(((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
#define P_INCOMPLETE_SPLIT(opaque)	(((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)

/*
 *	Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
 *	page.  The high key is not a data key, but gives info about what range of
 *	keys is supposed to be on this page.  The high key on a page is required
 *	to be greater than or equal to any data key that appears on the page.
 *	If we find ourselves trying to insert a key > high key, we know we need
 *	to move right (this should only happen if the page was split since we
 *	examined the parent page).
 *
 *	Our insertion algorithm guarantees that we can use the initial least key
 *	on our right sibling as the high key.  Once a page is created, its high
 *	key changes only if the page is split.
 *
 *	On a non-rightmost page, the high key lives in item 1 and data items
 *	start in item 2.  Rightmost pages have no high key, so we store data
 *	items beginning in item 1.
 */

#define P_HIKEY				((OffsetNumber) 1)
#define P_FIRSTKEY			((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque)	(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
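
/*
 * With these macros, the canonical loop over a page's data items looks
 * like the following sketch (the same shape appears throughout nbtsearch.c
 * and nbtutils.c; declarations abbreviated):
 *
 *		OffsetNumber minoff = P_FIRSTDATAKEY(opaque);
 *		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
 *		OffsetNumber off;
 *
 *		for (off = minoff; off <= maxoff; off = OffsetNumberNext(off))
 *		{
 *			IndexTuple	itup = (IndexTuple)
 *				PageGetItem(page, PageGetItemId(page, off));
 *
 *			... examine itup ...
 *		}
 */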

/*
 * INCLUDE B-Tree indexes have non-key attributes.  These are extra
 * attributes that may be returned by index-only scans, but do not influence
 * the order of items in the index (formally, non-key attributes are not
 * considered to be part of the key space).  Non-key attributes are only
 * present in leaf index tuples whose item pointers actually point to heap
 * tuples.  All other types of index tuples (collectively, "pivot" tuples)
 * only have key attributes, since pivot tuples only ever need to represent
 * how the key space is separated.  In general, any B-Tree index that has
 * more than one level (i.e. any index that does not just consist of a
 * metapage and a single leaf root page) must have some number of pivot
 * tuples, since pivot tuples are used for traversing the tree.
 *
 * We store the number of attributes present inside pivot tuples by abusing
 * their item pointer offset field, since pivot tuples never need to store a
 * real offset (downlinks only need to store a block number).  The offset
 * field only stores the number of attributes when the INDEX_ALT_TID_MASK
 * bit is set (we never assume that pivot tuples must explicitly store the
 * number of attributes, and currently do not bother storing the number of
 * attributes unless indnkeyatts actually differs from indnatts).
 * INDEX_ALT_TID_MASK is only used for pivot tuples at present, though it's
 * possible that it will be used within non-pivot tuples in the future.  Do
 * not assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot
 * tuple.
 *
 * The 12 least significant offset bits are used to represent the number of
 * attributes in INDEX_ALT_TID_MASK tuples, leaving 4 bits that are reserved
 * for future use (BT_RESERVED_OFFSET_MASK bits). BT_N_KEYS_OFFSET_MASK should
 * be large enough to store any number <= INDEX_MAX_KEYS.
 */
#define INDEX_ALT_TID_MASK			INDEX_AM_RESERVED_BIT
#define BT_RESERVED_OFFSET_MASK		0xF000
#define BT_N_KEYS_OFFSET_MASK		0x0FFF

/* Get/set downlink block number */
#define BTreeInnerTupleGetDownLink(itup) \
	ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeInnerTupleSetDownLink(itup, blkno) \
	ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))

/*
 * Get/set leaf page highkey's link. During the second phase of deletion, the
 * target leaf page's high key may point to an ancestor page (at all other
 * times, the leaf level high key's link is not used).  See the nbtree README
 * for full details.
 */
#define BTreeTupleGetTopParent(itup) \
	ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeTupleSetTopParent(itup, blkno)	\
	do { \
		ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
		BTreeTupleSetNAtts((itup), 0); \
	} while(0)

/*
 * Get/set number of attributes within B-tree index tuple.  The asserts
 * should be removed once the BT_RESERVED_OFFSET_MASK bits start to be used.
 */
#define BTreeTupleGetNAtts(itup, rel)	\
	( \
		(itup)->t_info & INDEX_ALT_TID_MASK ? \
		( \
			AssertMacro((ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_RESERVED_OFFSET_MASK) == 0), \
			ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \
		) \
		: \
		IndexRelationGetNumberOfAttributes(rel) \
	)
#define BTreeTupleSetNAtts(itup, n) \
	do { \
		(itup)->t_info |= INDEX_ALT_TID_MASK; \
		Assert(((n) & BT_RESERVED_OFFSET_MASK) == 0); \
		ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \
	} while(0)
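
/*
 * For instance, when a new pivot tuple is built from a leaf tuple, the
 * non-key attributes are truncated away and the key attribute count is
 * stamped into the offset field, conceptually (a sketch of what
 * _bt_nonkey_truncate() arranges; see its prototype below):
 *
 *		IndexTuple	pivot = _bt_nonkey_truncate(rel, itup);
 *
 *		Assert(BTreeTupleGetNAtts(pivot, rel) ==
 *			   IndexRelationGetNumberOfKeyAttributes(rel));
 */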

/*
 *	Operator strategy numbers for B-tree have been moved to access/stratnum.h,
 *	because many places need to use them in ScanKeyInit() calls.
 *
 *	The strategy numbers are chosen so that we can commute them by
 *	subtraction, thus:
 */
#define BTCommuteStrategyNumber(strat)	(BTMaxStrategyNumber + 1 - (strat))
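
/*
 * For example, with the standard numbering (1 = "<" through 5 = ">"),
 * commuting "a < b" yields the strategy for "b > a":
 *
 *		BTCommuteStrategyNumber(BTLessStrategyNumber) == BTGreaterStrategyNumber
 */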

/*
 *	When a new operator class is declared, we require that the user
 *	supply us with an amproc procedure (BTORDER_PROC) for determining
 *	whether, for two keys a and b, a < b, a = b, or a > b.  This routine
 *	must return < 0, 0, > 0, respectively, in these three cases.
 *
 *	To facilitate accelerated sorting, an operator class may choose to
 *	offer a second procedure (BTSORTSUPPORT_PROC).  For full details, see
 *	src/include/utils/sortsupport.h.
 *
 *	To support window frames defined by "RANGE offset PRECEDING/FOLLOWING",
 *	an operator class may choose to offer a third amproc procedure
 *	(BTINRANGE_PROC), independently of whether it offers sortsupport.
 *	For full details, see doc/src/sgml/btree.sgml.
 */

#define BTORDER_PROC		1
#define BTSORTSUPPORT_PROC	2
#define BTINRANGE_PROC		3
#define BTNProcs			3
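
/*
 * A minimal BTORDER_PROC for a hypothetical int4-keyed opclass might look
 * like this (illustrative only; "my_int4_cmp" is not a real function.  See
 * btint4cmp() in nbtcompare.c for a shipped example):
 *
 *		Datum
 *		my_int4_cmp(PG_FUNCTION_ARGS)
 *		{
 *			int32		a = PG_GETARG_INT32(0);
 *			int32		b = PG_GETARG_INT32(1);
 *
 *			if (a > b)
 *				PG_RETURN_INT32(1);
 *			else if (a == b)
 *				PG_RETURN_INT32(0);
 *			else
 *				PG_RETURN_INT32(-1);
 *		}
 */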

/*
 *	We need to be able to tell the difference between read and write
 *	requests for pages, in order to do locking correctly.
 */

#define BT_READ			BUFFER_LOCK_SHARE
#define BT_WRITE		BUFFER_LOCK_EXCLUSIVE

/*
 *	BTStackData -- As we descend a tree, we push the (location, downlink)
 *	pairs from internal pages onto a private stack.  If we split a
 *	leaf, we use this stack to walk back up the tree and insert data
 *	into parent pages (and possibly to split them, too).  Lehman and
 *	Yao's update algorithm guarantees that under no circumstances can
 *	our private stack give us an irredeemably bad picture up the tree.
 *	Again, see the paper for details.
 */

typedef struct BTStackData
{
	BlockNumber bts_blkno;
	OffsetNumber bts_offset;
	BlockNumber bts_btentry;
	struct BTStackData *bts_parent;
} BTStackData;

typedef BTStackData *BTStack;

/*
 * BTScanOpaqueData is the btree-private state needed for an indexscan.
 * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
 * details of the preprocessing), information about the current location
 * of the scan, and information about the marked location, if any.  (We use
 * BTScanPosData to represent the data needed for each of current and marked
 * locations.)	In addition we can remember some known-killed index entries
 * that must be marked before we can move off the current page.
 *
 * Index scans work a page at a time: we pin and read-lock the page, identify
 * all the matching items on the page and save them in BTScanPosData, then
 * release the read-lock while returning the items to the caller for
 * processing.  This approach minimizes lock/unlock traffic.  Note that we
 * keep the pin on the index page until the caller is done with all the items
 * (this is needed for VACUUM synchronization, see nbtree/README).  When we
 * are ready to step to the next page, if the caller has told us any of the
 * items were killed, we re-lock the page to mark them killed, then unlock.
 * Finally we drop the pin and step to the next page in the appropriate
 * direction.
 *
 * If we are doing an index-only scan, we save the entire IndexTuple for each
 * matched item, otherwise only its heap TID and offset.  The IndexTuples go
 * into a separate workspace array; each BTScanPosItem stores its tuple's
 * offset within that array.
 */

typedef struct BTScanPosItem	/* what we remember about each match */
{
	ItemPointerData heapTid;	/* TID of referenced heap item */
	OffsetNumber indexOffset;	/* index item's location within page */
	LocationIndex tupleOffset;	/* IndexTuple's offset in workspace, if any */
} BTScanPosItem;

typedef struct BTScanPosData
{
	Buffer		buf;			/* if valid, the buffer is pinned */

	XLogRecPtr	lsn;			/* pos in the WAL stream when page was read */
	BlockNumber currPage;		/* page referenced by items array */
	BlockNumber nextPage;		/* page's right link when we scanned it */

	/*
	 * moreLeft and moreRight track whether we think there may be matching
	 * index entries to the left and right of the current page, respectively.
	 * We can clear the appropriate one of these flags when _bt_checkkeys()
	 * returns continuescan = false.
	 */
	bool		moreLeft;
	bool		moreRight;

	/*
	 * If we are doing an index-only scan, nextTupleOffset is the first free
	 * location in the associated tuple storage workspace.
	 */
	int			nextTupleOffset;

	/*
	 * The items array is always ordered in index order (ie, increasing
	 * indexoffset).  When scanning backwards it is convenient to fill the
	 * array back-to-front, so we start at the last slot and fill downwards.
	 * Hence we need both a first-valid-entry and a last-valid-entry counter.
	 * itemIndex is a cursor showing which entry was last returned to caller.
	 */
	int			firstItem;		/* first valid index in items[] */
	int			lastItem;		/* last valid index in items[] */
	int			itemIndex;		/* current index in items[] */

	BTScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */
} BTScanPosData;

typedef BTScanPosData *BTScanPos;
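
/*
 * Consuming saved matches then reduces to walking items[] between firstItem
 * and lastItem, roughly as btgettuple() does via _bt_next() (sketch only;
 * "so" is the scan's BTScanOpaque, defined below):
 *
 *		BTScanPosItem *currItem = &so->currPos.items[so->currPos.itemIndex];
 *
 *		scan->xs_ctup.t_self = currItem->heapTid;
 *		if (scan->xs_want_itup)
 *			scan->xs_itup = (IndexTuple)
 *				(so->currTuples + currItem->tupleOffset);
 */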

#define BTScanPosIsPinned(scanpos) \
( \
	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
				!BufferIsValid((scanpos).buf)), \
	BufferIsValid((scanpos).buf) \
)
#define BTScanPosUnpin(scanpos) \
	do { \
		ReleaseBuffer((scanpos).buf); \
		(scanpos).buf = InvalidBuffer; \
	} while (0)
#define BTScanPosUnpinIfPinned(scanpos) \
	do { \
		if (BTScanPosIsPinned(scanpos)) \
			BTScanPosUnpin(scanpos); \
	} while (0)

#define BTScanPosIsValid(scanpos) \
( \
	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
				!BufferIsValid((scanpos).buf)), \
	BlockNumberIsValid((scanpos).currPage) \
)
#define BTScanPosInvalidate(scanpos) \
	do { \
		(scanpos).currPage = InvalidBlockNumber; \
		(scanpos).nextPage = InvalidBlockNumber; \
		(scanpos).buf = InvalidBuffer; \
		(scanpos).lsn = InvalidXLogRecPtr; \
		(scanpos).nextTupleOffset = 0; \
	} while (0)
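
/*
 * The usual teardown pattern, as in btrescan() and btendscan(), is to drop
 * any remaining pin and then mark the position invalid (sketch; the real
 * code also deals with killed items first):
 *
 *		if (BTScanPosIsValid(so->currPos))
 *		{
 *			BTScanPosUnpinIfPinned(so->currPos);
 *			BTScanPosInvalidate(so->currPos);
 *		}
 */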

/* We need one of these for each equality-type SK_SEARCHARRAY scan key */
typedef struct BTArrayKeyInfo
{
	int			scan_key;		/* index of associated key in arrayKeyData */
	int			cur_elem;		/* index of current element in elem_values */
	int			mark_elem;		/* index of marked element in elem_values */
	int			num_elems;		/* number of elems in current array value */
	Datum	   *elem_values;	/* array of num_elems Datums */
} BTArrayKeyInfo;

typedef struct BTScanOpaqueData
{
	/* these fields are set by _bt_preprocess_keys(): */
	bool		qual_ok;		/* false if qual can never be satisfied */
	int			numberOfKeys;	/* number of preprocessed scan keys */
	ScanKey		keyData;		/* array of preprocessed scan keys */

	/* workspace for SK_SEARCHARRAY support */
	ScanKey		arrayKeyData;	/* modified copy of scan->keyData */
	int			numArrayKeys;	/* number of equality-type array keys (-1 if
								 * there are any unsatisfiable array keys) */
	int			arrayKeyCount;	/* count indicating number of array scan keys
								 * processed */
	BTArrayKeyInfo *arrayKeys;	/* info about each equality-type array key */
	MemoryContext arrayContext; /* scan-lifespan context for array data */

	/* info about killed items if any (killedItems is NULL if never used) */
	int		   *killedItems;	/* currPos.items indexes of killed items */
	int			numKilled;		/* number of currently stored items */

	/*
	 * If we are doing an index-only scan, these are the tuple storage
	 * workspaces for the currPos and markPos respectively.  Each is of size
	 * BLCKSZ, so it can hold as much as a full page's worth of tuples.
	 */
	char	   *currTuples;		/* tuple storage for currPos */
	char	   *markTuples;		/* tuple storage for markPos */

	/*
	 * If the marked position is on the same page as current position, we
	 * don't use markPos, but just keep the marked itemIndex in markItemIndex
	 * (all the rest of currPos is valid for the mark position). Hence, to
	 * determine if there is a mark, first look at markItemIndex, then at
	 * markPos.
	 */
	int			markItemIndex;	/* itemIndex, or -1 if not valid */

	/* keep these last in struct for efficiency */
	BTScanPosData currPos;		/* current position data */
	BTScanPosData markPos;		/* marked position, if any */
} BTScanOpaqueData;

typedef BTScanOpaqueData *BTScanOpaque;
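
/*
 * Each AM entry point recovers this state from the scan descriptor.  On the
 * first call there is no valid position yet, so _bt_first() must locate the
 * starting point; later calls continue with _bt_next().  The dispatch at
 * the heart of btgettuple() looks roughly like this sketch:
 *
 *		BTScanOpaque so = (BTScanOpaque) scan->opaque;
 *		bool		res;
 *
 *		if (!BTScanPosIsValid(so->currPos))
 *			res = _bt_first(scan, dir);
 *		else
 *			res = _bt_next(scan, dir);
 */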

/*
 * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
 * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
 * index's indoption[] array entry for the index attribute.
 */
#define SK_BT_REQFWD	0x00010000	/* required to continue forward scan */
#define SK_BT_REQBKWD	0x00020000	/* required to continue backward scan */
#define SK_BT_INDOPTION_SHIFT  24	/* must clear the above bits */
#define SK_BT_DESC			(INDOPTION_DESC << SK_BT_INDOPTION_SHIFT)
#define SK_BT_NULLS_FIRST	(INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT)
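
/*
 * Comparison code can then honor a DESC column by negating the support
 * function's result, conceptually (a sketch of the idea only; datum1 and
 * datum2 are hypothetical, and the real _bt_compare() logic in nbtsearch.c
 * also accounts for argument order and NULL handling):
 *
 *		result = DatumGetInt32(FunctionCall2Coll(&scankey->sk_func,
 *												 scankey->sk_collation,
 *												 datum1, datum2));
 *		if (scankey->sk_flags & SK_BT_DESC)
 *			result = -result;
 */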

/*
 * external entry points for btree, in nbtree.c
 */
extern void btbuildempty(Relation index);
extern bool btinsert(Relation rel, Datum *values, bool *isnull,
		 ItemPointer ht_ctid, Relation heapRel,
		 IndexUniqueCheck checkUnique,
		 struct IndexInfo *indexInfo);
extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
extern Size btestimateparallelscan(void);
extern void btinitparallelscan(void *target);
extern bool btgettuple(IndexScanDesc scan, ScanDirection dir);
extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
		 ScanKey orderbys, int norderbys);
extern void btparallelrescan(IndexScanDesc scan);
extern void btendscan(IndexScanDesc scan);
extern void btmarkpos(IndexScanDesc scan);
extern void btrestrpos(IndexScanDesc scan);
extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info,
			 IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback,
			 void *callback_state);
extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info,
				IndexBulkDeleteResult *stats);
extern bool btcanreturn(Relation index, int attno);

/*
 * prototypes for internal functions in nbtree.c
 */
extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno);
extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
extern void _bt_parallel_done(IndexScanDesc scan);
extern void _bt_parallel_advance_array_keys(IndexScanDesc scan);

/*
 * prototypes for functions in nbtinsert.c
 */
extern bool _bt_doinsert(Relation rel, IndexTuple itup,
			 IndexUniqueCheck checkUnique, Relation heapRel);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_finish_split(Relation rel, Buffer bbuf, BTStack stack);

/*
 * prototypes for functions in nbtpage.c
 */
extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
extern void _bt_update_meta_cleanup_info(Relation rel,
							 TransactionId oldestBtpoXact, float8 numHeapTuples);
extern void _bt_upgrademetapage(Page page);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_gettrueroot(Relation rel);
extern int	_bt_getrootheight(Relation rel);
extern void _bt_checkpage(Relation rel, Buffer buf);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
				 BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern bool _bt_page_recyclable(Page page);
extern void _bt_delitems_delete(Relation rel, Buffer buf,
					OffsetNumber *itemnos, int nitems, Relation heapRel);
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
					OffsetNumber *itemnos, int nitems,
					BlockNumber lastBlockVacuumed);
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
						  TransactionId *oldestBtpoXact);

/*
 * prototypes for functions in nbtsearch.c
 */
extern BTStack _bt_search(Relation rel,
		   int keysz, ScanKey scankey, bool nextkey,
		   Buffer *bufP, int access, Snapshot snapshot);
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
			  ScanKey scankey, bool nextkey, bool forupdate, BTStack stack,
			  int access, Snapshot snapshot);
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
			ScanKey scankey, bool nextkey);
extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
			Page page, OffsetNumber offnum);
extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
				 Snapshot snapshot);
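
/*
 * Taken together, a single descent to a leaf page follows this shape
 * (illustrative sketch; "keysz" stands for the number of key columns used,
 * and real callers such as _bt_doinsert() add locking and unique-check
 * details).  The helpers used here are declared in this section and the
 * nbtutils.c section below:
 *
 *		ScanKey		skey = _bt_mkscankey(rel, itup);
 *		Buffer		buf;
 *		BTStack		stack;
 *		OffsetNumber off;
 *
 *		stack = _bt_search(rel, keysz, skey, false, &buf, BT_READ, snapshot);
 *		off = _bt_binsrch(rel, buf, keysz, skey, false);
 *		... examine the leaf page at "off" ...
 *		_bt_relbuf(rel, buf);
 *		_bt_freestack(stack);
 *		_bt_freeskey(skey);
 */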

/*
 * prototypes for functions in nbtutils.c
 */
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
extern ScanKey _bt_mkscankey_nodata(Relation rel);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
extern void _bt_preprocess_array_keys(IndexScanDesc scan);
extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
extern void _bt_mark_array_keys(IndexScanDesc scan);
extern void _bt_restore_array_keys(IndexScanDesc scan);
extern void _bt_preprocess_keys(IndexScanDesc scan);
extern IndexTuple _bt_checkkeys(IndexScanDesc scan,
			  Page page, OffsetNumber offnum,
			  ScanDirection dir, bool *continuescan);
extern void _bt_killitems(IndexScanDesc scan);
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);
extern void _bt_end_vacuum(Relation rel);
extern void _bt_end_vacuum_callback(int code, Datum arg);
extern Size BTreeShmemSize(void);
extern void BTreeShmemInit(void);
extern bytea *btoptions(Datum reloptions, bool validate);
extern bool btproperty(Oid index_oid, int attno,
		   IndexAMProperty prop, const char *propname,
		   bool *res, bool *isnull);
extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup);
extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum);

/*
 * prototypes for functions in nbtvalidate.c
 */
extern bool btvalidate(Oid opclassoid);

/*
 * prototypes for functions in nbtsort.c
 */
extern IndexBuildResult *btbuild(Relation heap, Relation index,
		struct IndexInfo *indexInfo);
extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc);

#endif							/* NBTREE_H */