/*-------------------------------------------------------------------------
 *
 * nbtree.h
 *	  header file for postgres btree access method implementation.
 *
 *
 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 * src/include/access/nbtree.h
 *
 *-------------------------------------------------------------------------
 */
#ifndef NBTREE_H
#define NBTREE_H

#include "access/amapi.h"
#include "access/itup.h"
#include "access/sdir.h"
#include "access/xlogreader.h"
#include "catalog/pg_index.h"
#include "lib/stringinfo.h"
#include "storage/bufmgr.h"
#include "storage/shm_toc.h"

/* There's room for a 16-bit vacuum cycle ID in BTPageOpaqueData */
typedef uint16 BTCycleId;

/*
 *	BTPageOpaqueData -- At the end of every page, we store a pointer
 *	to both siblings in the tree.  This is used to do forward/backward
 *	index scans.  The next-page link is also critical for recovery when
 *	a search has navigated to the wrong page due to concurrent page splits
 *	or deletions; see src/backend/access/nbtree/README for more info.
 *
 *	In addition, we store the page's btree level (counting upwards from
 *	zero at a leaf page) as well as some flag bits indicating the page type
 *	and status.  If the page is deleted, we replace the level with the
 *	next-transaction-ID value indicating when it is safe to reclaim the page.
 *
 *	We also store a "vacuum cycle ID".  When a page is split while VACUUM is
 *	processing the index, a nonzero value associated with the VACUUM run is
 *	stored into both halves of the split page.  (If VACUUM is not running,
 *	both pages receive zero cycleids.)	This allows VACUUM to detect whether
 *	a page was split since it started, with a small probability of false match
 *	if the page was last split some exact multiple of MAX_BT_CYCLE_ID VACUUMs
 *	ago.  Also, during a split, the BTP_SPLIT_END flag is cleared in the left
 *	(original) page, and set in the right page, but only if the next page
 *	to its right has a different cycleid.
 *
 *	NOTE: the BTP_LEAF flag bit is redundant since level==0 could be tested
 *	instead.
 */

typedef struct BTPageOpaqueData
{
	BlockNumber btpo_prev;		/* left sibling, or P_NONE if leftmost */
	BlockNumber btpo_next;		/* right sibling, or P_NONE if rightmost */
	union
	{
		uint32		level;		/* tree level --- zero for leaf pages */
		TransactionId xact;		/* next transaction ID, if deleted */
	}			btpo;
	uint16		btpo_flags;		/* flag bits, see below */
	BTCycleId	btpo_cycleid;	/* vacuum cycle ID of latest split */
} BTPageOpaqueData;

typedef BTPageOpaqueData *BTPageOpaque;

/* Bits defined in btpo_flags */
#define BTP_LEAF		(1 << 0)	/* leaf page, i.e. not internal page */
#define BTP_ROOT		(1 << 1)	/* root page (has no parent) */
#define BTP_DELETED		(1 << 2)	/* page has been deleted from tree */
#define BTP_META		(1 << 3)	/* meta-page */
#define BTP_HALF_DEAD	(1 << 4)	/* empty, but still in tree */
#define BTP_SPLIT_END	(1 << 5)	/* rightmost page of split group */
#define BTP_HAS_GARBAGE (1 << 6)	/* page has LP_DEAD tuples */
#define BTP_INCOMPLETE_SPLIT (1 << 7)	/* right sibling's downlink is missing */
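/*
 * Illustrative sketch (not part of this header's API): the opaque data
 * lives in the page's special space, so callers typically fetch it and
 * test flag bits along these lines, assuming "page" has already been
 * read into a buffer:
 *
 *		BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
 *
 *		if (opaque->btpo_flags & BTP_LEAF)
 *			... page is a leaf ...
 */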
/*
 * The max allowed value of a cycle ID is a bit less than 64K.  This is
 * for convenience of pg_filedump and similar utilities: we want to use
 * the last 2 bytes of special space as an index type indicator, and
 * restricting cycle ID lets btree use that space for vacuum cycle IDs
 * while still allowing index type to be identified.
 */
#define MAX_BT_CYCLE_ID		0xFF7F


/*
 * The Meta page is always the first page in the btree index.
 * Its primary purpose is to point to the location of the btree root page.
 * We also point to the "fast" root, which is the current effective root;
 * see README for discussion.
 */

typedef struct BTMetaPageData
{
	uint32		btm_magic;		/* should contain BTREE_MAGIC */
	uint32		btm_version;	/* nbtree version (always <= BTREE_VERSION) */
	BlockNumber btm_root;		/* current root location */
	uint32		btm_level;		/* tree level of the root page */
	BlockNumber btm_fastroot;	/* current "fast" root location */
	uint32		btm_fastlevel;	/* tree level of the "fast" root page */
	/* remaining fields only valid when btm_version is BTREE_VERSION */
	TransactionId btm_oldest_btpo_xact; /* oldest btpo_xact among all deleted
										 * pages */
	float8		btm_last_cleanup_num_heap_tuples;	/* number of heap tuples
													 * during last cleanup */
} BTMetaPageData;

#define BTPageGetMeta(p) \
	((BTMetaPageData *) PageGetContents(p))

#define BTREE_METAPAGE	0		/* first page is meta */
#define BTREE_MAGIC		0x053162	/* magic number of btree pages */
#define BTREE_VERSION	3		/* current version number */
#define BTREE_MIN_VERSION	2	/* minimal supported version number */

/*
 * Maximum size of a btree index entry, including its tuple header.
 *
 * We actually need to be able to fit three items on every page,
 * so restrict any one item to 1/3 the per-page available space.
 */
#define BTMaxItemSize(page) \
	MAXALIGN_DOWN((PageGetPageSize(page) - \
				   MAXALIGN(SizeOfPageHeaderData + 3*sizeof(ItemIdData)) - \
				   MAXALIGN(sizeof(BTPageOpaqueData))) / 3)
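/*
 * Worked example (an illustration, assuming the default BLCKSZ of 8192 and
 * 8-byte MAXALIGN): the page header plus three line pointers take
 * MAXALIGN(24 + 3*4) = 40 bytes, and the special space takes
 * MAXALIGN(sizeof(BTPageOpaqueData)) = 16 bytes, so BTMaxItemSize works out
 * to MAXALIGN_DOWN((8192 - 40 - 16) / 3) = 2712 bytes, the familiar btree
 * maximum item size on 8K pages.
 */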
/*
 * The leaf-page fillfactor defaults to 90% but is user-adjustable.
 * For pages above the leaf level, we use a fixed 70% fillfactor.
 * The fillfactor is applied during index build and when splitting
 * a rightmost page; when splitting non-rightmost pages we try to
 * divide the data equally.
 */
#define BTREE_MIN_FILLFACTOR		10
#define BTREE_DEFAULT_FILLFACTOR	90
#define BTREE_NONLEAF_FILLFACTOR	70

/*
 *	In general, the btree code tries to localize its knowledge about
 *	page layout to a couple of routines.  However, we need a special
 *	value to indicate "no page number" in those places where we expect
 *	page numbers.  We can use zero for this because we never need to
 *	make a pointer to the metadata page.
 */

#define P_NONE			0

/*
 * Macros to test whether a page is leftmost or rightmost on its tree level,
 * as well as other state info kept in the opaque data.
 */
#define P_LEFTMOST(opaque)		((opaque)->btpo_prev == P_NONE)
#define P_RIGHTMOST(opaque)		((opaque)->btpo_next == P_NONE)
#define P_ISLEAF(opaque)		(((opaque)->btpo_flags & BTP_LEAF) != 0)
#define P_ISROOT(opaque)		(((opaque)->btpo_flags & BTP_ROOT) != 0)
#define P_ISDELETED(opaque)		(((opaque)->btpo_flags & BTP_DELETED) != 0)
#define P_ISMETA(opaque)		(((opaque)->btpo_flags & BTP_META) != 0)
#define P_ISHALFDEAD(opaque)	(((opaque)->btpo_flags & BTP_HALF_DEAD) != 0)
#define P_IGNORE(opaque)		(((opaque)->btpo_flags & (BTP_DELETED|BTP_HALF_DEAD)) != 0)
#define P_HAS_GARBAGE(opaque)	(((opaque)->btpo_flags & BTP_HAS_GARBAGE) != 0)
#define P_INCOMPLETE_SPLIT(opaque)	(((opaque)->btpo_flags & BTP_INCOMPLETE_SPLIT) != 0)

/*
 *	Lehman and Yao's algorithm requires a ``high key'' on every non-rightmost
 *	page.  The high key is not a data key, but gives info about what range of
 *	keys is supposed to be on this page.  The high key on a page is required
 *	to be greater than or equal to any data key that appears on the page.
 *	If we find ourselves trying to insert a key > high key, we know we need
 *	to move right (this should only happen if the page was split since we
 *	examined the parent page).
 *
 *	Our insertion algorithm guarantees that we can use the initial least key
 *	on our right sibling as the high key.  Once a page is created, its high
 *	key changes only if the page is split.
 *
 *	On a non-rightmost page, the high key lives in item 1 and data items
 *	start in item 2.  Rightmost pages have no high key, so we store data
 *	items beginning in item 1.
 */

#define P_HIKEY				((OffsetNumber) 1)
#define P_FIRSTKEY			((OffsetNumber) 2)
#define P_FIRSTDATAKEY(opaque)	(P_RIGHTMOST(opaque) ? P_HIKEY : P_FIRSTKEY)
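/*
 * Illustrative sketch (assumes "page" and its "opaque" pointer are already
 * in hand): because the high key, when present, occupies item 1, a loop
 * over just the data items starts at P_FIRSTDATAKEY:
 *
 *		OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
 *		OffsetNumber off;
 *
 *		for (off = P_FIRSTDATAKEY(opaque); off <= maxoff;
 *			 off = OffsetNumberNext(off))
 *			... process the item at "off" ...
 */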
/*
 * INCLUDE B-Tree indexes have non-key attributes.  These are extra
 * attributes that may be returned by index-only scans, but do not influence
 * the order of items in the index (formally, non-key attributes are not
 * considered to be part of the key space).  Non-key attributes are only
 * present in leaf index tuples whose item pointers actually point to heap
 * tuples.  All other types of index tuples (collectively, "pivot" tuples)
 * only have key attributes, since pivot tuples only ever need to represent
 * how the key space is separated.  In general, any B-Tree index that has
 * more than one level (i.e. any index that does not just consist of a
 * metapage and a single leaf root page) must have some number of pivot
 * tuples, since pivot tuples are used for traversing the tree.
 *
 * We store the number of attributes present inside pivot tuples by abusing
 * their item pointer offset field, since pivot tuples never need to store a
 * real offset (downlinks only need to store a block number).  The offset
 * field only stores the number of attributes when the INDEX_ALT_TID_MASK
 * bit is set (we never assume that pivot tuples must explicitly store the
 * number of attributes, and currently do not bother storing the number of
 * attributes unless indnkeyatts actually differs from indnatts).
 * INDEX_ALT_TID_MASK is only used for pivot tuples at present, though it's
 * possible that it will be used within non-pivot tuples in the future.  Do
 * not assume that a tuple with INDEX_ALT_TID_MASK set must be a pivot
 * tuple.
 *
 * The 12 least significant offset bits are used to represent the number of
 * attributes in INDEX_ALT_TID_MASK tuples, leaving 4 bits that are reserved
 * for future use (BT_RESERVED_OFFSET_MASK bits).  BT_N_KEYS_OFFSET_MASK should
 * be large enough to store any number <= INDEX_MAX_KEYS.
 */
#define INDEX_ALT_TID_MASK		INDEX_AM_RESERVED_BIT
#define BT_RESERVED_OFFSET_MASK	0xF000
#define BT_N_KEYS_OFFSET_MASK	0x0FFF

/* Get/set downlink block number */
#define BTreeInnerTupleGetDownLink(itup) \
	ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeInnerTupleSetDownLink(itup, blkno) \
	ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno))

/*
 * Get/set leaf page highkey's link.  During the second phase of deletion, the
 * target leaf page's high key may point to an ancestor page (at all other
 * times, the leaf level high key's link is not used).  See the nbtree README
 * for full details.
 */
#define BTreeTupleGetTopParent(itup) \
	ItemPointerGetBlockNumberNoCheck(&((itup)->t_tid))
#define BTreeTupleSetTopParent(itup, blkno) \
	do { \
		ItemPointerSetBlockNumber(&((itup)->t_tid), (blkno)); \
		BTreeTupleSetNAtts((itup), 0); \
	} while(0)

/*
 * Get/set number of attributes within B-tree index tuple.  The asserts
 * should be removed once the BT_RESERVED_OFFSET_MASK bits are actually used.
 */
#define BTreeTupleGetNAtts(itup, rel)	\
	( \
		(itup)->t_info & INDEX_ALT_TID_MASK ? \
		( \
			AssertMacro((ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_RESERVED_OFFSET_MASK) == 0), \
			ItemPointerGetOffsetNumberNoCheck(&(itup)->t_tid) & BT_N_KEYS_OFFSET_MASK \
		) \
		: \
		IndexRelationGetNumberOfAttributes(rel) \
	)
#define BTreeTupleSetNAtts(itup, n) \
	do { \
		(itup)->t_info |= INDEX_ALT_TID_MASK; \
		Assert(((n) & BT_RESERVED_OFFSET_MASK) == 0); \
		ItemPointerSetOffsetNumber(&(itup)->t_tid, (n) & BT_N_KEYS_OFFSET_MASK); \
	} while(0)

/*
 *	Operator strategy numbers for B-tree have been moved to access/stratnum.h,
 *	because many places need to use them in ScanKeyInit() calls.
 *
 *	The strategy numbers are chosen so that we can commute them by
 *	subtraction (for example, BTLessStrategyNumber commutes to
 *	BTGreaterStrategyNumber), thus:
 */
#define BTCommuteStrategyNumber(strat)	(BTMaxStrategyNumber + 1 - (strat))

/*
 *	When a new operator class is declared, we require that the user
 *	supply us with an amproc procedure (BTORDER_PROC) for determining
 *	whether, for two keys a and b, a < b, a = b, or a > b.  This routine
 *	must return < 0, 0, > 0, respectively, in these three cases.
 *
 *	To facilitate accelerated sorting, an operator class may choose to
 *	offer a second procedure (BTSORTSUPPORT_PROC).  For full details, see
 *	src/include/utils/sortsupport.h.
 *
 *	To support window frames defined by "RANGE offset PRECEDING/FOLLOWING",
 *	an operator class may choose to offer a third amproc procedure
 *	(BTINRANGE_PROC), independently of whether it offers sortsupport.
 *	For full details, see doc/src/sgml/btree.sgml.
 */

#define BTORDER_PROC		1
#define BTSORTSUPPORT_PROC	2
#define BTINRANGE_PROC		3
#define BTNProcs			3
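/*
 * Illustrative sketch of the BTORDER_PROC contract (the built-in integer
 * opclasses in nbtcompare.c behave essentially like this, for keys a and b):
 *
 *		if (a < b)
 *			return -1;
 *		else if (a > b)
 *			return 1;
 *		else
 *			return 0;
 *
 * Any negative/zero/positive result satisfies the contract; -1/0/1 is just
 * the conventional choice.
 */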
/*
 *	We need to be able to tell the difference between read and write
 *	requests for pages, in order to do locking correctly.
 */

#define BT_READ			BUFFER_LOCK_SHARE
#define BT_WRITE		BUFFER_LOCK_EXCLUSIVE

/*
 *	BTStackData -- As we descend a tree, we push the (location, downlink)
 *	pairs from internal pages onto a private stack.  If we split a
 *	leaf, we use this stack to walk back up the tree and insert data
 *	into parent pages (and possibly to split them, too).  Lehman and
 *	Yao's update algorithm guarantees that under no circumstances can
 *	our private stack give us an irredeemably bad picture up the tree.
 *	Again, see the paper for details.
 */

typedef struct BTStackData
{
	BlockNumber bts_blkno;
	OffsetNumber bts_offset;
	BlockNumber bts_btentry;
	struct BTStackData *bts_parent;
} BTStackData;

typedef BTStackData *BTStack;

/*
 * BTScanOpaqueData is the btree-private state needed for an indexscan.
 * This consists of preprocessed scan keys (see _bt_preprocess_keys() for
 * details of the preprocessing), information about the current location
 * of the scan, and information about the marked location, if any.  (We use
 * BTScanPosData to represent the data needed for each of current and marked
 * locations.)	In addition we can remember some known-killed index entries
 * that must be marked before we can move off the current page.
 *
 * Index scans work a page at a time: we pin and read-lock the page, identify
 * all the matching items on the page and save them in BTScanPosData, then
 * release the read-lock while returning the items to the caller for
 * processing.  This approach minimizes lock/unlock traffic.  Note that we
 * keep the pin on the index page until the caller is done with all the items
 * (this is needed for VACUUM synchronization, see nbtree/README).  When we
 * are ready to step to the next page, if the caller has told us any of the
 * items were killed, we re-lock the page to mark them killed, then unlock.
 * Finally we drop the pin and step to the next page in the appropriate
 * direction.
 *
 * If we are doing an index-only scan, we save the entire IndexTuple for each
 * matched item, otherwise only its heap TID and offset.  The IndexTuples go
 * into a separate workspace array; each BTScanPosItem stores its tuple's
 * offset within that array.
 */

typedef struct BTScanPosItem	/* what we remember about each match */
{
	ItemPointerData heapTid;	/* TID of referenced heap item */
	OffsetNumber indexOffset;	/* index item's location within page */
	LocationIndex tupleOffset;	/* IndexTuple's offset in workspace, if any */
} BTScanPosItem;

typedef struct BTScanPosData
{
	Buffer		buf;			/* if valid, the buffer is pinned */

	XLogRecPtr	lsn;			/* pos in the WAL stream when page was read */
	BlockNumber currPage;		/* page referenced by items array */
	BlockNumber nextPage;		/* page's right link when we scanned it */

	/*
	 * moreLeft and moreRight track whether we think there may be matching
	 * index entries to the left and right of the current page, respectively.
	 * We can clear the appropriate one of these flags when _bt_checkkeys()
	 * returns continuescan = false.
	 */
	bool		moreLeft;
	bool		moreRight;

	/*
	 * If we are doing an index-only scan, nextTupleOffset is the first free
	 * location in the associated tuple storage workspace.
	 */
	int			nextTupleOffset;

	/*
	 * The items array is always ordered in index order (ie, increasing
	 * indexoffset).  When scanning backwards it is convenient to fill the
	 * array back-to-front, so we start at the last slot and fill downwards.
	 * Hence we need both a first-valid-entry and a last-valid-entry counter.
	 * itemIndex is a cursor showing which entry was last returned to caller.
	 */
	int			firstItem;		/* first valid index in items[] */
	int			lastItem;		/* last valid index in items[] */
	int			itemIndex;		/* current index in items[] */

	BTScanPosItem items[MaxIndexTuplesPerPage]; /* MUST BE LAST */
} BTScanPosData;

typedef BTScanPosData *BTScanPos;
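/*
 * Illustrative sketch (simplified from what btgettuple does in this
 * version): once a page has been processed into the current position,
 * returning the next match is just array indexing, with no page lock held:
 *
 *		BTScanOpaque so = (BTScanOpaque) scan->opaque;
 *		BTScanPosItem *currItem = &so->currPos.items[so->currPos.itemIndex];
 *
 *		scan->xs_ctup.t_self = currItem->heapTid;
 *
 * (BTScanOpaque is declared further below.)
 */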
#define BTScanPosIsPinned(scanpos) \
( \
	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
				!BufferIsValid((scanpos).buf)), \
	BufferIsValid((scanpos).buf) \
)
#define BTScanPosUnpin(scanpos) \
	do { \
		ReleaseBuffer((scanpos).buf); \
		(scanpos).buf = InvalidBuffer; \
	} while (0)
#define BTScanPosUnpinIfPinned(scanpos) \
	do { \
		if (BTScanPosIsPinned(scanpos)) \
			BTScanPosUnpin(scanpos); \
	} while (0)

#define BTScanPosIsValid(scanpos) \
( \
	AssertMacro(BlockNumberIsValid((scanpos).currPage) || \
				!BufferIsValid((scanpos).buf)), \
	BlockNumberIsValid((scanpos).currPage) \
)
#define BTScanPosInvalidate(scanpos) \
	do { \
		(scanpos).currPage = InvalidBlockNumber; \
		(scanpos).nextPage = InvalidBlockNumber; \
		(scanpos).buf = InvalidBuffer; \
		(scanpos).lsn = InvalidXLogRecPtr; \
		(scanpos).nextTupleOffset = 0; \
	} while (0)

/* We need one of these for each equality-type SK_SEARCHARRAY scan key */
typedef struct BTArrayKeyInfo
{
	int			scan_key;		/* index of associated key in arrayKeyData */
	int			cur_elem;		/* index of current element in elem_values */
	int			mark_elem;		/* index of marked element in elem_values */
	int			num_elems;		/* number of elems in current array value */
	Datum	   *elem_values;	/* array of num_elems Datums */
} BTArrayKeyInfo;
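/*
 * For instance (an illustration, not normative): a qual such as
 * "WHERE x = ANY ('{1,2,3}')" arrives as a ScalarArrayOpExpr and is
 * preprocessed into one equality-type SK_SEARCHARRAY scan key whose
 * BTArrayKeyInfo has num_elems = 3; the scan is then run once per array
 * element, advancing cur_elem between passes.
 */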
typedef struct BTScanOpaqueData
{
	/* these fields are set by _bt_preprocess_keys(): */
	bool		qual_ok;		/* false if qual can never be satisfied */
	int			numberOfKeys;	/* number of preprocessed scan keys */
	ScanKey		keyData;		/* array of preprocessed scan keys */

	/* workspace for SK_SEARCHARRAY support */
	ScanKey		arrayKeyData;	/* modified copy of scan->keyData */
	int			numArrayKeys;	/* number of equality-type array keys (-1 if
								 * there are any unsatisfiable array keys) */
	int			arrayKeyCount;	/* count indicating number of array scan keys
								 * processed */
	BTArrayKeyInfo *arrayKeys;	/* info about each equality-type array key */
	MemoryContext arrayContext; /* scan-lifespan context for array data */

	/* info about killed items if any (killedItems is NULL if never used) */
	int		   *killedItems;	/* currPos.items indexes of killed items */
	int			numKilled;		/* number of currently stored items */

	/*
	 * If we are doing an index-only scan, these are the tuple storage
	 * workspaces for the currPos and markPos respectively.  Each is of size
	 * BLCKSZ, so it can hold as much as a full page's worth of tuples.
	 */
	char	   *currTuples;		/* tuple storage for currPos */
	char	   *markTuples;		/* tuple storage for markPos */

	/*
	 * If the marked position is on the same page as current position, we
	 * don't use markPos, but just keep the marked itemIndex in markItemIndex
	 * (all the rest of currPos is valid for the mark position).  Hence, to
	 * determine if there is a mark, first look at markItemIndex, then at
	 * markPos.
	 */
	int			markItemIndex;	/* itemIndex, or -1 if not valid */

	/* keep these last in struct for efficiency */
	BTScanPosData currPos;		/* current position data */
	BTScanPosData markPos;		/* marked position, if any */
} BTScanOpaqueData;

typedef BTScanOpaqueData *BTScanOpaque;

/*
 * We use some private sk_flags bits in preprocessed scan keys.  We're allowed
 * to use bits 16-31 (see skey.h).  The uppermost bits are copied from the
 * index's indoption[] array entry for the index attribute.
 */
#define SK_BT_REQFWD	0x00010000	/* required to continue forward scan */
#define SK_BT_REQBKWD	0x00020000	/* required to continue backward scan */
#define SK_BT_INDOPTION_SHIFT  24	/* must clear the above bits */
#define SK_BT_DESC			(INDOPTION_DESC << SK_BT_INDOPTION_SHIFT)
#define SK_BT_NULLS_FIRST	(INDOPTION_NULLS_FIRST << SK_BT_INDOPTION_SHIFT)

/*
 * external entry points for btree, in nbtree.c
 */
extern void btbuildempty(Relation index);
extern bool btinsert(Relation rel, Datum *values, bool *isnull,
		 ItemPointer ht_ctid, Relation heapRel,
		 IndexUniqueCheck checkUnique,
		 struct IndexInfo *indexInfo);
extern IndexScanDesc btbeginscan(Relation rel, int nkeys, int norderbys);
extern Size btestimateparallelscan(void);
extern void btinitparallelscan(void *target);
extern bool btgettuple(IndexScanDesc scan, ScanDirection dir);
extern int64 btgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
extern void btrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
		 ScanKey orderbys, int norderbys);
extern void btparallelrescan(IndexScanDesc scan);
extern void btendscan(IndexScanDesc scan);
extern void btmarkpos(IndexScanDesc scan);
extern void btrestrpos(IndexScanDesc scan);
extern IndexBulkDeleteResult *btbulkdelete(IndexVacuumInfo *info,
			 IndexBulkDeleteResult *stats,
			 IndexBulkDeleteCallback callback,
			 void *callback_state);
extern IndexBulkDeleteResult *btvacuumcleanup(IndexVacuumInfo *info,
				IndexBulkDeleteResult *stats);
extern bool btcanreturn(Relation index, int attno);
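/*
 * Illustrative note: these entry points are not called directly.  bthandler()
 * in nbtree.c installs them into an IndexAmRoutine, along the lines of
 *
 *		amroutine->ambuild = btbuild;
 *		amroutine->aminsert = btinsert;
 *		amroutine->amgettuple = btgettuple;
 *		...
 *
 * and the rest of the server invokes them through that struct.
 */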
/*
 * prototypes for internal functions in nbtree.c
 */
extern bool _bt_parallel_seize(IndexScanDesc scan, BlockNumber *pageno);
extern void _bt_parallel_release(IndexScanDesc scan, BlockNumber scan_page);
extern void _bt_parallel_done(IndexScanDesc scan);
extern void _bt_parallel_advance_array_keys(IndexScanDesc scan);

/*
 * prototypes for functions in nbtinsert.c
 */
extern bool _bt_doinsert(Relation rel, IndexTuple itup,
			 IndexUniqueCheck checkUnique, Relation heapRel);
extern Buffer _bt_getstackbuf(Relation rel, BTStack stack, int access);
extern void _bt_finish_split(Relation rel, Buffer bbuf, BTStack stack);

/*
 * prototypes for functions in nbtpage.c
 */
extern void _bt_initmetapage(Page page, BlockNumber rootbknum, uint32 level);
extern void _bt_update_meta_cleanup_info(Relation rel,
							 TransactionId oldestBtpoXact, float8 numHeapTuples);
extern void _bt_upgrademetapage(Page page);
extern Buffer _bt_getroot(Relation rel, int access);
extern Buffer _bt_gettrueroot(Relation rel);
extern int	_bt_getrootheight(Relation rel);
extern void _bt_checkpage(Relation rel, Buffer buf);
extern Buffer _bt_getbuf(Relation rel, BlockNumber blkno, int access);
extern Buffer _bt_relandgetbuf(Relation rel, Buffer obuf,
				 BlockNumber blkno, int access);
extern void _bt_relbuf(Relation rel, Buffer buf);
extern void _bt_pageinit(Page page, Size size);
extern bool _bt_page_recyclable(Page page);
extern void _bt_delitems_delete(Relation rel, Buffer buf,
					OffsetNumber *itemnos, int nitems, Relation heapRel);
extern void _bt_delitems_vacuum(Relation rel, Buffer buf,
					OffsetNumber *itemnos, int nitems,
					BlockNumber lastBlockVacuumed);
extern uint32 _bt_pagedel(Relation rel, Buffer leafbuf,
			TransactionId *oldestBtpoXact);

/*
 * prototypes for functions in nbtsearch.c
 */
extern BTStack _bt_search(Relation rel,
		   int keysz, ScanKey scankey, bool nextkey,
		   Buffer *bufP, int access, Snapshot snapshot);
extern Buffer _bt_moveright(Relation rel, Buffer buf, int keysz,
			  ScanKey scankey, bool nextkey, bool forupdate, BTStack stack,
			  int access, Snapshot snapshot);
extern OffsetNumber _bt_binsrch(Relation rel, Buffer buf, int keysz,
			ScanKey scankey, bool nextkey);
extern int32 _bt_compare(Relation rel, int keysz, ScanKey scankey,
			Page page, OffsetNumber offnum);
extern bool _bt_first(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_next(IndexScanDesc scan, ScanDirection dir);
extern Buffer _bt_get_endpoint(Relation rel, uint32 level, bool rightmost,
				 Snapshot snapshot);

/*
 * prototypes for functions in nbtutils.c
 */
extern ScanKey _bt_mkscankey(Relation rel, IndexTuple itup);
extern ScanKey _bt_mkscankey_nodata(Relation rel);
extern void _bt_freeskey(ScanKey skey);
extern void _bt_freestack(BTStack stack);
extern void _bt_preprocess_array_keys(IndexScanDesc scan);
extern void _bt_start_array_keys(IndexScanDesc scan, ScanDirection dir);
extern bool _bt_advance_array_keys(IndexScanDesc scan, ScanDirection dir);
extern void _bt_mark_array_keys(IndexScanDesc scan);
extern void _bt_restore_array_keys(IndexScanDesc scan);
extern void _bt_preprocess_keys(IndexScanDesc scan);
extern IndexTuple _bt_checkkeys(IndexScanDesc scan,
			  Page page, OffsetNumber offnum,
			  ScanDirection dir, bool *continuescan);
extern void _bt_killitems(IndexScanDesc scan);
extern BTCycleId _bt_vacuum_cycleid(Relation rel);
extern BTCycleId _bt_start_vacuum(Relation rel);
extern void _bt_end_vacuum(Relation rel);
extern void _bt_end_vacuum_callback(int code, Datum arg);
extern Size BTreeShmemSize(void);
extern void BTreeShmemInit(void);
extern bytea *btoptions(Datum reloptions, bool validate);
extern bool btproperty(Oid index_oid, int attno,
		   IndexAMProperty prop, const char *propname,
		   bool *res, bool *isnull);
extern IndexTuple _bt_nonkey_truncate(Relation rel, IndexTuple itup);
extern bool _bt_check_natts(Relation rel, Page page, OffsetNumber offnum);

/*
 * prototypes for functions in nbtvalidate.c
 */
extern bool btvalidate(Oid opclassoid);
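/*
 * Illustrative sketch (a simplified composite, not actual nbtree code) of
 * how several of the functions declared above fit together to descend to
 * the leaf page where "itup" would go; "keysz" and "snapshot" are assumed
 * to be supplied by the caller:
 *
 *		ScanKey		skey = _bt_mkscankey(rel, itup);
 *		Buffer		buf;
 *		BTStack		stack;
 *
 *		stack = _bt_search(rel, keysz, skey, false, &buf, BT_READ, snapshot);
 *		... examine the leaf page held in "buf" ...
 *		_bt_relbuf(rel, buf);
 *		_bt_freestack(stack);
 *		_bt_freeskey(skey);
 */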
/*
 * prototypes for functions in nbtsort.c
 */
extern IndexBuildResult *btbuild(Relation heap, Relation index,
		struct IndexInfo *indexInfo);
extern void _bt_parallel_build_main(dsm_segment *seg, shm_toc *toc);

#endif							/* NBTREE_H */