1 /*------------------------------------------------------------------------- 2 * 3 * hash.h 4 * header file for postgres hash access method implementation 5 * 6 * 7 * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group 8 * Portions Copyright (c) 1994, Regents of the University of California 9 * 10 * src/include/access/hash.h 11 * 12 * NOTES 13 * modeled after Margo Seltzer's hash implementation for unix. 14 * 15 *------------------------------------------------------------------------- 16 */ 17 #ifndef HASH_H 18 #define HASH_H 19 20 #include "access/amapi.h" 21 #include "access/itup.h" 22 #include "access/sdir.h" 23 #include "access/xlogreader.h" 24 #include "fmgr.h" 25 #include "lib/stringinfo.h" 26 #include "storage/bufmgr.h" 27 #include "storage/lockdefs.h" 28 #include "utils/relcache.h" 29 30 /* 31 * Mapping from hash bucket number to physical block number of bucket's 32 * starting page. Beware of multiple evaluations of argument! 33 */ 34 typedef uint32 Bucket; 35 36 #define BUCKET_TO_BLKNO(metap,B) \ 37 ((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1) 38 39 /* 40 * Special space for hash index pages. 41 * 42 * hasho_flag tells us which type of page we're looking at. For 43 * example, knowing overflow pages from bucket pages is necessary 44 * information when you're deleting tuples from a page. If all the 45 * tuples are deleted from an overflow page, the overflow is made 46 * available to other buckets by calling _hash_freeovflpage(). If all 47 * the tuples are deleted from a bucket page, no additional action is 48 * necessary. 49 */ 50 #define LH_UNUSED_PAGE (0) 51 #define LH_OVERFLOW_PAGE (1 << 0) 52 #define LH_BUCKET_PAGE (1 << 1) 53 #define LH_BITMAP_PAGE (1 << 2) 54 #define LH_META_PAGE (1 << 3) 55 56 typedef struct HashPageOpaqueData 57 { 58 BlockNumber hasho_prevblkno; /* previous ovfl (or bucket) blkno */ 59 BlockNumber hasho_nextblkno; /* next ovfl blkno */ 60 Bucket hasho_bucket; /* bucket number this pg belongs to */ 61 uint16 hasho_flag; /* page type code, see above */ 62 uint16 hasho_page_id; /* for identification of hash indexes */ 63 } HashPageOpaqueData; 64 65 typedef HashPageOpaqueData *HashPageOpaque; 66 67 /* 68 * The page ID is for the convenience of pg_filedump and similar utilities, 69 * which otherwise would have a hard time telling pages of different index 70 * types apart. It should be the last 2 bytes on the page. This is more or 71 * less "free" due to alignment considerations. 72 */ 73 #define HASHO_PAGE_ID 0xFF80 74 75 /* 76 * HashScanOpaqueData is private state for a hash index scan. 77 */ 78 typedef struct HashScanOpaqueData 79 { 80 /* Hash value of the scan key, ie, the hash key we seek */ 81 uint32 hashso_sk_hash; 82 83 /* 84 * By definition, a hash scan should be examining only one bucket. We 85 * record the bucket number here as soon as it is known. 86 */ 87 Bucket hashso_bucket; 88 bool hashso_bucket_valid; 89 90 /* 91 * If we have a share lock on the bucket, we record it here. When 92 * hashso_bucket_blkno is zero, we have no such lock. 93 */ 94 BlockNumber hashso_bucket_blkno; 95 96 /* 97 * We also want to remember which buffer we're currently examining in the 98 * scan. We keep the buffer pinned (but not locked) across hashgettuple 99 * calls, in order to avoid doing a ReadBuffer() for every tuple in the 100 * index. 101 */ 102 Buffer hashso_curbuf; 103 104 /* Current position of the scan, as an index TID */ 105 ItemPointerData hashso_curpos; 106 107 /* Current position of the scan, as a heap TID */ 108 ItemPointerData hashso_heappos; 109 } HashScanOpaqueData; 110 111 typedef HashScanOpaqueData *HashScanOpaque; 112 113 /* 114 * Definitions for metapage. 115 */ 116 117 #define HASH_METAPAGE 0 /* metapage is always block 0 */ 118 119 #define HASH_MAGIC 0x6440640 120 #define HASH_VERSION 2 /* 2 signifies only hash key value is stored */ 121 122 /* 123 * Spares[] holds the number of overflow pages currently allocated at or 124 * before a certain splitpoint. For example, if spares[3] = 7 then there are 125 * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro). The 126 * value in spares[ovflpoint] increases as overflow pages are added at the 127 * end of the index. Once ovflpoint increases (ie, we have actually allocated 128 * the bucket pages belonging to that splitpoint) the number of spares at the 129 * prior splitpoint cannot change anymore. 130 * 131 * ovflpages that have been recycled for reuse can be found by looking at 132 * bitmaps that are stored within ovflpages dedicated for the purpose. 133 * The blknos of these bitmap pages are kept in bitmaps[]; nmaps is the 134 * number of currently existing bitmaps. 135 * 136 * The limitation on the size of spares[] comes from the fact that there's 137 * no point in having more than 2^32 buckets with only uint32 hashcodes. 138 * There is no particular upper limit on the size of mapp[], other than 139 * needing to fit into the metapage. (With 8K block size, 128 bitmaps 140 * limit us to 64 Gb of overflow space...) 141 */ 142 #define HASH_MAX_SPLITPOINTS 32 143 #define HASH_MAX_BITMAPS 128 144 145 typedef struct HashMetaPageData 146 { 147 uint32 hashm_magic; /* magic no. for hash tables */ 148 uint32 hashm_version; /* version ID */ 149 double hashm_ntuples; /* number of tuples stored in the table */ 150 uint16 hashm_ffactor; /* target fill factor (tuples/bucket) */ 151 uint16 hashm_bsize; /* index page size (bytes) */ 152 uint16 hashm_bmsize; /* bitmap array size (bytes) - must be a power 153 * of 2 */ 154 uint16 hashm_bmshift; /* log2(bitmap array size in BITS) */ 155 uint32 hashm_maxbucket; /* ID of maximum bucket in use */ 156 uint32 hashm_highmask; /* mask to modulo into entire table */ 157 uint32 hashm_lowmask; /* mask to modulo into lower half of table */ 158 uint32 hashm_ovflpoint;/* splitpoint from which ovflpgs being 159 * allocated */ 160 uint32 hashm_firstfree; /* lowest-number free ovflpage (bit#) */ 161 uint32 hashm_nmaps; /* number of bitmap pages */ 162 RegProcedure hashm_procid; /* hash procedure id from pg_proc */ 163 uint32 hashm_spares[HASH_MAX_SPLITPOINTS]; /* spare pages before 164 * each splitpoint */ 165 BlockNumber hashm_mapp[HASH_MAX_BITMAPS]; /* blknos of ovfl bitmaps */ 166 } HashMetaPageData; 167 168 typedef HashMetaPageData *HashMetaPage; 169 170 /* 171 * Maximum size of a hash index item (it's okay to have only one per page) 172 */ 173 #define HashMaxItemSize(page) \ 174 MAXALIGN_DOWN(PageGetPageSize(page) - \ 175 SizeOfPageHeaderData - \ 176 sizeof(ItemIdData) - \ 177 MAXALIGN(sizeof(HashPageOpaqueData))) 178 179 #define HASH_MIN_FILLFACTOR 10 180 #define HASH_DEFAULT_FILLFACTOR 75 181 182 /* 183 * Constants 184 */ 185 #define BYTE_TO_BIT 3 /* 2^3 bits/byte */ 186 #define ALL_SET ((uint32) ~0) 187 188 /* 189 * Bitmap pages do not contain tuples. They do contain the standard 190 * page headers and trailers; however, everything in between is a 191 * giant bit array. The number of bits that fit on a page obviously 192 * depends on the page size and the header/trailer overhead. We require 193 * the number of bits per page to be a power of 2. 194 */ 195 #define BMPGSZ_BYTE(metap) ((metap)->hashm_bmsize) 196 #define BMPGSZ_BIT(metap) ((metap)->hashm_bmsize << BYTE_TO_BIT) 197 #define BMPG_SHIFT(metap) ((metap)->hashm_bmshift) 198 #define BMPG_MASK(metap) (BMPGSZ_BIT(metap) - 1) 199 200 #define HashPageGetBitmap(page) \ 201 ((uint32 *) PageGetContents(page)) 202 203 #define HashGetMaxBitmapSize(page) \ 204 (PageGetPageSize((Page) page) - \ 205 (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData)))) 206 207 #define HashPageGetMeta(page) \ 208 ((HashMetaPage) PageGetContents(page)) 209 210 /* 211 * The number of bits in an ovflpage bitmap word. 212 */ 213 #define BITS_PER_MAP 32 /* Number of bits in uint32 */ 214 215 /* Given the address of the beginning of a bit map, clear/set the nth bit */ 216 #define CLRBIT(A, N) ((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP))) 217 #define SETBIT(A, N) ((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP))) 218 #define ISSET(A, N) ((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP))) 219 220 /* 221 * page-level and high-level locking modes (see README) 222 */ 223 #define HASH_READ BUFFER_LOCK_SHARE 224 #define HASH_WRITE BUFFER_LOCK_EXCLUSIVE 225 #define HASH_NOLOCK (-1) 226 227 #define HASH_SHARE ShareLock 228 #define HASH_EXCLUSIVE ExclusiveLock 229 230 /* 231 * Strategy number. There's only one valid strategy for hashing: equality. 232 */ 233 #define HTEqualStrategyNumber 1 234 #define HTMaxStrategyNumber 1 235 236 /* 237 * When a new operator class is declared, we require that the user supply 238 * us with an amproc procudure for hashing a key of the new type. 239 * Since we only have one such proc in amproc, it's number 1. 240 */ 241 #define HASHPROC 1 242 #define HASHNProcs 1 243 244 245 /* public routines */ 246 247 extern Datum hashhandler(PG_FUNCTION_ARGS); 248 extern IndexBuildResult *hashbuild(Relation heap, Relation index, 249 struct IndexInfo *indexInfo); 250 extern void hashbuildempty(Relation index); 251 extern bool hashinsert(Relation rel, Datum *values, bool *isnull, 252 ItemPointer ht_ctid, Relation heapRel, 253 IndexUniqueCheck checkUnique); 254 extern bool hashgettuple(IndexScanDesc scan, ScanDirection dir); 255 extern int64 hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm); 256 extern IndexScanDesc hashbeginscan(Relation rel, int nkeys, int norderbys); 257 extern void hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys, 258 ScanKey orderbys, int norderbys); 259 extern void hashendscan(IndexScanDesc scan); 260 extern IndexBulkDeleteResult *hashbulkdelete(IndexVacuumInfo *info, 261 IndexBulkDeleteResult *stats, 262 IndexBulkDeleteCallback callback, 263 void *callback_state); 264 extern IndexBulkDeleteResult *hashvacuumcleanup(IndexVacuumInfo *info, 265 IndexBulkDeleteResult *stats); 266 extern bytea *hashoptions(Datum reloptions, bool validate); 267 extern bool hashvalidate(Oid opclassoid); 268 269 /* 270 * Datatype-specific hash functions in hashfunc.c. 271 * 272 * These support both hash indexes and hash joins. 273 * 274 * NOTE: some of these are also used by catcache operations, without 275 * any direct connection to hash indexes. Also, the common hash_any 276 * routine is also used by dynahash tables. 277 */ 278 extern Datum hashchar(PG_FUNCTION_ARGS); 279 extern Datum hashint2(PG_FUNCTION_ARGS); 280 extern Datum hashint4(PG_FUNCTION_ARGS); 281 extern Datum hashint8(PG_FUNCTION_ARGS); 282 extern Datum hashoid(PG_FUNCTION_ARGS); 283 extern Datum hashenum(PG_FUNCTION_ARGS); 284 extern Datum hashfloat4(PG_FUNCTION_ARGS); 285 extern Datum hashfloat8(PG_FUNCTION_ARGS); 286 extern Datum hashoidvector(PG_FUNCTION_ARGS); 287 extern Datum hashint2vector(PG_FUNCTION_ARGS); 288 extern Datum hashname(PG_FUNCTION_ARGS); 289 extern Datum hashtext(PG_FUNCTION_ARGS); 290 extern Datum hashvarlena(PG_FUNCTION_ARGS); 291 extern Datum hash_any(register const unsigned char *k, register int keylen); 292 extern Datum hash_uint32(uint32 k); 293 294 /* private routines */ 295 296 /* hashinsert.c */ 297 extern void _hash_doinsert(Relation rel, IndexTuple itup); 298 extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf, 299 Size itemsize, IndexTuple itup); 300 301 /* hashovfl.c */ 302 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf); 303 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf, 304 BufferAccessStrategy bstrategy); 305 extern void _hash_initbitmap(Relation rel, HashMetaPage metap, 306 BlockNumber blkno, ForkNumber forkNum); 307 extern void _hash_squeezebucket(Relation rel, 308 Bucket bucket, BlockNumber bucket_blkno, 309 BufferAccessStrategy bstrategy); 310 311 /* hashpage.c */ 312 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access); 313 extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access); 314 extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access); 315 extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno, 316 int access, int flags); 317 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno); 318 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno, 319 ForkNumber forkNum); 320 extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno, 321 int access, int flags, 322 BufferAccessStrategy bstrategy); 323 extern void _hash_relbuf(Relation rel, Buffer buf); 324 extern void _hash_dropbuf(Relation rel, Buffer buf); 325 extern void _hash_wrtbuf(Relation rel, Buffer buf); 326 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access, 327 int to_access); 328 extern uint32 _hash_metapinit(Relation rel, double num_tuples, 329 ForkNumber forkNum); 330 extern void _hash_pageinit(Page page, Size size); 331 extern void _hash_expandtable(Relation rel, Buffer metabuf); 332 333 /* hashscan.c */ 334 extern void _hash_regscan(IndexScanDesc scan); 335 extern void _hash_dropscan(IndexScanDesc scan); 336 extern bool _hash_has_active_scan(Relation rel, Bucket bucket); 337 extern void ReleaseResources_hash(void); 338 339 /* hashsearch.c */ 340 extern bool _hash_next(IndexScanDesc scan, ScanDirection dir); 341 extern bool _hash_first(IndexScanDesc scan, ScanDirection dir); 342 extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir); 343 344 /* hashsort.c */ 345 typedef struct HSpool HSpool; /* opaque struct in hashsort.c */ 346 347 extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets); 348 extern void _h_spooldestroy(HSpool *hspool); 349 extern void _h_spool(HSpool *hspool, ItemPointer self, 350 Datum *values, bool *isnull); 351 extern void _h_indexbuild(HSpool *hspool); 352 353 /* hashutil.c */ 354 extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup); 355 extern uint32 _hash_datum2hashkey(Relation rel, Datum key); 356 extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype); 357 extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket, 358 uint32 highmask, uint32 lowmask); 359 extern uint32 _hash_log2(uint32 num); 360 extern void _hash_checkpage(Relation rel, Buffer buf, int flags); 361 extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup); 362 extern bool _hash_convert_tuple(Relation index, 363 Datum *user_values, bool *user_isnull, 364 Datum *index_values, bool *index_isnull); 365 extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value); 366 extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value); 367 368 /* hash.c */ 369 extern void hash_redo(XLogReaderState *record); 370 extern void hash_desc(StringInfo buf, XLogReaderState *record); 371 extern const char *hash_identify(uint8 info); 372 373 #endif /* HASH_H */ 374