1 /*-------------------------------------------------------------------------
2  *
3  * hash.h
4  *	  header file for postgres hash access method implementation
5  *
6  *
7  * Portions Copyright (c) 1996-2016, PostgreSQL Global Development Group
8  * Portions Copyright (c) 1994, Regents of the University of California
9  *
10  * src/include/access/hash.h
11  *
12  * NOTES
13  *		modeled after Margo Seltzer's hash implementation for unix.
14  *
15  *-------------------------------------------------------------------------
16  */
17 #ifndef HASH_H
18 #define HASH_H
19 
20 #include "access/amapi.h"
21 #include "access/itup.h"
22 #include "access/sdir.h"
23 #include "access/xlogreader.h"
24 #include "fmgr.h"
25 #include "lib/stringinfo.h"
26 #include "storage/bufmgr.h"
27 #include "storage/lockdefs.h"
28 #include "utils/relcache.h"
29 
30 /*
31  * Mapping from hash bucket number to physical block number of bucket's
32  * starting page.  Beware of multiple evaluations of argument!
33  */
34 typedef uint32 Bucket;
35 
36 #define BUCKET_TO_BLKNO(metap,B) \
37 		((BlockNumber) ((B) + ((B) ? (metap)->hashm_spares[_hash_log2((B)+1)-1] : 0)) + 1)
38 
39 /*
40  * Special space for hash index pages.
41  *
42  * hasho_flag tells us which type of page we're looking at.  For
43  * example, knowing overflow pages from bucket pages is necessary
44  * information when you're deleting tuples from a page. If all the
45  * tuples are deleted from an overflow page, the overflow is made
46  * available to other buckets by calling _hash_freeovflpage(). If all
47  * the tuples are deleted from a bucket page, no additional action is
48  * necessary.
49  */
50 #define LH_UNUSED_PAGE			(0)
51 #define LH_OVERFLOW_PAGE		(1 << 0)
52 #define LH_BUCKET_PAGE			(1 << 1)
53 #define LH_BITMAP_PAGE			(1 << 2)
54 #define LH_META_PAGE			(1 << 3)
55 
56 typedef struct HashPageOpaqueData
57 {
58 	BlockNumber hasho_prevblkno;	/* previous ovfl (or bucket) blkno */
59 	BlockNumber hasho_nextblkno;	/* next ovfl blkno */
60 	Bucket		hasho_bucket;	/* bucket number this pg belongs to */
61 	uint16		hasho_flag;		/* page type code, see above */
62 	uint16		hasho_page_id;	/* for identification of hash indexes */
63 } HashPageOpaqueData;
64 
65 typedef HashPageOpaqueData *HashPageOpaque;
66 
67 /*
68  * The page ID is for the convenience of pg_filedump and similar utilities,
69  * which otherwise would have a hard time telling pages of different index
70  * types apart.  It should be the last 2 bytes on the page.  This is more or
71  * less "free" due to alignment considerations.
72  */
73 #define HASHO_PAGE_ID		0xFF80
74 
75 /*
76  *	HashScanOpaqueData is private state for a hash index scan.
77  */
78 typedef struct HashScanOpaqueData
79 {
80 	/* Hash value of the scan key, ie, the hash key we seek */
81 	uint32		hashso_sk_hash;
82 
83 	/*
84 	 * By definition, a hash scan should be examining only one bucket. We
85 	 * record the bucket number here as soon as it is known.
86 	 */
87 	Bucket		hashso_bucket;
88 	bool		hashso_bucket_valid;
89 
90 	/*
91 	 * If we have a share lock on the bucket, we record it here.  When
92 	 * hashso_bucket_blkno is zero, we have no such lock.
93 	 */
94 	BlockNumber hashso_bucket_blkno;
95 
96 	/*
97 	 * We also want to remember which buffer we're currently examining in the
98 	 * scan. We keep the buffer pinned (but not locked) across hashgettuple
99 	 * calls, in order to avoid doing a ReadBuffer() for every tuple in the
100 	 * index.
101 	 */
102 	Buffer		hashso_curbuf;
103 
104 	/* Current position of the scan, as an index TID */
105 	ItemPointerData hashso_curpos;
106 
107 	/* Current position of the scan, as a heap TID */
108 	ItemPointerData hashso_heappos;
109 } HashScanOpaqueData;
110 
111 typedef HashScanOpaqueData *HashScanOpaque;
112 
113 /*
114  * Definitions for metapage.
115  */
116 
117 #define HASH_METAPAGE	0		/* metapage is always block 0 */
118 
119 #define HASH_MAGIC		0x6440640
120 #define HASH_VERSION	2		/* 2 signifies only hash key value is stored */
121 
122 /*
123  * Spares[] holds the number of overflow pages currently allocated at or
124  * before a certain splitpoint. For example, if spares[3] = 7 then there are
125  * 7 ovflpages before splitpoint 3 (compare BUCKET_TO_BLKNO macro).  The
126  * value in spares[ovflpoint] increases as overflow pages are added at the
127  * end of the index.  Once ovflpoint increases (ie, we have actually allocated
128  * the bucket pages belonging to that splitpoint) the number of spares at the
129  * prior splitpoint cannot change anymore.
130  *
131  * ovflpages that have been recycled for reuse can be found by looking at
132  * bitmaps that are stored within ovflpages dedicated for the purpose.
133  * The blknos of these bitmap pages are kept in bitmaps[]; nmaps is the
134  * number of currently existing bitmaps.
135  *
136  * The limitation on the size of spares[] comes from the fact that there's
137  * no point in having more than 2^32 buckets with only uint32 hashcodes.
138  * There is no particular upper limit on the size of mapp[], other than
139  * needing to fit into the metapage.  (With 8K block size, 128 bitmaps
140  * limit us to 64 Gb of overflow space...)
141  */
142 #define HASH_MAX_SPLITPOINTS		32
143 #define HASH_MAX_BITMAPS			128
144 
145 typedef struct HashMetaPageData
146 {
147 	uint32		hashm_magic;	/* magic no. for hash tables */
148 	uint32		hashm_version;	/* version ID */
149 	double		hashm_ntuples;	/* number of tuples stored in the table */
150 	uint16		hashm_ffactor;	/* target fill factor (tuples/bucket) */
151 	uint16		hashm_bsize;	/* index page size (bytes) */
152 	uint16		hashm_bmsize;	/* bitmap array size (bytes) - must be a power
153 								 * of 2 */
154 	uint16		hashm_bmshift;	/* log2(bitmap array size in BITS) */
155 	uint32		hashm_maxbucket;	/* ID of maximum bucket in use */
156 	uint32		hashm_highmask; /* mask to modulo into entire table */
157 	uint32		hashm_lowmask;	/* mask to modulo into lower half of table */
158 	uint32		hashm_ovflpoint;/* splitpoint from which ovflpgs being
159 								 * allocated */
160 	uint32		hashm_firstfree;	/* lowest-number free ovflpage (bit#) */
161 	uint32		hashm_nmaps;	/* number of bitmap pages */
162 	RegProcedure hashm_procid;	/* hash procedure id from pg_proc */
163 	uint32		hashm_spares[HASH_MAX_SPLITPOINTS];		/* spare pages before
164 														 * each splitpoint */
165 	BlockNumber hashm_mapp[HASH_MAX_BITMAPS];	/* blknos of ovfl bitmaps */
166 } HashMetaPageData;
167 
168 typedef HashMetaPageData *HashMetaPage;
169 
170 /*
171  * Maximum size of a hash index item (it's okay to have only one per page)
172  */
173 #define HashMaxItemSize(page) \
174 	MAXALIGN_DOWN(PageGetPageSize(page) - \
175 				  SizeOfPageHeaderData - \
176 				  sizeof(ItemIdData) - \
177 				  MAXALIGN(sizeof(HashPageOpaqueData)))
178 
179 #define HASH_MIN_FILLFACTOR			10
180 #define HASH_DEFAULT_FILLFACTOR		75
181 
182 /*
183  * Constants
184  */
185 #define BYTE_TO_BIT				3		/* 2^3 bits/byte */
186 #define ALL_SET					((uint32) ~0)
187 
188 /*
189  * Bitmap pages do not contain tuples.  They do contain the standard
190  * page headers and trailers; however, everything in between is a
191  * giant bit array.  The number of bits that fit on a page obviously
192  * depends on the page size and the header/trailer overhead.  We require
193  * the number of bits per page to be a power of 2.
194  */
195 #define BMPGSZ_BYTE(metap)		((metap)->hashm_bmsize)
196 #define BMPGSZ_BIT(metap)		((metap)->hashm_bmsize << BYTE_TO_BIT)
197 #define BMPG_SHIFT(metap)		((metap)->hashm_bmshift)
198 #define BMPG_MASK(metap)		(BMPGSZ_BIT(metap) - 1)
199 
200 #define HashPageGetBitmap(page) \
201 	((uint32 *) PageGetContents(page))
202 
203 #define HashGetMaxBitmapSize(page) \
204 	(PageGetPageSize((Page) page) - \
205 	 (MAXALIGN(SizeOfPageHeaderData) + MAXALIGN(sizeof(HashPageOpaqueData))))
206 
207 #define HashPageGetMeta(page) \
208 	((HashMetaPage) PageGetContents(page))
209 
210 /*
211  * The number of bits in an ovflpage bitmap word.
212  */
213 #define BITS_PER_MAP	32		/* Number of bits in uint32 */
214 
215 /* Given the address of the beginning of a bit map, clear/set the nth bit */
216 #define CLRBIT(A, N)	((A)[(N)/BITS_PER_MAP] &= ~(1<<((N)%BITS_PER_MAP)))
217 #define SETBIT(A, N)	((A)[(N)/BITS_PER_MAP] |= (1<<((N)%BITS_PER_MAP)))
218 #define ISSET(A, N)		((A)[(N)/BITS_PER_MAP] & (1<<((N)%BITS_PER_MAP)))
219 
220 /*
221  * page-level and high-level locking modes (see README)
222  */
223 #define HASH_READ		BUFFER_LOCK_SHARE
224 #define HASH_WRITE		BUFFER_LOCK_EXCLUSIVE
225 #define HASH_NOLOCK		(-1)
226 
227 #define HASH_SHARE		ShareLock
228 #define HASH_EXCLUSIVE	ExclusiveLock
229 
230 /*
231  *	Strategy number. There's only one valid strategy for hashing: equality.
232  */
233 #define HTEqualStrategyNumber			1
234 #define HTMaxStrategyNumber				1
235 
236 /*
237  *	When a new operator class is declared, we require that the user supply
238  *	us with an amproc procudure for hashing a key of the new type.
239  *	Since we only have one such proc in amproc, it's number 1.
240  */
241 #define HASHPROC		1
242 #define HASHNProcs		1
243 
244 
245 /* public routines */
246 
247 extern Datum hashhandler(PG_FUNCTION_ARGS);
248 extern IndexBuildResult *hashbuild(Relation heap, Relation index,
249 		  struct IndexInfo *indexInfo);
250 extern void hashbuildempty(Relation index);
251 extern bool hashinsert(Relation rel, Datum *values, bool *isnull,
252 		   ItemPointer ht_ctid, Relation heapRel,
253 		   IndexUniqueCheck checkUnique);
254 extern bool hashgettuple(IndexScanDesc scan, ScanDirection dir);
255 extern int64 hashgetbitmap(IndexScanDesc scan, TIDBitmap *tbm);
256 extern IndexScanDesc hashbeginscan(Relation rel, int nkeys, int norderbys);
257 extern void hashrescan(IndexScanDesc scan, ScanKey scankey, int nscankeys,
258 		   ScanKey orderbys, int norderbys);
259 extern void hashendscan(IndexScanDesc scan);
260 extern IndexBulkDeleteResult *hashbulkdelete(IndexVacuumInfo *info,
261 			   IndexBulkDeleteResult *stats,
262 			   IndexBulkDeleteCallback callback,
263 			   void *callback_state);
264 extern IndexBulkDeleteResult *hashvacuumcleanup(IndexVacuumInfo *info,
265 				  IndexBulkDeleteResult *stats);
266 extern bytea *hashoptions(Datum reloptions, bool validate);
267 extern bool hashvalidate(Oid opclassoid);
268 
269 /*
270  * Datatype-specific hash functions in hashfunc.c.
271  *
272  * These support both hash indexes and hash joins.
273  *
274  * NOTE: some of these are also used by catcache operations, without
275  * any direct connection to hash indexes.  Also, the common hash_any
276  * routine is also used by dynahash tables.
277  */
278 extern Datum hashchar(PG_FUNCTION_ARGS);
279 extern Datum hashint2(PG_FUNCTION_ARGS);
280 extern Datum hashint4(PG_FUNCTION_ARGS);
281 extern Datum hashint8(PG_FUNCTION_ARGS);
282 extern Datum hashoid(PG_FUNCTION_ARGS);
283 extern Datum hashenum(PG_FUNCTION_ARGS);
284 extern Datum hashfloat4(PG_FUNCTION_ARGS);
285 extern Datum hashfloat8(PG_FUNCTION_ARGS);
286 extern Datum hashoidvector(PG_FUNCTION_ARGS);
287 extern Datum hashint2vector(PG_FUNCTION_ARGS);
288 extern Datum hashname(PG_FUNCTION_ARGS);
289 extern Datum hashtext(PG_FUNCTION_ARGS);
290 extern Datum hashvarlena(PG_FUNCTION_ARGS);
291 extern Datum hash_any(register const unsigned char *k, register int keylen);
292 extern Datum hash_uint32(uint32 k);
293 
294 /* private routines */
295 
296 /* hashinsert.c */
297 extern void _hash_doinsert(Relation rel, IndexTuple itup);
298 extern OffsetNumber _hash_pgaddtup(Relation rel, Buffer buf,
299 			   Size itemsize, IndexTuple itup);
300 
301 /* hashovfl.c */
302 extern Buffer _hash_addovflpage(Relation rel, Buffer metabuf, Buffer buf);
303 extern BlockNumber _hash_freeovflpage(Relation rel, Buffer ovflbuf,
304 				   BufferAccessStrategy bstrategy);
305 extern void _hash_initbitmap(Relation rel, HashMetaPage metap,
306 				 BlockNumber blkno, ForkNumber forkNum);
307 extern void _hash_squeezebucket(Relation rel,
308 					Bucket bucket, BlockNumber bucket_blkno,
309 					BufferAccessStrategy bstrategy);
310 
311 /* hashpage.c */
312 extern void _hash_getlock(Relation rel, BlockNumber whichlock, int access);
313 extern bool _hash_try_getlock(Relation rel, BlockNumber whichlock, int access);
314 extern void _hash_droplock(Relation rel, BlockNumber whichlock, int access);
315 extern Buffer _hash_getbuf(Relation rel, BlockNumber blkno,
316 			 int access, int flags);
317 extern Buffer _hash_getinitbuf(Relation rel, BlockNumber blkno);
318 extern Buffer _hash_getnewbuf(Relation rel, BlockNumber blkno,
319 				ForkNumber forkNum);
320 extern Buffer _hash_getbuf_with_strategy(Relation rel, BlockNumber blkno,
321 						   int access, int flags,
322 						   BufferAccessStrategy bstrategy);
323 extern void _hash_relbuf(Relation rel, Buffer buf);
324 extern void _hash_dropbuf(Relation rel, Buffer buf);
325 extern void _hash_wrtbuf(Relation rel, Buffer buf);
326 extern void _hash_chgbufaccess(Relation rel, Buffer buf, int from_access,
327 				   int to_access);
328 extern uint32 _hash_metapinit(Relation rel, double num_tuples,
329 				ForkNumber forkNum);
330 extern void _hash_pageinit(Page page, Size size);
331 extern void _hash_expandtable(Relation rel, Buffer metabuf);
332 
333 /* hashscan.c */
334 extern void _hash_regscan(IndexScanDesc scan);
335 extern void _hash_dropscan(IndexScanDesc scan);
336 extern bool _hash_has_active_scan(Relation rel, Bucket bucket);
337 extern void ReleaseResources_hash(void);
338 
339 /* hashsearch.c */
340 extern bool _hash_next(IndexScanDesc scan, ScanDirection dir);
341 extern bool _hash_first(IndexScanDesc scan, ScanDirection dir);
342 extern bool _hash_step(IndexScanDesc scan, Buffer *bufP, ScanDirection dir);
343 
344 /* hashsort.c */
345 typedef struct HSpool HSpool;	/* opaque struct in hashsort.c */
346 
347 extern HSpool *_h_spoolinit(Relation heap, Relation index, uint32 num_buckets);
348 extern void _h_spooldestroy(HSpool *hspool);
349 extern void _h_spool(HSpool *hspool, ItemPointer self,
350 		 Datum *values, bool *isnull);
351 extern void _h_indexbuild(HSpool *hspool);
352 
353 /* hashutil.c */
354 extern bool _hash_checkqual(IndexScanDesc scan, IndexTuple itup);
355 extern uint32 _hash_datum2hashkey(Relation rel, Datum key);
356 extern uint32 _hash_datum2hashkey_type(Relation rel, Datum key, Oid keytype);
357 extern Bucket _hash_hashkey2bucket(uint32 hashkey, uint32 maxbucket,
358 					 uint32 highmask, uint32 lowmask);
359 extern uint32 _hash_log2(uint32 num);
360 extern void _hash_checkpage(Relation rel, Buffer buf, int flags);
361 extern uint32 _hash_get_indextuple_hashkey(IndexTuple itup);
362 extern bool _hash_convert_tuple(Relation index,
363 					Datum *user_values, bool *user_isnull,
364 					Datum *index_values, bool *index_isnull);
365 extern OffsetNumber _hash_binsearch(Page page, uint32 hash_value);
366 extern OffsetNumber _hash_binsearch_last(Page page, uint32 hash_value);
367 
368 /* hash.c */
369 extern void hash_redo(XLogReaderState *record);
370 extern void hash_desc(StringInfo buf, XLogReaderState *record);
371 extern const char *hash_identify(uint8 info);
372 
373 #endif   /* HASH_H */
374