1 /*
2  * hashfuncs.c
3  *		Functions to investigate the content of HASH indexes
4  *
5  * Copyright (c) 2017-2021, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		contrib/pageinspect/hashfuncs.c
9  */
10 
11 #include "postgres.h"
12 
13 #include "access/hash.h"
14 #include "access/htup_details.h"
15 #include "catalog/pg_am.h"
16 #include "catalog/pg_type.h"
17 #include "funcapi.h"
18 #include "miscadmin.h"
19 #include "pageinspect.h"
20 #include "utils/array.h"
21 #include "utils/builtins.h"
22 #include "utils/rel.h"
23 
/*
 * Function-info declarations for the SQL-callable entry points defined
 * later in this file.
 */
PG_FUNCTION_INFO_V1(hash_page_type);
PG_FUNCTION_INFO_V1(hash_page_stats);
PG_FUNCTION_INFO_V1(hash_page_items);
PG_FUNCTION_INFO_V1(hash_bitmap_info);
PG_FUNCTION_INFO_V1(hash_metapage_info);

/* True when relation r's access method OID (rd_rel->relam) is HASH_AM_OID. */
#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
31 
32 /* ------------------------------------------------
33  * structure for single hash page statistics
34  * ------------------------------------------------
35  */
36 typedef struct HashPageStat
37 {
38 	int			live_items;
39 	int			dead_items;
40 	int			page_size;
41 	int			free_size;
42 
43 	/* opaque data */
44 	BlockNumber hasho_prevblkno;
45 	BlockNumber hasho_nextblkno;
46 	Bucket		hasho_bucket;
47 	uint16		hasho_flag;
48 	uint16		hasho_page_id;
49 } HashPageStat;
50 
51 
52 /*
53  * Verify that the given bytea contains a HASH page, or die in the attempt.
54  * A pointer to a palloc'd, properly aligned copy of the page is returned.
55  */
56 static Page
verify_hash_page(bytea * raw_page,int flags)57 verify_hash_page(bytea *raw_page, int flags)
58 {
59 	Page		page = get_page_from_raw(raw_page);
60 	int			pagetype = LH_UNUSED_PAGE;
61 
62 	/* Treat new pages as unused. */
63 	if (!PageIsNew(page))
64 	{
65 		HashPageOpaque pageopaque;
66 
67 		if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
68 			ereport(ERROR,
69 					(errcode(ERRCODE_INDEX_CORRUPTED),
70 					 errmsg("index table contains corrupted page")));
71 
72 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
73 		if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
74 			ereport(ERROR,
75 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
76 					 errmsg("page is not a hash page"),
77 					 errdetail("Expected %08x, got %08x.",
78 							   HASHO_PAGE_ID, pageopaque->hasho_page_id)));
79 
80 		pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
81 	}
82 
83 	/* Check that page type is sane. */
84 	if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
85 		pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE &&
86 		pagetype != LH_UNUSED_PAGE)
87 		ereport(ERROR,
88 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
89 				 errmsg("invalid hash page type %08x", pagetype)));
90 
91 	/* If requested, verify page type. */
92 	if (flags != 0 && (pagetype & flags) == 0)
93 	{
94 		switch (flags)
95 		{
96 			case LH_META_PAGE:
97 				ereport(ERROR,
98 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
99 						 errmsg("page is not a hash meta page")));
100 				break;
101 			case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
102 				ereport(ERROR,
103 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
104 						 errmsg("page is not a hash bucket or overflow page")));
105 				break;
106 			case LH_OVERFLOW_PAGE:
107 				ereport(ERROR,
108 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
109 						 errmsg("page is not a hash overflow page")));
110 				break;
111 			default:
112 				elog(ERROR,
113 					 "hash page of type %08x not in mask %08x",
114 					 pagetype, flags);
115 				break;
116 		}
117 	}
118 
119 	/*
120 	 * If it is the metapage, also verify magic number and version.
121 	 */
122 	if (pagetype == LH_META_PAGE)
123 	{
124 		HashMetaPage metap = HashPageGetMeta(page);
125 
126 		if (metap->hashm_magic != HASH_MAGIC)
127 			ereport(ERROR,
128 					(errcode(ERRCODE_INDEX_CORRUPTED),
129 					 errmsg("invalid magic number for metadata"),
130 					 errdetail("Expected 0x%08x, got 0x%08x.",
131 							   HASH_MAGIC, metap->hashm_magic)));
132 
133 		if (metap->hashm_version != HASH_VERSION)
134 			ereport(ERROR,
135 					(errcode(ERRCODE_INDEX_CORRUPTED),
136 					 errmsg("invalid version for metadata"),
137 					 errdetail("Expected %d, got %d",
138 							   HASH_VERSION, metap->hashm_version)));
139 	}
140 
141 	return page;
142 }
143 
144 /* -------------------------------------------------
145  * GetHashPageStatistics()
146  *
147  * Collect statistics of single hash page
148  * -------------------------------------------------
149  */
150 static void
GetHashPageStatistics(Page page,HashPageStat * stat)151 GetHashPageStatistics(Page page, HashPageStat *stat)
152 {
153 	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
154 	HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
155 	int			off;
156 
157 	stat->dead_items = stat->live_items = 0;
158 	stat->page_size = PageGetPageSize(page);
159 
160 	/* hash page opaque data */
161 	stat->hasho_prevblkno = opaque->hasho_prevblkno;
162 	stat->hasho_nextblkno = opaque->hasho_nextblkno;
163 	stat->hasho_bucket = opaque->hasho_bucket;
164 	stat->hasho_flag = opaque->hasho_flag;
165 	stat->hasho_page_id = opaque->hasho_page_id;
166 
167 	/* count live and dead tuples, and free space */
168 	for (off = FirstOffsetNumber; off <= maxoff; off++)
169 	{
170 		ItemId		id = PageGetItemId(page, off);
171 
172 		if (!ItemIdIsDead(id))
173 			stat->live_items++;
174 		else
175 			stat->dead_items++;
176 	}
177 	stat->free_size = PageGetFreeSpace(page);
178 }
179 
180 /* ---------------------------------------------------
181  * hash_page_type()
182  *
183  * Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
184  * ---------------------------------------------------
185  */
186 Datum
hash_page_type(PG_FUNCTION_ARGS)187 hash_page_type(PG_FUNCTION_ARGS)
188 {
189 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
190 	Page		page;
191 	HashPageOpaque opaque;
192 	int			pagetype;
193 	const char *type;
194 
195 	if (!superuser())
196 		ereport(ERROR,
197 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
198 				 errmsg("must be superuser to use raw page functions")));
199 
200 	page = verify_hash_page(raw_page, 0);
201 
202 	if (PageIsNew(page))
203 		type = "unused";
204 	else
205 	{
206 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
207 
208 		/* page type (flags) */
209 		pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
210 		if (pagetype == LH_META_PAGE)
211 			type = "metapage";
212 		else if (pagetype == LH_OVERFLOW_PAGE)
213 			type = "overflow";
214 		else if (pagetype == LH_BUCKET_PAGE)
215 			type = "bucket";
216 		else if (pagetype == LH_BITMAP_PAGE)
217 			type = "bitmap";
218 		else
219 			type = "unused";
220 	}
221 
222 	PG_RETURN_TEXT_P(cstring_to_text(type));
223 }
224 
225 /* ---------------------------------------------------
226  * hash_page_stats()
227  *
228  * Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
229  * ---------------------------------------------------
230  */
231 Datum
hash_page_stats(PG_FUNCTION_ARGS)232 hash_page_stats(PG_FUNCTION_ARGS)
233 {
234 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
235 	Page		page;
236 	int			j;
237 	Datum		values[9];
238 	bool		nulls[9];
239 	HashPageStat stat;
240 	HeapTuple	tuple;
241 	TupleDesc	tupleDesc;
242 
243 	if (!superuser())
244 		ereport(ERROR,
245 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
246 				 errmsg("must be superuser to use raw page functions")));
247 
248 	page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
249 
250 	/* keep compiler quiet */
251 	stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
252 	stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
253 
254 	GetHashPageStatistics(page, &stat);
255 
256 	/* Build a tuple descriptor for our result type */
257 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
258 		elog(ERROR, "return type must be a row type");
259 	tupleDesc = BlessTupleDesc(tupleDesc);
260 
261 	MemSet(nulls, 0, sizeof(nulls));
262 
263 	j = 0;
264 	values[j++] = Int32GetDatum(stat.live_items);
265 	values[j++] = Int32GetDatum(stat.dead_items);
266 	values[j++] = Int32GetDatum(stat.page_size);
267 	values[j++] = Int32GetDatum(stat.free_size);
268 	values[j++] = Int64GetDatum((int64) stat.hasho_prevblkno);
269 	values[j++] = Int64GetDatum((int64) stat.hasho_nextblkno);
270 	values[j++] = Int64GetDatum((int64) stat.hasho_bucket);
271 	values[j++] = Int32GetDatum((int32) stat.hasho_flag);
272 	values[j++] = Int32GetDatum((int32) stat.hasho_page_id);
273 
274 	tuple = heap_form_tuple(tupleDesc, values, nulls);
275 
276 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
277 }
278 
279 /*
280  * cross-call data structure for SRF
281  */
282 struct user_args
283 {
284 	Page		page;
285 	OffsetNumber offset;
286 };
287 
288 /*-------------------------------------------------------
289  * hash_page_items()
290  *
291  * Get IndexTupleData set in a hash page
292  *
293  * Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
294  *-------------------------------------------------------
295  */
296 Datum
hash_page_items(PG_FUNCTION_ARGS)297 hash_page_items(PG_FUNCTION_ARGS)
298 {
299 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
300 	Page		page;
301 	Datum		result;
302 	Datum		values[3];
303 	bool		nulls[3];
304 	uint32		hashkey;
305 	HeapTuple	tuple;
306 	FuncCallContext *fctx;
307 	MemoryContext mctx;
308 	struct user_args *uargs;
309 
310 	if (!superuser())
311 		ereport(ERROR,
312 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
313 				 errmsg("must be superuser to use raw page functions")));
314 
315 	if (SRF_IS_FIRSTCALL())
316 	{
317 		TupleDesc	tupleDesc;
318 
319 		fctx = SRF_FIRSTCALL_INIT();
320 
321 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
322 
323 		page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
324 
325 		uargs = palloc(sizeof(struct user_args));
326 
327 		uargs->page = page;
328 
329 		uargs->offset = FirstOffsetNumber;
330 
331 		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
332 
333 		/* Build a tuple descriptor for our result type */
334 		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
335 			elog(ERROR, "return type must be a row type");
336 		tupleDesc = BlessTupleDesc(tupleDesc);
337 
338 		fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
339 
340 		fctx->user_fctx = uargs;
341 
342 		MemoryContextSwitchTo(mctx);
343 	}
344 
345 	fctx = SRF_PERCALL_SETUP();
346 	uargs = fctx->user_fctx;
347 
348 	if (fctx->call_cntr < fctx->max_calls)
349 	{
350 		ItemId		id;
351 		IndexTuple	itup;
352 		int			j;
353 
354 		id = PageGetItemId(uargs->page, uargs->offset);
355 
356 		if (!ItemIdIsValid(id))
357 			elog(ERROR, "invalid ItemId");
358 
359 		itup = (IndexTuple) PageGetItem(uargs->page, id);
360 
361 		MemSet(nulls, 0, sizeof(nulls));
362 
363 		j = 0;
364 		values[j++] = Int32GetDatum((int32) uargs->offset);
365 		values[j++] = PointerGetDatum(&itup->t_tid);
366 
367 		hashkey = _hash_get_indextuple_hashkey(itup);
368 		values[j] = Int64GetDatum((int64) hashkey);
369 
370 		tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
371 		result = HeapTupleGetDatum(tuple);
372 
373 		uargs->offset = uargs->offset + 1;
374 
375 		SRF_RETURN_NEXT(fctx, result);
376 	}
377 
378 	SRF_RETURN_DONE(fctx);
379 }
380 
381 /* ------------------------------------------------
382  * hash_bitmap_info()
383  *
384  * Get bitmap information for a particular overflow page
385  *
386  * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
387  * ------------------------------------------------
388  */
389 Datum
hash_bitmap_info(PG_FUNCTION_ARGS)390 hash_bitmap_info(PG_FUNCTION_ARGS)
391 {
392 	Oid			indexRelid = PG_GETARG_OID(0);
393 	int64		ovflblkno = PG_GETARG_INT64(1);
394 	HashMetaPage metap;
395 	Buffer		metabuf,
396 				mapbuf;
397 	BlockNumber bitmapblkno;
398 	Page		mappage;
399 	bool		bit = false;
400 	TupleDesc	tupleDesc;
401 	Relation	indexRel;
402 	uint32		ovflbitno;
403 	int32		bitmappage,
404 				bitmapbit;
405 	HeapTuple	tuple;
406 	int			i,
407 				j;
408 	Datum		values[3];
409 	bool		nulls[3];
410 	uint32	   *freep;
411 
412 	if (!superuser())
413 		ereport(ERROR,
414 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
415 				 errmsg("must be superuser to use raw page functions")));
416 
417 	indexRel = index_open(indexRelid, AccessShareLock);
418 
419 	if (!IS_HASH(indexRel))
420 		elog(ERROR, "relation \"%s\" is not a hash index",
421 			 RelationGetRelationName(indexRel));
422 
423 	if (RELATION_IS_OTHER_TEMP(indexRel))
424 		ereport(ERROR,
425 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
426 				 errmsg("cannot access temporary tables of other sessions")));
427 
428 	if (ovflblkno < 0 || ovflblkno > MaxBlockNumber)
429 		ereport(ERROR,
430 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
431 				 errmsg("invalid block number")));
432 
433 	if (ovflblkno >= RelationGetNumberOfBlocks(indexRel))
434 		ereport(ERROR,
435 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
436 				 errmsg("block number %lld is out of range for relation \"%s\"",
437 						(long long int) ovflblkno, RelationGetRelationName(indexRel))));
438 
439 	/* Read the metapage so we can determine which bitmap page to use */
440 	metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
441 	metap = HashPageGetMeta(BufferGetPage(metabuf));
442 
443 	/*
444 	 * Reject attempt to read the bit for a metapage or bitmap page; this is
445 	 * only meaningful for overflow pages.
446 	 */
447 	if (ovflblkno == 0)
448 		ereport(ERROR,
449 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
450 				 errmsg("invalid overflow block number %u",
451 						(BlockNumber) ovflblkno)));
452 	for (i = 0; i < metap->hashm_nmaps; i++)
453 		if (metap->hashm_mapp[i] == ovflblkno)
454 			ereport(ERROR,
455 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
456 					 errmsg("invalid overflow block number %u",
457 							(BlockNumber) ovflblkno)));
458 
459 	/*
460 	 * Identify overflow bit number.  This will error out for primary bucket
461 	 * pages, and we've already rejected the metapage and bitmap pages above.
462 	 */
463 	ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno);
464 
465 	bitmappage = ovflbitno >> BMPG_SHIFT(metap);
466 	bitmapbit = ovflbitno & BMPG_MASK(metap);
467 
468 	if (bitmappage >= metap->hashm_nmaps)
469 		ereport(ERROR,
470 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
471 				 errmsg("invalid overflow block number %u",
472 						(BlockNumber) ovflblkno)));
473 
474 	bitmapblkno = metap->hashm_mapp[bitmappage];
475 
476 	_hash_relbuf(indexRel, metabuf);
477 
478 	/* Check the status of bitmap bit for overflow page */
479 	mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE);
480 	mappage = BufferGetPage(mapbuf);
481 	freep = HashPageGetBitmap(mappage);
482 
483 	bit = ISSET(freep, bitmapbit) != 0;
484 
485 	_hash_relbuf(indexRel, mapbuf);
486 	index_close(indexRel, AccessShareLock);
487 
488 	/* Build a tuple descriptor for our result type */
489 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
490 		elog(ERROR, "return type must be a row type");
491 	tupleDesc = BlessTupleDesc(tupleDesc);
492 
493 	MemSet(nulls, 0, sizeof(nulls));
494 
495 	j = 0;
496 	values[j++] = Int64GetDatum((int64) bitmapblkno);
497 	values[j++] = Int32GetDatum(bitmapbit);
498 	values[j++] = BoolGetDatum(bit);
499 
500 	tuple = heap_form_tuple(tupleDesc, values, nulls);
501 
502 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
503 }
504 
505 /* ------------------------------------------------
506  * hash_metapage_info()
507  *
508  * Get the meta-page information for a hash index
509  *
510  * Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
511  * ------------------------------------------------
512  */
513 Datum
hash_metapage_info(PG_FUNCTION_ARGS)514 hash_metapage_info(PG_FUNCTION_ARGS)
515 {
516 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
517 	Page		page;
518 	HashMetaPageData *metad;
519 	TupleDesc	tupleDesc;
520 	HeapTuple	tuple;
521 	int			i,
522 				j;
523 	Datum		values[16];
524 	bool		nulls[16];
525 	Datum		spares[HASH_MAX_SPLITPOINTS];
526 	Datum		mapp[HASH_MAX_BITMAPS];
527 
528 	if (!superuser())
529 		ereport(ERROR,
530 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
531 				 errmsg("must be superuser to use raw page functions")));
532 
533 	page = verify_hash_page(raw_page, LH_META_PAGE);
534 
535 	/* Build a tuple descriptor for our result type */
536 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
537 		elog(ERROR, "return type must be a row type");
538 	tupleDesc = BlessTupleDesc(tupleDesc);
539 
540 	metad = HashPageGetMeta(page);
541 
542 	MemSet(nulls, 0, sizeof(nulls));
543 
544 	j = 0;
545 	values[j++] = Int64GetDatum((int64) metad->hashm_magic);
546 	values[j++] = Int64GetDatum((int64) metad->hashm_version);
547 	values[j++] = Float8GetDatum(metad->hashm_ntuples);
548 	values[j++] = Int32GetDatum((int32) metad->hashm_ffactor);
549 	values[j++] = Int32GetDatum((int32) metad->hashm_bsize);
550 	values[j++] = Int32GetDatum((int32) metad->hashm_bmsize);
551 	values[j++] = Int32GetDatum((int32) metad->hashm_bmshift);
552 	values[j++] = Int64GetDatum((int64) metad->hashm_maxbucket);
553 	values[j++] = Int64GetDatum((int64) metad->hashm_highmask);
554 	values[j++] = Int64GetDatum((int64) metad->hashm_lowmask);
555 	values[j++] = Int64GetDatum((int64) metad->hashm_ovflpoint);
556 	values[j++] = Int64GetDatum((int64) metad->hashm_firstfree);
557 	values[j++] = Int64GetDatum((int64) metad->hashm_nmaps);
558 	values[j++] = ObjectIdGetDatum((Oid) metad->hashm_procid);
559 
560 	for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
561 		spares[i] = Int64GetDatum((int64) metad->hashm_spares[i]);
562 	values[j++] = PointerGetDatum(construct_array(spares,
563 												  HASH_MAX_SPLITPOINTS,
564 												  INT8OID,
565 												  sizeof(int64),
566 												  FLOAT8PASSBYVAL,
567 												  TYPALIGN_DOUBLE));
568 
569 	for (i = 0; i < HASH_MAX_BITMAPS; i++)
570 		mapp[i] = Int64GetDatum((int64) metad->hashm_mapp[i]);
571 	values[j++] = PointerGetDatum(construct_array(mapp,
572 												  HASH_MAX_BITMAPS,
573 												  INT8OID,
574 												  sizeof(int64),
575 												  FLOAT8PASSBYVAL,
576 												  TYPALIGN_DOUBLE));
577 
578 	tuple = heap_form_tuple(tupleDesc, values, nulls);
579 
580 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
581 }
582