1 /*
2  * hashfuncs.c
3  *		Functions to investigate the content of HASH indexes
4  *
5  * Copyright (c) 2017-2019, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		contrib/pageinspect/hashfuncs.c
9  */
10 
11 #include "postgres.h"
12 
13 #include "pageinspect.h"
14 
15 #include "access/hash.h"
16 #include "access/htup_details.h"
17 #include "catalog/pg_type.h"
18 #include "catalog/pg_am.h"
19 #include "funcapi.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 #include "utils/rel.h"
23 
/*
 * Function-manager (version 1) info records for the SQL-callable functions
 * defined in this file.
 */
PG_FUNCTION_INFO_V1(hash_page_type);
PG_FUNCTION_INFO_V1(hash_page_stats);
PG_FUNCTION_INFO_V1(hash_page_items);
PG_FUNCTION_INFO_V1(hash_bitmap_info);
PG_FUNCTION_INFO_V1(hash_metapage_info);

/* Does relation "r" have the hash access method's OID in rd_rel->relam? */
#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)
31 
32 /* ------------------------------------------------
33  * structure for single hash page statistics
34  * ------------------------------------------------
35  */
36 typedef struct HashPageStat
37 {
38 	int			live_items;
39 	int			dead_items;
40 	int			page_size;
41 	int			free_size;
42 
43 	/* opaque data */
44 	BlockNumber hasho_prevblkno;
45 	BlockNumber hasho_nextblkno;
46 	Bucket		hasho_bucket;
47 	uint16		hasho_flag;
48 	uint16		hasho_page_id;
49 } HashPageStat;
50 
51 
52 /*
53  * Verify that the given bytea contains a HASH page, or die in the attempt.
54  * A pointer to a palloc'd, properly aligned copy of the page is returned.
55  */
56 static Page
verify_hash_page(bytea * raw_page,int flags)57 verify_hash_page(bytea *raw_page, int flags)
58 {
59 	Page		page = get_page_from_raw(raw_page);
60 	int			pagetype = LH_UNUSED_PAGE;
61 
62 	/* Treat new pages as unused. */
63 	if (!PageIsNew(page))
64 	{
65 		HashPageOpaque pageopaque;
66 
67 		if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
68 			ereport(ERROR,
69 					(errcode(ERRCODE_INDEX_CORRUPTED),
70 					 errmsg("index table contains corrupted page")));
71 
72 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
73 		if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
74 			ereport(ERROR,
75 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
76 					 errmsg("page is not a hash page"),
77 					 errdetail("Expected %08x, got %08x.",
78 							   HASHO_PAGE_ID, pageopaque->hasho_page_id)));
79 
80 		pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
81 	}
82 
83 	/* Check that page type is sane. */
84 	if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
85 		pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE &&
86 		pagetype != LH_UNUSED_PAGE)
87 		ereport(ERROR,
88 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
89 				 errmsg("invalid hash page type %08x", pagetype)));
90 
91 	/* If requested, verify page type. */
92 	if (flags != 0 && (pagetype & flags) == 0)
93 	{
94 		switch (flags)
95 		{
96 			case LH_META_PAGE:
97 				ereport(ERROR,
98 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
99 						 errmsg("page is not a hash meta page")));
100 				break;
101 			case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
102 				ereport(ERROR,
103 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
104 						 errmsg("page is not a hash bucket or overflow page")));
105 				break;
106 			case LH_OVERFLOW_PAGE:
107 				ereport(ERROR,
108 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
109 						 errmsg("page is not a hash overflow page")));
110 				break;
111 			default:
112 				elog(ERROR,
113 					 "hash page of type %08x not in mask %08x",
114 					 pagetype, flags);
115 				break;
116 		}
117 	}
118 
119 	/*
120 	 * If it is the metapage, also verify magic number and version.
121 	 */
122 	if (pagetype == LH_META_PAGE)
123 	{
124 		HashMetaPage metap = HashPageGetMeta(page);
125 
126 		if (metap->hashm_magic != HASH_MAGIC)
127 			ereport(ERROR,
128 					(errcode(ERRCODE_INDEX_CORRUPTED),
129 					 errmsg("invalid magic number for metadata"),
130 					 errdetail("Expected 0x%08x, got 0x%08x.",
131 							   HASH_MAGIC, metap->hashm_magic)));
132 
133 		if (metap->hashm_version != HASH_VERSION)
134 			ereport(ERROR,
135 					(errcode(ERRCODE_INDEX_CORRUPTED),
136 					 errmsg("invalid version for metadata"),
137 					 errdetail("Expected %d, got %d",
138 							   HASH_VERSION, metap->hashm_version)));
139 	}
140 
141 	return page;
142 }
143 
144 /* -------------------------------------------------
145  * GetHashPageStatistics()
146  *
147  * Collect statistics of single hash page
148  * -------------------------------------------------
149  */
150 static void
GetHashPageStatistics(Page page,HashPageStat * stat)151 GetHashPageStatistics(Page page, HashPageStat *stat)
152 {
153 	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
154 	HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
155 	int			off;
156 
157 	stat->dead_items = stat->live_items = 0;
158 	stat->page_size = PageGetPageSize(page);
159 
160 	/* hash page opaque data */
161 	stat->hasho_prevblkno = opaque->hasho_prevblkno;
162 	stat->hasho_nextblkno = opaque->hasho_nextblkno;
163 	stat->hasho_bucket = opaque->hasho_bucket;
164 	stat->hasho_flag = opaque->hasho_flag;
165 	stat->hasho_page_id = opaque->hasho_page_id;
166 
167 	/* count live and dead tuples, and free space */
168 	for (off = FirstOffsetNumber; off <= maxoff; off++)
169 	{
170 		ItemId		id = PageGetItemId(page, off);
171 
172 		if (!ItemIdIsDead(id))
173 			stat->live_items++;
174 		else
175 			stat->dead_items++;
176 	}
177 	stat->free_size = PageGetFreeSpace(page);
178 }
179 
180 /* ---------------------------------------------------
181  * hash_page_type()
182  *
183  * Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
184  * ---------------------------------------------------
185  */
186 Datum
hash_page_type(PG_FUNCTION_ARGS)187 hash_page_type(PG_FUNCTION_ARGS)
188 {
189 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
190 	Page		page;
191 	HashPageOpaque opaque;
192 	int			pagetype;
193 	const char *type;
194 
195 	if (!superuser())
196 		ereport(ERROR,
197 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
198 				 (errmsg("must be superuser to use raw page functions"))));
199 
200 	page = verify_hash_page(raw_page, 0);
201 
202 	if (PageIsNew(page))
203 		type = "unused";
204 	else
205 	{
206 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
207 
208 		/* page type (flags) */
209 		pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
210 		if (pagetype == LH_META_PAGE)
211 			type = "metapage";
212 		else if (pagetype == LH_OVERFLOW_PAGE)
213 			type = "overflow";
214 		else if (pagetype == LH_BUCKET_PAGE)
215 			type = "bucket";
216 		else if (pagetype == LH_BITMAP_PAGE)
217 			type = "bitmap";
218 		else
219 			type = "unused";
220 	}
221 
222 	PG_RETURN_TEXT_P(cstring_to_text(type));
223 }
224 
225 /* ---------------------------------------------------
226  * hash_page_stats()
227  *
228  * Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
229  * ---------------------------------------------------
230  */
231 Datum
hash_page_stats(PG_FUNCTION_ARGS)232 hash_page_stats(PG_FUNCTION_ARGS)
233 {
234 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
235 	Page		page;
236 	int			j;
237 	Datum		values[9];
238 	bool		nulls[9];
239 	HashPageStat stat;
240 	HeapTuple	tuple;
241 	TupleDesc	tupleDesc;
242 
243 	if (!superuser())
244 		ereport(ERROR,
245 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
246 				 (errmsg("must be superuser to use raw page functions"))));
247 
248 	page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
249 
250 	/* keep compiler quiet */
251 	stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
252 	stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
253 
254 	GetHashPageStatistics(page, &stat);
255 
256 	/* Build a tuple descriptor for our result type */
257 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
258 		elog(ERROR, "return type must be a row type");
259 	tupleDesc = BlessTupleDesc(tupleDesc);
260 
261 	MemSet(nulls, 0, sizeof(nulls));
262 
263 	j = 0;
264 	values[j++] = Int32GetDatum(stat.live_items);
265 	values[j++] = Int32GetDatum(stat.dead_items);
266 	values[j++] = Int32GetDatum(stat.page_size);
267 	values[j++] = Int32GetDatum(stat.free_size);
268 	values[j++] = Int64GetDatum((int64) stat.hasho_prevblkno);
269 	values[j++] = Int64GetDatum((int64) stat.hasho_nextblkno);
270 	values[j++] = Int64GetDatum((int64) stat.hasho_bucket);
271 	values[j++] = Int32GetDatum((int32) stat.hasho_flag);
272 	values[j++] = Int32GetDatum((int32) stat.hasho_page_id);
273 
274 	tuple = heap_form_tuple(tupleDesc, values, nulls);
275 
276 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
277 }
278 
279 /*
280  * cross-call data structure for SRF
281  */
282 struct user_args
283 {
284 	Page		page;
285 	OffsetNumber offset;
286 };
287 
288 /*-------------------------------------------------------
289  * hash_page_items()
290  *
291  * Get IndexTupleData set in a hash page
292  *
293  * Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
294  *-------------------------------------------------------
295  */
296 Datum
hash_page_items(PG_FUNCTION_ARGS)297 hash_page_items(PG_FUNCTION_ARGS)
298 {
299 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
300 	Page		page;
301 	Datum		result;
302 	Datum		values[3];
303 	bool		nulls[3];
304 	uint32		hashkey;
305 	HeapTuple	tuple;
306 	FuncCallContext *fctx;
307 	MemoryContext mctx;
308 	struct user_args *uargs;
309 
310 	if (!superuser())
311 		ereport(ERROR,
312 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
313 				 (errmsg("must be superuser to use raw page functions"))));
314 
315 	if (SRF_IS_FIRSTCALL())
316 	{
317 		TupleDesc	tupleDesc;
318 
319 		fctx = SRF_FIRSTCALL_INIT();
320 
321 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
322 
323 		page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
324 
325 		uargs = palloc(sizeof(struct user_args));
326 
327 		uargs->page = page;
328 
329 		uargs->offset = FirstOffsetNumber;
330 
331 		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
332 
333 		/* Build a tuple descriptor for our result type */
334 		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
335 			elog(ERROR, "return type must be a row type");
336 		tupleDesc = BlessTupleDesc(tupleDesc);
337 
338 		fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
339 
340 		fctx->user_fctx = uargs;
341 
342 		MemoryContextSwitchTo(mctx);
343 	}
344 
345 	fctx = SRF_PERCALL_SETUP();
346 	uargs = fctx->user_fctx;
347 
348 	if (fctx->call_cntr < fctx->max_calls)
349 	{
350 		ItemId		id;
351 		IndexTuple	itup;
352 		int			j;
353 
354 		id = PageGetItemId(uargs->page, uargs->offset);
355 
356 		if (!ItemIdIsValid(id))
357 			elog(ERROR, "invalid ItemId");
358 
359 		itup = (IndexTuple) PageGetItem(uargs->page, id);
360 
361 		MemSet(nulls, 0, sizeof(nulls));
362 
363 		j = 0;
364 		values[j++] = Int32GetDatum((int32) uargs->offset);
365 		values[j++] = PointerGetDatum(&itup->t_tid);
366 
367 		hashkey = _hash_get_indextuple_hashkey(itup);
368 		values[j] = Int64GetDatum((int64) hashkey);
369 
370 		tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
371 		result = HeapTupleGetDatum(tuple);
372 
373 		uargs->offset = uargs->offset + 1;
374 
375 		SRF_RETURN_NEXT(fctx, result);
376 	}
377 	else
378 	{
379 		pfree(uargs);
380 		SRF_RETURN_DONE(fctx);
381 	}
382 }
383 
384 /* ------------------------------------------------
385  * hash_bitmap_info()
386  *
387  * Get bitmap information for a particular overflow page
388  *
389  * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
390  * ------------------------------------------------
391  */
392 Datum
hash_bitmap_info(PG_FUNCTION_ARGS)393 hash_bitmap_info(PG_FUNCTION_ARGS)
394 {
395 	Oid			indexRelid = PG_GETARG_OID(0);
396 	uint64		ovflblkno = PG_GETARG_INT64(1);
397 	HashMetaPage metap;
398 	Buffer		metabuf,
399 				mapbuf;
400 	BlockNumber bitmapblkno;
401 	Page		mappage;
402 	bool		bit = false;
403 	TupleDesc	tupleDesc;
404 	Relation	indexRel;
405 	uint32		ovflbitno;
406 	int32		bitmappage,
407 				bitmapbit;
408 	HeapTuple	tuple;
409 	int			i,
410 				j;
411 	Datum		values[3];
412 	bool		nulls[3];
413 	uint32	   *freep;
414 
415 	if (!superuser())
416 		ereport(ERROR,
417 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
418 				 (errmsg("must be superuser to use raw page functions"))));
419 
420 	indexRel = index_open(indexRelid, AccessShareLock);
421 
422 	if (!IS_HASH(indexRel))
423 		elog(ERROR, "relation \"%s\" is not a hash index",
424 			 RelationGetRelationName(indexRel));
425 
426 	if (RELATION_IS_OTHER_TEMP(indexRel))
427 		ereport(ERROR,
428 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
429 				 errmsg("cannot access temporary tables of other sessions")));
430 
431 	if (ovflblkno >= RelationGetNumberOfBlocks(indexRel))
432 		ereport(ERROR,
433 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
434 				 errmsg("block number " UINT64_FORMAT " is out of range for relation \"%s\"",
435 						ovflblkno, RelationGetRelationName(indexRel))));
436 
437 	/* Read the metapage so we can determine which bitmap page to use */
438 	metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
439 	metap = HashPageGetMeta(BufferGetPage(metabuf));
440 
441 	/*
442 	 * Reject attempt to read the bit for a metapage or bitmap page; this is
443 	 * only meaningful for overflow pages.
444 	 */
445 	if (ovflblkno == 0)
446 		ereport(ERROR,
447 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
448 				 errmsg("invalid overflow block number %u",
449 						(BlockNumber) ovflblkno)));
450 	for (i = 0; i < metap->hashm_nmaps; i++)
451 		if (metap->hashm_mapp[i] == ovflblkno)
452 			ereport(ERROR,
453 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
454 					 errmsg("invalid overflow block number %u",
455 							(BlockNumber) ovflblkno)));
456 
457 	/*
458 	 * Identify overflow bit number.  This will error out for primary bucket
459 	 * pages, and we've already rejected the metapage and bitmap pages above.
460 	 */
461 	ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno);
462 
463 	bitmappage = ovflbitno >> BMPG_SHIFT(metap);
464 	bitmapbit = ovflbitno & BMPG_MASK(metap);
465 
466 	if (bitmappage >= metap->hashm_nmaps)
467 		ereport(ERROR,
468 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
469 				 errmsg("invalid overflow block number %u",
470 						(BlockNumber) ovflblkno)));
471 
472 	bitmapblkno = metap->hashm_mapp[bitmappage];
473 
474 	_hash_relbuf(indexRel, metabuf);
475 
476 	/* Check the status of bitmap bit for overflow page */
477 	mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE);
478 	mappage = BufferGetPage(mapbuf);
479 	freep = HashPageGetBitmap(mappage);
480 
481 	bit = ISSET(freep, bitmapbit) != 0;
482 
483 	_hash_relbuf(indexRel, mapbuf);
484 	index_close(indexRel, AccessShareLock);
485 
486 	/* Build a tuple descriptor for our result type */
487 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
488 		elog(ERROR, "return type must be a row type");
489 	tupleDesc = BlessTupleDesc(tupleDesc);
490 
491 	MemSet(nulls, 0, sizeof(nulls));
492 
493 	j = 0;
494 	values[j++] = Int64GetDatum((int64) bitmapblkno);
495 	values[j++] = Int32GetDatum(bitmapbit);
496 	values[j++] = BoolGetDatum(bit);
497 
498 	tuple = heap_form_tuple(tupleDesc, values, nulls);
499 
500 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
501 }
502 
503 /* ------------------------------------------------
504  * hash_metapage_info()
505  *
506  * Get the meta-page information for a hash index
507  *
508  * Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
509  * ------------------------------------------------
510  */
511 Datum
hash_metapage_info(PG_FUNCTION_ARGS)512 hash_metapage_info(PG_FUNCTION_ARGS)
513 {
514 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
515 	Page		page;
516 	HashMetaPageData *metad;
517 	TupleDesc	tupleDesc;
518 	HeapTuple	tuple;
519 	int			i,
520 				j;
521 	Datum		values[16];
522 	bool		nulls[16];
523 	Datum		spares[HASH_MAX_SPLITPOINTS];
524 	Datum		mapp[HASH_MAX_BITMAPS];
525 
526 	if (!superuser())
527 		ereport(ERROR,
528 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
529 				 (errmsg("must be superuser to use raw page functions"))));
530 
531 	page = verify_hash_page(raw_page, LH_META_PAGE);
532 
533 	/* Build a tuple descriptor for our result type */
534 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
535 		elog(ERROR, "return type must be a row type");
536 	tupleDesc = BlessTupleDesc(tupleDesc);
537 
538 	metad = HashPageGetMeta(page);
539 
540 	MemSet(nulls, 0, sizeof(nulls));
541 
542 	j = 0;
543 	values[j++] = Int64GetDatum((int64) metad->hashm_magic);
544 	values[j++] = Int64GetDatum((int64) metad->hashm_version);
545 	values[j++] = Float8GetDatum(metad->hashm_ntuples);
546 	values[j++] = Int32GetDatum((int32) metad->hashm_ffactor);
547 	values[j++] = Int32GetDatum((int32) metad->hashm_bsize);
548 	values[j++] = Int32GetDatum((int32) metad->hashm_bmsize);
549 	values[j++] = Int32GetDatum((int32) metad->hashm_bmshift);
550 	values[j++] = Int64GetDatum((int64) metad->hashm_maxbucket);
551 	values[j++] = Int64GetDatum((int64) metad->hashm_highmask);
552 	values[j++] = Int64GetDatum((int64) metad->hashm_lowmask);
553 	values[j++] = Int64GetDatum((int64) metad->hashm_ovflpoint);
554 	values[j++] = Int64GetDatum((int64) metad->hashm_firstfree);
555 	values[j++] = Int64GetDatum((int64) metad->hashm_nmaps);
556 	values[j++] = ObjectIdGetDatum((Oid) metad->hashm_procid);
557 
558 	for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
559 		spares[i] = Int64GetDatum((int64) metad->hashm_spares[i]);
560 	values[j++] = PointerGetDatum(construct_array(spares,
561 												  HASH_MAX_SPLITPOINTS,
562 												  INT8OID,
563 												  8, FLOAT8PASSBYVAL, 'd'));
564 
565 	for (i = 0; i < HASH_MAX_BITMAPS; i++)
566 		mapp[i] = Int64GetDatum((int64) metad->hashm_mapp[i]);
567 	values[j++] = PointerGetDatum(construct_array(mapp,
568 												  HASH_MAX_BITMAPS,
569 												  INT8OID,
570 												  8, FLOAT8PASSBYVAL, 'd'));
571 
572 	tuple = heap_form_tuple(tupleDesc, values, nulls);
573 
574 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
575 }
576