1 /*
2  * hashfuncs.c
3  *		Functions to investigate the content of HASH indexes
4  *
5  * Copyright (c) 2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		contrib/pageinspect/hashfuncs.c
9  */
10 
11 #include "postgres.h"
12 
13 #include "pageinspect.h"
14 
15 #include "access/hash.h"
16 #include "access/htup_details.h"
17 #include "catalog/pg_type.h"
18 #include "catalog/pg_am.h"
19 #include "funcapi.h"
20 #include "miscadmin.h"
21 #include "utils/builtins.h"
22 
/* True when relation "r" uses the hash access method (relam == HASH_AM_OID). */
#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID)

/* Function-manager declarations for the SQL-callable entry points below. */
PG_FUNCTION_INFO_V1(hash_bitmap_info);
PG_FUNCTION_INFO_V1(hash_metapage_info);
PG_FUNCTION_INFO_V1(hash_page_items);
PG_FUNCTION_INFO_V1(hash_page_stats);
PG_FUNCTION_INFO_V1(hash_page_type);
30 
31 /* ------------------------------------------------
32  * structure for single hash page statistics
33  * ------------------------------------------------
34  */
35 typedef struct HashPageStat
36 {
37 	int			live_items;
38 	int			dead_items;
39 	int			page_size;
40 	int			free_size;
41 
42 	/* opaque data */
43 	BlockNumber hasho_prevblkno;
44 	BlockNumber hasho_nextblkno;
45 	Bucket		hasho_bucket;
46 	uint16		hasho_flag;
47 	uint16		hasho_page_id;
48 } HashPageStat;
49 
50 
51 /*
52  * Verify that the given bytea contains a HASH page, or die in the attempt.
53  * A pointer to a palloc'd, properly aligned copy of the page is returned.
54  */
55 static Page
verify_hash_page(bytea * raw_page,int flags)56 verify_hash_page(bytea *raw_page, int flags)
57 {
58 	Page		page = get_page_from_raw(raw_page);
59 	int			pagetype = LH_UNUSED_PAGE;
60 
61 	/* Treat new pages as unused. */
62 	if (!PageIsNew(page))
63 	{
64 		HashPageOpaque pageopaque;
65 
66 		if (PageGetSpecialSize(page) != MAXALIGN(sizeof(HashPageOpaqueData)))
67 			ereport(ERROR,
68 					(errcode(ERRCODE_INDEX_CORRUPTED),
69 					 errmsg("index table contains corrupted page")));
70 
71 		pageopaque = (HashPageOpaque) PageGetSpecialPointer(page);
72 		if (pageopaque->hasho_page_id != HASHO_PAGE_ID)
73 			ereport(ERROR,
74 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
75 					 errmsg("page is not a hash page"),
76 					 errdetail("Expected %08x, got %08x.",
77 							   HASHO_PAGE_ID, pageopaque->hasho_page_id)));
78 
79 		pagetype = pageopaque->hasho_flag & LH_PAGE_TYPE;
80 	}
81 
82 	/* Check that page type is sane. */
83 	if (pagetype != LH_OVERFLOW_PAGE && pagetype != LH_BUCKET_PAGE &&
84 		pagetype != LH_BITMAP_PAGE && pagetype != LH_META_PAGE &&
85 		pagetype != LH_UNUSED_PAGE)
86 		ereport(ERROR,
87 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
88 				 errmsg("invalid hash page type %08x", pagetype)));
89 
90 	/* If requested, verify page type. */
91 	if (flags != 0 && (pagetype & flags) == 0)
92 	{
93 		switch (flags)
94 		{
95 			case LH_META_PAGE:
96 				ereport(ERROR,
97 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
98 						 errmsg("page is not a hash meta page")));
99 			case LH_BUCKET_PAGE | LH_OVERFLOW_PAGE:
100 				ereport(ERROR,
101 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
102 						 errmsg("page is not a hash bucket or overflow page")));
103 			case LH_OVERFLOW_PAGE:
104 				ereport(ERROR,
105 						(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
106 						 errmsg("page is not a hash overflow page")));
107 			default:
108 				elog(ERROR,
109 					 "hash page of type %08x not in mask %08x",
110 					 pagetype, flags);
111 		}
112 	}
113 
114 	/*
115 	 * If it is the metapage, also verify magic number and version.
116 	 */
117 	if (pagetype == LH_META_PAGE)
118 	{
119 		HashMetaPage metap = HashPageGetMeta(page);
120 
121 		if (metap->hashm_magic != HASH_MAGIC)
122 			ereport(ERROR,
123 					(errcode(ERRCODE_INDEX_CORRUPTED),
124 					 errmsg("invalid magic number for metadata"),
125 					 errdetail("Expected 0x%08x, got 0x%08x.",
126 							   HASH_MAGIC, metap->hashm_magic)));
127 
128 		if (metap->hashm_version != HASH_VERSION)
129 			ereport(ERROR,
130 					(errcode(ERRCODE_INDEX_CORRUPTED),
131 					 errmsg("invalid version for metadata"),
132 					 errdetail("Expected %d, got %d",
133 							   HASH_VERSION, metap->hashm_version)));
134 	}
135 
136 	return page;
137 }
138 
139 /* -------------------------------------------------
140  * GetHashPageStatistics()
141  *
142  * Collect statistics of single hash page
143  * -------------------------------------------------
144  */
145 static void
GetHashPageStatistics(Page page,HashPageStat * stat)146 GetHashPageStatistics(Page page, HashPageStat *stat)
147 {
148 	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
149 	HashPageOpaque opaque = (HashPageOpaque) PageGetSpecialPointer(page);
150 	int			off;
151 
152 	stat->dead_items = stat->live_items = 0;
153 	stat->page_size = PageGetPageSize(page);
154 
155 	/* hash page opaque data */
156 	stat->hasho_prevblkno = opaque->hasho_prevblkno;
157 	stat->hasho_nextblkno = opaque->hasho_nextblkno;
158 	stat->hasho_bucket = opaque->hasho_bucket;
159 	stat->hasho_flag = opaque->hasho_flag;
160 	stat->hasho_page_id = opaque->hasho_page_id;
161 
162 	/* count live and dead tuples, and free space */
163 	for (off = FirstOffsetNumber; off <= maxoff; off++)
164 	{
165 		ItemId		id = PageGetItemId(page, off);
166 
167 		if (!ItemIdIsDead(id))
168 			stat->live_items++;
169 		else
170 			stat->dead_items++;
171 	}
172 	stat->free_size = PageGetFreeSpace(page);
173 }
174 
175 /* ---------------------------------------------------
176  * hash_page_type()
177  *
178  * Usage: SELECT hash_page_type(get_raw_page('con_hash_index', 1));
179  * ---------------------------------------------------
180  */
181 Datum
hash_page_type(PG_FUNCTION_ARGS)182 hash_page_type(PG_FUNCTION_ARGS)
183 {
184 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
185 	Page		page;
186 	HashPageOpaque opaque;
187 	int			pagetype;
188 	const char *type;
189 
190 	if (!superuser())
191 		ereport(ERROR,
192 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
193 				 (errmsg("must be superuser to use raw page functions"))));
194 
195 	page = verify_hash_page(raw_page, 0);
196 
197 	if (PageIsNew(page))
198 		type = "unused";
199 	else
200 	{
201 		opaque = (HashPageOpaque) PageGetSpecialPointer(page);
202 
203 		/* page type (flags) */
204 		pagetype = opaque->hasho_flag & LH_PAGE_TYPE;
205 		if (pagetype == LH_META_PAGE)
206 			type = "metapage";
207 		else if (pagetype == LH_OVERFLOW_PAGE)
208 			type = "overflow";
209 		else if (pagetype == LH_BUCKET_PAGE)
210 			type = "bucket";
211 		else if (pagetype == LH_BITMAP_PAGE)
212 			type = "bitmap";
213 		else
214 			type = "unused";
215 	}
216 
217 	PG_RETURN_TEXT_P(cstring_to_text(type));
218 }
219 
220 /* ---------------------------------------------------
221  * hash_page_stats()
222  *
223  * Usage: SELECT * FROM hash_page_stats(get_raw_page('con_hash_index', 1));
224  * ---------------------------------------------------
225  */
226 Datum
hash_page_stats(PG_FUNCTION_ARGS)227 hash_page_stats(PG_FUNCTION_ARGS)
228 {
229 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
230 	Page		page;
231 	int			j;
232 	Datum		values[9];
233 	bool		nulls[9];
234 	HashPageStat stat;
235 	HeapTuple	tuple;
236 	TupleDesc	tupleDesc;
237 
238 	if (!superuser())
239 		ereport(ERROR,
240 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
241 				 (errmsg("must be superuser to use raw page functions"))));
242 
243 	page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
244 
245 	/* keep compiler quiet */
246 	stat.hasho_prevblkno = stat.hasho_nextblkno = InvalidBlockNumber;
247 	stat.hasho_flag = stat.hasho_page_id = stat.free_size = 0;
248 
249 	GetHashPageStatistics(page, &stat);
250 
251 	/* Build a tuple descriptor for our result type */
252 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
253 		elog(ERROR, "return type must be a row type");
254 	tupleDesc = BlessTupleDesc(tupleDesc);
255 
256 	MemSet(nulls, 0, sizeof(nulls));
257 
258 	j = 0;
259 	values[j++] = Int32GetDatum(stat.live_items);
260 	values[j++] = Int32GetDatum(stat.dead_items);
261 	values[j++] = Int32GetDatum(stat.page_size);
262 	values[j++] = Int32GetDatum(stat.free_size);
263 	values[j++] = Int64GetDatum((int64) stat.hasho_prevblkno);
264 	values[j++] = Int64GetDatum((int64) stat.hasho_nextblkno);
265 	values[j++] = Int64GetDatum((int64) stat.hasho_bucket);
266 	values[j++] = Int32GetDatum((int32) stat.hasho_flag);
267 	values[j++] = Int32GetDatum((int32) stat.hasho_page_id);
268 
269 	tuple = heap_form_tuple(tupleDesc, values, nulls);
270 
271 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
272 }
273 
274 /*
275  * cross-call data structure for SRF
276  */
277 struct user_args
278 {
279 	Page		page;
280 	OffsetNumber offset;
281 };
282 
283 /*-------------------------------------------------------
284  * hash_page_items()
285  *
286  * Get IndexTupleData set in a hash page
287  *
288  * Usage: SELECT * FROM hash_page_items(get_raw_page('con_hash_index', 1));
289  *-------------------------------------------------------
290  */
291 Datum
hash_page_items(PG_FUNCTION_ARGS)292 hash_page_items(PG_FUNCTION_ARGS)
293 {
294 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
295 	Page		page;
296 	Datum		result;
297 	Datum		values[3];
298 	bool		nulls[3];
299 	uint32		hashkey;
300 	HeapTuple	tuple;
301 	FuncCallContext *fctx;
302 	MemoryContext mctx;
303 	struct user_args *uargs;
304 
305 	if (!superuser())
306 		ereport(ERROR,
307 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
308 				 (errmsg("must be superuser to use raw page functions"))));
309 
310 	if (SRF_IS_FIRSTCALL())
311 	{
312 		TupleDesc	tupleDesc;
313 
314 		fctx = SRF_FIRSTCALL_INIT();
315 
316 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
317 
318 		page = verify_hash_page(raw_page, LH_BUCKET_PAGE | LH_OVERFLOW_PAGE);
319 
320 		uargs = palloc(sizeof(struct user_args));
321 
322 		uargs->page = page;
323 
324 		uargs->offset = FirstOffsetNumber;
325 
326 		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
327 
328 		/* Build a tuple descriptor for our result type */
329 		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
330 			elog(ERROR, "return type must be a row type");
331 		tupleDesc = BlessTupleDesc(tupleDesc);
332 
333 		fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
334 
335 		fctx->user_fctx = uargs;
336 
337 		MemoryContextSwitchTo(mctx);
338 	}
339 
340 	fctx = SRF_PERCALL_SETUP();
341 	uargs = fctx->user_fctx;
342 
343 	if (fctx->call_cntr < fctx->max_calls)
344 	{
345 		ItemId		id;
346 		IndexTuple	itup;
347 		int			j;
348 
349 		id = PageGetItemId(uargs->page, uargs->offset);
350 
351 		if (!ItemIdIsValid(id))
352 			elog(ERROR, "invalid ItemId");
353 
354 		itup = (IndexTuple) PageGetItem(uargs->page, id);
355 
356 		MemSet(nulls, 0, sizeof(nulls));
357 
358 		j = 0;
359 		values[j++] = Int32GetDatum((int32) uargs->offset);
360 		values[j++] = PointerGetDatum(&itup->t_tid);
361 
362 		hashkey = _hash_get_indextuple_hashkey(itup);
363 		values[j] = Int64GetDatum((int64) hashkey);
364 
365 		tuple = heap_form_tuple(fctx->attinmeta->tupdesc, values, nulls);
366 		result = HeapTupleGetDatum(tuple);
367 
368 		uargs->offset = uargs->offset + 1;
369 
370 		SRF_RETURN_NEXT(fctx, result);
371 	}
372 	else
373 	{
374 		pfree(uargs);
375 		SRF_RETURN_DONE(fctx);
376 	}
377 }
378 
379 /* ------------------------------------------------
380  * hash_bitmap_info()
381  *
382  * Get bitmap information for a particular overflow page
383  *
384  * Usage: SELECT * FROM hash_bitmap_info('con_hash_index'::regclass, 5);
385  * ------------------------------------------------
386  */
387 Datum
hash_bitmap_info(PG_FUNCTION_ARGS)388 hash_bitmap_info(PG_FUNCTION_ARGS)
389 {
390 	Oid			indexRelid = PG_GETARG_OID(0);
391 	uint64		ovflblkno = PG_GETARG_INT64(1);
392 	HashMetaPage metap;
393 	Buffer		metabuf,
394 				mapbuf;
395 	BlockNumber bitmapblkno;
396 	Page		mappage;
397 	bool		bit = false;
398 	TupleDesc	tupleDesc;
399 	Relation	indexRel;
400 	uint32		ovflbitno;
401 	int32		bitmappage,
402 				bitmapbit;
403 	HeapTuple	tuple;
404 	int			i,
405 				j;
406 	Datum		values[3];
407 	bool		nulls[3];
408 	uint32	   *freep;
409 
410 	if (!superuser())
411 		ereport(ERROR,
412 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
413 				 (errmsg("must be superuser to use raw page functions"))));
414 
415 	indexRel = index_open(indexRelid, AccessShareLock);
416 
417 	if (!IS_HASH(indexRel))
418 		elog(ERROR, "relation \"%s\" is not a hash index",
419 			 RelationGetRelationName(indexRel));
420 
421 	if (RELATION_IS_OTHER_TEMP(indexRel))
422 		ereport(ERROR,
423 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
424 				 errmsg("cannot access temporary tables of other sessions")));
425 
426 	if (ovflblkno >= RelationGetNumberOfBlocks(indexRel))
427 		ereport(ERROR,
428 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
429 				 errmsg("block number " UINT64_FORMAT " is out of range for relation \"%s\"",
430 						ovflblkno, RelationGetRelationName(indexRel))));
431 
432 	/* Read the metapage so we can determine which bitmap page to use */
433 	metabuf = _hash_getbuf(indexRel, HASH_METAPAGE, HASH_READ, LH_META_PAGE);
434 	metap = HashPageGetMeta(BufferGetPage(metabuf));
435 
436 	/*
437 	 * Reject attempt to read the bit for a metapage or bitmap page; this is
438 	 * only meaningful for overflow pages.
439 	 */
440 	if (ovflblkno == 0)
441 		ereport(ERROR,
442 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
443 				 errmsg("invalid overflow block number %u",
444 						(BlockNumber) ovflblkno)));
445 	for (i = 0; i < metap->hashm_nmaps; i++)
446 		if (metap->hashm_mapp[i] == ovflblkno)
447 			ereport(ERROR,
448 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
449 					 errmsg("invalid overflow block number %u",
450 							(BlockNumber) ovflblkno)));
451 
452 	/*
453 	 * Identify overflow bit number.  This will error out for primary bucket
454 	 * pages, and we've already rejected the metapage and bitmap pages above.
455 	 */
456 	ovflbitno = _hash_ovflblkno_to_bitno(metap, (BlockNumber) ovflblkno);
457 
458 	bitmappage = ovflbitno >> BMPG_SHIFT(metap);
459 	bitmapbit = ovflbitno & BMPG_MASK(metap);
460 
461 	if (bitmappage >= metap->hashm_nmaps)
462 		ereport(ERROR,
463 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
464 				 errmsg("invalid overflow block number %u",
465 						(BlockNumber) ovflblkno)));
466 
467 	bitmapblkno = metap->hashm_mapp[bitmappage];
468 
469 	_hash_relbuf(indexRel, metabuf);
470 
471 	/* Check the status of bitmap bit for overflow page */
472 	mapbuf = _hash_getbuf(indexRel, bitmapblkno, HASH_READ, LH_BITMAP_PAGE);
473 	mappage = BufferGetPage(mapbuf);
474 	freep = HashPageGetBitmap(mappage);
475 
476 	bit = ISSET(freep, bitmapbit) != 0;
477 
478 	_hash_relbuf(indexRel, mapbuf);
479 	index_close(indexRel, AccessShareLock);
480 
481 	/* Build a tuple descriptor for our result type */
482 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
483 		elog(ERROR, "return type must be a row type");
484 	tupleDesc = BlessTupleDesc(tupleDesc);
485 
486 	MemSet(nulls, 0, sizeof(nulls));
487 
488 	j = 0;
489 	values[j++] = Int64GetDatum((int64) bitmapblkno);
490 	values[j++] = Int32GetDatum(bitmapbit);
491 	values[j++] = BoolGetDatum(bit);
492 
493 	tuple = heap_form_tuple(tupleDesc, values, nulls);
494 
495 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
496 }
497 
498 /* ------------------------------------------------
499  * hash_metapage_info()
500  *
501  * Get the meta-page information for a hash index
502  *
503  * Usage: SELECT * FROM hash_metapage_info(get_raw_page('con_hash_index', 0))
504  * ------------------------------------------------
505  */
506 Datum
hash_metapage_info(PG_FUNCTION_ARGS)507 hash_metapage_info(PG_FUNCTION_ARGS)
508 {
509 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
510 	Page		page;
511 	HashMetaPageData *metad;
512 	TupleDesc	tupleDesc;
513 	HeapTuple	tuple;
514 	int			i,
515 				j;
516 	Datum		values[16];
517 	bool		nulls[16];
518 	Datum		spares[HASH_MAX_SPLITPOINTS];
519 	Datum		mapp[HASH_MAX_BITMAPS];
520 
521 	if (!superuser())
522 		ereport(ERROR,
523 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
524 				 (errmsg("must be superuser to use raw page functions"))));
525 
526 	page = verify_hash_page(raw_page, LH_META_PAGE);
527 
528 	/* Build a tuple descriptor for our result type */
529 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
530 		elog(ERROR, "return type must be a row type");
531 	tupleDesc = BlessTupleDesc(tupleDesc);
532 
533 	metad = HashPageGetMeta(page);
534 
535 	MemSet(nulls, 0, sizeof(nulls));
536 
537 	j = 0;
538 	values[j++] = Int64GetDatum((int64) metad->hashm_magic);
539 	values[j++] = Int64GetDatum((int64) metad->hashm_version);
540 	values[j++] = Float8GetDatum(metad->hashm_ntuples);
541 	values[j++] = Int32GetDatum((int32) metad->hashm_ffactor);
542 	values[j++] = Int32GetDatum((int32) metad->hashm_bsize);
543 	values[j++] = Int32GetDatum((int32) metad->hashm_bmsize);
544 	values[j++] = Int32GetDatum((int32) metad->hashm_bmshift);
545 	values[j++] = Int64GetDatum((int64) metad->hashm_maxbucket);
546 	values[j++] = Int64GetDatum((int64) metad->hashm_highmask);
547 	values[j++] = Int64GetDatum((int64) metad->hashm_lowmask);
548 	values[j++] = Int64GetDatum((int64) metad->hashm_ovflpoint);
549 	values[j++] = Int64GetDatum((int64) metad->hashm_firstfree);
550 	values[j++] = Int64GetDatum((int64) metad->hashm_nmaps);
551 	values[j++] = ObjectIdGetDatum((Oid) metad->hashm_procid);
552 
553 	for (i = 0; i < HASH_MAX_SPLITPOINTS; i++)
554 		spares[i] = Int64GetDatum((int64) metad->hashm_spares[i]);
555 	values[j++] = PointerGetDatum(construct_array(spares,
556 												  HASH_MAX_SPLITPOINTS,
557 												  INT8OID,
558 												  8, FLOAT8PASSBYVAL, 'd'));
559 
560 	for (i = 0; i < HASH_MAX_BITMAPS; i++)
561 		mapp[i] = Int64GetDatum((int64) metad->hashm_mapp[i]);
562 	values[j++] = PointerGetDatum(construct_array(mapp,
563 												  HASH_MAX_BITMAPS,
564 												  INT8OID,
565 												  8, FLOAT8PASSBYVAL, 'd'));
566 
567 	tuple = heap_form_tuple(tupleDesc, values, nulls);
568 
569 	PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
570 }
571