1 /*
2  * contrib/pageinspect/btreefuncs.c
3  *
4  *
5  * btreefuncs.c
6  *
7  * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8  *
9  * Permission to use, copy, modify, and distribute this software and
10  * its documentation for any purpose, without fee, and without a
11  * written agreement is hereby granted, provided that the above
12  * copyright notice and this paragraph and the following two
13  * paragraphs appear in all copies.
14  *
15  * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16  * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17  * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18  * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19  * OF THE POSSIBILITY OF SUCH DAMAGE.
20  *
21  * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE.  THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24  * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25  * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26  */
27 
28 #include "postgres.h"
29 
30 #include "access/nbtree.h"
31 #include "catalog/namespace.h"
32 #include "catalog/pg_am.h"
33 #include "funcapi.h"
34 #include "miscadmin.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
37 
38 
/*
 * Register the SQL-callable entry points defined in this file (version-1
 * function call convention).  Listed in the order they are defined below.
 */
PG_FUNCTION_INFO_V1(bt_page_stats);
PG_FUNCTION_INFO_V1(bt_page_items);
PG_FUNCTION_INFO_V1(bt_metap);
42 
/* True if the relation's relkind says it is an index. */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
/* True if the relation's access method is btree. */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * Raise an ERROR unless blkno is smaller than the relation's current number
 * of blocks.
 *
 * Note: BlockNumber is unsigned, hence can't be negative.
 *
 * The body is wrapped in do { ... } while (0) so that the macro expands to
 * exactly one statement.  With a bare { ... } block, writing
 * "if (x) CHECK_RELATION_BLOCK_RANGE(rel, blkno); else ..." would not
 * compile, because the trailing semicolon terminates the if statement.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
50 
51 /* ------------------------------------------------
52  * structure for single btree page statistics
53  * ------------------------------------------------
54  */
55 typedef struct BTPageStat
56 {
57 	uint32		blkno;
58 	uint32		live_items;
59 	uint32		dead_items;
60 	uint32		page_size;
61 	uint32		max_avail;
62 	uint32		free_size;
63 	uint32		avg_item_size;
64 	char		type;
65 
66 	/* opaque data */
67 	BlockNumber btpo_prev;
68 	BlockNumber btpo_next;
69 	union
70 	{
71 		uint32		level;
72 		TransactionId xact;
73 	}			btpo;
74 	uint16		btpo_flags;
75 	BTCycleId	btpo_cycleid;
76 } BTPageStat;
77 
78 
79 /* -------------------------------------------------
80  * GetBTPageStatistics()
81  *
82  * Collect statistics of single b-tree page
83  * -------------------------------------------------
84  */
85 static void
GetBTPageStatistics(BlockNumber blkno,Buffer buffer,BTPageStat * stat)86 GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
87 {
88 	Page		page = BufferGetPage(buffer);
89 	PageHeader	phdr = (PageHeader) page;
90 	OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
91 	BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
92 	int			item_size = 0;
93 	int			off;
94 
95 	stat->blkno = blkno;
96 
97 	stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
98 
99 	stat->dead_items = stat->live_items = 0;
100 
101 	stat->page_size = PageGetPageSize(page);
102 
103 	/* page type (flags) */
104 	if (P_ISDELETED(opaque))
105 	{
106 		stat->type = 'd';
107 		stat->btpo.xact = opaque->btpo.xact;
108 		return;
109 	}
110 	else if (P_IGNORE(opaque))
111 		stat->type = 'e';
112 	else if (P_ISLEAF(opaque))
113 		stat->type = 'l';
114 	else if (P_ISROOT(opaque))
115 		stat->type = 'r';
116 	else
117 		stat->type = 'i';
118 
119 	/* btpage opaque data */
120 	stat->btpo_prev = opaque->btpo_prev;
121 	stat->btpo_next = opaque->btpo_next;
122 	stat->btpo.level = opaque->btpo.level;
123 	stat->btpo_flags = opaque->btpo_flags;
124 	stat->btpo_cycleid = opaque->btpo_cycleid;
125 
126 	/* count live and dead tuples, and free space */
127 	for (off = FirstOffsetNumber; off <= maxoff; off++)
128 	{
129 		IndexTuple	itup;
130 
131 		ItemId		id = PageGetItemId(page, off);
132 
133 		itup = (IndexTuple) PageGetItem(page, id);
134 
135 		item_size += IndexTupleSize(itup);
136 
137 		if (!ItemIdIsDead(id))
138 			stat->live_items++;
139 		else
140 			stat->dead_items++;
141 	}
142 	stat->free_size = PageGetFreeSpace(page);
143 
144 	if ((stat->live_items + stat->dead_items) > 0)
145 		stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
146 	else
147 		stat->avg_item_size = 0;
148 }
149 
150 /* -----------------------------------------------
151  * bt_page_stats()
152  *
153  * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
154  * -----------------------------------------------
155  */
156 Datum
bt_page_stats(PG_FUNCTION_ARGS)157 bt_page_stats(PG_FUNCTION_ARGS)
158 {
159 	text	   *relname = PG_GETARG_TEXT_P(0);
160 	uint32		blkno = PG_GETARG_UINT32(1);
161 	Buffer		buffer;
162 	Relation	rel;
163 	RangeVar   *relrv;
164 	Datum		result;
165 	HeapTuple	tuple;
166 	TupleDesc	tupleDesc;
167 	int			j;
168 	char	   *values[11];
169 	BTPageStat	stat;
170 
171 	if (!superuser())
172 		ereport(ERROR,
173 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
174 				 (errmsg("must be superuser to use pageinspect functions"))));
175 
176 	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
177 	rel = relation_openrv(relrv, AccessShareLock);
178 
179 	if (!IS_INDEX(rel) || !IS_BTREE(rel))
180 		elog(ERROR, "relation \"%s\" is not a btree index",
181 			 RelationGetRelationName(rel));
182 
183 	/*
184 	 * Reject attempts to read non-local temporary relations; we would be
185 	 * likely to get wrong data since we have no visibility into the owning
186 	 * session's local buffers.
187 	 */
188 	if (RELATION_IS_OTHER_TEMP(rel))
189 		ereport(ERROR,
190 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
191 				 errmsg("cannot access temporary tables of other sessions")));
192 
193 	if (blkno == 0)
194 		elog(ERROR, "block 0 is a meta page");
195 
196 	CHECK_RELATION_BLOCK_RANGE(rel, blkno);
197 
198 	buffer = ReadBuffer(rel, blkno);
199 	LockBuffer(buffer, BUFFER_LOCK_SHARE);
200 
201 	/* keep compiler quiet */
202 	stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
203 	stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
204 
205 	GetBTPageStatistics(blkno, buffer, &stat);
206 
207 	UnlockReleaseBuffer(buffer);
208 	relation_close(rel, AccessShareLock);
209 
210 	/* Build a tuple descriptor for our result type */
211 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
212 		elog(ERROR, "return type must be a row type");
213 
214 	j = 0;
215 	values[j++] = psprintf("%d", stat.blkno);
216 	values[j++] = psprintf("%c", stat.type);
217 	values[j++] = psprintf("%d", stat.live_items);
218 	values[j++] = psprintf("%d", stat.dead_items);
219 	values[j++] = psprintf("%d", stat.avg_item_size);
220 	values[j++] = psprintf("%d", stat.page_size);
221 	values[j++] = psprintf("%d", stat.free_size);
222 	values[j++] = psprintf("%d", stat.btpo_prev);
223 	values[j++] = psprintf("%d", stat.btpo_next);
224 	values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
225 	values[j++] = psprintf("%d", stat.btpo_flags);
226 
227 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
228 								   values);
229 
230 	result = HeapTupleGetDatum(tuple);
231 
232 	PG_RETURN_DATUM(result);
233 }
234 
235 /*-------------------------------------------------------
236  * bt_page_items()
237  *
238  * Get IndexTupleData set in a btree page
239  *
240  * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
241  *-------------------------------------------------------
242  */
243 
244 /*
245  * cross-call data structure for SRF
246  */
247 struct user_args
248 {
249 	Page		page;
250 	OffsetNumber offset;
251 };
252 
253 Datum
bt_page_items(PG_FUNCTION_ARGS)254 bt_page_items(PG_FUNCTION_ARGS)
255 {
256 	text	   *relname = PG_GETARG_TEXT_P(0);
257 	uint32		blkno = PG_GETARG_UINT32(1);
258 	Datum		result;
259 	char	   *values[6];
260 	HeapTuple	tuple;
261 	FuncCallContext *fctx;
262 	MemoryContext mctx;
263 	struct user_args *uargs;
264 
265 	if (!superuser())
266 		ereport(ERROR,
267 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
268 				 (errmsg("must be superuser to use pageinspect functions"))));
269 
270 	if (SRF_IS_FIRSTCALL())
271 	{
272 		RangeVar   *relrv;
273 		Relation	rel;
274 		Buffer		buffer;
275 		BTPageOpaque opaque;
276 		TupleDesc	tupleDesc;
277 
278 		fctx = SRF_FIRSTCALL_INIT();
279 
280 		relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
281 		rel = relation_openrv(relrv, AccessShareLock);
282 
283 		if (!IS_INDEX(rel) || !IS_BTREE(rel))
284 			elog(ERROR, "relation \"%s\" is not a btree index",
285 				 RelationGetRelationName(rel));
286 
287 		/*
288 		 * Reject attempts to read non-local temporary relations; we would be
289 		 * likely to get wrong data since we have no visibility into the
290 		 * owning session's local buffers.
291 		 */
292 		if (RELATION_IS_OTHER_TEMP(rel))
293 			ereport(ERROR,
294 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
295 				errmsg("cannot access temporary tables of other sessions")));
296 
297 		if (blkno == 0)
298 			elog(ERROR, "block 0 is a meta page");
299 
300 		CHECK_RELATION_BLOCK_RANGE(rel, blkno);
301 
302 		buffer = ReadBuffer(rel, blkno);
303 		LockBuffer(buffer, BUFFER_LOCK_SHARE);
304 
305 		/*
306 		 * We copy the page into local storage to avoid holding pin on the
307 		 * buffer longer than we must, and possibly failing to release it at
308 		 * all if the calling query doesn't fetch all rows.
309 		 */
310 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
311 
312 		uargs = palloc(sizeof(struct user_args));
313 
314 		uargs->page = palloc(BLCKSZ);
315 		memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
316 
317 		UnlockReleaseBuffer(buffer);
318 		relation_close(rel, AccessShareLock);
319 
320 		uargs->offset = FirstOffsetNumber;
321 
322 		opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
323 
324 		if (P_ISDELETED(opaque))
325 			elog(NOTICE, "page is deleted");
326 
327 		fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
328 
329 		/* Build a tuple descriptor for our result type */
330 		if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
331 			elog(ERROR, "return type must be a row type");
332 
333 		fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
334 
335 		fctx->user_fctx = uargs;
336 
337 		MemoryContextSwitchTo(mctx);
338 	}
339 
340 	fctx = SRF_PERCALL_SETUP();
341 	uargs = fctx->user_fctx;
342 
343 	if (fctx->call_cntr < fctx->max_calls)
344 	{
345 		ItemId		id;
346 		IndexTuple	itup;
347 		int			j;
348 		int			off;
349 		int			dlen;
350 		char	   *dump;
351 		char	   *ptr;
352 
353 		id = PageGetItemId(uargs->page, uargs->offset);
354 
355 		if (!ItemIdIsValid(id))
356 			elog(ERROR, "invalid ItemId");
357 
358 		itup = (IndexTuple) PageGetItem(uargs->page, id);
359 
360 		j = 0;
361 		values[j++] = psprintf("%d", uargs->offset);
362 		values[j++] = psprintf("(%u,%u)",
363 							   BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
364 							   itup->t_tid.ip_posid);
365 		values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
366 		values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
367 		values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
368 
369 		ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
370 		dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
371 		dump = palloc0(dlen * 3 + 1);
372 		values[j] = dump;
373 		for (off = 0; off < dlen; off++)
374 		{
375 			if (off > 0)
376 				*dump++ = ' ';
377 			sprintf(dump, "%02x", *(ptr + off) & 0xff);
378 			dump += 2;
379 		}
380 
381 		tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
382 		result = HeapTupleGetDatum(tuple);
383 
384 		uargs->offset = uargs->offset + 1;
385 
386 		SRF_RETURN_NEXT(fctx, result);
387 	}
388 	else
389 	{
390 		pfree(uargs->page);
391 		pfree(uargs);
392 		SRF_RETURN_DONE(fctx);
393 	}
394 }
395 
396 
397 /* ------------------------------------------------
398  * bt_metap()
399  *
400  * Get a btree's meta-page information
401  *
402  * Usage: SELECT * FROM bt_metap('t1_pkey')
403  * ------------------------------------------------
404  */
405 Datum
bt_metap(PG_FUNCTION_ARGS)406 bt_metap(PG_FUNCTION_ARGS)
407 {
408 	text	   *relname = PG_GETARG_TEXT_P(0);
409 	Datum		result;
410 	Relation	rel;
411 	RangeVar   *relrv;
412 	BTMetaPageData *metad;
413 	TupleDesc	tupleDesc;
414 	int			j;
415 	char	   *values[6];
416 	Buffer		buffer;
417 	Page		page;
418 	HeapTuple	tuple;
419 
420 	if (!superuser())
421 		ereport(ERROR,
422 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
423 				 (errmsg("must be superuser to use pageinspect functions"))));
424 
425 	relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
426 	rel = relation_openrv(relrv, AccessShareLock);
427 
428 	if (!IS_INDEX(rel) || !IS_BTREE(rel))
429 		elog(ERROR, "relation \"%s\" is not a btree index",
430 			 RelationGetRelationName(rel));
431 
432 	/*
433 	 * Reject attempts to read non-local temporary relations; we would be
434 	 * likely to get wrong data since we have no visibility into the owning
435 	 * session's local buffers.
436 	 */
437 	if (RELATION_IS_OTHER_TEMP(rel))
438 		ereport(ERROR,
439 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
440 				 errmsg("cannot access temporary tables of other sessions")));
441 
442 	buffer = ReadBuffer(rel, 0);
443 	LockBuffer(buffer, BUFFER_LOCK_SHARE);
444 
445 	page = BufferGetPage(buffer);
446 	metad = BTPageGetMeta(page);
447 
448 	/* Build a tuple descriptor for our result type */
449 	if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
450 		elog(ERROR, "return type must be a row type");
451 
452 	j = 0;
453 	values[j++] = psprintf("%d", metad->btm_magic);
454 	values[j++] = psprintf("%d", metad->btm_version);
455 	values[j++] = psprintf("%d", metad->btm_root);
456 	values[j++] = psprintf("%d", metad->btm_level);
457 	values[j++] = psprintf("%d", metad->btm_fastroot);
458 	values[j++] = psprintf("%d", metad->btm_fastlevel);
459 
460 	tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
461 								   values);
462 
463 	result = HeapTupleGetDatum(tuple);
464 
465 	UnlockReleaseBuffer(buffer);
466 	relation_close(rel, AccessShareLock);
467 
468 	PG_RETURN_DATUM(result);
469 }
470