1 /*
2 * contrib/pageinspect/btreefuncs.c
3 *
4 *
5 * btreefuncs.c
6 *
7 * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp>
8 *
9 * Permission to use, copy, modify, and distribute this software and
10 * its documentation for any purpose, without fee, and without a
11 * written agreement is hereby granted, provided that the above
12 * copyright notice and this paragraph and the following two
13 * paragraphs appear in all copies.
14 *
15 * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT,
16 * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING
17 * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS
18 * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED
19 * OF THE POSSIBILITY OF SUCH DAMAGE.
20 *
21 * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS
24 * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE,
25 * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS.
26 */
27
28 #include "postgres.h"
29
30 #include "access/nbtree.h"
31 #include "catalog/namespace.h"
32 #include "catalog/pg_am.h"
33 #include "funcapi.h"
34 #include "miscadmin.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
37
38
39 PG_FUNCTION_INFO_V1(bt_metap);
40 PG_FUNCTION_INFO_V1(bt_page_items);
41 PG_FUNCTION_INFO_V1(bt_page_stats);
42
/* true when the relation's relkind is RELKIND_INDEX */
#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX)
/* true when the relation's access method OID is BTREE_AM_OID */
#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID)

/*
 * Raise an error unless blkno is below the relation's current block count.
 *
 * note: BlockNumber is unsigned, hence can't be negative
 *
 * The body is wrapped in do { ... } while (0) so that the macro followed by
 * a semicolon forms exactly one statement and can be used safely as the
 * body of an unbraced if/else.
 */
#define CHECK_RELATION_BLOCK_RANGE(rel, blkno) \
	do { \
		if (RelationGetNumberOfBlocks(rel) <= (BlockNumber) (blkno)) \
			elog(ERROR, "block number out of range"); \
	} while (0)
50
51 /* ------------------------------------------------
52 * structure for single btree page statistics
53 * ------------------------------------------------
54 */
55 typedef struct BTPageStat
56 {
57 uint32 blkno;
58 uint32 live_items;
59 uint32 dead_items;
60 uint32 page_size;
61 uint32 max_avail;
62 uint32 free_size;
63 uint32 avg_item_size;
64 char type;
65
66 /* opaque data */
67 BlockNumber btpo_prev;
68 BlockNumber btpo_next;
69 union
70 {
71 uint32 level;
72 TransactionId xact;
73 } btpo;
74 uint16 btpo_flags;
75 BTCycleId btpo_cycleid;
76 } BTPageStat;
77
78
79 /* -------------------------------------------------
80 * GetBTPageStatistics()
81 *
82 * Collect statistics of single b-tree page
83 * -------------------------------------------------
84 */
85 static void
GetBTPageStatistics(BlockNumber blkno,Buffer buffer,BTPageStat * stat)86 GetBTPageStatistics(BlockNumber blkno, Buffer buffer, BTPageStat *stat)
87 {
88 Page page = BufferGetPage(buffer);
89 PageHeader phdr = (PageHeader) page;
90 OffsetNumber maxoff = PageGetMaxOffsetNumber(page);
91 BTPageOpaque opaque = (BTPageOpaque) PageGetSpecialPointer(page);
92 int item_size = 0;
93 int off;
94
95 stat->blkno = blkno;
96
97 stat->max_avail = BLCKSZ - (BLCKSZ - phdr->pd_special + SizeOfPageHeaderData);
98
99 stat->dead_items = stat->live_items = 0;
100
101 stat->page_size = PageGetPageSize(page);
102
103 /* page type (flags) */
104 if (P_ISDELETED(opaque))
105 {
106 stat->type = 'd';
107 stat->btpo.xact = opaque->btpo.xact;
108 return;
109 }
110 else if (P_IGNORE(opaque))
111 stat->type = 'e';
112 else if (P_ISLEAF(opaque))
113 stat->type = 'l';
114 else if (P_ISROOT(opaque))
115 stat->type = 'r';
116 else
117 stat->type = 'i';
118
119 /* btpage opaque data */
120 stat->btpo_prev = opaque->btpo_prev;
121 stat->btpo_next = opaque->btpo_next;
122 stat->btpo.level = opaque->btpo.level;
123 stat->btpo_flags = opaque->btpo_flags;
124 stat->btpo_cycleid = opaque->btpo_cycleid;
125
126 /* count live and dead tuples, and free space */
127 for (off = FirstOffsetNumber; off <= maxoff; off++)
128 {
129 IndexTuple itup;
130
131 ItemId id = PageGetItemId(page, off);
132
133 itup = (IndexTuple) PageGetItem(page, id);
134
135 item_size += IndexTupleSize(itup);
136
137 if (!ItemIdIsDead(id))
138 stat->live_items++;
139 else
140 stat->dead_items++;
141 }
142 stat->free_size = PageGetFreeSpace(page);
143
144 if ((stat->live_items + stat->dead_items) > 0)
145 stat->avg_item_size = item_size / (stat->live_items + stat->dead_items);
146 else
147 stat->avg_item_size = 0;
148 }
149
150 /* -----------------------------------------------
151 * bt_page_stats()
152 *
153 * Usage: SELECT * FROM bt_page_stats('t1_pkey', 1);
154 * -----------------------------------------------
155 */
156 Datum
bt_page_stats(PG_FUNCTION_ARGS)157 bt_page_stats(PG_FUNCTION_ARGS)
158 {
159 text *relname = PG_GETARG_TEXT_P(0);
160 uint32 blkno = PG_GETARG_UINT32(1);
161 Buffer buffer;
162 Relation rel;
163 RangeVar *relrv;
164 Datum result;
165 HeapTuple tuple;
166 TupleDesc tupleDesc;
167 int j;
168 char *values[11];
169 BTPageStat stat;
170
171 if (!superuser())
172 ereport(ERROR,
173 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
174 (errmsg("must be superuser to use pageinspect functions"))));
175
176 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
177 rel = relation_openrv(relrv, AccessShareLock);
178
179 if (!IS_INDEX(rel) || !IS_BTREE(rel))
180 elog(ERROR, "relation \"%s\" is not a btree index",
181 RelationGetRelationName(rel));
182
183 /*
184 * Reject attempts to read non-local temporary relations; we would be
185 * likely to get wrong data since we have no visibility into the owning
186 * session's local buffers.
187 */
188 if (RELATION_IS_OTHER_TEMP(rel))
189 ereport(ERROR,
190 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
191 errmsg("cannot access temporary tables of other sessions")));
192
193 if (blkno == 0)
194 elog(ERROR, "block 0 is a meta page");
195
196 CHECK_RELATION_BLOCK_RANGE(rel, blkno);
197
198 buffer = ReadBuffer(rel, blkno);
199 LockBuffer(buffer, BUFFER_LOCK_SHARE);
200
201 /* keep compiler quiet */
202 stat.btpo_prev = stat.btpo_next = InvalidBlockNumber;
203 stat.btpo_flags = stat.free_size = stat.avg_item_size = 0;
204
205 GetBTPageStatistics(blkno, buffer, &stat);
206
207 UnlockReleaseBuffer(buffer);
208 relation_close(rel, AccessShareLock);
209
210 /* Build a tuple descriptor for our result type */
211 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
212 elog(ERROR, "return type must be a row type");
213
214 j = 0;
215 values[j++] = psprintf("%d", stat.blkno);
216 values[j++] = psprintf("%c", stat.type);
217 values[j++] = psprintf("%d", stat.live_items);
218 values[j++] = psprintf("%d", stat.dead_items);
219 values[j++] = psprintf("%d", stat.avg_item_size);
220 values[j++] = psprintf("%d", stat.page_size);
221 values[j++] = psprintf("%d", stat.free_size);
222 values[j++] = psprintf("%d", stat.btpo_prev);
223 values[j++] = psprintf("%d", stat.btpo_next);
224 values[j++] = psprintf("%d", (stat.type == 'd') ? stat.btpo.xact : stat.btpo.level);
225 values[j++] = psprintf("%d", stat.btpo_flags);
226
227 tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
228 values);
229
230 result = HeapTupleGetDatum(tuple);
231
232 PG_RETURN_DATUM(result);
233 }
234
235 /*-------------------------------------------------------
236 * bt_page_items()
237 *
238 * Get IndexTupleData set in a btree page
239 *
240 * Usage: SELECT * FROM bt_page_items('t1_pkey', 1);
241 *-------------------------------------------------------
242 */
243
244 /*
245 * cross-call data structure for SRF
246 */
247 struct user_args
248 {
249 Page page;
250 OffsetNumber offset;
251 };
252
253 Datum
bt_page_items(PG_FUNCTION_ARGS)254 bt_page_items(PG_FUNCTION_ARGS)
255 {
256 text *relname = PG_GETARG_TEXT_P(0);
257 uint32 blkno = PG_GETARG_UINT32(1);
258 Datum result;
259 char *values[6];
260 HeapTuple tuple;
261 FuncCallContext *fctx;
262 MemoryContext mctx;
263 struct user_args *uargs;
264
265 if (!superuser())
266 ereport(ERROR,
267 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
268 (errmsg("must be superuser to use pageinspect functions"))));
269
270 if (SRF_IS_FIRSTCALL())
271 {
272 RangeVar *relrv;
273 Relation rel;
274 Buffer buffer;
275 BTPageOpaque opaque;
276 TupleDesc tupleDesc;
277
278 fctx = SRF_FIRSTCALL_INIT();
279
280 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
281 rel = relation_openrv(relrv, AccessShareLock);
282
283 if (!IS_INDEX(rel) || !IS_BTREE(rel))
284 elog(ERROR, "relation \"%s\" is not a btree index",
285 RelationGetRelationName(rel));
286
287 /*
288 * Reject attempts to read non-local temporary relations; we would be
289 * likely to get wrong data since we have no visibility into the
290 * owning session's local buffers.
291 */
292 if (RELATION_IS_OTHER_TEMP(rel))
293 ereport(ERROR,
294 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
295 errmsg("cannot access temporary tables of other sessions")));
296
297 if (blkno == 0)
298 elog(ERROR, "block 0 is a meta page");
299
300 CHECK_RELATION_BLOCK_RANGE(rel, blkno);
301
302 buffer = ReadBuffer(rel, blkno);
303 LockBuffer(buffer, BUFFER_LOCK_SHARE);
304
305 /*
306 * We copy the page into local storage to avoid holding pin on the
307 * buffer longer than we must, and possibly failing to release it at
308 * all if the calling query doesn't fetch all rows.
309 */
310 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
311
312 uargs = palloc(sizeof(struct user_args));
313
314 uargs->page = palloc(BLCKSZ);
315 memcpy(uargs->page, BufferGetPage(buffer), BLCKSZ);
316
317 UnlockReleaseBuffer(buffer);
318 relation_close(rel, AccessShareLock);
319
320 uargs->offset = FirstOffsetNumber;
321
322 opaque = (BTPageOpaque) PageGetSpecialPointer(uargs->page);
323
324 if (P_ISDELETED(opaque))
325 elog(NOTICE, "page is deleted");
326
327 fctx->max_calls = PageGetMaxOffsetNumber(uargs->page);
328
329 /* Build a tuple descriptor for our result type */
330 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
331 elog(ERROR, "return type must be a row type");
332
333 fctx->attinmeta = TupleDescGetAttInMetadata(tupleDesc);
334
335 fctx->user_fctx = uargs;
336
337 MemoryContextSwitchTo(mctx);
338 }
339
340 fctx = SRF_PERCALL_SETUP();
341 uargs = fctx->user_fctx;
342
343 if (fctx->call_cntr < fctx->max_calls)
344 {
345 ItemId id;
346 IndexTuple itup;
347 int j;
348 int off;
349 int dlen;
350 char *dump;
351 char *ptr;
352
353 id = PageGetItemId(uargs->page, uargs->offset);
354
355 if (!ItemIdIsValid(id))
356 elog(ERROR, "invalid ItemId");
357
358 itup = (IndexTuple) PageGetItem(uargs->page, id);
359
360 j = 0;
361 values[j++] = psprintf("%d", uargs->offset);
362 values[j++] = psprintf("(%u,%u)",
363 BlockIdGetBlockNumber(&(itup->t_tid.ip_blkid)),
364 itup->t_tid.ip_posid);
365 values[j++] = psprintf("%d", (int) IndexTupleSize(itup));
366 values[j++] = psprintf("%c", IndexTupleHasNulls(itup) ? 't' : 'f');
367 values[j++] = psprintf("%c", IndexTupleHasVarwidths(itup) ? 't' : 'f');
368
369 ptr = (char *) itup + IndexInfoFindDataOffset(itup->t_info);
370 dlen = IndexTupleSize(itup) - IndexInfoFindDataOffset(itup->t_info);
371 dump = palloc0(dlen * 3 + 1);
372 values[j] = dump;
373 for (off = 0; off < dlen; off++)
374 {
375 if (off > 0)
376 *dump++ = ' ';
377 sprintf(dump, "%02x", *(ptr + off) & 0xff);
378 dump += 2;
379 }
380
381 tuple = BuildTupleFromCStrings(fctx->attinmeta, values);
382 result = HeapTupleGetDatum(tuple);
383
384 uargs->offset = uargs->offset + 1;
385
386 SRF_RETURN_NEXT(fctx, result);
387 }
388 else
389 {
390 pfree(uargs->page);
391 pfree(uargs);
392 SRF_RETURN_DONE(fctx);
393 }
394 }
395
396
397 /* ------------------------------------------------
398 * bt_metap()
399 *
400 * Get a btree's meta-page information
401 *
402 * Usage: SELECT * FROM bt_metap('t1_pkey')
403 * ------------------------------------------------
404 */
405 Datum
bt_metap(PG_FUNCTION_ARGS)406 bt_metap(PG_FUNCTION_ARGS)
407 {
408 text *relname = PG_GETARG_TEXT_P(0);
409 Datum result;
410 Relation rel;
411 RangeVar *relrv;
412 BTMetaPageData *metad;
413 TupleDesc tupleDesc;
414 int j;
415 char *values[6];
416 Buffer buffer;
417 Page page;
418 HeapTuple tuple;
419
420 if (!superuser())
421 ereport(ERROR,
422 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
423 (errmsg("must be superuser to use pageinspect functions"))));
424
425 relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname));
426 rel = relation_openrv(relrv, AccessShareLock);
427
428 if (!IS_INDEX(rel) || !IS_BTREE(rel))
429 elog(ERROR, "relation \"%s\" is not a btree index",
430 RelationGetRelationName(rel));
431
432 /*
433 * Reject attempts to read non-local temporary relations; we would be
434 * likely to get wrong data since we have no visibility into the owning
435 * session's local buffers.
436 */
437 if (RELATION_IS_OTHER_TEMP(rel))
438 ereport(ERROR,
439 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
440 errmsg("cannot access temporary tables of other sessions")));
441
442 buffer = ReadBuffer(rel, 0);
443 LockBuffer(buffer, BUFFER_LOCK_SHARE);
444
445 page = BufferGetPage(buffer);
446 metad = BTPageGetMeta(page);
447
448 /* Build a tuple descriptor for our result type */
449 if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE)
450 elog(ERROR, "return type must be a row type");
451
452 j = 0;
453 values[j++] = psprintf("%d", metad->btm_magic);
454 values[j++] = psprintf("%d", metad->btm_version);
455 values[j++] = psprintf("%d", metad->btm_root);
456 values[j++] = psprintf("%d", metad->btm_level);
457 values[j++] = psprintf("%d", metad->btm_fastroot);
458 values[j++] = psprintf("%d", metad->btm_fastlevel);
459
460 tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc),
461 values);
462
463 result = HeapTupleGetDatum(tuple);
464
465 UnlockReleaseBuffer(buffer);
466 relation_close(rel, AccessShareLock);
467
468 PG_RETURN_DATUM(result);
469 }
470