1 /*
2 * brinfuncs.c
3 * Functions to investigate BRIN indexes
4 *
5 * Copyright (c) 2014-2019, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * contrib/pageinspect/brinfuncs.c
9 */
#include "postgres.h"

#include "pageinspect.h"

#include "access/brin.h"
#include "access/brin_internal.h"
#include "access/brin_page.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
29
30
/*
 * Version-1 calling convention declarations for the SQL-callable functions
 * defined in this file.
 */
PG_FUNCTION_INFO_V1(brin_page_type);
PG_FUNCTION_INFO_V1(brin_page_items);
PG_FUNCTION_INFO_V1(brin_metapage_info);
PG_FUNCTION_INFO_V1(brin_revmap_data);
35
36 typedef struct brin_column_state
37 {
38 int nstored;
39 FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER];
40 } brin_column_state;
41
42
43 static Page verify_brin_page(bytea *raw_page, uint16 type,
44 const char *strtype);
45
46 Datum
brin_page_type(PG_FUNCTION_ARGS)47 brin_page_type(PG_FUNCTION_ARGS)
48 {
49 bytea *raw_page = PG_GETARG_BYTEA_P(0);
50 Page page = VARDATA(raw_page);
51 int raw_page_size;
52 char *type;
53
54 if (!superuser())
55 ereport(ERROR,
56 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
57 (errmsg("must be superuser to use raw page functions"))));
58
59 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
60
61 if (raw_page_size != BLCKSZ)
62 ereport(ERROR,
63 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
64 errmsg("input page too small"),
65 errdetail("Expected size %d, got %d",
66 BLCKSZ, raw_page_size)));
67
68 switch (BrinPageType(page))
69 {
70 case BRIN_PAGETYPE_META:
71 type = "meta";
72 break;
73 case BRIN_PAGETYPE_REVMAP:
74 type = "revmap";
75 break;
76 case BRIN_PAGETYPE_REGULAR:
77 type = "regular";
78 break;
79 default:
80 type = psprintf("unknown (%02x)", BrinPageType(page));
81 break;
82 }
83
84 PG_RETURN_TEXT_P(cstring_to_text(type));
85 }
86
87 /*
88 * Verify that the given bytea contains a BRIN page of the indicated page
89 * type, or die in the attempt. A pointer to the page is returned.
90 */
91 static Page
verify_brin_page(bytea * raw_page,uint16 type,const char * strtype)92 verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
93 {
94 Page page;
95 int raw_page_size;
96
97 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
98
99 if (raw_page_size != BLCKSZ)
100 ereport(ERROR,
101 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
102 errmsg("input page too small"),
103 errdetail("Expected size %d, got %d",
104 BLCKSZ, raw_page_size)));
105
106 page = VARDATA(raw_page);
107
108 /* verify the special space says this page is what we want */
109 if (BrinPageType(page) != type)
110 ereport(ERROR,
111 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
112 errmsg("page is not a BRIN page of type \"%s\"", strtype),
113 errdetail("Expected special type %08x, got %08x.",
114 type, BrinPageType(page))));
115
116 return page;
117 }
118
119
120 /*
121 * Extract all item values from a BRIN index page
122 *
123 * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
124 */
125 Datum
brin_page_items(PG_FUNCTION_ARGS)126 brin_page_items(PG_FUNCTION_ARGS)
127 {
128 bytea *raw_page = PG_GETARG_BYTEA_P(0);
129 Oid indexRelid = PG_GETARG_OID(1);
130 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
131 TupleDesc tupdesc;
132 MemoryContext oldcontext;
133 Tuplestorestate *tupstore;
134 Relation indexRel;
135 brin_column_state **columns;
136 BrinDesc *bdesc;
137 BrinMemTuple *dtup;
138 Page page;
139 OffsetNumber offset;
140 AttrNumber attno;
141 bool unusedItem;
142
143 if (!superuser())
144 ereport(ERROR,
145 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
146 (errmsg("must be superuser to use raw page functions"))));
147
148 /* check to see if caller supports us returning a tuplestore */
149 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
150 ereport(ERROR,
151 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
152 errmsg("set-valued function called in context that cannot accept a set")));
153 if (!(rsinfo->allowedModes & SFRM_Materialize) ||
154 rsinfo->expectedDesc == NULL)
155 ereport(ERROR,
156 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
157 errmsg("materialize mode required, but it is not allowed in this context")));
158
159 /* Build a tuple descriptor for our result type */
160 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
161 elog(ERROR, "return type must be a row type");
162
163 /* Build tuplestore to hold the result rows */
164 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
165
166 tupstore = tuplestore_begin_heap(true, false, work_mem);
167 rsinfo->returnMode = SFRM_Materialize;
168 rsinfo->setResult = tupstore;
169 rsinfo->setDesc = tupdesc;
170
171 MemoryContextSwitchTo(oldcontext);
172
173 indexRel = index_open(indexRelid, AccessShareLock);
174 bdesc = brin_build_desc(indexRel);
175
176 /* minimally verify the page we got */
177 page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
178
179 /*
180 * Initialize output functions for all indexed datatypes; simplifies
181 * calling them later.
182 */
183 columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
184 for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
185 {
186 Oid output;
187 bool isVarlena;
188 BrinOpcInfo *opcinfo;
189 int i;
190 brin_column_state *column;
191
192 opcinfo = bdesc->bd_info[attno - 1];
193 column = palloc(offsetof(brin_column_state, outputFn) +
194 sizeof(FmgrInfo) * opcinfo->oi_nstored);
195
196 column->nstored = opcinfo->oi_nstored;
197 for (i = 0; i < opcinfo->oi_nstored; i++)
198 {
199 getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
200 fmgr_info(output, &column->outputFn[i]);
201 }
202
203 columns[attno - 1] = column;
204 }
205
206 offset = FirstOffsetNumber;
207 unusedItem = false;
208 dtup = NULL;
209 for (;;)
210 {
211 Datum values[7];
212 bool nulls[7];
213
214 /*
215 * This loop is called once for every attribute of every tuple in the
216 * page. At the start of a tuple, we get a NULL dtup; that's our
217 * signal for obtaining and decoding the next one. If that's not the
218 * case, we output the next attribute.
219 */
220 if (dtup == NULL)
221 {
222 ItemId itemId;
223
224 /* verify item status: if there's no data, we can't decode */
225 itemId = PageGetItemId(page, offset);
226 if (ItemIdIsUsed(itemId))
227 {
228 dtup = brin_deform_tuple(bdesc,
229 (BrinTuple *) PageGetItem(page, itemId),
230 NULL);
231 attno = 1;
232 unusedItem = false;
233 }
234 else
235 unusedItem = true;
236 }
237 else
238 attno++;
239
240 MemSet(nulls, 0, sizeof(nulls));
241
242 if (unusedItem)
243 {
244 values[0] = UInt16GetDatum(offset);
245 nulls[1] = true;
246 nulls[2] = true;
247 nulls[3] = true;
248 nulls[4] = true;
249 nulls[5] = true;
250 nulls[6] = true;
251 }
252 else
253 {
254 int att = attno - 1;
255
256 values[0] = UInt16GetDatum(offset);
257 values[1] = UInt32GetDatum(dtup->bt_blkno);
258 values[2] = UInt16GetDatum(attno);
259 values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
260 values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
261 values[5] = BoolGetDatum(dtup->bt_placeholder);
262 if (!dtup->bt_columns[att].bv_allnulls)
263 {
264 BrinValues *bvalues = &dtup->bt_columns[att];
265 StringInfoData s;
266 bool first;
267 int i;
268
269 initStringInfo(&s);
270 appendStringInfoChar(&s, '{');
271
272 first = true;
273 for (i = 0; i < columns[att]->nstored; i++)
274 {
275 char *val;
276
277 if (!first)
278 appendStringInfoString(&s, " .. ");
279 first = false;
280 val = OutputFunctionCall(&columns[att]->outputFn[i],
281 bvalues->bv_values[i]);
282 appendStringInfoString(&s, val);
283 pfree(val);
284 }
285 appendStringInfoChar(&s, '}');
286
287 values[6] = CStringGetTextDatum(s.data);
288 pfree(s.data);
289 }
290 else
291 {
292 nulls[6] = true;
293 }
294 }
295
296 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
297
298 /*
299 * If the item was unused, jump straight to the next one; otherwise,
300 * the only cleanup needed here is to set our signal to go to the next
301 * tuple in the following iteration, by freeing the current one.
302 */
303 if (unusedItem)
304 offset = OffsetNumberNext(offset);
305 else if (attno >= bdesc->bd_tupdesc->natts)
306 {
307 pfree(dtup);
308 dtup = NULL;
309 offset = OffsetNumberNext(offset);
310 }
311
312 /*
313 * If we're beyond the end of the page, we're done.
314 */
315 if (offset > PageGetMaxOffsetNumber(page))
316 break;
317 }
318
319 /* clean up and return the tuplestore */
320 brin_free_desc(bdesc);
321 tuplestore_donestoring(tupstore);
322 index_close(indexRel, AccessShareLock);
323
324 return (Datum) 0;
325 }
326
327 Datum
brin_metapage_info(PG_FUNCTION_ARGS)328 brin_metapage_info(PG_FUNCTION_ARGS)
329 {
330 bytea *raw_page = PG_GETARG_BYTEA_P(0);
331 Page page;
332 BrinMetaPageData *meta;
333 TupleDesc tupdesc;
334 Datum values[4];
335 bool nulls[4];
336 HeapTuple htup;
337
338 if (!superuser())
339 ereport(ERROR,
340 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
341 (errmsg("must be superuser to use raw page functions"))));
342
343 page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
344
345 /* Build a tuple descriptor for our result type */
346 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
347 elog(ERROR, "return type must be a row type");
348 tupdesc = BlessTupleDesc(tupdesc);
349
350 /* Extract values from the metapage */
351 meta = (BrinMetaPageData *) PageGetContents(page);
352 MemSet(nulls, 0, sizeof(nulls));
353 values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
354 values[1] = Int32GetDatum(meta->brinVersion);
355 values[2] = Int32GetDatum(meta->pagesPerRange);
356 values[3] = Int64GetDatum(meta->lastRevmapPage);
357
358 htup = heap_form_tuple(tupdesc, values, nulls);
359
360 PG_RETURN_DATUM(HeapTupleGetDatum(htup));
361 }
362
363 /*
364 * Return the TID array stored in a BRIN revmap page
365 */
366 Datum
brin_revmap_data(PG_FUNCTION_ARGS)367 brin_revmap_data(PG_FUNCTION_ARGS)
368 {
369 struct
370 {
371 ItemPointerData *tids;
372 int idx;
373 } *state;
374 FuncCallContext *fctx;
375
376 if (!superuser())
377 ereport(ERROR,
378 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
379 (errmsg("must be superuser to use raw page functions"))));
380
381 if (SRF_IS_FIRSTCALL())
382 {
383 bytea *raw_page = PG_GETARG_BYTEA_P(0);
384 MemoryContext mctx;
385 Page page;
386
387 /* minimally verify the page we got */
388 page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
389
390 /* create a function context for cross-call persistence */
391 fctx = SRF_FIRSTCALL_INIT();
392
393 /* switch to memory context appropriate for multiple function calls */
394 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
395
396 state = palloc(sizeof(*state));
397 state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
398 state->idx = 0;
399
400 fctx->user_fctx = state;
401
402 MemoryContextSwitchTo(mctx);
403 }
404
405 fctx = SRF_PERCALL_SETUP();
406 state = fctx->user_fctx;
407
408 if (state->idx < REVMAP_PAGE_MAXITEMS)
409 SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
410
411 SRF_RETURN_DONE(fctx);
412 }
413