1 /*
2 * brinfuncs.c
3 * Functions to investigate BRIN indexes
4 *
5 * Copyright (c) 2014-2020, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * contrib/pageinspect/brinfuncs.c
9 */
#include "postgres.h"

#include "access/brin.h"
#include "access/brin_internal.h"
#include "access/brin_page.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
28
/* Call-convention declarations for the four functions defined in this file */
PG_FUNCTION_INFO_V1(brin_page_type);
PG_FUNCTION_INFO_V1(brin_page_items);
PG_FUNCTION_INFO_V1(brin_metapage_info);
PG_FUNCTION_INFO_V1(brin_revmap_data);
33
34 typedef struct brin_column_state
35 {
36 int nstored;
37 FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER];
38 } brin_column_state;
39
40
41 static Page verify_brin_page(bytea *raw_page, uint16 type,
42 const char *strtype);
43
44 Datum
brin_page_type(PG_FUNCTION_ARGS)45 brin_page_type(PG_FUNCTION_ARGS)
46 {
47 bytea *raw_page = PG_GETARG_BYTEA_P(0);
48 Page page = VARDATA(raw_page);
49 int raw_page_size;
50 char *type;
51
52 if (!superuser())
53 ereport(ERROR,
54 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
55 errmsg("must be superuser to use raw page functions")));
56
57 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
58
59 if (raw_page_size != BLCKSZ)
60 ereport(ERROR,
61 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
62 errmsg("input page too small"),
63 errdetail("Expected size %d, got %d",
64 BLCKSZ, raw_page_size)));
65
66 switch (BrinPageType(page))
67 {
68 case BRIN_PAGETYPE_META:
69 type = "meta";
70 break;
71 case BRIN_PAGETYPE_REVMAP:
72 type = "revmap";
73 break;
74 case BRIN_PAGETYPE_REGULAR:
75 type = "regular";
76 break;
77 default:
78 type = psprintf("unknown (%02x)", BrinPageType(page));
79 break;
80 }
81
82 PG_RETURN_TEXT_P(cstring_to_text(type));
83 }
84
85 /*
86 * Verify that the given bytea contains a BRIN page of the indicated page
87 * type, or die in the attempt. A pointer to the page is returned.
88 */
89 static Page
verify_brin_page(bytea * raw_page,uint16 type,const char * strtype)90 verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
91 {
92 Page page;
93 int raw_page_size;
94
95 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
96
97 if (raw_page_size != BLCKSZ)
98 ereport(ERROR,
99 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
100 errmsg("input page too small"),
101 errdetail("Expected size %d, got %d",
102 BLCKSZ, raw_page_size)));
103
104 page = VARDATA(raw_page);
105
106 /* verify the special space says this page is what we want */
107 if (BrinPageType(page) != type)
108 ereport(ERROR,
109 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
110 errmsg("page is not a BRIN page of type \"%s\"", strtype),
111 errdetail("Expected special type %08x, got %08x.",
112 type, BrinPageType(page))));
113
114 return page;
115 }
116
117
118 /*
119 * Extract all item values from a BRIN index page
120 *
121 * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
122 */
123 Datum
brin_page_items(PG_FUNCTION_ARGS)124 brin_page_items(PG_FUNCTION_ARGS)
125 {
126 bytea *raw_page = PG_GETARG_BYTEA_P(0);
127 Oid indexRelid = PG_GETARG_OID(1);
128 ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
129 TupleDesc tupdesc;
130 MemoryContext oldcontext;
131 Tuplestorestate *tupstore;
132 Relation indexRel;
133 brin_column_state **columns;
134 BrinDesc *bdesc;
135 BrinMemTuple *dtup;
136 Page page;
137 OffsetNumber offset;
138 AttrNumber attno;
139 bool unusedItem;
140
141 if (!superuser())
142 ereport(ERROR,
143 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
144 errmsg("must be superuser to use raw page functions")));
145
146 /* check to see if caller supports us returning a tuplestore */
147 if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
148 ereport(ERROR,
149 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
150 errmsg("set-valued function called in context that cannot accept a set")));
151 if (!(rsinfo->allowedModes & SFRM_Materialize) ||
152 rsinfo->expectedDesc == NULL)
153 ereport(ERROR,
154 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
155 errmsg("materialize mode required, but it is not allowed in this context")));
156
157 /* Build a tuple descriptor for our result type */
158 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
159 elog(ERROR, "return type must be a row type");
160
161 /* Build tuplestore to hold the result rows */
162 oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
163
164 tupstore = tuplestore_begin_heap(true, false, work_mem);
165 rsinfo->returnMode = SFRM_Materialize;
166 rsinfo->setResult = tupstore;
167 rsinfo->setDesc = tupdesc;
168
169 MemoryContextSwitchTo(oldcontext);
170
171 indexRel = index_open(indexRelid, AccessShareLock);
172 bdesc = brin_build_desc(indexRel);
173
174 /* minimally verify the page we got */
175 page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
176
177 /*
178 * Initialize output functions for all indexed datatypes; simplifies
179 * calling them later.
180 */
181 columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
182 for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
183 {
184 Oid output;
185 bool isVarlena;
186 BrinOpcInfo *opcinfo;
187 int i;
188 brin_column_state *column;
189
190 opcinfo = bdesc->bd_info[attno - 1];
191 column = palloc(offsetof(brin_column_state, outputFn) +
192 sizeof(FmgrInfo) * opcinfo->oi_nstored);
193
194 column->nstored = opcinfo->oi_nstored;
195 for (i = 0; i < opcinfo->oi_nstored; i++)
196 {
197 getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
198 fmgr_info(output, &column->outputFn[i]);
199 }
200
201 columns[attno - 1] = column;
202 }
203
204 offset = FirstOffsetNumber;
205 unusedItem = false;
206 dtup = NULL;
207 for (;;)
208 {
209 Datum values[7];
210 bool nulls[7];
211
212 /*
213 * This loop is called once for every attribute of every tuple in the
214 * page. At the start of a tuple, we get a NULL dtup; that's our
215 * signal for obtaining and decoding the next one. If that's not the
216 * case, we output the next attribute.
217 */
218 if (dtup == NULL)
219 {
220 ItemId itemId;
221
222 /* verify item status: if there's no data, we can't decode */
223 itemId = PageGetItemId(page, offset);
224 if (ItemIdIsUsed(itemId))
225 {
226 dtup = brin_deform_tuple(bdesc,
227 (BrinTuple *) PageGetItem(page, itemId),
228 NULL);
229 attno = 1;
230 unusedItem = false;
231 }
232 else
233 unusedItem = true;
234 }
235 else
236 attno++;
237
238 MemSet(nulls, 0, sizeof(nulls));
239
240 if (unusedItem)
241 {
242 values[0] = UInt16GetDatum(offset);
243 nulls[1] = true;
244 nulls[2] = true;
245 nulls[3] = true;
246 nulls[4] = true;
247 nulls[5] = true;
248 nulls[6] = true;
249 }
250 else
251 {
252 int att = attno - 1;
253
254 values[0] = UInt16GetDatum(offset);
255 values[1] = UInt32GetDatum(dtup->bt_blkno);
256 values[2] = UInt16GetDatum(attno);
257 values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
258 values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
259 values[5] = BoolGetDatum(dtup->bt_placeholder);
260 if (!dtup->bt_columns[att].bv_allnulls)
261 {
262 BrinValues *bvalues = &dtup->bt_columns[att];
263 StringInfoData s;
264 bool first;
265 int i;
266
267 initStringInfo(&s);
268 appendStringInfoChar(&s, '{');
269
270 first = true;
271 for (i = 0; i < columns[att]->nstored; i++)
272 {
273 char *val;
274
275 if (!first)
276 appendStringInfoString(&s, " .. ");
277 first = false;
278 val = OutputFunctionCall(&columns[att]->outputFn[i],
279 bvalues->bv_values[i]);
280 appendStringInfoString(&s, val);
281 pfree(val);
282 }
283 appendStringInfoChar(&s, '}');
284
285 values[6] = CStringGetTextDatum(s.data);
286 pfree(s.data);
287 }
288 else
289 {
290 nulls[6] = true;
291 }
292 }
293
294 tuplestore_putvalues(tupstore, tupdesc, values, nulls);
295
296 /*
297 * If the item was unused, jump straight to the next one; otherwise,
298 * the only cleanup needed here is to set our signal to go to the next
299 * tuple in the following iteration, by freeing the current one.
300 */
301 if (unusedItem)
302 offset = OffsetNumberNext(offset);
303 else if (attno >= bdesc->bd_tupdesc->natts)
304 {
305 pfree(dtup);
306 dtup = NULL;
307 offset = OffsetNumberNext(offset);
308 }
309
310 /*
311 * If we're beyond the end of the page, we're done.
312 */
313 if (offset > PageGetMaxOffsetNumber(page))
314 break;
315 }
316
317 /* clean up and return the tuplestore */
318 brin_free_desc(bdesc);
319 tuplestore_donestoring(tupstore);
320 index_close(indexRel, AccessShareLock);
321
322 return (Datum) 0;
323 }
324
325 Datum
brin_metapage_info(PG_FUNCTION_ARGS)326 brin_metapage_info(PG_FUNCTION_ARGS)
327 {
328 bytea *raw_page = PG_GETARG_BYTEA_P(0);
329 Page page;
330 BrinMetaPageData *meta;
331 TupleDesc tupdesc;
332 Datum values[4];
333 bool nulls[4];
334 HeapTuple htup;
335
336 if (!superuser())
337 ereport(ERROR,
338 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
339 errmsg("must be superuser to use raw page functions")));
340
341 page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
342
343 /* Build a tuple descriptor for our result type */
344 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
345 elog(ERROR, "return type must be a row type");
346 tupdesc = BlessTupleDesc(tupdesc);
347
348 /* Extract values from the metapage */
349 meta = (BrinMetaPageData *) PageGetContents(page);
350 MemSet(nulls, 0, sizeof(nulls));
351 values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
352 values[1] = Int32GetDatum(meta->brinVersion);
353 values[2] = Int32GetDatum(meta->pagesPerRange);
354 values[3] = Int64GetDatum(meta->lastRevmapPage);
355
356 htup = heap_form_tuple(tupdesc, values, nulls);
357
358 PG_RETURN_DATUM(HeapTupleGetDatum(htup));
359 }
360
361 /*
362 * Return the TID array stored in a BRIN revmap page
363 */
364 Datum
brin_revmap_data(PG_FUNCTION_ARGS)365 brin_revmap_data(PG_FUNCTION_ARGS)
366 {
367 struct
368 {
369 ItemPointerData *tids;
370 int idx;
371 } *state;
372 FuncCallContext *fctx;
373
374 if (!superuser())
375 ereport(ERROR,
376 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
377 errmsg("must be superuser to use raw page functions")));
378
379 if (SRF_IS_FIRSTCALL())
380 {
381 bytea *raw_page = PG_GETARG_BYTEA_P(0);
382 MemoryContext mctx;
383 Page page;
384
385 /* minimally verify the page we got */
386 page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
387
388 /* create a function context for cross-call persistence */
389 fctx = SRF_FIRSTCALL_INIT();
390
391 /* switch to memory context appropriate for multiple function calls */
392 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
393
394 state = palloc(sizeof(*state));
395 state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
396 state->idx = 0;
397
398 fctx->user_fctx = state;
399
400 MemoryContextSwitchTo(mctx);
401 }
402
403 fctx = SRF_PERCALL_SETUP();
404 state = fctx->user_fctx;
405
406 if (state->idx < REVMAP_PAGE_MAXITEMS)
407 SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
408
409 SRF_RETURN_DONE(fctx);
410 }
411