1 /*
2  * brinfuncs.c
3  *		Functions to investigate BRIN indexes
4  *
5  * Copyright (c) 2014-2020, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		contrib/pageinspect/brinfuncs.c
9  */
#include "postgres.h"

#include "access/brin.h"
#include "access/brin_internal.h"
#include "access/brin_page.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/pg_am_d.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "pageinspect.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
28 
/*
 * Function-manager declarations for the SQL-callable entry points that are
 * defined in this file.
 */
PG_FUNCTION_INFO_V1(brin_page_type);
PG_FUNCTION_INFO_V1(brin_page_items);
PG_FUNCTION_INFO_V1(brin_metapage_info);
PG_FUNCTION_INFO_V1(brin_revmap_data);
33 
/*
 * Per-index-column output state built by brin_page_items: the number of
 * values the column's opclass stores per range, and the looked-up type
 * output function for each of those values.
 */
typedef struct brin_column_state
{
	/* number of valid entries in outputFn[] */
	int			nstored;
	/* output function info, one entry per stored value */
	FmgrInfo	outputFn[FLEXIBLE_ARRAY_MEMBER];
} brin_column_state;
39 
40 
/*
 * Forward declaration: checks that a raw page image is a BRIN page of the
 * given type and returns a pointer to it.
 */
static Page verify_brin_page(bytea *raw_page, uint16 type,
							 const char *strtype);
43 
44 Datum
brin_page_type(PG_FUNCTION_ARGS)45 brin_page_type(PG_FUNCTION_ARGS)
46 {
47 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
48 	Page		page = VARDATA(raw_page);
49 	int			raw_page_size;
50 	char	   *type;
51 
52 	if (!superuser())
53 		ereport(ERROR,
54 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
55 				 errmsg("must be superuser to use raw page functions")));
56 
57 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
58 
59 	if (raw_page_size != BLCKSZ)
60 		ereport(ERROR,
61 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
62 				 errmsg("input page too small"),
63 				 errdetail("Expected size %d, got %d",
64 						   BLCKSZ, raw_page_size)));
65 
66 	switch (BrinPageType(page))
67 	{
68 		case BRIN_PAGETYPE_META:
69 			type = "meta";
70 			break;
71 		case BRIN_PAGETYPE_REVMAP:
72 			type = "revmap";
73 			break;
74 		case BRIN_PAGETYPE_REGULAR:
75 			type = "regular";
76 			break;
77 		default:
78 			type = psprintf("unknown (%02x)", BrinPageType(page));
79 			break;
80 	}
81 
82 	PG_RETURN_TEXT_P(cstring_to_text(type));
83 }
84 
85 /*
86  * Verify that the given bytea contains a BRIN page of the indicated page
87  * type, or die in the attempt.  A pointer to the page is returned.
88  */
89 static Page
verify_brin_page(bytea * raw_page,uint16 type,const char * strtype)90 verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
91 {
92 	Page		page;
93 	int			raw_page_size;
94 
95 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
96 
97 	if (raw_page_size != BLCKSZ)
98 		ereport(ERROR,
99 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
100 				 errmsg("input page too small"),
101 				 errdetail("Expected size %d, got %d",
102 						   BLCKSZ, raw_page_size)));
103 
104 	page = VARDATA(raw_page);
105 
106 	/* verify the special space says this page is what we want */
107 	if (BrinPageType(page) != type)
108 		ereport(ERROR,
109 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
110 				 errmsg("page is not a BRIN page of type \"%s\"", strtype),
111 				 errdetail("Expected special type %08x, got %08x.",
112 						   type, BrinPageType(page))));
113 
114 	return page;
115 }
116 
117 
118 /*
119  * Extract all item values from a BRIN index page
120  *
121  * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
122  */
123 Datum
brin_page_items(PG_FUNCTION_ARGS)124 brin_page_items(PG_FUNCTION_ARGS)
125 {
126 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
127 	Oid			indexRelid = PG_GETARG_OID(1);
128 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
129 	TupleDesc	tupdesc;
130 	MemoryContext oldcontext;
131 	Tuplestorestate *tupstore;
132 	Relation	indexRel;
133 	brin_column_state **columns;
134 	BrinDesc   *bdesc;
135 	BrinMemTuple *dtup;
136 	Page		page;
137 	OffsetNumber offset;
138 	AttrNumber	attno;
139 	bool		unusedItem;
140 
141 	if (!superuser())
142 		ereport(ERROR,
143 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
144 				 errmsg("must be superuser to use raw page functions")));
145 
146 	/* check to see if caller supports us returning a tuplestore */
147 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
148 		ereport(ERROR,
149 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
150 				 errmsg("set-valued function called in context that cannot accept a set")));
151 	if (!(rsinfo->allowedModes & SFRM_Materialize) ||
152 		rsinfo->expectedDesc == NULL)
153 		ereport(ERROR,
154 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
155 				 errmsg("materialize mode required, but it is not allowed in this context")));
156 
157 	/* Build a tuple descriptor for our result type */
158 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
159 		elog(ERROR, "return type must be a row type");
160 
161 	/* Build tuplestore to hold the result rows */
162 	oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
163 
164 	tupstore = tuplestore_begin_heap(true, false, work_mem);
165 	rsinfo->returnMode = SFRM_Materialize;
166 	rsinfo->setResult = tupstore;
167 	rsinfo->setDesc = tupdesc;
168 
169 	MemoryContextSwitchTo(oldcontext);
170 
171 	indexRel = index_open(indexRelid, AccessShareLock);
172 	bdesc = brin_build_desc(indexRel);
173 
174 	/* minimally verify the page we got */
175 	page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
176 
177 	/*
178 	 * Initialize output functions for all indexed datatypes; simplifies
179 	 * calling them later.
180 	 */
181 	columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
182 	for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
183 	{
184 		Oid			output;
185 		bool		isVarlena;
186 		BrinOpcInfo *opcinfo;
187 		int			i;
188 		brin_column_state *column;
189 
190 		opcinfo = bdesc->bd_info[attno - 1];
191 		column = palloc(offsetof(brin_column_state, outputFn) +
192 						sizeof(FmgrInfo) * opcinfo->oi_nstored);
193 
194 		column->nstored = opcinfo->oi_nstored;
195 		for (i = 0; i < opcinfo->oi_nstored; i++)
196 		{
197 			getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
198 			fmgr_info(output, &column->outputFn[i]);
199 		}
200 
201 		columns[attno - 1] = column;
202 	}
203 
204 	offset = FirstOffsetNumber;
205 	unusedItem = false;
206 	dtup = NULL;
207 	for (;;)
208 	{
209 		Datum		values[7];
210 		bool		nulls[7];
211 
212 		/*
213 		 * This loop is called once for every attribute of every tuple in the
214 		 * page.  At the start of a tuple, we get a NULL dtup; that's our
215 		 * signal for obtaining and decoding the next one.  If that's not the
216 		 * case, we output the next attribute.
217 		 */
218 		if (dtup == NULL)
219 		{
220 			ItemId		itemId;
221 
222 			/* verify item status: if there's no data, we can't decode */
223 			itemId = PageGetItemId(page, offset);
224 			if (ItemIdIsUsed(itemId))
225 			{
226 				dtup = brin_deform_tuple(bdesc,
227 										 (BrinTuple *) PageGetItem(page, itemId),
228 										 NULL);
229 				attno = 1;
230 				unusedItem = false;
231 			}
232 			else
233 				unusedItem = true;
234 		}
235 		else
236 			attno++;
237 
238 		MemSet(nulls, 0, sizeof(nulls));
239 
240 		if (unusedItem)
241 		{
242 			values[0] = UInt16GetDatum(offset);
243 			nulls[1] = true;
244 			nulls[2] = true;
245 			nulls[3] = true;
246 			nulls[4] = true;
247 			nulls[5] = true;
248 			nulls[6] = true;
249 		}
250 		else
251 		{
252 			int			att = attno - 1;
253 
254 			values[0] = UInt16GetDatum(offset);
255 			values[1] = UInt32GetDatum(dtup->bt_blkno);
256 			values[2] = UInt16GetDatum(attno);
257 			values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
258 			values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
259 			values[5] = BoolGetDatum(dtup->bt_placeholder);
260 			if (!dtup->bt_columns[att].bv_allnulls)
261 			{
262 				BrinValues *bvalues = &dtup->bt_columns[att];
263 				StringInfoData s;
264 				bool		first;
265 				int			i;
266 
267 				initStringInfo(&s);
268 				appendStringInfoChar(&s, '{');
269 
270 				first = true;
271 				for (i = 0; i < columns[att]->nstored; i++)
272 				{
273 					char	   *val;
274 
275 					if (!first)
276 						appendStringInfoString(&s, " .. ");
277 					first = false;
278 					val = OutputFunctionCall(&columns[att]->outputFn[i],
279 											 bvalues->bv_values[i]);
280 					appendStringInfoString(&s, val);
281 					pfree(val);
282 				}
283 				appendStringInfoChar(&s, '}');
284 
285 				values[6] = CStringGetTextDatum(s.data);
286 				pfree(s.data);
287 			}
288 			else
289 			{
290 				nulls[6] = true;
291 			}
292 		}
293 
294 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
295 
296 		/*
297 		 * If the item was unused, jump straight to the next one; otherwise,
298 		 * the only cleanup needed here is to set our signal to go to the next
299 		 * tuple in the following iteration, by freeing the current one.
300 		 */
301 		if (unusedItem)
302 			offset = OffsetNumberNext(offset);
303 		else if (attno >= bdesc->bd_tupdesc->natts)
304 		{
305 			pfree(dtup);
306 			dtup = NULL;
307 			offset = OffsetNumberNext(offset);
308 		}
309 
310 		/*
311 		 * If we're beyond the end of the page, we're done.
312 		 */
313 		if (offset > PageGetMaxOffsetNumber(page))
314 			break;
315 	}
316 
317 	/* clean up and return the tuplestore */
318 	brin_free_desc(bdesc);
319 	tuplestore_donestoring(tupstore);
320 	index_close(indexRel, AccessShareLock);
321 
322 	return (Datum) 0;
323 }
324 
325 Datum
brin_metapage_info(PG_FUNCTION_ARGS)326 brin_metapage_info(PG_FUNCTION_ARGS)
327 {
328 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
329 	Page		page;
330 	BrinMetaPageData *meta;
331 	TupleDesc	tupdesc;
332 	Datum		values[4];
333 	bool		nulls[4];
334 	HeapTuple	htup;
335 
336 	if (!superuser())
337 		ereport(ERROR,
338 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
339 				 errmsg("must be superuser to use raw page functions")));
340 
341 	page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
342 
343 	/* Build a tuple descriptor for our result type */
344 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
345 		elog(ERROR, "return type must be a row type");
346 	tupdesc = BlessTupleDesc(tupdesc);
347 
348 	/* Extract values from the metapage */
349 	meta = (BrinMetaPageData *) PageGetContents(page);
350 	MemSet(nulls, 0, sizeof(nulls));
351 	values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
352 	values[1] = Int32GetDatum(meta->brinVersion);
353 	values[2] = Int32GetDatum(meta->pagesPerRange);
354 	values[3] = Int64GetDatum(meta->lastRevmapPage);
355 
356 	htup = heap_form_tuple(tupdesc, values, nulls);
357 
358 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
359 }
360 
361 /*
362  * Return the TID array stored in a BRIN revmap page
363  */
364 Datum
brin_revmap_data(PG_FUNCTION_ARGS)365 brin_revmap_data(PG_FUNCTION_ARGS)
366 {
367 	struct
368 	{
369 		ItemPointerData *tids;
370 		int			idx;
371 	}		   *state;
372 	FuncCallContext *fctx;
373 
374 	if (!superuser())
375 		ereport(ERROR,
376 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
377 				 errmsg("must be superuser to use raw page functions")));
378 
379 	if (SRF_IS_FIRSTCALL())
380 	{
381 		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
382 		MemoryContext mctx;
383 		Page		page;
384 
385 		/* minimally verify the page we got */
386 		page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
387 
388 		/* create a function context for cross-call persistence */
389 		fctx = SRF_FIRSTCALL_INIT();
390 
391 		/* switch to memory context appropriate for multiple function calls */
392 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
393 
394 		state = palloc(sizeof(*state));
395 		state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
396 		state->idx = 0;
397 
398 		fctx->user_fctx = state;
399 
400 		MemoryContextSwitchTo(mctx);
401 	}
402 
403 	fctx = SRF_PERCALL_SETUP();
404 	state = fctx->user_fctx;
405 
406 	if (state->idx < REVMAP_PAGE_MAXITEMS)
407 		SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
408 
409 	SRF_RETURN_DONE(fctx);
410 }
411