1 /*
2  * brinfuncs.c
3  *		Functions to investigate BRIN indexes
4  *
5  * Copyright (c) 2014-2017, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *		contrib/pageinspect/brinfuncs.c
9  */
#include "postgres.h"

#include "pageinspect.h"

#include "access/brin.h"
#include "access/brin_internal.h"
#include "access/brin_page.h"
#include "access/brin_revmap.h"
#include "access/brin_tuple.h"
#include "access/htup_details.h"
#include "catalog/index.h"
#include "catalog/pg_am.h"
#include "catalog/pg_type.h"
#include "funcapi.h"
#include "lib/stringinfo.h"
#include "miscadmin.h"
#include "utils/array.h"
#include "utils/builtins.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
29 
30 
/* fmgr registration for the SQL-callable functions defined in this file */
PG_FUNCTION_INFO_V1(brin_page_type);
PG_FUNCTION_INFO_V1(brin_page_items);
PG_FUNCTION_INFO_V1(brin_metapage_info);
PG_FUNCTION_INFO_V1(brin_revmap_data);
35 
/*
 * Per-column output state built by brin_page_items(): one output-function
 * lookup entry for each value the column's opclass stores per range.
 */
typedef struct brin_column_state
{
	int			nstored;		/* number of valid entries in outputFn[] */
	/* type output functions, one per stored value */
	FmgrInfo	outputFn[FLEXIBLE_ARRAY_MEMBER];
} brin_column_state;
41 
42 
/* forward declaration; the definition follows brin_page_type() below */
static Page verify_brin_page(bytea *raw_page, uint16 type,
				 const char *strtype);
45 
46 Datum
brin_page_type(PG_FUNCTION_ARGS)47 brin_page_type(PG_FUNCTION_ARGS)
48 {
49 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
50 	Page		page = VARDATA(raw_page);
51 	int			raw_page_size;
52 	char	   *type;
53 
54 	if (!superuser())
55 		ereport(ERROR,
56 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
57 				 (errmsg("must be superuser to use raw page functions"))));
58 
59 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
60 
61 	if (raw_page_size != BLCKSZ)
62 		ereport(ERROR,
63 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
64 				 errmsg("input page too small"),
65 				 errdetail("Expected size %d, got %d",
66 						   BLCKSZ, raw_page_size)));
67 
68 	switch (BrinPageType(page))
69 	{
70 		case BRIN_PAGETYPE_META:
71 			type = "meta";
72 			break;
73 		case BRIN_PAGETYPE_REVMAP:
74 			type = "revmap";
75 			break;
76 		case BRIN_PAGETYPE_REGULAR:
77 			type = "regular";
78 			break;
79 		default:
80 			type = psprintf("unknown (%02x)", BrinPageType(page));
81 			break;
82 	}
83 
84 	PG_RETURN_TEXT_P(cstring_to_text(type));
85 }
86 
87 /*
88  * Verify that the given bytea contains a BRIN page of the indicated page
89  * type, or die in the attempt.  A pointer to the page is returned.
90  */
91 static Page
verify_brin_page(bytea * raw_page,uint16 type,const char * strtype)92 verify_brin_page(bytea *raw_page, uint16 type, const char *strtype)
93 {
94 	Page		page;
95 	int			raw_page_size;
96 
97 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
98 
99 	if (raw_page_size != BLCKSZ)
100 		ereport(ERROR,
101 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
102 				 errmsg("input page too small"),
103 				 errdetail("Expected size %d, got %d",
104 						   BLCKSZ, raw_page_size)));
105 
106 	page = VARDATA(raw_page);
107 
108 	/* verify the special space says this page is what we want */
109 	if (BrinPageType(page) != type)
110 		ereport(ERROR,
111 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
112 				 errmsg("page is not a BRIN page of type \"%s\"", strtype),
113 				 errdetail("Expected special type %08x, got %08x.",
114 						   type, BrinPageType(page))));
115 
116 	return page;
117 }
118 
119 
120 /*
121  * Extract all item values from a BRIN index page
122  *
123  * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass);
124  */
125 Datum
brin_page_items(PG_FUNCTION_ARGS)126 brin_page_items(PG_FUNCTION_ARGS)
127 {
128 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
129 	Oid			indexRelid = PG_GETARG_OID(1);
130 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
131 	TupleDesc	tupdesc;
132 	MemoryContext oldcontext;
133 	Tuplestorestate *tupstore;
134 	Relation	indexRel;
135 	brin_column_state **columns;
136 	BrinDesc   *bdesc;
137 	BrinMemTuple *dtup;
138 	Page		page;
139 	OffsetNumber offset;
140 	AttrNumber	attno;
141 	bool		unusedItem;
142 
143 	if (!superuser())
144 		ereport(ERROR,
145 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
146 				 (errmsg("must be superuser to use raw page functions"))));
147 
148 	/* check to see if caller supports us returning a tuplestore */
149 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
150 		ereport(ERROR,
151 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
152 				 errmsg("set-valued function called in context that cannot accept a set")));
153 	if (!(rsinfo->allowedModes & SFRM_Materialize) ||
154 		rsinfo->expectedDesc == NULL)
155 		ereport(ERROR,
156 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
157 				 errmsg("materialize mode required, but it is not allowed in this context")));
158 
159 	/* Build a tuple descriptor for our result type */
160 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
161 		elog(ERROR, "return type must be a row type");
162 
163 	/* Build tuplestore to hold the result rows */
164 	oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory);
165 
166 	tupstore = tuplestore_begin_heap(true, false, work_mem);
167 	rsinfo->returnMode = SFRM_Materialize;
168 	rsinfo->setResult = tupstore;
169 	rsinfo->setDesc = tupdesc;
170 
171 	MemoryContextSwitchTo(oldcontext);
172 
173 	indexRel = index_open(indexRelid, AccessShareLock);
174 	bdesc = brin_build_desc(indexRel);
175 
176 	/* minimally verify the page we got */
177 	page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular");
178 
179 	/*
180 	 * Initialize output functions for all indexed datatypes; simplifies
181 	 * calling them later.
182 	 */
183 	columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts);
184 	for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++)
185 	{
186 		Oid			output;
187 		bool		isVarlena;
188 		BrinOpcInfo *opcinfo;
189 		int			i;
190 		brin_column_state *column;
191 
192 		opcinfo = bdesc->bd_info[attno - 1];
193 		column = palloc(offsetof(brin_column_state, outputFn) +
194 						sizeof(FmgrInfo) * opcinfo->oi_nstored);
195 
196 		column->nstored = opcinfo->oi_nstored;
197 		for (i = 0; i < opcinfo->oi_nstored; i++)
198 		{
199 			getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena);
200 			fmgr_info(output, &column->outputFn[i]);
201 		}
202 
203 		columns[attno - 1] = column;
204 	}
205 
206 	offset = FirstOffsetNumber;
207 	unusedItem = false;
208 	dtup = NULL;
209 	for (;;)
210 	{
211 		Datum		values[7];
212 		bool		nulls[7];
213 
214 		/*
215 		 * This loop is called once for every attribute of every tuple in the
216 		 * page.  At the start of a tuple, we get a NULL dtup; that's our
217 		 * signal for obtaining and decoding the next one.  If that's not the
218 		 * case, we output the next attribute.
219 		 */
220 		if (dtup == NULL)
221 		{
222 			ItemId		itemId;
223 
224 			/* verify item status: if there's no data, we can't decode */
225 			itemId = PageGetItemId(page, offset);
226 			if (ItemIdIsUsed(itemId))
227 			{
228 				dtup = brin_deform_tuple(bdesc,
229 										 (BrinTuple *) PageGetItem(page, itemId),
230 										 NULL);
231 				attno = 1;
232 				unusedItem = false;
233 			}
234 			else
235 				unusedItem = true;
236 		}
237 		else
238 			attno++;
239 
240 		MemSet(nulls, 0, sizeof(nulls));
241 
242 		if (unusedItem)
243 		{
244 			values[0] = UInt16GetDatum(offset);
245 			nulls[1] = true;
246 			nulls[2] = true;
247 			nulls[3] = true;
248 			nulls[4] = true;
249 			nulls[5] = true;
250 			nulls[6] = true;
251 		}
252 		else
253 		{
254 			int			att = attno - 1;
255 
256 			values[0] = UInt16GetDatum(offset);
257 			values[1] = UInt32GetDatum(dtup->bt_blkno);
258 			values[2] = UInt16GetDatum(attno);
259 			values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls);
260 			values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls);
261 			values[5] = BoolGetDatum(dtup->bt_placeholder);
262 			if (!dtup->bt_columns[att].bv_allnulls)
263 			{
264 				BrinValues *bvalues = &dtup->bt_columns[att];
265 				StringInfoData s;
266 				bool		first;
267 				int			i;
268 
269 				initStringInfo(&s);
270 				appendStringInfoChar(&s, '{');
271 
272 				first = true;
273 				for (i = 0; i < columns[att]->nstored; i++)
274 				{
275 					char	   *val;
276 
277 					if (!first)
278 						appendStringInfoString(&s, " .. ");
279 					first = false;
280 					val = OutputFunctionCall(&columns[att]->outputFn[i],
281 											 bvalues->bv_values[i]);
282 					appendStringInfoString(&s, val);
283 					pfree(val);
284 				}
285 				appendStringInfoChar(&s, '}');
286 
287 				values[6] = CStringGetTextDatum(s.data);
288 				pfree(s.data);
289 			}
290 			else
291 			{
292 				nulls[6] = true;
293 			}
294 		}
295 
296 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
297 
298 		/*
299 		 * If the item was unused, jump straight to the next one; otherwise,
300 		 * the only cleanup needed here is to set our signal to go to the next
301 		 * tuple in the following iteration, by freeing the current one.
302 		 */
303 		if (unusedItem)
304 			offset = OffsetNumberNext(offset);
305 		else if (attno >= bdesc->bd_tupdesc->natts)
306 		{
307 			pfree(dtup);
308 			dtup = NULL;
309 			offset = OffsetNumberNext(offset);
310 		}
311 
312 		/*
313 		 * If we're beyond the end of the page, we're done.
314 		 */
315 		if (offset > PageGetMaxOffsetNumber(page))
316 			break;
317 	}
318 
319 	/* clean up and return the tuplestore */
320 	brin_free_desc(bdesc);
321 	tuplestore_donestoring(tupstore);
322 	index_close(indexRel, AccessShareLock);
323 
324 	return (Datum) 0;
325 }
326 
327 Datum
brin_metapage_info(PG_FUNCTION_ARGS)328 brin_metapage_info(PG_FUNCTION_ARGS)
329 {
330 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
331 	Page		page;
332 	BrinMetaPageData *meta;
333 	TupleDesc	tupdesc;
334 	Datum		values[4];
335 	bool		nulls[4];
336 	HeapTuple	htup;
337 
338 	if (!superuser())
339 		ereport(ERROR,
340 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
341 				 (errmsg("must be superuser to use raw page functions"))));
342 
343 	page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage");
344 
345 	/* Build a tuple descriptor for our result type */
346 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
347 		elog(ERROR, "return type must be a row type");
348 	tupdesc = BlessTupleDesc(tupdesc);
349 
350 	/* Extract values from the metapage */
351 	meta = (BrinMetaPageData *) PageGetContents(page);
352 	MemSet(nulls, 0, sizeof(nulls));
353 	values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic));
354 	values[1] = Int32GetDatum(meta->brinVersion);
355 	values[2] = Int32GetDatum(meta->pagesPerRange);
356 	values[3] = Int64GetDatum(meta->lastRevmapPage);
357 
358 	htup = heap_form_tuple(tupdesc, values, nulls);
359 
360 	PG_RETURN_DATUM(HeapTupleGetDatum(htup));
361 }
362 
363 /*
364  * Return the TID array stored in a BRIN revmap page
365  */
366 Datum
brin_revmap_data(PG_FUNCTION_ARGS)367 brin_revmap_data(PG_FUNCTION_ARGS)
368 {
369 	struct
370 	{
371 		ItemPointerData *tids;
372 		int			idx;
373 	}		   *state;
374 	FuncCallContext *fctx;
375 
376 	if (!superuser())
377 		ereport(ERROR,
378 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
379 				 (errmsg("must be superuser to use raw page functions"))));
380 
381 	if (SRF_IS_FIRSTCALL())
382 	{
383 		bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
384 		MemoryContext mctx;
385 		Page		page;
386 
387 		/* minimally verify the page we got */
388 		page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap");
389 
390 		/* create a function context for cross-call persistence */
391 		fctx = SRF_FIRSTCALL_INIT();
392 
393 		/* switch to memory context appropriate for multiple function calls */
394 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
395 
396 		state = palloc(sizeof(*state));
397 		state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids;
398 		state->idx = 0;
399 
400 		fctx->user_fctx = state;
401 
402 		MemoryContextSwitchTo(mctx);
403 	}
404 
405 	fctx = SRF_PERCALL_SETUP();
406 	state = fctx->user_fctx;
407 
408 	if (state->idx < REVMAP_PAGE_MAXITEMS)
409 		SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++]));
410 
411 	SRF_RETURN_DONE(fctx);
412 }
413