1 /*-------------------------------------------------------------------------
2  *
3  * heapfuncs.c
4  *	  Functions to investigate heap pages
5  *
6  * We check the input to these functions for corrupt pointers etc. that
7  * might cause crashes, but at the same time we try to print out as much
8  * information as possible, even if it's nonsense. That's because if a
9  * page is corrupt, we don't know why and how exactly it is corrupt, so we
10  * let the user judge it.
11  *
12  * These functions are restricted to superusers for the fear of introducing
13  * security holes if the input checking isn't as water-tight as it should be.
14  * You'd need to be superuser to obtain a raw page image anyway, so
15  * there's hardly any use case for using these without superuser-rights
16  * anyway.
17  *
18  * Copyright (c) 2007-2019, PostgreSQL Global Development Group
19  *
20  * IDENTIFICATION
21  *	  contrib/pageinspect/heapfuncs.c
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "postgres.h"
27 
28 #include "pageinspect.h"
29 
30 #include "access/htup_details.h"
31 #include "access/relation.h"
32 #include "funcapi.h"
33 #include "catalog/pg_am_d.h"
34 #include "catalog/pg_type.h"
35 #include "miscadmin.h"
36 #include "utils/array.h"
37 #include "utils/builtins.h"
38 #include "utils/rel.h"
39 
/*
 * HeapTupleHeaderGetOidOld
 *
 * Tuples with OIDs can no longer be created, but a cluster that was brought
 * forward with pg_upgrade may still contain some, and it is worth showing
 * them.
 *
 * When HEAP_HASOID_OLD is set in t_infomask, the OID is read from the last
 * sizeof(Oid) bytes of the header area, i.e. the bytes immediately before
 * offset t_hoff.  Otherwise the macro yields InvalidOid.
 *
 * Note that "tup" is evaluated more than once, and that the caller is
 * responsible for having validated t_hoff before using this macro.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	((((tup)->t_infomask & HEAP_HASOID_OLD) != 0) \
	 ? *((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) \
	 : InvalidOid)
52 
53 
54 /*
55  * bits_to_text
56  *
57  * Converts a bits8-array of 'len' bits to a human-readable
58  * c-string representation.
59  */
60 static char *
bits_to_text(bits8 * bits,int len)61 bits_to_text(bits8 *bits, int len)
62 {
63 	int			i;
64 	char	   *str;
65 
66 	str = palloc(len + 1);
67 
68 	for (i = 0; i < len; i++)
69 		str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
70 
71 	str[i] = '\0';
72 
73 	return str;
74 }
75 
76 
77 /*
78  * text_to_bits
79  *
80  * Converts a c-string representation of bits into a bits8-array. This is
81  * the reverse operation of previous routine.
82  */
83 static bits8 *
text_to_bits(char * str,int len)84 text_to_bits(char *str, int len)
85 {
86 	bits8	   *bits;
87 	int			off = 0;
88 	char		byte = 0;
89 
90 	bits = palloc(len + 1);
91 
92 	while (off < len)
93 	{
94 		if (off % 8 == 0)
95 			byte = 0;
96 
97 		if ((str[off] == '0') || (str[off] == '1'))
98 			byte = byte | ((str[off] - '0') << off % 8);
99 		else
100 			ereport(ERROR,
101 					(errcode(ERRCODE_DATA_CORRUPTED),
102 					 errmsg("illegal character '%c' in t_bits string", str[off])));
103 
104 		if (off % 8 == 7)
105 			bits[off / 8] = byte;
106 
107 		off++;
108 	}
109 
110 	return bits;
111 }
112 
113 /*
114  * heap_page_items
115  *
116  * Allows inspection of line pointers and tuple headers of a heap page.
117  */
118 PG_FUNCTION_INFO_V1(heap_page_items);
119 
120 typedef struct heap_page_items_state
121 {
122 	TupleDesc	tupd;
123 	Page		page;
124 	uint16		offset;
125 } heap_page_items_state;
126 
127 Datum
heap_page_items(PG_FUNCTION_ARGS)128 heap_page_items(PG_FUNCTION_ARGS)
129 {
130 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
131 	heap_page_items_state *inter_call_data = NULL;
132 	FuncCallContext *fctx;
133 	int			raw_page_size;
134 
135 	if (!superuser())
136 		ereport(ERROR,
137 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
138 				 (errmsg("must be superuser to use raw page functions"))));
139 
140 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
141 
142 	if (SRF_IS_FIRSTCALL())
143 	{
144 		TupleDesc	tupdesc;
145 		MemoryContext mctx;
146 
147 		if (raw_page_size < SizeOfPageHeaderData)
148 			ereport(ERROR,
149 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
150 					 errmsg("input page too small (%d bytes)", raw_page_size)));
151 
152 		fctx = SRF_FIRSTCALL_INIT();
153 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
154 
155 		inter_call_data = palloc(sizeof(heap_page_items_state));
156 
157 		/* Build a tuple descriptor for our result type */
158 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
159 			elog(ERROR, "return type must be a row type");
160 
161 		inter_call_data->tupd = tupdesc;
162 
163 		inter_call_data->offset = FirstOffsetNumber;
164 		inter_call_data->page = VARDATA(raw_page);
165 
166 		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
167 		fctx->user_fctx = inter_call_data;
168 
169 		MemoryContextSwitchTo(mctx);
170 	}
171 
172 	fctx = SRF_PERCALL_SETUP();
173 	inter_call_data = fctx->user_fctx;
174 
175 	if (fctx->call_cntr < fctx->max_calls)
176 	{
177 		Page		page = inter_call_data->page;
178 		HeapTuple	resultTuple;
179 		Datum		result;
180 		ItemId		id;
181 		Datum		values[14];
182 		bool		nulls[14];
183 		uint16		lp_offset;
184 		uint16		lp_flags;
185 		uint16		lp_len;
186 
187 		memset(nulls, 0, sizeof(nulls));
188 
189 		/* Extract information from the line pointer */
190 
191 		id = PageGetItemId(page, inter_call_data->offset);
192 
193 		lp_offset = ItemIdGetOffset(id);
194 		lp_flags = ItemIdGetFlags(id);
195 		lp_len = ItemIdGetLength(id);
196 
197 		values[0] = UInt16GetDatum(inter_call_data->offset);
198 		values[1] = UInt16GetDatum(lp_offset);
199 		values[2] = UInt16GetDatum(lp_flags);
200 		values[3] = UInt16GetDatum(lp_len);
201 
202 		/*
203 		 * We do just enough validity checking to make sure we don't reference
204 		 * data outside the page passed to us. The page could be corrupt in
205 		 * many other ways, but at least we won't crash.
206 		 */
207 		if (ItemIdHasStorage(id) &&
208 			lp_len >= MinHeapTupleSize &&
209 			lp_offset == MAXALIGN(lp_offset) &&
210 			lp_offset + lp_len <= raw_page_size)
211 		{
212 			HeapTupleHeader tuphdr;
213 			bytea	   *tuple_data_bytea;
214 			int			tuple_data_len;
215 
216 			/* Extract information from the tuple header */
217 
218 			tuphdr = (HeapTupleHeader) PageGetItem(page, id);
219 
220 			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
221 			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
222 			/* shared with xvac */
223 			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
224 			values[7] = PointerGetDatum(&tuphdr->t_ctid);
225 			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
226 			values[9] = UInt32GetDatum(tuphdr->t_infomask);
227 			values[10] = UInt8GetDatum(tuphdr->t_hoff);
228 
229 			/* Copy raw tuple data into bytea attribute */
230 			tuple_data_len = lp_len - tuphdr->t_hoff;
231 			tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
232 			SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
233 			memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
234 				   tuple_data_len);
235 			values[13] = PointerGetDatum(tuple_data_bytea);
236 
237 			/*
238 			 * We already checked that the item is completely within the raw
239 			 * page passed to us, with the length given in the line pointer.
240 			 * Let's check that t_hoff doesn't point over lp_len, before using
241 			 * it to access t_bits and oid.
242 			 */
243 			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
244 				tuphdr->t_hoff <= lp_len &&
245 				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
246 			{
247 				if (tuphdr->t_infomask & HEAP_HASNULL)
248 				{
249 					int			bits_len;
250 
251 					bits_len =
252 						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
253 					values[11] = CStringGetTextDatum(
254 													 bits_to_text(tuphdr->t_bits, bits_len));
255 				}
256 				else
257 					nulls[11] = true;
258 
259 				if (tuphdr->t_infomask & HEAP_HASOID_OLD)
260 					values[12] = HeapTupleHeaderGetOidOld(tuphdr);
261 				else
262 					nulls[12] = true;
263 			}
264 			else
265 			{
266 				nulls[11] = true;
267 				nulls[12] = true;
268 			}
269 		}
270 		else
271 		{
272 			/*
273 			 * The line pointer is not used, or it's invalid. Set the rest of
274 			 * the fields to NULL
275 			 */
276 			int			i;
277 
278 			for (i = 4; i <= 13; i++)
279 				nulls[i] = true;
280 		}
281 
282 		/* Build and return the result tuple. */
283 		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
284 		result = HeapTupleGetDatum(resultTuple);
285 
286 		inter_call_data->offset++;
287 
288 		SRF_RETURN_NEXT(fctx, result);
289 	}
290 	else
291 		SRF_RETURN_DONE(fctx);
292 }
293 
294 /*
295  * tuple_data_split_internal
296  *
297  * Split raw tuple data taken directly from a page into an array of bytea
298  * elements. This routine does a lookup on NULL values and creates array
299  * elements accordingly. This is a reimplementation of nocachegetattr()
300  * in heaptuple.c simplified for educational purposes.
301  */
302 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)303 tuple_data_split_internal(Oid relid, char *tupdata,
304 						  uint16 tupdata_len, uint16 t_infomask,
305 						  uint16 t_infomask2, bits8 *t_bits,
306 						  bool do_detoast)
307 {
308 	ArrayBuildState *raw_attrs;
309 	int			nattrs;
310 	int			i;
311 	int			off = 0;
312 	Relation	rel;
313 	TupleDesc	tupdesc;
314 
315 	/* Get tuple descriptor from relation OID */
316 	rel = relation_open(relid, AccessShareLock);
317 	tupdesc = RelationGetDescr(rel);
318 
319 	raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
320 	nattrs = tupdesc->natts;
321 
322 	if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
323 		ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
324 						errmsg("only heap AM is supported")));
325 
326 	if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
327 		ereport(ERROR,
328 				(errcode(ERRCODE_DATA_CORRUPTED),
329 				 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
330 
331 	for (i = 0; i < nattrs; i++)
332 	{
333 		Form_pg_attribute attr;
334 		bool		is_null;
335 		bytea	   *attr_data = NULL;
336 
337 		attr = TupleDescAttr(tupdesc, i);
338 
339 		/*
340 		 * Tuple header can specify less attributes than tuple descriptor as
341 		 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
342 		 * change tuples in pages, so attributes with numbers greater than
343 		 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
344 		 */
345 		if (i >= (t_infomask2 & HEAP_NATTS_MASK))
346 			is_null = true;
347 		else
348 			is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
349 
350 		if (!is_null)
351 		{
352 			int			len;
353 
354 			if (attr->attlen == -1)
355 			{
356 				off = att_align_pointer(off, attr->attalign, -1,
357 										tupdata + off);
358 
359 				/*
360 				 * As VARSIZE_ANY throws an exception if it can't properly
361 				 * detect the type of external storage in macros VARTAG_SIZE,
362 				 * this check is repeated to have a nicer error handling.
363 				 */
364 				if (VARATT_IS_EXTERNAL(tupdata + off) &&
365 					!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
366 					!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
367 					ereport(ERROR,
368 							(errcode(ERRCODE_DATA_CORRUPTED),
369 							 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
370 
371 				len = VARSIZE_ANY(tupdata + off);
372 			}
373 			else
374 			{
375 				off = att_align_nominal(off, attr->attalign);
376 				len = attr->attlen;
377 			}
378 
379 			if (tupdata_len < off + len)
380 				ereport(ERROR,
381 						(errcode(ERRCODE_DATA_CORRUPTED),
382 						 errmsg("unexpected end of tuple data")));
383 
384 			if (attr->attlen == -1 && do_detoast)
385 				attr_data = DatumGetByteaPCopy(tupdata + off);
386 			else
387 			{
388 				attr_data = (bytea *) palloc(len + VARHDRSZ);
389 				SET_VARSIZE(attr_data, len + VARHDRSZ);
390 				memcpy(VARDATA(attr_data), tupdata + off, len);
391 			}
392 
393 			off = att_addlength_pointer(off, attr->attlen,
394 										tupdata + off);
395 		}
396 
397 		raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
398 									 is_null, BYTEAOID, CurrentMemoryContext);
399 		if (attr_data)
400 			pfree(attr_data);
401 	}
402 
403 	if (tupdata_len != off)
404 		ereport(ERROR,
405 				(errcode(ERRCODE_DATA_CORRUPTED),
406 				 errmsg("end of tuple reached without looking at all its data")));
407 
408 	relation_close(rel, AccessShareLock);
409 
410 	return makeArrayResult(raw_attrs, CurrentMemoryContext);
411 }
412 
413 /*
414  * tuple_data_split
415  *
416  * Split raw tuple data taken directly from page into distinct elements
417  * taking into account null values.
418  */
419 PG_FUNCTION_INFO_V1(tuple_data_split);
420 
421 Datum
tuple_data_split(PG_FUNCTION_ARGS)422 tuple_data_split(PG_FUNCTION_ARGS)
423 {
424 	Oid			relid;
425 	bytea	   *raw_data;
426 	uint16		t_infomask;
427 	uint16		t_infomask2;
428 	char	   *t_bits_str;
429 	bool		do_detoast = false;
430 	bits8	   *t_bits = NULL;
431 	Datum		res;
432 
433 	relid = PG_GETARG_OID(0);
434 	raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
435 	t_infomask = PG_GETARG_INT16(2);
436 	t_infomask2 = PG_GETARG_INT16(3);
437 	t_bits_str = PG_ARGISNULL(4) ? NULL :
438 		text_to_cstring(PG_GETARG_TEXT_PP(4));
439 
440 	if (PG_NARGS() >= 6)
441 		do_detoast = PG_GETARG_BOOL(5);
442 
443 	if (!superuser())
444 		ereport(ERROR,
445 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
446 				 errmsg("must be superuser to use raw page functions")));
447 
448 	if (!raw_data)
449 		PG_RETURN_NULL();
450 
451 	/*
452 	 * Convert t_bits string back to the bits8 array as represented in the
453 	 * tuple header.
454 	 */
455 	if (t_infomask & HEAP_HASNULL)
456 	{
457 		int			bits_str_len;
458 		int			bits_len;
459 
460 		bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
461 		if (!t_bits_str)
462 			ereport(ERROR,
463 					(errcode(ERRCODE_DATA_CORRUPTED),
464 					 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
465 							bits_len)));
466 
467 		bits_str_len = strlen(t_bits_str);
468 		if (bits_len != bits_str_len)
469 			ereport(ERROR,
470 					(errcode(ERRCODE_DATA_CORRUPTED),
471 					 errmsg("unexpected length of t_bits %u, expected %d",
472 							bits_str_len, bits_len)));
473 
474 		/* do the conversion */
475 		t_bits = text_to_bits(t_bits_str, bits_str_len);
476 	}
477 	else
478 	{
479 		if (t_bits_str)
480 			ereport(ERROR,
481 					(errcode(ERRCODE_DATA_CORRUPTED),
482 					 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
483 							strlen(t_bits_str))));
484 	}
485 
486 	/* Split tuple data */
487 	res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
488 									VARSIZE(raw_data) - VARHDRSZ,
489 									t_infomask, t_infomask2, t_bits,
490 									do_detoast);
491 
492 	if (t_bits)
493 		pfree(t_bits);
494 
495 	PG_RETURN_ARRAYTYPE_P(res);
496 }
497