1 /*-------------------------------------------------------------------------
2  *
3  * heapfuncs.c
4  *	  Functions to investigate heap pages
5  *
6  * We check the input to these functions for corrupt pointers etc. that
7  * might cause crashes, but at the same time we try to print out as much
8  * information as possible, even if it's nonsense. That's because if a
9  * page is corrupt, we don't know why and how exactly it is corrupt, so we
10  * let the user judge it.
11  *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as watertight as it should be.
 * You'd need to be superuser to obtain a raw page image in the first place,
 * so there's hardly any use case for using these without superuser rights
 * anyway.
17  *
18  * Copyright (c) 2007-2017, PostgreSQL Global Development Group
19  *
20  * IDENTIFICATION
21  *	  contrib/pageinspect/heapfuncs.c
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "postgres.h"
27 
28 #include "pageinspect.h"
29 
30 #include "access/htup_details.h"
31 #include "funcapi.h"
32 #include "catalog/pg_type.h"
33 #include "miscadmin.h"
34 #include "utils/array.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
37 
38 
39 /*
40  * bits_to_text
41  *
42  * Converts a bits8-array of 'len' bits to a human-readable
43  * c-string representation.
44  */
45 static char *
bits_to_text(bits8 * bits,int len)46 bits_to_text(bits8 *bits, int len)
47 {
48 	int			i;
49 	char	   *str;
50 
51 	str = palloc(len + 1);
52 
53 	for (i = 0; i < len; i++)
54 		str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
55 
56 	str[i] = '\0';
57 
58 	return str;
59 }
60 
61 
62 /*
63  * text_to_bits
64  *
65  * Converts a c-string representation of bits into a bits8-array. This is
66  * the reverse operation of previous routine.
67  */
68 static bits8 *
text_to_bits(char * str,int len)69 text_to_bits(char *str, int len)
70 {
71 	bits8	   *bits;
72 	int			off = 0;
73 	char		byte = 0;
74 
75 	bits = palloc(len + 1);
76 
77 	while (off < len)
78 	{
79 		if (off % 8 == 0)
80 			byte = 0;
81 
82 		if ((str[off] == '0') || (str[off] == '1'))
83 			byte = byte | ((str[off] - '0') << off % 8);
84 		else
85 			ereport(ERROR,
86 					(errcode(ERRCODE_DATA_CORRUPTED),
87 					 errmsg("illegal character '%c' in t_bits string", str[off])));
88 
89 		if (off % 8 == 7)
90 			bits[off / 8] = byte;
91 
92 		off++;
93 	}
94 
95 	return bits;
96 }
97 
98 /*
99  * heap_page_items
100  *
101  * Allows inspection of line pointers and tuple headers of a heap page.
102  */
103 PG_FUNCTION_INFO_V1(heap_page_items);
104 
105 typedef struct heap_page_items_state
106 {
107 	TupleDesc	tupd;
108 	Page		page;
109 	uint16		offset;
110 } heap_page_items_state;
111 
112 Datum
heap_page_items(PG_FUNCTION_ARGS)113 heap_page_items(PG_FUNCTION_ARGS)
114 {
115 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
116 	heap_page_items_state *inter_call_data = NULL;
117 	FuncCallContext *fctx;
118 	int			raw_page_size;
119 
120 	if (!superuser())
121 		ereport(ERROR,
122 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
123 				 (errmsg("must be superuser to use raw page functions"))));
124 
125 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
126 
127 	if (SRF_IS_FIRSTCALL())
128 	{
129 		TupleDesc	tupdesc;
130 		MemoryContext mctx;
131 
132 		if (raw_page_size < SizeOfPageHeaderData)
133 			ereport(ERROR,
134 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
135 					 errmsg("input page too small (%d bytes)", raw_page_size)));
136 
137 		fctx = SRF_FIRSTCALL_INIT();
138 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
139 
140 		inter_call_data = palloc(sizeof(heap_page_items_state));
141 
142 		/* Build a tuple descriptor for our result type */
143 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
144 			elog(ERROR, "return type must be a row type");
145 
146 		inter_call_data->tupd = tupdesc;
147 
148 		inter_call_data->offset = FirstOffsetNumber;
149 		inter_call_data->page = VARDATA(raw_page);
150 
151 		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
152 		fctx->user_fctx = inter_call_data;
153 
154 		MemoryContextSwitchTo(mctx);
155 	}
156 
157 	fctx = SRF_PERCALL_SETUP();
158 	inter_call_data = fctx->user_fctx;
159 
160 	if (fctx->call_cntr < fctx->max_calls)
161 	{
162 		Page		page = inter_call_data->page;
163 		HeapTuple	resultTuple;
164 		Datum		result;
165 		ItemId		id;
166 		Datum		values[14];
167 		bool		nulls[14];
168 		uint16		lp_offset;
169 		uint16		lp_flags;
170 		uint16		lp_len;
171 
172 		memset(nulls, 0, sizeof(nulls));
173 
174 		/* Extract information from the line pointer */
175 
176 		id = PageGetItemId(page, inter_call_data->offset);
177 
178 		lp_offset = ItemIdGetOffset(id);
179 		lp_flags = ItemIdGetFlags(id);
180 		lp_len = ItemIdGetLength(id);
181 
182 		values[0] = UInt16GetDatum(inter_call_data->offset);
183 		values[1] = UInt16GetDatum(lp_offset);
184 		values[2] = UInt16GetDatum(lp_flags);
185 		values[3] = UInt16GetDatum(lp_len);
186 
187 		/*
188 		 * We do just enough validity checking to make sure we don't reference
189 		 * data outside the page passed to us. The page could be corrupt in
190 		 * many other ways, but at least we won't crash.
191 		 */
192 		if (ItemIdHasStorage(id) &&
193 			lp_len >= MinHeapTupleSize &&
194 			lp_offset == MAXALIGN(lp_offset) &&
195 			lp_offset + lp_len <= raw_page_size)
196 		{
197 			HeapTupleHeader tuphdr;
198 			bytea	   *tuple_data_bytea;
199 			int			tuple_data_len;
200 
201 			/* Extract information from the tuple header */
202 
203 			tuphdr = (HeapTupleHeader) PageGetItem(page, id);
204 
205 			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
206 			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
207 			/* shared with xvac */
208 			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
209 			values[7] = PointerGetDatum(&tuphdr->t_ctid);
210 			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
211 			values[9] = UInt32GetDatum(tuphdr->t_infomask);
212 			values[10] = UInt8GetDatum(tuphdr->t_hoff);
213 
214 			/* Copy raw tuple data into bytea attribute */
215 			tuple_data_len = lp_len - tuphdr->t_hoff;
216 			tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
217 			SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
218 			memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
219 				   tuple_data_len);
220 			values[13] = PointerGetDatum(tuple_data_bytea);
221 
222 			/*
223 			 * We already checked that the item is completely within the raw
224 			 * page passed to us, with the length given in the line pointer.
225 			 * Let's check that t_hoff doesn't point over lp_len, before using
226 			 * it to access t_bits and oid.
227 			 */
228 			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
229 				tuphdr->t_hoff <= lp_len &&
230 				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
231 			{
232 				if (tuphdr->t_infomask & HEAP_HASNULL)
233 				{
234 					int			bits_len;
235 
236 					bits_len =
237 						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
238 					values[11] = CStringGetTextDatum(
239 													 bits_to_text(tuphdr->t_bits, bits_len));
240 				}
241 				else
242 					nulls[11] = true;
243 
244 				if (tuphdr->t_infomask & HEAP_HASOID)
245 					values[12] = HeapTupleHeaderGetOid(tuphdr);
246 				else
247 					nulls[12] = true;
248 			}
249 			else
250 			{
251 				nulls[11] = true;
252 				nulls[12] = true;
253 			}
254 		}
255 		else
256 		{
257 			/*
258 			 * The line pointer is not used, or it's invalid. Set the rest of
259 			 * the fields to NULL
260 			 */
261 			int			i;
262 
263 			for (i = 4; i <= 13; i++)
264 				nulls[i] = true;
265 		}
266 
267 		/* Build and return the result tuple. */
268 		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
269 		result = HeapTupleGetDatum(resultTuple);
270 
271 		inter_call_data->offset++;
272 
273 		SRF_RETURN_NEXT(fctx, result);
274 	}
275 	else
276 		SRF_RETURN_DONE(fctx);
277 }
278 
279 /*
280  * tuple_data_split_internal
281  *
282  * Split raw tuple data taken directly from a page into an array of bytea
283  * elements. This routine does a lookup on NULL values and creates array
284  * elements accordingly. This is a reimplementation of nocachegetattr()
285  * in heaptuple.c simplified for educational purposes.
286  */
287 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)288 tuple_data_split_internal(Oid relid, char *tupdata,
289 						  uint16 tupdata_len, uint16 t_infomask,
290 						  uint16 t_infomask2, bits8 *t_bits,
291 						  bool do_detoast)
292 {
293 	ArrayBuildState *raw_attrs;
294 	int			nattrs;
295 	int			i;
296 	int			off = 0;
297 	Relation	rel;
298 	TupleDesc	tupdesc;
299 
300 	/* Get tuple descriptor from relation OID */
301 	rel = relation_open(relid, AccessShareLock);
302 	tupdesc = RelationGetDescr(rel);
303 
304 	raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
305 	nattrs = tupdesc->natts;
306 
307 	if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
308 		ereport(ERROR,
309 				(errcode(ERRCODE_DATA_CORRUPTED),
310 				 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
311 
312 	for (i = 0; i < nattrs; i++)
313 	{
314 		Form_pg_attribute attr;
315 		bool		is_null;
316 		bytea	   *attr_data = NULL;
317 
318 		attr = tupdesc->attrs[i];
319 
320 		/*
321 		 * Tuple header can specify less attributes than tuple descriptor as
322 		 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
323 		 * change tuples in pages, so attributes with numbers greater than
324 		 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
325 		 */
326 		if (i >= (t_infomask2 & HEAP_NATTS_MASK))
327 			is_null = true;
328 		else
329 			is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
330 
331 		if (!is_null)
332 		{
333 			int			len;
334 
335 			if (attr->attlen == -1)
336 			{
337 				off = att_align_pointer(off, tupdesc->attrs[i]->attalign, -1,
338 										tupdata + off);
339 
340 				/*
341 				 * As VARSIZE_ANY throws an exception if it can't properly
342 				 * detect the type of external storage in macros VARTAG_SIZE,
343 				 * this check is repeated to have a nicer error handling.
344 				 */
345 				if (VARATT_IS_EXTERNAL(tupdata + off) &&
346 					!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
347 					!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
348 					ereport(ERROR,
349 							(errcode(ERRCODE_DATA_CORRUPTED),
350 							 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
351 
352 				len = VARSIZE_ANY(tupdata + off);
353 			}
354 			else
355 			{
356 				off = att_align_nominal(off, tupdesc->attrs[i]->attalign);
357 				len = attr->attlen;
358 			}
359 
360 			if (tupdata_len < off + len)
361 				ereport(ERROR,
362 						(errcode(ERRCODE_DATA_CORRUPTED),
363 						 errmsg("unexpected end of tuple data")));
364 
365 			if (attr->attlen == -1 && do_detoast)
366 				attr_data = DatumGetByteaPCopy(tupdata + off);
367 			else
368 			{
369 				attr_data = (bytea *) palloc(len + VARHDRSZ);
370 				SET_VARSIZE(attr_data, len + VARHDRSZ);
371 				memcpy(VARDATA(attr_data), tupdata + off, len);
372 			}
373 
374 			off = att_addlength_pointer(off, tupdesc->attrs[i]->attlen,
375 										tupdata + off);
376 		}
377 
378 		raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
379 									 is_null, BYTEAOID, CurrentMemoryContext);
380 		if (attr_data)
381 			pfree(attr_data);
382 	}
383 
384 	if (tupdata_len != off)
385 		ereport(ERROR,
386 				(errcode(ERRCODE_DATA_CORRUPTED),
387 				 errmsg("end of tuple reached without looking at all its data")));
388 
389 	relation_close(rel, AccessShareLock);
390 
391 	return makeArrayResult(raw_attrs, CurrentMemoryContext);
392 }
393 
394 /*
395  * tuple_data_split
396  *
397  * Split raw tuple data taken directly from page into distinct elements
398  * taking into account null values.
399  */
400 PG_FUNCTION_INFO_V1(tuple_data_split);
401 
402 Datum
tuple_data_split(PG_FUNCTION_ARGS)403 tuple_data_split(PG_FUNCTION_ARGS)
404 {
405 	Oid			relid;
406 	bytea	   *raw_data;
407 	uint16		t_infomask;
408 	uint16		t_infomask2;
409 	char	   *t_bits_str;
410 	bool		do_detoast = false;
411 	bits8	   *t_bits = NULL;
412 	Datum		res;
413 
414 	relid = PG_GETARG_OID(0);
415 	raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
416 	t_infomask = PG_GETARG_INT16(2);
417 	t_infomask2 = PG_GETARG_INT16(3);
418 	t_bits_str = PG_ARGISNULL(4) ? NULL :
419 		text_to_cstring(PG_GETARG_TEXT_PP(4));
420 
421 	if (PG_NARGS() >= 6)
422 		do_detoast = PG_GETARG_BOOL(5);
423 
424 	if (!superuser())
425 		ereport(ERROR,
426 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
427 				 errmsg("must be superuser to use raw page functions")));
428 
429 	if (!raw_data)
430 		PG_RETURN_NULL();
431 
432 	/*
433 	 * Convert t_bits string back to the bits8 array as represented in the
434 	 * tuple header.
435 	 */
436 	if (t_infomask & HEAP_HASNULL)
437 	{
438 		int			bits_str_len;
439 		int			bits_len;
440 
441 		bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
442 		if (!t_bits_str)
443 			ereport(ERROR,
444 					(errcode(ERRCODE_DATA_CORRUPTED),
445 					 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
446 							bits_len)));
447 
448 		bits_str_len = strlen(t_bits_str);
449 		if (bits_len != bits_str_len)
450 			ereport(ERROR,
451 					(errcode(ERRCODE_DATA_CORRUPTED),
452 					 errmsg("unexpected length of t_bits %u, expected %d",
453 							bits_str_len, bits_len)));
454 
455 		/* do the conversion */
456 		t_bits = text_to_bits(t_bits_str, bits_str_len);
457 	}
458 	else
459 	{
460 		if (t_bits_str)
461 			ereport(ERROR,
462 					(errcode(ERRCODE_DATA_CORRUPTED),
463 					 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
464 							strlen(t_bits_str))));
465 	}
466 
467 	/* Split tuple data */
468 	res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
469 									VARSIZE(raw_data) - VARHDRSZ,
470 									t_infomask, t_infomask2, t_bits,
471 									do_detoast);
472 
473 	if (t_bits)
474 		pfree(t_bits);
475 
476 	PG_RETURN_ARRAYTYPE_P(res);
477 }
478