1 /*-------------------------------------------------------------------------
2  *
3  * heapfuncs.c
4  *	  Functions to investigate heap pages
5  *
6  * We check the input to these functions for corrupt pointers etc. that
7  * might cause crashes, but at the same time we try to print out as much
8  * information as possible, even if it's nonsense. That's because if a
9  * page is corrupt, we don't know why and how exactly it is corrupt, so we
10  * let the user judge it.
11  *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as water-tight as it should be.
 * You'd need to be superuser to obtain a raw page image anyway, so
 * there's hardly any use case for using these without superuser rights.
17  *
18  * Copyright (c) 2007-2016, PostgreSQL Global Development Group
19  *
20  * IDENTIFICATION
21  *	  contrib/pageinspect/heapfuncs.c
22  *
23  *-------------------------------------------------------------------------
24  */
25 
26 #include "postgres.h"
27 
28 #include "access/htup_details.h"
29 #include "funcapi.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "utils/array.h"
33 #include "utils/builtins.h"
34 #include "utils/rel.h"
35 
36 
37 /*
38  * bits_to_text
39  *
40  * Converts a bits8-array of 'len' bits to a human-readable
41  * c-string representation.
42  */
43 static char *
bits_to_text(bits8 * bits,int len)44 bits_to_text(bits8 *bits, int len)
45 {
46 	int			i;
47 	char	   *str;
48 
49 	str = palloc(len + 1);
50 
51 	for (i = 0; i < len; i++)
52 		str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
53 
54 	str[i] = '\0';
55 
56 	return str;
57 }
58 
59 
60 /*
61  * text_to_bits
62  *
63  * Converts a c-string representation of bits into a bits8-array. This is
64  * the reverse operation of previous routine.
65  */
66 static bits8 *
text_to_bits(char * str,int len)67 text_to_bits(char *str, int len)
68 {
69 	bits8	   *bits;
70 	int			off = 0;
71 	char		byte = 0;
72 
73 	bits = palloc(len + 1);
74 
75 	while (off < len)
76 	{
77 		if (off % 8 == 0)
78 			byte = 0;
79 
80 		if ((str[off] == '0') || (str[off] == '1'))
81 			byte = byte | ((str[off] - '0') << off % 8);
82 		else
83 			ereport(ERROR,
84 					(errcode(ERRCODE_DATA_CORRUPTED),
85 			   errmsg("illegal character '%c' in t_bits string", str[off])));
86 
87 		if (off % 8 == 7)
88 			bits[off / 8] = byte;
89 
90 		off++;
91 	}
92 
93 	return bits;
94 }
95 
96 /*
97  * heap_page_items
98  *
99  * Allows inspection of line pointers and tuple headers of a heap page.
100  */
101 PG_FUNCTION_INFO_V1(heap_page_items);
102 
103 typedef struct heap_page_items_state
104 {
105 	TupleDesc	tupd;
106 	Page		page;
107 	uint16		offset;
108 } heap_page_items_state;
109 
110 Datum
heap_page_items(PG_FUNCTION_ARGS)111 heap_page_items(PG_FUNCTION_ARGS)
112 {
113 	bytea	   *raw_page = PG_GETARG_BYTEA_P(0);
114 	heap_page_items_state *inter_call_data = NULL;
115 	FuncCallContext *fctx;
116 	int			raw_page_size;
117 
118 	if (!superuser())
119 		ereport(ERROR,
120 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
121 				 (errmsg("must be superuser to use raw page functions"))));
122 
123 	raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
124 
125 	if (SRF_IS_FIRSTCALL())
126 	{
127 		TupleDesc	tupdesc;
128 		MemoryContext mctx;
129 
130 		if (raw_page_size < SizeOfPageHeaderData)
131 			ereport(ERROR,
132 					(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
133 				  errmsg("input page too small (%d bytes)", raw_page_size)));
134 
135 		fctx = SRF_FIRSTCALL_INIT();
136 		mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
137 
138 		inter_call_data = palloc(sizeof(heap_page_items_state));
139 
140 		/* Build a tuple descriptor for our result type */
141 		if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
142 			elog(ERROR, "return type must be a row type");
143 
144 		inter_call_data->tupd = tupdesc;
145 
146 		inter_call_data->offset = FirstOffsetNumber;
147 		inter_call_data->page = VARDATA(raw_page);
148 
149 		fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
150 		fctx->user_fctx = inter_call_data;
151 
152 		MemoryContextSwitchTo(mctx);
153 	}
154 
155 	fctx = SRF_PERCALL_SETUP();
156 	inter_call_data = fctx->user_fctx;
157 
158 	if (fctx->call_cntr < fctx->max_calls)
159 	{
160 		Page		page = inter_call_data->page;
161 		HeapTuple	resultTuple;
162 		Datum		result;
163 		ItemId		id;
164 		Datum		values[14];
165 		bool		nulls[14];
166 		uint16		lp_offset;
167 		uint16		lp_flags;
168 		uint16		lp_len;
169 
170 		memset(nulls, 0, sizeof(nulls));
171 
172 		/* Extract information from the line pointer */
173 
174 		id = PageGetItemId(page, inter_call_data->offset);
175 
176 		lp_offset = ItemIdGetOffset(id);
177 		lp_flags = ItemIdGetFlags(id);
178 		lp_len = ItemIdGetLength(id);
179 
180 		values[0] = UInt16GetDatum(inter_call_data->offset);
181 		values[1] = UInt16GetDatum(lp_offset);
182 		values[2] = UInt16GetDatum(lp_flags);
183 		values[3] = UInt16GetDatum(lp_len);
184 
185 		/*
186 		 * We do just enough validity checking to make sure we don't reference
187 		 * data outside the page passed to us. The page could be corrupt in
188 		 * many other ways, but at least we won't crash.
189 		 */
190 		if (ItemIdHasStorage(id) &&
191 			lp_len >= MinHeapTupleSize &&
192 			lp_offset == MAXALIGN(lp_offset) &&
193 			lp_offset + lp_len <= raw_page_size)
194 		{
195 			HeapTupleHeader tuphdr;
196 			bytea	   *tuple_data_bytea;
197 			int			tuple_data_len;
198 
199 			/* Extract information from the tuple header */
200 
201 			tuphdr = (HeapTupleHeader) PageGetItem(page, id);
202 
203 			values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
204 			values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
205 			/* shared with xvac */
206 			values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
207 			values[7] = PointerGetDatum(&tuphdr->t_ctid);
208 			values[8] = UInt32GetDatum(tuphdr->t_infomask2);
209 			values[9] = UInt32GetDatum(tuphdr->t_infomask);
210 			values[10] = UInt8GetDatum(tuphdr->t_hoff);
211 
212 			/* Copy raw tuple data into bytea attribute */
213 			tuple_data_len = lp_len - tuphdr->t_hoff;
214 			tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
215 			SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
216 			memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
217 				   tuple_data_len);
218 			values[13] = PointerGetDatum(tuple_data_bytea);
219 
220 			/*
221 			 * We already checked that the item is completely within the raw
222 			 * page passed to us, with the length given in the line pointer.
223 			 * Let's check that t_hoff doesn't point over lp_len, before using
224 			 * it to access t_bits and oid.
225 			 */
226 			if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
227 				tuphdr->t_hoff <= lp_len &&
228 				tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
229 			{
230 				if (tuphdr->t_infomask & HEAP_HASNULL)
231 				{
232 					int			bits_len;
233 
234 					bits_len =
235 						BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
236 					values[11] = CStringGetTextDatum(
237 									 bits_to_text(tuphdr->t_bits, bits_len));
238 				}
239 				else
240 					nulls[11] = true;
241 
242 				if (tuphdr->t_infomask & HEAP_HASOID)
243 					values[12] = HeapTupleHeaderGetOid(tuphdr);
244 				else
245 					nulls[12] = true;
246 			}
247 			else
248 			{
249 				nulls[11] = true;
250 				nulls[12] = true;
251 			}
252 		}
253 		else
254 		{
255 			/*
256 			 * The line pointer is not used, or it's invalid. Set the rest of
257 			 * the fields to NULL
258 			 */
259 			int			i;
260 
261 			for (i = 4; i <= 13; i++)
262 				nulls[i] = true;
263 		}
264 
265 		/* Build and return the result tuple. */
266 		resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
267 		result = HeapTupleGetDatum(resultTuple);
268 
269 		inter_call_data->offset++;
270 
271 		SRF_RETURN_NEXT(fctx, result);
272 	}
273 	else
274 		SRF_RETURN_DONE(fctx);
275 }
276 
277 /*
278  * tuple_data_split_internal
279  *
280  * Split raw tuple data taken directly from a page into an array of bytea
281  * elements. This routine does a lookup on NULL values and creates array
282  * elements accordingly. This is a reimplementation of nocachegetattr()
283  * in heaptuple.c simplified for educational purposes.
284  */
285 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)286 tuple_data_split_internal(Oid relid, char *tupdata,
287 						  uint16 tupdata_len, uint16 t_infomask,
288 						  uint16 t_infomask2, bits8 *t_bits,
289 						  bool do_detoast)
290 {
291 	ArrayBuildState *raw_attrs;
292 	int			nattrs;
293 	int			i;
294 	int			off = 0;
295 	Relation	rel;
296 	TupleDesc	tupdesc;
297 
298 	/* Get tuple descriptor from relation OID */
299 	rel = relation_open(relid, AccessShareLock);
300 	tupdesc = RelationGetDescr(rel);
301 
302 	raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
303 	nattrs = tupdesc->natts;
304 
305 	if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
306 		ereport(ERROR,
307 				(errcode(ERRCODE_DATA_CORRUPTED),
308 				 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
309 
310 	for (i = 0; i < nattrs; i++)
311 	{
312 		Form_pg_attribute attr;
313 		bool		is_null;
314 		bytea	   *attr_data = NULL;
315 
316 		attr = tupdesc->attrs[i];
317 
318 		/*
319 		 * Tuple header can specify less attributes than tuple descriptor as
320 		 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
321 		 * change tuples in pages, so attributes with numbers greater than
322 		 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
323 		 */
324 		if (i >= (t_infomask2 & HEAP_NATTS_MASK))
325 			is_null = true;
326 		else
327 			is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
328 
329 		if (!is_null)
330 		{
331 			int			len;
332 
333 			if (attr->attlen == -1)
334 			{
335 				off = att_align_pointer(off, tupdesc->attrs[i]->attalign, -1,
336 										tupdata + off);
337 
338 				/*
339 				 * As VARSIZE_ANY throws an exception if it can't properly
340 				 * detect the type of external storage in macros VARTAG_SIZE,
341 				 * this check is repeated to have a nicer error handling.
342 				 */
343 				if (VARATT_IS_EXTERNAL(tupdata + off) &&
344 					!VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
345 					!VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
346 					ereport(ERROR,
347 							(errcode(ERRCODE_DATA_CORRUPTED),
348 							 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
349 
350 				len = VARSIZE_ANY(tupdata + off);
351 			}
352 			else
353 			{
354 				off = att_align_nominal(off, tupdesc->attrs[i]->attalign);
355 				len = attr->attlen;
356 			}
357 
358 			if (tupdata_len < off + len)
359 				ereport(ERROR,
360 						(errcode(ERRCODE_DATA_CORRUPTED),
361 						 errmsg("unexpected end of tuple data")));
362 
363 			if (attr->attlen == -1 && do_detoast)
364 				attr_data = DatumGetByteaPCopy(tupdata + off);
365 			else
366 			{
367 				attr_data = (bytea *) palloc(len + VARHDRSZ);
368 				SET_VARSIZE(attr_data, len + VARHDRSZ);
369 				memcpy(VARDATA(attr_data), tupdata + off, len);
370 			}
371 
372 			off = att_addlength_pointer(off, tupdesc->attrs[i]->attlen,
373 										tupdata + off);
374 		}
375 
376 		raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
377 									 is_null, BYTEAOID, CurrentMemoryContext);
378 		if (attr_data)
379 			pfree(attr_data);
380 	}
381 
382 	if (tupdata_len != off)
383 		ereport(ERROR,
384 				(errcode(ERRCODE_DATA_CORRUPTED),
385 			errmsg("end of tuple reached without looking at all its data")));
386 
387 	relation_close(rel, AccessShareLock);
388 
389 	return makeArrayResult(raw_attrs, CurrentMemoryContext);
390 }
391 
392 /*
393  * tuple_data_split
394  *
395  * Split raw tuple data taken directly from page into distinct elements
396  * taking into account null values.
397  */
398 PG_FUNCTION_INFO_V1(tuple_data_split);
399 
400 Datum
tuple_data_split(PG_FUNCTION_ARGS)401 tuple_data_split(PG_FUNCTION_ARGS)
402 {
403 	Oid			relid;
404 	bytea	   *raw_data;
405 	uint16		t_infomask;
406 	uint16		t_infomask2;
407 	char	   *t_bits_str;
408 	bool		do_detoast = false;
409 	bits8	   *t_bits = NULL;
410 	Datum		res;
411 
412 	relid = PG_GETARG_OID(0);
413 	raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
414 	t_infomask = PG_GETARG_INT16(2);
415 	t_infomask2 = PG_GETARG_INT16(3);
416 	t_bits_str = PG_ARGISNULL(4) ? NULL :
417 		text_to_cstring(PG_GETARG_TEXT_PP(4));
418 
419 	if (PG_NARGS() >= 6)
420 		do_detoast = PG_GETARG_BOOL(5);
421 
422 	if (!superuser())
423 		ereport(ERROR,
424 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
425 				 errmsg("must be superuser to use raw page functions")));
426 
427 	if (!raw_data)
428 		PG_RETURN_NULL();
429 
430 	/*
431 	 * Convert t_bits string back to the bits8 array as represented in the
432 	 * tuple header.
433 	 */
434 	if (t_infomask & HEAP_HASNULL)
435 	{
436 		int			bits_str_len;
437 		int			bits_len;
438 
439 		bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
440 		if (!t_bits_str)
441 			ereport(ERROR,
442 					(errcode(ERRCODE_DATA_CORRUPTED),
443 					 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
444 							bits_len)));
445 
446 		bits_str_len = strlen(t_bits_str);
447 		if (bits_len != bits_str_len)
448 			ereport(ERROR,
449 					(errcode(ERRCODE_DATA_CORRUPTED),
450 					 errmsg("unexpected length of t_bits %u, expected %d",
451 							bits_str_len, bits_len)));
452 
453 		/* do the conversion */
454 		t_bits = text_to_bits(t_bits_str, bits_str_len);
455 	}
456 	else
457 	{
458 		if (t_bits_str)
459 			ereport(ERROR,
460 					(errcode(ERRCODE_DATA_CORRUPTED),
461 					 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
462 							strlen(t_bits_str))));
463 	}
464 
465 	/* Split tuple data */
466 	res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
467 									VARSIZE(raw_data) - VARHDRSZ,
468 									t_infomask, t_infomask2, t_bits,
469 									do_detoast);
470 
471 	if (t_bits)
472 		pfree(t_bits);
473 
474 	PG_RETURN_ARRAYTYPE_P(res);
475 }
476