1 /*-------------------------------------------------------------------------
2 *
3 * heapfuncs.c
4 * Functions to investigate heap pages
5 *
6 * We check the input to these functions for corrupt pointers etc. that
7 * might cause crashes, but at the same time we try to print out as much
8 * information as possible, even if it's nonsense. That's because if a
9 * page is corrupt, we don't know why and how exactly it is corrupt, so we
10 * let the user judge it.
11 *
12 * These functions are restricted to superusers for the fear of introducing
13 * security holes if the input checking isn't as water-tight as it should be.
14 * You'd need to be superuser to obtain a raw page image anyway, so
15 * there's hardly any use case for using these without superuser-rights
16 * anyway.
17 *
18 * Copyright (c) 2007-2018, PostgreSQL Global Development Group
19 *
20 * IDENTIFICATION
21 * contrib/pageinspect/heapfuncs.c
22 *
23 *-------------------------------------------------------------------------
24 */
25
26 #include "postgres.h"
27
28 #include "pageinspect.h"
29
30 #include "access/htup_details.h"
31 #include "funcapi.h"
32 #include "catalog/pg_type.h"
33 #include "miscadmin.h"
34 #include "utils/array.h"
35 #include "utils/builtins.h"
36 #include "utils/rel.h"
37
38
39 /*
40 * bits_to_text
41 *
42 * Converts a bits8-array of 'len' bits to a human-readable
43 * c-string representation.
44 */
45 static char *
bits_to_text(bits8 * bits,int len)46 bits_to_text(bits8 *bits, int len)
47 {
48 int i;
49 char *str;
50
51 str = palloc(len + 1);
52
53 for (i = 0; i < len; i++)
54 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
55
56 str[i] = '\0';
57
58 return str;
59 }
60
61
62 /*
63 * text_to_bits
64 *
65 * Converts a c-string representation of bits into a bits8-array. This is
66 * the reverse operation of previous routine.
67 */
68 static bits8 *
text_to_bits(char * str,int len)69 text_to_bits(char *str, int len)
70 {
71 bits8 *bits;
72 int off = 0;
73 char byte = 0;
74
75 bits = palloc(len + 1);
76
77 while (off < len)
78 {
79 if (off % 8 == 0)
80 byte = 0;
81
82 if ((str[off] == '0') || (str[off] == '1'))
83 byte = byte | ((str[off] - '0') << off % 8);
84 else
85 ereport(ERROR,
86 (errcode(ERRCODE_DATA_CORRUPTED),
87 errmsg("illegal character '%c' in t_bits string", str[off])));
88
89 if (off % 8 == 7)
90 bits[off / 8] = byte;
91
92 off++;
93 }
94
95 return bits;
96 }
97
98 /*
99 * heap_page_items
100 *
101 * Allows inspection of line pointers and tuple headers of a heap page.
102 */
103 PG_FUNCTION_INFO_V1(heap_page_items);
104
105 typedef struct heap_page_items_state
106 {
107 TupleDesc tupd;
108 Page page;
109 uint16 offset;
110 } heap_page_items_state;
111
112 Datum
heap_page_items(PG_FUNCTION_ARGS)113 heap_page_items(PG_FUNCTION_ARGS)
114 {
115 bytea *raw_page = PG_GETARG_BYTEA_P(0);
116 heap_page_items_state *inter_call_data = NULL;
117 FuncCallContext *fctx;
118 int raw_page_size;
119
120 if (!superuser())
121 ereport(ERROR,
122 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
123 (errmsg("must be superuser to use raw page functions"))));
124
125 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
126
127 if (SRF_IS_FIRSTCALL())
128 {
129 TupleDesc tupdesc;
130 MemoryContext mctx;
131
132 if (raw_page_size < SizeOfPageHeaderData)
133 ereport(ERROR,
134 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
135 errmsg("input page too small (%d bytes)", raw_page_size)));
136
137 fctx = SRF_FIRSTCALL_INIT();
138 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
139
140 inter_call_data = palloc(sizeof(heap_page_items_state));
141
142 /* Build a tuple descriptor for our result type */
143 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
144 elog(ERROR, "return type must be a row type");
145
146 inter_call_data->tupd = tupdesc;
147
148 inter_call_data->offset = FirstOffsetNumber;
149 inter_call_data->page = VARDATA(raw_page);
150
151 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
152 fctx->user_fctx = inter_call_data;
153
154 MemoryContextSwitchTo(mctx);
155 }
156
157 fctx = SRF_PERCALL_SETUP();
158 inter_call_data = fctx->user_fctx;
159
160 if (fctx->call_cntr < fctx->max_calls)
161 {
162 Page page = inter_call_data->page;
163 HeapTuple resultTuple;
164 Datum result;
165 ItemId id;
166 Datum values[14];
167 bool nulls[14];
168 uint16 lp_offset;
169 uint16 lp_flags;
170 uint16 lp_len;
171
172 memset(nulls, 0, sizeof(nulls));
173
174 /* Extract information from the line pointer */
175
176 id = PageGetItemId(page, inter_call_data->offset);
177
178 lp_offset = ItemIdGetOffset(id);
179 lp_flags = ItemIdGetFlags(id);
180 lp_len = ItemIdGetLength(id);
181
182 values[0] = UInt16GetDatum(inter_call_data->offset);
183 values[1] = UInt16GetDatum(lp_offset);
184 values[2] = UInt16GetDatum(lp_flags);
185 values[3] = UInt16GetDatum(lp_len);
186
187 /*
188 * We do just enough validity checking to make sure we don't reference
189 * data outside the page passed to us. The page could be corrupt in
190 * many other ways, but at least we won't crash.
191 */
192 if (ItemIdHasStorage(id) &&
193 lp_len >= MinHeapTupleSize &&
194 lp_offset == MAXALIGN(lp_offset) &&
195 lp_offset + lp_len <= raw_page_size)
196 {
197 HeapTupleHeader tuphdr;
198 bytea *tuple_data_bytea;
199 int tuple_data_len;
200
201 /* Extract information from the tuple header */
202
203 tuphdr = (HeapTupleHeader) PageGetItem(page, id);
204
205 values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
206 values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
207 /* shared with xvac */
208 values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
209 values[7] = PointerGetDatum(&tuphdr->t_ctid);
210 values[8] = UInt32GetDatum(tuphdr->t_infomask2);
211 values[9] = UInt32GetDatum(tuphdr->t_infomask);
212 values[10] = UInt8GetDatum(tuphdr->t_hoff);
213
214 /* Copy raw tuple data into bytea attribute */
215 tuple_data_len = lp_len - tuphdr->t_hoff;
216 tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
217 SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
218 memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
219 tuple_data_len);
220 values[13] = PointerGetDatum(tuple_data_bytea);
221
222 /*
223 * We already checked that the item is completely within the raw
224 * page passed to us, with the length given in the line pointer.
225 * Let's check that t_hoff doesn't point over lp_len, before using
226 * it to access t_bits and oid.
227 */
228 if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
229 tuphdr->t_hoff <= lp_len &&
230 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
231 {
232 if (tuphdr->t_infomask & HEAP_HASNULL)
233 {
234 int bits_len;
235
236 bits_len =
237 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
238 values[11] = CStringGetTextDatum(
239 bits_to_text(tuphdr->t_bits, bits_len));
240 }
241 else
242 nulls[11] = true;
243
244 if (tuphdr->t_infomask & HEAP_HASOID)
245 values[12] = HeapTupleHeaderGetOid(tuphdr);
246 else
247 nulls[12] = true;
248 }
249 else
250 {
251 nulls[11] = true;
252 nulls[12] = true;
253 }
254 }
255 else
256 {
257 /*
258 * The line pointer is not used, or it's invalid. Set the rest of
259 * the fields to NULL
260 */
261 int i;
262
263 for (i = 4; i <= 13; i++)
264 nulls[i] = true;
265 }
266
267 /* Build and return the result tuple. */
268 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
269 result = HeapTupleGetDatum(resultTuple);
270
271 inter_call_data->offset++;
272
273 SRF_RETURN_NEXT(fctx, result);
274 }
275 else
276 SRF_RETURN_DONE(fctx);
277 }
278
279 /*
280 * tuple_data_split_internal
281 *
282 * Split raw tuple data taken directly from a page into an array of bytea
283 * elements. This routine does a lookup on NULL values and creates array
284 * elements accordingly. This is a reimplementation of nocachegetattr()
285 * in heaptuple.c simplified for educational purposes.
286 */
287 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)288 tuple_data_split_internal(Oid relid, char *tupdata,
289 uint16 tupdata_len, uint16 t_infomask,
290 uint16 t_infomask2, bits8 *t_bits,
291 bool do_detoast)
292 {
293 ArrayBuildState *raw_attrs;
294 int nattrs;
295 int i;
296 int off = 0;
297 Relation rel;
298 TupleDesc tupdesc;
299
300 /* Get tuple descriptor from relation OID */
301 rel = relation_open(relid, AccessShareLock);
302 tupdesc = RelationGetDescr(rel);
303
304 raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
305 nattrs = tupdesc->natts;
306
307 if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
308 ereport(ERROR,
309 (errcode(ERRCODE_DATA_CORRUPTED),
310 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
311
312 for (i = 0; i < nattrs; i++)
313 {
314 Form_pg_attribute attr;
315 bool is_null;
316 bytea *attr_data = NULL;
317
318 attr = TupleDescAttr(tupdesc, i);
319
320 /*
321 * Tuple header can specify less attributes than tuple descriptor as
322 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
323 * change tuples in pages, so attributes with numbers greater than
324 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
325 */
326 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
327 is_null = true;
328 else
329 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
330
331 if (!is_null)
332 {
333 int len;
334
335 if (attr->attlen == -1)
336 {
337 off = att_align_pointer(off, attr->attalign, -1,
338 tupdata + off);
339
340 /*
341 * As VARSIZE_ANY throws an exception if it can't properly
342 * detect the type of external storage in macros VARTAG_SIZE,
343 * this check is repeated to have a nicer error handling.
344 */
345 if (VARATT_IS_EXTERNAL(tupdata + off) &&
346 !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
347 !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
348 ereport(ERROR,
349 (errcode(ERRCODE_DATA_CORRUPTED),
350 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
351
352 len = VARSIZE_ANY(tupdata + off);
353 }
354 else
355 {
356 off = att_align_nominal(off, attr->attalign);
357 len = attr->attlen;
358 }
359
360 if (tupdata_len < off + len)
361 ereport(ERROR,
362 (errcode(ERRCODE_DATA_CORRUPTED),
363 errmsg("unexpected end of tuple data")));
364
365 if (attr->attlen == -1 && do_detoast)
366 attr_data = DatumGetByteaPCopy(tupdata + off);
367 else
368 {
369 attr_data = (bytea *) palloc(len + VARHDRSZ);
370 SET_VARSIZE(attr_data, len + VARHDRSZ);
371 memcpy(VARDATA(attr_data), tupdata + off, len);
372 }
373
374 off = att_addlength_pointer(off, attr->attlen,
375 tupdata + off);
376 }
377
378 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
379 is_null, BYTEAOID, CurrentMemoryContext);
380 if (attr_data)
381 pfree(attr_data);
382 }
383
384 if (tupdata_len != off)
385 ereport(ERROR,
386 (errcode(ERRCODE_DATA_CORRUPTED),
387 errmsg("end of tuple reached without looking at all its data")));
388
389 relation_close(rel, AccessShareLock);
390
391 return makeArrayResult(raw_attrs, CurrentMemoryContext);
392 }
393
394 /*
395 * tuple_data_split
396 *
397 * Split raw tuple data taken directly from page into distinct elements
398 * taking into account null values.
399 */
400 PG_FUNCTION_INFO_V1(tuple_data_split);
401
402 Datum
tuple_data_split(PG_FUNCTION_ARGS)403 tuple_data_split(PG_FUNCTION_ARGS)
404 {
405 Oid relid;
406 bytea *raw_data;
407 uint16 t_infomask;
408 uint16 t_infomask2;
409 char *t_bits_str;
410 bool do_detoast = false;
411 bits8 *t_bits = NULL;
412 Datum res;
413
414 relid = PG_GETARG_OID(0);
415 raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
416 t_infomask = PG_GETARG_INT16(2);
417 t_infomask2 = PG_GETARG_INT16(3);
418 t_bits_str = PG_ARGISNULL(4) ? NULL :
419 text_to_cstring(PG_GETARG_TEXT_PP(4));
420
421 if (PG_NARGS() >= 6)
422 do_detoast = PG_GETARG_BOOL(5);
423
424 if (!superuser())
425 ereport(ERROR,
426 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
427 errmsg("must be superuser to use raw page functions")));
428
429 if (!raw_data)
430 PG_RETURN_NULL();
431
432 /*
433 * Convert t_bits string back to the bits8 array as represented in the
434 * tuple header.
435 */
436 if (t_infomask & HEAP_HASNULL)
437 {
438 int bits_str_len;
439 int bits_len;
440
441 bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
442 if (!t_bits_str)
443 ereport(ERROR,
444 (errcode(ERRCODE_DATA_CORRUPTED),
445 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
446 bits_len)));
447
448 bits_str_len = strlen(t_bits_str);
449 if (bits_len != bits_str_len)
450 ereport(ERROR,
451 (errcode(ERRCODE_DATA_CORRUPTED),
452 errmsg("unexpected length of t_bits %u, expected %d",
453 bits_str_len, bits_len)));
454
455 /* do the conversion */
456 t_bits = text_to_bits(t_bits_str, bits_str_len);
457 }
458 else
459 {
460 if (t_bits_str)
461 ereport(ERROR,
462 (errcode(ERRCODE_DATA_CORRUPTED),
463 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
464 strlen(t_bits_str))));
465 }
466
467 /* Split tuple data */
468 res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
469 VARSIZE(raw_data) - VARHDRSZ,
470 t_infomask, t_infomask2, t_bits,
471 do_detoast);
472
473 if (t_bits)
474 pfree(t_bits);
475
476 PG_RETURN_ARRAYTYPE_P(res);
477 }
478