1 /*-------------------------------------------------------------------------
2 *
3 * heapfuncs.c
4 * Functions to investigate heap pages
5 *
6 * We check the input to these functions for corrupt pointers etc. that
7 * might cause crashes, but at the same time we try to print out as much
8 * information as possible, even if it's nonsense. That's because if a
9 * page is corrupt, we don't know why and how exactly it is corrupt, so we
10 * let the user judge it.
11 *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as water-tight as it should be.
 * You'd need to be superuser to obtain a raw page image in the first place,
 * so there's hardly any use case for using these without superuser rights
 * anyway.
17 *
18 * Copyright (c) 2007-2020, PostgreSQL Global Development Group
19 *
20 * IDENTIFICATION
21 * contrib/pageinspect/heapfuncs.c
22 *
23 *-------------------------------------------------------------------------
24 */
25
26 #include "postgres.h"
27
28 #include "access/htup_details.h"
29 #include "access/relation.h"
30 #include "catalog/pg_am_d.h"
31 #include "catalog/pg_type.h"
32 #include "funcapi.h"
33 #include "miscadmin.h"
34 #include "pageinspect.h"
35 #include "port/pg_bitutils.h"
36 #include "utils/array.h"
37 #include "utils/builtins.h"
38 #include "utils/rel.h"
39
/*
 * HeapTupleHeaderGetOidOld
 *
 * Tuples can no longer be created with OIDs, but a cluster brought forward
 * with pg_upgrade may still contain tuples that carry one, and it is worth
 * showing.
 *
 * When HEAP_HASOID_OLD is set in t_infomask, the OID is read from the
 * sizeof(Oid) bytes that end at offset t_hoff from the start of the header;
 * otherwise the macro yields InvalidOid.  Note that 'tup' is evaluated more
 * than once, so it must not have side effects.
 */
#define HeapTupleHeaderGetOidOld(tup) \
	((((tup)->t_infomask & HEAP_HASOID_OLD) != 0) ? \
	 *((Oid *) ((char *) (tup) + (tup)->t_hoff - sizeof(Oid))) : \
	 InvalidOid)
52
53
54 /*
55 * bits_to_text
56 *
57 * Converts a bits8-array of 'len' bits to a human-readable
58 * c-string representation.
59 */
60 static char *
bits_to_text(bits8 * bits,int len)61 bits_to_text(bits8 *bits, int len)
62 {
63 int i;
64 char *str;
65
66 str = palloc(len + 1);
67
68 for (i = 0; i < len; i++)
69 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
70
71 str[i] = '\0';
72
73 return str;
74 }
75
76
77 /*
78 * text_to_bits
79 *
80 * Converts a c-string representation of bits into a bits8-array. This is
81 * the reverse operation of previous routine.
82 */
83 static bits8 *
text_to_bits(char * str,int len)84 text_to_bits(char *str, int len)
85 {
86 bits8 *bits;
87 int off = 0;
88 char byte = 0;
89
90 bits = palloc(len + 1);
91
92 while (off < len)
93 {
94 if (off % 8 == 0)
95 byte = 0;
96
97 if ((str[off] == '0') || (str[off] == '1'))
98 byte = byte | ((str[off] - '0') << off % 8);
99 else
100 ereport(ERROR,
101 (errcode(ERRCODE_DATA_CORRUPTED),
102 errmsg("illegal character '%c' in t_bits string", str[off])));
103
104 if (off % 8 == 7)
105 bits[off / 8] = byte;
106
107 off++;
108 }
109
110 return bits;
111 }
112
113 /*
114 * heap_page_items
115 *
116 * Allows inspection of line pointers and tuple headers of a heap page.
117 */
118 PG_FUNCTION_INFO_V1(heap_page_items);
119
120 typedef struct heap_page_items_state
121 {
122 TupleDesc tupd;
123 Page page;
124 uint16 offset;
125 } heap_page_items_state;
126
127 Datum
heap_page_items(PG_FUNCTION_ARGS)128 heap_page_items(PG_FUNCTION_ARGS)
129 {
130 bytea *raw_page = PG_GETARG_BYTEA_P(0);
131 heap_page_items_state *inter_call_data = NULL;
132 FuncCallContext *fctx;
133 int raw_page_size;
134
135 if (!superuser())
136 ereport(ERROR,
137 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
138 errmsg("must be superuser to use raw page functions")));
139
140 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
141
142 if (SRF_IS_FIRSTCALL())
143 {
144 TupleDesc tupdesc;
145 MemoryContext mctx;
146
147 if (raw_page_size < SizeOfPageHeaderData)
148 ereport(ERROR,
149 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
150 errmsg("input page too small (%d bytes)", raw_page_size)));
151
152 fctx = SRF_FIRSTCALL_INIT();
153 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
154
155 inter_call_data = palloc(sizeof(heap_page_items_state));
156
157 /* Build a tuple descriptor for our result type */
158 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
159 elog(ERROR, "return type must be a row type");
160
161 inter_call_data->tupd = tupdesc;
162
163 inter_call_data->offset = FirstOffsetNumber;
164 inter_call_data->page = VARDATA(raw_page);
165
166 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
167 fctx->user_fctx = inter_call_data;
168
169 MemoryContextSwitchTo(mctx);
170 }
171
172 fctx = SRF_PERCALL_SETUP();
173 inter_call_data = fctx->user_fctx;
174
175 if (fctx->call_cntr < fctx->max_calls)
176 {
177 Page page = inter_call_data->page;
178 HeapTuple resultTuple;
179 Datum result;
180 ItemId id;
181 Datum values[14];
182 bool nulls[14];
183 uint16 lp_offset;
184 uint16 lp_flags;
185 uint16 lp_len;
186
187 memset(nulls, 0, sizeof(nulls));
188
189 /* Extract information from the line pointer */
190
191 id = PageGetItemId(page, inter_call_data->offset);
192
193 lp_offset = ItemIdGetOffset(id);
194 lp_flags = ItemIdGetFlags(id);
195 lp_len = ItemIdGetLength(id);
196
197 values[0] = UInt16GetDatum(inter_call_data->offset);
198 values[1] = UInt16GetDatum(lp_offset);
199 values[2] = UInt16GetDatum(lp_flags);
200 values[3] = UInt16GetDatum(lp_len);
201
202 /*
203 * We do just enough validity checking to make sure we don't reference
204 * data outside the page passed to us. The page could be corrupt in
205 * many other ways, but at least we won't crash.
206 */
207 if (ItemIdHasStorage(id) &&
208 lp_len >= MinHeapTupleSize &&
209 lp_offset == MAXALIGN(lp_offset) &&
210 lp_offset + lp_len <= raw_page_size)
211 {
212 HeapTupleHeader tuphdr;
213 bytea *tuple_data_bytea;
214 int tuple_data_len;
215
216 /* Extract information from the tuple header */
217
218 tuphdr = (HeapTupleHeader) PageGetItem(page, id);
219
220 values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
221 values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
222 /* shared with xvac */
223 values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
224 values[7] = PointerGetDatum(&tuphdr->t_ctid);
225 values[8] = UInt32GetDatum(tuphdr->t_infomask2);
226 values[9] = UInt32GetDatum(tuphdr->t_infomask);
227 values[10] = UInt8GetDatum(tuphdr->t_hoff);
228
229 /* Copy raw tuple data into bytea attribute */
230 tuple_data_len = lp_len - tuphdr->t_hoff;
231 tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
232 SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
233 memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
234 tuple_data_len);
235 values[13] = PointerGetDatum(tuple_data_bytea);
236
237 /*
238 * We already checked that the item is completely within the raw
239 * page passed to us, with the length given in the line pointer.
240 * Let's check that t_hoff doesn't point over lp_len, before using
241 * it to access t_bits and oid.
242 */
243 if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
244 tuphdr->t_hoff <= lp_len &&
245 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
246 {
247 if (tuphdr->t_infomask & HEAP_HASNULL)
248 {
249 int bits_len;
250
251 bits_len =
252 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
253 values[11] = CStringGetTextDatum(bits_to_text(tuphdr->t_bits, bits_len));
254 }
255 else
256 nulls[11] = true;
257
258 if (tuphdr->t_infomask & HEAP_HASOID_OLD)
259 values[12] = HeapTupleHeaderGetOidOld(tuphdr);
260 else
261 nulls[12] = true;
262 }
263 else
264 {
265 nulls[11] = true;
266 nulls[12] = true;
267 }
268 }
269 else
270 {
271 /*
272 * The line pointer is not used, or it's invalid. Set the rest of
273 * the fields to NULL
274 */
275 int i;
276
277 for (i = 4; i <= 13; i++)
278 nulls[i] = true;
279 }
280
281 /* Build and return the result tuple. */
282 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
283 result = HeapTupleGetDatum(resultTuple);
284
285 inter_call_data->offset++;
286
287 SRF_RETURN_NEXT(fctx, result);
288 }
289 else
290 SRF_RETURN_DONE(fctx);
291 }
292
293 /*
294 * tuple_data_split_internal
295 *
296 * Split raw tuple data taken directly from a page into an array of bytea
297 * elements. This routine does a lookup on NULL values and creates array
298 * elements accordingly. This is a reimplementation of nocachegetattr()
299 * in heaptuple.c simplified for educational purposes.
300 */
301 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)302 tuple_data_split_internal(Oid relid, char *tupdata,
303 uint16 tupdata_len, uint16 t_infomask,
304 uint16 t_infomask2, bits8 *t_bits,
305 bool do_detoast)
306 {
307 ArrayBuildState *raw_attrs;
308 int nattrs;
309 int i;
310 int off = 0;
311 Relation rel;
312 TupleDesc tupdesc;
313
314 /* Get tuple descriptor from relation OID */
315 rel = relation_open(relid, AccessShareLock);
316 tupdesc = RelationGetDescr(rel);
317
318 raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
319 nattrs = tupdesc->natts;
320
321 if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
322 ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
323 errmsg("only heap AM is supported")));
324
325 if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
326 ereport(ERROR,
327 (errcode(ERRCODE_DATA_CORRUPTED),
328 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
329
330 for (i = 0; i < nattrs; i++)
331 {
332 Form_pg_attribute attr;
333 bool is_null;
334 bytea *attr_data = NULL;
335
336 attr = TupleDescAttr(tupdesc, i);
337
338 /*
339 * Tuple header can specify less attributes than tuple descriptor as
340 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
341 * change tuples in pages, so attributes with numbers greater than
342 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
343 */
344 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
345 is_null = true;
346 else
347 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
348
349 if (!is_null)
350 {
351 int len;
352
353 if (attr->attlen == -1)
354 {
355 off = att_align_pointer(off, attr->attalign, -1,
356 tupdata + off);
357
358 /*
359 * As VARSIZE_ANY throws an exception if it can't properly
360 * detect the type of external storage in macros VARTAG_SIZE,
361 * this check is repeated to have a nicer error handling.
362 */
363 if (VARATT_IS_EXTERNAL(tupdata + off) &&
364 !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
365 !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
366 ereport(ERROR,
367 (errcode(ERRCODE_DATA_CORRUPTED),
368 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
369
370 len = VARSIZE_ANY(tupdata + off);
371 }
372 else
373 {
374 off = att_align_nominal(off, attr->attalign);
375 len = attr->attlen;
376 }
377
378 if (tupdata_len < off + len)
379 ereport(ERROR,
380 (errcode(ERRCODE_DATA_CORRUPTED),
381 errmsg("unexpected end of tuple data")));
382
383 if (attr->attlen == -1 && do_detoast)
384 attr_data = DatumGetByteaPCopy(tupdata + off);
385 else
386 {
387 attr_data = (bytea *) palloc(len + VARHDRSZ);
388 SET_VARSIZE(attr_data, len + VARHDRSZ);
389 memcpy(VARDATA(attr_data), tupdata + off, len);
390 }
391
392 off = att_addlength_pointer(off, attr->attlen,
393 tupdata + off);
394 }
395
396 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
397 is_null, BYTEAOID, CurrentMemoryContext);
398 if (attr_data)
399 pfree(attr_data);
400 }
401
402 if (tupdata_len != off)
403 ereport(ERROR,
404 (errcode(ERRCODE_DATA_CORRUPTED),
405 errmsg("end of tuple reached without looking at all its data")));
406
407 relation_close(rel, AccessShareLock);
408
409 return makeArrayResult(raw_attrs, CurrentMemoryContext);
410 }
411
412 /*
413 * tuple_data_split
414 *
415 * Split raw tuple data taken directly from page into distinct elements
416 * taking into account null values.
417 */
418 PG_FUNCTION_INFO_V1(tuple_data_split);
419
420 Datum
tuple_data_split(PG_FUNCTION_ARGS)421 tuple_data_split(PG_FUNCTION_ARGS)
422 {
423 Oid relid;
424 bytea *raw_data;
425 uint16 t_infomask;
426 uint16 t_infomask2;
427 char *t_bits_str;
428 bool do_detoast = false;
429 bits8 *t_bits = NULL;
430 Datum res;
431
432 relid = PG_GETARG_OID(0);
433 raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
434 t_infomask = PG_GETARG_INT16(2);
435 t_infomask2 = PG_GETARG_INT16(3);
436 t_bits_str = PG_ARGISNULL(4) ? NULL :
437 text_to_cstring(PG_GETARG_TEXT_PP(4));
438
439 if (PG_NARGS() >= 6)
440 do_detoast = PG_GETARG_BOOL(5);
441
442 if (!superuser())
443 ereport(ERROR,
444 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
445 errmsg("must be superuser to use raw page functions")));
446
447 if (!raw_data)
448 PG_RETURN_NULL();
449
450 /*
451 * Convert t_bits string back to the bits8 array as represented in the
452 * tuple header.
453 */
454 if (t_infomask & HEAP_HASNULL)
455 {
456 int bits_str_len;
457 int bits_len;
458
459 bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
460 if (!t_bits_str)
461 ereport(ERROR,
462 (errcode(ERRCODE_DATA_CORRUPTED),
463 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
464 bits_len)));
465
466 bits_str_len = strlen(t_bits_str);
467 if (bits_len != bits_str_len)
468 ereport(ERROR,
469 (errcode(ERRCODE_DATA_CORRUPTED),
470 errmsg("unexpected length of t_bits %u, expected %d",
471 bits_str_len, bits_len)));
472
473 /* do the conversion */
474 t_bits = text_to_bits(t_bits_str, bits_str_len);
475 }
476 else
477 {
478 if (t_bits_str)
479 ereport(ERROR,
480 (errcode(ERRCODE_DATA_CORRUPTED),
481 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
482 strlen(t_bits_str))));
483 }
484
485 /* Split tuple data */
486 res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
487 VARSIZE(raw_data) - VARHDRSZ,
488 t_infomask, t_infomask2, t_bits,
489 do_detoast);
490
491 if (t_bits)
492 pfree(t_bits);
493
494 PG_RETURN_ARRAYTYPE_P(res);
495 }
496
497 /*
498 * heap_tuple_infomask_flags
499 *
500 * Decode into a human-readable format t_infomask and t_infomask2 associated
501 * to a tuple. All the flags are described in access/htup_details.h.
502 */
503 PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags);
504
505 Datum
heap_tuple_infomask_flags(PG_FUNCTION_ARGS)506 heap_tuple_infomask_flags(PG_FUNCTION_ARGS)
507 {
508 #define HEAP_TUPLE_INFOMASK_COLS 2
509 Datum values[HEAP_TUPLE_INFOMASK_COLS];
510 bool nulls[HEAP_TUPLE_INFOMASK_COLS];
511 uint16 t_infomask = PG_GETARG_INT16(0);
512 uint16 t_infomask2 = PG_GETARG_INT16(1);
513 int cnt = 0;
514 ArrayType *a;
515 int bitcnt;
516 Datum *flags;
517 TupleDesc tupdesc;
518 HeapTuple tuple;
519
520 if (!superuser())
521 ereport(ERROR,
522 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
523 errmsg("must be superuser to use raw page functions")));
524
525 /* Build a tuple descriptor for our result type */
526 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
527 elog(ERROR, "return type must be a row type");
528
529 bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) +
530 pg_popcount((const char *) &t_infomask2, sizeof(uint16));
531
532 /* Initialize values and NULL flags arrays */
533 MemSet(values, 0, sizeof(values));
534 MemSet(nulls, 0, sizeof(nulls));
535
536 /* If no flags, return a set of empty arrays */
537 if (bitcnt <= 0)
538 {
539 values[0] = PointerGetDatum(construct_empty_array(TEXTOID));
540 values[1] = PointerGetDatum(construct_empty_array(TEXTOID));
541 tuple = heap_form_tuple(tupdesc, values, nulls);
542 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
543 }
544
545 /* build set of raw flags */
546 flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);
547
548 /* decode t_infomask */
549 if ((t_infomask & HEAP_HASNULL) != 0)
550 flags[cnt++] = CStringGetTextDatum("HEAP_HASNULL");
551 if ((t_infomask & HEAP_HASVARWIDTH) != 0)
552 flags[cnt++] = CStringGetTextDatum("HEAP_HASVARWIDTH");
553 if ((t_infomask & HEAP_HASEXTERNAL) != 0)
554 flags[cnt++] = CStringGetTextDatum("HEAP_HASEXTERNAL");
555 if ((t_infomask & HEAP_HASOID_OLD) != 0)
556 flags[cnt++] = CStringGetTextDatum("HEAP_HASOID_OLD");
557 if ((t_infomask & HEAP_XMAX_KEYSHR_LOCK) != 0)
558 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK");
559 if ((t_infomask & HEAP_COMBOCID) != 0)
560 flags[cnt++] = CStringGetTextDatum("HEAP_COMBOCID");
561 if ((t_infomask & HEAP_XMAX_EXCL_LOCK) != 0)
562 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK");
563 if ((t_infomask & HEAP_XMAX_LOCK_ONLY) != 0)
564 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY");
565 if ((t_infomask & HEAP_XMIN_COMMITTED) != 0)
566 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED");
567 if ((t_infomask & HEAP_XMIN_INVALID) != 0)
568 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_INVALID");
569 if ((t_infomask & HEAP_XMAX_COMMITTED) != 0)
570 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED");
571 if ((t_infomask & HEAP_XMAX_INVALID) != 0)
572 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_INVALID");
573 if ((t_infomask & HEAP_XMAX_IS_MULTI) != 0)
574 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI");
575 if ((t_infomask & HEAP_UPDATED) != 0)
576 flags[cnt++] = CStringGetTextDatum("HEAP_UPDATED");
577 if ((t_infomask & HEAP_MOVED_OFF) != 0)
578 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_OFF");
579 if ((t_infomask & HEAP_MOVED_IN) != 0)
580 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_IN");
581
582 /* decode t_infomask2 */
583 if ((t_infomask2 & HEAP_KEYS_UPDATED) != 0)
584 flags[cnt++] = CStringGetTextDatum("HEAP_KEYS_UPDATED");
585 if ((t_infomask2 & HEAP_HOT_UPDATED) != 0)
586 flags[cnt++] = CStringGetTextDatum("HEAP_HOT_UPDATED");
587 if ((t_infomask2 & HEAP_ONLY_TUPLE) != 0)
588 flags[cnt++] = CStringGetTextDatum("HEAP_ONLY_TUPLE");
589
590 /* build value */
591 Assert(cnt <= bitcnt);
592 a = construct_array(flags, cnt, TEXTOID, -1, false, TYPALIGN_INT);
593 values[0] = PointerGetDatum(a);
594
595 /*
596 * Build set of combined flags. Use the same array as previously, this
597 * keeps the code simple.
598 */
599 cnt = 0;
600 MemSet(flags, 0, sizeof(Datum) * bitcnt);
601
602 /* decode combined masks of t_infomask */
603 if ((t_infomask & HEAP_XMAX_SHR_LOCK) == HEAP_XMAX_SHR_LOCK)
604 flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK");
605 if ((t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN)
606 flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_FROZEN");
607 if ((t_infomask & HEAP_MOVED) == HEAP_MOVED)
608 flags[cnt++] = CStringGetTextDatum("HEAP_MOVED");
609
610 /* Build an empty array if there are no combined flags */
611 if (cnt == 0)
612 a = construct_empty_array(TEXTOID);
613 else
614 a = construct_array(flags, cnt, TEXTOID, -1, false, TYPALIGN_INT);
615 pfree(flags);
616 values[1] = PointerGetDatum(a);
617
618 /* Returns the record as Datum */
619 tuple = heap_form_tuple(tupdesc, values, nulls);
620 PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
621 }
622