1 /*-------------------------------------------------------------------------
2 *
3 * heapfuncs.c
4 * Functions to investigate heap pages
5 *
6 * We check the input to these functions for corrupt pointers etc. that
7 * might cause crashes, but at the same time we try to print out as much
8 * information as possible, even if it's nonsense. That's because if a
9 * page is corrupt, we don't know why and how exactly it is corrupt, so we
10 * let the user judge it.
11 *
 * These functions are restricted to superusers for fear of introducing
 * security holes if the input checking isn't as water-tight as it should be.
 * You'd need to be superuser to obtain a raw page image anyway, so
 * there's hardly any use case for using these without superuser rights.
17 *
18 * Copyright (c) 2007-2016, PostgreSQL Global Development Group
19 *
20 * IDENTIFICATION
21 * contrib/pageinspect/heapfuncs.c
22 *
23 *-------------------------------------------------------------------------
24 */
25
26 #include "postgres.h"
27
28 #include "access/htup_details.h"
29 #include "funcapi.h"
30 #include "catalog/pg_type.h"
31 #include "miscadmin.h"
32 #include "utils/array.h"
33 #include "utils/builtins.h"
34 #include "utils/rel.h"
35
36
37 /*
38 * bits_to_text
39 *
40 * Converts a bits8-array of 'len' bits to a human-readable
41 * c-string representation.
42 */
43 static char *
bits_to_text(bits8 * bits,int len)44 bits_to_text(bits8 *bits, int len)
45 {
46 int i;
47 char *str;
48
49 str = palloc(len + 1);
50
51 for (i = 0; i < len; i++)
52 str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
53
54 str[i] = '\0';
55
56 return str;
57 }
58
59
60 /*
61 * text_to_bits
62 *
63 * Converts a c-string representation of bits into a bits8-array. This is
64 * the reverse operation of previous routine.
65 */
66 static bits8 *
text_to_bits(char * str,int len)67 text_to_bits(char *str, int len)
68 {
69 bits8 *bits;
70 int off = 0;
71 char byte = 0;
72
73 bits = palloc(len + 1);
74
75 while (off < len)
76 {
77 if (off % 8 == 0)
78 byte = 0;
79
80 if ((str[off] == '0') || (str[off] == '1'))
81 byte = byte | ((str[off] - '0') << off % 8);
82 else
83 ereport(ERROR,
84 (errcode(ERRCODE_DATA_CORRUPTED),
85 errmsg("illegal character '%c' in t_bits string", str[off])));
86
87 if (off % 8 == 7)
88 bits[off / 8] = byte;
89
90 off++;
91 }
92
93 return bits;
94 }
95
96 /*
97 * heap_page_items
98 *
99 * Allows inspection of line pointers and tuple headers of a heap page.
100 */
101 PG_FUNCTION_INFO_V1(heap_page_items);
102
103 typedef struct heap_page_items_state
104 {
105 TupleDesc tupd;
106 Page page;
107 uint16 offset;
108 } heap_page_items_state;
109
110 Datum
heap_page_items(PG_FUNCTION_ARGS)111 heap_page_items(PG_FUNCTION_ARGS)
112 {
113 bytea *raw_page = PG_GETARG_BYTEA_P(0);
114 heap_page_items_state *inter_call_data = NULL;
115 FuncCallContext *fctx;
116 int raw_page_size;
117
118 if (!superuser())
119 ereport(ERROR,
120 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
121 (errmsg("must be superuser to use raw page functions"))));
122
123 raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
124
125 if (SRF_IS_FIRSTCALL())
126 {
127 TupleDesc tupdesc;
128 MemoryContext mctx;
129
130 if (raw_page_size < SizeOfPageHeaderData)
131 ereport(ERROR,
132 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
133 errmsg("input page too small (%d bytes)", raw_page_size)));
134
135 fctx = SRF_FIRSTCALL_INIT();
136 mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
137
138 inter_call_data = palloc(sizeof(heap_page_items_state));
139
140 /* Build a tuple descriptor for our result type */
141 if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
142 elog(ERROR, "return type must be a row type");
143
144 inter_call_data->tupd = tupdesc;
145
146 inter_call_data->offset = FirstOffsetNumber;
147 inter_call_data->page = VARDATA(raw_page);
148
149 fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
150 fctx->user_fctx = inter_call_data;
151
152 MemoryContextSwitchTo(mctx);
153 }
154
155 fctx = SRF_PERCALL_SETUP();
156 inter_call_data = fctx->user_fctx;
157
158 if (fctx->call_cntr < fctx->max_calls)
159 {
160 Page page = inter_call_data->page;
161 HeapTuple resultTuple;
162 Datum result;
163 ItemId id;
164 Datum values[14];
165 bool nulls[14];
166 uint16 lp_offset;
167 uint16 lp_flags;
168 uint16 lp_len;
169
170 memset(nulls, 0, sizeof(nulls));
171
172 /* Extract information from the line pointer */
173
174 id = PageGetItemId(page, inter_call_data->offset);
175
176 lp_offset = ItemIdGetOffset(id);
177 lp_flags = ItemIdGetFlags(id);
178 lp_len = ItemIdGetLength(id);
179
180 values[0] = UInt16GetDatum(inter_call_data->offset);
181 values[1] = UInt16GetDatum(lp_offset);
182 values[2] = UInt16GetDatum(lp_flags);
183 values[3] = UInt16GetDatum(lp_len);
184
185 /*
186 * We do just enough validity checking to make sure we don't reference
187 * data outside the page passed to us. The page could be corrupt in
188 * many other ways, but at least we won't crash.
189 */
190 if (ItemIdHasStorage(id) &&
191 lp_len >= MinHeapTupleSize &&
192 lp_offset == MAXALIGN(lp_offset) &&
193 lp_offset + lp_len <= raw_page_size)
194 {
195 HeapTupleHeader tuphdr;
196 bytea *tuple_data_bytea;
197 int tuple_data_len;
198
199 /* Extract information from the tuple header */
200
201 tuphdr = (HeapTupleHeader) PageGetItem(page, id);
202
203 values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
204 values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
205 /* shared with xvac */
206 values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
207 values[7] = PointerGetDatum(&tuphdr->t_ctid);
208 values[8] = UInt32GetDatum(tuphdr->t_infomask2);
209 values[9] = UInt32GetDatum(tuphdr->t_infomask);
210 values[10] = UInt8GetDatum(tuphdr->t_hoff);
211
212 /* Copy raw tuple data into bytea attribute */
213 tuple_data_len = lp_len - tuphdr->t_hoff;
214 tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
215 SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
216 memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
217 tuple_data_len);
218 values[13] = PointerGetDatum(tuple_data_bytea);
219
220 /*
221 * We already checked that the item is completely within the raw
222 * page passed to us, with the length given in the line pointer.
223 * Let's check that t_hoff doesn't point over lp_len, before using
224 * it to access t_bits and oid.
225 */
226 if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
227 tuphdr->t_hoff <= lp_len &&
228 tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
229 {
230 if (tuphdr->t_infomask & HEAP_HASNULL)
231 {
232 int bits_len;
233
234 bits_len =
235 BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
236 values[11] = CStringGetTextDatum(
237 bits_to_text(tuphdr->t_bits, bits_len));
238 }
239 else
240 nulls[11] = true;
241
242 if (tuphdr->t_infomask & HEAP_HASOID)
243 values[12] = HeapTupleHeaderGetOid(tuphdr);
244 else
245 nulls[12] = true;
246 }
247 else
248 {
249 nulls[11] = true;
250 nulls[12] = true;
251 }
252 }
253 else
254 {
255 /*
256 * The line pointer is not used, or it's invalid. Set the rest of
257 * the fields to NULL
258 */
259 int i;
260
261 for (i = 4; i <= 13; i++)
262 nulls[i] = true;
263 }
264
265 /* Build and return the result tuple. */
266 resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
267 result = HeapTupleGetDatum(resultTuple);
268
269 inter_call_data->offset++;
270
271 SRF_RETURN_NEXT(fctx, result);
272 }
273 else
274 SRF_RETURN_DONE(fctx);
275 }
276
277 /*
278 * tuple_data_split_internal
279 *
280 * Split raw tuple data taken directly from a page into an array of bytea
281 * elements. This routine does a lookup on NULL values and creates array
282 * elements accordingly. This is a reimplementation of nocachegetattr()
283 * in heaptuple.c simplified for educational purposes.
284 */
285 static Datum
tuple_data_split_internal(Oid relid,char * tupdata,uint16 tupdata_len,uint16 t_infomask,uint16 t_infomask2,bits8 * t_bits,bool do_detoast)286 tuple_data_split_internal(Oid relid, char *tupdata,
287 uint16 tupdata_len, uint16 t_infomask,
288 uint16 t_infomask2, bits8 *t_bits,
289 bool do_detoast)
290 {
291 ArrayBuildState *raw_attrs;
292 int nattrs;
293 int i;
294 int off = 0;
295 Relation rel;
296 TupleDesc tupdesc;
297
298 /* Get tuple descriptor from relation OID */
299 rel = relation_open(relid, AccessShareLock);
300 tupdesc = RelationGetDescr(rel);
301
302 raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
303 nattrs = tupdesc->natts;
304
305 if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
306 ereport(ERROR,
307 (errcode(ERRCODE_DATA_CORRUPTED),
308 errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
309
310 for (i = 0; i < nattrs; i++)
311 {
312 Form_pg_attribute attr;
313 bool is_null;
314 bytea *attr_data = NULL;
315
316 attr = tupdesc->attrs[i];
317
318 /*
319 * Tuple header can specify less attributes than tuple descriptor as
320 * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
321 * change tuples in pages, so attributes with numbers greater than
322 * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
323 */
324 if (i >= (t_infomask2 & HEAP_NATTS_MASK))
325 is_null = true;
326 else
327 is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
328
329 if (!is_null)
330 {
331 int len;
332
333 if (attr->attlen == -1)
334 {
335 off = att_align_pointer(off, tupdesc->attrs[i]->attalign, -1,
336 tupdata + off);
337
338 /*
339 * As VARSIZE_ANY throws an exception if it can't properly
340 * detect the type of external storage in macros VARTAG_SIZE,
341 * this check is repeated to have a nicer error handling.
342 */
343 if (VARATT_IS_EXTERNAL(tupdata + off) &&
344 !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
345 !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
346 ereport(ERROR,
347 (errcode(ERRCODE_DATA_CORRUPTED),
348 errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
349
350 len = VARSIZE_ANY(tupdata + off);
351 }
352 else
353 {
354 off = att_align_nominal(off, tupdesc->attrs[i]->attalign);
355 len = attr->attlen;
356 }
357
358 if (tupdata_len < off + len)
359 ereport(ERROR,
360 (errcode(ERRCODE_DATA_CORRUPTED),
361 errmsg("unexpected end of tuple data")));
362
363 if (attr->attlen == -1 && do_detoast)
364 attr_data = DatumGetByteaPCopy(tupdata + off);
365 else
366 {
367 attr_data = (bytea *) palloc(len + VARHDRSZ);
368 SET_VARSIZE(attr_data, len + VARHDRSZ);
369 memcpy(VARDATA(attr_data), tupdata + off, len);
370 }
371
372 off = att_addlength_pointer(off, tupdesc->attrs[i]->attlen,
373 tupdata + off);
374 }
375
376 raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
377 is_null, BYTEAOID, CurrentMemoryContext);
378 if (attr_data)
379 pfree(attr_data);
380 }
381
382 if (tupdata_len != off)
383 ereport(ERROR,
384 (errcode(ERRCODE_DATA_CORRUPTED),
385 errmsg("end of tuple reached without looking at all its data")));
386
387 relation_close(rel, AccessShareLock);
388
389 return makeArrayResult(raw_attrs, CurrentMemoryContext);
390 }
391
392 /*
393 * tuple_data_split
394 *
395 * Split raw tuple data taken directly from page into distinct elements
396 * taking into account null values.
397 */
398 PG_FUNCTION_INFO_V1(tuple_data_split);
399
400 Datum
tuple_data_split(PG_FUNCTION_ARGS)401 tuple_data_split(PG_FUNCTION_ARGS)
402 {
403 Oid relid;
404 bytea *raw_data;
405 uint16 t_infomask;
406 uint16 t_infomask2;
407 char *t_bits_str;
408 bool do_detoast = false;
409 bits8 *t_bits = NULL;
410 Datum res;
411
412 relid = PG_GETARG_OID(0);
413 raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
414 t_infomask = PG_GETARG_INT16(2);
415 t_infomask2 = PG_GETARG_INT16(3);
416 t_bits_str = PG_ARGISNULL(4) ? NULL :
417 text_to_cstring(PG_GETARG_TEXT_PP(4));
418
419 if (PG_NARGS() >= 6)
420 do_detoast = PG_GETARG_BOOL(5);
421
422 if (!superuser())
423 ereport(ERROR,
424 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
425 errmsg("must be superuser to use raw page functions")));
426
427 if (!raw_data)
428 PG_RETURN_NULL();
429
430 /*
431 * Convert t_bits string back to the bits8 array as represented in the
432 * tuple header.
433 */
434 if (t_infomask & HEAP_HASNULL)
435 {
436 int bits_str_len;
437 int bits_len;
438
439 bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
440 if (!t_bits_str)
441 ereport(ERROR,
442 (errcode(ERRCODE_DATA_CORRUPTED),
443 errmsg("argument of t_bits is null, but it is expected to be null and %d character long",
444 bits_len)));
445
446 bits_str_len = strlen(t_bits_str);
447 if (bits_len != bits_str_len)
448 ereport(ERROR,
449 (errcode(ERRCODE_DATA_CORRUPTED),
450 errmsg("unexpected length of t_bits %u, expected %d",
451 bits_str_len, bits_len)));
452
453 /* do the conversion */
454 t_bits = text_to_bits(t_bits_str, bits_str_len);
455 }
456 else
457 {
458 if (t_bits_str)
459 ereport(ERROR,
460 (errcode(ERRCODE_DATA_CORRUPTED),
461 errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes length",
462 strlen(t_bits_str))));
463 }
464
465 /* Split tuple data */
466 res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
467 VARSIZE(raw_data) - VARHDRSZ,
468 t_infomask, t_infomask2, t_bits,
469 do_detoast);
470
471 if (t_bits)
472 pfree(t_bits);
473
474 PG_RETURN_ARRAYTYPE_P(res);
475 }
476