/*-------------------------------------------------------------------------
 *
 * heap_surgery.c
 *	  Functions to perform surgery on a damaged heap table.
 *
 * Copyright (c) 2020-2021, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  contrib/pg_surgery/heap_surgery.c
 *
 *-------------------------------------------------------------------------
 */
13 #include "postgres.h"
14
15 #include "access/heapam.h"
16 #include "access/visibilitymap.h"
17 #include "catalog/pg_am_d.h"
18 #include "catalog/pg_proc_d.h"
19 #include "miscadmin.h"
20 #include "storage/bufmgr.h"
21 #include "utils/acl.h"
22 #include "utils/rel.h"
23
24 PG_MODULE_MAGIC;
25
/*
 * Ways in which the state of a heap tuple can be forcibly changed.  The
 * value selects which branch heap_force_common() takes for each target item.
 */
typedef enum HeapTupleForceOption
{
	HEAP_FORCE_KILL = 0,		/* mark the tuple's item ID dead */
	HEAP_FORCE_FREEZE = 1		/* reset visibility info and mark frozen */
} HeapTupleForceOption;
32
33 PG_FUNCTION_INFO_V1(heap_force_kill);
34 PG_FUNCTION_INFO_V1(heap_force_freeze);
35
36 static int32 tidcmp(const void *a, const void *b);
37 static Datum heap_force_common(FunctionCallInfo fcinfo,
38 HeapTupleForceOption heap_force_opt);
39 static void sanity_check_tid_array(ArrayType *ta, int *ntids);
40 static void sanity_check_relation(Relation rel);
41 static BlockNumber find_tids_one_page(ItemPointer tids, int ntids,
42 OffsetNumber *next_start_ptr);
43
44 /*-------------------------------------------------------------------------
45 * heap_force_kill()
46 *
47 * Force kill the tuple(s) pointed to by the item pointer(s) stored in the
48 * given TID array.
49 *
50 * Usage: SELECT heap_force_kill(regclass, tid[]);
51 *-------------------------------------------------------------------------
52 */
53 Datum
heap_force_kill(PG_FUNCTION_ARGS)54 heap_force_kill(PG_FUNCTION_ARGS)
55 {
56 PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_KILL));
57 }
58
59 /*-------------------------------------------------------------------------
60 * heap_force_freeze()
61 *
62 * Force freeze the tuple(s) pointed to by the item pointer(s) stored in the
63 * given TID array.
64 *
65 * Usage: SELECT heap_force_freeze(regclass, tid[]);
66 *-------------------------------------------------------------------------
67 */
68 Datum
heap_force_freeze(PG_FUNCTION_ARGS)69 heap_force_freeze(PG_FUNCTION_ARGS)
70 {
71 PG_RETURN_DATUM(heap_force_common(fcinfo, HEAP_FORCE_FREEZE));
72 }
73
74 /*-------------------------------------------------------------------------
75 * heap_force_common()
76 *
77 * Common code for heap_force_kill and heap_force_freeze
78 *-------------------------------------------------------------------------
79 */
80 static Datum
heap_force_common(FunctionCallInfo fcinfo,HeapTupleForceOption heap_force_opt)81 heap_force_common(FunctionCallInfo fcinfo, HeapTupleForceOption heap_force_opt)
82 {
83 Oid relid = PG_GETARG_OID(0);
84 ArrayType *ta = PG_GETARG_ARRAYTYPE_P_COPY(1);
85 ItemPointer tids;
86 int ntids,
87 nblocks;
88 Relation rel;
89 OffsetNumber curr_start_ptr,
90 next_start_ptr;
91 bool include_this_tid[MaxHeapTuplesPerPage];
92
93 if (RecoveryInProgress())
94 ereport(ERROR,
95 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
96 errmsg("recovery is in progress"),
97 errhint("heap surgery functions cannot be executed during recovery.")));
98
99 /* Check inputs. */
100 sanity_check_tid_array(ta, &ntids);
101
102 rel = relation_open(relid, RowExclusiveLock);
103
104 /* Check target relation. */
105 sanity_check_relation(rel);
106
107 tids = ((ItemPointer) ARR_DATA_PTR(ta));
108
109 /*
110 * If there is more than one TID in the array, sort them so that we can
111 * easily fetch all the TIDs belonging to one particular page from the
112 * array.
113 */
114 if (ntids > 1)
115 qsort((void *) tids, ntids, sizeof(ItemPointerData), tidcmp);
116
117 curr_start_ptr = next_start_ptr = 0;
118 nblocks = RelationGetNumberOfBlocks(rel);
119
120 /*
121 * Loop, performing the necessary actions for each block.
122 */
123 while (next_start_ptr != ntids)
124 {
125 Buffer buf;
126 Buffer vmbuf = InvalidBuffer;
127 Page page;
128 BlockNumber blkno;
129 OffsetNumber curoff;
130 OffsetNumber maxoffset;
131 int i;
132 bool did_modify_page = false;
133 bool did_modify_vm = false;
134
135 CHECK_FOR_INTERRUPTS();
136
137 /*
138 * Find all the TIDs belonging to one particular page starting from
139 * next_start_ptr and process them one by one.
140 */
141 blkno = find_tids_one_page(tids, ntids, &next_start_ptr);
142
143 /* Check whether the block number is valid. */
144 if (blkno >= nblocks)
145 {
146 /* Update the current_start_ptr before moving to the next page. */
147 curr_start_ptr = next_start_ptr;
148
149 ereport(NOTICE,
150 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
151 errmsg("skipping block %u for relation \"%s\" because the block number is out of range",
152 blkno, RelationGetRelationName(rel))));
153 continue;
154 }
155
156 buf = ReadBuffer(rel, blkno);
157 LockBufferForCleanup(buf);
158
159 page = BufferGetPage(buf);
160
161 maxoffset = PageGetMaxOffsetNumber(page);
162
163 /*
164 * Figure out which TIDs we are going to process and which ones we are
165 * going to skip.
166 */
167 memset(include_this_tid, 0, sizeof(include_this_tid));
168 for (i = curr_start_ptr; i < next_start_ptr; i++)
169 {
170 OffsetNumber offno = ItemPointerGetOffsetNumberNoCheck(&tids[i]);
171 ItemId itemid;
172
173 /* Check whether the offset number is valid. */
174 if (offno == InvalidOffsetNumber || offno > maxoffset)
175 {
176 ereport(NOTICE,
177 errmsg("skipping tid (%u, %u) for relation \"%s\" because the item number is out of range",
178 blkno, offno, RelationGetRelationName(rel)));
179 continue;
180 }
181
182 itemid = PageGetItemId(page, offno);
183
184 /* Only accept an item ID that is used. */
185 if (ItemIdIsRedirected(itemid))
186 {
187 ereport(NOTICE,
188 errmsg("skipping tid (%u, %u) for relation \"%s\" because it redirects to item %u",
189 blkno, offno, RelationGetRelationName(rel),
190 ItemIdGetRedirect(itemid)));
191 continue;
192 }
193 else if (ItemIdIsDead(itemid))
194 {
195 ereport(NOTICE,
196 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked dead",
197 blkno, offno, RelationGetRelationName(rel))));
198 continue;
199 }
200 else if (!ItemIdIsUsed(itemid))
201 {
202 ereport(NOTICE,
203 (errmsg("skipping tid (%u, %u) for relation \"%s\" because it is marked unused",
204 blkno, offno, RelationGetRelationName(rel))));
205 continue;
206 }
207
208 /* Mark it for processing. */
209 Assert(offno < MaxHeapTuplesPerPage);
210 include_this_tid[offno] = true;
211 }
212
213 /*
214 * Before entering the critical section, pin the visibility map page
215 * if it appears to be necessary.
216 */
217 if (heap_force_opt == HEAP_FORCE_KILL && PageIsAllVisible(page))
218 visibilitymap_pin(rel, blkno, &vmbuf);
219
220 /* No ereport(ERROR) from here until all the changes are logged. */
221 START_CRIT_SECTION();
222
223 for (curoff = FirstOffsetNumber; curoff <= maxoffset;
224 curoff = OffsetNumberNext(curoff))
225 {
226 ItemId itemid;
227
228 if (!include_this_tid[curoff])
229 continue;
230
231 itemid = PageGetItemId(page, curoff);
232 Assert(ItemIdIsNormal(itemid));
233
234 did_modify_page = true;
235
236 if (heap_force_opt == HEAP_FORCE_KILL)
237 {
238 ItemIdSetDead(itemid);
239
240 /*
241 * If the page is marked all-visible, we must clear
242 * PD_ALL_VISIBLE flag on the page header and an all-visible
243 * bit on the visibility map corresponding to the page.
244 */
245 if (PageIsAllVisible(page))
246 {
247 PageClearAllVisible(page);
248 visibilitymap_clear(rel, blkno, vmbuf,
249 VISIBILITYMAP_VALID_BITS);
250 did_modify_vm = true;
251 }
252 }
253 else
254 {
255 HeapTupleHeader htup;
256
257 Assert(heap_force_opt == HEAP_FORCE_FREEZE);
258
259 htup = (HeapTupleHeader) PageGetItem(page, itemid);
260
261 /*
262 * Reset all visibility-related fields of the tuple. This
263 * logic should mimic heap_execute_freeze_tuple(), but we
264 * choose to reset xmin and ctid just to be sure that no
265 * potentially-garbled data is left behind.
266 */
267 ItemPointerSet(&htup->t_ctid, blkno, curoff);
268 HeapTupleHeaderSetXmin(htup, FrozenTransactionId);
269 HeapTupleHeaderSetXmax(htup, InvalidTransactionId);
270 if (htup->t_infomask & HEAP_MOVED)
271 {
272 if (htup->t_infomask & HEAP_MOVED_OFF)
273 HeapTupleHeaderSetXvac(htup, InvalidTransactionId);
274 else
275 HeapTupleHeaderSetXvac(htup, FrozenTransactionId);
276 }
277
278 /*
279 * Clear all the visibility-related bits of this tuple and
280 * mark it as frozen. Also, get rid of HOT_UPDATED and
281 * KEYS_UPDATES bits.
282 */
283 htup->t_infomask &= ~HEAP_XACT_MASK;
284 htup->t_infomask |= (HEAP_XMIN_FROZEN | HEAP_XMAX_INVALID);
285 htup->t_infomask2 &= ~HEAP_HOT_UPDATED;
286 htup->t_infomask2 &= ~HEAP_KEYS_UPDATED;
287 }
288 }
289
290 /*
291 * If the page was modified, only then, we mark the buffer dirty or do
292 * the WAL logging.
293 */
294 if (did_modify_page)
295 {
296 /* Mark buffer dirty before we write WAL. */
297 MarkBufferDirty(buf);
298
299 /* XLOG stuff */
300 if (RelationNeedsWAL(rel))
301 log_newpage_buffer(buf, true);
302 }
303
304 /* WAL log the VM page if it was modified. */
305 if (did_modify_vm && RelationNeedsWAL(rel))
306 log_newpage_buffer(vmbuf, false);
307
308 END_CRIT_SECTION();
309
310 UnlockReleaseBuffer(buf);
311
312 if (vmbuf != InvalidBuffer)
313 ReleaseBuffer(vmbuf);
314
315 /* Update the current_start_ptr before moving to the next page. */
316 curr_start_ptr = next_start_ptr;
317 }
318
319 relation_close(rel, RowExclusiveLock);
320
321 pfree(ta);
322
323 PG_RETURN_VOID();
324 }
325
326 /*-------------------------------------------------------------------------
327 * tidcmp()
328 *
329 * Compare two item pointers, return -1, 0, or +1.
330 *
331 * See ItemPointerCompare for details.
332 * ------------------------------------------------------------------------
333 */
334 static int32
tidcmp(const void * a,const void * b)335 tidcmp(const void *a, const void *b)
336 {
337 ItemPointer iptr1 = ((const ItemPointer) a);
338 ItemPointer iptr2 = ((const ItemPointer) b);
339
340 return ItemPointerCompare(iptr1, iptr2);
341 }
342
343 /*-------------------------------------------------------------------------
344 * sanity_check_tid_array()
345 *
346 * Perform sanity checks on the given tid array, and set *ntids to the
347 * number of items in the array.
348 * ------------------------------------------------------------------------
349 */
350 static void
sanity_check_tid_array(ArrayType * ta,int * ntids)351 sanity_check_tid_array(ArrayType *ta, int *ntids)
352 {
353 if (ARR_HASNULL(ta) && array_contains_nulls(ta))
354 ereport(ERROR,
355 (errcode(ERRCODE_NULL_VALUE_NOT_ALLOWED),
356 errmsg("array must not contain nulls")));
357
358 if (ARR_NDIM(ta) > 1)
359 ereport(ERROR,
360 (errcode(ERRCODE_DATA_EXCEPTION),
361 errmsg("argument must be empty or one-dimensional array")));
362
363 *ntids = ArrayGetNItems(ARR_NDIM(ta), ARR_DIMS(ta));
364 }
365
366 /*-------------------------------------------------------------------------
367 * sanity_check_relation()
368 *
369 * Perform sanity checks on the given relation.
370 * ------------------------------------------------------------------------
371 */
372 static void
sanity_check_relation(Relation rel)373 sanity_check_relation(Relation rel)
374 {
375 if (rel->rd_rel->relkind != RELKIND_RELATION &&
376 rel->rd_rel->relkind != RELKIND_MATVIEW &&
377 rel->rd_rel->relkind != RELKIND_TOASTVALUE)
378 ereport(ERROR,
379 (errcode(ERRCODE_WRONG_OBJECT_TYPE),
380 errmsg("\"%s\" is not a table, materialized view, or TOAST table",
381 RelationGetRelationName(rel))));
382
383 if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
384 ereport(ERROR,
385 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
386 errmsg("only heap AM is supported")));
387
388 /* Must be owner of the table or superuser. */
389 if (!pg_class_ownercheck(RelationGetRelid(rel), GetUserId()))
390 aclcheck_error(ACLCHECK_NOT_OWNER,
391 get_relkind_objtype(rel->rd_rel->relkind),
392 RelationGetRelationName(rel));
393 }
394
395 /*-------------------------------------------------------------------------
396 * find_tids_one_page()
397 *
398 * Find all the tids residing in the same page as tids[next_start_ptr], and
399 * update next_start_ptr so that it points to the first tid in the next page.
400 *
401 * NOTE: The input tids[] array must be sorted.
402 * ------------------------------------------------------------------------
403 */
404 static BlockNumber
find_tids_one_page(ItemPointer tids,int ntids,OffsetNumber * next_start_ptr)405 find_tids_one_page(ItemPointer tids, int ntids, OffsetNumber *next_start_ptr)
406 {
407 int i;
408 BlockNumber prev_blkno,
409 blkno;
410
411 prev_blkno = blkno = InvalidBlockNumber;
412
413 for (i = *next_start_ptr; i < ntids; i++)
414 {
415 ItemPointerData tid = tids[i];
416
417 blkno = ItemPointerGetBlockNumberNoCheck(&tid);
418
419 if (i == *next_start_ptr)
420 prev_blkno = blkno;
421
422 if (prev_blkno != blkno)
423 break;
424 }
425
426 *next_start_ptr = i;
427 return prev_blkno;
428 }
429