1 /*
2 * This file and its contents are licensed under the Timescale License.
3 * Please see the included NOTICE for copyright information and
4 * LICENSE-TIMESCALE for a copy of the license.
5 */
6
7 /*
8 * This file contains source code that was copied and/or modified from the
9 * PostgreSQL database, which is licensed under the open-source PostgreSQL
10 * License. Please see the NOTICE at the top level directory for a copy of
11 * the PostgreSQL License.
12 */
13
14 /* see postgres commit ab5e9caa4a3ec4765348a0482e88edcf3f6aab4a */
15
16 #include <postgres.h>
17 #include <access/amapi.h>
18 #include <access/multixact.h>
19 #include <access/relscan.h>
20 #include <access/rewriteheap.h>
21 #include <access/transam.h>
22 #include <access/xact.h>
23 #include <access/xlog.h>
24 #include <catalog/catalog.h>
25 #include <catalog/dependency.h>
26 #include <catalog/heap.h>
27 #include <catalog/index.h>
28 #include <catalog/namespace.h>
29 #include <catalog/objectaccess.h>
30 #include <catalog/pg_am.h>
31 #include <catalog/toasting.h>
32 #include <commands/cluster.h>
33 #include <commands/tablecmds.h>
34 #include <commands/tablespace.h>
35 #include <commands/vacuum.h>
36 #include <miscadmin.h>
37 #include <nodes/pg_list.h>
38 #include <optimizer/planner.h>
39 #include <storage/bufmgr.h>
40 #include <storage/lmgr.h>
41 #include <storage/predicate.h>
42 #include <storage/smgr.h>
43 #include <utils/acl.h>
44 #include <utils/fmgroids.h>
45 #include <utils/guc.h>
46 #include <utils/inval.h>
47 #include <utils/lsyscache.h>
48 #include <utils/memutils.h>
49 #include <utils/pg_rusage.h>
50 #include <utils/relmapper.h>
51 #include <utils/snapmgr.h>
52 #include <utils/syscache.h>
53 #include <utils/tuplesort.h>
54 #include <executor/spi.h>
55 #include <utils/snapmgr.h>
56
57 #include "compat/compat.h"
58 #if PG13_LT
59 #include <access/tuptoaster.h>
60 #else
61 #include <access/toast_internals.h>
62 #endif
63
64 #include "annotations.h"
65 #include "chunk.h"
66 #include "chunk_copy.h"
67 #include "chunk_index.h"
68 #include "hypertable_cache.h"
69 #include "indexing.h"
70 #include "reorder.h"
71
72 extern void timescale_reorder_rel(Oid tableOid, Oid indexOid, bool verbose, Oid wait_id,
73 Oid destination_tablespace, Oid index_tablespace);
74
75 #define REORDER_ACCESS_EXCLUSIVE_DEADLOCK_TIMEOUT "101000"
76
77 static void timescale_rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose, Oid wait_id,
78 Oid destination_tablespace, Oid index_tablespace);
79 static void copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
80 bool *pSwapToastByContent, TransactionId *pFreezeXid,
81 MultiXactId *pCutoffMulti);
82
83 static void finish_heap_swaps(Oid OIDOldHeap, Oid OIDNewHeap, List *old_index_oids,
84 List *new_index_oids, bool swap_toast_by_content, bool is_internal,
85 TransactionId frozenXid, MultiXactId cutoffMulti, Oid wait_id);
86
87 static void swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, bool is_internal,
88 TransactionId frozenXid, MultiXactId cutoffMulti);
89
90 static bool chunk_get_reorder_index(Hypertable *ht, Chunk *chunk, Oid index_relid,
91 ChunkIndexMapping *cim_out);
92
93 Datum
tsl_reorder_chunk(PG_FUNCTION_ARGS)94 tsl_reorder_chunk(PG_FUNCTION_ARGS)
95 {
96 Oid chunk_id = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
97 Oid index_id = PG_ARGISNULL(1) ? InvalidOid : PG_GETARG_OID(1);
98 bool verbose = PG_ARGISNULL(2) ? false : PG_GETARG_BOOL(2);
99
100 /* used for debugging purposes only see finish_heap_swaps */
101 Oid wait_id = PG_NARGS() < 4 || PG_ARGISNULL(3) ? InvalidOid : PG_GETARG_OID(3);
102
103 /*
104 * Allow reorder in transactions for testing purposes only
105 */
106 if (!OidIsValid(wait_id))
107 PreventInTransactionBlock(true, "reorder");
108
109 reorder_chunk(chunk_id, index_id, verbose, wait_id, InvalidOid, InvalidOid);
110 PG_RETURN_VOID();
111 }
112
/*
 * SQL-callable entry point for moving a chunk (and its indexes) to new
 * tablespaces, optionally reordering its heap along an index on the way.
 *
 * Arguments (any may be SQL NULL):
 *   0: chunk regclass (required)
 *   1: destination tablespace name (required; resolved here, error if missing)
 *   2: index destination tablespace name (required, see comment below)
 *   3: index to reorder by (optional; ignored for compressed chunks)
 *   4: verbose flag (default false)
 *   5: wait id, testing only (see finish_heap_swaps)
 */
Datum
tsl_move_chunk(PG_FUNCTION_ARGS)
{
	Oid chunk_id = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
	/* get_tablespace_oid() with missing_ok=false errors out on unknown names */
	Oid destination_tablespace =
		PG_ARGISNULL(1) ? InvalidOid : get_tablespace_oid(PG_GETARG_NAME(1)->data, false);
	Oid index_destination_tablespace =
		PG_ARGISNULL(2) ? InvalidOid : get_tablespace_oid(PG_GETARG_NAME(2)->data, false);
	Oid index_id = PG_ARGISNULL(3) ? InvalidOid : PG_GETARG_OID(3);
	bool verbose = PG_ARGISNULL(4) ? false : PG_GETARG_BOOL(4);
	Chunk *chunk;

	/* used for debugging purposes only see finish_heap_swaps */
	Oid wait_id = PG_NARGS() < 6 || PG_ARGISNULL(5) ? InvalidOid : PG_GETARG_OID(5);

	/*
	 * Allow move in transactions for testing purposes only
	 */
	if (!OidIsValid(wait_id))
		PreventInTransactionBlock(true, "move");

	/*
	 * Index_destination_tablespace is currently a required parameter in order
	 * to avoid situations where there is ambiguity about where indexes should
	 * be placed based on where the index was created and the new tablespace
	 * (and avoid interactions with multi-tablespace hypertable functionality).
	 * Eventually we may want to offer an option to keep indexes in the
	 * tablespace of their parent if it is specified.
	 */
	if (!OidIsValid(chunk_id) || !OidIsValid(destination_tablespace) ||
		!OidIsValid(index_destination_tablespace))
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("valid chunk, destination_tablespace, and index_destination_tablespaces "
						"are required")));

	chunk = ts_chunk_get_by_relid(chunk_id, false);

	if (NULL == chunk)
		ereport(ERROR,
				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
				 errmsg("\"%s\" is not a chunk", get_rel_name(chunk_id))));

	/*
	 * Refuse to operate directly on an internal compressed-data chunk; the
	 * user must move its uncompressed parent instead, which drags the
	 * compressed side along (see next branch).
	 */
	if (ts_chunk_contains_compressed_data(chunk))
	{
		Chunk *chunk_parent = ts_chunk_get_compressed_chunk_parent(chunk);

		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("cannot directly move internal compression data"),
				 errdetail("Chunk \"%s\" contains compressed data for chunk \"%s\" and cannot be "
						   "moved directly.",
						   get_rel_name(chunk_id),
						   get_rel_name(chunk_parent->table_id)),
				 errhint("Moving chunk \"%s\" will also move the compressed data.",
						 get_rel_name(chunk_parent->table_id))));
	}

	/* If chunk is compressed move it by altering tablespace on both chunks */
	if (OidIsValid(chunk->fd.compressed_chunk_id))
	{
		Chunk *compressed_chunk = ts_chunk_get_by_id(chunk->fd.compressed_chunk_id, true);
		/* same SET TABLESPACE command is reused for both relations */
		AlterTableCmd cmd = { .type = T_AlterTableCmd,
							  .subtype = AT_SetTableSpace,
							  .name = get_tablespace_name(destination_tablespace) };

		/* reorder would have to decompress, so the index argument is dropped */
		if (OidIsValid(index_id))
			ereport(NOTICE,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("ignoring index parameter"),
					 errdetail("Chunk will not be reordered as it has compressed data.")));

		AlterTableInternal(chunk_id, list_make1(&cmd), false);
		AlterTableInternal(compressed_chunk->table_id, list_make1(&cmd), false);
	}
	else
	{
		/* uncompressed chunk: rewrite the heap into the new tablespace */
		reorder_chunk(chunk_id,
					  index_id,
					  verbose,
					  wait_id,
					  destination_tablespace,
					  index_destination_tablespace);
	}

	PG_RETURN_VOID();
}
200
201 /*
202 * Implement a distributed chunk copy/move operation.
203 *
204 * We use a procedure because multiple steps need to be performed via multiple
205 * transactions across the access node and the two datanodes that are involved.
206 * The progress of the various stages/steps are tracked in the
207 * CHUNK_COPY_OPERATION catalog table
208 */
209 static void
tsl_copy_or_move_chunk_proc(FunctionCallInfo fcinfo,bool delete_on_src_node)210 tsl_copy_or_move_chunk_proc(FunctionCallInfo fcinfo, bool delete_on_src_node)
211 {
212 Oid chunk_id = PG_ARGISNULL(0) ? InvalidOid : PG_GETARG_OID(0);
213 const char *src_node_name = PG_ARGISNULL(1) ? NULL : NameStr(*PG_GETARG_NAME(1));
214 const char *dst_node_name = PG_ARGISNULL(2) ? NULL : NameStr(*PG_GETARG_NAME(2));
215 int rc;
216 bool nonatomic = fcinfo->context && IsA(fcinfo->context, CallContext) &&
217 !castNode(CallContext, fcinfo->context)->atomic;
218
219 TS_PREVENT_FUNC_IF_READ_ONLY();
220
221 PreventInTransactionBlock(true, get_func_name(FC_FN_OID(fcinfo)));
222
223 /* src_node and dst_node both have to be non-NULL */
224 if (src_node_name == NULL || dst_node_name == NULL)
225 ereport(ERROR,
226 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
227 errmsg("invalid source or destination node")));
228
229 if (!OidIsValid(chunk_id))
230 ereport(ERROR, (errcode(ERRCODE_INVALID_PARAMETER_VALUE), errmsg("invalid chunk")));
231
232 if ((rc = SPI_connect_ext(nonatomic ? SPI_OPT_NONATOMIC : 0)) != SPI_OK_CONNECT)
233 elog(ERROR, "SPI_connect failed: %s", SPI_result_code_string(rc));
234
235 /* perform the actual distributed chunk move after a few sanity checks */
236 chunk_copy(chunk_id, src_node_name, dst_node_name, delete_on_src_node);
237
238 if ((rc = SPI_finish()) != SPI_OK_FINISH)
239 elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(rc));
240 }
241
/*
 * SQL procedure entry point for the distributed chunk move: copies the chunk
 * to the destination data node, then deletes it on the source node
 * (delete_on_src_node = true).
 */
Datum
tsl_move_chunk_proc(PG_FUNCTION_ARGS)
{
	tsl_copy_or_move_chunk_proc(fcinfo, true);

	PG_RETURN_VOID();
}
249
/*
 * SQL procedure entry point for the distributed chunk copy: copies the chunk
 * to the destination data node and keeps the original on the source node
 * (delete_on_src_node = false).
 */
Datum
tsl_copy_chunk_proc(PG_FUNCTION_ARGS)
{
	tsl_copy_or_move_chunk_proc(fcinfo, false);

	PG_RETURN_VOID();
}
257
258 Datum
tsl_copy_chunk_cleanup_proc(PG_FUNCTION_ARGS)259 tsl_copy_chunk_cleanup_proc(PG_FUNCTION_ARGS)
260 {
261 const char *operation_id = PG_ARGISNULL(0) ? NULL : NameStr(*PG_GETARG_NAME(0));
262 int rc;
263 bool nonatomic = fcinfo->context && IsA(fcinfo->context, CallContext) &&
264 !castNode(CallContext, fcinfo->context)->atomic;
265
266 TS_PREVENT_FUNC_IF_READ_ONLY();
267
268 PreventInTransactionBlock(true, get_func_name(FC_FN_OID(fcinfo)));
269
270 /* valid input has to be provided */
271 if (operation_id == NULL)
272 ereport(ERROR,
273 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
274 errmsg("invalid chunk copy operation id")));
275
276 if ((rc = SPI_connect_ext(nonatomic ? SPI_OPT_NONATOMIC : 0)) != SPI_OK_CONNECT)
277 elog(ERROR, "SPI_connect failed: %s", SPI_result_code_string(rc));
278
279 /* perform the cleanup/repair depending on the stage */
280 chunk_copy_cleanup(operation_id);
281
282 if ((rc = SPI_finish()) != SPI_OK_FINISH)
283 elog(ERROR, "SPI_finish failed: %s", SPI_result_code_string(rc));
284
285 PG_RETURN_VOID();
286 }
287
288 void
reorder_chunk(Oid chunk_id,Oid index_id,bool verbose,Oid wait_id,Oid destination_tablespace,Oid index_tablespace)289 reorder_chunk(Oid chunk_id, Oid index_id, bool verbose, Oid wait_id, Oid destination_tablespace,
290 Oid index_tablespace)
291 {
292 Chunk *chunk;
293 Cache *hcache;
294 Hypertable *ht;
295 ChunkIndexMapping cim;
296
297 if (!OidIsValid(chunk_id))
298 ereport(ERROR,
299 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
300 errmsg("must provide a valid chunk to cluster")));
301
302 chunk = ts_chunk_get_by_relid(chunk_id, false);
303
304 if (NULL == chunk)
305 ereport(ERROR,
306 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
307 errmsg("\"%s\" is not a chunk", get_rel_name(chunk_id))));
308
309 ht = ts_hypertable_cache_get_cache_and_entry(chunk->hypertable_relid, CACHE_FLAG_NONE, &hcache);
310
311 /* Our check gives better error messages, but keep the original one too. */
312 ts_hypertable_permissions_check(ht->main_table_relid, GetUserId());
313
314 if (!pg_class_ownercheck(ht->main_table_relid, GetUserId()))
315 {
316 Oid main_table_relid = ht->main_table_relid;
317
318 ts_cache_release(hcache);
319 aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_TABLE, get_rel_name(main_table_relid));
320 }
321
322 if (hypertable_is_distributed(ht))
323 ereport(ERROR,
324 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
325 errmsg("move_chunk() and reorder_chunk() cannot be used "
326 "with distributed hypertables")));
327
328 if (!chunk_get_reorder_index(ht, chunk, index_id, &cim))
329 {
330 ts_cache_release(hcache);
331 if (OidIsValid(index_id))
332 ereport(ERROR,
333 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
334 errmsg("\"%s\" is not a valid clustering index for table \"%s\"",
335 get_rel_name(index_id),
336 get_rel_name(chunk_id))));
337 else
338 ereport(ERROR,
339 (errcode(ERRCODE_UNDEFINED_OBJECT),
340 errmsg("there is no previously clustered index for table \"%s\"",
341 get_rel_name(chunk_id))));
342 }
343
344 if (OidIsValid(destination_tablespace) && destination_tablespace != MyDatabaseTableSpace)
345 {
346 AclResult aclresult;
347
348 aclresult = pg_tablespace_aclcheck(destination_tablespace, GetUserId(), ACL_CREATE);
349 if (aclresult != ACLCHECK_OK)
350 ereport(ERROR,
351 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
352 errmsg("permission denied for tablespace \"%s\"",
353 get_tablespace_name(destination_tablespace))));
354 ;
355 }
356
357 if (OidIsValid(index_tablespace) && index_tablespace != MyDatabaseTableSpace)
358 {
359 AclResult aclresult;
360
361 aclresult = pg_tablespace_aclcheck(index_tablespace, GetUserId(), ACL_CREATE);
362 if (aclresult != ACLCHECK_OK)
363 ereport(ERROR,
364 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
365 errmsg("permission denied for tablespace \"%s\"",
366 get_tablespace_name(index_tablespace))));
367 }
368
369 Assert(cim.chunkoid == chunk_id);
370
371 /*
372 * We must mark each chunk index as clustered before calling reorder_rel()
373 * because it expects indexes that need to be rechecked (due to new
374 * transaction) to already have that mark set
375 */
376 ts_chunk_index_mark_clustered(cim.chunkoid, cim.indexoid);
377 timescale_reorder_rel(cim.chunkoid,
378 cim.indexoid,
379 verbose,
380 wait_id,
381 destination_tablespace,
382 index_tablespace);
383 ts_cache_release(hcache);
384 }
385
386 /*
387 * Find the index to reorder a chunk on based on a possibly NULL indexname
388 * returns NULL if no such index is found
389 */
390 static bool
chunk_get_reorder_index(Hypertable * ht,Chunk * chunk,Oid index_relid,ChunkIndexMapping * cim_out)391 chunk_get_reorder_index(Hypertable *ht, Chunk *chunk, Oid index_relid, ChunkIndexMapping *cim_out)
392 {
393 /*
394 * Index search order: 1. Explicitly named index 2. Chunk cluster index 3.
395 * Hypertable cluster index
396 */
397 if (OidIsValid(index_relid))
398 {
399 if (ts_chunk_index_get_by_indexrelid(chunk, index_relid, cim_out))
400 return true;
401
402 return ts_chunk_index_get_by_hypertable_indexrelid(chunk, index_relid, cim_out);
403 }
404
405 index_relid = ts_indexing_find_clustered_index(chunk->table_id);
406 if (OidIsValid(index_relid))
407 return ts_chunk_index_get_by_indexrelid(chunk, index_relid, cim_out);
408
409 index_relid = ts_indexing_find_clustered_index(ht->main_table_relid);
410 if (OidIsValid(index_relid))
411 return ts_chunk_index_get_by_hypertable_indexrelid(chunk, index_relid, cim_out);
412
413 return false;
414 }
415
416 /* The following functions are based on their equivalents in postgres's cluster.c */
417
418 /*
419 * timescale_reorder_rel
420 *
421 * This clusters the table by creating a new, clustered table and
422 * swapping the relfilenodes of the new table and the old table, so
423 * the OID of the original table is preserved.
424 *
425 * Indexes are rebuilt in the same manner.
426 */
427 void
timescale_reorder_rel(Oid tableOid,Oid indexOid,bool verbose,Oid wait_id,Oid destination_tablespace,Oid index_tablespace)428 timescale_reorder_rel(Oid tableOid, Oid indexOid, bool verbose, Oid wait_id,
429 Oid destination_tablespace, Oid index_tablespace)
430 {
431 Relation OldHeap;
432 HeapTuple tuple;
433 Form_pg_index indexForm;
434
435 if (!OidIsValid(indexOid))
436 elog(ERROR, "Reorder must specify an index.");
437
438 /* Check for user-requested abort. */
439 CHECK_FOR_INTERRUPTS();
440
441 /*
442 * We grab exclusive access to the target rel and index for the duration
443 * of the transaction. (This is redundant for the single-transaction
444 * case, since cluster() already did it.) The index lock is taken inside
445 * check_index_is_clusterable.
446 */
447 OldHeap = try_relation_open(tableOid, ExclusiveLock);
448
449 /* If the table has gone away, we can skip processing it */
450 if (!OldHeap)
451 {
452 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("table disappeared during reorder")));
453 return;
454 }
455
456 /*
457 * Since we may open a new transaction for each relation, we have to check
458 * that the relation still is what we think it is.
459 */
460 /* Check that the user still owns the relation */
461 if (!pg_class_ownercheck(tableOid, GetUserId()))
462 {
463 relation_close(OldHeap, ExclusiveLock);
464 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("ownership changed during reorder")));
465 return;
466 }
467
468 if (IsSystemRelation(OldHeap))
469 ereport(ERROR,
470 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
471 errmsg("cannot reorder a system relation")));
472
473 if (OldHeap->rd_rel->relpersistence != RELPERSISTENCE_PERMANENT)
474 ereport(ERROR,
475 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
476 errmsg("can only reorder a permanent table")));
477
478 /* We do not allow reordering on shared catalogs. */
479 if (OldHeap->rd_rel->relisshared)
480 ereport(ERROR,
481 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
482 errmsg("cannot reorder a shared catalog")));
483
484 if (OldHeap->rd_rel->relkind != RELKIND_RELATION)
485 ereport(ERROR,
486 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), errmsg("can only reorder a relation")));
487
488 /*
489 * Check that the index still exists
490 */
491 if (!SearchSysCacheExists1(RELOID, ObjectIdGetDatum(indexOid)))
492 {
493 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("index disappeared during reorder")));
494 relation_close(OldHeap, ExclusiveLock);
495 return;
496 }
497
498 /*
499 * Check that the index is still the one with indisclustered set.
500 */
501 tuple = SearchSysCache1(INDEXRELID, ObjectIdGetDatum(indexOid));
502 if (!HeapTupleIsValid(tuple)) /* probably can't happen */
503 {
504 ereport(WARNING, (errcode(ERRCODE_WARNING), errmsg("invalid index heap during reorder")));
505 relation_close(OldHeap, ExclusiveLock);
506 return;
507 }
508 indexForm = (Form_pg_index) GETSTRUCT(tuple);
509
510 /*
511 * We always mark indexes as clustered when we intercept a cluster
512 * command, if it's not marked as such here, something has gone wrong
513 */
514 if (!indexForm->indisclustered)
515 ereport(ERROR,
516 (errcode(ERRCODE_ASSERT_FAILURE), errmsg("invalid index heap during reorder")));
517 ReleaseSysCache(tuple);
518
519 /*
520 * Also check for active uses of the relation in the current transaction,
521 * including open scans and pending AFTER trigger events.
522 */
523 CheckTableNotInUse(OldHeap, "CLUSTER");
524
525 /* Check heap and index are valid to cluster on */
526 check_index_is_clusterable(OldHeap, indexOid, true, ExclusiveLock);
527
528 /* timescale_rebuild_relation does all the dirty work */
529 timescale_rebuild_relation(OldHeap,
530 indexOid,
531 verbose,
532 wait_id,
533 destination_tablespace,
534 index_tablespace);
535
536 /* NB: timescale_rebuild_relation does table_close() on OldHeap */
537 }
538
539 /*
540 * timescale_rebuild_relation: rebuild an existing relation in index or physical order
541 *
542 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
543 * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
544 *
545 * NB: this routine closes OldHeap at the right time; caller should not.
546 */
/*
 * timescale_rebuild_relation: rebuild an existing relation in index or physical order
 *
 * OldHeap: table to rebuild --- must be opened and exclusive-locked!
 * indexOid: index to cluster by, or InvalidOid to rewrite in physical order.
 * destination_tablespace / index_tablespace: target tablespaces, or
 *   InvalidOid to stay in the relation's current tablespace.
 * wait_id: testing hook, passed through to finish_heap_swaps.
 *
 * Based on PostgreSQL's rebuild_relation() in commands/cluster.c, except the
 * new indexes are built by copying the chunk's existing index definitions
 * (ts_chunk_index_duplicate) rather than reindexing after the swap.
 *
 * NB: this routine closes OldHeap at the right time; caller should not.
 */
static void
timescale_rebuild_relation(Relation OldHeap, Oid indexOid, bool verbose, Oid wait_id,
						   Oid destination_tablespace, Oid index_tablespace)
{
	Oid tableOid = RelationGetRelid(OldHeap);
	/* explicit destination wins; otherwise keep the current tablespace */
	Oid tableSpace = OidIsValid(destination_tablespace) ? destination_tablespace :
													  OldHeap->rd_rel->reltablespace;
	Oid OIDNewHeap;
	List *old_index_oids;
	List *new_index_oids;
	char relpersistence;
	bool swap_toast_by_content;
	TransactionId frozenXid;
	MultiXactId cutoffMulti;

	/* Mark the correct index as clustered */
	mark_index_clustered(OldHeap, indexOid, true);

	/* Remember info about rel before closing OldHeap */
	relpersistence = OldHeap->rd_rel->relpersistence;

	/* Close relcache entry, but keep lock until transaction commit */
	table_close(OldHeap, NoLock);

	/* Create the transient table that will receive the re-ordered data */
	OIDNewHeap = make_new_heap(tableOid, tableSpace, relpersistence, ExclusiveLock);

	/* Copy the heap data into the new table in the desired order */
	copy_heap_data(OIDNewHeap,
				   tableOid,
				   indexOid,
				   verbose,
				   &swap_toast_by_content,
				   &frozenXid,
				   &cutoffMulti);

	/*
	 * Create versions of the tables indexes for the new table; the two OID
	 * lists come back in matching order, as finish_heap_swaps requires.
	 */
	new_index_oids =
		ts_chunk_index_duplicate(tableOid, OIDNewHeap, &old_index_oids, index_tablespace);

	/*
	 * Swap the physical files of the target and transient tables, then
	 * rebuild the target's indexes and throw away the transient table.
	 */
	finish_heap_swaps(tableOid,
					  OIDNewHeap,
					  old_index_oids,
					  new_index_oids,
					  swap_toast_by_content,
					  true,
					  frozenXid,
					  cutoffMulti,
					  wait_id);
}
601
602 /*
603 * Do the physical copying of heap data.
604 *
605 * There are three output parameters:
606 * *pSwapToastByContent is set true if toast tables must be swapped by content.
607 * *pFreezeXid receives the TransactionId used as freeze cutoff point.
608 * *pCutoffMulti receives the MultiXactId used as a cutoff point.
609 */
610 static void
copy_heap_data(Oid OIDNewHeap,Oid OIDOldHeap,Oid OIDOldIndex,bool verbose,bool * pSwapToastByContent,TransactionId * pFreezeXid,MultiXactId * pCutoffMulti)611 copy_heap_data(Oid OIDNewHeap, Oid OIDOldHeap, Oid OIDOldIndex, bool verbose,
612 bool *pSwapToastByContent, TransactionId *pFreezeXid, MultiXactId *pCutoffMulti)
613 {
614 Relation NewHeap, OldHeap, OldIndex;
615 Relation relRelation;
616 HeapTuple reltup;
617 Form_pg_class relform;
618 TupleDesc PG_USED_FOR_ASSERTS_ONLY oldTupDesc;
619 TupleDesc newTupDesc;
620 int natts;
621 Datum *values;
622 bool *isnull;
623 TransactionId OldestXmin;
624 TransactionId FreezeXid;
625 MultiXactId MultiXactCutoff;
626 bool use_sort;
627 double num_tuples = 0, tups_vacuumed = 0, tups_recently_dead = 0;
628 BlockNumber num_pages;
629 int elevel = verbose ? INFO : DEBUG2;
630 PGRUsage ru0;
631 pg_rusage_init(&ru0);
632
633 /*
634 * Open the relations we need.
635 */
636 NewHeap = table_open(OIDNewHeap, AccessExclusiveLock);
637 OldHeap = table_open(OIDOldHeap, ExclusiveLock);
638
639 if (OidIsValid(OIDOldIndex))
640 OldIndex = index_open(OIDOldIndex, ExclusiveLock);
641 else
642 OldIndex = NULL;
643
644 /*
645 * Their tuple descriptors should be exactly alike, but here we only need
646 * assume that they have the same number of columns.
647 */
648 oldTupDesc = RelationGetDescr(OldHeap);
649 newTupDesc = RelationGetDescr(NewHeap);
650 Assert(newTupDesc->natts == oldTupDesc->natts);
651
652 /* Preallocate values/isnull arrays */
653 natts = newTupDesc->natts;
654 values = (Datum *) palloc(natts * sizeof(Datum));
655 isnull = (bool *) palloc(natts * sizeof(bool));
656
657 /*
658 * If the OldHeap has a toast table, get lock on the toast table to keep
659 * it from being vacuumed. This is needed because autovacuum processes
660 * toast tables independently of their main tables, with no lock on the
661 * latter. If an autovacuum were to start on the toast table after we
662 * compute our OldestXmin below, it would use a later OldestXmin, and then
663 * possibly remove as DEAD toast tuples belonging to main tuples we think
664 * are only RECENTLY_DEAD. Then we'd fail while trying to copy those
665 * tuples.
666 *
667 * We don't need to open the toast relation here, just lock it. The lock
668 * will be held till end of transaction.
669 */
670 if (OldHeap->rd_rel->reltoastrelid)
671 LockRelationOid(OldHeap->rd_rel->reltoastrelid, ExclusiveLock);
672
673 /* use_wal off requires smgr_targblock be initially invalid */
674 Assert(RelationGetTargetBlock(NewHeap) == InvalidBlockNumber);
675
676 /*
677 * If both tables have TOAST tables, perform toast swap by content. It is
678 * possible that the old table has a toast table but the new one doesn't,
679 * if toastable columns have been dropped. In that case we have to do
680 * swap by links. This is okay because swap by content is only essential
681 * for system catalogs, and we don't support schema changes for them.
682 */
683 if (OldHeap->rd_rel->reltoastrelid && NewHeap->rd_rel->reltoastrelid)
684 {
685 *pSwapToastByContent = true;
686
687 /*
688 * When doing swap by content, any toast pointers written into NewHeap
689 * must use the old toast table's OID, because that's where the toast
690 * data will eventually be found. Set this up by setting rd_toastoid.
691 * This also tells toast_save_datum() to preserve the toast value
692 * OIDs, which we want so as not to invalidate toast pointers in
693 * system catalog caches, and to avoid making multiple copies of a
694 * single toast value.
695 *
696 * Note that we must hold NewHeap open until we are done writing data,
697 * since the relcache will not guarantee to remember this setting once
698 * the relation is closed. Also, this technique depends on the fact
699 * that no one will try to read from the NewHeap until after we've
700 * finished writing it and swapping the rels --- otherwise they could
701 * follow the toast pointers to the wrong place. (It would actually
702 * work for values copied over from the old toast table, but not for
703 * any values that we toast which were previously not toasted.)
704 */
705 NewHeap->rd_toastoid = OldHeap->rd_rel->reltoastrelid;
706 }
707 else
708 *pSwapToastByContent = false;
709
710 /*
711 * Compute xids used to freeze and weed out dead tuples and multixacts.
712 * Since we're going to rewrite the whole table anyway, there's no reason
713 * not to be aggressive about this.
714 */
715 vacuum_set_xid_limits(OldHeap,
716 0,
717 0,
718 0,
719 0,
720 &OldestXmin,
721 &FreezeXid,
722 NULL,
723 &MultiXactCutoff,
724 NULL);
725
726 /*
727 * FreezeXid will become the table's new relfrozenxid, and that mustn't go
728 * backwards, so take the max.
729 */
730 if (TransactionIdPrecedes(FreezeXid, OldHeap->rd_rel->relfrozenxid))
731 FreezeXid = OldHeap->rd_rel->relfrozenxid;
732
733 /*
734 * MultiXactCutoff, similarly, shouldn't go backwards either.
735 */
736 if (MultiXactIdPrecedes(MultiXactCutoff, OldHeap->rd_rel->relminmxid))
737 MultiXactCutoff = OldHeap->rd_rel->relminmxid;
738
739 /* return selected values to caller */
740 *pFreezeXid = FreezeXid;
741 *pCutoffMulti = MultiXactCutoff;
742
743 /*
744 * We know how to use a sort to duplicate the ordering of a btree index,
745 * and will use seqscan-and-sort for that. Otherwise, always use an
746 * indexscan for other indexes or plain seqscan if no index is supplied.
747 */
748 if (OldIndex != NULL && OldIndex->rd_rel->relam == BTREE_AM_OID)
749 use_sort = true;
750 else
751 use_sort = false;
752
753 /* Log what we're doing */
754 if (OldIndex != NULL && !use_sort)
755 ereport(elevel,
756 (errmsg("reordering \"%s.%s\" using index scan on \"%s\"",
757 get_namespace_name(RelationGetNamespace(OldHeap)),
758 RelationGetRelationName(OldHeap),
759 RelationGetRelationName(OldIndex))));
760 else if (use_sort)
761 ereport(elevel,
762 (errmsg("reordering \"%s.%s\" using sequential scan and sort",
763 get_namespace_name(RelationGetNamespace(OldHeap)),
764 RelationGetRelationName(OldHeap))));
765 else
766 ereport(ERROR,
767 (errmsg("tried to use a reorder without an index \"%s.%s\"",
768 get_namespace_name(RelationGetNamespace(OldHeap)),
769 RelationGetRelationName(OldHeap))));
770
771 table_relation_copy_for_cluster(OldHeap,
772 NewHeap,
773 OldIndex,
774 use_sort,
775 OldestXmin,
776 &FreezeXid,
777 &MultiXactCutoff,
778 &num_tuples,
779 &tups_vacuumed,
780 &tups_recently_dead);
781
782 /* Reset rd_toastoid just to be tidy --- it shouldn't be looked at again */
783 NewHeap->rd_toastoid = InvalidOid;
784
785 num_pages = RelationGetNumberOfBlocks(NewHeap);
786
787 /* Log what we did */
788 ereport(elevel,
789 (errmsg("\"%s\": found %.0f removable, %.0f nonremovable row versions in %u pages",
790 RelationGetRelationName(OldHeap),
791 tups_vacuumed,
792 num_tuples,
793 RelationGetNumberOfBlocks(OldHeap)),
794 errdetail("%.0f dead row versions cannot be removed yet.\n"
795 "%s.",
796 tups_recently_dead,
797 pg_rusage_show(&ru0))));
798
799 /* Clean up */
800 pfree(values);
801 pfree(isnull);
802
803 if (OldIndex != NULL)
804 index_close(OldIndex, NoLock);
805 table_close(OldHeap, NoLock);
806 table_close(NewHeap, NoLock);
807
808 /* Update pg_class to reflect the correct values of pages and tuples. */
809 relRelation = table_open(RelationRelationId, RowExclusiveLock);
810
811 reltup = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(OIDNewHeap));
812 if (!HeapTupleIsValid(reltup))
813 elog(ERROR, "cache lookup failed for relation %u", OIDNewHeap);
814 relform = (Form_pg_class) GETSTRUCT(reltup);
815
816 relform->relpages = num_pages;
817 relform->reltuples = num_tuples;
818
819 /* Don't update the stats for pg_class. See swap_relation_files. */
820 Assert(OIDOldHeap != RelationRelationId);
821 CacheInvalidateRelcacheByTuple(reltup);
822
823 /* Clean up. */
824 heap_freetuple(reltup);
825 table_close(relRelation, RowExclusiveLock);
826
827 /* Make the update visible */
828 CommandCounterIncrement();
829 }
830
831 /*
832 * Remove the transient table that was built by make_new_heap, and finish
833 * cleaning up (including rebuilding all indexes on the old heap).
834 *
835 * NB: new_index_oids must be in the same order as RelationGetIndexList
836 *
837 */
838 static void
finish_heap_swaps(Oid OIDOldHeap, Oid OIDNewHeap, List *old_index_oids, List *new_index_oids,
				  bool swap_toast_by_content, bool is_internal, TransactionId frozenXid,
				  MultiXactId cutoffMulti, Oid wait_id)
{
	/*
	 * Swap the physical files of OIDOldHeap (and each of its indexes) with
	 * the rebuilt transient relations, then destroy the transient heap.
	 * Adapted from PostgreSQL's finish_heap_swap() in commands/cluster.c.
	 *
	 * OIDOldHeap / OIDNewHeap: the original heap and the transient,
	 *     rewritten heap whose files will be swapped in.
	 * old_index_oids / new_index_oids: parallel lists of index OIDs that
	 *     are swapped pairwise, in order.
	 * swap_toast_by_content: if true, recursively swap the contents of the
	 *     toast tables; if false, swap the toast links in pg_class instead.
	 * is_internal: forwarded to the object post-alter hooks.
	 * frozenXid / cutoffMulti: become the old heap's new relfrozenxid and
	 *     relminmxid (applied inside swap_relation_files).
	 * wait_id: DEBUG-build-only relation used to pause just before the swap.
	 */
	ObjectAddress object;
	Relation oldHeapRel;
	ListCell *old_index_cell;
	ListCell *new_index_cell;
	int config_change;

#ifdef DEBUG

	/*
	 * For debug purposes we serialize against wait_id if it exists, this
	 * allows us to "pause" reorder immediately before swapping in the new
	 * table
	 */
	if (OidIsValid(wait_id))
	{
		Relation waiter = table_open(wait_id, AccessExclusiveLock);

		table_close(waiter, AccessExclusiveLock);
	}
#endif

	/*
	 * There's a risk of deadlock if some other process is also trying to
	 * upgrade their lock in the same manner as us, at this time. Since our
	 * transaction has performed a large amount of work, and only needs to be
	 * run once per chunk, we do not want to abort it due to this deadlock. To
	 * prevent abort we set our `deadlock_timeout` to a large value in the
	 * expectation that the other process will timeout and abort first.
	 * Currently we set `deadlock_timeout` to 1 hour, as this should be longer
	 * than any other normal process, while still allowing the system to make
	 * progress in the event of a real deadlock. As this is the last lock we
	 * grab, and the setting is local to our transaction we do not bother
	 * changing the guc back.
	 */
	config_change = set_config_option("deadlock_timeout",
									  REORDER_ACCESS_EXCLUSIVE_DEADLOCK_TIMEOUT,
									  PGC_SUSET,
									  PGC_S_SESSION,
									  GUC_ACTION_LOCAL,
									  true,
									  0,
									  false);

	/*
	 * set_config_option returns 0 when the GUC is unrecognized and a
	 * negative value when it exists but could not be set.
	 */
	if (config_change == 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("deadlock_timeout guc does not exist.")));
	else if (config_change < 0)
		ereport(ERROR,
				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
				 errmsg("could not set deadlock_timeout guc.")));

	/* This is the lock upgrade the deadlock_timeout note above refers to. */
	oldHeapRel = table_open(OIDOldHeap, AccessExclusiveLock);

	/*
	 * All predicate locks on the tuples or pages are about to be made
	 * invalid, because we move tuples around. Promote them to relation
	 * locks. Predicate locks on indexes will be promoted when they are
	 * reindexed.
	 */
	TransferPredicateLocksToHeapRelation(oldHeapRel);

	/*
	 * Swap the contents of the heap relations (including any toast tables).
	 * Also set old heap's relfrozenxid to frozenXid.
	 */
	swap_relation_files(OIDOldHeap,
						OIDNewHeap,
						swap_toast_by_content,
						is_internal,
						frozenXid,
						cutoffMulti);

	/* Swap the contents of the indexes */
	Assert(list_length(old_index_oids) == list_length(new_index_oids));
	forboth (old_index_cell, old_index_oids, new_index_cell, new_index_oids)
	{
		Oid old_index_oid = lfirst_oid(old_index_cell);
		Oid new_index_oid = lfirst_oid(new_index_cell);

		swap_relation_files(old_index_oid,
							new_index_oid,
							swap_toast_by_content,
							true,
							frozenXid,
							cutoffMulti);
	}
	/* NoLock: hold the AccessExclusiveLock until transaction end */
	table_close(oldHeapRel, NoLock);

	CommandCounterIncrement();

	/* Destroy new heap with old filenode */
	object.classId = RelationRelationId;
	object.objectId = OIDNewHeap;
	object.objectSubId = 0;

	/*
	 * The new relation is local to our transaction and we know nothing
	 * depends on it, so DROP_RESTRICT should be OK.
	 */
	performDeletion(&object, DROP_RESTRICT, PERFORM_DELETION_INTERNAL);

	/* performDeletion does CommandCounterIncrement at end */

	/*
	 * At this point, everything is kosher except that, if we did toast swap
	 * by links, the toast table's name corresponds to the transient table.
	 * The name is irrelevant to the backend because it's referenced by OID,
	 * but users looking at the catalogs could be confused. Rename it to
	 * prevent this problem.
	 *
	 * Note no lock required on the relation, because we already hold an
	 * exclusive lock on it.
	 */
	if (!swap_toast_by_content)
	{
		Relation newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		if (OidIsValid(newrel->rd_rel->reltoastrelid))
		{
			Oid toastidx;
			char NewToastName[NAMEDATALEN];

			/* Get the associated valid index to be renamed */
			toastidx = toast_get_valid_index(newrel->rd_rel->reltoastrelid, AccessShareLock);

			/* rename the toast table ... */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u", OIDOldHeap);
			RenameRelationInternal(newrel->rd_rel->reltoastrelid, NewToastName, true, false);

			/* ... and its valid index too. */
			snprintf(NewToastName, NAMEDATALEN, "pg_toast_%u_index", OIDOldHeap);

			RenameRelationInternal(toastidx, NewToastName, true, true);
		}
		table_close(newrel, NoLock);
	}

	/* it's not a catalog table, clear any missing attribute settings */
	{
		Relation newrel;

		newrel = table_open(OIDOldHeap, NoLock);
		RelationClearMissing(newrel);
		table_close(newrel, NoLock);
	}
}
991
/*
 * Swap the physical files of two given relations.
 *
 * We swap the physical identity (reltablespace, relfilenode) while keeping the
 * same logical identities of the two relations. relpersistence is also
 * swapped, which is critical since it determines where buffers live for each
 * relation.
 *
 * We can swap associated TOAST data in either of two ways: recursively swap
 * the physical content of the toast tables (and their indexes), or swap the
 * TOAST links in the given relations' pg_class entries. The latter is the only
 * way to handle cases in which a toast table is added or removed altogether.
 *
 * Additionally, the first relation is marked with relfrozenxid set to
 * frozenXid. It seems a bit ugly to have this here, but the caller would
 * have to do it anyway, so having it here saves a heap_update. Note: in
 * the swap-toast-links case, we assume we don't need to change the toast
 * table's relfrozenxid: the new version of the toast table should already
 * have relfrozenxid set to RecentXmin, which is good enough.
 *
 * Adapted from PostgreSQL's swap_relation_files() in commands/cluster.c;
 * mapped relations (relfilenode == 0) are rejected here rather than routed
 * through the relation mapper as in core.
 */
static void
swap_relation_files(Oid r1, Oid r2, bool swap_toast_by_content, bool is_internal,
					TransactionId frozenXid, MultiXactId cutoffMulti)
{
	Relation relRelation;
	HeapTuple reltup1, reltup2;
	Form_pg_class relform1, relform2;
	Oid relfilenode1, relfilenode2;
	Oid swaptemp;
	char swptmpchr;

	/* We need writable copies of both pg_class tuples. */
	relRelation = table_open(RelationRelationId, RowExclusiveLock);

	reltup1 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r1));
	if (!HeapTupleIsValid(reltup1))
		elog(ERROR, "cache lookup failed for relation %u", r1);
	relform1 = (Form_pg_class) GETSTRUCT(reltup1);

	reltup2 = SearchSysCacheCopy1(RELOID, ObjectIdGetDatum(r2));
	if (!HeapTupleIsValid(reltup2))
		elog(ERROR, "cache lookup failed for relation %u", r2);
	relform2 = (Form_pg_class) GETSTRUCT(reltup2);

	relfilenode1 = relform1->relfilenode;
	relfilenode2 = relform2->relfilenode;

	/*
	 * Mapped relations store 0 in relfilenode; their physical identity
	 * lives in the relation mapper, which we do not handle here.
	 */
	if (!OidIsValid(relfilenode1) || !OidIsValid(relfilenode2))
		elog(ERROR, "cannot reorder mapped relation \"%s\".", NameStr(relform1->relname));

	/* swap relfilenodes, reltablespaces, relpersistence */

	swaptemp = relform1->relfilenode;
	relform1->relfilenode = relform2->relfilenode;
	relform2->relfilenode = swaptemp;

	swaptemp = relform1->reltablespace;
	relform1->reltablespace = relform2->reltablespace;
	relform2->reltablespace = swaptemp;

	swptmpchr = relform1->relpersistence;
	relform1->relpersistence = relform2->relpersistence;
	relform2->relpersistence = swptmpchr;

	/* Also swap toast links, if we're swapping by links */
	if (!swap_toast_by_content)
	{
		swaptemp = relform1->reltoastrelid;
		relform1->reltoastrelid = relform2->reltoastrelid;
		relform2->reltoastrelid = swaptemp;
	}

	/*
	 * Set rel1's frozen Xid and minimum MultiXid.  Skipped for indexes,
	 * which have no relfrozenxid (the recursive call for toast indexes
	 * passes invalid values here).
	 */
	if (relform1->relkind != RELKIND_INDEX)
	{
		Assert(TransactionIdIsNormal(frozenXid));
		relform1->relfrozenxid = frozenXid;
		Assert(MultiXactIdIsValid(cutoffMulti));
		relform1->relminmxid = cutoffMulti;
	}

	/* swap size statistics too, since new rel has freshly-updated stats */
	{
		int32 swap_pages;
		float4 swap_tuples;
		int32 swap_allvisible;

		swap_pages = relform1->relpages;
		relform1->relpages = relform2->relpages;
		relform2->relpages = swap_pages;

		swap_tuples = relform1->reltuples;
		relform1->reltuples = relform2->reltuples;
		relform2->reltuples = swap_tuples;

		swap_allvisible = relform1->relallvisible;
		relform1->relallvisible = relform2->relallvisible;
		relform2->relallvisible = swap_allvisible;
	}

	/* Update the tuples in pg_class. */
	{
		CatalogIndexState indstate;
		indstate = CatalogOpenIndexes(relRelation);
		CatalogTupleUpdateWithInfo(relRelation, &reltup1->t_self, reltup1, indstate);
		CatalogTupleUpdateWithInfo(relRelation, &reltup2->t_self, reltup2, indstate);
		CatalogCloseIndexes(indstate);
	}

	/*
	 * Post alter hook for modified relations. The change to r2 is always
	 * internal, but r1 depends on the invocation context.
	 */
	InvokeObjectPostAlterHookArg(RelationRelationId, r1, 0, InvalidOid, is_internal);
	InvokeObjectPostAlterHookArg(RelationRelationId, r2, 0, InvalidOid, true);

	/*
	 * If we have toast tables associated with the relations being swapped,
	 * deal with them too.
	 */
	if (relform1->reltoastrelid || relform2->reltoastrelid)
	{
		if (swap_toast_by_content)
		{
			if (relform1->reltoastrelid && relform2->reltoastrelid)
			{
				/* Recursively swap the contents of the toast tables */
				swap_relation_files(relform1->reltoastrelid,
									relform2->reltoastrelid,
									swap_toast_by_content,
									is_internal,
									frozenXid,
									cutoffMulti);
			}
			else
			{
				/* caller messed up */
				elog(ERROR, "cannot swap toast files by content when there's only one");
			}
		}
		else
		{
			/*
			 * We swapped the ownership links, so we need to change dependency
			 * data to match.
			 *
			 * NOTE: it is possible that only one table has a toast table.
			 *
			 * NOTE: at present, a TOAST table's only dependency is the one on
			 * its owning table. If more are ever created, we'd need to use
			 * something more selective than deleteDependencyRecordsFor() to
			 * get rid of just the link we want.
			 */
			ObjectAddress baseobject, toastobject;
			long count;

			/*
			 * The original code disallowed this case for system catalogs. We
			 * don't allow reordering system catalogs, but Assert anyway
			 */
			Assert(!IsSystemClass(r1, relform1));

			/* Delete old dependencies */
			if (relform1->reltoastrelid)
			{
				count =
					deleteDependencyRecordsFor(RelationRelationId, relform1->reltoastrelid, false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld", count);
			}
			if (relform2->reltoastrelid)
			{
				count =
					deleteDependencyRecordsFor(RelationRelationId, relform2->reltoastrelid, false);
				if (count != 1)
					elog(ERROR, "expected one dependency record for TOAST table, found %ld", count);
			}

			/* Register new dependencies */
			baseobject.classId = RelationRelationId;
			baseobject.objectSubId = 0;
			toastobject.classId = RelationRelationId;
			toastobject.objectSubId = 0;

			if (relform1->reltoastrelid)
			{
				baseobject.objectId = r1;
				toastobject.objectId = relform1->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
			}

			if (relform2->reltoastrelid)
			{
				baseobject.objectId = r2;
				toastobject.objectId = relform2->reltoastrelid;
				recordDependencyOn(&toastobject, &baseobject, DEPENDENCY_INTERNAL);
			}
		}
	}

	/*
	 * If we're swapping two toast tables by content, do the same for their
	 * valid index. The swap can actually be safely done only if the relations
	 * have indexes.
	 */
	if (swap_toast_by_content && relform1->relkind == RELKIND_TOASTVALUE &&
		relform2->relkind == RELKIND_TOASTVALUE)
	{
		Oid toastIndex1, toastIndex2;

		/* Get valid index for each relation */
		toastIndex1 = toast_get_valid_index(r1, AccessExclusiveLock);
		toastIndex2 = toast_get_valid_index(r2, AccessExclusiveLock);

		/* Indexes have no frozen-xid state, so pass invalid cutoffs. */
		swap_relation_files(toastIndex1,
							toastIndex2,
							swap_toast_by_content,
							is_internal,
							InvalidTransactionId,
							InvalidMultiXactId);
	}

	/* Clean up. */
	heap_freetuple(reltup1);
	heap_freetuple(reltup2);

	table_close(relRelation, RowExclusiveLock);

	/*
	 * Close both relcache entries' smgr links. We need this kludge because
	 * both links will be invalidated during upcoming CommandCounterIncrement.
	 * Whichever of the rels is the second to be cleared will have a dangling
	 * reference to the other's smgr entry. Rather than trying to avoid this
	 * by ordering operations just so, it's easiest to close the links first.
	 * (Fortunately, since one of the entries is local in our transaction,
	 * it's sufficient to clear out our own relcache this way; the problem
	 * cannot arise for other backends when they see our update on the
	 * non-transient relation.)
	 *
	 * Caution: the placement of this step interacts with the decision to
	 * handle toast rels by recursion. When we are trying to rebuild pg_class
	 * itself, the smgr close on pg_class must happen after all accesses in
	 * this function.
	 */
	RelationCloseSmgrByOid(r1);
	RelationCloseSmgrByOid(r2);
}
1240