1 /*-------------------------------------------------------------------------
2  *
3  * slotfuncs.c
4  *	   Support functions for replication slots
5  *
6  * Copyright (c) 2012-2020, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/replication/slotfuncs.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "access/htup_details.h"
16 #include "access/xlog_internal.h"
17 #include "access/xlogutils.h"
18 #include "funcapi.h"
19 #include "miscadmin.h"
20 #include "replication/decode.h"
21 #include "replication/logical.h"
22 #include "replication/slot.h"
23 #include "utils/builtins.h"
24 #include "utils/inval.h"
25 #include "utils/pg_lsn.h"
26 #include "utils/resowner.h"
27 
28 static void
check_permissions(void)29 check_permissions(void)
30 {
31 	if (!superuser() && !has_rolreplication(GetUserId()))
32 		ereport(ERROR,
33 				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
34 				 errmsg("must be superuser or replication role to use replication slots")));
35 }
36 
37 /*
38  * Helper function for creating a new physical replication slot with
39  * given arguments. Note that this function doesn't release the created
40  * slot.
41  *
42  * If restart_lsn is a valid value, we use it without WAL reservation
43  * routine. So the caller must guarantee that WAL is available.
44  */
45 static void
create_physical_replication_slot(char * name,bool immediately_reserve,bool temporary,XLogRecPtr restart_lsn)46 create_physical_replication_slot(char *name, bool immediately_reserve,
47 								 bool temporary, XLogRecPtr restart_lsn)
48 {
49 	Assert(!MyReplicationSlot);
50 
51 	/* acquire replication slot, this will check for conflicting names */
52 	ReplicationSlotCreate(name, false,
53 						  temporary ? RS_TEMPORARY : RS_PERSISTENT);
54 
55 	if (immediately_reserve)
56 	{
57 		/* Reserve WAL as the user asked for it */
58 		if (XLogRecPtrIsInvalid(restart_lsn))
59 			ReplicationSlotReserveWal();
60 		else
61 			MyReplicationSlot->data.restart_lsn = restart_lsn;
62 
63 		/* Write this slot to disk */
64 		ReplicationSlotMarkDirty();
65 		ReplicationSlotSave();
66 	}
67 }
68 
69 /*
70  * SQL function for creating a new physical (streaming replication)
71  * replication slot.
72  */
73 Datum
pg_create_physical_replication_slot(PG_FUNCTION_ARGS)74 pg_create_physical_replication_slot(PG_FUNCTION_ARGS)
75 {
76 	Name		name = PG_GETARG_NAME(0);
77 	bool		immediately_reserve = PG_GETARG_BOOL(1);
78 	bool		temporary = PG_GETARG_BOOL(2);
79 	Datum		values[2];
80 	bool		nulls[2];
81 	TupleDesc	tupdesc;
82 	HeapTuple	tuple;
83 	Datum		result;
84 
85 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
86 		elog(ERROR, "return type must be a row type");
87 
88 	check_permissions();
89 
90 	CheckSlotRequirements();
91 
92 	create_physical_replication_slot(NameStr(*name),
93 									 immediately_reserve,
94 									 temporary,
95 									 InvalidXLogRecPtr);
96 
97 	values[0] = NameGetDatum(&MyReplicationSlot->data.name);
98 	nulls[0] = false;
99 
100 	if (immediately_reserve)
101 	{
102 		values[1] = LSNGetDatum(MyReplicationSlot->data.restart_lsn);
103 		nulls[1] = false;
104 	}
105 	else
106 		nulls[1] = true;
107 
108 	tuple = heap_form_tuple(tupdesc, values, nulls);
109 	result = HeapTupleGetDatum(tuple);
110 
111 	ReplicationSlotRelease();
112 
113 	PG_RETURN_DATUM(result);
114 }
115 
116 
117 /*
118  * Helper function for creating a new logical replication slot with
119  * given arguments. Note that this function doesn't release the created
120  * slot.
121  *
122  * When find_startpoint is false, the slot's confirmed_flush is not set; it's
123  * caller's responsibility to ensure it's set to something sensible.
124  */
125 static void
create_logical_replication_slot(char * name,char * plugin,bool temporary,XLogRecPtr restart_lsn,bool find_startpoint)126 create_logical_replication_slot(char *name, char *plugin,
127 								bool temporary, XLogRecPtr restart_lsn,
128 								bool find_startpoint)
129 {
130 	LogicalDecodingContext *ctx = NULL;
131 
132 	Assert(!MyReplicationSlot);
133 
134 	/*
135 	 * Acquire a logical decoding slot, this will check for conflicting names.
136 	 * Initially create persistent slot as ephemeral - that allows us to
137 	 * nicely handle errors during initialization because it'll get dropped if
138 	 * this transaction fails. We'll make it persistent at the end. Temporary
139 	 * slots can be created as temporary from beginning as they get dropped on
140 	 * error as well.
141 	 */
142 	ReplicationSlotCreate(name, true,
143 						  temporary ? RS_TEMPORARY : RS_EPHEMERAL);
144 
145 	/*
146 	 * Create logical decoding context to find start point or, if we don't
147 	 * need it, to 1) bump slot's restart_lsn and xmin 2) check plugin sanity.
148 	 *
149 	 * Note: when !find_startpoint this is still important, because it's at
150 	 * this point that the output plugin is validated.
151 	 */
152 	ctx = CreateInitDecodingContext(plugin, NIL,
153 									false,	/* just catalogs is OK */
154 									restart_lsn,
155 									XL_ROUTINE(.page_read = read_local_xlog_page,
156 											   .segment_open = wal_segment_open,
157 											   .segment_close = wal_segment_close),
158 									NULL, NULL, NULL);
159 
160 	/*
161 	 * If caller needs us to determine the decoding start point, do so now.
162 	 * This might take a while.
163 	 */
164 	if (find_startpoint)
165 		DecodingContextFindStartpoint(ctx);
166 
167 	/* don't need the decoding context anymore */
168 	FreeDecodingContext(ctx);
169 }
170 
171 /*
172  * SQL function for creating a new logical replication slot.
173  */
174 Datum
pg_create_logical_replication_slot(PG_FUNCTION_ARGS)175 pg_create_logical_replication_slot(PG_FUNCTION_ARGS)
176 {
177 	Name		name = PG_GETARG_NAME(0);
178 	Name		plugin = PG_GETARG_NAME(1);
179 	bool		temporary = PG_GETARG_BOOL(2);
180 	Datum		result;
181 	TupleDesc	tupdesc;
182 	HeapTuple	tuple;
183 	Datum		values[2];
184 	bool		nulls[2];
185 
186 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
187 		elog(ERROR, "return type must be a row type");
188 
189 	check_permissions();
190 
191 	CheckLogicalDecodingRequirements();
192 
193 	create_logical_replication_slot(NameStr(*name),
194 									NameStr(*plugin),
195 									temporary,
196 									InvalidXLogRecPtr,
197 									true);
198 
199 	values[0] = NameGetDatum(&MyReplicationSlot->data.name);
200 	values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
201 
202 	memset(nulls, 0, sizeof(nulls));
203 
204 	tuple = heap_form_tuple(tupdesc, values, nulls);
205 	result = HeapTupleGetDatum(tuple);
206 
207 	/* ok, slot is now fully created, mark it as persistent if needed */
208 	if (!temporary)
209 		ReplicationSlotPersist();
210 	ReplicationSlotRelease();
211 
212 	PG_RETURN_DATUM(result);
213 }
214 
215 
216 /*
217  * SQL function for dropping a replication slot.
218  */
219 Datum
pg_drop_replication_slot(PG_FUNCTION_ARGS)220 pg_drop_replication_slot(PG_FUNCTION_ARGS)
221 {
222 	Name		name = PG_GETARG_NAME(0);
223 
224 	check_permissions();
225 
226 	CheckSlotRequirements();
227 
228 	ReplicationSlotDrop(NameStr(*name), true);
229 
230 	PG_RETURN_VOID();
231 }
232 
233 /*
234  * pg_get_replication_slots - SQL SRF showing active replication slots.
235  */
236 Datum
pg_get_replication_slots(PG_FUNCTION_ARGS)237 pg_get_replication_slots(PG_FUNCTION_ARGS)
238 {
239 #define PG_GET_REPLICATION_SLOTS_COLS 13
240 	ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo;
241 	TupleDesc	tupdesc;
242 	Tuplestorestate *tupstore;
243 	MemoryContext per_query_ctx;
244 	MemoryContext oldcontext;
245 	XLogRecPtr	currlsn;
246 	int			slotno;
247 
248 	/* check to see if caller supports us returning a tuplestore */
249 	if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo))
250 		ereport(ERROR,
251 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
252 				 errmsg("set-valued function called in context that cannot accept a set")));
253 	if (!(rsinfo->allowedModes & SFRM_Materialize))
254 		ereport(ERROR,
255 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
256 				 errmsg("materialize mode required, but it is not allowed in this context")));
257 
258 	/* Build a tuple descriptor for our result type */
259 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
260 		elog(ERROR, "return type must be a row type");
261 
262 	/*
263 	 * We don't require any special permission to see this function's data
264 	 * because nothing should be sensitive. The most critical being the slot
265 	 * name, which shouldn't contain anything particularly sensitive.
266 	 */
267 
268 	per_query_ctx = rsinfo->econtext->ecxt_per_query_memory;
269 	oldcontext = MemoryContextSwitchTo(per_query_ctx);
270 
271 	tupstore = tuplestore_begin_heap(true, false, work_mem);
272 	rsinfo->returnMode = SFRM_Materialize;
273 	rsinfo->setResult = tupstore;
274 	rsinfo->setDesc = tupdesc;
275 
276 	MemoryContextSwitchTo(oldcontext);
277 
278 	currlsn = GetXLogWriteRecPtr();
279 
280 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
281 	for (slotno = 0; slotno < max_replication_slots; slotno++)
282 	{
283 		ReplicationSlot *slot = &ReplicationSlotCtl->replication_slots[slotno];
284 		ReplicationSlot slot_contents;
285 		Datum		values[PG_GET_REPLICATION_SLOTS_COLS];
286 		bool		nulls[PG_GET_REPLICATION_SLOTS_COLS];
287 		WALAvailability walstate;
288 		int			i;
289 
290 		if (!slot->in_use)
291 			continue;
292 
293 		/* Copy slot contents while holding spinlock, then examine at leisure */
294 		SpinLockAcquire(&slot->mutex);
295 		slot_contents = *slot;
296 		SpinLockRelease(&slot->mutex);
297 
298 		memset(values, 0, sizeof(values));
299 		memset(nulls, 0, sizeof(nulls));
300 
301 		i = 0;
302 		values[i++] = NameGetDatum(&slot_contents.data.name);
303 
304 		if (slot_contents.data.database == InvalidOid)
305 			nulls[i++] = true;
306 		else
307 			values[i++] = NameGetDatum(&slot_contents.data.plugin);
308 
309 		if (slot_contents.data.database == InvalidOid)
310 			values[i++] = CStringGetTextDatum("physical");
311 		else
312 			values[i++] = CStringGetTextDatum("logical");
313 
314 		if (slot_contents.data.database == InvalidOid)
315 			nulls[i++] = true;
316 		else
317 			values[i++] = ObjectIdGetDatum(slot_contents.data.database);
318 
319 		values[i++] = BoolGetDatum(slot_contents.data.persistency == RS_TEMPORARY);
320 		values[i++] = BoolGetDatum(slot_contents.active_pid != 0);
321 
322 		if (slot_contents.active_pid != 0)
323 			values[i++] = Int32GetDatum(slot_contents.active_pid);
324 		else
325 			nulls[i++] = true;
326 
327 		if (slot_contents.data.xmin != InvalidTransactionId)
328 			values[i++] = TransactionIdGetDatum(slot_contents.data.xmin);
329 		else
330 			nulls[i++] = true;
331 
332 		if (slot_contents.data.catalog_xmin != InvalidTransactionId)
333 			values[i++] = TransactionIdGetDatum(slot_contents.data.catalog_xmin);
334 		else
335 			nulls[i++] = true;
336 
337 		if (slot_contents.data.restart_lsn != InvalidXLogRecPtr)
338 			values[i++] = LSNGetDatum(slot_contents.data.restart_lsn);
339 		else
340 			nulls[i++] = true;
341 
342 		if (slot_contents.data.confirmed_flush != InvalidXLogRecPtr)
343 			values[i++] = LSNGetDatum(slot_contents.data.confirmed_flush);
344 		else
345 			nulls[i++] = true;
346 
347 		/*
348 		 * If invalidated_at is valid and restart_lsn is invalid, we know for
349 		 * certain that the slot has been invalidated.  Otherwise, test
350 		 * availability from restart_lsn.
351 		 */
352 		if (XLogRecPtrIsInvalid(slot_contents.data.restart_lsn) &&
353 			!XLogRecPtrIsInvalid(slot_contents.data.invalidated_at))
354 			walstate = WALAVAIL_REMOVED;
355 		else
356 			walstate = GetWALAvailability(slot_contents.data.restart_lsn);
357 
358 		switch (walstate)
359 		{
360 			case WALAVAIL_INVALID_LSN:
361 				nulls[i++] = true;
362 				break;
363 
364 			case WALAVAIL_RESERVED:
365 				values[i++] = CStringGetTextDatum("reserved");
366 				break;
367 
368 			case WALAVAIL_EXTENDED:
369 				values[i++] = CStringGetTextDatum("extended");
370 				break;
371 
372 			case WALAVAIL_UNRESERVED:
373 				values[i++] = CStringGetTextDatum("unreserved");
374 				break;
375 
376 			case WALAVAIL_REMOVED:
377 
378 				/*
379 				 * If we read the restart_lsn long enough ago, maybe that file
380 				 * has been removed by now.  However, the walsender could have
381 				 * moved forward enough that it jumped to another file after
382 				 * we looked.  If checkpointer signalled the process to
383 				 * termination, then it's definitely lost; but if a process is
384 				 * still alive, then "unreserved" seems more appropriate.
385 				 *
386 				 * If we do change it, save the state for safe_wal_size below.
387 				 */
388 				if (!XLogRecPtrIsInvalid(slot_contents.data.restart_lsn))
389 				{
390 					int			pid;
391 
392 					SpinLockAcquire(&slot->mutex);
393 					pid = slot->active_pid;
394 					slot_contents.data.restart_lsn = slot->data.restart_lsn;
395 					SpinLockRelease(&slot->mutex);
396 					if (pid != 0)
397 					{
398 						values[i++] = CStringGetTextDatum("unreserved");
399 						walstate = WALAVAIL_UNRESERVED;
400 						break;
401 					}
402 				}
403 				values[i++] = CStringGetTextDatum("lost");
404 				break;
405 		}
406 
407 		/*
408 		 * safe_wal_size is only computed for slots that have not been lost,
409 		 * and only if there's a configured maximum size.
410 		 */
411 		if (walstate == WALAVAIL_REMOVED || max_slot_wal_keep_size_mb < 0)
412 			nulls[i++] = true;
413 		else
414 		{
415 			XLogSegNo   targetSeg;
416 			uint64   slotKeepSegs;
417 			uint64   keepSegs;
418 			XLogSegNo   failSeg;
419 			XLogRecPtr  failLSN;
420 
421 			XLByteToSeg(slot_contents.data.restart_lsn, targetSeg, wal_segment_size);
422 
423 			/* determine how many segments slots can be kept by slots */
424 			slotKeepSegs = XLogMBVarToSegs(max_slot_wal_keep_size_mb, wal_segment_size);
425 			/* ditto for wal_keep_size */
426 			keepSegs = XLogMBVarToSegs(wal_keep_size_mb, wal_segment_size);
427 
428 			/* if currpos reaches failLSN, we lose our segment */
429 			failSeg = targetSeg + Max(slotKeepSegs, keepSegs) + 1;
430 			XLogSegNoOffsetToRecPtr(failSeg, 0, wal_segment_size, failLSN);
431 
432 			values[i++] = Int64GetDatum(failLSN - currlsn);
433 		}
434 
435 		Assert(i == PG_GET_REPLICATION_SLOTS_COLS);
436 
437 		tuplestore_putvalues(tupstore, tupdesc, values, nulls);
438 	}
439 
440 	LWLockRelease(ReplicationSlotControlLock);
441 
442 	tuplestore_donestoring(tupstore);
443 
444 	return (Datum) 0;
445 }
446 
447 /*
448  * Helper function for advancing our physical replication slot forward.
449  *
450  * The LSN position to move to is compared simply to the slot's restart_lsn,
451  * knowing that any position older than that would be removed by successive
452  * checkpoints.
453  */
454 static XLogRecPtr
pg_physical_replication_slot_advance(XLogRecPtr moveto)455 pg_physical_replication_slot_advance(XLogRecPtr moveto)
456 {
457 	XLogRecPtr	startlsn = MyReplicationSlot->data.restart_lsn;
458 	XLogRecPtr	retlsn = startlsn;
459 
460 	Assert(moveto != InvalidXLogRecPtr);
461 
462 	if (startlsn < moveto)
463 	{
464 		SpinLockAcquire(&MyReplicationSlot->mutex);
465 		MyReplicationSlot->data.restart_lsn = moveto;
466 		SpinLockRelease(&MyReplicationSlot->mutex);
467 		retlsn = moveto;
468 
469 		/*
470 		 * Dirty the slot so as it is written out at the next checkpoint. Note
471 		 * that the LSN position advanced may still be lost in the event of a
472 		 * crash, but this makes the data consistent after a clean shutdown.
473 		 */
474 		ReplicationSlotMarkDirty();
475 	}
476 
477 	return retlsn;
478 }
479 
480 /*
481  * Helper function for advancing our logical replication slot forward.
482  *
483  * The slot's restart_lsn is used as start point for reading records, while
484  * confirmed_flush is used as base point for the decoding context.
485  *
486  * We cannot just do LogicalConfirmReceivedLocation to update confirmed_flush,
487  * because we need to digest WAL to advance restart_lsn allowing to recycle
488  * WAL and removal of old catalog tuples.  As decoding is done in fast_forward
489  * mode, no changes are generated anyway.
490  */
491 static XLogRecPtr
pg_logical_replication_slot_advance(XLogRecPtr moveto)492 pg_logical_replication_slot_advance(XLogRecPtr moveto)
493 {
494 	LogicalDecodingContext *ctx;
495 	ResourceOwner old_resowner = CurrentResourceOwner;
496 	XLogRecPtr	retlsn;
497 
498 	Assert(moveto != InvalidXLogRecPtr);
499 
500 	PG_TRY();
501 	{
502 		/*
503 		 * Create our decoding context in fast_forward mode, passing start_lsn
504 		 * as InvalidXLogRecPtr, so that we start processing from my slot's
505 		 * confirmed_flush.
506 		 */
507 		ctx = CreateDecodingContext(InvalidXLogRecPtr,
508 									NIL,
509 									true,	/* fast_forward */
510 									XL_ROUTINE(.page_read = read_local_xlog_page,
511 											   .segment_open = wal_segment_open,
512 											   .segment_close = wal_segment_close),
513 									NULL, NULL, NULL);
514 
515 		/*
516 		 * Start reading at the slot's restart_lsn, which we know to point to
517 		 * a valid record.
518 		 */
519 		XLogBeginRead(ctx->reader, MyReplicationSlot->data.restart_lsn);
520 
521 		/* Initialize our return value in case we don't do anything */
522 		retlsn = MyReplicationSlot->data.confirmed_flush;
523 
524 		/* invalidate non-timetravel entries */
525 		InvalidateSystemCaches();
526 
527 		/* Decode at least one record, until we run out of records */
528 		while (ctx->reader->EndRecPtr < moveto)
529 		{
530 			char	   *errm = NULL;
531 			XLogRecord *record;
532 
533 			/*
534 			 * Read records.  No changes are generated in fast_forward mode,
535 			 * but snapbuilder/slot statuses are updated properly.
536 			 */
537 			record = XLogReadRecord(ctx->reader, &errm);
538 			if (errm)
539 				elog(ERROR, "%s", errm);
540 
541 			/*
542 			 * Process the record.  Storage-level changes are ignored in
543 			 * fast_forward mode, but other modules (such as snapbuilder)
544 			 * might still have critical updates to do.
545 			 */
546 			if (record)
547 				LogicalDecodingProcessRecord(ctx, ctx->reader);
548 
549 			/* Stop once the requested target has been reached */
550 			if (moveto <= ctx->reader->EndRecPtr)
551 				break;
552 
553 			CHECK_FOR_INTERRUPTS();
554 		}
555 
556 		/*
557 		 * Logical decoding could have clobbered CurrentResourceOwner during
558 		 * transaction management, so restore the executor's value.  (This is
559 		 * a kluge, but it's not worth cleaning up right now.)
560 		 */
561 		CurrentResourceOwner = old_resowner;
562 
563 		if (ctx->reader->EndRecPtr != InvalidXLogRecPtr)
564 		{
565 			LogicalConfirmReceivedLocation(moveto);
566 
567 			/*
568 			 * If only the confirmed_flush LSN has changed the slot won't get
569 			 * marked as dirty by the above. Callers on the walsender
570 			 * interface are expected to keep track of their own progress and
571 			 * don't need it written out. But SQL-interface users cannot
572 			 * specify their own start positions and it's harder for them to
573 			 * keep track of their progress, so we should make more of an
574 			 * effort to save it for them.
575 			 *
576 			 * Dirty the slot so it is written out at the next checkpoint. The
577 			 * LSN position advanced to may still be lost on a crash but this
578 			 * makes the data consistent after a clean shutdown.
579 			 */
580 			ReplicationSlotMarkDirty();
581 		}
582 
583 		retlsn = MyReplicationSlot->data.confirmed_flush;
584 
585 		/* free context, call shutdown callback */
586 		FreeDecodingContext(ctx);
587 
588 		InvalidateSystemCaches();
589 	}
590 	PG_CATCH();
591 	{
592 		/* clear all timetravel entries */
593 		InvalidateSystemCaches();
594 
595 		PG_RE_THROW();
596 	}
597 	PG_END_TRY();
598 
599 	return retlsn;
600 }
601 
602 /*
603  * SQL function for moving the position in a replication slot.
604  */
605 Datum
pg_replication_slot_advance(PG_FUNCTION_ARGS)606 pg_replication_slot_advance(PG_FUNCTION_ARGS)
607 {
608 	Name		slotname = PG_GETARG_NAME(0);
609 	XLogRecPtr	moveto = PG_GETARG_LSN(1);
610 	XLogRecPtr	endlsn;
611 	XLogRecPtr	minlsn;
612 	TupleDesc	tupdesc;
613 	Datum		values[2];
614 	bool		nulls[2];
615 	HeapTuple	tuple;
616 	Datum		result;
617 
618 	Assert(!MyReplicationSlot);
619 
620 	check_permissions();
621 
622 	if (XLogRecPtrIsInvalid(moveto))
623 		ereport(ERROR,
624 				(errmsg("invalid target WAL LSN")));
625 
626 	/* Build a tuple descriptor for our result type */
627 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
628 		elog(ERROR, "return type must be a row type");
629 
630 	/*
631 	 * We can't move slot past what's been flushed/replayed so clamp the
632 	 * target position accordingly.
633 	 */
634 	if (!RecoveryInProgress())
635 		moveto = Min(moveto, GetFlushRecPtr());
636 	else
637 		moveto = Min(moveto, GetXLogReplayRecPtr(&ThisTimeLineID));
638 
639 	/* Acquire the slot so we "own" it */
640 	(void) ReplicationSlotAcquire(NameStr(*slotname), SAB_Error);
641 
642 	/* A slot whose restart_lsn has never been reserved cannot be advanced */
643 	if (XLogRecPtrIsInvalid(MyReplicationSlot->data.restart_lsn))
644 		ereport(ERROR,
645 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
646 				 errmsg("replication slot \"%s\" cannot be advanced",
647 						NameStr(*slotname)),
648 				 errdetail("This slot has never previously reserved WAL, or has been invalidated.")));
649 
650 	/*
651 	 * Check if the slot is not moving backwards.  Physical slots rely simply
652 	 * on restart_lsn as a minimum point, while logical slots have confirmed
653 	 * consumption up to confirmed_flush, meaning that in both cases data
654 	 * older than that is not available anymore.
655 	 */
656 	if (OidIsValid(MyReplicationSlot->data.database))
657 		minlsn = MyReplicationSlot->data.confirmed_flush;
658 	else
659 		minlsn = MyReplicationSlot->data.restart_lsn;
660 
661 	if (moveto < minlsn)
662 		ereport(ERROR,
663 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
664 				 errmsg("cannot advance replication slot to %X/%X, minimum is %X/%X",
665 						(uint32) (moveto >> 32), (uint32) moveto,
666 						(uint32) (minlsn >> 32), (uint32) minlsn)));
667 
668 	/* Do the actual slot update, depending on the slot type */
669 	if (OidIsValid(MyReplicationSlot->data.database))
670 		endlsn = pg_logical_replication_slot_advance(moveto);
671 	else
672 		endlsn = pg_physical_replication_slot_advance(moveto);
673 
674 	values[0] = NameGetDatum(&MyReplicationSlot->data.name);
675 	nulls[0] = false;
676 
677 	/*
678 	 * Recompute the minimum LSN and xmin across all slots to adjust with the
679 	 * advancing potentially done.
680 	 */
681 	ReplicationSlotsComputeRequiredXmin(false);
682 	ReplicationSlotsComputeRequiredLSN();
683 
684 	ReplicationSlotRelease();
685 
686 	/* Return the reached position. */
687 	values[1] = LSNGetDatum(endlsn);
688 	nulls[1] = false;
689 
690 	tuple = heap_form_tuple(tupdesc, values, nulls);
691 	result = HeapTupleGetDatum(tuple);
692 
693 	PG_RETURN_DATUM(result);
694 }
695 
696 /*
697  * Helper function of copying a replication slot.
698  */
699 static Datum
copy_replication_slot(FunctionCallInfo fcinfo,bool logical_slot)700 copy_replication_slot(FunctionCallInfo fcinfo, bool logical_slot)
701 {
702 	Name		src_name = PG_GETARG_NAME(0);
703 	Name		dst_name = PG_GETARG_NAME(1);
704 	ReplicationSlot *src = NULL;
705 	ReplicationSlot first_slot_contents;
706 	ReplicationSlot second_slot_contents;
707 	XLogRecPtr	src_restart_lsn;
708 	bool		src_islogical;
709 	bool		temporary;
710 	char	   *plugin;
711 	Datum		values[2];
712 	bool		nulls[2];
713 	Datum		result;
714 	TupleDesc	tupdesc;
715 	HeapTuple	tuple;
716 
717 	if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
718 		elog(ERROR, "return type must be a row type");
719 
720 	check_permissions();
721 
722 	if (logical_slot)
723 		CheckLogicalDecodingRequirements();
724 	else
725 		CheckSlotRequirements();
726 
727 	LWLockAcquire(ReplicationSlotControlLock, LW_SHARED);
728 
729 	/*
730 	 * We need to prevent the source slot's reserved WAL from being removed,
731 	 * but we don't want to lock that slot for very long, and it can advance
732 	 * in the meantime.  So obtain the source slot's data, and create a new
733 	 * slot using its restart_lsn.  Afterwards we lock the source slot again
734 	 * and verify that the data we copied (name, type) has not changed
735 	 * incompatibly.  No inconvenient WAL removal can occur once the new slot
736 	 * is created -- but since WAL removal could have occurred before we
737 	 * managed to create the new slot, we advance the new slot's restart_lsn
738 	 * to the source slot's updated restart_lsn the second time we lock it.
739 	 */
740 	for (int i = 0; i < max_replication_slots; i++)
741 	{
742 		ReplicationSlot *s = &ReplicationSlotCtl->replication_slots[i];
743 
744 		if (s->in_use && strcmp(NameStr(s->data.name), NameStr(*src_name)) == 0)
745 		{
746 			/* Copy the slot contents while holding spinlock */
747 			SpinLockAcquire(&s->mutex);
748 			first_slot_contents = *s;
749 			SpinLockRelease(&s->mutex);
750 			src = s;
751 			break;
752 		}
753 	}
754 
755 	LWLockRelease(ReplicationSlotControlLock);
756 
757 	if (src == NULL)
758 		ereport(ERROR,
759 				(errcode(ERRCODE_UNDEFINED_OBJECT),
760 				 errmsg("replication slot \"%s\" does not exist", NameStr(*src_name))));
761 
762 	src_islogical = SlotIsLogical(&first_slot_contents);
763 	src_restart_lsn = first_slot_contents.data.restart_lsn;
764 	temporary = (first_slot_contents.data.persistency == RS_TEMPORARY);
765 	plugin = logical_slot ? NameStr(first_slot_contents.data.plugin) : NULL;
766 
767 	/* Check type of replication slot */
768 	if (src_islogical != logical_slot)
769 		ereport(ERROR,
770 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
771 				 src_islogical ?
772 				 errmsg("cannot copy physical replication slot \"%s\" as a logical replication slot",
773 						NameStr(*src_name)) :
774 				 errmsg("cannot copy logical replication slot \"%s\" as a physical replication slot",
775 						NameStr(*src_name))));
776 
777 	/* Copying non-reserved slot doesn't make sense */
778 	if (XLogRecPtrIsInvalid(src_restart_lsn))
779 		ereport(ERROR,
780 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
781 				 errmsg("cannot copy a replication slot that doesn't reserve WAL")));
782 
783 	/* Overwrite params from optional arguments */
784 	if (PG_NARGS() >= 3)
785 		temporary = PG_GETARG_BOOL(2);
786 	if (PG_NARGS() >= 4)
787 	{
788 		Assert(logical_slot);
789 		plugin = NameStr(*(PG_GETARG_NAME(3)));
790 	}
791 
792 	/* Create new slot and acquire it */
793 	if (logical_slot)
794 	{
795 		/*
796 		 * We must not try to read WAL, since we haven't reserved it yet --
797 		 * hence pass find_startpoint false.  confirmed_flush will be set
798 		 * below, by copying from the source slot.
799 		 */
800 		create_logical_replication_slot(NameStr(*dst_name),
801 										plugin,
802 										temporary,
803 										src_restart_lsn,
804 										false);
805 	}
806 	else
807 		create_physical_replication_slot(NameStr(*dst_name),
808 										 true,
809 										 temporary,
810 										 src_restart_lsn);
811 
812 	/*
813 	 * Update the destination slot to current values of the source slot;
814 	 * recheck that the source slot is still the one we saw previously.
815 	 */
816 	{
817 		TransactionId copy_effective_xmin;
818 		TransactionId copy_effective_catalog_xmin;
819 		TransactionId copy_xmin;
820 		TransactionId copy_catalog_xmin;
821 		XLogRecPtr	copy_restart_lsn;
822 		XLogRecPtr	copy_confirmed_flush;
823 		bool		copy_islogical;
824 		char	   *copy_name;
825 
826 		/* Copy data of source slot again */
827 		SpinLockAcquire(&src->mutex);
828 		second_slot_contents = *src;
829 		SpinLockRelease(&src->mutex);
830 
831 		copy_effective_xmin = second_slot_contents.effective_xmin;
832 		copy_effective_catalog_xmin = second_slot_contents.effective_catalog_xmin;
833 
834 		copy_xmin = second_slot_contents.data.xmin;
835 		copy_catalog_xmin = second_slot_contents.data.catalog_xmin;
836 		copy_restart_lsn = second_slot_contents.data.restart_lsn;
837 		copy_confirmed_flush = second_slot_contents.data.confirmed_flush;
838 
839 		/* for existence check */
840 		copy_name = NameStr(second_slot_contents.data.name);
841 		copy_islogical = SlotIsLogical(&second_slot_contents);
842 
843 		/*
844 		 * Check if the source slot still exists and is valid. We regard it as
845 		 * invalid if the type of replication slot or name has been changed,
846 		 * or the restart_lsn either is invalid or has gone backward. (The
847 		 * restart_lsn could go backwards if the source slot is dropped and
848 		 * copied from an older slot during installation.)
849 		 *
850 		 * Since erroring out will release and drop the destination slot we
851 		 * don't need to release it here.
852 		 */
853 		if (copy_restart_lsn < src_restart_lsn ||
854 			src_islogical != copy_islogical ||
855 			strcmp(copy_name, NameStr(*src_name)) != 0)
856 			ereport(ERROR,
857 					(errmsg("could not copy replication slot \"%s\"",
858 							NameStr(*src_name)),
859 					 errdetail("The source replication slot was modified incompatibly during the copy operation.")));
860 
861 		/* The source slot must have a consistent snapshot */
862 		if (src_islogical && XLogRecPtrIsInvalid(copy_confirmed_flush))
863 			ereport(ERROR,
864 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
865 					 errmsg("cannot copy unfinished logical replication slot \"%s\"",
866 							NameStr(*src_name)),
867 					 errhint("Retry when the source replication slot's confirmed_flush_lsn is valid.")));
868 
869 		/* Install copied values again */
870 		SpinLockAcquire(&MyReplicationSlot->mutex);
871 		MyReplicationSlot->effective_xmin = copy_effective_xmin;
872 		MyReplicationSlot->effective_catalog_xmin = copy_effective_catalog_xmin;
873 
874 		MyReplicationSlot->data.xmin = copy_xmin;
875 		MyReplicationSlot->data.catalog_xmin = copy_catalog_xmin;
876 		MyReplicationSlot->data.restart_lsn = copy_restart_lsn;
877 		MyReplicationSlot->data.confirmed_flush = copy_confirmed_flush;
878 		SpinLockRelease(&MyReplicationSlot->mutex);
879 
880 		ReplicationSlotMarkDirty();
881 		ReplicationSlotsComputeRequiredXmin(false);
882 		ReplicationSlotsComputeRequiredLSN();
883 		ReplicationSlotSave();
884 
885 #ifdef USE_ASSERT_CHECKING
886 		/* Check that the restart_lsn is available */
887 		{
888 			XLogSegNo	segno;
889 
890 			XLByteToSeg(copy_restart_lsn, segno, wal_segment_size);
891 			Assert(XLogGetLastRemovedSegno() < segno);
892 		}
893 #endif
894 	}
895 
896 	/* target slot fully created, mark as persistent if needed */
897 	if (logical_slot && !temporary)
898 		ReplicationSlotPersist();
899 
900 	/* All done.  Set up the return values */
901 	values[0] = NameGetDatum(dst_name);
902 	nulls[0] = false;
903 	if (!XLogRecPtrIsInvalid(MyReplicationSlot->data.confirmed_flush))
904 	{
905 		values[1] = LSNGetDatum(MyReplicationSlot->data.confirmed_flush);
906 		nulls[1] = false;
907 	}
908 	else
909 		nulls[1] = true;
910 
911 	tuple = heap_form_tuple(tupdesc, values, nulls);
912 	result = HeapTupleGetDatum(tuple);
913 
914 	ReplicationSlotRelease();
915 
916 	PG_RETURN_DATUM(result);
917 }
918 
919 /* The wrappers below are all to appease opr_sanity */
920 Datum
pg_copy_logical_replication_slot_a(PG_FUNCTION_ARGS)921 pg_copy_logical_replication_slot_a(PG_FUNCTION_ARGS)
922 {
923 	return copy_replication_slot(fcinfo, true);
924 }
925 
926 Datum
pg_copy_logical_replication_slot_b(PG_FUNCTION_ARGS)927 pg_copy_logical_replication_slot_b(PG_FUNCTION_ARGS)
928 {
929 	return copy_replication_slot(fcinfo, true);
930 }
931 
932 Datum
pg_copy_logical_replication_slot_c(PG_FUNCTION_ARGS)933 pg_copy_logical_replication_slot_c(PG_FUNCTION_ARGS)
934 {
935 	return copy_replication_slot(fcinfo, true);
936 }
937 
938 Datum
pg_copy_physical_replication_slot_a(PG_FUNCTION_ARGS)939 pg_copy_physical_replication_slot_a(PG_FUNCTION_ARGS)
940 {
941 	return copy_replication_slot(fcinfo, false);
942 }
943 
944 Datum
pg_copy_physical_replication_slot_b(PG_FUNCTION_ARGS)945 pg_copy_physical_replication_slot_b(PG_FUNCTION_ARGS)
946 {
947 	return copy_replication_slot(fcinfo, false);
948 }
949