1 /*-------------------------------------------------------------------------
2  * logical.c
3  *	   PostgreSQL logical decoding coordination
4  *
5  * Copyright (c) 2012-2020, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *	  src/backend/replication/logical/logical.c
9  *
10  * NOTES
11  *	  This file coordinates interaction between the various modules that
12  *	  together provide logical decoding, primarily by providing so
13  *	  called LogicalDecodingContexts. The goal is to encapsulate most of the
14  *	  internal complexity for consumers of logical decoding, so they can
15  *	  create and consume a changestream with a low amount of code. Builtin
16  *	  consumers are the walsender and SQL SRF interface, but it's possible to
17  *	  add further ones without changing core code, e.g. to consume changes in
18  *	  a bgworker.
19  *
20  *	  The idea is that a consumer provides three callbacks, one to read WAL,
21  *	  one to prepare a data write, and a final one for actually writing since
22  *	  their implementation depends on the type of consumer.  Check
23  *	  logicalfuncs.c for an example implementation of a fairly simple consumer
24  *	  and an implementation of a WAL reading callback that's suitable for
25  *	  simple consumers.
26  *-------------------------------------------------------------------------
27  */
28 
29 #include "postgres.h"
30 
31 #include "access/xact.h"
32 #include "access/xlog_internal.h"
33 #include "fmgr.h"
34 #include "miscadmin.h"
35 #include "replication/decode.h"
36 #include "replication/logical.h"
37 #include "replication/origin.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/snapbuild.h"
40 #include "storage/proc.h"
41 #include "storage/procarray.h"
42 #include "utils/memutils.h"
43 
44 /* data for errcontext callback */
45 typedef struct LogicalErrorCallbackState
46 {
47 	LogicalDecodingContext *ctx;
48 	const char *callback_name;
49 	XLogRecPtr	report_location;
50 } LogicalErrorCallbackState;
51 
52 /* wrappers around output plugin callbacks */
53 static void output_plugin_error_callback(void *arg);
54 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
55 							   bool is_init);
56 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
57 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
58 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
59 							  XLogRecPtr commit_lsn);
60 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
61 							  Relation relation, ReorderBufferChange *change);
62 static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
63 								int nrelations, Relation relations[], ReorderBufferChange *change);
64 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
65 							   XLogRecPtr message_lsn, bool transactional,
66 							   const char *prefix, Size message_size, const char *message);
67 
68 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
69 
70 /*
71  * Make sure the current settings & environment are capable of doing logical
72  * decoding.
73  */
74 void
CheckLogicalDecodingRequirements(void)75 CheckLogicalDecodingRequirements(void)
76 {
77 	CheckSlotRequirements();
78 
79 	/*
80 	 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
81 	 * needs the same check.
82 	 */
83 
84 	if (wal_level < WAL_LEVEL_LOGICAL)
85 		ereport(ERROR,
86 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
87 				 errmsg("logical decoding requires wal_level >= logical")));
88 
89 	if (MyDatabaseId == InvalidOid)
90 		ereport(ERROR,
91 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
92 				 errmsg("logical decoding requires a database connection")));
93 
94 	/* ----
95 	 * TODO: We got to change that someday soon...
96 	 *
97 	 * There's basically three things missing to allow this:
98 	 * 1) We need to be able to correctly and quickly identify the timeline a
99 	 *	  LSN belongs to
100 	 * 2) We need to force hot_standby_feedback to be enabled at all times so
101 	 *	  the primary cannot remove rows we need.
102 	 * 3) support dropping replication slots referring to a database, in
103 	 *	  dbase_redo. There can't be any active ones due to HS recovery
104 	 *	  conflicts, so that should be relatively easy.
105 	 * ----
106 	 */
107 	if (RecoveryInProgress())
108 		ereport(ERROR,
109 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
110 				 errmsg("logical decoding cannot be used while in recovery")));
111 }
112 
113 /*
114  * Helper function for CreateInitDecodingContext() and
115  * CreateDecodingContext() performing common tasks.
116  */
117 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)118 StartupDecodingContext(List *output_plugin_options,
119 					   XLogRecPtr start_lsn,
120 					   TransactionId xmin_horizon,
121 					   bool need_full_snapshot,
122 					   bool fast_forward,
123 					   XLogReaderRoutine *xl_routine,
124 					   LogicalOutputPluginWriterPrepareWrite prepare_write,
125 					   LogicalOutputPluginWriterWrite do_write,
126 					   LogicalOutputPluginWriterUpdateProgress update_progress)
127 {
128 	ReplicationSlot *slot;
129 	MemoryContext context,
130 				old_context;
131 	LogicalDecodingContext *ctx;
132 
133 	/* shorter lines... */
134 	slot = MyReplicationSlot;
135 
136 	context = AllocSetContextCreate(CurrentMemoryContext,
137 									"Logical decoding context",
138 									ALLOCSET_DEFAULT_SIZES);
139 	old_context = MemoryContextSwitchTo(context);
140 	ctx = palloc0(sizeof(LogicalDecodingContext));
141 
142 	ctx->context = context;
143 
144 	/*
145 	 * (re-)load output plugins, so we detect a bad (removed) output plugin
146 	 * now.
147 	 */
148 	if (!fast_forward)
149 		LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
150 
151 	/*
152 	 * Now that the slot's xmin has been set, we can announce ourselves as a
153 	 * logical decoding backend which doesn't need to be checked individually
154 	 * when computing the xmin horizon because the xmin is enforced via
155 	 * replication slots.
156 	 *
157 	 * We can only do so if we're outside of a transaction (i.e. the case when
158 	 * streaming changes via walsender), otherwise an already setup
159 	 * snapshot/xid would end up being ignored. That's not a particularly
160 	 * bothersome restriction since the SQL interface can't be used for
161 	 * streaming anyway.
162 	 */
163 	if (!IsTransactionOrTransactionBlock())
164 	{
165 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
166 		MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
167 		LWLockRelease(ProcArrayLock);
168 	}
169 
170 	ctx->slot = slot;
171 
172 	ctx->reader = XLogReaderAllocate(wal_segment_size, NULL, xl_routine, ctx);
173 	if (!ctx->reader)
174 		ereport(ERROR,
175 				(errcode(ERRCODE_OUT_OF_MEMORY),
176 				 errmsg("out of memory")));
177 
178 	ctx->reorder = ReorderBufferAllocate();
179 	ctx->snapshot_builder =
180 		AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
181 								need_full_snapshot);
182 
183 	ctx->reorder->private_data = ctx;
184 
185 	/* wrap output plugin callbacks, so we can add error context information */
186 	ctx->reorder->begin = begin_cb_wrapper;
187 	ctx->reorder->apply_change = change_cb_wrapper;
188 	ctx->reorder->apply_truncate = truncate_cb_wrapper;
189 	ctx->reorder->commit = commit_cb_wrapper;
190 	ctx->reorder->message = message_cb_wrapper;
191 
192 	ctx->out = makeStringInfo();
193 	ctx->prepare_write = prepare_write;
194 	ctx->write = do_write;
195 	ctx->update_progress = update_progress;
196 
197 	ctx->output_plugin_options = output_plugin_options;
198 
199 	ctx->fast_forward = fast_forward;
200 
201 	MemoryContextSwitchTo(old_context);
202 
203 	return ctx;
204 }
205 
206 /*
207  * Create a new decoding context, for a new logical slot.
208  *
209  * plugin -- contains the name of the output plugin
210  * output_plugin_options -- contains options passed to the output plugin
211  * need_full_snapshot -- if true, must obtain a snapshot able to read all
212  *		tables; if false, one that can read only catalogs is acceptable.
213  * restart_lsn -- if given as invalid, it's this routine's responsibility to
214  *		mark WAL as reserved by setting a convenient restart_lsn for the slot.
215  *		Otherwise, we set for decoding to start from the given LSN without
216  *		marking WAL reserved beforehand.  In that scenario, it's up to the
217  *		caller to guarantee that WAL remains available.
218  * xl_routine -- XLogReaderRoutine for underlying XLogReader
219  * prepare_write, do_write, update_progress --
220  *		callbacks that perform the use-case dependent, actual, work.
221  *
222  * Needs to be called while in a memory context that's at least as long lived
223  * as the decoding context because further memory contexts will be created
224  * inside it.
225  *
226  * Returns an initialized decoding context after calling the output plugin's
227  * startup function.
228  */
229 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogRecPtr restart_lsn,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)230 CreateInitDecodingContext(char *plugin,
231 						  List *output_plugin_options,
232 						  bool need_full_snapshot,
233 						  XLogRecPtr restart_lsn,
234 						  XLogReaderRoutine *xl_routine,
235 						  LogicalOutputPluginWriterPrepareWrite prepare_write,
236 						  LogicalOutputPluginWriterWrite do_write,
237 						  LogicalOutputPluginWriterUpdateProgress update_progress)
238 {
239 	TransactionId xmin_horizon = InvalidTransactionId;
240 	ReplicationSlot *slot;
241 	LogicalDecodingContext *ctx;
242 	MemoryContext old_context;
243 
244 	/* shorter lines... */
245 	slot = MyReplicationSlot;
246 
247 	/* first some sanity checks that are unlikely to be violated */
248 	if (slot == NULL)
249 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
250 
251 	if (plugin == NULL)
252 		elog(ERROR, "cannot initialize logical decoding without a specified plugin");
253 
254 	/* Make sure the passed slot is suitable. These are user facing errors. */
255 	if (SlotIsPhysical(slot))
256 		ereport(ERROR,
257 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
258 				 errmsg("cannot use physical replication slot for logical decoding")));
259 
260 	if (slot->data.database != MyDatabaseId)
261 		ereport(ERROR,
262 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
263 				 errmsg("replication slot \"%s\" was not created in this database",
264 						NameStr(slot->data.name))));
265 
266 	if (IsTransactionState() &&
267 		GetTopTransactionIdIfAny() != InvalidTransactionId)
268 		ereport(ERROR,
269 				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
270 				 errmsg("cannot create logical replication slot in transaction that has performed writes")));
271 
272 	/* register output plugin name with slot */
273 	SpinLockAcquire(&slot->mutex);
274 	StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
275 	SpinLockRelease(&slot->mutex);
276 
277 	if (XLogRecPtrIsInvalid(restart_lsn))
278 		ReplicationSlotReserveWal();
279 	else
280 	{
281 		SpinLockAcquire(&slot->mutex);
282 		slot->data.restart_lsn = restart_lsn;
283 		SpinLockRelease(&slot->mutex);
284 	}
285 
286 	/* ----
287 	 * This is a bit tricky: We need to determine a safe xmin horizon to start
288 	 * decoding from, to avoid starting from a running xacts record referring
289 	 * to xids whose rows have been vacuumed or pruned
290 	 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
291 	 * without further interlock its return value might immediately be out of
292 	 * date.
293 	 *
294 	 * So we have to acquire the ProcArrayLock to prevent computation of new
295 	 * xmin horizons by other backends, get the safe decoding xid, and inform
296 	 * the slot machinery about the new limit. Once that's done the
297 	 * ProcArrayLock can be released as the slot machinery now is
298 	 * protecting against vacuum.
299 	 *
300 	 * Note that, temporarily, the data, not just the catalog, xmin has to be
301 	 * reserved if a data snapshot is to be exported.  Otherwise the initial
302 	 * data snapshot created here is not guaranteed to be valid. After that
303 	 * the data xmin doesn't need to be managed anymore and the global xmin
304 	 * should be recomputed. As we are fine with losing the pegged data xmin
305 	 * after crash - no chance a snapshot would get exported anymore - we can
306 	 * get away with just setting the slot's
307 	 * effective_xmin. ReplicationSlotRelease will reset it again.
308 	 *
309 	 * ----
310 	 */
311 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
312 
313 	xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
314 
315 	SpinLockAcquire(&slot->mutex);
316 	slot->effective_catalog_xmin = xmin_horizon;
317 	slot->data.catalog_xmin = xmin_horizon;
318 	if (need_full_snapshot)
319 		slot->effective_xmin = xmin_horizon;
320 	SpinLockRelease(&slot->mutex);
321 
322 	ReplicationSlotsComputeRequiredXmin(true);
323 
324 	LWLockRelease(ProcArrayLock);
325 
326 	ReplicationSlotMarkDirty();
327 	ReplicationSlotSave();
328 
329 	ctx = StartupDecodingContext(NIL, restart_lsn, xmin_horizon,
330 								 need_full_snapshot, false,
331 								 xl_routine, prepare_write, do_write,
332 								 update_progress);
333 
334 	/* call output plugin initialization callback */
335 	old_context = MemoryContextSwitchTo(ctx->context);
336 	if (ctx->callbacks.startup_cb != NULL)
337 		startup_cb_wrapper(ctx, &ctx->options, true);
338 	MemoryContextSwitchTo(old_context);
339 
340 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
341 
342 	return ctx;
343 }
344 
345 /*
346  * Create a new decoding context, for a logical slot that has previously been
347  * used already.
348  *
349  * start_lsn
350  *		The LSN at which to start decoding.  If InvalidXLogRecPtr, restart
351  *		from the slot's confirmed_flush; otherwise, start from the specified
352  *		location (but move it forwards to confirmed_flush if it's older than
353  *		that, see below).
354  *
355  * output_plugin_options
356  *		options passed to the output plugin.
357  *
358  * fast_forward
359  *		bypass the generation of logical changes.
360  *
361  * xl_routine
362  *		XLogReaderRoutine used by underlying xlogreader
363  *
364  * prepare_write, do_write, update_progress
365  *		callbacks that have to be filled to perform the use-case dependent,
366  *		actual work.
367  *
368  * Needs to be called while in a memory context that's at least as long lived
369  * as the decoding context because further memory contexts will be created
370  * inside it.
371  *
372  * Returns an initialized decoding context after calling the output plugin's
373  * startup function.
374  */
375 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)376 CreateDecodingContext(XLogRecPtr start_lsn,
377 					  List *output_plugin_options,
378 					  bool fast_forward,
379 					  XLogReaderRoutine *xl_routine,
380 					  LogicalOutputPluginWriterPrepareWrite prepare_write,
381 					  LogicalOutputPluginWriterWrite do_write,
382 					  LogicalOutputPluginWriterUpdateProgress update_progress)
383 {
384 	LogicalDecodingContext *ctx;
385 	ReplicationSlot *slot;
386 	MemoryContext old_context;
387 
388 	/* shorter lines... */
389 	slot = MyReplicationSlot;
390 
391 	/* first some sanity checks that are unlikely to be violated */
392 	if (slot == NULL)
393 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
394 
395 	/* make sure the passed slot is suitable, these are user facing errors */
396 	if (SlotIsPhysical(slot))
397 		ereport(ERROR,
398 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
399 				 errmsg("cannot use physical replication slot for logical decoding")));
400 
401 	if (slot->data.database != MyDatabaseId)
402 		ereport(ERROR,
403 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
404 				 errmsg("replication slot \"%s\" was not created in this database",
405 						NameStr(slot->data.name))));
406 
407 	if (start_lsn == InvalidXLogRecPtr)
408 	{
409 		/* continue from last position */
410 		start_lsn = slot->data.confirmed_flush;
411 	}
412 	else if (start_lsn < slot->data.confirmed_flush)
413 	{
414 		/*
415 		 * It might seem like we should error out in this case, but it's
416 		 * pretty common for a client to acknowledge a LSN it doesn't have to
417 		 * do anything for, and thus didn't store persistently, because the
418 		 * xlog records didn't result in anything relevant for logical
419 		 * decoding. Clients have to be able to do that to support synchronous
420 		 * replication.
421 		 */
422 		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
423 			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
424 			 (uint32) (slot->data.confirmed_flush >> 32),
425 			 (uint32) slot->data.confirmed_flush);
426 
427 		start_lsn = slot->data.confirmed_flush;
428 	}
429 
430 	ctx = StartupDecodingContext(output_plugin_options,
431 								 start_lsn, InvalidTransactionId, false,
432 								 fast_forward, xl_routine, prepare_write,
433 								 do_write, update_progress);
434 
435 	/* call output plugin initialization callback */
436 	old_context = MemoryContextSwitchTo(ctx->context);
437 	if (ctx->callbacks.startup_cb != NULL)
438 		startup_cb_wrapper(ctx, &ctx->options, false);
439 	MemoryContextSwitchTo(old_context);
440 
441 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
442 
443 	ereport(LOG,
444 			(errmsg("starting logical decoding for slot \"%s\"",
445 					NameStr(slot->data.name)),
446 			 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
447 					   (uint32) (slot->data.confirmed_flush >> 32),
448 					   (uint32) slot->data.confirmed_flush,
449 					   (uint32) (slot->data.restart_lsn >> 32),
450 					   (uint32) slot->data.restart_lsn)));
451 
452 	return ctx;
453 }
454 
455 /*
456  * Returns true if a consistent initial decoding snapshot has been built.
457  */
458 bool
DecodingContextReady(LogicalDecodingContext * ctx)459 DecodingContextReady(LogicalDecodingContext *ctx)
460 {
461 	return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
462 }
463 
464 /*
465  * Read from the decoding slot, until it is ready to start extracting changes.
466  */
467 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)468 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
469 {
470 	ReplicationSlot *slot = ctx->slot;
471 
472 	/* Initialize from where to start reading WAL. */
473 	XLogBeginRead(ctx->reader, slot->data.restart_lsn);
474 
475 	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
476 		 (uint32) (slot->data.restart_lsn >> 32),
477 		 (uint32) slot->data.restart_lsn);
478 
479 	/* Wait for a consistent starting point */
480 	for (;;)
481 	{
482 		XLogRecord *record;
483 		char	   *err = NULL;
484 
485 		/* the read_page callback waits for new WAL */
486 		record = XLogReadRecord(ctx->reader, &err);
487 		if (err)
488 			elog(ERROR, "%s", err);
489 		if (!record)
490 			elog(ERROR, "no record found"); /* shouldn't happen */
491 
492 		LogicalDecodingProcessRecord(ctx, ctx->reader);
493 
494 		/* only continue till we found a consistent spot */
495 		if (DecodingContextReady(ctx))
496 			break;
497 
498 		CHECK_FOR_INTERRUPTS();
499 	}
500 
501 	SpinLockAcquire(&slot->mutex);
502 	slot->data.confirmed_flush = ctx->reader->EndRecPtr;
503 	SpinLockRelease(&slot->mutex);
504 }
505 
506 /*
507  * Free a previously allocated decoding context, invoking the shutdown
508  * callback if necessary.
509  */
510 void
FreeDecodingContext(LogicalDecodingContext * ctx)511 FreeDecodingContext(LogicalDecodingContext *ctx)
512 {
513 	if (ctx->callbacks.shutdown_cb != NULL)
514 		shutdown_cb_wrapper(ctx);
515 
516 	ReorderBufferFree(ctx->reorder);
517 	FreeSnapshotBuilder(ctx->snapshot_builder);
518 	XLogReaderFree(ctx->reader);
519 	MemoryContextDelete(ctx->context);
520 }
521 
522 /*
523  * Prepare a write using the context's output routine.
524  */
525 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)526 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
527 {
528 	if (!ctx->accept_writes)
529 		elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
530 
531 	ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
532 	ctx->prepared_write = true;
533 }
534 
535 /*
536  * Perform a write using the context's output routine.
537  */
538 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)539 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
540 {
541 	if (!ctx->prepared_write)
542 		elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
543 
544 	ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
545 	ctx->prepared_write = false;
546 }
547 
548 /*
549  * Update progress tracking (if supported).
550  */
551 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)552 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
553 {
554 	if (!ctx->update_progress)
555 		return;
556 
557 	ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
558 }
559 
560 /*
561  * Load the output plugin, lookup its output plugin init function, and check
562  * that it provides the required callbacks.
563  */
564 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)565 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
566 {
567 	LogicalOutputPluginInit plugin_init;
568 
569 	plugin_init = (LogicalOutputPluginInit)
570 		load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
571 
572 	if (plugin_init == NULL)
573 		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
574 
575 	/* ask the output plugin to fill the callback struct */
576 	plugin_init(callbacks);
577 
578 	if (callbacks->begin_cb == NULL)
579 		elog(ERROR, "output plugins have to register a begin callback");
580 	if (callbacks->change_cb == NULL)
581 		elog(ERROR, "output plugins have to register a change callback");
582 	if (callbacks->commit_cb == NULL)
583 		elog(ERROR, "output plugins have to register a commit callback");
584 }
585 
586 static void
output_plugin_error_callback(void * arg)587 output_plugin_error_callback(void *arg)
588 {
589 	LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
590 
591 	/* not all callbacks have an associated LSN  */
592 	if (state->report_location != InvalidXLogRecPtr)
593 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
594 				   NameStr(state->ctx->slot->data.name),
595 				   NameStr(state->ctx->slot->data.plugin),
596 				   state->callback_name,
597 				   (uint32) (state->report_location >> 32),
598 				   (uint32) state->report_location);
599 	else
600 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
601 				   NameStr(state->ctx->slot->data.name),
602 				   NameStr(state->ctx->slot->data.plugin),
603 				   state->callback_name);
604 }
605 
606 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)607 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
608 {
609 	LogicalErrorCallbackState state;
610 	ErrorContextCallback errcallback;
611 
612 	Assert(!ctx->fast_forward);
613 
614 	/* Push callback + info on the error context stack */
615 	state.ctx = ctx;
616 	state.callback_name = "startup";
617 	state.report_location = InvalidXLogRecPtr;
618 	errcallback.callback = output_plugin_error_callback;
619 	errcallback.arg = (void *) &state;
620 	errcallback.previous = error_context_stack;
621 	error_context_stack = &errcallback;
622 
623 	/* set output state */
624 	ctx->accept_writes = false;
625 
626 	/* do the actual work: call callback */
627 	ctx->callbacks.startup_cb(ctx, opt, is_init);
628 
629 	/* Pop the error context stack */
630 	error_context_stack = errcallback.previous;
631 }
632 
633 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)634 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
635 {
636 	LogicalErrorCallbackState state;
637 	ErrorContextCallback errcallback;
638 
639 	Assert(!ctx->fast_forward);
640 
641 	/* Push callback + info on the error context stack */
642 	state.ctx = ctx;
643 	state.callback_name = "shutdown";
644 	state.report_location = InvalidXLogRecPtr;
645 	errcallback.callback = output_plugin_error_callback;
646 	errcallback.arg = (void *) &state;
647 	errcallback.previous = error_context_stack;
648 	error_context_stack = &errcallback;
649 
650 	/* set output state */
651 	ctx->accept_writes = false;
652 
653 	/* do the actual work: call callback */
654 	ctx->callbacks.shutdown_cb(ctx);
655 
656 	/* Pop the error context stack */
657 	error_context_stack = errcallback.previous;
658 }
659 
660 
661 /*
662  * Callbacks for ReorderBuffer which add in some more information and then call
663  * output_plugin.h plugins.
664  */
665 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)666 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
667 {
668 	LogicalDecodingContext *ctx = cache->private_data;
669 	LogicalErrorCallbackState state;
670 	ErrorContextCallback errcallback;
671 
672 	Assert(!ctx->fast_forward);
673 
674 	/* Push callback + info on the error context stack */
675 	state.ctx = ctx;
676 	state.callback_name = "begin";
677 	state.report_location = txn->first_lsn;
678 	errcallback.callback = output_plugin_error_callback;
679 	errcallback.arg = (void *) &state;
680 	errcallback.previous = error_context_stack;
681 	error_context_stack = &errcallback;
682 
683 	/* set output state */
684 	ctx->accept_writes = true;
685 	ctx->write_xid = txn->xid;
686 	ctx->write_location = txn->first_lsn;
687 
688 	/* do the actual work: call callback */
689 	ctx->callbacks.begin_cb(ctx, txn);
690 
691 	/* Pop the error context stack */
692 	error_context_stack = errcallback.previous;
693 }
694 
695 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)696 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
697 				  XLogRecPtr commit_lsn)
698 {
699 	LogicalDecodingContext *ctx = cache->private_data;
700 	LogicalErrorCallbackState state;
701 	ErrorContextCallback errcallback;
702 
703 	Assert(!ctx->fast_forward);
704 
705 	/* Push callback + info on the error context stack */
706 	state.ctx = ctx;
707 	state.callback_name = "commit";
708 	state.report_location = txn->final_lsn; /* beginning of commit record */
709 	errcallback.callback = output_plugin_error_callback;
710 	errcallback.arg = (void *) &state;
711 	errcallback.previous = error_context_stack;
712 	error_context_stack = &errcallback;
713 
714 	/* set output state */
715 	ctx->accept_writes = true;
716 	ctx->write_xid = txn->xid;
717 	ctx->write_location = txn->end_lsn; /* points to the end of the record */
718 
719 	/* do the actual work: call callback */
720 	ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
721 
722 	/* Pop the error context stack */
723 	error_context_stack = errcallback.previous;
724 }
725 
726 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)727 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
728 				  Relation relation, ReorderBufferChange *change)
729 {
730 	LogicalDecodingContext *ctx = cache->private_data;
731 	LogicalErrorCallbackState state;
732 	ErrorContextCallback errcallback;
733 
734 	Assert(!ctx->fast_forward);
735 
736 	/* Push callback + info on the error context stack */
737 	state.ctx = ctx;
738 	state.callback_name = "change";
739 	state.report_location = change->lsn;
740 	errcallback.callback = output_plugin_error_callback;
741 	errcallback.arg = (void *) &state;
742 	errcallback.previous = error_context_stack;
743 	error_context_stack = &errcallback;
744 
745 	/* set output state */
746 	ctx->accept_writes = true;
747 	ctx->write_xid = txn->xid;
748 
749 	/*
750 	 * report this change's lsn so replies from clients can give an up2date
751 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
752 	 * receipt of this transaction, but it might allow another transaction's
753 	 * commit to be confirmed with one message.
754 	 */
755 	ctx->write_location = change->lsn;
756 
757 	ctx->callbacks.change_cb(ctx, txn, relation, change);
758 
759 	/* Pop the error context stack */
760 	error_context_stack = errcallback.previous;
761 }
762 
763 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)764 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
765 					int nrelations, Relation relations[], ReorderBufferChange *change)
766 {
767 	LogicalDecodingContext *ctx = cache->private_data;
768 	LogicalErrorCallbackState state;
769 	ErrorContextCallback errcallback;
770 
771 	Assert(!ctx->fast_forward);
772 
773 	if (!ctx->callbacks.truncate_cb)
774 		return;
775 
776 	/* Push callback + info on the error context stack */
777 	state.ctx = ctx;
778 	state.callback_name = "truncate";
779 	state.report_location = change->lsn;
780 	errcallback.callback = output_plugin_error_callback;
781 	errcallback.arg = (void *) &state;
782 	errcallback.previous = error_context_stack;
783 	error_context_stack = &errcallback;
784 
785 	/* set output state */
786 	ctx->accept_writes = true;
787 	ctx->write_xid = txn->xid;
788 
789 	/*
790 	 * report this change's lsn so replies from clients can give an up2date
791 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
792 	 * receipt of this transaction, but it might allow another transaction's
793 	 * commit to be confirmed with one message.
794 	 */
795 	ctx->write_location = change->lsn;
796 
797 	ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
798 
799 	/* Pop the error context stack */
800 	error_context_stack = errcallback.previous;
801 }
802 
803 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)804 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
805 {
806 	LogicalErrorCallbackState state;
807 	ErrorContextCallback errcallback;
808 	bool		ret;
809 
810 	Assert(!ctx->fast_forward);
811 
812 	/* Push callback + info on the error context stack */
813 	state.ctx = ctx;
814 	state.callback_name = "filter_by_origin";
815 	state.report_location = InvalidXLogRecPtr;
816 	errcallback.callback = output_plugin_error_callback;
817 	errcallback.arg = (void *) &state;
818 	errcallback.previous = error_context_stack;
819 	error_context_stack = &errcallback;
820 
821 	/* set output state */
822 	ctx->accept_writes = false;
823 
824 	/* do the actual work: call callback */
825 	ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
826 
827 	/* Pop the error context stack */
828 	error_context_stack = errcallback.previous;
829 
830 	return ret;
831 }
832 
833 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)834 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
835 				   XLogRecPtr message_lsn, bool transactional,
836 				   const char *prefix, Size message_size, const char *message)
837 {
838 	LogicalDecodingContext *ctx = cache->private_data;
839 	LogicalErrorCallbackState state;
840 	ErrorContextCallback errcallback;
841 
842 	Assert(!ctx->fast_forward);
843 
844 	if (ctx->callbacks.message_cb == NULL)
845 		return;
846 
847 	/* Push callback + info on the error context stack */
848 	state.ctx = ctx;
849 	state.callback_name = "message";
850 	state.report_location = message_lsn;
851 	errcallback.callback = output_plugin_error_callback;
852 	errcallback.arg = (void *) &state;
853 	errcallback.previous = error_context_stack;
854 	error_context_stack = &errcallback;
855 
856 	/* set output state */
857 	ctx->accept_writes = true;
858 	ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
859 	ctx->write_location = message_lsn;
860 
861 	/* do the actual work: call callback */
862 	ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
863 							  message_size, message);
864 
865 	/* Pop the error context stack */
866 	error_context_stack = errcallback.previous;
867 }
868 
869 /*
870  * Set the required catalog xmin horizon for historic snapshots in the current
871  * replication slot.
872  *
873  * Note that in the most cases, we won't be able to immediately use the xmin
874  * to increase the xmin horizon: we need to wait till the client has confirmed
875  * receiving current_lsn with LogicalConfirmReceivedLocation().
876  */
877 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)878 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
879 {
880 	bool		updated_xmin = false;
881 	ReplicationSlot *slot;
882 
883 	slot = MyReplicationSlot;
884 
885 	Assert(slot != NULL);
886 
887 	SpinLockAcquire(&slot->mutex);
888 
889 	/*
890 	 * don't overwrite if we already have a newer xmin. This can happen if we
891 	 * restart decoding in a slot.
892 	 */
893 	if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
894 	{
895 	}
896 
897 	/*
898 	 * If the client has already confirmed up to this lsn, we directly can
899 	 * mark this as accepted. This can happen if we restart decoding in a
900 	 * slot.
901 	 */
902 	else if (current_lsn <= slot->data.confirmed_flush)
903 	{
904 		slot->candidate_catalog_xmin = xmin;
905 		slot->candidate_xmin_lsn = current_lsn;
906 
907 		/* our candidate can directly be used */
908 		updated_xmin = true;
909 	}
910 
911 	/*
912 	 * Only increase if the previous values have been applied, otherwise we
913 	 * might never end up updating if the receiver acks too slowly.
914 	 */
915 	else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
916 	{
917 		slot->candidate_catalog_xmin = xmin;
918 		slot->candidate_xmin_lsn = current_lsn;
919 	}
920 	SpinLockRelease(&slot->mutex);
921 
922 	/* candidate already valid with the current flush position, apply */
923 	if (updated_xmin)
924 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
925 }
926 
927 /*
928  * Mark the minimal LSN (restart_lsn) we need to read to replay all
929  * transactions that have not yet committed at current_lsn.
930  *
931  * Just like LogicalIncreaseXminForSlot this only takes effect when the
932  * client has confirmed to have received current_lsn.
933  */
934 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)935 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
936 {
937 	bool		updated_lsn = false;
938 	ReplicationSlot *slot;
939 
940 	slot = MyReplicationSlot;
941 
942 	Assert(slot != NULL);
943 	Assert(restart_lsn != InvalidXLogRecPtr);
944 	Assert(current_lsn != InvalidXLogRecPtr);
945 
946 	SpinLockAcquire(&slot->mutex);
947 
948 	/* don't overwrite if have a newer restart lsn */
949 	if (restart_lsn <= slot->data.restart_lsn)
950 	{
951 	}
952 
953 	/*
954 	 * We might have already flushed far enough to directly accept this lsn,
955 	 * in this case there is no need to check for existing candidate LSNs
956 	 */
957 	else if (current_lsn <= slot->data.confirmed_flush)
958 	{
959 		slot->candidate_restart_valid = current_lsn;
960 		slot->candidate_restart_lsn = restart_lsn;
961 
962 		/* our candidate can directly be used */
963 		updated_lsn = true;
964 	}
965 
966 	/*
967 	 * Only increase if the previous values have been applied, otherwise we
968 	 * might never end up updating if the receiver acks too slowly. A missed
969 	 * value here will just cause some extra effort after reconnecting.
970 	 */
971 	if (slot->candidate_restart_valid == InvalidXLogRecPtr)
972 	{
973 		slot->candidate_restart_valid = current_lsn;
974 		slot->candidate_restart_lsn = restart_lsn;
975 		SpinLockRelease(&slot->mutex);
976 
977 		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
978 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
979 			 (uint32) (current_lsn >> 32), (uint32) current_lsn);
980 	}
981 	else
982 	{
983 		XLogRecPtr	candidate_restart_lsn;
984 		XLogRecPtr	candidate_restart_valid;
985 		XLogRecPtr	confirmed_flush;
986 
987 		candidate_restart_lsn = slot->candidate_restart_lsn;
988 		candidate_restart_valid = slot->candidate_restart_valid;
989 		confirmed_flush = slot->data.confirmed_flush;
990 		SpinLockRelease(&slot->mutex);
991 
992 		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
993 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
994 			 (uint32) (current_lsn >> 32), (uint32) current_lsn,
995 			 (uint32) (candidate_restart_lsn >> 32),
996 			 (uint32) candidate_restart_lsn,
997 			 (uint32) (candidate_restart_valid >> 32),
998 			 (uint32) candidate_restart_valid,
999 			 (uint32) (confirmed_flush >> 32),
1000 			 (uint32) confirmed_flush);
1001 	}
1002 
1003 	/* candidates are already valid with the current flush position, apply */
1004 	if (updated_lsn)
1005 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
1006 }
1007 
1008 /*
1009  * Handle a consumer's confirmation having received all changes up to lsn.
1010  */
1011 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1012 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1013 {
1014 	Assert(lsn != InvalidXLogRecPtr);
1015 
1016 	/* Do an unlocked check for candidate_lsn first. */
1017 	if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1018 		MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1019 	{
1020 		bool		updated_xmin = false;
1021 		bool		updated_restart = false;
1022 
1023 		SpinLockAcquire(&MyReplicationSlot->mutex);
1024 
1025 		MyReplicationSlot->data.confirmed_flush = lsn;
1026 
1027 		/* if we're past the location required for bumping xmin, do so */
1028 		if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1029 			MyReplicationSlot->candidate_xmin_lsn <= lsn)
1030 		{
1031 			/*
1032 			 * We have to write the changed xmin to disk *before* we change
1033 			 * the in-memory value, otherwise after a crash we wouldn't know
1034 			 * that some catalog tuples might have been removed already.
1035 			 *
1036 			 * Ensure that by first writing to ->xmin and only update
1037 			 * ->effective_xmin once the new state is synced to disk. After a
1038 			 * crash ->effective_xmin is set to ->xmin.
1039 			 */
1040 			if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1041 				MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1042 			{
1043 				MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1044 				MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1045 				MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1046 				updated_xmin = true;
1047 			}
1048 		}
1049 
1050 		if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1051 			MyReplicationSlot->candidate_restart_valid <= lsn)
1052 		{
1053 			Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1054 
1055 			MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1056 			MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1057 			MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1058 			updated_restart = true;
1059 		}
1060 
1061 		SpinLockRelease(&MyReplicationSlot->mutex);
1062 
1063 		/* first write new xmin to disk, so we know what's up after a crash */
1064 		if (updated_xmin || updated_restart)
1065 		{
1066 			ReplicationSlotMarkDirty();
1067 			ReplicationSlotSave();
1068 			elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1069 		}
1070 
1071 		/*
1072 		 * Now the new xmin is safely on disk, we can let the global value
1073 		 * advance. We do not take ProcArrayLock or similar since we only
1074 		 * advance xmin here and there's not much harm done by a concurrent
1075 		 * computation missing that.
1076 		 */
1077 		if (updated_xmin)
1078 		{
1079 			SpinLockAcquire(&MyReplicationSlot->mutex);
1080 			MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1081 			SpinLockRelease(&MyReplicationSlot->mutex);
1082 
1083 			ReplicationSlotsComputeRequiredXmin(false);
1084 			ReplicationSlotsComputeRequiredLSN();
1085 		}
1086 	}
1087 	else
1088 	{
1089 		SpinLockAcquire(&MyReplicationSlot->mutex);
1090 		MyReplicationSlot->data.confirmed_flush = lsn;
1091 		SpinLockRelease(&MyReplicationSlot->mutex);
1092 	}
1093 }
1094