1 /*-------------------------------------------------------------------------
2  * logical.c
3  *	   PostgreSQL logical decoding coordination
4  *
5  * Copyright (c) 2012-2019, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *	  src/backend/replication/logical/logical.c
9  *
10  * NOTES
11  *	  This file coordinates interaction between the various modules that
12  *	  together provide logical decoding, primarily by providing so
13  *	  called LogicalDecodingContexts. The goal is to encapsulate most of the
14  *	  internal complexity for consumers of logical decoding, so they can
15  *	  create and consume a changestream with a low amount of code. Builtin
16  *	  consumers are the walsender and SQL SRF interface, but it's possible to
17  *	  add further ones without changing core code, e.g. to consume changes in
18  *	  a bgworker.
19  *
20  *	  The idea is that a consumer provides three callbacks, one to read WAL,
21  *	  one to prepare a data write, and a final one for actually writing since
22  *	  their implementation depends on the type of consumer.  Check
23  *	  logicalfuncs.c for an example implementation of a fairly simple consumer
24  *	  and an implementation of a WAL reading callback that's suitable for
25  *	  simple consumers.
26  *-------------------------------------------------------------------------
27  */
28 
29 #include "postgres.h"
30 
31 #include "miscadmin.h"
32 
33 #include "access/xact.h"
34 #include "access/xlog_internal.h"
35 
36 #include "replication/decode.h"
37 #include "replication/logical.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/origin.h"
40 #include "replication/snapbuild.h"
41 
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44 
45 #include "utils/memutils.h"
46 
47 /* data for errcontext callback */
48 typedef struct LogicalErrorCallbackState
49 {
50 	LogicalDecodingContext *ctx;
51 	const char *callback_name;
52 	XLogRecPtr	report_location;
53 } LogicalErrorCallbackState;
54 
55 /* wrappers around output plugin callbacks */
56 static void output_plugin_error_callback(void *arg);
57 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
58 							   bool is_init);
59 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
60 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
61 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
62 							  XLogRecPtr commit_lsn);
63 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
64 							  Relation relation, ReorderBufferChange *change);
65 static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
66 								int nrelations, Relation relations[], ReorderBufferChange *change);
67 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
68 							   XLogRecPtr message_lsn, bool transactional,
69 							   const char *prefix, Size message_size, const char *message);
70 
71 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
72 
73 /*
74  * Make sure the current settings & environment are capable of doing logical
75  * decoding.
76  */
77 void
CheckLogicalDecodingRequirements(void)78 CheckLogicalDecodingRequirements(void)
79 {
80 	CheckSlotRequirements();
81 
82 	/*
83 	 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
84 	 * needs the same check.
85 	 */
86 
87 	if (wal_level < WAL_LEVEL_LOGICAL)
88 		ereport(ERROR,
89 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
90 				 errmsg("logical decoding requires wal_level >= logical")));
91 
92 	if (MyDatabaseId == InvalidOid)
93 		ereport(ERROR,
94 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
95 				 errmsg("logical decoding requires a database connection")));
96 
97 	/* ----
98 	 * TODO: We got to change that someday soon...
99 	 *
100 	 * There's basically three things missing to allow this:
101 	 * 1) We need to be able to correctly and quickly identify the timeline a
102 	 *	  LSN belongs to
103 	 * 2) We need to force hot_standby_feedback to be enabled at all times so
104 	 *	  the primary cannot remove rows we need.
105 	 * 3) support dropping replication slots referring to a database, in
106 	 *	  dbase_redo. There can't be any active ones due to HS recovery
107 	 *	  conflicts, so that should be relatively easy.
108 	 * ----
109 	 */
110 	if (RecoveryInProgress())
111 		ereport(ERROR,
112 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
113 				 errmsg("logical decoding cannot be used while in recovery")));
114 }
115 
116 /*
117  * Helper function for CreateInitDecodingContext() and
118  * CreateDecodingContext() performing common tasks.
119  */
120 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)121 StartupDecodingContext(List *output_plugin_options,
122 					   XLogRecPtr start_lsn,
123 					   TransactionId xmin_horizon,
124 					   bool need_full_snapshot,
125 					   bool fast_forward,
126 					   XLogPageReadCB read_page,
127 					   LogicalOutputPluginWriterPrepareWrite prepare_write,
128 					   LogicalOutputPluginWriterWrite do_write,
129 					   LogicalOutputPluginWriterUpdateProgress update_progress)
130 {
131 	ReplicationSlot *slot;
132 	MemoryContext context,
133 				old_context;
134 	LogicalDecodingContext *ctx;
135 
136 	/* shorter lines... */
137 	slot = MyReplicationSlot;
138 
139 	context = AllocSetContextCreate(CurrentMemoryContext,
140 									"Logical decoding context",
141 									ALLOCSET_DEFAULT_SIZES);
142 	old_context = MemoryContextSwitchTo(context);
143 	ctx = palloc0(sizeof(LogicalDecodingContext));
144 
145 	ctx->context = context;
146 
147 	/*
148 	 * (re-)load output plugins, so we detect a bad (removed) output plugin
149 	 * now.
150 	 */
151 	if (!fast_forward)
152 		LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
153 
154 	/*
155 	 * Now that the slot's xmin has been set, we can announce ourselves as a
156 	 * logical decoding backend which doesn't need to be checked individually
157 	 * when computing the xmin horizon because the xmin is enforced via
158 	 * replication slots.
159 	 *
160 	 * We can only do so if we're outside of a transaction (i.e. the case when
161 	 * streaming changes via walsender), otherwise an already setup
162 	 * snapshot/xid would end up being ignored. That's not a particularly
163 	 * bothersome restriction since the SQL interface can't be used for
164 	 * streaming anyway.
165 	 */
166 	if (!IsTransactionOrTransactionBlock())
167 	{
168 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
169 		MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
170 		LWLockRelease(ProcArrayLock);
171 	}
172 
173 	ctx->slot = slot;
174 
175 	ctx->reader = XLogReaderAllocate(wal_segment_size, read_page, ctx);
176 	if (!ctx->reader)
177 		ereport(ERROR,
178 				(errcode(ERRCODE_OUT_OF_MEMORY),
179 				 errmsg("out of memory")));
180 
181 	ctx->reorder = ReorderBufferAllocate();
182 	ctx->snapshot_builder =
183 		AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
184 								need_full_snapshot);
185 
186 	ctx->reorder->private_data = ctx;
187 
188 	/* wrap output plugin callbacks, so we can add error context information */
189 	ctx->reorder->begin = begin_cb_wrapper;
190 	ctx->reorder->apply_change = change_cb_wrapper;
191 	ctx->reorder->apply_truncate = truncate_cb_wrapper;
192 	ctx->reorder->commit = commit_cb_wrapper;
193 	ctx->reorder->message = message_cb_wrapper;
194 
195 	ctx->out = makeStringInfo();
196 	ctx->prepare_write = prepare_write;
197 	ctx->write = do_write;
198 	ctx->update_progress = update_progress;
199 
200 	ctx->output_plugin_options = output_plugin_options;
201 
202 	ctx->fast_forward = fast_forward;
203 
204 	MemoryContextSwitchTo(old_context);
205 
206 	return ctx;
207 }
208 
209 /*
210  * Create a new decoding context, for a new logical slot.
211  *
212  * plugin -- contains the name of the output plugin
213  * output_plugin_options -- contains options passed to the output plugin
214  * need_full_snapshot -- if true, must obtain a snapshot able to read all
215  *		tables; if false, one that can read only catalogs is acceptable.
216  * restart_lsn -- if given as invalid, it's this routine's responsibility to
217  *		mark WAL as reserved by setting a convenient restart_lsn for the slot.
218  *		Otherwise, we set for decoding to start from the given LSN without
219  *		marking WAL reserved beforehand.  In that scenario, it's up to the
220  *		caller to guarantee that WAL remains available.
221  * read_page, prepare_write, do_write, update_progress --
222  *		callbacks that perform the use-case dependent, actual, work.
223  *
224  * Needs to be called while in a memory context that's at least as long lived
225  * as the decoding context because further memory contexts will be created
226  * inside it.
227  *
228  * Returns an initialized decoding context after calling the output plugin's
229  * startup function.
230  */
231 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogRecPtr restart_lsn,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)232 CreateInitDecodingContext(char *plugin,
233 						  List *output_plugin_options,
234 						  bool need_full_snapshot,
235 						  XLogRecPtr restart_lsn,
236 						  XLogPageReadCB read_page,
237 						  LogicalOutputPluginWriterPrepareWrite prepare_write,
238 						  LogicalOutputPluginWriterWrite do_write,
239 						  LogicalOutputPluginWriterUpdateProgress update_progress)
240 {
241 	TransactionId xmin_horizon = InvalidTransactionId;
242 	ReplicationSlot *slot;
243 	LogicalDecodingContext *ctx;
244 	MemoryContext old_context;
245 
246 	/* shorter lines... */
247 	slot = MyReplicationSlot;
248 
249 	/* first some sanity checks that are unlikely to be violated */
250 	if (slot == NULL)
251 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
252 
253 	if (plugin == NULL)
254 		elog(ERROR, "cannot initialize logical decoding without a specified plugin");
255 
256 	/* Make sure the passed slot is suitable. These are user facing errors. */
257 	if (SlotIsPhysical(slot))
258 		ereport(ERROR,
259 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
260 				 errmsg("cannot use physical replication slot for logical decoding")));
261 
262 	if (slot->data.database != MyDatabaseId)
263 		ereport(ERROR,
264 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
265 				 errmsg("replication slot \"%s\" was not created in this database",
266 						NameStr(slot->data.name))));
267 
268 	if (IsTransactionState() &&
269 		GetTopTransactionIdIfAny() != InvalidTransactionId)
270 		ereport(ERROR,
271 				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
272 				 errmsg("cannot create logical replication slot in transaction that has performed writes")));
273 
274 	/* register output plugin name with slot */
275 	SpinLockAcquire(&slot->mutex);
276 	StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
277 	SpinLockRelease(&slot->mutex);
278 
279 	if (XLogRecPtrIsInvalid(restart_lsn))
280 		ReplicationSlotReserveWal();
281 	else
282 	{
283 		SpinLockAcquire(&slot->mutex);
284 		slot->data.restart_lsn = restart_lsn;
285 		SpinLockRelease(&slot->mutex);
286 	}
287 
288 	/* ----
289 	 * This is a bit tricky: We need to determine a safe xmin horizon to start
290 	 * decoding from, to avoid starting from a running xacts record referring
291 	 * to xids whose rows have been vacuumed or pruned
292 	 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
293 	 * without further interlock its return value might immediately be out of
294 	 * date.
295 	 *
296 	 * So we have to acquire the ProcArrayLock to prevent computation of new
297 	 * xmin horizons by other backends, get the safe decoding xid, and inform
298 	 * the slot machinery about the new limit. Once that's done the
299 	 * ProcArrayLock can be released as the slot machinery now is
300 	 * protecting against vacuum.
301 	 *
302 	 * Note that, temporarily, the data, not just the catalog, xmin has to be
303 	 * reserved if a data snapshot is to be exported.  Otherwise the initial
304 	 * data snapshot created here is not guaranteed to be valid. After that
305 	 * the data xmin doesn't need to be managed anymore and the global xmin
306 	 * should be recomputed. As we are fine with losing the pegged data xmin
307 	 * after crash - no chance a snapshot would get exported anymore - we can
308 	 * get away with just setting the slot's
309 	 * effective_xmin. ReplicationSlotRelease will reset it again.
310 	 *
311 	 * ----
312 	 */
313 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
314 
315 	xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
316 
317 	SpinLockAcquire(&slot->mutex);
318 	slot->effective_catalog_xmin = xmin_horizon;
319 	slot->data.catalog_xmin = xmin_horizon;
320 	if (need_full_snapshot)
321 		slot->effective_xmin = xmin_horizon;
322 	SpinLockRelease(&slot->mutex);
323 
324 	ReplicationSlotsComputeRequiredXmin(true);
325 
326 	LWLockRelease(ProcArrayLock);
327 
328 	ReplicationSlotMarkDirty();
329 	ReplicationSlotSave();
330 
331 	ctx = StartupDecodingContext(NIL, restart_lsn, xmin_horizon,
332 								 need_full_snapshot, false,
333 								 read_page, prepare_write, do_write,
334 								 update_progress);
335 
336 	/* call output plugin initialization callback */
337 	old_context = MemoryContextSwitchTo(ctx->context);
338 	if (ctx->callbacks.startup_cb != NULL)
339 		startup_cb_wrapper(ctx, &ctx->options, true);
340 	MemoryContextSwitchTo(old_context);
341 
342 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
343 
344 	return ctx;
345 }
346 
347 /*
348  * Create a new decoding context, for a logical slot that has previously been
349  * used already.
350  *
351  * start_lsn
352  *		The LSN at which to start decoding.  If InvalidXLogRecPtr, restart
353  *		from the slot's confirmed_flush; otherwise, start from the specified
354  *		location (but move it forwards to confirmed_flush if it's older than
355  *		that, see below).
356  *
357  * output_plugin_options
358  *		options passed to the output plugin.
359  *
360  * fast_forward
361  *		bypass the generation of logical changes.
362  *
363  * read_page, prepare_write, do_write, update_progress
364  *		callbacks that have to be filled to perform the use-case dependent,
365  *		actual work.
366  *
367  * Needs to be called while in a memory context that's at least as long lived
368  * as the decoding context because further memory contexts will be created
369  * inside it.
370  *
371  * Returns an initialized decoding context after calling the output plugin's
372  * startup function.
373  */
374 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)375 CreateDecodingContext(XLogRecPtr start_lsn,
376 					  List *output_plugin_options,
377 					  bool fast_forward,
378 					  XLogPageReadCB read_page,
379 					  LogicalOutputPluginWriterPrepareWrite prepare_write,
380 					  LogicalOutputPluginWriterWrite do_write,
381 					  LogicalOutputPluginWriterUpdateProgress update_progress)
382 {
383 	LogicalDecodingContext *ctx;
384 	ReplicationSlot *slot;
385 	MemoryContext old_context;
386 
387 	/* shorter lines... */
388 	slot = MyReplicationSlot;
389 
390 	/* first some sanity checks that are unlikely to be violated */
391 	if (slot == NULL)
392 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
393 
394 	/* make sure the passed slot is suitable, these are user facing errors */
395 	if (SlotIsPhysical(slot))
396 		ereport(ERROR,
397 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
398 				 (errmsg("cannot use physical replication slot for logical decoding"))));
399 
400 	if (slot->data.database != MyDatabaseId)
401 		ereport(ERROR,
402 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
403 				 (errmsg("replication slot \"%s\" was not created in this database",
404 						 NameStr(slot->data.name)))));
405 
406 	if (start_lsn == InvalidXLogRecPtr)
407 	{
408 		/* continue from last position */
409 		start_lsn = slot->data.confirmed_flush;
410 	}
411 	else if (start_lsn < slot->data.confirmed_flush)
412 	{
413 		/*
414 		 * It might seem like we should error out in this case, but it's
415 		 * pretty common for a client to acknowledge a LSN it doesn't have to
416 		 * do anything for, and thus didn't store persistently, because the
417 		 * xlog records didn't result in anything relevant for logical
418 		 * decoding. Clients have to be able to do that to support synchronous
419 		 * replication.
420 		 */
421 		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
422 			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
423 			 (uint32) (slot->data.confirmed_flush >> 32),
424 			 (uint32) slot->data.confirmed_flush);
425 
426 		start_lsn = slot->data.confirmed_flush;
427 	}
428 
429 	ctx = StartupDecodingContext(output_plugin_options,
430 								 start_lsn, InvalidTransactionId, false,
431 								 fast_forward, read_page, prepare_write,
432 								 do_write, update_progress);
433 
434 	/* call output plugin initialization callback */
435 	old_context = MemoryContextSwitchTo(ctx->context);
436 	if (ctx->callbacks.startup_cb != NULL)
437 		startup_cb_wrapper(ctx, &ctx->options, false);
438 	MemoryContextSwitchTo(old_context);
439 
440 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
441 
442 	ereport(LOG,
443 			(errmsg("starting logical decoding for slot \"%s\"",
444 					NameStr(slot->data.name)),
445 			 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
446 					   (uint32) (slot->data.confirmed_flush >> 32),
447 					   (uint32) slot->data.confirmed_flush,
448 					   (uint32) (slot->data.restart_lsn >> 32),
449 					   (uint32) slot->data.restart_lsn)));
450 
451 	return ctx;
452 }
453 
454 /*
455  * Returns true if a consistent initial decoding snapshot has been built.
456  */
457 bool
DecodingContextReady(LogicalDecodingContext * ctx)458 DecodingContextReady(LogicalDecodingContext *ctx)
459 {
460 	return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
461 }
462 
463 /*
464  * Read from the decoding slot, until it is ready to start extracting changes.
465  */
466 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)467 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
468 {
469 	XLogRecPtr	startptr;
470 	ReplicationSlot *slot = ctx->slot;
471 
472 	/* Initialize from where to start reading WAL. */
473 	startptr = slot->data.restart_lsn;
474 
475 	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
476 		 (uint32) (slot->data.restart_lsn >> 32),
477 		 (uint32) slot->data.restart_lsn);
478 
479 	/* Wait for a consistent starting point */
480 	for (;;)
481 	{
482 		XLogRecord *record;
483 		char	   *err = NULL;
484 
485 		/* the read_page callback waits for new WAL */
486 		record = XLogReadRecord(ctx->reader, startptr, &err);
487 		if (err)
488 			elog(ERROR, "%s", err);
489 		if (!record)
490 			elog(ERROR, "no record found"); /* shouldn't happen */
491 
492 		startptr = InvalidXLogRecPtr;
493 
494 		LogicalDecodingProcessRecord(ctx, ctx->reader);
495 
496 		/* only continue till we found a consistent spot */
497 		if (DecodingContextReady(ctx))
498 			break;
499 
500 		CHECK_FOR_INTERRUPTS();
501 	}
502 
503 	SpinLockAcquire(&slot->mutex);
504 	slot->data.confirmed_flush = ctx->reader->EndRecPtr;
505 	SpinLockRelease(&slot->mutex);
506 }
507 
508 /*
509  * Free a previously allocated decoding context, invoking the shutdown
510  * callback if necessary.
511  */
512 void
FreeDecodingContext(LogicalDecodingContext * ctx)513 FreeDecodingContext(LogicalDecodingContext *ctx)
514 {
515 	if (ctx->callbacks.shutdown_cb != NULL)
516 		shutdown_cb_wrapper(ctx);
517 
518 	ReorderBufferFree(ctx->reorder);
519 	FreeSnapshotBuilder(ctx->snapshot_builder);
520 	XLogReaderFree(ctx->reader);
521 	MemoryContextDelete(ctx->context);
522 }
523 
524 /*
525  * Prepare a write using the context's output routine.
526  */
527 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)528 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
529 {
530 	if (!ctx->accept_writes)
531 		elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
532 
533 	ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
534 	ctx->prepared_write = true;
535 }
536 
537 /*
538  * Perform a write using the context's output routine.
539  */
540 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)541 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
542 {
543 	if (!ctx->prepared_write)
544 		elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
545 
546 	ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
547 	ctx->prepared_write = false;
548 }
549 
550 /*
551  * Update progress tracking (if supported).
552  */
553 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)554 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
555 {
556 	if (!ctx->update_progress)
557 		return;
558 
559 	ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
560 }
561 
562 /*
563  * Load the output plugin, lookup its output plugin init function, and check
564  * that it provides the required callbacks.
565  */
566 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)567 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
568 {
569 	LogicalOutputPluginInit plugin_init;
570 
571 	plugin_init = (LogicalOutputPluginInit)
572 		load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
573 
574 	if (plugin_init == NULL)
575 		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
576 
577 	/* ask the output plugin to fill the callback struct */
578 	plugin_init(callbacks);
579 
580 	if (callbacks->begin_cb == NULL)
581 		elog(ERROR, "output plugins have to register a begin callback");
582 	if (callbacks->change_cb == NULL)
583 		elog(ERROR, "output plugins have to register a change callback");
584 	if (callbacks->commit_cb == NULL)
585 		elog(ERROR, "output plugins have to register a commit callback");
586 }
587 
588 static void
output_plugin_error_callback(void * arg)589 output_plugin_error_callback(void *arg)
590 {
591 	LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
592 
593 	/* not all callbacks have an associated LSN  */
594 	if (state->report_location != InvalidXLogRecPtr)
595 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
596 				   NameStr(state->ctx->slot->data.name),
597 				   NameStr(state->ctx->slot->data.plugin),
598 				   state->callback_name,
599 				   (uint32) (state->report_location >> 32),
600 				   (uint32) state->report_location);
601 	else
602 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
603 				   NameStr(state->ctx->slot->data.name),
604 				   NameStr(state->ctx->slot->data.plugin),
605 				   state->callback_name);
606 }
607 
608 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)609 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
610 {
611 	LogicalErrorCallbackState state;
612 	ErrorContextCallback errcallback;
613 
614 	Assert(!ctx->fast_forward);
615 
616 	/* Push callback + info on the error context stack */
617 	state.ctx = ctx;
618 	state.callback_name = "startup";
619 	state.report_location = InvalidXLogRecPtr;
620 	errcallback.callback = output_plugin_error_callback;
621 	errcallback.arg = (void *) &state;
622 	errcallback.previous = error_context_stack;
623 	error_context_stack = &errcallback;
624 
625 	/* set output state */
626 	ctx->accept_writes = false;
627 
628 	/* do the actual work: call callback */
629 	ctx->callbacks.startup_cb(ctx, opt, is_init);
630 
631 	/* Pop the error context stack */
632 	error_context_stack = errcallback.previous;
633 }
634 
635 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)636 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
637 {
638 	LogicalErrorCallbackState state;
639 	ErrorContextCallback errcallback;
640 
641 	Assert(!ctx->fast_forward);
642 
643 	/* Push callback + info on the error context stack */
644 	state.ctx = ctx;
645 	state.callback_name = "shutdown";
646 	state.report_location = InvalidXLogRecPtr;
647 	errcallback.callback = output_plugin_error_callback;
648 	errcallback.arg = (void *) &state;
649 	errcallback.previous = error_context_stack;
650 	error_context_stack = &errcallback;
651 
652 	/* set output state */
653 	ctx->accept_writes = false;
654 
655 	/* do the actual work: call callback */
656 	ctx->callbacks.shutdown_cb(ctx);
657 
658 	/* Pop the error context stack */
659 	error_context_stack = errcallback.previous;
660 }
661 
662 
663 /*
664  * Callbacks for ReorderBuffer which add in some more information and then call
665  * output_plugin.h plugins.
666  */
667 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)668 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
669 {
670 	LogicalDecodingContext *ctx = cache->private_data;
671 	LogicalErrorCallbackState state;
672 	ErrorContextCallback errcallback;
673 
674 	Assert(!ctx->fast_forward);
675 
676 	/* Push callback + info on the error context stack */
677 	state.ctx = ctx;
678 	state.callback_name = "begin";
679 	state.report_location = txn->first_lsn;
680 	errcallback.callback = output_plugin_error_callback;
681 	errcallback.arg = (void *) &state;
682 	errcallback.previous = error_context_stack;
683 	error_context_stack = &errcallback;
684 
685 	/* set output state */
686 	ctx->accept_writes = true;
687 	ctx->write_xid = txn->xid;
688 	ctx->write_location = txn->first_lsn;
689 
690 	/* do the actual work: call callback */
691 	ctx->callbacks.begin_cb(ctx, txn);
692 
693 	/* Pop the error context stack */
694 	error_context_stack = errcallback.previous;
695 }
696 
697 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)698 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
699 				  XLogRecPtr commit_lsn)
700 {
701 	LogicalDecodingContext *ctx = cache->private_data;
702 	LogicalErrorCallbackState state;
703 	ErrorContextCallback errcallback;
704 
705 	Assert(!ctx->fast_forward);
706 
707 	/* Push callback + info on the error context stack */
708 	state.ctx = ctx;
709 	state.callback_name = "commit";
710 	state.report_location = txn->final_lsn; /* beginning of commit record */
711 	errcallback.callback = output_plugin_error_callback;
712 	errcallback.arg = (void *) &state;
713 	errcallback.previous = error_context_stack;
714 	error_context_stack = &errcallback;
715 
716 	/* set output state */
717 	ctx->accept_writes = true;
718 	ctx->write_xid = txn->xid;
719 	ctx->write_location = txn->end_lsn; /* points to the end of the record */
720 
721 	/* do the actual work: call callback */
722 	ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
723 
724 	/* Pop the error context stack */
725 	error_context_stack = errcallback.previous;
726 }
727 
728 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)729 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
730 				  Relation relation, ReorderBufferChange *change)
731 {
732 	LogicalDecodingContext *ctx = cache->private_data;
733 	LogicalErrorCallbackState state;
734 	ErrorContextCallback errcallback;
735 
736 	Assert(!ctx->fast_forward);
737 
738 	/* Push callback + info on the error context stack */
739 	state.ctx = ctx;
740 	state.callback_name = "change";
741 	state.report_location = change->lsn;
742 	errcallback.callback = output_plugin_error_callback;
743 	errcallback.arg = (void *) &state;
744 	errcallback.previous = error_context_stack;
745 	error_context_stack = &errcallback;
746 
747 	/* set output state */
748 	ctx->accept_writes = true;
749 	ctx->write_xid = txn->xid;
750 
751 	/*
752 	 * report this change's lsn so replies from clients can give an up2date
753 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
754 	 * receipt of this transaction, but it might allow another transaction's
755 	 * commit to be confirmed with one message.
756 	 */
757 	ctx->write_location = change->lsn;
758 
759 	ctx->callbacks.change_cb(ctx, txn, relation, change);
760 
761 	/* Pop the error context stack */
762 	error_context_stack = errcallback.previous;
763 }
764 
765 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)766 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
767 					int nrelations, Relation relations[], ReorderBufferChange *change)
768 {
769 	LogicalDecodingContext *ctx = cache->private_data;
770 	LogicalErrorCallbackState state;
771 	ErrorContextCallback errcallback;
772 
773 	Assert(!ctx->fast_forward);
774 
775 	if (!ctx->callbacks.truncate_cb)
776 		return;
777 
778 	/* Push callback + info on the error context stack */
779 	state.ctx = ctx;
780 	state.callback_name = "truncate";
781 	state.report_location = change->lsn;
782 	errcallback.callback = output_plugin_error_callback;
783 	errcallback.arg = (void *) &state;
784 	errcallback.previous = error_context_stack;
785 	error_context_stack = &errcallback;
786 
787 	/* set output state */
788 	ctx->accept_writes = true;
789 	ctx->write_xid = txn->xid;
790 
791 	/*
792 	 * report this change's lsn so replies from clients can give an up2date
793 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
794 	 * receipt of this transaction, but it might allow another transaction's
795 	 * commit to be confirmed with one message.
796 	 */
797 	ctx->write_location = change->lsn;
798 
799 	ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
800 
801 	/* Pop the error context stack */
802 	error_context_stack = errcallback.previous;
803 }
804 
805 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)806 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
807 {
808 	LogicalErrorCallbackState state;
809 	ErrorContextCallback errcallback;
810 	bool		ret;
811 
812 	Assert(!ctx->fast_forward);
813 
814 	/* Push callback + info on the error context stack */
815 	state.ctx = ctx;
816 	state.callback_name = "filter_by_origin";
817 	state.report_location = InvalidXLogRecPtr;
818 	errcallback.callback = output_plugin_error_callback;
819 	errcallback.arg = (void *) &state;
820 	errcallback.previous = error_context_stack;
821 	error_context_stack = &errcallback;
822 
823 	/* set output state */
824 	ctx->accept_writes = false;
825 
826 	/* do the actual work: call callback */
827 	ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
828 
829 	/* Pop the error context stack */
830 	error_context_stack = errcallback.previous;
831 
832 	return ret;
833 }
834 
835 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)836 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
837 				   XLogRecPtr message_lsn, bool transactional,
838 				   const char *prefix, Size message_size, const char *message)
839 {
840 	LogicalDecodingContext *ctx = cache->private_data;
841 	LogicalErrorCallbackState state;
842 	ErrorContextCallback errcallback;
843 
844 	Assert(!ctx->fast_forward);
845 
846 	if (ctx->callbacks.message_cb == NULL)
847 		return;
848 
849 	/* Push callback + info on the error context stack */
850 	state.ctx = ctx;
851 	state.callback_name = "message";
852 	state.report_location = message_lsn;
853 	errcallback.callback = output_plugin_error_callback;
854 	errcallback.arg = (void *) &state;
855 	errcallback.previous = error_context_stack;
856 	error_context_stack = &errcallback;
857 
858 	/* set output state */
859 	ctx->accept_writes = true;
860 	ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
861 	ctx->write_location = message_lsn;
862 
863 	/* do the actual work: call callback */
864 	ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
865 							  message_size, message);
866 
867 	/* Pop the error context stack */
868 	error_context_stack = errcallback.previous;
869 }
870 
871 /*
872  * Set the required catalog xmin horizon for historic snapshots in the current
873  * replication slot.
874  *
875  * Note that in the most cases, we won't be able to immediately use the xmin
876  * to increase the xmin horizon: we need to wait till the client has confirmed
877  * receiving current_lsn with LogicalConfirmReceivedLocation().
878  */
879 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)880 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
881 {
882 	bool		updated_xmin = false;
883 	ReplicationSlot *slot;
884 
885 	slot = MyReplicationSlot;
886 
887 	Assert(slot != NULL);
888 
889 	SpinLockAcquire(&slot->mutex);
890 
891 	/*
892 	 * don't overwrite if we already have a newer xmin. This can happen if we
893 	 * restart decoding in a slot.
894 	 */
895 	if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
896 	{
897 	}
898 
899 	/*
900 	 * If the client has already confirmed up to this lsn, we directly can
901 	 * mark this as accepted. This can happen if we restart decoding in a
902 	 * slot.
903 	 */
904 	else if (current_lsn <= slot->data.confirmed_flush)
905 	{
906 		slot->candidate_catalog_xmin = xmin;
907 		slot->candidate_xmin_lsn = current_lsn;
908 
909 		/* our candidate can directly be used */
910 		updated_xmin = true;
911 	}
912 
913 	/*
914 	 * Only increase if the previous values have been applied, otherwise we
915 	 * might never end up updating if the receiver acks too slowly.
916 	 */
917 	else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
918 	{
919 		slot->candidate_catalog_xmin = xmin;
920 		slot->candidate_xmin_lsn = current_lsn;
921 	}
922 	SpinLockRelease(&slot->mutex);
923 
924 	/* candidate already valid with the current flush position, apply */
925 	if (updated_xmin)
926 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
927 }
928 
929 /*
930  * Mark the minimal LSN (restart_lsn) we need to read to replay all
931  * transactions that have not yet committed at current_lsn.
932  *
933  * Just like LogicalIncreaseXminForSlot this only takes effect when the
934  * client has confirmed to have received current_lsn.
935  */
936 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)937 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
938 {
939 	bool		updated_lsn = false;
940 	ReplicationSlot *slot;
941 
942 	slot = MyReplicationSlot;
943 
944 	Assert(slot != NULL);
945 	Assert(restart_lsn != InvalidXLogRecPtr);
946 	Assert(current_lsn != InvalidXLogRecPtr);
947 
948 	SpinLockAcquire(&slot->mutex);
949 
950 	/* don't overwrite if have a newer restart lsn */
951 	if (restart_lsn <= slot->data.restart_lsn)
952 	{
953 	}
954 
955 	/*
956 	 * We might have already flushed far enough to directly accept this lsn,
957 	 * in this case there is no need to check for existing candidate LSNs
958 	 */
959 	else if (current_lsn <= slot->data.confirmed_flush)
960 	{
961 		slot->candidate_restart_valid = current_lsn;
962 		slot->candidate_restart_lsn = restart_lsn;
963 
964 		/* our candidate can directly be used */
965 		updated_lsn = true;
966 	}
967 
968 	/*
969 	 * Only increase if the previous values have been applied, otherwise we
970 	 * might never end up updating if the receiver acks too slowly. A missed
971 	 * value here will just cause some extra effort after reconnecting.
972 	 */
973 	if (slot->candidate_restart_valid == InvalidXLogRecPtr)
974 	{
975 		slot->candidate_restart_valid = current_lsn;
976 		slot->candidate_restart_lsn = restart_lsn;
977 		SpinLockRelease(&slot->mutex);
978 
979 		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
980 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
981 			 (uint32) (current_lsn >> 32), (uint32) current_lsn);
982 	}
983 	else
984 	{
985 		XLogRecPtr	candidate_restart_lsn;
986 		XLogRecPtr	candidate_restart_valid;
987 		XLogRecPtr	confirmed_flush;
988 
989 		candidate_restart_lsn = slot->candidate_restart_lsn;
990 		candidate_restart_valid = slot->candidate_restart_valid;
991 		confirmed_flush = slot->data.confirmed_flush;
992 		SpinLockRelease(&slot->mutex);
993 
994 		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
995 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
996 			 (uint32) (current_lsn >> 32), (uint32) current_lsn,
997 			 (uint32) (candidate_restart_lsn >> 32),
998 			 (uint32) candidate_restart_lsn,
999 			 (uint32) (candidate_restart_valid >> 32),
1000 			 (uint32) candidate_restart_valid,
1001 			 (uint32) (confirmed_flush >> 32),
1002 			 (uint32) confirmed_flush);
1003 	}
1004 
1005 	/* candidates are already valid with the current flush position, apply */
1006 	if (updated_lsn)
1007 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
1008 }
1009 
1010 /*
1011  * Handle a consumer's confirmation having received all changes up to lsn.
1012  */
1013 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1014 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1015 {
1016 	Assert(lsn != InvalidXLogRecPtr);
1017 
1018 	/* Do an unlocked check for candidate_lsn first. */
1019 	if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1020 		MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1021 	{
1022 		bool		updated_xmin = false;
1023 		bool		updated_restart = false;
1024 
1025 		SpinLockAcquire(&MyReplicationSlot->mutex);
1026 
1027 		MyReplicationSlot->data.confirmed_flush = lsn;
1028 
1029 		/* if we're past the location required for bumping xmin, do so */
1030 		if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1031 			MyReplicationSlot->candidate_xmin_lsn <= lsn)
1032 		{
1033 			/*
1034 			 * We have to write the changed xmin to disk *before* we change
1035 			 * the in-memory value, otherwise after a crash we wouldn't know
1036 			 * that some catalog tuples might have been removed already.
1037 			 *
1038 			 * Ensure that by first writing to ->xmin and only update
1039 			 * ->effective_xmin once the new state is synced to disk. After a
1040 			 * crash ->effective_xmin is set to ->xmin.
1041 			 */
1042 			if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1043 				MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1044 			{
1045 				MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1046 				MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1047 				MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1048 				updated_xmin = true;
1049 			}
1050 		}
1051 
1052 		if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1053 			MyReplicationSlot->candidate_restart_valid <= lsn)
1054 		{
1055 			Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1056 
1057 			MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1058 			MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1059 			MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1060 			updated_restart = true;
1061 		}
1062 
1063 		SpinLockRelease(&MyReplicationSlot->mutex);
1064 
1065 		/* first write new xmin to disk, so we know what's up after a crash */
1066 		if (updated_xmin || updated_restart)
1067 		{
1068 			ReplicationSlotMarkDirty();
1069 			ReplicationSlotSave();
1070 			elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1071 		}
1072 
1073 		/*
1074 		 * Now the new xmin is safely on disk, we can let the global value
1075 		 * advance. We do not take ProcArrayLock or similar since we only
1076 		 * advance xmin here and there's not much harm done by a concurrent
1077 		 * computation missing that.
1078 		 */
1079 		if (updated_xmin)
1080 		{
1081 			SpinLockAcquire(&MyReplicationSlot->mutex);
1082 			MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1083 			SpinLockRelease(&MyReplicationSlot->mutex);
1084 
1085 			ReplicationSlotsComputeRequiredXmin(false);
1086 			ReplicationSlotsComputeRequiredLSN();
1087 		}
1088 	}
1089 	else
1090 	{
1091 		SpinLockAcquire(&MyReplicationSlot->mutex);
1092 		MyReplicationSlot->data.confirmed_flush = lsn;
1093 		SpinLockRelease(&MyReplicationSlot->mutex);
1094 	}
1095 }
1096