1 /*-------------------------------------------------------------------------
2  * logical.c
3  *	   PostgreSQL logical decoding coordination
4  *
5  * Copyright (c) 2012-2018, PostgreSQL Global Development Group
6  *
7  * IDENTIFICATION
8  *	  src/backend/replication/logical/logical.c
9  *
10  * NOTES
11  *	  This file coordinates interaction between the various modules that
12  *	  together provide logical decoding, primarily by providing so
13  *	  called LogicalDecodingContexts. The goal is to encapsulate most of the
14  *	  internal complexity for consumers of logical decoding, so they can
15  *	  create and consume a changestream with a low amount of code. Builtin
16  *	  consumers are the walsender and SQL SRF interface, but it's possible to
17  *	  add further ones without changing core code, e.g. to consume changes in
18  *	  a bgworker.
19  *
 *	  The idea is that a consumer provides callbacks: one to read WAL, one
 *	  to prepare a data write, one to actually perform the write and,
 *	  optionally, one to report progress.  They are supplied by the consumer
 *	  because their implementation depends on the type of consumer.  Check
 *	  logicalfuncs.c for an example implementation of a fairly simple consumer
 *	  and an implementation of a WAL reading callback that's suitable for
 *	  simple consumers.
26  *-------------------------------------------------------------------------
27  */
28 
29 #include "postgres.h"
30 
31 #include "miscadmin.h"
32 
33 #include "access/xact.h"
34 #include "access/xlog_internal.h"
35 
36 #include "replication/decode.h"
37 #include "replication/logical.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/origin.h"
40 #include "replication/snapbuild.h"
41 
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44 
45 #include "utils/memutils.h"
46 
47 /* data for errcontext callback */
48 typedef struct LogicalErrorCallbackState
49 {
50 	LogicalDecodingContext *ctx;
51 	const char *callback_name;
52 	XLogRecPtr	report_location;
53 } LogicalErrorCallbackState;
54 
55 /* wrappers around output plugin callbacks */
56 static void output_plugin_error_callback(void *arg);
57 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
58 				   bool is_init);
59 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
60 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
61 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
62 				  XLogRecPtr commit_lsn);
63 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
64 				  Relation relation, ReorderBufferChange *change);
65 static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
66 					int nrelations, Relation relations[], ReorderBufferChange *change);
67 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
68 				   XLogRecPtr message_lsn, bool transactional,
69 				   const char *prefix, Size message_size, const char *message);
70 
71 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
72 
73 /*
74  * Make sure the current settings & environment are capable of doing logical
75  * decoding.
76  */
77 void
CheckLogicalDecodingRequirements(void)78 CheckLogicalDecodingRequirements(void)
79 {
80 	CheckSlotRequirements();
81 
82 	/*
83 	 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
84 	 * needs the same check.
85 	 */
86 
87 	if (wal_level < WAL_LEVEL_LOGICAL)
88 		ereport(ERROR,
89 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
90 				 errmsg("logical decoding requires wal_level >= logical")));
91 
92 	if (MyDatabaseId == InvalidOid)
93 		ereport(ERROR,
94 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
95 				 errmsg("logical decoding requires a database connection")));
96 
97 	/* ----
98 	 * TODO: We got to change that someday soon...
99 	 *
100 	 * There's basically three things missing to allow this:
101 	 * 1) We need to be able to correctly and quickly identify the timeline a
102 	 *	  LSN belongs to
103 	 * 2) We need to force hot_standby_feedback to be enabled at all times so
104 	 *	  the primary cannot remove rows we need.
105 	 * 3) support dropping replication slots referring to a database, in
106 	 *	  dbase_redo. There can't be any active ones due to HS recovery
107 	 *	  conflicts, so that should be relatively easy.
108 	 * ----
109 	 */
110 	if (RecoveryInProgress())
111 		ereport(ERROR,
112 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
113 				 errmsg("logical decoding cannot be used while in recovery")));
114 }
115 
116 /*
117  * Helper function for CreateInitialDecodingContext() and
118  * CreateDecodingContext() performing common tasks.
119  */
120 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)121 StartupDecodingContext(List *output_plugin_options,
122 					   XLogRecPtr start_lsn,
123 					   TransactionId xmin_horizon,
124 					   bool need_full_snapshot,
125 					   bool fast_forward,
126 					   XLogPageReadCB read_page,
127 					   LogicalOutputPluginWriterPrepareWrite prepare_write,
128 					   LogicalOutputPluginWriterWrite do_write,
129 					   LogicalOutputPluginWriterUpdateProgress update_progress)
130 {
131 	ReplicationSlot *slot;
132 	MemoryContext context,
133 				old_context;
134 	LogicalDecodingContext *ctx;
135 
136 	/* shorter lines... */
137 	slot = MyReplicationSlot;
138 
139 	context = AllocSetContextCreate(CurrentMemoryContext,
140 									"Logical decoding context",
141 									ALLOCSET_DEFAULT_SIZES);
142 	old_context = MemoryContextSwitchTo(context);
143 	ctx = palloc0(sizeof(LogicalDecodingContext));
144 
145 	ctx->context = context;
146 
147 	/*
148 	 * (re-)load output plugins, so we detect a bad (removed) output plugin
149 	 * now.
150 	 */
151 	if (!fast_forward)
152 		LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
153 
154 	/*
155 	 * Now that the slot's xmin has been set, we can announce ourselves as a
156 	 * logical decoding backend which doesn't need to be checked individually
157 	 * when computing the xmin horizon because the xmin is enforced via
158 	 * replication slots.
159 	 *
160 	 * We can only do so if we're outside of a transaction (i.e. the case when
161 	 * streaming changes via walsender), otherwise an already setup
162 	 * snapshot/xid would end up being ignored. That's not a particularly
163 	 * bothersome restriction since the SQL interface can't be used for
164 	 * streaming anyway.
165 	 */
166 	if (!IsTransactionOrTransactionBlock())
167 	{
168 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
169 		MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
170 		LWLockRelease(ProcArrayLock);
171 	}
172 
173 	ctx->slot = slot;
174 
175 	ctx->reader = XLogReaderAllocate(wal_segment_size, read_page, ctx);
176 	if (!ctx->reader)
177 		ereport(ERROR,
178 				(errcode(ERRCODE_OUT_OF_MEMORY),
179 				 errmsg("out of memory")));
180 
181 	ctx->reader->private_data = ctx;
182 
183 	ctx->reorder = ReorderBufferAllocate();
184 	ctx->snapshot_builder =
185 		AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
186 								need_full_snapshot);
187 
188 	ctx->reorder->private_data = ctx;
189 
190 	/* wrap output plugin callbacks, so we can add error context information */
191 	ctx->reorder->begin = begin_cb_wrapper;
192 	ctx->reorder->apply_change = change_cb_wrapper;
193 	ctx->reorder->apply_truncate = truncate_cb_wrapper;
194 	ctx->reorder->commit = commit_cb_wrapper;
195 	ctx->reorder->message = message_cb_wrapper;
196 
197 	ctx->out = makeStringInfo();
198 	ctx->prepare_write = prepare_write;
199 	ctx->write = do_write;
200 	ctx->update_progress = update_progress;
201 
202 	ctx->output_plugin_options = output_plugin_options;
203 
204 	ctx->fast_forward = fast_forward;
205 
206 	MemoryContextSwitchTo(old_context);
207 
208 	return ctx;
209 }
210 
211 /*
212  * Create a new decoding context, for a new logical slot.
213  *
214  * plugin contains the name of the output plugin
215  * output_plugin_options contains options passed to the output plugin
216  * read_page, prepare_write, do_write, update_progress
217  *		callbacks that have to be filled to perform the use-case dependent,
218  *		actual, work.
219  *
220  * Needs to be called while in a memory context that's at least as long lived
221  * as the decoding context because further memory contexts will be created
222  * inside it.
223  *
224  * Returns an initialized decoding context after calling the output plugin's
225  * startup function.
226  */
227 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)228 CreateInitDecodingContext(char *plugin,
229 						  List *output_plugin_options,
230 						  bool need_full_snapshot,
231 						  XLogPageReadCB read_page,
232 						  LogicalOutputPluginWriterPrepareWrite prepare_write,
233 						  LogicalOutputPluginWriterWrite do_write,
234 						  LogicalOutputPluginWriterUpdateProgress update_progress)
235 {
236 	TransactionId xmin_horizon = InvalidTransactionId;
237 	ReplicationSlot *slot;
238 	LogicalDecodingContext *ctx;
239 	MemoryContext old_context;
240 
241 	/* shorter lines... */
242 	slot = MyReplicationSlot;
243 
244 	/* first some sanity checks that are unlikely to be violated */
245 	if (slot == NULL)
246 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
247 
248 	if (plugin == NULL)
249 		elog(ERROR, "cannot initialize logical decoding without a specified plugin");
250 
251 	/* Make sure the passed slot is suitable. These are user facing errors. */
252 	if (SlotIsPhysical(slot))
253 		ereport(ERROR,
254 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
255 				 errmsg("cannot use physical replication slot for logical decoding")));
256 
257 	if (slot->data.database != MyDatabaseId)
258 		ereport(ERROR,
259 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
260 				 errmsg("replication slot \"%s\" was not created in this database",
261 						NameStr(slot->data.name))));
262 
263 	if (IsTransactionState() &&
264 		GetTopTransactionIdIfAny() != InvalidTransactionId)
265 		ereport(ERROR,
266 				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
267 				 errmsg("cannot create logical replication slot in transaction that has performed writes")));
268 
269 	/* register output plugin name with slot */
270 	SpinLockAcquire(&slot->mutex);
271 	StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
272 	SpinLockRelease(&slot->mutex);
273 
274 	ReplicationSlotReserveWal();
275 
276 	/* ----
277 	 * This is a bit tricky: We need to determine a safe xmin horizon to start
278 	 * decoding from, to avoid starting from a running xacts record referring
279 	 * to xids whose rows have been vacuumed or pruned
280 	 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
281 	 * without further interlock its return value might immediately be out of
282 	 * date.
283 	 *
284 	 * So we have to acquire the ProcArrayLock to prevent computation of new
285 	 * xmin horizons by other backends, get the safe decoding xid, and inform
286 	 * the slot machinery about the new limit. Once that's done the
287 	 * ProcArrayLock can be released as the slot machinery now is
288 	 * protecting against vacuum.
289 	 *
290 	 * Note that, temporarily, the data, not just the catalog, xmin has to be
291 	 * reserved if a data snapshot is to be exported.  Otherwise the initial
292 	 * data snapshot created here is not guaranteed to be valid. After that
293 	 * the data xmin doesn't need to be managed anymore and the global xmin
294 	 * should be recomputed. As we are fine with losing the pegged data xmin
295 	 * after crash - no chance a snapshot would get exported anymore - we can
296 	 * get away with just setting the slot's
297 	 * effective_xmin. ReplicationSlotRelease will reset it again.
298 	 *
299 	 * ----
300 	 */
301 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
302 
303 	xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
304 
305 	SpinLockAcquire(&slot->mutex);
306 	slot->effective_catalog_xmin = xmin_horizon;
307 	slot->data.catalog_xmin = xmin_horizon;
308 	if (need_full_snapshot)
309 		slot->effective_xmin = xmin_horizon;
310 	SpinLockRelease(&slot->mutex);
311 
312 	ReplicationSlotsComputeRequiredXmin(true);
313 
314 	LWLockRelease(ProcArrayLock);
315 
316 	ReplicationSlotMarkDirty();
317 	ReplicationSlotSave();
318 
319 	ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon,
320 								 need_full_snapshot, false,
321 								 read_page, prepare_write, do_write,
322 								 update_progress);
323 
324 	/* call output plugin initialization callback */
325 	old_context = MemoryContextSwitchTo(ctx->context);
326 	if (ctx->callbacks.startup_cb != NULL)
327 		startup_cb_wrapper(ctx, &ctx->options, true);
328 	MemoryContextSwitchTo(old_context);
329 
330 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
331 
332 	return ctx;
333 }
334 
335 /*
336  * Create a new decoding context, for a logical slot that has previously been
337  * used already.
338  *
339  * start_lsn
340  *		The LSN at which to start decoding.  If InvalidXLogRecPtr, restart
341  *		from the slot's confirmed_flush; otherwise, start from the specified
342  *		location (but move it forwards to confirmed_flush if it's older than
343  *		that, see below).
344  *
345  * output_plugin_options
346  *		options passed to the output plugin.
347  *
348  * fast_forward
349  *		bypass the generation of logical changes.
350  *
351  * read_page, prepare_write, do_write, update_progress
352  *		callbacks that have to be filled to perform the use-case dependent,
353  *		actual work.
354  *
355  * Needs to be called while in a memory context that's at least as long lived
356  * as the decoding context because further memory contexts will be created
357  * inside it.
358  *
359  * Returns an initialized decoding context after calling the output plugin's
360  * startup function.
361  */
362 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)363 CreateDecodingContext(XLogRecPtr start_lsn,
364 					  List *output_plugin_options,
365 					  bool fast_forward,
366 					  XLogPageReadCB read_page,
367 					  LogicalOutputPluginWriterPrepareWrite prepare_write,
368 					  LogicalOutputPluginWriterWrite do_write,
369 					  LogicalOutputPluginWriterUpdateProgress update_progress)
370 {
371 	LogicalDecodingContext *ctx;
372 	ReplicationSlot *slot;
373 	MemoryContext old_context;
374 
375 	/* shorter lines... */
376 	slot = MyReplicationSlot;
377 
378 	/* first some sanity checks that are unlikely to be violated */
379 	if (slot == NULL)
380 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
381 
382 	/* make sure the passed slot is suitable, these are user facing errors */
383 	if (SlotIsPhysical(slot))
384 		ereport(ERROR,
385 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
386 				 (errmsg("cannot use physical replication slot for logical decoding"))));
387 
388 	if (slot->data.database != MyDatabaseId)
389 		ereport(ERROR,
390 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
391 				 (errmsg("replication slot \"%s\" was not created in this database",
392 						 NameStr(slot->data.name)))));
393 
394 	if (start_lsn == InvalidXLogRecPtr)
395 	{
396 		/* continue from last position */
397 		start_lsn = slot->data.confirmed_flush;
398 	}
399 	else if (start_lsn < slot->data.confirmed_flush)
400 	{
401 		/*
402 		 * It might seem like we should error out in this case, but it's
403 		 * pretty common for a client to acknowledge a LSN it doesn't have to
404 		 * do anything for, and thus didn't store persistently, because the
405 		 * xlog records didn't result in anything relevant for logical
406 		 * decoding. Clients have to be able to do that to support synchronous
407 		 * replication.
408 		 */
409 		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
410 			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
411 			 (uint32) (slot->data.confirmed_flush >> 32),
412 			 (uint32) slot->data.confirmed_flush);
413 
414 		start_lsn = slot->data.confirmed_flush;
415 	}
416 
417 	ctx = StartupDecodingContext(output_plugin_options,
418 								 start_lsn, InvalidTransactionId, false,
419 								 fast_forward, read_page, prepare_write,
420 								 do_write, update_progress);
421 
422 	/* call output plugin initialization callback */
423 	old_context = MemoryContextSwitchTo(ctx->context);
424 	if (ctx->callbacks.startup_cb != NULL)
425 		startup_cb_wrapper(ctx, &ctx->options, false);
426 	MemoryContextSwitchTo(old_context);
427 
428 	ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
429 
430 	ereport(LOG,
431 			(errmsg("starting logical decoding for slot \"%s\"",
432 					NameStr(slot->data.name)),
433 			 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
434 					   (uint32) (slot->data.confirmed_flush >> 32),
435 					   (uint32) slot->data.confirmed_flush,
436 					   (uint32) (slot->data.restart_lsn >> 32),
437 					   (uint32) slot->data.restart_lsn)));
438 
439 	return ctx;
440 }
441 
442 /*
443  * Returns true if a consistent initial decoding snapshot has been built.
444  */
445 bool
DecodingContextReady(LogicalDecodingContext * ctx)446 DecodingContextReady(LogicalDecodingContext *ctx)
447 {
448 	return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
449 }
450 
451 /*
452  * Read from the decoding slot, until it is ready to start extracting changes.
453  */
454 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)455 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
456 {
457 	XLogRecPtr	startptr;
458 	ReplicationSlot *slot = ctx->slot;
459 
460 	/* Initialize from where to start reading WAL. */
461 	startptr = slot->data.restart_lsn;
462 
463 	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
464 		 (uint32) (slot->data.restart_lsn >> 32),
465 		 (uint32) slot->data.restart_lsn);
466 
467 	/* Wait for a consistent starting point */
468 	for (;;)
469 	{
470 		XLogRecord *record;
471 		char	   *err = NULL;
472 
473 		/* the read_page callback waits for new WAL */
474 		record = XLogReadRecord(ctx->reader, startptr, &err);
475 		if (err)
476 			elog(ERROR, "%s", err);
477 		if (!record)
478 			elog(ERROR, "no record found"); /* shouldn't happen */
479 
480 		startptr = InvalidXLogRecPtr;
481 
482 		LogicalDecodingProcessRecord(ctx, ctx->reader);
483 
484 		/* only continue till we found a consistent spot */
485 		if (DecodingContextReady(ctx))
486 			break;
487 
488 		CHECK_FOR_INTERRUPTS();
489 	}
490 
491 	SpinLockAcquire(&slot->mutex);
492 	slot->data.confirmed_flush = ctx->reader->EndRecPtr;
493 	SpinLockRelease(&slot->mutex);
494 }
495 
496 /*
497  * Free a previously allocated decoding context, invoking the shutdown
498  * callback if necessary.
499  */
500 void
FreeDecodingContext(LogicalDecodingContext * ctx)501 FreeDecodingContext(LogicalDecodingContext *ctx)
502 {
503 	if (ctx->callbacks.shutdown_cb != NULL)
504 		shutdown_cb_wrapper(ctx);
505 
506 	ReorderBufferFree(ctx->reorder);
507 	FreeSnapshotBuilder(ctx->snapshot_builder);
508 	XLogReaderFree(ctx->reader);
509 	MemoryContextDelete(ctx->context);
510 }
511 
512 /*
513  * Prepare a write using the context's output routine.
514  */
515 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)516 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
517 {
518 	if (!ctx->accept_writes)
519 		elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
520 
521 	ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
522 	ctx->prepared_write = true;
523 }
524 
525 /*
526  * Perform a write using the context's output routine.
527  */
528 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)529 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
530 {
531 	if (!ctx->prepared_write)
532 		elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
533 
534 	ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
535 	ctx->prepared_write = false;
536 }
537 
538 /*
539  * Update progress tracking (if supported).
540  */
541 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)542 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
543 {
544 	if (!ctx->update_progress)
545 		return;
546 
547 	ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
548 }
549 
550 /*
551  * Load the output plugin, lookup its output plugin init function, and check
552  * that it provides the required callbacks.
553  */
554 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)555 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
556 {
557 	LogicalOutputPluginInit plugin_init;
558 
559 	plugin_init = (LogicalOutputPluginInit)
560 		load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
561 
562 	if (plugin_init == NULL)
563 		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
564 
565 	/* ask the output plugin to fill the callback struct */
566 	plugin_init(callbacks);
567 
568 	if (callbacks->begin_cb == NULL)
569 		elog(ERROR, "output plugins have to register a begin callback");
570 	if (callbacks->change_cb == NULL)
571 		elog(ERROR, "output plugins have to register a change callback");
572 	if (callbacks->commit_cb == NULL)
573 		elog(ERROR, "output plugins have to register a commit callback");
574 }
575 
576 static void
output_plugin_error_callback(void * arg)577 output_plugin_error_callback(void *arg)
578 {
579 	LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
580 
581 	/* not all callbacks have an associated LSN  */
582 	if (state->report_location != InvalidXLogRecPtr)
583 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
584 				   NameStr(state->ctx->slot->data.name),
585 				   NameStr(state->ctx->slot->data.plugin),
586 				   state->callback_name,
587 				   (uint32) (state->report_location >> 32),
588 				   (uint32) state->report_location);
589 	else
590 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
591 				   NameStr(state->ctx->slot->data.name),
592 				   NameStr(state->ctx->slot->data.plugin),
593 				   state->callback_name);
594 }
595 
596 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)597 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
598 {
599 	LogicalErrorCallbackState state;
600 	ErrorContextCallback errcallback;
601 
602 	Assert(!ctx->fast_forward);
603 
604 	/* Push callback + info on the error context stack */
605 	state.ctx = ctx;
606 	state.callback_name = "startup";
607 	state.report_location = InvalidXLogRecPtr;
608 	errcallback.callback = output_plugin_error_callback;
609 	errcallback.arg = (void *) &state;
610 	errcallback.previous = error_context_stack;
611 	error_context_stack = &errcallback;
612 
613 	/* set output state */
614 	ctx->accept_writes = false;
615 
616 	/* do the actual work: call callback */
617 	ctx->callbacks.startup_cb(ctx, opt, is_init);
618 
619 	/* Pop the error context stack */
620 	error_context_stack = errcallback.previous;
621 }
622 
623 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)624 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
625 {
626 	LogicalErrorCallbackState state;
627 	ErrorContextCallback errcallback;
628 
629 	Assert(!ctx->fast_forward);
630 
631 	/* Push callback + info on the error context stack */
632 	state.ctx = ctx;
633 	state.callback_name = "shutdown";
634 	state.report_location = InvalidXLogRecPtr;
635 	errcallback.callback = output_plugin_error_callback;
636 	errcallback.arg = (void *) &state;
637 	errcallback.previous = error_context_stack;
638 	error_context_stack = &errcallback;
639 
640 	/* set output state */
641 	ctx->accept_writes = false;
642 
643 	/* do the actual work: call callback */
644 	ctx->callbacks.shutdown_cb(ctx);
645 
646 	/* Pop the error context stack */
647 	error_context_stack = errcallback.previous;
648 }
649 
650 
651 /*
652  * Callbacks for ReorderBuffer which add in some more information and then call
653  * output_plugin.h plugins.
654  */
655 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)656 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
657 {
658 	LogicalDecodingContext *ctx = cache->private_data;
659 	LogicalErrorCallbackState state;
660 	ErrorContextCallback errcallback;
661 
662 	Assert(!ctx->fast_forward);
663 
664 	/* Push callback + info on the error context stack */
665 	state.ctx = ctx;
666 	state.callback_name = "begin";
667 	state.report_location = txn->first_lsn;
668 	errcallback.callback = output_plugin_error_callback;
669 	errcallback.arg = (void *) &state;
670 	errcallback.previous = error_context_stack;
671 	error_context_stack = &errcallback;
672 
673 	/* set output state */
674 	ctx->accept_writes = true;
675 	ctx->write_xid = txn->xid;
676 	ctx->write_location = txn->first_lsn;
677 
678 	/* do the actual work: call callback */
679 	ctx->callbacks.begin_cb(ctx, txn);
680 
681 	/* Pop the error context stack */
682 	error_context_stack = errcallback.previous;
683 }
684 
685 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)686 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
687 				  XLogRecPtr commit_lsn)
688 {
689 	LogicalDecodingContext *ctx = cache->private_data;
690 	LogicalErrorCallbackState state;
691 	ErrorContextCallback errcallback;
692 
693 	Assert(!ctx->fast_forward);
694 
695 	/* Push callback + info on the error context stack */
696 	state.ctx = ctx;
697 	state.callback_name = "commit";
698 	state.report_location = txn->final_lsn; /* beginning of commit record */
699 	errcallback.callback = output_plugin_error_callback;
700 	errcallback.arg = (void *) &state;
701 	errcallback.previous = error_context_stack;
702 	error_context_stack = &errcallback;
703 
704 	/* set output state */
705 	ctx->accept_writes = true;
706 	ctx->write_xid = txn->xid;
707 	ctx->write_location = txn->end_lsn; /* points to the end of the record */
708 
709 	/* do the actual work: call callback */
710 	ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
711 
712 	/* Pop the error context stack */
713 	error_context_stack = errcallback.previous;
714 }
715 
716 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)717 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
718 				  Relation relation, ReorderBufferChange *change)
719 {
720 	LogicalDecodingContext *ctx = cache->private_data;
721 	LogicalErrorCallbackState state;
722 	ErrorContextCallback errcallback;
723 
724 	Assert(!ctx->fast_forward);
725 
726 	/* Push callback + info on the error context stack */
727 	state.ctx = ctx;
728 	state.callback_name = "change";
729 	state.report_location = change->lsn;
730 	errcallback.callback = output_plugin_error_callback;
731 	errcallback.arg = (void *) &state;
732 	errcallback.previous = error_context_stack;
733 	error_context_stack = &errcallback;
734 
735 	/* set output state */
736 	ctx->accept_writes = true;
737 	ctx->write_xid = txn->xid;
738 
739 	/*
740 	 * report this change's lsn so replies from clients can give an up2date
741 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
742 	 * receipt of this transaction, but it might allow another transaction's
743 	 * commit to be confirmed with one message.
744 	 */
745 	ctx->write_location = change->lsn;
746 
747 	ctx->callbacks.change_cb(ctx, txn, relation, change);
748 
749 	/* Pop the error context stack */
750 	error_context_stack = errcallback.previous;
751 }
752 
753 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)754 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
755 					int nrelations, Relation relations[], ReorderBufferChange *change)
756 {
757 	LogicalDecodingContext *ctx = cache->private_data;
758 	LogicalErrorCallbackState state;
759 	ErrorContextCallback errcallback;
760 
761 	Assert(!ctx->fast_forward);
762 
763 	if (!ctx->callbacks.truncate_cb)
764 		return;
765 
766 	/* Push callback + info on the error context stack */
767 	state.ctx = ctx;
768 	state.callback_name = "truncate";
769 	state.report_location = change->lsn;
770 	errcallback.callback = output_plugin_error_callback;
771 	errcallback.arg = (void *) &state;
772 	errcallback.previous = error_context_stack;
773 	error_context_stack = &errcallback;
774 
775 	/* set output state */
776 	ctx->accept_writes = true;
777 	ctx->write_xid = txn->xid;
778 
779 	/*
780 	 * report this change's lsn so replies from clients can give an up2date
781 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
782 	 * receipt of this transaction, but it might allow another transaction's
783 	 * commit to be confirmed with one message.
784 	 */
785 	ctx->write_location = change->lsn;
786 
787 	ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
788 
789 	/* Pop the error context stack */
790 	error_context_stack = errcallback.previous;
791 }
792 
793 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)794 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
795 {
796 	LogicalErrorCallbackState state;
797 	ErrorContextCallback errcallback;
798 	bool		ret;
799 
800 	Assert(!ctx->fast_forward);
801 
802 	/* Push callback + info on the error context stack */
803 	state.ctx = ctx;
804 	state.callback_name = "filter_by_origin";
805 	state.report_location = InvalidXLogRecPtr;
806 	errcallback.callback = output_plugin_error_callback;
807 	errcallback.arg = (void *) &state;
808 	errcallback.previous = error_context_stack;
809 	error_context_stack = &errcallback;
810 
811 	/* set output state */
812 	ctx->accept_writes = false;
813 
814 	/* do the actual work: call callback */
815 	ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
816 
817 	/* Pop the error context stack */
818 	error_context_stack = errcallback.previous;
819 
820 	return ret;
821 }
822 
823 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)824 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
825 				   XLogRecPtr message_lsn, bool transactional,
826 				   const char *prefix, Size message_size, const char *message)
827 {
828 	LogicalDecodingContext *ctx = cache->private_data;
829 	LogicalErrorCallbackState state;
830 	ErrorContextCallback errcallback;
831 
832 	Assert(!ctx->fast_forward);
833 
834 	if (ctx->callbacks.message_cb == NULL)
835 		return;
836 
837 	/* Push callback + info on the error context stack */
838 	state.ctx = ctx;
839 	state.callback_name = "message";
840 	state.report_location = message_lsn;
841 	errcallback.callback = output_plugin_error_callback;
842 	errcallback.arg = (void *) &state;
843 	errcallback.previous = error_context_stack;
844 	error_context_stack = &errcallback;
845 
846 	/* set output state */
847 	ctx->accept_writes = true;
848 	ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
849 	ctx->write_location = message_lsn;
850 
851 	/* do the actual work: call callback */
852 	ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
853 							  message_size, message);
854 
855 	/* Pop the error context stack */
856 	error_context_stack = errcallback.previous;
857 }
858 
859 /*
860  * Set the required catalog xmin horizon for historic snapshots in the current
861  * replication slot.
862  *
863  * Note that in the most cases, we won't be able to immediately use the xmin
864  * to increase the xmin horizon: we need to wait till the client has confirmed
865  * receiving current_lsn with LogicalConfirmReceivedLocation().
866  */
867 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)868 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
869 {
870 	bool		updated_xmin = false;
871 	ReplicationSlot *slot;
872 
873 	slot = MyReplicationSlot;
874 
875 	Assert(slot != NULL);
876 
877 	SpinLockAcquire(&slot->mutex);
878 
879 	/*
880 	 * don't overwrite if we already have a newer xmin. This can happen if we
881 	 * restart decoding in a slot.
882 	 */
883 	if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
884 	{
885 	}
886 
887 	/*
888 	 * If the client has already confirmed up to this lsn, we directly can
889 	 * mark this as accepted. This can happen if we restart decoding in a
890 	 * slot.
891 	 */
892 	else if (current_lsn <= slot->data.confirmed_flush)
893 	{
894 		slot->candidate_catalog_xmin = xmin;
895 		slot->candidate_xmin_lsn = current_lsn;
896 
897 		/* our candidate can directly be used */
898 		updated_xmin = true;
899 	}
900 
901 	/*
902 	 * Only increase if the previous values have been applied, otherwise we
903 	 * might never end up updating if the receiver acks too slowly.
904 	 */
905 	else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
906 	{
907 		slot->candidate_catalog_xmin = xmin;
908 		slot->candidate_xmin_lsn = current_lsn;
909 	}
910 	SpinLockRelease(&slot->mutex);
911 
912 	/* candidate already valid with the current flush position, apply */
913 	if (updated_xmin)
914 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
915 }
916 
917 /*
918  * Mark the minimal LSN (restart_lsn) we need to read to replay all
919  * transactions that have not yet committed at current_lsn.
920  *
921  * Just like IncreaseRestartDecodingForSlot this only takes effect when the
922  * client has confirmed to have received current_lsn.
923  */
924 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)925 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
926 {
927 	bool		updated_lsn = false;
928 	ReplicationSlot *slot;
929 
930 	slot = MyReplicationSlot;
931 
932 	Assert(slot != NULL);
933 	Assert(restart_lsn != InvalidXLogRecPtr);
934 	Assert(current_lsn != InvalidXLogRecPtr);
935 
936 	SpinLockAcquire(&slot->mutex);
937 
938 	/* don't overwrite if have a newer restart lsn */
939 	if (restart_lsn <= slot->data.restart_lsn)
940 	{
941 	}
942 
943 	/*
944 	 * We might have already flushed far enough to directly accept this lsn,
945 	 * in this case there is no need to check for existing candidate LSNs
946 	 */
947 	else if (current_lsn <= slot->data.confirmed_flush)
948 	{
949 		slot->candidate_restart_valid = current_lsn;
950 		slot->candidate_restart_lsn = restart_lsn;
951 
952 		/* our candidate can directly be used */
953 		updated_lsn = true;
954 	}
955 
956 	/*
957 	 * Only increase if the previous values have been applied, otherwise we
958 	 * might never end up updating if the receiver acks too slowly. A missed
959 	 * value here will just cause some extra effort after reconnecting.
960 	 */
961 	if (slot->candidate_restart_valid == InvalidXLogRecPtr)
962 	{
963 		slot->candidate_restart_valid = current_lsn;
964 		slot->candidate_restart_lsn = restart_lsn;
965 		SpinLockRelease(&slot->mutex);
966 
967 		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
968 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
969 			 (uint32) (current_lsn >> 32), (uint32) current_lsn);
970 	}
971 	else
972 	{
973 		XLogRecPtr	candidate_restart_lsn;
974 		XLogRecPtr	candidate_restart_valid;
975 		XLogRecPtr	confirmed_flush;
976 
977 		candidate_restart_lsn = slot->candidate_restart_lsn;
978 		candidate_restart_valid = slot->candidate_restart_valid;
979 		confirmed_flush = slot->data.confirmed_flush;
980 		SpinLockRelease(&slot->mutex);
981 
982 		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
983 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
984 			 (uint32) (current_lsn >> 32), (uint32) current_lsn,
985 			 (uint32) (candidate_restart_lsn >> 32),
986 			 (uint32) candidate_restart_lsn,
987 			 (uint32) (candidate_restart_valid >> 32),
988 			 (uint32) candidate_restart_valid,
989 			 (uint32) (confirmed_flush >> 32),
990 			 (uint32) confirmed_flush);
991 	}
992 
993 	/* candidates are already valid with the current flush position, apply */
994 	if (updated_lsn)
995 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
996 }
997 
998 /*
999  * Handle a consumer's confirmation having received all changes up to lsn.
1000  */
1001 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1002 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1003 {
1004 	Assert(lsn != InvalidXLogRecPtr);
1005 
1006 	/* Do an unlocked check for candidate_lsn first. */
1007 	if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1008 		MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1009 	{
1010 		bool		updated_xmin = false;
1011 		bool		updated_restart = false;
1012 
1013 		SpinLockAcquire(&MyReplicationSlot->mutex);
1014 
1015 		MyReplicationSlot->data.confirmed_flush = lsn;
1016 
1017 		/* if we're past the location required for bumping xmin, do so */
1018 		if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1019 			MyReplicationSlot->candidate_xmin_lsn <= lsn)
1020 		{
1021 			/*
1022 			 * We have to write the changed xmin to disk *before* we change
1023 			 * the in-memory value, otherwise after a crash we wouldn't know
1024 			 * that some catalog tuples might have been removed already.
1025 			 *
1026 			 * Ensure that by first writing to ->xmin and only update
1027 			 * ->effective_xmin once the new state is synced to disk. After a
1028 			 * crash ->effective_xmin is set to ->xmin.
1029 			 */
1030 			if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1031 				MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1032 			{
1033 				MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1034 				MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1035 				MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1036 				updated_xmin = true;
1037 			}
1038 		}
1039 
1040 		if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1041 			MyReplicationSlot->candidate_restart_valid <= lsn)
1042 		{
1043 			Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1044 
1045 			MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1046 			MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1047 			MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1048 			updated_restart = true;
1049 		}
1050 
1051 		SpinLockRelease(&MyReplicationSlot->mutex);
1052 
1053 		/* first write new xmin to disk, so we know what's up after a crash */
1054 		if (updated_xmin || updated_restart)
1055 		{
1056 			ReplicationSlotMarkDirty();
1057 			ReplicationSlotSave();
1058 			elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1059 		}
1060 
1061 		/*
1062 		 * Now the new xmin is safely on disk, we can let the global value
1063 		 * advance. We do not take ProcArrayLock or similar since we only
1064 		 * advance xmin here and there's not much harm done by a concurrent
1065 		 * computation missing that.
1066 		 */
1067 		if (updated_xmin)
1068 		{
1069 			SpinLockAcquire(&MyReplicationSlot->mutex);
1070 			MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1071 			SpinLockRelease(&MyReplicationSlot->mutex);
1072 
1073 			ReplicationSlotsComputeRequiredXmin(false);
1074 			ReplicationSlotsComputeRequiredLSN();
1075 		}
1076 	}
1077 	else
1078 	{
1079 		SpinLockAcquire(&MyReplicationSlot->mutex);
1080 		MyReplicationSlot->data.confirmed_flush = lsn;
1081 		SpinLockRelease(&MyReplicationSlot->mutex);
1082 	}
1083 }
1084