/*-------------------------------------------------------------------------
 * logical.c
 *	   PostgreSQL logical decoding coordination
 *
 * Copyright (c) 2012-2017, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/replication/logical/logical.c
 *
 * NOTES
 *	  This file coordinates interaction between the various modules that
 *	  together provide logical decoding, primarily by providing so-called
 *	  LogicalDecodingContexts. The goal is to encapsulate most of the
 *	  internal complexity for consumers of logical decoding, so they can
 *	  create and consume a changestream with a low amount of code. Built-in
 *	  consumers are the walsender and SQL SRF interface, but it's possible to
 *	  add further ones without changing core code, e.g. to consume changes in
 *	  a bgworker.
 *
 *	  The idea is that a consumer provides three callbacks, one to read WAL,
 *	  one to prepare a data write, and a final one for actually writing,
 *	  since their implementation depends on the type of consumer.  Check
 *	  logicalfuncs.c for an example implementation of a fairly simple consumer
 *	  and an implementation of a WAL reading callback that's suitable for
 *	  simple consumers.
 *-------------------------------------------------------------------------
 */
28 
29 #include "postgres.h"
30 
31 #include "miscadmin.h"
32 
33 #include "access/xact.h"
34 #include "access/xlog_internal.h"
35 
36 #include "replication/decode.h"
37 #include "replication/logical.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/origin.h"
40 #include "replication/snapbuild.h"
41 
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44 
45 #include "utils/memutils.h"
46 
47 /* data for errcontext callback */
48 typedef struct LogicalErrorCallbackState
49 {
50 	LogicalDecodingContext *ctx;
51 	const char *callback_name;
52 	XLogRecPtr	report_location;
53 } LogicalErrorCallbackState;
54 
55 /* wrappers around output plugin callbacks */
56 static void output_plugin_error_callback(void *arg);
57 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
58 				   bool is_init);
59 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
60 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
61 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
62 				  XLogRecPtr commit_lsn);
63 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
64 				  Relation relation, ReorderBufferChange *change);
65 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
66 				   XLogRecPtr message_lsn, bool transactional,
67 				   const char *prefix, Size message_size, const char *message);
68 
69 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
70 
71 /*
72  * Make sure the current settings & environment are capable of doing logical
73  * decoding.
74  */
75 void
CheckLogicalDecodingRequirements(void)76 CheckLogicalDecodingRequirements(void)
77 {
78 	CheckSlotRequirements();
79 
80 	/*
81 	 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
82 	 * needs the same check.
83 	 */
84 
85 	if (wal_level < WAL_LEVEL_LOGICAL)
86 		ereport(ERROR,
87 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
88 				 errmsg("logical decoding requires wal_level >= logical")));
89 
90 	if (MyDatabaseId == InvalidOid)
91 		ereport(ERROR,
92 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
93 				 errmsg("logical decoding requires a database connection")));
94 
95 	/* ----
96 	 * TODO: We got to change that someday soon...
97 	 *
98 	 * There's basically three things missing to allow this:
99 	 * 1) We need to be able to correctly and quickly identify the timeline a
100 	 *	  LSN belongs to
101 	 * 2) We need to force hot_standby_feedback to be enabled at all times so
102 	 *	  the primary cannot remove rows we need.
103 	 * 3) support dropping replication slots referring to a database, in
104 	 *	  dbase_redo. There can't be any active ones due to HS recovery
105 	 *	  conflicts, so that should be relatively easy.
106 	 * ----
107 	 */
108 	if (RecoveryInProgress())
109 		ereport(ERROR,
110 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
111 				 errmsg("logical decoding cannot be used while in recovery")));
112 }
113 
114 /*
115  * Helper function for CreateInitialDecodingContext() and
116  * CreateDecodingContext() performing common tasks.
117  */
118 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)119 StartupDecodingContext(List *output_plugin_options,
120 					   XLogRecPtr start_lsn,
121 					   TransactionId xmin_horizon,
122 					   bool need_full_snapshot,
123 					   XLogPageReadCB read_page,
124 					   LogicalOutputPluginWriterPrepareWrite prepare_write,
125 					   LogicalOutputPluginWriterWrite do_write,
126 					   LogicalOutputPluginWriterUpdateProgress update_progress)
127 {
128 	ReplicationSlot *slot;
129 	MemoryContext context,
130 				old_context;
131 	LogicalDecodingContext *ctx;
132 
133 	/* shorter lines... */
134 	slot = MyReplicationSlot;
135 
136 	context = AllocSetContextCreate(CurrentMemoryContext,
137 									"Logical decoding context",
138 									ALLOCSET_DEFAULT_SIZES);
139 	old_context = MemoryContextSwitchTo(context);
140 	ctx = palloc0(sizeof(LogicalDecodingContext));
141 
142 	ctx->context = context;
143 
144 	/*
145 	 * (re-)load output plugins, so we detect a bad (removed) output plugin
146 	 * now.
147 	 */
148 	LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
149 
150 	/*
151 	 * Now that the slot's xmin has been set, we can announce ourselves as a
152 	 * logical decoding backend which doesn't need to be checked individually
153 	 * when computing the xmin horizon because the xmin is enforced via
154 	 * replication slots.
155 	 *
156 	 * We can only do so if we're outside of a transaction (i.e. the case when
157 	 * streaming changes via walsender), otherwise an already setup
158 	 * snapshot/xid would end up being ignored. That's not a particularly
159 	 * bothersome restriction since the SQL interface can't be used for
160 	 * streaming anyway.
161 	 */
162 	if (!IsTransactionOrTransactionBlock())
163 	{
164 		LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
165 		MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
166 		LWLockRelease(ProcArrayLock);
167 	}
168 
169 	ctx->slot = slot;
170 
171 	ctx->reader = XLogReaderAllocate(read_page, ctx);
172 	if (!ctx->reader)
173 		ereport(ERROR,
174 				(errcode(ERRCODE_OUT_OF_MEMORY),
175 				 errmsg("out of memory")));
176 
177 	ctx->reader->private_data = ctx;
178 
179 	ctx->reorder = ReorderBufferAllocate();
180 	ctx->snapshot_builder =
181 		AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
182 								need_full_snapshot);
183 
184 	ctx->reorder->private_data = ctx;
185 
186 	/* wrap output plugin callbacks, so we can add error context information */
187 	ctx->reorder->begin = begin_cb_wrapper;
188 	ctx->reorder->apply_change = change_cb_wrapper;
189 	ctx->reorder->commit = commit_cb_wrapper;
190 	ctx->reorder->message = message_cb_wrapper;
191 
192 	ctx->out = makeStringInfo();
193 	ctx->prepare_write = prepare_write;
194 	ctx->write = do_write;
195 	ctx->update_progress = update_progress;
196 
197 	ctx->output_plugin_options = output_plugin_options;
198 
199 	MemoryContextSwitchTo(old_context);
200 
201 	return ctx;
202 }
203 
204 /*
205  * Create a new decoding context, for a new logical slot.
206  *
207  * plugin contains the name of the output plugin
208  * output_plugin_options contains options passed to the output plugin
209  * read_page, prepare_write, do_write, update_progress
210  *		callbacks that have to be filled to perform the use-case dependent,
211  *		actual, work.
212  *
213  * Needs to be called while in a memory context that's at least as long lived
214  * as the decoding context because further memory contexts will be created
215  * inside it.
216  *
217  * Returns an initialized decoding context after calling the output plugin's
218  * startup function.
219  */
220 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)221 CreateInitDecodingContext(char *plugin,
222 						  List *output_plugin_options,
223 						  bool need_full_snapshot,
224 						  XLogPageReadCB read_page,
225 						  LogicalOutputPluginWriterPrepareWrite prepare_write,
226 						  LogicalOutputPluginWriterWrite do_write,
227 						  LogicalOutputPluginWriterUpdateProgress update_progress)
228 {
229 	TransactionId xmin_horizon = InvalidTransactionId;
230 	ReplicationSlot *slot;
231 	LogicalDecodingContext *ctx;
232 	MemoryContext old_context;
233 
234 	/* shorter lines... */
235 	slot = MyReplicationSlot;
236 
237 	/* first some sanity checks that are unlikely to be violated */
238 	if (slot == NULL)
239 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
240 
241 	if (plugin == NULL)
242 		elog(ERROR, "cannot initialize logical decoding without a specified plugin");
243 
244 	/* Make sure the passed slot is suitable. These are user facing errors. */
245 	if (SlotIsPhysical(slot))
246 		ereport(ERROR,
247 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
248 				 errmsg("cannot use physical replication slot for logical decoding")));
249 
250 	if (slot->data.database != MyDatabaseId)
251 		ereport(ERROR,
252 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
253 				 errmsg("replication slot \"%s\" was not created in this database",
254 						NameStr(slot->data.name))));
255 
256 	if (IsTransactionState() &&
257 		GetTopTransactionIdIfAny() != InvalidTransactionId)
258 		ereport(ERROR,
259 				(errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
260 				 errmsg("cannot create logical replication slot in transaction that has performed writes")));
261 
262 	/* register output plugin name with slot */
263 	SpinLockAcquire(&slot->mutex);
264 	StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
265 	SpinLockRelease(&slot->mutex);
266 
267 	ReplicationSlotReserveWal();
268 
269 	/* ----
270 	 * This is a bit tricky: We need to determine a safe xmin horizon to start
271 	 * decoding from, to avoid starting from a running xacts record referring
272 	 * to xids whose rows have been vacuumed or pruned
273 	 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
274 	 * without further interlock its return value might immediately be out of
275 	 * date.
276 	 *
277 	 * So we have to acquire the ProcArrayLock to prevent computation of new
278 	 * xmin horizons by other backends, get the safe decoding xid, and inform
279 	 * the slot machinery about the new limit. Once that's done the
280 	 * ProcArrayLock can be released as the slot machinery now is
281 	 * protecting against vacuum.
282 	 *
283 	 * Note that, temporarily, the data, not just the catalog, xmin has to be
284 	 * reserved if a data snapshot is to be exported.  Otherwise the initial
285 	 * data snapshot created here is not guaranteed to be valid. After that
286 	 * the data xmin doesn't need to be managed anymore and the global xmin
287 	 * should be recomputed. As we are fine with losing the pegged data xmin
288 	 * after crash - no chance a snapshot would get exported anymore - we can
289 	 * get away with just setting the slot's
290 	 * effective_xmin. ReplicationSlotRelease will reset it again.
291 	 *
292 	 * ----
293 	 */
294 	LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
295 
296 	xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
297 
298 	slot->effective_catalog_xmin = xmin_horizon;
299 	slot->data.catalog_xmin = xmin_horizon;
300 	if (need_full_snapshot)
301 		slot->effective_xmin = xmin_horizon;
302 
303 	ReplicationSlotsComputeRequiredXmin(true);
304 
305 	LWLockRelease(ProcArrayLock);
306 
307 	ReplicationSlotMarkDirty();
308 	ReplicationSlotSave();
309 
310 	ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon,
311 								 need_full_snapshot, read_page, prepare_write,
312 								 do_write, update_progress);
313 
314 	/* call output plugin initialization callback */
315 	old_context = MemoryContextSwitchTo(ctx->context);
316 	if (ctx->callbacks.startup_cb != NULL)
317 		startup_cb_wrapper(ctx, &ctx->options, true);
318 	MemoryContextSwitchTo(old_context);
319 
320 	return ctx;
321 }
322 
323 /*
324  * Create a new decoding context, for a logical slot that has previously been
325  * used already.
326  *
327  * start_lsn
328  *		The LSN at which to start decoding.  If InvalidXLogRecPtr, restart
329  *		from the slot's confirmed_flush; otherwise, start from the specified
330  *		location (but move it forwards to confirmed_flush if it's older than
331  *		that, see below).
332  *
333  * output_plugin_options
334  *		contains options passed to the output plugin.
335  *
336  * read_page, prepare_write, do_write, update_progress
337  *		callbacks that have to be filled to perform the use-case dependent,
338  *		actual work.
339  *
340  * Needs to be called while in a memory context that's at least as long lived
341  * as the decoding context because further memory contexts will be created
342  * inside it.
343  *
344  * Returns an initialized decoding context after calling the output plugin's
345  * startup function.
346  */
347 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)348 CreateDecodingContext(XLogRecPtr start_lsn,
349 					  List *output_plugin_options,
350 					  XLogPageReadCB read_page,
351 					  LogicalOutputPluginWriterPrepareWrite prepare_write,
352 					  LogicalOutputPluginWriterWrite do_write,
353 					  LogicalOutputPluginWriterUpdateProgress update_progress)
354 {
355 	LogicalDecodingContext *ctx;
356 	ReplicationSlot *slot;
357 	MemoryContext old_context;
358 
359 	/* shorter lines... */
360 	slot = MyReplicationSlot;
361 
362 	/* first some sanity checks that are unlikely to be violated */
363 	if (slot == NULL)
364 		elog(ERROR, "cannot perform logical decoding without an acquired slot");
365 
366 	/* make sure the passed slot is suitable, these are user facing errors */
367 	if (SlotIsPhysical(slot))
368 		ereport(ERROR,
369 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
370 				 (errmsg("cannot use physical replication slot for logical decoding"))));
371 
372 	if (slot->data.database != MyDatabaseId)
373 		ereport(ERROR,
374 				(errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
375 				 (errmsg("replication slot \"%s\" was not created in this database",
376 						 NameStr(slot->data.name)))));
377 
378 	if (start_lsn == InvalidXLogRecPtr)
379 	{
380 		/* continue from last position */
381 		start_lsn = slot->data.confirmed_flush;
382 	}
383 	else if (start_lsn < slot->data.confirmed_flush)
384 	{
385 		/*
386 		 * It might seem like we should error out in this case, but it's
387 		 * pretty common for a client to acknowledge a LSN it doesn't have to
388 		 * do anything for, and thus didn't store persistently, because the
389 		 * xlog records didn't result in anything relevant for logical
390 		 * decoding. Clients have to be able to do that to support synchronous
391 		 * replication.
392 		 */
393 		elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
394 			 (uint32) (start_lsn >> 32), (uint32) start_lsn,
395 			 (uint32) (slot->data.confirmed_flush >> 32),
396 			 (uint32) slot->data.confirmed_flush);
397 
398 		start_lsn = slot->data.confirmed_flush;
399 	}
400 
401 	ctx = StartupDecodingContext(output_plugin_options,
402 								 start_lsn, InvalidTransactionId, false,
403 								 read_page, prepare_write, do_write,
404 								 update_progress);
405 
406 	/* call output plugin initialization callback */
407 	old_context = MemoryContextSwitchTo(ctx->context);
408 	if (ctx->callbacks.startup_cb != NULL)
409 		startup_cb_wrapper(ctx, &ctx->options, false);
410 	MemoryContextSwitchTo(old_context);
411 
412 	ereport(LOG,
413 			(errmsg("starting logical decoding for slot \"%s\"",
414 					NameStr(slot->data.name)),
415 			 errdetail("streaming transactions committing after %X/%X, reading WAL from %X/%X",
416 					   (uint32) (slot->data.confirmed_flush >> 32),
417 					   (uint32) slot->data.confirmed_flush,
418 					   (uint32) (slot->data.restart_lsn >> 32),
419 					   (uint32) slot->data.restart_lsn)));
420 
421 	return ctx;
422 }
423 
424 /*
425  * Returns true if a consistent initial decoding snapshot has been built.
426  */
427 bool
DecodingContextReady(LogicalDecodingContext * ctx)428 DecodingContextReady(LogicalDecodingContext *ctx)
429 {
430 	return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
431 }
432 
433 /*
434  * Read from the decoding slot, until it is ready to start extracting changes.
435  */
436 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)437 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
438 {
439 	XLogRecPtr	startptr;
440 
441 	/* Initialize from where to start reading WAL. */
442 	startptr = ctx->slot->data.restart_lsn;
443 
444 	elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
445 		 (uint32) (ctx->slot->data.restart_lsn >> 32),
446 		 (uint32) ctx->slot->data.restart_lsn);
447 
448 	/* Wait for a consistent starting point */
449 	for (;;)
450 	{
451 		XLogRecord *record;
452 		char	   *err = NULL;
453 
454 		/* the read_page callback waits for new WAL */
455 		record = XLogReadRecord(ctx->reader, startptr, &err);
456 		if (err)
457 			elog(ERROR, "%s", err);
458 		if (!record)
459 			elog(ERROR, "no record found"); /* shouldn't happen */
460 
461 		startptr = InvalidXLogRecPtr;
462 
463 		LogicalDecodingProcessRecord(ctx, ctx->reader);
464 
465 		/* only continue till we found a consistent spot */
466 		if (DecodingContextReady(ctx))
467 			break;
468 
469 		CHECK_FOR_INTERRUPTS();
470 	}
471 
472 	ctx->slot->data.confirmed_flush = ctx->reader->EndRecPtr;
473 }
474 
475 /*
476  * Free a previously allocated decoding context, invoking the shutdown
477  * callback if necessary.
478  */
479 void
FreeDecodingContext(LogicalDecodingContext * ctx)480 FreeDecodingContext(LogicalDecodingContext *ctx)
481 {
482 	if (ctx->callbacks.shutdown_cb != NULL)
483 		shutdown_cb_wrapper(ctx);
484 
485 	ReorderBufferFree(ctx->reorder);
486 	FreeSnapshotBuilder(ctx->snapshot_builder);
487 	XLogReaderFree(ctx->reader);
488 	MemoryContextDelete(ctx->context);
489 }
490 
491 /*
492  * Prepare a write using the context's output routine.
493  */
494 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)495 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
496 {
497 	if (!ctx->accept_writes)
498 		elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
499 
500 	ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
501 	ctx->prepared_write = true;
502 }
503 
504 /*
505  * Perform a write using the context's output routine.
506  */
507 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)508 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
509 {
510 	if (!ctx->prepared_write)
511 		elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
512 
513 	ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
514 	ctx->prepared_write = false;
515 }
516 
517 /*
518  * Update progress tracking (if supported).
519  */
520 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)521 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
522 {
523 	if (!ctx->update_progress)
524 		return;
525 
526 	ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
527 }
528 
529 /*
530  * Load the output plugin, lookup its output plugin init function, and check
531  * that it provides the required callbacks.
532  */
533 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)534 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
535 {
536 	LogicalOutputPluginInit plugin_init;
537 
538 	plugin_init = (LogicalOutputPluginInit)
539 		load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
540 
541 	if (plugin_init == NULL)
542 		elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
543 
544 	/* ask the output plugin to fill the callback struct */
545 	plugin_init(callbacks);
546 
547 	if (callbacks->begin_cb == NULL)
548 		elog(ERROR, "output plugins have to register a begin callback");
549 	if (callbacks->change_cb == NULL)
550 		elog(ERROR, "output plugins have to register a change callback");
551 	if (callbacks->commit_cb == NULL)
552 		elog(ERROR, "output plugins have to register a commit callback");
553 }
554 
555 static void
output_plugin_error_callback(void * arg)556 output_plugin_error_callback(void *arg)
557 {
558 	LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
559 
560 	/* not all callbacks have an associated LSN  */
561 	if (state->report_location != InvalidXLogRecPtr)
562 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
563 				   NameStr(state->ctx->slot->data.name),
564 				   NameStr(state->ctx->slot->data.plugin),
565 				   state->callback_name,
566 				   (uint32) (state->report_location >> 32),
567 				   (uint32) state->report_location);
568 	else
569 		errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
570 				   NameStr(state->ctx->slot->data.name),
571 				   NameStr(state->ctx->slot->data.plugin),
572 				   state->callback_name);
573 }
574 
575 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)576 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
577 {
578 	LogicalErrorCallbackState state;
579 	ErrorContextCallback errcallback;
580 
581 	/* Push callback + info on the error context stack */
582 	state.ctx = ctx;
583 	state.callback_name = "startup";
584 	state.report_location = InvalidXLogRecPtr;
585 	errcallback.callback = output_plugin_error_callback;
586 	errcallback.arg = (void *) &state;
587 	errcallback.previous = error_context_stack;
588 	error_context_stack = &errcallback;
589 
590 	/* set output state */
591 	ctx->accept_writes = false;
592 
593 	/* do the actual work: call callback */
594 	ctx->callbacks.startup_cb(ctx, opt, is_init);
595 
596 	/* Pop the error context stack */
597 	error_context_stack = errcallback.previous;
598 }
599 
600 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)601 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
602 {
603 	LogicalErrorCallbackState state;
604 	ErrorContextCallback errcallback;
605 
606 	/* Push callback + info on the error context stack */
607 	state.ctx = ctx;
608 	state.callback_name = "shutdown";
609 	state.report_location = InvalidXLogRecPtr;
610 	errcallback.callback = output_plugin_error_callback;
611 	errcallback.arg = (void *) &state;
612 	errcallback.previous = error_context_stack;
613 	error_context_stack = &errcallback;
614 
615 	/* set output state */
616 	ctx->accept_writes = false;
617 
618 	/* do the actual work: call callback */
619 	ctx->callbacks.shutdown_cb(ctx);
620 
621 	/* Pop the error context stack */
622 	error_context_stack = errcallback.previous;
623 }
624 
625 
626 /*
627  * Callbacks for ReorderBuffer which add in some more information and then call
628  * output_plugin.h plugins.
629  */
630 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)631 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
632 {
633 	LogicalDecodingContext *ctx = cache->private_data;
634 	LogicalErrorCallbackState state;
635 	ErrorContextCallback errcallback;
636 
637 	/* Push callback + info on the error context stack */
638 	state.ctx = ctx;
639 	state.callback_name = "begin";
640 	state.report_location = txn->first_lsn;
641 	errcallback.callback = output_plugin_error_callback;
642 	errcallback.arg = (void *) &state;
643 	errcallback.previous = error_context_stack;
644 	error_context_stack = &errcallback;
645 
646 	/* set output state */
647 	ctx->accept_writes = true;
648 	ctx->write_xid = txn->xid;
649 	ctx->write_location = txn->first_lsn;
650 
651 	/* do the actual work: call callback */
652 	ctx->callbacks.begin_cb(ctx, txn);
653 
654 	/* Pop the error context stack */
655 	error_context_stack = errcallback.previous;
656 }
657 
658 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)659 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
660 				  XLogRecPtr commit_lsn)
661 {
662 	LogicalDecodingContext *ctx = cache->private_data;
663 	LogicalErrorCallbackState state;
664 	ErrorContextCallback errcallback;
665 
666 	/* Push callback + info on the error context stack */
667 	state.ctx = ctx;
668 	state.callback_name = "commit";
669 	state.report_location = txn->final_lsn; /* beginning of commit record */
670 	errcallback.callback = output_plugin_error_callback;
671 	errcallback.arg = (void *) &state;
672 	errcallback.previous = error_context_stack;
673 	error_context_stack = &errcallback;
674 
675 	/* set output state */
676 	ctx->accept_writes = true;
677 	ctx->write_xid = txn->xid;
678 	ctx->write_location = txn->end_lsn; /* points to the end of the record */
679 
680 	/* do the actual work: call callback */
681 	ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
682 
683 	/* Pop the error context stack */
684 	error_context_stack = errcallback.previous;
685 }
686 
687 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)688 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
689 				  Relation relation, ReorderBufferChange *change)
690 {
691 	LogicalDecodingContext *ctx = cache->private_data;
692 	LogicalErrorCallbackState state;
693 	ErrorContextCallback errcallback;
694 
695 	/* Push callback + info on the error context stack */
696 	state.ctx = ctx;
697 	state.callback_name = "change";
698 	state.report_location = change->lsn;
699 	errcallback.callback = output_plugin_error_callback;
700 	errcallback.arg = (void *) &state;
701 	errcallback.previous = error_context_stack;
702 	error_context_stack = &errcallback;
703 
704 	/* set output state */
705 	ctx->accept_writes = true;
706 	ctx->write_xid = txn->xid;
707 
708 	/*
709 	 * report this change's lsn so replies from clients can give an up2date
710 	 * answer. This won't ever be enough (and shouldn't be!) to confirm
711 	 * receipt of this transaction, but it might allow another transaction's
712 	 * commit to be confirmed with one message.
713 	 */
714 	ctx->write_location = change->lsn;
715 
716 	ctx->callbacks.change_cb(ctx, txn, relation, change);
717 
718 	/* Pop the error context stack */
719 	error_context_stack = errcallback.previous;
720 }
721 
722 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)723 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
724 {
725 	LogicalErrorCallbackState state;
726 	ErrorContextCallback errcallback;
727 	bool		ret;
728 
729 	/* Push callback + info on the error context stack */
730 	state.ctx = ctx;
731 	state.callback_name = "filter_by_origin";
732 	state.report_location = InvalidXLogRecPtr;
733 	errcallback.callback = output_plugin_error_callback;
734 	errcallback.arg = (void *) &state;
735 	errcallback.previous = error_context_stack;
736 	error_context_stack = &errcallback;
737 
738 	/* set output state */
739 	ctx->accept_writes = false;
740 
741 	/* do the actual work: call callback */
742 	ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
743 
744 	/* Pop the error context stack */
745 	error_context_stack = errcallback.previous;
746 
747 	return ret;
748 }
749 
750 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)751 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
752 				   XLogRecPtr message_lsn, bool transactional,
753 				   const char *prefix, Size message_size, const char *message)
754 {
755 	LogicalDecodingContext *ctx = cache->private_data;
756 	LogicalErrorCallbackState state;
757 	ErrorContextCallback errcallback;
758 
759 	if (ctx->callbacks.message_cb == NULL)
760 		return;
761 
762 	/* Push callback + info on the error context stack */
763 	state.ctx = ctx;
764 	state.callback_name = "message";
765 	state.report_location = message_lsn;
766 	errcallback.callback = output_plugin_error_callback;
767 	errcallback.arg = (void *) &state;
768 	errcallback.previous = error_context_stack;
769 	error_context_stack = &errcallback;
770 
771 	/* set output state */
772 	ctx->accept_writes = true;
773 	ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
774 	ctx->write_location = message_lsn;
775 
776 	/* do the actual work: call callback */
777 	ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
778 							  message_size, message);
779 
780 	/* Pop the error context stack */
781 	error_context_stack = errcallback.previous;
782 }
783 
784 /*
785  * Set the required catalog xmin horizon for historic snapshots in the current
786  * replication slot.
787  *
788  * Note that in the most cases, we won't be able to immediately use the xmin
789  * to increase the xmin horizon: we need to wait till the client has confirmed
790  * receiving current_lsn with LogicalConfirmReceivedLocation().
791  */
792 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)793 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
794 {
795 	bool		updated_xmin = false;
796 	ReplicationSlot *slot;
797 
798 	slot = MyReplicationSlot;
799 
800 	Assert(slot != NULL);
801 
802 	SpinLockAcquire(&slot->mutex);
803 
804 	/*
805 	 * don't overwrite if we already have a newer xmin. This can happen if we
806 	 * restart decoding in a slot.
807 	 */
808 	if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
809 	{
810 	}
811 
812 	/*
813 	 * If the client has already confirmed up to this lsn, we directly can
814 	 * mark this as accepted. This can happen if we restart decoding in a
815 	 * slot.
816 	 */
817 	else if (current_lsn <= slot->data.confirmed_flush)
818 	{
819 		slot->candidate_catalog_xmin = xmin;
820 		slot->candidate_xmin_lsn = current_lsn;
821 
822 		/* our candidate can directly be used */
823 		updated_xmin = true;
824 	}
825 
826 	/*
827 	 * Only increase if the previous values have been applied, otherwise we
828 	 * might never end up updating if the receiver acks too slowly.
829 	 */
830 	else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
831 	{
832 		slot->candidate_catalog_xmin = xmin;
833 		slot->candidate_xmin_lsn = current_lsn;
834 	}
835 	SpinLockRelease(&slot->mutex);
836 
837 	/* candidate already valid with the current flush position, apply */
838 	if (updated_xmin)
839 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
840 }
841 
/*
 * Mark the minimal LSN (restart_lsn) we need to read to replay all
 * transactions that have not yet committed at current_lsn.
 *
 * Just like LogicalIncreaseXminForSlot(), this only takes effect once the
 * client has confirmed receipt of current_lsn.
 */
849 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)850 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
851 {
852 	bool		updated_lsn = false;
853 	ReplicationSlot *slot;
854 
855 	slot = MyReplicationSlot;
856 
857 	Assert(slot != NULL);
858 	Assert(restart_lsn != InvalidXLogRecPtr);
859 	Assert(current_lsn != InvalidXLogRecPtr);
860 
861 	SpinLockAcquire(&slot->mutex);
862 
863 	/* don't overwrite if have a newer restart lsn */
864 	if (restart_lsn <= slot->data.restart_lsn)
865 	{
866 	}
867 
868 	/*
869 	 * We might have already flushed far enough to directly accept this lsn,
870 	 * in this case there is no need to check for existing candidate LSNs
871 	 */
872 	else if (current_lsn <= slot->data.confirmed_flush)
873 	{
874 		slot->candidate_restart_valid = current_lsn;
875 		slot->candidate_restart_lsn = restart_lsn;
876 
877 		/* our candidate can directly be used */
878 		updated_lsn = true;
879 	}
880 
881 	/*
882 	 * Only increase if the previous values have been applied, otherwise we
883 	 * might never end up updating if the receiver acks too slowly. A missed
884 	 * value here will just cause some extra effort after reconnecting.
885 	 */
886 	if (slot->candidate_restart_valid == InvalidXLogRecPtr)
887 	{
888 		slot->candidate_restart_valid = current_lsn;
889 		slot->candidate_restart_lsn = restart_lsn;
890 		SpinLockRelease(&slot->mutex);
891 
892 		elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
893 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
894 			 (uint32) (current_lsn >> 32), (uint32) current_lsn);
895 	}
896 	else
897 	{
898 		XLogRecPtr	candidate_restart_lsn;
899 		XLogRecPtr	candidate_restart_valid;
900 		XLogRecPtr	confirmed_flush;
901 
902 		candidate_restart_lsn = slot->candidate_restart_lsn;
903 		candidate_restart_valid = slot->candidate_restart_valid;
904 		confirmed_flush = slot->data.confirmed_flush;
905 		SpinLockRelease(&slot->mutex);
906 
907 		elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
908 			 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
909 			 (uint32) (current_lsn >> 32), (uint32) current_lsn,
910 			 (uint32) (candidate_restart_lsn >> 32),
911 			 (uint32) candidate_restart_lsn,
912 			 (uint32) (candidate_restart_valid >> 32),
913 			 (uint32) candidate_restart_valid,
914 			 (uint32) (confirmed_flush >> 32),
915 			 (uint32) confirmed_flush);
916 	}
917 
918 	/* candidates are already valid with the current flush position, apply */
919 	if (updated_lsn)
920 		LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
921 }
922 
923 /*
924  * Handle a consumer's confirmation having received all changes up to lsn.
925  */
926 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)927 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
928 {
929 	Assert(lsn != InvalidXLogRecPtr);
930 
931 	/* Do an unlocked check for candidate_lsn first. */
932 	if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
933 		MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
934 	{
935 		bool		updated_xmin = false;
936 		bool		updated_restart = false;
937 
938 		SpinLockAcquire(&MyReplicationSlot->mutex);
939 
940 		MyReplicationSlot->data.confirmed_flush = lsn;
941 
942 		/* if we're past the location required for bumping xmin, do so */
943 		if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
944 			MyReplicationSlot->candidate_xmin_lsn <= lsn)
945 		{
946 			/*
947 			 * We have to write the changed xmin to disk *before* we change
948 			 * the in-memory value, otherwise after a crash we wouldn't know
949 			 * that some catalog tuples might have been removed already.
950 			 *
951 			 * Ensure that by first writing to ->xmin and only update
952 			 * ->effective_xmin once the new state is synced to disk. After a
953 			 * crash ->effective_xmin is set to ->xmin.
954 			 */
955 			if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
956 				MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
957 			{
958 				MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
959 				MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
960 				MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
961 				updated_xmin = true;
962 			}
963 		}
964 
965 		if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
966 			MyReplicationSlot->candidate_restart_valid <= lsn)
967 		{
968 			Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
969 
970 			MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
971 			MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
972 			MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
973 			updated_restart = true;
974 		}
975 
976 		SpinLockRelease(&MyReplicationSlot->mutex);
977 
978 		/* first write new xmin to disk, so we know what's up after a crash */
979 		if (updated_xmin || updated_restart)
980 		{
981 			ReplicationSlotMarkDirty();
982 			ReplicationSlotSave();
983 			elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
984 		}
985 
986 		/*
987 		 * Now the new xmin is safely on disk, we can let the global value
988 		 * advance. We do not take ProcArrayLock or similar since we only
989 		 * advance xmin here and there's not much harm done by a concurrent
990 		 * computation missing that.
991 		 */
992 		if (updated_xmin)
993 		{
994 			SpinLockAcquire(&MyReplicationSlot->mutex);
995 			MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
996 			SpinLockRelease(&MyReplicationSlot->mutex);
997 
998 			ReplicationSlotsComputeRequiredXmin(false);
999 			ReplicationSlotsComputeRequiredLSN();
1000 		}
1001 	}
1002 	else
1003 	{
1004 		SpinLockAcquire(&MyReplicationSlot->mutex);
1005 		MyReplicationSlot->data.confirmed_flush = lsn;
1006 		SpinLockRelease(&MyReplicationSlot->mutex);
1007 	}
1008 }
1009