1 /*-------------------------------------------------------------------------
2 * logical.c
3 * PostgreSQL logical decoding coordination
4 *
5 * Copyright (c) 2012-2020, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/replication/logical/logical.c
9 *
10 * NOTES
11 * This file coordinates interaction between the various modules that
12 * together provide logical decoding, primarily by providing so
13 * called LogicalDecodingContexts. The goal is to encapsulate most of the
14 * internal complexity for consumers of logical decoding, so they can
15 * create and consume a changestream with a low amount of code. Builtin
16 * consumers are the walsender and SQL SRF interface, but it's possible to
17 * add further ones without changing core code, e.g. to consume changes in
18 * a bgworker.
19 *
20 * The idea is that a consumer provides three callbacks, one to read WAL,
21 * one to prepare a data write, and a final one for actually writing since
22 * their implementation depends on the type of consumer. Check
23 * logicalfuncs.c for an example implementation of a fairly simple consumer
24 * and an implementation of a WAL reading callback that's suitable for
25 * simple consumers.
26 *-------------------------------------------------------------------------
27 */
28
29 #include "postgres.h"
30
31 #include "access/xact.h"
32 #include "access/xlog_internal.h"
33 #include "fmgr.h"
34 #include "miscadmin.h"
35 #include "replication/decode.h"
36 #include "replication/logical.h"
37 #include "replication/origin.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/snapbuild.h"
40 #include "storage/proc.h"
41 #include "storage/procarray.h"
42 #include "utils/memutils.h"
43
44 /* data for errcontext callback */
45 typedef struct LogicalErrorCallbackState
46 {
47 LogicalDecodingContext *ctx;
48 const char *callback_name;
49 XLogRecPtr report_location;
50 } LogicalErrorCallbackState;
51
52 /* wrappers around output plugin callbacks */
53 static void output_plugin_error_callback(void *arg);
54 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
55 bool is_init);
56 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
57 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
58 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
59 XLogRecPtr commit_lsn);
60 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
61 Relation relation, ReorderBufferChange *change);
62 static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
63 int nrelations, Relation relations[], ReorderBufferChange *change);
64 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
65 XLogRecPtr message_lsn, bool transactional,
66 const char *prefix, Size message_size, const char *message);
67
68 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
69
70 /*
71 * Make sure the current settings & environment are capable of doing logical
72 * decoding.
73 */
74 void
CheckLogicalDecodingRequirements(void)75 CheckLogicalDecodingRequirements(void)
76 {
77 CheckSlotRequirements();
78
79 /*
80 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
81 * needs the same check.
82 */
83
84 if (wal_level < WAL_LEVEL_LOGICAL)
85 ereport(ERROR,
86 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
87 errmsg("logical decoding requires wal_level >= logical")));
88
89 if (MyDatabaseId == InvalidOid)
90 ereport(ERROR,
91 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
92 errmsg("logical decoding requires a database connection")));
93
94 /* ----
95 * TODO: We got to change that someday soon...
96 *
97 * There's basically three things missing to allow this:
98 * 1) We need to be able to correctly and quickly identify the timeline a
99 * LSN belongs to
100 * 2) We need to force hot_standby_feedback to be enabled at all times so
101 * the primary cannot remove rows we need.
102 * 3) support dropping replication slots referring to a database, in
103 * dbase_redo. There can't be any active ones due to HS recovery
104 * conflicts, so that should be relatively easy.
105 * ----
106 */
107 if (RecoveryInProgress())
108 ereport(ERROR,
109 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
110 errmsg("logical decoding cannot be used while in recovery")));
111 }
112
113 /*
114 * Helper function for CreateInitDecodingContext() and
115 * CreateDecodingContext() performing common tasks.
116 */
117 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)118 StartupDecodingContext(List *output_plugin_options,
119 XLogRecPtr start_lsn,
120 TransactionId xmin_horizon,
121 bool need_full_snapshot,
122 bool fast_forward,
123 XLogReaderRoutine *xl_routine,
124 LogicalOutputPluginWriterPrepareWrite prepare_write,
125 LogicalOutputPluginWriterWrite do_write,
126 LogicalOutputPluginWriterUpdateProgress update_progress)
127 {
128 ReplicationSlot *slot;
129 MemoryContext context,
130 old_context;
131 LogicalDecodingContext *ctx;
132
133 /* shorter lines... */
134 slot = MyReplicationSlot;
135
136 context = AllocSetContextCreate(CurrentMemoryContext,
137 "Logical decoding context",
138 ALLOCSET_DEFAULT_SIZES);
139 old_context = MemoryContextSwitchTo(context);
140 ctx = palloc0(sizeof(LogicalDecodingContext));
141
142 ctx->context = context;
143
144 /*
145 * (re-)load output plugins, so we detect a bad (removed) output plugin
146 * now.
147 */
148 if (!fast_forward)
149 LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
150
151 /*
152 * Now that the slot's xmin has been set, we can announce ourselves as a
153 * logical decoding backend which doesn't need to be checked individually
154 * when computing the xmin horizon because the xmin is enforced via
155 * replication slots.
156 *
157 * We can only do so if we're outside of a transaction (i.e. the case when
158 * streaming changes via walsender), otherwise an already setup
159 * snapshot/xid would end up being ignored. That's not a particularly
160 * bothersome restriction since the SQL interface can't be used for
161 * streaming anyway.
162 */
163 if (!IsTransactionOrTransactionBlock())
164 {
165 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
166 MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
167 LWLockRelease(ProcArrayLock);
168 }
169
170 ctx->slot = slot;
171
172 ctx->reader = XLogReaderAllocate(wal_segment_size, NULL, xl_routine, ctx);
173 if (!ctx->reader)
174 ereport(ERROR,
175 (errcode(ERRCODE_OUT_OF_MEMORY),
176 errmsg("out of memory")));
177
178 ctx->reorder = ReorderBufferAllocate();
179 ctx->snapshot_builder =
180 AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
181 need_full_snapshot);
182
183 ctx->reorder->private_data = ctx;
184
185 /* wrap output plugin callbacks, so we can add error context information */
186 ctx->reorder->begin = begin_cb_wrapper;
187 ctx->reorder->apply_change = change_cb_wrapper;
188 ctx->reorder->apply_truncate = truncate_cb_wrapper;
189 ctx->reorder->commit = commit_cb_wrapper;
190 ctx->reorder->message = message_cb_wrapper;
191
192 ctx->out = makeStringInfo();
193 ctx->prepare_write = prepare_write;
194 ctx->write = do_write;
195 ctx->update_progress = update_progress;
196
197 ctx->output_plugin_options = output_plugin_options;
198
199 ctx->fast_forward = fast_forward;
200
201 MemoryContextSwitchTo(old_context);
202
203 return ctx;
204 }
205
206 /*
207 * Create a new decoding context, for a new logical slot.
208 *
209 * plugin -- contains the name of the output plugin
210 * output_plugin_options -- contains options passed to the output plugin
211 * need_full_snapshot -- if true, must obtain a snapshot able to read all
212 * tables; if false, one that can read only catalogs is acceptable.
213 * restart_lsn -- if given as invalid, it's this routine's responsibility to
214 * mark WAL as reserved by setting a convenient restart_lsn for the slot.
215 * Otherwise, we set for decoding to start from the given LSN without
216 * marking WAL reserved beforehand. In that scenario, it's up to the
217 * caller to guarantee that WAL remains available.
218 * xl_routine -- XLogReaderRoutine for underlying XLogReader
219 * prepare_write, do_write, update_progress --
220 * callbacks that perform the use-case dependent, actual, work.
221 *
222 * Needs to be called while in a memory context that's at least as long lived
223 * as the decoding context because further memory contexts will be created
224 * inside it.
225 *
226 * Returns an initialized decoding context after calling the output plugin's
227 * startup function.
228 */
229 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogRecPtr restart_lsn,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)230 CreateInitDecodingContext(char *plugin,
231 List *output_plugin_options,
232 bool need_full_snapshot,
233 XLogRecPtr restart_lsn,
234 XLogReaderRoutine *xl_routine,
235 LogicalOutputPluginWriterPrepareWrite prepare_write,
236 LogicalOutputPluginWriterWrite do_write,
237 LogicalOutputPluginWriterUpdateProgress update_progress)
238 {
239 TransactionId xmin_horizon = InvalidTransactionId;
240 ReplicationSlot *slot;
241 LogicalDecodingContext *ctx;
242 MemoryContext old_context;
243
244 /* shorter lines... */
245 slot = MyReplicationSlot;
246
247 /* first some sanity checks that are unlikely to be violated */
248 if (slot == NULL)
249 elog(ERROR, "cannot perform logical decoding without an acquired slot");
250
251 if (plugin == NULL)
252 elog(ERROR, "cannot initialize logical decoding without a specified plugin");
253
254 /* Make sure the passed slot is suitable. These are user facing errors. */
255 if (SlotIsPhysical(slot))
256 ereport(ERROR,
257 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
258 errmsg("cannot use physical replication slot for logical decoding")));
259
260 if (slot->data.database != MyDatabaseId)
261 ereport(ERROR,
262 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
263 errmsg("replication slot \"%s\" was not created in this database",
264 NameStr(slot->data.name))));
265
266 if (IsTransactionState() &&
267 GetTopTransactionIdIfAny() != InvalidTransactionId)
268 ereport(ERROR,
269 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
270 errmsg("cannot create logical replication slot in transaction that has performed writes")));
271
272 /* register output plugin name with slot */
273 SpinLockAcquire(&slot->mutex);
274 StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
275 SpinLockRelease(&slot->mutex);
276
277 if (XLogRecPtrIsInvalid(restart_lsn))
278 ReplicationSlotReserveWal();
279 else
280 {
281 SpinLockAcquire(&slot->mutex);
282 slot->data.restart_lsn = restart_lsn;
283 SpinLockRelease(&slot->mutex);
284 }
285
286 /* ----
287 * This is a bit tricky: We need to determine a safe xmin horizon to start
288 * decoding from, to avoid starting from a running xacts record referring
289 * to xids whose rows have been vacuumed or pruned
290 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
291 * without further interlock its return value might immediately be out of
292 * date.
293 *
294 * So we have to acquire the ProcArrayLock to prevent computation of new
295 * xmin horizons by other backends, get the safe decoding xid, and inform
296 * the slot machinery about the new limit. Once that's done the
297 * ProcArrayLock can be released as the slot machinery now is
298 * protecting against vacuum.
299 *
300 * Note that, temporarily, the data, not just the catalog, xmin has to be
301 * reserved if a data snapshot is to be exported. Otherwise the initial
302 * data snapshot created here is not guaranteed to be valid. After that
303 * the data xmin doesn't need to be managed anymore and the global xmin
304 * should be recomputed. As we are fine with losing the pegged data xmin
305 * after crash - no chance a snapshot would get exported anymore - we can
306 * get away with just setting the slot's
307 * effective_xmin. ReplicationSlotRelease will reset it again.
308 *
309 * ----
310 */
311 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
312
313 xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
314
315 SpinLockAcquire(&slot->mutex);
316 slot->effective_catalog_xmin = xmin_horizon;
317 slot->data.catalog_xmin = xmin_horizon;
318 if (need_full_snapshot)
319 slot->effective_xmin = xmin_horizon;
320 SpinLockRelease(&slot->mutex);
321
322 ReplicationSlotsComputeRequiredXmin(true);
323
324 LWLockRelease(ProcArrayLock);
325
326 ReplicationSlotMarkDirty();
327 ReplicationSlotSave();
328
329 ctx = StartupDecodingContext(NIL, restart_lsn, xmin_horizon,
330 need_full_snapshot, false,
331 xl_routine, prepare_write, do_write,
332 update_progress);
333
334 /* call output plugin initialization callback */
335 old_context = MemoryContextSwitchTo(ctx->context);
336 if (ctx->callbacks.startup_cb != NULL)
337 startup_cb_wrapper(ctx, &ctx->options, true);
338 MemoryContextSwitchTo(old_context);
339
340 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
341
342 return ctx;
343 }
344
345 /*
346 * Create a new decoding context, for a logical slot that has previously been
347 * used already.
348 *
349 * start_lsn
350 * The LSN at which to start decoding. If InvalidXLogRecPtr, restart
351 * from the slot's confirmed_flush; otherwise, start from the specified
352 * location (but move it forwards to confirmed_flush if it's older than
353 * that, see below).
354 *
355 * output_plugin_options
356 * options passed to the output plugin.
357 *
358 * fast_forward
359 * bypass the generation of logical changes.
360 *
361 * xl_routine
362 * XLogReaderRoutine used by underlying xlogreader
363 *
364 * prepare_write, do_write, update_progress
365 * callbacks that have to be filled to perform the use-case dependent,
366 * actual work.
367 *
368 * Needs to be called while in a memory context that's at least as long lived
369 * as the decoding context because further memory contexts will be created
370 * inside it.
371 *
372 * Returns an initialized decoding context after calling the output plugin's
373 * startup function.
374 */
375 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogReaderRoutine * xl_routine,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)376 CreateDecodingContext(XLogRecPtr start_lsn,
377 List *output_plugin_options,
378 bool fast_forward,
379 XLogReaderRoutine *xl_routine,
380 LogicalOutputPluginWriterPrepareWrite prepare_write,
381 LogicalOutputPluginWriterWrite do_write,
382 LogicalOutputPluginWriterUpdateProgress update_progress)
383 {
384 LogicalDecodingContext *ctx;
385 ReplicationSlot *slot;
386 MemoryContext old_context;
387
388 /* shorter lines... */
389 slot = MyReplicationSlot;
390
391 /* first some sanity checks that are unlikely to be violated */
392 if (slot == NULL)
393 elog(ERROR, "cannot perform logical decoding without an acquired slot");
394
395 /* make sure the passed slot is suitable, these are user facing errors */
396 if (SlotIsPhysical(slot))
397 ereport(ERROR,
398 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
399 errmsg("cannot use physical replication slot for logical decoding")));
400
401 if (slot->data.database != MyDatabaseId)
402 ereport(ERROR,
403 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
404 errmsg("replication slot \"%s\" was not created in this database",
405 NameStr(slot->data.name))));
406
407 if (start_lsn == InvalidXLogRecPtr)
408 {
409 /* continue from last position */
410 start_lsn = slot->data.confirmed_flush;
411 }
412 else if (start_lsn < slot->data.confirmed_flush)
413 {
414 /*
415 * It might seem like we should error out in this case, but it's
416 * pretty common for a client to acknowledge a LSN it doesn't have to
417 * do anything for, and thus didn't store persistently, because the
418 * xlog records didn't result in anything relevant for logical
419 * decoding. Clients have to be able to do that to support synchronous
420 * replication.
421 */
422 elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
423 (uint32) (start_lsn >> 32), (uint32) start_lsn,
424 (uint32) (slot->data.confirmed_flush >> 32),
425 (uint32) slot->data.confirmed_flush);
426
427 start_lsn = slot->data.confirmed_flush;
428 }
429
430 ctx = StartupDecodingContext(output_plugin_options,
431 start_lsn, InvalidTransactionId, false,
432 fast_forward, xl_routine, prepare_write,
433 do_write, update_progress);
434
435 /* call output plugin initialization callback */
436 old_context = MemoryContextSwitchTo(ctx->context);
437 if (ctx->callbacks.startup_cb != NULL)
438 startup_cb_wrapper(ctx, &ctx->options, false);
439 MemoryContextSwitchTo(old_context);
440
441 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
442
443 ereport(LOG,
444 (errmsg("starting logical decoding for slot \"%s\"",
445 NameStr(slot->data.name)),
446 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
447 (uint32) (slot->data.confirmed_flush >> 32),
448 (uint32) slot->data.confirmed_flush,
449 (uint32) (slot->data.restart_lsn >> 32),
450 (uint32) slot->data.restart_lsn)));
451
452 return ctx;
453 }
454
455 /*
456 * Returns true if a consistent initial decoding snapshot has been built.
457 */
458 bool
DecodingContextReady(LogicalDecodingContext * ctx)459 DecodingContextReady(LogicalDecodingContext *ctx)
460 {
461 return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
462 }
463
464 /*
465 * Read from the decoding slot, until it is ready to start extracting changes.
466 */
467 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)468 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
469 {
470 ReplicationSlot *slot = ctx->slot;
471
472 /* Initialize from where to start reading WAL. */
473 XLogBeginRead(ctx->reader, slot->data.restart_lsn);
474
475 elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
476 (uint32) (slot->data.restart_lsn >> 32),
477 (uint32) slot->data.restart_lsn);
478
479 /* Wait for a consistent starting point */
480 for (;;)
481 {
482 XLogRecord *record;
483 char *err = NULL;
484
485 /* the read_page callback waits for new WAL */
486 record = XLogReadRecord(ctx->reader, &err);
487 if (err)
488 elog(ERROR, "%s", err);
489 if (!record)
490 elog(ERROR, "no record found"); /* shouldn't happen */
491
492 LogicalDecodingProcessRecord(ctx, ctx->reader);
493
494 /* only continue till we found a consistent spot */
495 if (DecodingContextReady(ctx))
496 break;
497
498 CHECK_FOR_INTERRUPTS();
499 }
500
501 SpinLockAcquire(&slot->mutex);
502 slot->data.confirmed_flush = ctx->reader->EndRecPtr;
503 SpinLockRelease(&slot->mutex);
504 }
505
506 /*
507 * Free a previously allocated decoding context, invoking the shutdown
508 * callback if necessary.
509 */
510 void
FreeDecodingContext(LogicalDecodingContext * ctx)511 FreeDecodingContext(LogicalDecodingContext *ctx)
512 {
513 if (ctx->callbacks.shutdown_cb != NULL)
514 shutdown_cb_wrapper(ctx);
515
516 ReorderBufferFree(ctx->reorder);
517 FreeSnapshotBuilder(ctx->snapshot_builder);
518 XLogReaderFree(ctx->reader);
519 MemoryContextDelete(ctx->context);
520 }
521
522 /*
523 * Prepare a write using the context's output routine.
524 */
525 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)526 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
527 {
528 if (!ctx->accept_writes)
529 elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
530
531 ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
532 ctx->prepared_write = true;
533 }
534
535 /*
536 * Perform a write using the context's output routine.
537 */
538 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)539 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
540 {
541 if (!ctx->prepared_write)
542 elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
543
544 ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
545 ctx->prepared_write = false;
546 }
547
548 /*
549 * Update progress tracking (if supported).
550 */
551 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)552 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
553 {
554 if (!ctx->update_progress)
555 return;
556
557 ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
558 }
559
560 /*
561 * Load the output plugin, lookup its output plugin init function, and check
562 * that it provides the required callbacks.
563 */
564 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)565 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
566 {
567 LogicalOutputPluginInit plugin_init;
568
569 plugin_init = (LogicalOutputPluginInit)
570 load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
571
572 if (plugin_init == NULL)
573 elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
574
575 /* ask the output plugin to fill the callback struct */
576 plugin_init(callbacks);
577
578 if (callbacks->begin_cb == NULL)
579 elog(ERROR, "output plugins have to register a begin callback");
580 if (callbacks->change_cb == NULL)
581 elog(ERROR, "output plugins have to register a change callback");
582 if (callbacks->commit_cb == NULL)
583 elog(ERROR, "output plugins have to register a commit callback");
584 }
585
586 static void
output_plugin_error_callback(void * arg)587 output_plugin_error_callback(void *arg)
588 {
589 LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
590
591 /* not all callbacks have an associated LSN */
592 if (state->report_location != InvalidXLogRecPtr)
593 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
594 NameStr(state->ctx->slot->data.name),
595 NameStr(state->ctx->slot->data.plugin),
596 state->callback_name,
597 (uint32) (state->report_location >> 32),
598 (uint32) state->report_location);
599 else
600 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
601 NameStr(state->ctx->slot->data.name),
602 NameStr(state->ctx->slot->data.plugin),
603 state->callback_name);
604 }
605
606 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)607 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
608 {
609 LogicalErrorCallbackState state;
610 ErrorContextCallback errcallback;
611
612 Assert(!ctx->fast_forward);
613
614 /* Push callback + info on the error context stack */
615 state.ctx = ctx;
616 state.callback_name = "startup";
617 state.report_location = InvalidXLogRecPtr;
618 errcallback.callback = output_plugin_error_callback;
619 errcallback.arg = (void *) &state;
620 errcallback.previous = error_context_stack;
621 error_context_stack = &errcallback;
622
623 /* set output state */
624 ctx->accept_writes = false;
625
626 /* do the actual work: call callback */
627 ctx->callbacks.startup_cb(ctx, opt, is_init);
628
629 /* Pop the error context stack */
630 error_context_stack = errcallback.previous;
631 }
632
633 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)634 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
635 {
636 LogicalErrorCallbackState state;
637 ErrorContextCallback errcallback;
638
639 Assert(!ctx->fast_forward);
640
641 /* Push callback + info on the error context stack */
642 state.ctx = ctx;
643 state.callback_name = "shutdown";
644 state.report_location = InvalidXLogRecPtr;
645 errcallback.callback = output_plugin_error_callback;
646 errcallback.arg = (void *) &state;
647 errcallback.previous = error_context_stack;
648 error_context_stack = &errcallback;
649
650 /* set output state */
651 ctx->accept_writes = false;
652
653 /* do the actual work: call callback */
654 ctx->callbacks.shutdown_cb(ctx);
655
656 /* Pop the error context stack */
657 error_context_stack = errcallback.previous;
658 }
659
660
661 /*
662 * Callbacks for ReorderBuffer which add in some more information and then call
663 * output_plugin.h plugins.
664 */
665 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)666 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
667 {
668 LogicalDecodingContext *ctx = cache->private_data;
669 LogicalErrorCallbackState state;
670 ErrorContextCallback errcallback;
671
672 Assert(!ctx->fast_forward);
673
674 /* Push callback + info on the error context stack */
675 state.ctx = ctx;
676 state.callback_name = "begin";
677 state.report_location = txn->first_lsn;
678 errcallback.callback = output_plugin_error_callback;
679 errcallback.arg = (void *) &state;
680 errcallback.previous = error_context_stack;
681 error_context_stack = &errcallback;
682
683 /* set output state */
684 ctx->accept_writes = true;
685 ctx->write_xid = txn->xid;
686 ctx->write_location = txn->first_lsn;
687
688 /* do the actual work: call callback */
689 ctx->callbacks.begin_cb(ctx, txn);
690
691 /* Pop the error context stack */
692 error_context_stack = errcallback.previous;
693 }
694
695 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)696 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
697 XLogRecPtr commit_lsn)
698 {
699 LogicalDecodingContext *ctx = cache->private_data;
700 LogicalErrorCallbackState state;
701 ErrorContextCallback errcallback;
702
703 Assert(!ctx->fast_forward);
704
705 /* Push callback + info on the error context stack */
706 state.ctx = ctx;
707 state.callback_name = "commit";
708 state.report_location = txn->final_lsn; /* beginning of commit record */
709 errcallback.callback = output_plugin_error_callback;
710 errcallback.arg = (void *) &state;
711 errcallback.previous = error_context_stack;
712 error_context_stack = &errcallback;
713
714 /* set output state */
715 ctx->accept_writes = true;
716 ctx->write_xid = txn->xid;
717 ctx->write_location = txn->end_lsn; /* points to the end of the record */
718
719 /* do the actual work: call callback */
720 ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
721
722 /* Pop the error context stack */
723 error_context_stack = errcallback.previous;
724 }
725
726 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)727 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
728 Relation relation, ReorderBufferChange *change)
729 {
730 LogicalDecodingContext *ctx = cache->private_data;
731 LogicalErrorCallbackState state;
732 ErrorContextCallback errcallback;
733
734 Assert(!ctx->fast_forward);
735
736 /* Push callback + info on the error context stack */
737 state.ctx = ctx;
738 state.callback_name = "change";
739 state.report_location = change->lsn;
740 errcallback.callback = output_plugin_error_callback;
741 errcallback.arg = (void *) &state;
742 errcallback.previous = error_context_stack;
743 error_context_stack = &errcallback;
744
745 /* set output state */
746 ctx->accept_writes = true;
747 ctx->write_xid = txn->xid;
748
749 /*
750 * report this change's lsn so replies from clients can give an up2date
751 * answer. This won't ever be enough (and shouldn't be!) to confirm
752 * receipt of this transaction, but it might allow another transaction's
753 * commit to be confirmed with one message.
754 */
755 ctx->write_location = change->lsn;
756
757 ctx->callbacks.change_cb(ctx, txn, relation, change);
758
759 /* Pop the error context stack */
760 error_context_stack = errcallback.previous;
761 }
762
763 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)764 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
765 int nrelations, Relation relations[], ReorderBufferChange *change)
766 {
767 LogicalDecodingContext *ctx = cache->private_data;
768 LogicalErrorCallbackState state;
769 ErrorContextCallback errcallback;
770
771 Assert(!ctx->fast_forward);
772
773 if (!ctx->callbacks.truncate_cb)
774 return;
775
776 /* Push callback + info on the error context stack */
777 state.ctx = ctx;
778 state.callback_name = "truncate";
779 state.report_location = change->lsn;
780 errcallback.callback = output_plugin_error_callback;
781 errcallback.arg = (void *) &state;
782 errcallback.previous = error_context_stack;
783 error_context_stack = &errcallback;
784
785 /* set output state */
786 ctx->accept_writes = true;
787 ctx->write_xid = txn->xid;
788
789 /*
790 * report this change's lsn so replies from clients can give an up2date
791 * answer. This won't ever be enough (and shouldn't be!) to confirm
792 * receipt of this transaction, but it might allow another transaction's
793 * commit to be confirmed with one message.
794 */
795 ctx->write_location = change->lsn;
796
797 ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
798
799 /* Pop the error context stack */
800 error_context_stack = errcallback.previous;
801 }
802
803 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)804 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
805 {
806 LogicalErrorCallbackState state;
807 ErrorContextCallback errcallback;
808 bool ret;
809
810 Assert(!ctx->fast_forward);
811
812 /* Push callback + info on the error context stack */
813 state.ctx = ctx;
814 state.callback_name = "filter_by_origin";
815 state.report_location = InvalidXLogRecPtr;
816 errcallback.callback = output_plugin_error_callback;
817 errcallback.arg = (void *) &state;
818 errcallback.previous = error_context_stack;
819 error_context_stack = &errcallback;
820
821 /* set output state */
822 ctx->accept_writes = false;
823
824 /* do the actual work: call callback */
825 ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
826
827 /* Pop the error context stack */
828 error_context_stack = errcallback.previous;
829
830 return ret;
831 }
832
833 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)834 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
835 XLogRecPtr message_lsn, bool transactional,
836 const char *prefix, Size message_size, const char *message)
837 {
838 LogicalDecodingContext *ctx = cache->private_data;
839 LogicalErrorCallbackState state;
840 ErrorContextCallback errcallback;
841
842 Assert(!ctx->fast_forward);
843
844 if (ctx->callbacks.message_cb == NULL)
845 return;
846
847 /* Push callback + info on the error context stack */
848 state.ctx = ctx;
849 state.callback_name = "message";
850 state.report_location = message_lsn;
851 errcallback.callback = output_plugin_error_callback;
852 errcallback.arg = (void *) &state;
853 errcallback.previous = error_context_stack;
854 error_context_stack = &errcallback;
855
856 /* set output state */
857 ctx->accept_writes = true;
858 ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
859 ctx->write_location = message_lsn;
860
861 /* do the actual work: call callback */
862 ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
863 message_size, message);
864
865 /* Pop the error context stack */
866 error_context_stack = errcallback.previous;
867 }
868
869 /*
870 * Set the required catalog xmin horizon for historic snapshots in the current
871 * replication slot.
872 *
873 * Note that in the most cases, we won't be able to immediately use the xmin
874 * to increase the xmin horizon: we need to wait till the client has confirmed
875 * receiving current_lsn with LogicalConfirmReceivedLocation().
876 */
877 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)878 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
879 {
880 bool updated_xmin = false;
881 ReplicationSlot *slot;
882
883 slot = MyReplicationSlot;
884
885 Assert(slot != NULL);
886
887 SpinLockAcquire(&slot->mutex);
888
889 /*
890 * don't overwrite if we already have a newer xmin. This can happen if we
891 * restart decoding in a slot.
892 */
893 if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
894 {
895 }
896
897 /*
898 * If the client has already confirmed up to this lsn, we directly can
899 * mark this as accepted. This can happen if we restart decoding in a
900 * slot.
901 */
902 else if (current_lsn <= slot->data.confirmed_flush)
903 {
904 slot->candidate_catalog_xmin = xmin;
905 slot->candidate_xmin_lsn = current_lsn;
906
907 /* our candidate can directly be used */
908 updated_xmin = true;
909 }
910
911 /*
912 * Only increase if the previous values have been applied, otherwise we
913 * might never end up updating if the receiver acks too slowly.
914 */
915 else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
916 {
917 slot->candidate_catalog_xmin = xmin;
918 slot->candidate_xmin_lsn = current_lsn;
919 }
920 SpinLockRelease(&slot->mutex);
921
922 /* candidate already valid with the current flush position, apply */
923 if (updated_xmin)
924 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
925 }
926
927 /*
928 * Mark the minimal LSN (restart_lsn) we need to read to replay all
929 * transactions that have not yet committed at current_lsn.
930 *
931 * Just like LogicalIncreaseXminForSlot this only takes effect when the
932 * client has confirmed to have received current_lsn.
933 */
934 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)935 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
936 {
937 bool updated_lsn = false;
938 ReplicationSlot *slot;
939
940 slot = MyReplicationSlot;
941
942 Assert(slot != NULL);
943 Assert(restart_lsn != InvalidXLogRecPtr);
944 Assert(current_lsn != InvalidXLogRecPtr);
945
946 SpinLockAcquire(&slot->mutex);
947
948 /* don't overwrite if have a newer restart lsn */
949 if (restart_lsn <= slot->data.restart_lsn)
950 {
951 }
952
953 /*
954 * We might have already flushed far enough to directly accept this lsn,
955 * in this case there is no need to check for existing candidate LSNs
956 */
957 else if (current_lsn <= slot->data.confirmed_flush)
958 {
959 slot->candidate_restart_valid = current_lsn;
960 slot->candidate_restart_lsn = restart_lsn;
961
962 /* our candidate can directly be used */
963 updated_lsn = true;
964 }
965
966 /*
967 * Only increase if the previous values have been applied, otherwise we
968 * might never end up updating if the receiver acks too slowly. A missed
969 * value here will just cause some extra effort after reconnecting.
970 */
971 if (slot->candidate_restart_valid == InvalidXLogRecPtr)
972 {
973 slot->candidate_restart_valid = current_lsn;
974 slot->candidate_restart_lsn = restart_lsn;
975 SpinLockRelease(&slot->mutex);
976
977 elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
978 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
979 (uint32) (current_lsn >> 32), (uint32) current_lsn);
980 }
981 else
982 {
983 XLogRecPtr candidate_restart_lsn;
984 XLogRecPtr candidate_restart_valid;
985 XLogRecPtr confirmed_flush;
986
987 candidate_restart_lsn = slot->candidate_restart_lsn;
988 candidate_restart_valid = slot->candidate_restart_valid;
989 confirmed_flush = slot->data.confirmed_flush;
990 SpinLockRelease(&slot->mutex);
991
992 elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
993 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
994 (uint32) (current_lsn >> 32), (uint32) current_lsn,
995 (uint32) (candidate_restart_lsn >> 32),
996 (uint32) candidate_restart_lsn,
997 (uint32) (candidate_restart_valid >> 32),
998 (uint32) candidate_restart_valid,
999 (uint32) (confirmed_flush >> 32),
1000 (uint32) confirmed_flush);
1001 }
1002
1003 /* candidates are already valid with the current flush position, apply */
1004 if (updated_lsn)
1005 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
1006 }
1007
1008 /*
1009 * Handle a consumer's confirmation having received all changes up to lsn.
1010 */
1011 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1012 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1013 {
1014 Assert(lsn != InvalidXLogRecPtr);
1015
1016 /* Do an unlocked check for candidate_lsn first. */
1017 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1018 MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1019 {
1020 bool updated_xmin = false;
1021 bool updated_restart = false;
1022
1023 SpinLockAcquire(&MyReplicationSlot->mutex);
1024
1025 MyReplicationSlot->data.confirmed_flush = lsn;
1026
1027 /* if we're past the location required for bumping xmin, do so */
1028 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1029 MyReplicationSlot->candidate_xmin_lsn <= lsn)
1030 {
1031 /*
1032 * We have to write the changed xmin to disk *before* we change
1033 * the in-memory value, otherwise after a crash we wouldn't know
1034 * that some catalog tuples might have been removed already.
1035 *
1036 * Ensure that by first writing to ->xmin and only update
1037 * ->effective_xmin once the new state is synced to disk. After a
1038 * crash ->effective_xmin is set to ->xmin.
1039 */
1040 if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1041 MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1042 {
1043 MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1044 MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1045 MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1046 updated_xmin = true;
1047 }
1048 }
1049
1050 if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1051 MyReplicationSlot->candidate_restart_valid <= lsn)
1052 {
1053 Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1054
1055 MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1056 MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1057 MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1058 updated_restart = true;
1059 }
1060
1061 SpinLockRelease(&MyReplicationSlot->mutex);
1062
1063 /* first write new xmin to disk, so we know what's up after a crash */
1064 if (updated_xmin || updated_restart)
1065 {
1066 ReplicationSlotMarkDirty();
1067 ReplicationSlotSave();
1068 elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1069 }
1070
1071 /*
1072 * Now the new xmin is safely on disk, we can let the global value
1073 * advance. We do not take ProcArrayLock or similar since we only
1074 * advance xmin here and there's not much harm done by a concurrent
1075 * computation missing that.
1076 */
1077 if (updated_xmin)
1078 {
1079 SpinLockAcquire(&MyReplicationSlot->mutex);
1080 MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1081 SpinLockRelease(&MyReplicationSlot->mutex);
1082
1083 ReplicationSlotsComputeRequiredXmin(false);
1084 ReplicationSlotsComputeRequiredLSN();
1085 }
1086 }
1087 else
1088 {
1089 SpinLockAcquire(&MyReplicationSlot->mutex);
1090 MyReplicationSlot->data.confirmed_flush = lsn;
1091 SpinLockRelease(&MyReplicationSlot->mutex);
1092 }
1093 }
1094