1 /*-------------------------------------------------------------------------
2 * logical.c
3 * PostgreSQL logical decoding coordination
4 *
5 * Copyright (c) 2012-2019, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/replication/logical/logical.c
9 *
10 * NOTES
11 * This file coordinates interaction between the various modules that
12 * together provide logical decoding, primarily by providing so
13 * called LogicalDecodingContexts. The goal is to encapsulate most of the
14 * internal complexity for consumers of logical decoding, so they can
15 * create and consume a changestream with a low amount of code. Builtin
16 * consumers are the walsender and SQL SRF interface, but it's possible to
17 * add further ones without changing core code, e.g. to consume changes in
18 * a bgworker.
19 *
 * The idea is that a consumer provides three callbacks, one to read WAL,
 * one to prepare a data write, and a final one for actually writing since
 * their implementation depends on the type of consumer. A fourth callback,
 * for reporting progress, may additionally be supplied; it is optional and
 * is skipped when not set. Check logicalfuncs.c for an example
 * implementation of a fairly simple consumer and an implementation of a WAL
 * reading callback that's suitable for simple consumers.
26 *-------------------------------------------------------------------------
27 */
28
29 #include "postgres.h"
30
31 #include "miscadmin.h"
32
33 #include "access/xact.h"
34 #include "access/xlog_internal.h"
35
36 #include "replication/decode.h"
37 #include "replication/logical.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/origin.h"
40 #include "replication/snapbuild.h"
41
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44
45 #include "utils/memutils.h"
46
47 /* data for errcontext callback */
48 typedef struct LogicalErrorCallbackState
49 {
50 LogicalDecodingContext *ctx;
51 const char *callback_name;
52 XLogRecPtr report_location;
53 } LogicalErrorCallbackState;
54
55 /* wrappers around output plugin callbacks */
56 static void output_plugin_error_callback(void *arg);
57 static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
58 bool is_init);
59 static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
60 static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
61 static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
62 XLogRecPtr commit_lsn);
63 static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
64 Relation relation, ReorderBufferChange *change);
65 static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
66 int nrelations, Relation relations[], ReorderBufferChange *change);
67 static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
68 XLogRecPtr message_lsn, bool transactional,
69 const char *prefix, Size message_size, const char *message);
70
71 static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
72
73 /*
74 * Make sure the current settings & environment are capable of doing logical
75 * decoding.
76 */
77 void
CheckLogicalDecodingRequirements(void)78 CheckLogicalDecodingRequirements(void)
79 {
80 CheckSlotRequirements();
81
82 /*
83 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
84 * needs the same check.
85 */
86
87 if (wal_level < WAL_LEVEL_LOGICAL)
88 ereport(ERROR,
89 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
90 errmsg("logical decoding requires wal_level >= logical")));
91
92 if (MyDatabaseId == InvalidOid)
93 ereport(ERROR,
94 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
95 errmsg("logical decoding requires a database connection")));
96
97 /* ----
98 * TODO: We got to change that someday soon...
99 *
100 * There's basically three things missing to allow this:
101 * 1) We need to be able to correctly and quickly identify the timeline a
102 * LSN belongs to
103 * 2) We need to force hot_standby_feedback to be enabled at all times so
104 * the primary cannot remove rows we need.
105 * 3) support dropping replication slots referring to a database, in
106 * dbase_redo. There can't be any active ones due to HS recovery
107 * conflicts, so that should be relatively easy.
108 * ----
109 */
110 if (RecoveryInProgress())
111 ereport(ERROR,
112 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
113 errmsg("logical decoding cannot be used while in recovery")));
114 }
115
116 /*
117 * Helper function for CreateInitDecodingContext() and
118 * CreateDecodingContext() performing common tasks.
119 */
120 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)121 StartupDecodingContext(List *output_plugin_options,
122 XLogRecPtr start_lsn,
123 TransactionId xmin_horizon,
124 bool need_full_snapshot,
125 bool fast_forward,
126 XLogPageReadCB read_page,
127 LogicalOutputPluginWriterPrepareWrite prepare_write,
128 LogicalOutputPluginWriterWrite do_write,
129 LogicalOutputPluginWriterUpdateProgress update_progress)
130 {
131 ReplicationSlot *slot;
132 MemoryContext context,
133 old_context;
134 LogicalDecodingContext *ctx;
135
136 /* shorter lines... */
137 slot = MyReplicationSlot;
138
139 context = AllocSetContextCreate(CurrentMemoryContext,
140 "Logical decoding context",
141 ALLOCSET_DEFAULT_SIZES);
142 old_context = MemoryContextSwitchTo(context);
143 ctx = palloc0(sizeof(LogicalDecodingContext));
144
145 ctx->context = context;
146
147 /*
148 * (re-)load output plugins, so we detect a bad (removed) output plugin
149 * now.
150 */
151 if (!fast_forward)
152 LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
153
154 /*
155 * Now that the slot's xmin has been set, we can announce ourselves as a
156 * logical decoding backend which doesn't need to be checked individually
157 * when computing the xmin horizon because the xmin is enforced via
158 * replication slots.
159 *
160 * We can only do so if we're outside of a transaction (i.e. the case when
161 * streaming changes via walsender), otherwise an already setup
162 * snapshot/xid would end up being ignored. That's not a particularly
163 * bothersome restriction since the SQL interface can't be used for
164 * streaming anyway.
165 */
166 if (!IsTransactionOrTransactionBlock())
167 {
168 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
169 MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
170 LWLockRelease(ProcArrayLock);
171 }
172
173 ctx->slot = slot;
174
175 ctx->reader = XLogReaderAllocate(wal_segment_size, read_page, ctx);
176 if (!ctx->reader)
177 ereport(ERROR,
178 (errcode(ERRCODE_OUT_OF_MEMORY),
179 errmsg("out of memory")));
180
181 ctx->reorder = ReorderBufferAllocate();
182 ctx->snapshot_builder =
183 AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
184 need_full_snapshot);
185
186 ctx->reorder->private_data = ctx;
187
188 /* wrap output plugin callbacks, so we can add error context information */
189 ctx->reorder->begin = begin_cb_wrapper;
190 ctx->reorder->apply_change = change_cb_wrapper;
191 ctx->reorder->apply_truncate = truncate_cb_wrapper;
192 ctx->reorder->commit = commit_cb_wrapper;
193 ctx->reorder->message = message_cb_wrapper;
194
195 ctx->out = makeStringInfo();
196 ctx->prepare_write = prepare_write;
197 ctx->write = do_write;
198 ctx->update_progress = update_progress;
199
200 ctx->output_plugin_options = output_plugin_options;
201
202 ctx->fast_forward = fast_forward;
203
204 MemoryContextSwitchTo(old_context);
205
206 return ctx;
207 }
208
209 /*
210 * Create a new decoding context, for a new logical slot.
211 *
212 * plugin -- contains the name of the output plugin
213 * output_plugin_options -- contains options passed to the output plugin
214 * need_full_snapshot -- if true, must obtain a snapshot able to read all
215 * tables; if false, one that can read only catalogs is acceptable.
216 * restart_lsn -- if given as invalid, it's this routine's responsibility to
217 * mark WAL as reserved by setting a convenient restart_lsn for the slot.
218 * Otherwise, we set for decoding to start from the given LSN without
219 * marking WAL reserved beforehand. In that scenario, it's up to the
220 * caller to guarantee that WAL remains available.
221 * read_page, prepare_write, do_write, update_progress --
222 * callbacks that perform the use-case dependent, actual, work.
223 *
224 * Needs to be called while in a memory context that's at least as long lived
225 * as the decoding context because further memory contexts will be created
226 * inside it.
227 *
228 * Returns an initialized decoding context after calling the output plugin's
229 * startup function.
230 */
231 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogRecPtr restart_lsn,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)232 CreateInitDecodingContext(char *plugin,
233 List *output_plugin_options,
234 bool need_full_snapshot,
235 XLogRecPtr restart_lsn,
236 XLogPageReadCB read_page,
237 LogicalOutputPluginWriterPrepareWrite prepare_write,
238 LogicalOutputPluginWriterWrite do_write,
239 LogicalOutputPluginWriterUpdateProgress update_progress)
240 {
241 TransactionId xmin_horizon = InvalidTransactionId;
242 ReplicationSlot *slot;
243 LogicalDecodingContext *ctx;
244 MemoryContext old_context;
245
246 /* shorter lines... */
247 slot = MyReplicationSlot;
248
249 /* first some sanity checks that are unlikely to be violated */
250 if (slot == NULL)
251 elog(ERROR, "cannot perform logical decoding without an acquired slot");
252
253 if (plugin == NULL)
254 elog(ERROR, "cannot initialize logical decoding without a specified plugin");
255
256 /* Make sure the passed slot is suitable. These are user facing errors. */
257 if (SlotIsPhysical(slot))
258 ereport(ERROR,
259 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
260 errmsg("cannot use physical replication slot for logical decoding")));
261
262 if (slot->data.database != MyDatabaseId)
263 ereport(ERROR,
264 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
265 errmsg("replication slot \"%s\" was not created in this database",
266 NameStr(slot->data.name))));
267
268 if (IsTransactionState() &&
269 GetTopTransactionIdIfAny() != InvalidTransactionId)
270 ereport(ERROR,
271 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
272 errmsg("cannot create logical replication slot in transaction that has performed writes")));
273
274 /* register output plugin name with slot */
275 SpinLockAcquire(&slot->mutex);
276 StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
277 SpinLockRelease(&slot->mutex);
278
279 if (XLogRecPtrIsInvalid(restart_lsn))
280 ReplicationSlotReserveWal();
281 else
282 {
283 SpinLockAcquire(&slot->mutex);
284 slot->data.restart_lsn = restart_lsn;
285 SpinLockRelease(&slot->mutex);
286 }
287
288 /* ----
289 * This is a bit tricky: We need to determine a safe xmin horizon to start
290 * decoding from, to avoid starting from a running xacts record referring
291 * to xids whose rows have been vacuumed or pruned
292 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
293 * without further interlock its return value might immediately be out of
294 * date.
295 *
296 * So we have to acquire the ProcArrayLock to prevent computation of new
297 * xmin horizons by other backends, get the safe decoding xid, and inform
298 * the slot machinery about the new limit. Once that's done the
299 * ProcArrayLock can be released as the slot machinery now is
300 * protecting against vacuum.
301 *
302 * Note that, temporarily, the data, not just the catalog, xmin has to be
303 * reserved if a data snapshot is to be exported. Otherwise the initial
304 * data snapshot created here is not guaranteed to be valid. After that
305 * the data xmin doesn't need to be managed anymore and the global xmin
306 * should be recomputed. As we are fine with losing the pegged data xmin
307 * after crash - no chance a snapshot would get exported anymore - we can
308 * get away with just setting the slot's
309 * effective_xmin. ReplicationSlotRelease will reset it again.
310 *
311 * ----
312 */
313 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
314
315 xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
316
317 SpinLockAcquire(&slot->mutex);
318 slot->effective_catalog_xmin = xmin_horizon;
319 slot->data.catalog_xmin = xmin_horizon;
320 if (need_full_snapshot)
321 slot->effective_xmin = xmin_horizon;
322 SpinLockRelease(&slot->mutex);
323
324 ReplicationSlotsComputeRequiredXmin(true);
325
326 LWLockRelease(ProcArrayLock);
327
328 ReplicationSlotMarkDirty();
329 ReplicationSlotSave();
330
331 ctx = StartupDecodingContext(NIL, restart_lsn, xmin_horizon,
332 need_full_snapshot, false,
333 read_page, prepare_write, do_write,
334 update_progress);
335
336 /* call output plugin initialization callback */
337 old_context = MemoryContextSwitchTo(ctx->context);
338 if (ctx->callbacks.startup_cb != NULL)
339 startup_cb_wrapper(ctx, &ctx->options, true);
340 MemoryContextSwitchTo(old_context);
341
342 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
343
344 return ctx;
345 }
346
347 /*
348 * Create a new decoding context, for a logical slot that has previously been
349 * used already.
350 *
351 * start_lsn
352 * The LSN at which to start decoding. If InvalidXLogRecPtr, restart
353 * from the slot's confirmed_flush; otherwise, start from the specified
354 * location (but move it forwards to confirmed_flush if it's older than
355 * that, see below).
356 *
357 * output_plugin_options
358 * options passed to the output plugin.
359 *
360 * fast_forward
361 * bypass the generation of logical changes.
362 *
363 * read_page, prepare_write, do_write, update_progress
364 * callbacks that have to be filled to perform the use-case dependent,
365 * actual work.
366 *
367 * Needs to be called while in a memory context that's at least as long lived
368 * as the decoding context because further memory contexts will be created
369 * inside it.
370 *
371 * Returns an initialized decoding context after calling the output plugin's
372 * startup function.
373 */
374 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)375 CreateDecodingContext(XLogRecPtr start_lsn,
376 List *output_plugin_options,
377 bool fast_forward,
378 XLogPageReadCB read_page,
379 LogicalOutputPluginWriterPrepareWrite prepare_write,
380 LogicalOutputPluginWriterWrite do_write,
381 LogicalOutputPluginWriterUpdateProgress update_progress)
382 {
383 LogicalDecodingContext *ctx;
384 ReplicationSlot *slot;
385 MemoryContext old_context;
386
387 /* shorter lines... */
388 slot = MyReplicationSlot;
389
390 /* first some sanity checks that are unlikely to be violated */
391 if (slot == NULL)
392 elog(ERROR, "cannot perform logical decoding without an acquired slot");
393
394 /* make sure the passed slot is suitable, these are user facing errors */
395 if (SlotIsPhysical(slot))
396 ereport(ERROR,
397 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
398 (errmsg("cannot use physical replication slot for logical decoding"))));
399
400 if (slot->data.database != MyDatabaseId)
401 ereport(ERROR,
402 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
403 (errmsg("replication slot \"%s\" was not created in this database",
404 NameStr(slot->data.name)))));
405
406 if (start_lsn == InvalidXLogRecPtr)
407 {
408 /* continue from last position */
409 start_lsn = slot->data.confirmed_flush;
410 }
411 else if (start_lsn < slot->data.confirmed_flush)
412 {
413 /*
414 * It might seem like we should error out in this case, but it's
415 * pretty common for a client to acknowledge a LSN it doesn't have to
416 * do anything for, and thus didn't store persistently, because the
417 * xlog records didn't result in anything relevant for logical
418 * decoding. Clients have to be able to do that to support synchronous
419 * replication.
420 */
421 elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
422 (uint32) (start_lsn >> 32), (uint32) start_lsn,
423 (uint32) (slot->data.confirmed_flush >> 32),
424 (uint32) slot->data.confirmed_flush);
425
426 start_lsn = slot->data.confirmed_flush;
427 }
428
429 ctx = StartupDecodingContext(output_plugin_options,
430 start_lsn, InvalidTransactionId, false,
431 fast_forward, read_page, prepare_write,
432 do_write, update_progress);
433
434 /* call output plugin initialization callback */
435 old_context = MemoryContextSwitchTo(ctx->context);
436 if (ctx->callbacks.startup_cb != NULL)
437 startup_cb_wrapper(ctx, &ctx->options, false);
438 MemoryContextSwitchTo(old_context);
439
440 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
441
442 ereport(LOG,
443 (errmsg("starting logical decoding for slot \"%s\"",
444 NameStr(slot->data.name)),
445 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
446 (uint32) (slot->data.confirmed_flush >> 32),
447 (uint32) slot->data.confirmed_flush,
448 (uint32) (slot->data.restart_lsn >> 32),
449 (uint32) slot->data.restart_lsn)));
450
451 return ctx;
452 }
453
454 /*
455 * Returns true if a consistent initial decoding snapshot has been built.
456 */
457 bool
DecodingContextReady(LogicalDecodingContext * ctx)458 DecodingContextReady(LogicalDecodingContext *ctx)
459 {
460 return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
461 }
462
463 /*
464 * Read from the decoding slot, until it is ready to start extracting changes.
465 */
466 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)467 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
468 {
469 XLogRecPtr startptr;
470 ReplicationSlot *slot = ctx->slot;
471
472 /* Initialize from where to start reading WAL. */
473 startptr = slot->data.restart_lsn;
474
475 elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
476 (uint32) (slot->data.restart_lsn >> 32),
477 (uint32) slot->data.restart_lsn);
478
479 /* Wait for a consistent starting point */
480 for (;;)
481 {
482 XLogRecord *record;
483 char *err = NULL;
484
485 /* the read_page callback waits for new WAL */
486 record = XLogReadRecord(ctx->reader, startptr, &err);
487 if (err)
488 elog(ERROR, "%s", err);
489 if (!record)
490 elog(ERROR, "no record found"); /* shouldn't happen */
491
492 startptr = InvalidXLogRecPtr;
493
494 LogicalDecodingProcessRecord(ctx, ctx->reader);
495
496 /* only continue till we found a consistent spot */
497 if (DecodingContextReady(ctx))
498 break;
499
500 CHECK_FOR_INTERRUPTS();
501 }
502
503 SpinLockAcquire(&slot->mutex);
504 slot->data.confirmed_flush = ctx->reader->EndRecPtr;
505 SpinLockRelease(&slot->mutex);
506 }
507
508 /*
509 * Free a previously allocated decoding context, invoking the shutdown
510 * callback if necessary.
511 */
512 void
FreeDecodingContext(LogicalDecodingContext * ctx)513 FreeDecodingContext(LogicalDecodingContext *ctx)
514 {
515 if (ctx->callbacks.shutdown_cb != NULL)
516 shutdown_cb_wrapper(ctx);
517
518 ReorderBufferFree(ctx->reorder);
519 FreeSnapshotBuilder(ctx->snapshot_builder);
520 XLogReaderFree(ctx->reader);
521 MemoryContextDelete(ctx->context);
522 }
523
524 /*
525 * Prepare a write using the context's output routine.
526 */
527 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)528 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
529 {
530 if (!ctx->accept_writes)
531 elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
532
533 ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
534 ctx->prepared_write = true;
535 }
536
537 /*
538 * Perform a write using the context's output routine.
539 */
540 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)541 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
542 {
543 if (!ctx->prepared_write)
544 elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
545
546 ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
547 ctx->prepared_write = false;
548 }
549
550 /*
551 * Update progress tracking (if supported).
552 */
553 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)554 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
555 {
556 if (!ctx->update_progress)
557 return;
558
559 ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
560 }
561
562 /*
563 * Load the output plugin, lookup its output plugin init function, and check
564 * that it provides the required callbacks.
565 */
566 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)567 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
568 {
569 LogicalOutputPluginInit plugin_init;
570
571 plugin_init = (LogicalOutputPluginInit)
572 load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
573
574 if (plugin_init == NULL)
575 elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
576
577 /* ask the output plugin to fill the callback struct */
578 plugin_init(callbacks);
579
580 if (callbacks->begin_cb == NULL)
581 elog(ERROR, "output plugins have to register a begin callback");
582 if (callbacks->change_cb == NULL)
583 elog(ERROR, "output plugins have to register a change callback");
584 if (callbacks->commit_cb == NULL)
585 elog(ERROR, "output plugins have to register a commit callback");
586 }
587
588 static void
output_plugin_error_callback(void * arg)589 output_plugin_error_callback(void *arg)
590 {
591 LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
592
593 /* not all callbacks have an associated LSN */
594 if (state->report_location != InvalidXLogRecPtr)
595 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
596 NameStr(state->ctx->slot->data.name),
597 NameStr(state->ctx->slot->data.plugin),
598 state->callback_name,
599 (uint32) (state->report_location >> 32),
600 (uint32) state->report_location);
601 else
602 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
603 NameStr(state->ctx->slot->data.name),
604 NameStr(state->ctx->slot->data.plugin),
605 state->callback_name);
606 }
607
608 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)609 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
610 {
611 LogicalErrorCallbackState state;
612 ErrorContextCallback errcallback;
613
614 Assert(!ctx->fast_forward);
615
616 /* Push callback + info on the error context stack */
617 state.ctx = ctx;
618 state.callback_name = "startup";
619 state.report_location = InvalidXLogRecPtr;
620 errcallback.callback = output_plugin_error_callback;
621 errcallback.arg = (void *) &state;
622 errcallback.previous = error_context_stack;
623 error_context_stack = &errcallback;
624
625 /* set output state */
626 ctx->accept_writes = false;
627
628 /* do the actual work: call callback */
629 ctx->callbacks.startup_cb(ctx, opt, is_init);
630
631 /* Pop the error context stack */
632 error_context_stack = errcallback.previous;
633 }
634
635 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)636 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
637 {
638 LogicalErrorCallbackState state;
639 ErrorContextCallback errcallback;
640
641 Assert(!ctx->fast_forward);
642
643 /* Push callback + info on the error context stack */
644 state.ctx = ctx;
645 state.callback_name = "shutdown";
646 state.report_location = InvalidXLogRecPtr;
647 errcallback.callback = output_plugin_error_callback;
648 errcallback.arg = (void *) &state;
649 errcallback.previous = error_context_stack;
650 error_context_stack = &errcallback;
651
652 /* set output state */
653 ctx->accept_writes = false;
654
655 /* do the actual work: call callback */
656 ctx->callbacks.shutdown_cb(ctx);
657
658 /* Pop the error context stack */
659 error_context_stack = errcallback.previous;
660 }
661
662
663 /*
664 * Callbacks for ReorderBuffer which add in some more information and then call
665 * output_plugin.h plugins.
666 */
667 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)668 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
669 {
670 LogicalDecodingContext *ctx = cache->private_data;
671 LogicalErrorCallbackState state;
672 ErrorContextCallback errcallback;
673
674 Assert(!ctx->fast_forward);
675
676 /* Push callback + info on the error context stack */
677 state.ctx = ctx;
678 state.callback_name = "begin";
679 state.report_location = txn->first_lsn;
680 errcallback.callback = output_plugin_error_callback;
681 errcallback.arg = (void *) &state;
682 errcallback.previous = error_context_stack;
683 error_context_stack = &errcallback;
684
685 /* set output state */
686 ctx->accept_writes = true;
687 ctx->write_xid = txn->xid;
688 ctx->write_location = txn->first_lsn;
689
690 /* do the actual work: call callback */
691 ctx->callbacks.begin_cb(ctx, txn);
692
693 /* Pop the error context stack */
694 error_context_stack = errcallback.previous;
695 }
696
697 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)698 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
699 XLogRecPtr commit_lsn)
700 {
701 LogicalDecodingContext *ctx = cache->private_data;
702 LogicalErrorCallbackState state;
703 ErrorContextCallback errcallback;
704
705 Assert(!ctx->fast_forward);
706
707 /* Push callback + info on the error context stack */
708 state.ctx = ctx;
709 state.callback_name = "commit";
710 state.report_location = txn->final_lsn; /* beginning of commit record */
711 errcallback.callback = output_plugin_error_callback;
712 errcallback.arg = (void *) &state;
713 errcallback.previous = error_context_stack;
714 error_context_stack = &errcallback;
715
716 /* set output state */
717 ctx->accept_writes = true;
718 ctx->write_xid = txn->xid;
719 ctx->write_location = txn->end_lsn; /* points to the end of the record */
720
721 /* do the actual work: call callback */
722 ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
723
724 /* Pop the error context stack */
725 error_context_stack = errcallback.previous;
726 }
727
728 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)729 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
730 Relation relation, ReorderBufferChange *change)
731 {
732 LogicalDecodingContext *ctx = cache->private_data;
733 LogicalErrorCallbackState state;
734 ErrorContextCallback errcallback;
735
736 Assert(!ctx->fast_forward);
737
738 /* Push callback + info on the error context stack */
739 state.ctx = ctx;
740 state.callback_name = "change";
741 state.report_location = change->lsn;
742 errcallback.callback = output_plugin_error_callback;
743 errcallback.arg = (void *) &state;
744 errcallback.previous = error_context_stack;
745 error_context_stack = &errcallback;
746
747 /* set output state */
748 ctx->accept_writes = true;
749 ctx->write_xid = txn->xid;
750
751 /*
752 * report this change's lsn so replies from clients can give an up2date
753 * answer. This won't ever be enough (and shouldn't be!) to confirm
754 * receipt of this transaction, but it might allow another transaction's
755 * commit to be confirmed with one message.
756 */
757 ctx->write_location = change->lsn;
758
759 ctx->callbacks.change_cb(ctx, txn, relation, change);
760
761 /* Pop the error context stack */
762 error_context_stack = errcallback.previous;
763 }
764
765 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)766 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
767 int nrelations, Relation relations[], ReorderBufferChange *change)
768 {
769 LogicalDecodingContext *ctx = cache->private_data;
770 LogicalErrorCallbackState state;
771 ErrorContextCallback errcallback;
772
773 Assert(!ctx->fast_forward);
774
775 if (!ctx->callbacks.truncate_cb)
776 return;
777
778 /* Push callback + info on the error context stack */
779 state.ctx = ctx;
780 state.callback_name = "truncate";
781 state.report_location = change->lsn;
782 errcallback.callback = output_plugin_error_callback;
783 errcallback.arg = (void *) &state;
784 errcallback.previous = error_context_stack;
785 error_context_stack = &errcallback;
786
787 /* set output state */
788 ctx->accept_writes = true;
789 ctx->write_xid = txn->xid;
790
791 /*
792 * report this change's lsn so replies from clients can give an up2date
793 * answer. This won't ever be enough (and shouldn't be!) to confirm
794 * receipt of this transaction, but it might allow another transaction's
795 * commit to be confirmed with one message.
796 */
797 ctx->write_location = change->lsn;
798
799 ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
800
801 /* Pop the error context stack */
802 error_context_stack = errcallback.previous;
803 }
804
805 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)806 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
807 {
808 LogicalErrorCallbackState state;
809 ErrorContextCallback errcallback;
810 bool ret;
811
812 Assert(!ctx->fast_forward);
813
814 /* Push callback + info on the error context stack */
815 state.ctx = ctx;
816 state.callback_name = "filter_by_origin";
817 state.report_location = InvalidXLogRecPtr;
818 errcallback.callback = output_plugin_error_callback;
819 errcallback.arg = (void *) &state;
820 errcallback.previous = error_context_stack;
821 error_context_stack = &errcallback;
822
823 /* set output state */
824 ctx->accept_writes = false;
825
826 /* do the actual work: call callback */
827 ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
828
829 /* Pop the error context stack */
830 error_context_stack = errcallback.previous;
831
832 return ret;
833 }
834
835 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)836 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
837 XLogRecPtr message_lsn, bool transactional,
838 const char *prefix, Size message_size, const char *message)
839 {
840 LogicalDecodingContext *ctx = cache->private_data;
841 LogicalErrorCallbackState state;
842 ErrorContextCallback errcallback;
843
844 Assert(!ctx->fast_forward);
845
846 if (ctx->callbacks.message_cb == NULL)
847 return;
848
849 /* Push callback + info on the error context stack */
850 state.ctx = ctx;
851 state.callback_name = "message";
852 state.report_location = message_lsn;
853 errcallback.callback = output_plugin_error_callback;
854 errcallback.arg = (void *) &state;
855 errcallback.previous = error_context_stack;
856 error_context_stack = &errcallback;
857
858 /* set output state */
859 ctx->accept_writes = true;
860 ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
861 ctx->write_location = message_lsn;
862
863 /* do the actual work: call callback */
864 ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
865 message_size, message);
866
867 /* Pop the error context stack */
868 error_context_stack = errcallback.previous;
869 }
870
871 /*
872 * Set the required catalog xmin horizon for historic snapshots in the current
873 * replication slot.
874 *
875 * Note that in the most cases, we won't be able to immediately use the xmin
876 * to increase the xmin horizon: we need to wait till the client has confirmed
877 * receiving current_lsn with LogicalConfirmReceivedLocation().
878 */
879 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)880 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
881 {
882 bool updated_xmin = false;
883 ReplicationSlot *slot;
884
885 slot = MyReplicationSlot;
886
887 Assert(slot != NULL);
888
889 SpinLockAcquire(&slot->mutex);
890
891 /*
892 * don't overwrite if we already have a newer xmin. This can happen if we
893 * restart decoding in a slot.
894 */
895 if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
896 {
897 }
898
899 /*
900 * If the client has already confirmed up to this lsn, we directly can
901 * mark this as accepted. This can happen if we restart decoding in a
902 * slot.
903 */
904 else if (current_lsn <= slot->data.confirmed_flush)
905 {
906 slot->candidate_catalog_xmin = xmin;
907 slot->candidate_xmin_lsn = current_lsn;
908
909 /* our candidate can directly be used */
910 updated_xmin = true;
911 }
912
913 /*
914 * Only increase if the previous values have been applied, otherwise we
915 * might never end up updating if the receiver acks too slowly.
916 */
917 else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
918 {
919 slot->candidate_catalog_xmin = xmin;
920 slot->candidate_xmin_lsn = current_lsn;
921 }
922 SpinLockRelease(&slot->mutex);
923
924 /* candidate already valid with the current flush position, apply */
925 if (updated_xmin)
926 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
927 }
928
929 /*
930 * Mark the minimal LSN (restart_lsn) we need to read to replay all
931 * transactions that have not yet committed at current_lsn.
932 *
933 * Just like LogicalIncreaseXminForSlot this only takes effect when the
934 * client has confirmed to have received current_lsn.
935 */
936 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)937 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
938 {
939 bool updated_lsn = false;
940 ReplicationSlot *slot;
941
942 slot = MyReplicationSlot;
943
944 Assert(slot != NULL);
945 Assert(restart_lsn != InvalidXLogRecPtr);
946 Assert(current_lsn != InvalidXLogRecPtr);
947
948 SpinLockAcquire(&slot->mutex);
949
950 /* don't overwrite if have a newer restart lsn */
951 if (restart_lsn <= slot->data.restart_lsn)
952 {
953 }
954
955 /*
956 * We might have already flushed far enough to directly accept this lsn,
957 * in this case there is no need to check for existing candidate LSNs
958 */
959 else if (current_lsn <= slot->data.confirmed_flush)
960 {
961 slot->candidate_restart_valid = current_lsn;
962 slot->candidate_restart_lsn = restart_lsn;
963
964 /* our candidate can directly be used */
965 updated_lsn = true;
966 }
967
968 /*
969 * Only increase if the previous values have been applied, otherwise we
970 * might never end up updating if the receiver acks too slowly. A missed
971 * value here will just cause some extra effort after reconnecting.
972 */
973 if (slot->candidate_restart_valid == InvalidXLogRecPtr)
974 {
975 slot->candidate_restart_valid = current_lsn;
976 slot->candidate_restart_lsn = restart_lsn;
977 SpinLockRelease(&slot->mutex);
978
979 elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
980 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
981 (uint32) (current_lsn >> 32), (uint32) current_lsn);
982 }
983 else
984 {
985 XLogRecPtr candidate_restart_lsn;
986 XLogRecPtr candidate_restart_valid;
987 XLogRecPtr confirmed_flush;
988
989 candidate_restart_lsn = slot->candidate_restart_lsn;
990 candidate_restart_valid = slot->candidate_restart_valid;
991 confirmed_flush = slot->data.confirmed_flush;
992 SpinLockRelease(&slot->mutex);
993
994 elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
995 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
996 (uint32) (current_lsn >> 32), (uint32) current_lsn,
997 (uint32) (candidate_restart_lsn >> 32),
998 (uint32) candidate_restart_lsn,
999 (uint32) (candidate_restart_valid >> 32),
1000 (uint32) candidate_restart_valid,
1001 (uint32) (confirmed_flush >> 32),
1002 (uint32) confirmed_flush);
1003 }
1004
1005 /* candidates are already valid with the current flush position, apply */
1006 if (updated_lsn)
1007 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
1008 }
1009
1010 /*
1011 * Handle a consumer's confirmation having received all changes up to lsn.
1012 */
1013 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1014 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1015 {
1016 Assert(lsn != InvalidXLogRecPtr);
1017
1018 /* Do an unlocked check for candidate_lsn first. */
1019 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1020 MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1021 {
1022 bool updated_xmin = false;
1023 bool updated_restart = false;
1024
1025 SpinLockAcquire(&MyReplicationSlot->mutex);
1026
1027 MyReplicationSlot->data.confirmed_flush = lsn;
1028
1029 /* if we're past the location required for bumping xmin, do so */
1030 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1031 MyReplicationSlot->candidate_xmin_lsn <= lsn)
1032 {
1033 /*
1034 * We have to write the changed xmin to disk *before* we change
1035 * the in-memory value, otherwise after a crash we wouldn't know
1036 * that some catalog tuples might have been removed already.
1037 *
1038 * Ensure that by first writing to ->xmin and only update
1039 * ->effective_xmin once the new state is synced to disk. After a
1040 * crash ->effective_xmin is set to ->xmin.
1041 */
1042 if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1043 MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1044 {
1045 MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1046 MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1047 MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1048 updated_xmin = true;
1049 }
1050 }
1051
1052 if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1053 MyReplicationSlot->candidate_restart_valid <= lsn)
1054 {
1055 Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1056
1057 MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1058 MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1059 MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1060 updated_restart = true;
1061 }
1062
1063 SpinLockRelease(&MyReplicationSlot->mutex);
1064
1065 /* first write new xmin to disk, so we know what's up after a crash */
1066 if (updated_xmin || updated_restart)
1067 {
1068 ReplicationSlotMarkDirty();
1069 ReplicationSlotSave();
1070 elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1071 }
1072
1073 /*
1074 * Now the new xmin is safely on disk, we can let the global value
1075 * advance. We do not take ProcArrayLock or similar since we only
1076 * advance xmin here and there's not much harm done by a concurrent
1077 * computation missing that.
1078 */
1079 if (updated_xmin)
1080 {
1081 SpinLockAcquire(&MyReplicationSlot->mutex);
1082 MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1083 SpinLockRelease(&MyReplicationSlot->mutex);
1084
1085 ReplicationSlotsComputeRequiredXmin(false);
1086 ReplicationSlotsComputeRequiredLSN();
1087 }
1088 }
1089 else
1090 {
1091 SpinLockAcquire(&MyReplicationSlot->mutex);
1092 MyReplicationSlot->data.confirmed_flush = lsn;
1093 SpinLockRelease(&MyReplicationSlot->mutex);
1094 }
1095 }
1096