1 /*-------------------------------------------------------------------------
2 * logical.c
3 * PostgreSQL logical decoding coordination
4 *
5 * Copyright (c) 2012-2018, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/replication/logical/logical.c
9 *
10 * NOTES
11 * This file coordinates interaction between the various modules that
12 * together provide logical decoding, primarily by providing so
13 * called LogicalDecodingContexts. The goal is to encapsulate most of the
14 * internal complexity for consumers of logical decoding, so they can
15 * create and consume a changestream with a low amount of code. Builtin
16 * consumers are the walsender and SQL SRF interface, but it's possible to
17 * add further ones without changing core code, e.g. to consume changes in
18 * a bgworker.
19 *
20 * The idea is that a consumer provides three callbacks, one to read WAL,
21 * one to prepare a data write, and a final one for actually writing since
22 * their implementation depends on the type of consumer. Check
23 * logicalfuncs.c for an example implementation of a fairly simple consumer
24 * and an implementation of a WAL reading callback that's suitable for
25 * simple consumers.
26 *-------------------------------------------------------------------------
27 */
28
29 #include "postgres.h"
30
31 #include "miscadmin.h"
32
33 #include "access/xact.h"
34 #include "access/xlog_internal.h"
35
36 #include "replication/decode.h"
37 #include "replication/logical.h"
38 #include "replication/reorderbuffer.h"
39 #include "replication/origin.h"
40 #include "replication/snapbuild.h"
41
42 #include "storage/proc.h"
43 #include "storage/procarray.h"
44
45 #include "utils/memutils.h"
46
/*
 * Data for the errcontext callback.
 *
 * The callback wrappers in this file fill one of these on their stack and
 * hand it to output_plugin_error_callback() via ErrorContextCallback.arg.
 */
typedef struct LogicalErrorCallbackState
{
	LogicalDecodingContext *ctx;	/* decoding context the callback runs in */
	const char *callback_name;	/* callback name printed in the context line */
	XLogRecPtr	report_location;	/* LSN to print; InvalidXLogRecPtr if
									 * the callback has no associated LSN */
} LogicalErrorCallbackState;
54
/*
 * Wrappers around output plugin callbacks.  Each wrapper pushes
 * output_plugin_error_callback() onto the error context stack before
 * calling into the plugin, and pops it again afterwards.
 */
static void output_plugin_error_callback(void *arg);
static void startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt,
				   bool is_init);
static void shutdown_cb_wrapper(LogicalDecodingContext *ctx);
static void begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn);
static void commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
				  XLogRecPtr commit_lsn);
static void change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
				  Relation relation, ReorderBufferChange *change);
static void truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
					int nrelations, Relation relations[], ReorderBufferChange *change);
static void message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
				   XLogRecPtr message_lsn, bool transactional,
				   const char *prefix, Size message_size, const char *message);

/* Loads the named output plugin and has it fill in *callbacks. */
static void LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin);
72
73 /*
74 * Make sure the current settings & environment are capable of doing logical
75 * decoding.
76 */
77 void
CheckLogicalDecodingRequirements(void)78 CheckLogicalDecodingRequirements(void)
79 {
80 CheckSlotRequirements();
81
82 /*
83 * NB: Adding a new requirement likely means that RestoreSlotFromDisk()
84 * needs the same check.
85 */
86
87 if (wal_level < WAL_LEVEL_LOGICAL)
88 ereport(ERROR,
89 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
90 errmsg("logical decoding requires wal_level >= logical")));
91
92 if (MyDatabaseId == InvalidOid)
93 ereport(ERROR,
94 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
95 errmsg("logical decoding requires a database connection")));
96
97 /* ----
98 * TODO: We got to change that someday soon...
99 *
100 * There's basically three things missing to allow this:
101 * 1) We need to be able to correctly and quickly identify the timeline a
102 * LSN belongs to
103 * 2) We need to force hot_standby_feedback to be enabled at all times so
104 * the primary cannot remove rows we need.
105 * 3) support dropping replication slots referring to a database, in
106 * dbase_redo. There can't be any active ones due to HS recovery
107 * conflicts, so that should be relatively easy.
108 * ----
109 */
110 if (RecoveryInProgress())
111 ereport(ERROR,
112 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
113 errmsg("logical decoding cannot be used while in recovery")));
114 }
115
116 /*
117 * Helper function for CreateInitialDecodingContext() and
118 * CreateDecodingContext() performing common tasks.
119 */
120 static LogicalDecodingContext *
StartupDecodingContext(List * output_plugin_options,XLogRecPtr start_lsn,TransactionId xmin_horizon,bool need_full_snapshot,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)121 StartupDecodingContext(List *output_plugin_options,
122 XLogRecPtr start_lsn,
123 TransactionId xmin_horizon,
124 bool need_full_snapshot,
125 bool fast_forward,
126 XLogPageReadCB read_page,
127 LogicalOutputPluginWriterPrepareWrite prepare_write,
128 LogicalOutputPluginWriterWrite do_write,
129 LogicalOutputPluginWriterUpdateProgress update_progress)
130 {
131 ReplicationSlot *slot;
132 MemoryContext context,
133 old_context;
134 LogicalDecodingContext *ctx;
135
136 /* shorter lines... */
137 slot = MyReplicationSlot;
138
139 context = AllocSetContextCreate(CurrentMemoryContext,
140 "Logical decoding context",
141 ALLOCSET_DEFAULT_SIZES);
142 old_context = MemoryContextSwitchTo(context);
143 ctx = palloc0(sizeof(LogicalDecodingContext));
144
145 ctx->context = context;
146
147 /*
148 * (re-)load output plugins, so we detect a bad (removed) output plugin
149 * now.
150 */
151 if (!fast_forward)
152 LoadOutputPlugin(&ctx->callbacks, NameStr(slot->data.plugin));
153
154 /*
155 * Now that the slot's xmin has been set, we can announce ourselves as a
156 * logical decoding backend which doesn't need to be checked individually
157 * when computing the xmin horizon because the xmin is enforced via
158 * replication slots.
159 *
160 * We can only do so if we're outside of a transaction (i.e. the case when
161 * streaming changes via walsender), otherwise an already setup
162 * snapshot/xid would end up being ignored. That's not a particularly
163 * bothersome restriction since the SQL interface can't be used for
164 * streaming anyway.
165 */
166 if (!IsTransactionOrTransactionBlock())
167 {
168 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
169 MyPgXact->vacuumFlags |= PROC_IN_LOGICAL_DECODING;
170 LWLockRelease(ProcArrayLock);
171 }
172
173 ctx->slot = slot;
174
175 ctx->reader = XLogReaderAllocate(wal_segment_size, read_page, ctx);
176 if (!ctx->reader)
177 ereport(ERROR,
178 (errcode(ERRCODE_OUT_OF_MEMORY),
179 errmsg("out of memory")));
180
181 ctx->reader->private_data = ctx;
182
183 ctx->reorder = ReorderBufferAllocate();
184 ctx->snapshot_builder =
185 AllocateSnapshotBuilder(ctx->reorder, xmin_horizon, start_lsn,
186 need_full_snapshot);
187
188 ctx->reorder->private_data = ctx;
189
190 /* wrap output plugin callbacks, so we can add error context information */
191 ctx->reorder->begin = begin_cb_wrapper;
192 ctx->reorder->apply_change = change_cb_wrapper;
193 ctx->reorder->apply_truncate = truncate_cb_wrapper;
194 ctx->reorder->commit = commit_cb_wrapper;
195 ctx->reorder->message = message_cb_wrapper;
196
197 ctx->out = makeStringInfo();
198 ctx->prepare_write = prepare_write;
199 ctx->write = do_write;
200 ctx->update_progress = update_progress;
201
202 ctx->output_plugin_options = output_plugin_options;
203
204 ctx->fast_forward = fast_forward;
205
206 MemoryContextSwitchTo(old_context);
207
208 return ctx;
209 }
210
211 /*
212 * Create a new decoding context, for a new logical slot.
213 *
214 * plugin contains the name of the output plugin
215 * output_plugin_options contains options passed to the output plugin
216 * read_page, prepare_write, do_write, update_progress
217 * callbacks that have to be filled to perform the use-case dependent,
218 * actual, work.
219 *
220 * Needs to be called while in a memory context that's at least as long lived
221 * as the decoding context because further memory contexts will be created
222 * inside it.
223 *
224 * Returns an initialized decoding context after calling the output plugin's
225 * startup function.
226 */
227 LogicalDecodingContext *
CreateInitDecodingContext(char * plugin,List * output_plugin_options,bool need_full_snapshot,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)228 CreateInitDecodingContext(char *plugin,
229 List *output_plugin_options,
230 bool need_full_snapshot,
231 XLogPageReadCB read_page,
232 LogicalOutputPluginWriterPrepareWrite prepare_write,
233 LogicalOutputPluginWriterWrite do_write,
234 LogicalOutputPluginWriterUpdateProgress update_progress)
235 {
236 TransactionId xmin_horizon = InvalidTransactionId;
237 ReplicationSlot *slot;
238 LogicalDecodingContext *ctx;
239 MemoryContext old_context;
240
241 /* shorter lines... */
242 slot = MyReplicationSlot;
243
244 /* first some sanity checks that are unlikely to be violated */
245 if (slot == NULL)
246 elog(ERROR, "cannot perform logical decoding without an acquired slot");
247
248 if (plugin == NULL)
249 elog(ERROR, "cannot initialize logical decoding without a specified plugin");
250
251 /* Make sure the passed slot is suitable. These are user facing errors. */
252 if (SlotIsPhysical(slot))
253 ereport(ERROR,
254 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
255 errmsg("cannot use physical replication slot for logical decoding")));
256
257 if (slot->data.database != MyDatabaseId)
258 ereport(ERROR,
259 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
260 errmsg("replication slot \"%s\" was not created in this database",
261 NameStr(slot->data.name))));
262
263 if (IsTransactionState() &&
264 GetTopTransactionIdIfAny() != InvalidTransactionId)
265 ereport(ERROR,
266 (errcode(ERRCODE_ACTIVE_SQL_TRANSACTION),
267 errmsg("cannot create logical replication slot in transaction that has performed writes")));
268
269 /* register output plugin name with slot */
270 SpinLockAcquire(&slot->mutex);
271 StrNCpy(NameStr(slot->data.plugin), plugin, NAMEDATALEN);
272 SpinLockRelease(&slot->mutex);
273
274 ReplicationSlotReserveWal();
275
276 /* ----
277 * This is a bit tricky: We need to determine a safe xmin horizon to start
278 * decoding from, to avoid starting from a running xacts record referring
279 * to xids whose rows have been vacuumed or pruned
280 * already. GetOldestSafeDecodingTransactionId() returns such a value, but
281 * without further interlock its return value might immediately be out of
282 * date.
283 *
284 * So we have to acquire the ProcArrayLock to prevent computation of new
285 * xmin horizons by other backends, get the safe decoding xid, and inform
286 * the slot machinery about the new limit. Once that's done the
287 * ProcArrayLock can be released as the slot machinery now is
288 * protecting against vacuum.
289 *
290 * Note that, temporarily, the data, not just the catalog, xmin has to be
291 * reserved if a data snapshot is to be exported. Otherwise the initial
292 * data snapshot created here is not guaranteed to be valid. After that
293 * the data xmin doesn't need to be managed anymore and the global xmin
294 * should be recomputed. As we are fine with losing the pegged data xmin
295 * after crash - no chance a snapshot would get exported anymore - we can
296 * get away with just setting the slot's
297 * effective_xmin. ReplicationSlotRelease will reset it again.
298 *
299 * ----
300 */
301 LWLockAcquire(ProcArrayLock, LW_EXCLUSIVE);
302
303 xmin_horizon = GetOldestSafeDecodingTransactionId(!need_full_snapshot);
304
305 SpinLockAcquire(&slot->mutex);
306 slot->effective_catalog_xmin = xmin_horizon;
307 slot->data.catalog_xmin = xmin_horizon;
308 if (need_full_snapshot)
309 slot->effective_xmin = xmin_horizon;
310 SpinLockRelease(&slot->mutex);
311
312 ReplicationSlotsComputeRequiredXmin(true);
313
314 LWLockRelease(ProcArrayLock);
315
316 ReplicationSlotMarkDirty();
317 ReplicationSlotSave();
318
319 ctx = StartupDecodingContext(NIL, InvalidXLogRecPtr, xmin_horizon,
320 need_full_snapshot, false,
321 read_page, prepare_write, do_write,
322 update_progress);
323
324 /* call output plugin initialization callback */
325 old_context = MemoryContextSwitchTo(ctx->context);
326 if (ctx->callbacks.startup_cb != NULL)
327 startup_cb_wrapper(ctx, &ctx->options, true);
328 MemoryContextSwitchTo(old_context);
329
330 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
331
332 return ctx;
333 }
334
335 /*
336 * Create a new decoding context, for a logical slot that has previously been
337 * used already.
338 *
339 * start_lsn
340 * The LSN at which to start decoding. If InvalidXLogRecPtr, restart
341 * from the slot's confirmed_flush; otherwise, start from the specified
342 * location (but move it forwards to confirmed_flush if it's older than
343 * that, see below).
344 *
345 * output_plugin_options
346 * options passed to the output plugin.
347 *
348 * fast_forward
349 * bypass the generation of logical changes.
350 *
351 * read_page, prepare_write, do_write, update_progress
352 * callbacks that have to be filled to perform the use-case dependent,
353 * actual work.
354 *
355 * Needs to be called while in a memory context that's at least as long lived
356 * as the decoding context because further memory contexts will be created
357 * inside it.
358 *
359 * Returns an initialized decoding context after calling the output plugin's
360 * startup function.
361 */
362 LogicalDecodingContext *
CreateDecodingContext(XLogRecPtr start_lsn,List * output_plugin_options,bool fast_forward,XLogPageReadCB read_page,LogicalOutputPluginWriterPrepareWrite prepare_write,LogicalOutputPluginWriterWrite do_write,LogicalOutputPluginWriterUpdateProgress update_progress)363 CreateDecodingContext(XLogRecPtr start_lsn,
364 List *output_plugin_options,
365 bool fast_forward,
366 XLogPageReadCB read_page,
367 LogicalOutputPluginWriterPrepareWrite prepare_write,
368 LogicalOutputPluginWriterWrite do_write,
369 LogicalOutputPluginWriterUpdateProgress update_progress)
370 {
371 LogicalDecodingContext *ctx;
372 ReplicationSlot *slot;
373 MemoryContext old_context;
374
375 /* shorter lines... */
376 slot = MyReplicationSlot;
377
378 /* first some sanity checks that are unlikely to be violated */
379 if (slot == NULL)
380 elog(ERROR, "cannot perform logical decoding without an acquired slot");
381
382 /* make sure the passed slot is suitable, these are user facing errors */
383 if (SlotIsPhysical(slot))
384 ereport(ERROR,
385 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
386 (errmsg("cannot use physical replication slot for logical decoding"))));
387
388 if (slot->data.database != MyDatabaseId)
389 ereport(ERROR,
390 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
391 (errmsg("replication slot \"%s\" was not created in this database",
392 NameStr(slot->data.name)))));
393
394 if (start_lsn == InvalidXLogRecPtr)
395 {
396 /* continue from last position */
397 start_lsn = slot->data.confirmed_flush;
398 }
399 else if (start_lsn < slot->data.confirmed_flush)
400 {
401 /*
402 * It might seem like we should error out in this case, but it's
403 * pretty common for a client to acknowledge a LSN it doesn't have to
404 * do anything for, and thus didn't store persistently, because the
405 * xlog records didn't result in anything relevant for logical
406 * decoding. Clients have to be able to do that to support synchronous
407 * replication.
408 */
409 elog(DEBUG1, "cannot stream from %X/%X, minimum is %X/%X, forwarding",
410 (uint32) (start_lsn >> 32), (uint32) start_lsn,
411 (uint32) (slot->data.confirmed_flush >> 32),
412 (uint32) slot->data.confirmed_flush);
413
414 start_lsn = slot->data.confirmed_flush;
415 }
416
417 ctx = StartupDecodingContext(output_plugin_options,
418 start_lsn, InvalidTransactionId, false,
419 fast_forward, read_page, prepare_write,
420 do_write, update_progress);
421
422 /* call output plugin initialization callback */
423 old_context = MemoryContextSwitchTo(ctx->context);
424 if (ctx->callbacks.startup_cb != NULL)
425 startup_cb_wrapper(ctx, &ctx->options, false);
426 MemoryContextSwitchTo(old_context);
427
428 ctx->reorder->output_rewrites = ctx->options.receive_rewrites;
429
430 ereport(LOG,
431 (errmsg("starting logical decoding for slot \"%s\"",
432 NameStr(slot->data.name)),
433 errdetail("Streaming transactions committing after %X/%X, reading WAL from %X/%X.",
434 (uint32) (slot->data.confirmed_flush >> 32),
435 (uint32) slot->data.confirmed_flush,
436 (uint32) (slot->data.restart_lsn >> 32),
437 (uint32) slot->data.restart_lsn)));
438
439 return ctx;
440 }
441
442 /*
443 * Returns true if a consistent initial decoding snapshot has been built.
444 */
445 bool
DecodingContextReady(LogicalDecodingContext * ctx)446 DecodingContextReady(LogicalDecodingContext *ctx)
447 {
448 return SnapBuildCurrentState(ctx->snapshot_builder) == SNAPBUILD_CONSISTENT;
449 }
450
451 /*
452 * Read from the decoding slot, until it is ready to start extracting changes.
453 */
454 void
DecodingContextFindStartpoint(LogicalDecodingContext * ctx)455 DecodingContextFindStartpoint(LogicalDecodingContext *ctx)
456 {
457 XLogRecPtr startptr;
458 ReplicationSlot *slot = ctx->slot;
459
460 /* Initialize from where to start reading WAL. */
461 startptr = slot->data.restart_lsn;
462
463 elog(DEBUG1, "searching for logical decoding starting point, starting at %X/%X",
464 (uint32) (slot->data.restart_lsn >> 32),
465 (uint32) slot->data.restart_lsn);
466
467 /* Wait for a consistent starting point */
468 for (;;)
469 {
470 XLogRecord *record;
471 char *err = NULL;
472
473 /* the read_page callback waits for new WAL */
474 record = XLogReadRecord(ctx->reader, startptr, &err);
475 if (err)
476 elog(ERROR, "%s", err);
477 if (!record)
478 elog(ERROR, "no record found"); /* shouldn't happen */
479
480 startptr = InvalidXLogRecPtr;
481
482 LogicalDecodingProcessRecord(ctx, ctx->reader);
483
484 /* only continue till we found a consistent spot */
485 if (DecodingContextReady(ctx))
486 break;
487
488 CHECK_FOR_INTERRUPTS();
489 }
490
491 SpinLockAcquire(&slot->mutex);
492 slot->data.confirmed_flush = ctx->reader->EndRecPtr;
493 SpinLockRelease(&slot->mutex);
494 }
495
496 /*
497 * Free a previously allocated decoding context, invoking the shutdown
498 * callback if necessary.
499 */
500 void
FreeDecodingContext(LogicalDecodingContext * ctx)501 FreeDecodingContext(LogicalDecodingContext *ctx)
502 {
503 if (ctx->callbacks.shutdown_cb != NULL)
504 shutdown_cb_wrapper(ctx);
505
506 ReorderBufferFree(ctx->reorder);
507 FreeSnapshotBuilder(ctx->snapshot_builder);
508 XLogReaderFree(ctx->reader);
509 MemoryContextDelete(ctx->context);
510 }
511
512 /*
513 * Prepare a write using the context's output routine.
514 */
515 void
OutputPluginPrepareWrite(struct LogicalDecodingContext * ctx,bool last_write)516 OutputPluginPrepareWrite(struct LogicalDecodingContext *ctx, bool last_write)
517 {
518 if (!ctx->accept_writes)
519 elog(ERROR, "writes are only accepted in commit, begin and change callbacks");
520
521 ctx->prepare_write(ctx, ctx->write_location, ctx->write_xid, last_write);
522 ctx->prepared_write = true;
523 }
524
525 /*
526 * Perform a write using the context's output routine.
527 */
528 void
OutputPluginWrite(struct LogicalDecodingContext * ctx,bool last_write)529 OutputPluginWrite(struct LogicalDecodingContext *ctx, bool last_write)
530 {
531 if (!ctx->prepared_write)
532 elog(ERROR, "OutputPluginPrepareWrite needs to be called before OutputPluginWrite");
533
534 ctx->write(ctx, ctx->write_location, ctx->write_xid, last_write);
535 ctx->prepared_write = false;
536 }
537
538 /*
539 * Update progress tracking (if supported).
540 */
541 void
OutputPluginUpdateProgress(struct LogicalDecodingContext * ctx)542 OutputPluginUpdateProgress(struct LogicalDecodingContext *ctx)
543 {
544 if (!ctx->update_progress)
545 return;
546
547 ctx->update_progress(ctx, ctx->write_location, ctx->write_xid);
548 }
549
550 /*
551 * Load the output plugin, lookup its output plugin init function, and check
552 * that it provides the required callbacks.
553 */
554 static void
LoadOutputPlugin(OutputPluginCallbacks * callbacks,char * plugin)555 LoadOutputPlugin(OutputPluginCallbacks *callbacks, char *plugin)
556 {
557 LogicalOutputPluginInit plugin_init;
558
559 plugin_init = (LogicalOutputPluginInit)
560 load_external_function(plugin, "_PG_output_plugin_init", false, NULL);
561
562 if (plugin_init == NULL)
563 elog(ERROR, "output plugins have to declare the _PG_output_plugin_init symbol");
564
565 /* ask the output plugin to fill the callback struct */
566 plugin_init(callbacks);
567
568 if (callbacks->begin_cb == NULL)
569 elog(ERROR, "output plugins have to register a begin callback");
570 if (callbacks->change_cb == NULL)
571 elog(ERROR, "output plugins have to register a change callback");
572 if (callbacks->commit_cb == NULL)
573 elog(ERROR, "output plugins have to register a commit callback");
574 }
575
576 static void
output_plugin_error_callback(void * arg)577 output_plugin_error_callback(void *arg)
578 {
579 LogicalErrorCallbackState *state = (LogicalErrorCallbackState *) arg;
580
581 /* not all callbacks have an associated LSN */
582 if (state->report_location != InvalidXLogRecPtr)
583 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback, associated LSN %X/%X",
584 NameStr(state->ctx->slot->data.name),
585 NameStr(state->ctx->slot->data.plugin),
586 state->callback_name,
587 (uint32) (state->report_location >> 32),
588 (uint32) state->report_location);
589 else
590 errcontext("slot \"%s\", output plugin \"%s\", in the %s callback",
591 NameStr(state->ctx->slot->data.name),
592 NameStr(state->ctx->slot->data.plugin),
593 state->callback_name);
594 }
595
596 static void
startup_cb_wrapper(LogicalDecodingContext * ctx,OutputPluginOptions * opt,bool is_init)597 startup_cb_wrapper(LogicalDecodingContext *ctx, OutputPluginOptions *opt, bool is_init)
598 {
599 LogicalErrorCallbackState state;
600 ErrorContextCallback errcallback;
601
602 Assert(!ctx->fast_forward);
603
604 /* Push callback + info on the error context stack */
605 state.ctx = ctx;
606 state.callback_name = "startup";
607 state.report_location = InvalidXLogRecPtr;
608 errcallback.callback = output_plugin_error_callback;
609 errcallback.arg = (void *) &state;
610 errcallback.previous = error_context_stack;
611 error_context_stack = &errcallback;
612
613 /* set output state */
614 ctx->accept_writes = false;
615
616 /* do the actual work: call callback */
617 ctx->callbacks.startup_cb(ctx, opt, is_init);
618
619 /* Pop the error context stack */
620 error_context_stack = errcallback.previous;
621 }
622
623 static void
shutdown_cb_wrapper(LogicalDecodingContext * ctx)624 shutdown_cb_wrapper(LogicalDecodingContext *ctx)
625 {
626 LogicalErrorCallbackState state;
627 ErrorContextCallback errcallback;
628
629 Assert(!ctx->fast_forward);
630
631 /* Push callback + info on the error context stack */
632 state.ctx = ctx;
633 state.callback_name = "shutdown";
634 state.report_location = InvalidXLogRecPtr;
635 errcallback.callback = output_plugin_error_callback;
636 errcallback.arg = (void *) &state;
637 errcallback.previous = error_context_stack;
638 error_context_stack = &errcallback;
639
640 /* set output state */
641 ctx->accept_writes = false;
642
643 /* do the actual work: call callback */
644 ctx->callbacks.shutdown_cb(ctx);
645
646 /* Pop the error context stack */
647 error_context_stack = errcallback.previous;
648 }
649
650
651 /*
652 * Callbacks for ReorderBuffer which add in some more information and then call
653 * output_plugin.h plugins.
654 */
655 static void
begin_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn)656 begin_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn)
657 {
658 LogicalDecodingContext *ctx = cache->private_data;
659 LogicalErrorCallbackState state;
660 ErrorContextCallback errcallback;
661
662 Assert(!ctx->fast_forward);
663
664 /* Push callback + info on the error context stack */
665 state.ctx = ctx;
666 state.callback_name = "begin";
667 state.report_location = txn->first_lsn;
668 errcallback.callback = output_plugin_error_callback;
669 errcallback.arg = (void *) &state;
670 errcallback.previous = error_context_stack;
671 error_context_stack = &errcallback;
672
673 /* set output state */
674 ctx->accept_writes = true;
675 ctx->write_xid = txn->xid;
676 ctx->write_location = txn->first_lsn;
677
678 /* do the actual work: call callback */
679 ctx->callbacks.begin_cb(ctx, txn);
680
681 /* Pop the error context stack */
682 error_context_stack = errcallback.previous;
683 }
684
685 static void
commit_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr commit_lsn)686 commit_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
687 XLogRecPtr commit_lsn)
688 {
689 LogicalDecodingContext *ctx = cache->private_data;
690 LogicalErrorCallbackState state;
691 ErrorContextCallback errcallback;
692
693 Assert(!ctx->fast_forward);
694
695 /* Push callback + info on the error context stack */
696 state.ctx = ctx;
697 state.callback_name = "commit";
698 state.report_location = txn->final_lsn; /* beginning of commit record */
699 errcallback.callback = output_plugin_error_callback;
700 errcallback.arg = (void *) &state;
701 errcallback.previous = error_context_stack;
702 error_context_stack = &errcallback;
703
704 /* set output state */
705 ctx->accept_writes = true;
706 ctx->write_xid = txn->xid;
707 ctx->write_location = txn->end_lsn; /* points to the end of the record */
708
709 /* do the actual work: call callback */
710 ctx->callbacks.commit_cb(ctx, txn, commit_lsn);
711
712 /* Pop the error context stack */
713 error_context_stack = errcallback.previous;
714 }
715
716 static void
change_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,Relation relation,ReorderBufferChange * change)717 change_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
718 Relation relation, ReorderBufferChange *change)
719 {
720 LogicalDecodingContext *ctx = cache->private_data;
721 LogicalErrorCallbackState state;
722 ErrorContextCallback errcallback;
723
724 Assert(!ctx->fast_forward);
725
726 /* Push callback + info on the error context stack */
727 state.ctx = ctx;
728 state.callback_name = "change";
729 state.report_location = change->lsn;
730 errcallback.callback = output_plugin_error_callback;
731 errcallback.arg = (void *) &state;
732 errcallback.previous = error_context_stack;
733 error_context_stack = &errcallback;
734
735 /* set output state */
736 ctx->accept_writes = true;
737 ctx->write_xid = txn->xid;
738
739 /*
740 * report this change's lsn so replies from clients can give an up2date
741 * answer. This won't ever be enough (and shouldn't be!) to confirm
742 * receipt of this transaction, but it might allow another transaction's
743 * commit to be confirmed with one message.
744 */
745 ctx->write_location = change->lsn;
746
747 ctx->callbacks.change_cb(ctx, txn, relation, change);
748
749 /* Pop the error context stack */
750 error_context_stack = errcallback.previous;
751 }
752
753 static void
truncate_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,int nrelations,Relation relations[],ReorderBufferChange * change)754 truncate_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
755 int nrelations, Relation relations[], ReorderBufferChange *change)
756 {
757 LogicalDecodingContext *ctx = cache->private_data;
758 LogicalErrorCallbackState state;
759 ErrorContextCallback errcallback;
760
761 Assert(!ctx->fast_forward);
762
763 if (!ctx->callbacks.truncate_cb)
764 return;
765
766 /* Push callback + info on the error context stack */
767 state.ctx = ctx;
768 state.callback_name = "truncate";
769 state.report_location = change->lsn;
770 errcallback.callback = output_plugin_error_callback;
771 errcallback.arg = (void *) &state;
772 errcallback.previous = error_context_stack;
773 error_context_stack = &errcallback;
774
775 /* set output state */
776 ctx->accept_writes = true;
777 ctx->write_xid = txn->xid;
778
779 /*
780 * report this change's lsn so replies from clients can give an up2date
781 * answer. This won't ever be enough (and shouldn't be!) to confirm
782 * receipt of this transaction, but it might allow another transaction's
783 * commit to be confirmed with one message.
784 */
785 ctx->write_location = change->lsn;
786
787 ctx->callbacks.truncate_cb(ctx, txn, nrelations, relations, change);
788
789 /* Pop the error context stack */
790 error_context_stack = errcallback.previous;
791 }
792
793 bool
filter_by_origin_cb_wrapper(LogicalDecodingContext * ctx,RepOriginId origin_id)794 filter_by_origin_cb_wrapper(LogicalDecodingContext *ctx, RepOriginId origin_id)
795 {
796 LogicalErrorCallbackState state;
797 ErrorContextCallback errcallback;
798 bool ret;
799
800 Assert(!ctx->fast_forward);
801
802 /* Push callback + info on the error context stack */
803 state.ctx = ctx;
804 state.callback_name = "filter_by_origin";
805 state.report_location = InvalidXLogRecPtr;
806 errcallback.callback = output_plugin_error_callback;
807 errcallback.arg = (void *) &state;
808 errcallback.previous = error_context_stack;
809 error_context_stack = &errcallback;
810
811 /* set output state */
812 ctx->accept_writes = false;
813
814 /* do the actual work: call callback */
815 ret = ctx->callbacks.filter_by_origin_cb(ctx, origin_id);
816
817 /* Pop the error context stack */
818 error_context_stack = errcallback.previous;
819
820 return ret;
821 }
822
823 static void
message_cb_wrapper(ReorderBuffer * cache,ReorderBufferTXN * txn,XLogRecPtr message_lsn,bool transactional,const char * prefix,Size message_size,const char * message)824 message_cb_wrapper(ReorderBuffer *cache, ReorderBufferTXN *txn,
825 XLogRecPtr message_lsn, bool transactional,
826 const char *prefix, Size message_size, const char *message)
827 {
828 LogicalDecodingContext *ctx = cache->private_data;
829 LogicalErrorCallbackState state;
830 ErrorContextCallback errcallback;
831
832 Assert(!ctx->fast_forward);
833
834 if (ctx->callbacks.message_cb == NULL)
835 return;
836
837 /* Push callback + info on the error context stack */
838 state.ctx = ctx;
839 state.callback_name = "message";
840 state.report_location = message_lsn;
841 errcallback.callback = output_plugin_error_callback;
842 errcallback.arg = (void *) &state;
843 errcallback.previous = error_context_stack;
844 error_context_stack = &errcallback;
845
846 /* set output state */
847 ctx->accept_writes = true;
848 ctx->write_xid = txn != NULL ? txn->xid : InvalidTransactionId;
849 ctx->write_location = message_lsn;
850
851 /* do the actual work: call callback */
852 ctx->callbacks.message_cb(ctx, txn, message_lsn, transactional, prefix,
853 message_size, message);
854
855 /* Pop the error context stack */
856 error_context_stack = errcallback.previous;
857 }
858
859 /*
860 * Set the required catalog xmin horizon for historic snapshots in the current
861 * replication slot.
862 *
863 * Note that in the most cases, we won't be able to immediately use the xmin
864 * to increase the xmin horizon: we need to wait till the client has confirmed
865 * receiving current_lsn with LogicalConfirmReceivedLocation().
866 */
867 void
LogicalIncreaseXminForSlot(XLogRecPtr current_lsn,TransactionId xmin)868 LogicalIncreaseXminForSlot(XLogRecPtr current_lsn, TransactionId xmin)
869 {
870 bool updated_xmin = false;
871 ReplicationSlot *slot;
872
873 slot = MyReplicationSlot;
874
875 Assert(slot != NULL);
876
877 SpinLockAcquire(&slot->mutex);
878
879 /*
880 * don't overwrite if we already have a newer xmin. This can happen if we
881 * restart decoding in a slot.
882 */
883 if (TransactionIdPrecedesOrEquals(xmin, slot->data.catalog_xmin))
884 {
885 }
886
887 /*
888 * If the client has already confirmed up to this lsn, we directly can
889 * mark this as accepted. This can happen if we restart decoding in a
890 * slot.
891 */
892 else if (current_lsn <= slot->data.confirmed_flush)
893 {
894 slot->candidate_catalog_xmin = xmin;
895 slot->candidate_xmin_lsn = current_lsn;
896
897 /* our candidate can directly be used */
898 updated_xmin = true;
899 }
900
901 /*
902 * Only increase if the previous values have been applied, otherwise we
903 * might never end up updating if the receiver acks too slowly.
904 */
905 else if (slot->candidate_xmin_lsn == InvalidXLogRecPtr)
906 {
907 slot->candidate_catalog_xmin = xmin;
908 slot->candidate_xmin_lsn = current_lsn;
909 }
910 SpinLockRelease(&slot->mutex);
911
912 /* candidate already valid with the current flush position, apply */
913 if (updated_xmin)
914 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
915 }
916
917 /*
918 * Mark the minimal LSN (restart_lsn) we need to read to replay all
919 * transactions that have not yet committed at current_lsn.
920 *
 * Just like LogicalIncreaseXminForSlot this only takes effect when the
 * client has confirmed to have received current_lsn.
923 */
924 void
LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn,XLogRecPtr restart_lsn)925 LogicalIncreaseRestartDecodingForSlot(XLogRecPtr current_lsn, XLogRecPtr restart_lsn)
926 {
927 bool updated_lsn = false;
928 ReplicationSlot *slot;
929
930 slot = MyReplicationSlot;
931
932 Assert(slot != NULL);
933 Assert(restart_lsn != InvalidXLogRecPtr);
934 Assert(current_lsn != InvalidXLogRecPtr);
935
936 SpinLockAcquire(&slot->mutex);
937
938 /* don't overwrite if have a newer restart lsn */
939 if (restart_lsn <= slot->data.restart_lsn)
940 {
941 }
942
943 /*
944 * We might have already flushed far enough to directly accept this lsn,
945 * in this case there is no need to check for existing candidate LSNs
946 */
947 else if (current_lsn <= slot->data.confirmed_flush)
948 {
949 slot->candidate_restart_valid = current_lsn;
950 slot->candidate_restart_lsn = restart_lsn;
951
952 /* our candidate can directly be used */
953 updated_lsn = true;
954 }
955
956 /*
957 * Only increase if the previous values have been applied, otherwise we
958 * might never end up updating if the receiver acks too slowly. A missed
959 * value here will just cause some extra effort after reconnecting.
960 */
961 if (slot->candidate_restart_valid == InvalidXLogRecPtr)
962 {
963 slot->candidate_restart_valid = current_lsn;
964 slot->candidate_restart_lsn = restart_lsn;
965 SpinLockRelease(&slot->mutex);
966
967 elog(DEBUG1, "got new restart lsn %X/%X at %X/%X",
968 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
969 (uint32) (current_lsn >> 32), (uint32) current_lsn);
970 }
971 else
972 {
973 XLogRecPtr candidate_restart_lsn;
974 XLogRecPtr candidate_restart_valid;
975 XLogRecPtr confirmed_flush;
976
977 candidate_restart_lsn = slot->candidate_restart_lsn;
978 candidate_restart_valid = slot->candidate_restart_valid;
979 confirmed_flush = slot->data.confirmed_flush;
980 SpinLockRelease(&slot->mutex);
981
982 elog(DEBUG1, "failed to increase restart lsn: proposed %X/%X, after %X/%X, current candidate %X/%X, current after %X/%X, flushed up to %X/%X",
983 (uint32) (restart_lsn >> 32), (uint32) restart_lsn,
984 (uint32) (current_lsn >> 32), (uint32) current_lsn,
985 (uint32) (candidate_restart_lsn >> 32),
986 (uint32) candidate_restart_lsn,
987 (uint32) (candidate_restart_valid >> 32),
988 (uint32) candidate_restart_valid,
989 (uint32) (confirmed_flush >> 32),
990 (uint32) confirmed_flush);
991 }
992
993 /* candidates are already valid with the current flush position, apply */
994 if (updated_lsn)
995 LogicalConfirmReceivedLocation(slot->data.confirmed_flush);
996 }
997
998 /*
999 * Handle a consumer's confirmation having received all changes up to lsn.
1000 */
1001 void
LogicalConfirmReceivedLocation(XLogRecPtr lsn)1002 LogicalConfirmReceivedLocation(XLogRecPtr lsn)
1003 {
1004 Assert(lsn != InvalidXLogRecPtr);
1005
1006 /* Do an unlocked check for candidate_lsn first. */
1007 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr ||
1008 MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr)
1009 {
1010 bool updated_xmin = false;
1011 bool updated_restart = false;
1012
1013 SpinLockAcquire(&MyReplicationSlot->mutex);
1014
1015 MyReplicationSlot->data.confirmed_flush = lsn;
1016
1017 /* if we're past the location required for bumping xmin, do so */
1018 if (MyReplicationSlot->candidate_xmin_lsn != InvalidXLogRecPtr &&
1019 MyReplicationSlot->candidate_xmin_lsn <= lsn)
1020 {
1021 /*
1022 * We have to write the changed xmin to disk *before* we change
1023 * the in-memory value, otherwise after a crash we wouldn't know
1024 * that some catalog tuples might have been removed already.
1025 *
1026 * Ensure that by first writing to ->xmin and only update
1027 * ->effective_xmin once the new state is synced to disk. After a
1028 * crash ->effective_xmin is set to ->xmin.
1029 */
1030 if (TransactionIdIsValid(MyReplicationSlot->candidate_catalog_xmin) &&
1031 MyReplicationSlot->data.catalog_xmin != MyReplicationSlot->candidate_catalog_xmin)
1032 {
1033 MyReplicationSlot->data.catalog_xmin = MyReplicationSlot->candidate_catalog_xmin;
1034 MyReplicationSlot->candidate_catalog_xmin = InvalidTransactionId;
1035 MyReplicationSlot->candidate_xmin_lsn = InvalidXLogRecPtr;
1036 updated_xmin = true;
1037 }
1038 }
1039
1040 if (MyReplicationSlot->candidate_restart_valid != InvalidXLogRecPtr &&
1041 MyReplicationSlot->candidate_restart_valid <= lsn)
1042 {
1043 Assert(MyReplicationSlot->candidate_restart_lsn != InvalidXLogRecPtr);
1044
1045 MyReplicationSlot->data.restart_lsn = MyReplicationSlot->candidate_restart_lsn;
1046 MyReplicationSlot->candidate_restart_lsn = InvalidXLogRecPtr;
1047 MyReplicationSlot->candidate_restart_valid = InvalidXLogRecPtr;
1048 updated_restart = true;
1049 }
1050
1051 SpinLockRelease(&MyReplicationSlot->mutex);
1052
1053 /* first write new xmin to disk, so we know what's up after a crash */
1054 if (updated_xmin || updated_restart)
1055 {
1056 ReplicationSlotMarkDirty();
1057 ReplicationSlotSave();
1058 elog(DEBUG1, "updated xmin: %u restart: %u", updated_xmin, updated_restart);
1059 }
1060
1061 /*
1062 * Now the new xmin is safely on disk, we can let the global value
1063 * advance. We do not take ProcArrayLock or similar since we only
1064 * advance xmin here and there's not much harm done by a concurrent
1065 * computation missing that.
1066 */
1067 if (updated_xmin)
1068 {
1069 SpinLockAcquire(&MyReplicationSlot->mutex);
1070 MyReplicationSlot->effective_catalog_xmin = MyReplicationSlot->data.catalog_xmin;
1071 SpinLockRelease(&MyReplicationSlot->mutex);
1072
1073 ReplicationSlotsComputeRequiredXmin(false);
1074 ReplicationSlotsComputeRequiredLSN();
1075 }
1076 }
1077 else
1078 {
1079 SpinLockAcquire(&MyReplicationSlot->mutex);
1080 MyReplicationSlot->data.confirmed_flush = lsn;
1081 SpinLockRelease(&MyReplicationSlot->mutex);
1082 }
1083 }
1084