1 /*--------------------------------------------------------------------
2 * bgworker.c
3 * POSTGRES pluggable background workers implementation
4 *
5 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/postmaster/bgworker.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13 #include "postgres.h"
14
15 #include <unistd.h>
16
17 #include "libpq/pqsignal.h"
18 #include "access/parallel.h"
19 #include "miscadmin.h"
20 #include "pgstat.h"
21 #include "port/atomics.h"
22 #include "postmaster/bgworker_internals.h"
23 #include "postmaster/postmaster.h"
24 #include "replication/logicallauncher.h"
25 #include "replication/logicalworker.h"
26 #include "storage/dsm.h"
27 #include "storage/ipc.h"
28 #include "storage/latch.h"
29 #include "storage/lwlock.h"
30 #include "storage/pg_shmem.h"
31 #include "storage/pmsignal.h"
32 #include "storage/proc.h"
33 #include "storage/procsignal.h"
34 #include "storage/shmem.h"
35 #include "tcop/tcopprot.h"
36 #include "utils/ascii.h"
37 #include "utils/ps_status.h"
38 #include "utils/timeout.h"
39
40 /*
41 * The postmaster's list of registered background workers, in private memory.
42 */
43 slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
44
45 /*
46 * BackgroundWorkerSlots exist in shared memory and can be accessed (via
47 * the BackgroundWorkerArray) by both the postmaster and by regular backends.
48 * However, the postmaster cannot take locks, even spinlocks, because this
49 * might allow it to crash or become wedged if shared memory gets corrupted.
50 * Such an outcome is intolerable. Therefore, we need a lockless protocol
51 * for coordinating access to this data.
52 *
53 * The 'in_use' flag is used to hand off responsibility for the slot between
54 * the postmaster and the rest of the system. When 'in_use' is false,
55 * the postmaster will ignore the slot entirely, except for the 'in_use' flag
56 * itself, which it may read. In this state, regular backends may modify the
57 * slot. Once a backend sets 'in_use' to true, the slot becomes the
58 * responsibility of the postmaster. Regular backends may no longer modify it,
59 * but the postmaster may examine it. Thus, a backend initializing a slot
60 * must fully initialize the slot - and insert a write memory barrier - before
61 * marking it as in use.
62 *
63 * As an exception, however, even when the slot is in use, regular backends
64 * may set the 'terminate' flag for a slot, telling the postmaster not
65 * to restart it. Once the background worker is no longer running, the slot
66 * will be released for reuse.
67 *
68 * In addition to coordinating with the postmaster, backends modifying this
69 * data structure must coordinate with each other. Since they can take locks,
70 * this is straightforward: any backend wishing to manipulate a slot must
71 * take BackgroundWorkerLock in exclusive mode. Backends wishing to read
72 * data that might get concurrently modified by other backends should take
73 * this lock in shared mode. No matter what, backends reading this data
74 * structure must be able to tolerate concurrent modifications by the
75 * postmaster.
76 */
77 typedef struct BackgroundWorkerSlot
78 {
79 bool in_use;
80 bool terminate;
81 pid_t pid; /* InvalidPid = not started yet; 0 = dead */
82 uint64 generation; /* incremented when slot is recycled */
83 BackgroundWorker worker;
84 } BackgroundWorkerSlot;
85
86 /*
87 * In order to limit the total number of parallel workers (according to
88 * max_parallel_workers GUC), we maintain the number of active parallel
89 * workers. Since the postmaster cannot take locks, two variables are used for
90 * this purpose: the number of registered parallel workers (modified by the
91 * backends, protected by BackgroundWorkerLock) and the number of terminated
92 * parallel workers (modified only by the postmaster, lockless). The active
93 * number of parallel workers is the number of registered workers minus the
94 * terminated ones. These counters can of course overflow, but it's not
95 * important here since the subtraction will still give the right number.
96 */
97 typedef struct BackgroundWorkerArray
98 {
99 int total_slots;
100 uint32 parallel_register_count;
101 uint32 parallel_terminate_count;
102 BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
103 } BackgroundWorkerArray;
104
105 struct BackgroundWorkerHandle
106 {
107 int slot;
108 uint64 generation;
109 };
110
111 static BackgroundWorkerArray *BackgroundWorkerData;
112
113 /*
114 * List of internal background worker entry points. We need this for
115 * reasons explained in LookupBackgroundWorkerFunction(), below.
116 */
117 static const struct
118 {
119 const char *fn_name;
120 bgworker_main_type fn_addr;
121 } InternalBGWorkers[] =
122
123 {
124 {
125 "ParallelWorkerMain", ParallelWorkerMain
126 },
127 {
128 "ApplyLauncherMain", ApplyLauncherMain
129 },
130 {
131 "ApplyWorkerMain", ApplyWorkerMain
132 }
133 };
134
135 /* Private functions. */
136 static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);
137
138
139 /*
140 * Calculate shared memory needed.
141 */
142 Size
BackgroundWorkerShmemSize(void)143 BackgroundWorkerShmemSize(void)
144 {
145 Size size;
146
147 /* Array of workers is variably sized. */
148 size = offsetof(BackgroundWorkerArray, slot);
149 size = add_size(size, mul_size(max_worker_processes,
150 sizeof(BackgroundWorkerSlot)));
151
152 return size;
153 }
154
155 /*
156 * Initialize shared memory.
157 */
158 void
BackgroundWorkerShmemInit(void)159 BackgroundWorkerShmemInit(void)
160 {
161 bool found;
162
163 BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
164 BackgroundWorkerShmemSize(),
165 &found);
166 if (!IsUnderPostmaster)
167 {
168 slist_iter siter;
169 int slotno = 0;
170
171 BackgroundWorkerData->total_slots = max_worker_processes;
172 BackgroundWorkerData->parallel_register_count = 0;
173 BackgroundWorkerData->parallel_terminate_count = 0;
174
175 /*
176 * Copy contents of worker list into shared memory. Record the shared
177 * memory slot assigned to each worker. This ensures a 1-to-1
178 * correspondence between the postmaster's private list and the array
179 * in shared memory.
180 */
181 slist_foreach(siter, &BackgroundWorkerList)
182 {
183 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
184 RegisteredBgWorker *rw;
185
186 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
187 Assert(slotno < max_worker_processes);
188 slot->in_use = true;
189 slot->terminate = false;
190 slot->pid = InvalidPid;
191 slot->generation = 0;
192 rw->rw_shmem_slot = slotno;
193 rw->rw_worker.bgw_notify_pid = 0; /* might be reinit after crash */
194 memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
195 ++slotno;
196 }
197
198 /*
199 * Mark any remaining slots as not in use.
200 */
201 while (slotno < max_worker_processes)
202 {
203 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
204
205 slot->in_use = false;
206 ++slotno;
207 }
208 }
209 else
210 Assert(found);
211 }
212
213 /*
214 * Search the postmaster's backend-private list of RegisteredBgWorker objects
215 * for the one that maps to the given slot number.
216 */
217 static RegisteredBgWorker *
FindRegisteredWorkerBySlotNumber(int slotno)218 FindRegisteredWorkerBySlotNumber(int slotno)
219 {
220 slist_iter siter;
221
222 slist_foreach(siter, &BackgroundWorkerList)
223 {
224 RegisteredBgWorker *rw;
225
226 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
227 if (rw->rw_shmem_slot == slotno)
228 return rw;
229 }
230
231 return NULL;
232 }
233
234 /*
235 * Notice changes to shared memory made by other backends.
236 * Accept new worker requests only if allow_new_workers is true.
237 *
238 * This code runs in the postmaster, so we must be very careful not to assume
239 * that shared memory contents are sane. Otherwise, a rogue backend could
240 * take out the postmaster.
241 */
242 void
BackgroundWorkerStateChange(bool allow_new_workers)243 BackgroundWorkerStateChange(bool allow_new_workers)
244 {
245 int slotno;
246
247 /*
248 * The total number of slots stored in shared memory should match our
249 * notion of max_worker_processes. If it does not, something is very
250 * wrong. Further down, we always refer to this value as
251 * max_worker_processes, in case shared memory gets corrupted while we're
252 * looping.
253 */
254 if (max_worker_processes != BackgroundWorkerData->total_slots)
255 {
256 elog(LOG,
257 "inconsistent background worker state (max_worker_processes=%d, total_slots=%d",
258 max_worker_processes,
259 BackgroundWorkerData->total_slots);
260 return;
261 }
262
263 /*
264 * Iterate through slots, looking for newly-registered workers or workers
265 * who must die.
266 */
267 for (slotno = 0; slotno < max_worker_processes; ++slotno)
268 {
269 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
270 RegisteredBgWorker *rw;
271
272 if (!slot->in_use)
273 continue;
274
275 /*
276 * Make sure we don't see the in_use flag before the updated slot
277 * contents.
278 */
279 pg_read_barrier();
280
281 /* See whether we already know about this worker. */
282 rw = FindRegisteredWorkerBySlotNumber(slotno);
283 if (rw != NULL)
284 {
285 /*
286 * In general, the worker data can't change after it's initially
287 * registered. However, someone can set the terminate flag.
288 */
289 if (slot->terminate && !rw->rw_terminate)
290 {
291 rw->rw_terminate = true;
292 if (rw->rw_pid != 0)
293 kill(rw->rw_pid, SIGTERM);
294 else
295 {
296 /* Report never-started, now-terminated worker as dead. */
297 ReportBackgroundWorkerPID(rw);
298 }
299 }
300 continue;
301 }
302
303 /*
304 * If we aren't allowing new workers, then immediately mark it for
305 * termination; the next stanza will take care of cleaning it up.
306 * Doing this ensures that any process waiting for the worker will get
307 * awoken, even though the worker will never be allowed to run.
308 */
309 if (!allow_new_workers)
310 slot->terminate = true;
311
312 /*
313 * If the worker is marked for termination, we don't need to add it to
314 * the registered workers list; we can just free the slot. However, if
315 * bgw_notify_pid is set, the process that registered the worker may
316 * need to know that we've processed the terminate request, so be sure
317 * to signal it.
318 */
319 if (slot->terminate)
320 {
321 int notify_pid;
322
323 /*
324 * We need a memory barrier here to make sure that the load of
325 * bgw_notify_pid and the update of parallel_terminate_count
326 * complete before the store to in_use.
327 */
328 notify_pid = slot->worker.bgw_notify_pid;
329 if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
330 BackgroundWorkerData->parallel_terminate_count++;
331 slot->pid = 0;
332
333 pg_memory_barrier();
334 slot->in_use = false;
335
336 if (notify_pid != 0)
337 kill(notify_pid, SIGUSR1);
338
339 continue;
340 }
341
342 /*
343 * Copy the registration data into the registered workers list.
344 */
345 rw = malloc(sizeof(RegisteredBgWorker));
346 if (rw == NULL)
347 {
348 ereport(LOG,
349 (errcode(ERRCODE_OUT_OF_MEMORY),
350 errmsg("out of memory")));
351 return;
352 }
353
354 /*
355 * Copy strings in a paranoid way. If shared memory is corrupted, the
356 * source data might not even be NUL-terminated.
357 */
358 ascii_safe_strlcpy(rw->rw_worker.bgw_name,
359 slot->worker.bgw_name, BGW_MAXLEN);
360 ascii_safe_strlcpy(rw->rw_worker.bgw_library_name,
361 slot->worker.bgw_library_name, BGW_MAXLEN);
362 ascii_safe_strlcpy(rw->rw_worker.bgw_function_name,
363 slot->worker.bgw_function_name, BGW_MAXLEN);
364
365 /*
366 * Copy various fixed-size fields.
367 *
368 * flags, start_time, and restart_time are examined by the postmaster,
369 * but nothing too bad will happen if they are corrupted. The
370 * remaining fields will only be examined by the child process. It
371 * might crash, but we won't.
372 */
373 rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
374 rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
375 rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time;
376 rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
377 memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);
378
379 /*
380 * Copy the PID to be notified about state changes, but only if the
381 * postmaster knows about a backend with that PID. It isn't an error
382 * if the postmaster doesn't know about the PID, because the backend
383 * that requested the worker could have died (or been killed) just
384 * after doing so. Nonetheless, at least until we get some experience
385 * with how this plays out in the wild, log a message at a relative
386 * high debug level.
387 */
388 rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
389 if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
390 {
391 elog(DEBUG1, "worker notification PID %lu is not valid",
392 (long) rw->rw_worker.bgw_notify_pid);
393 rw->rw_worker.bgw_notify_pid = 0;
394 }
395
396 /* Initialize postmaster bookkeeping. */
397 rw->rw_backend = NULL;
398 rw->rw_pid = 0;
399 rw->rw_child_slot = 0;
400 rw->rw_crashed_at = 0;
401 rw->rw_shmem_slot = slotno;
402 rw->rw_terminate = false;
403
404 /* Log it! */
405 ereport(DEBUG1,
406 (errmsg("registering background worker \"%s\"",
407 rw->rw_worker.bgw_name)));
408
409 slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
410 }
411 }
412
413 /*
414 * Forget about a background worker that's no longer needed.
415 *
416 * The worker must be identified by passing an slist_mutable_iter that
417 * points to it. This convention allows deletion of workers during
418 * searches of the worker list, and saves having to search the list again.
419 *
420 * Caller is responsible for notifying bgw_notify_pid, if appropriate.
421 *
422 * This function must be invoked only in the postmaster.
423 */
424 void
ForgetBackgroundWorker(slist_mutable_iter * cur)425 ForgetBackgroundWorker(slist_mutable_iter *cur)
426 {
427 RegisteredBgWorker *rw;
428 BackgroundWorkerSlot *slot;
429
430 rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
431
432 Assert(rw->rw_shmem_slot < max_worker_processes);
433 slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
434 Assert(slot->in_use);
435
436 /*
437 * We need a memory barrier here to make sure that the update of
438 * parallel_terminate_count completes before the store to in_use.
439 */
440 if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
441 BackgroundWorkerData->parallel_terminate_count++;
442
443 pg_memory_barrier();
444 slot->in_use = false;
445
446 ereport(DEBUG1,
447 (errmsg("unregistering background worker \"%s\"",
448 rw->rw_worker.bgw_name)));
449
450 slist_delete_current(cur);
451 free(rw);
452 }
453
454 /*
455 * Report the PID of a newly-launched background worker in shared memory.
456 *
457 * This function should only be called from the postmaster.
458 */
459 void
ReportBackgroundWorkerPID(RegisteredBgWorker * rw)460 ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
461 {
462 BackgroundWorkerSlot *slot;
463
464 Assert(rw->rw_shmem_slot < max_worker_processes);
465 slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
466 slot->pid = rw->rw_pid;
467
468 if (rw->rw_worker.bgw_notify_pid != 0)
469 kill(rw->rw_worker.bgw_notify_pid, SIGUSR1);
470 }
471
472 /*
473 * Report that the PID of a background worker is now zero because a
474 * previously-running background worker has exited.
475 *
476 * This function should only be called from the postmaster.
477 */
478 void
ReportBackgroundWorkerExit(slist_mutable_iter * cur)479 ReportBackgroundWorkerExit(slist_mutable_iter *cur)
480 {
481 RegisteredBgWorker *rw;
482 BackgroundWorkerSlot *slot;
483 int notify_pid;
484
485 rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);
486
487 Assert(rw->rw_shmem_slot < max_worker_processes);
488 slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
489 slot->pid = rw->rw_pid;
490 notify_pid = rw->rw_worker.bgw_notify_pid;
491
492 /*
493 * If this worker is slated for deregistration, do that before notifying
494 * the process which started it. Otherwise, if that process tries to
495 * reuse the slot immediately, it might not be available yet. In theory
496 * that could happen anyway if the process checks slot->pid at just the
497 * wrong moment, but this makes the window narrower.
498 */
499 if (rw->rw_terminate ||
500 rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
501 ForgetBackgroundWorker(cur);
502
503 if (notify_pid != 0)
504 kill(notify_pid, SIGUSR1);
505 }
506
507 /*
508 * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend.
509 *
510 * This function should only be called from the postmaster.
511 */
512 void
BackgroundWorkerStopNotifications(pid_t pid)513 BackgroundWorkerStopNotifications(pid_t pid)
514 {
515 slist_iter siter;
516
517 slist_foreach(siter, &BackgroundWorkerList)
518 {
519 RegisteredBgWorker *rw;
520
521 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
522 if (rw->rw_worker.bgw_notify_pid == pid)
523 rw->rw_worker.bgw_notify_pid = 0;
524 }
525 }
526
527 /*
528 * Cancel any not-yet-started worker requests that have waiting processes.
529 *
530 * This is called during a normal ("smart" or "fast") database shutdown.
531 * After this point, no new background workers will be started, so anything
532 * that might be waiting for them needs to be kicked off its wait. We do
533 * that by cancelling the bgworker registration entirely, which is perhaps
534 * overkill, but since we're shutting down it does not matter whether the
535 * registration record sticks around.
536 *
537 * This function should only be called from the postmaster.
538 */
539 void
ForgetUnstartedBackgroundWorkers(void)540 ForgetUnstartedBackgroundWorkers(void)
541 {
542 slist_mutable_iter iter;
543
544 slist_foreach_modify(iter, &BackgroundWorkerList)
545 {
546 RegisteredBgWorker *rw;
547 BackgroundWorkerSlot *slot;
548
549 rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
550 Assert(rw->rw_shmem_slot < max_worker_processes);
551 slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
552
553 /* If it's not yet started, and there's someone waiting ... */
554 if (slot->pid == InvalidPid &&
555 rw->rw_worker.bgw_notify_pid != 0)
556 {
557 /* ... then zap it, and notify the waiter */
558 int notify_pid = rw->rw_worker.bgw_notify_pid;
559
560 ForgetBackgroundWorker(&iter);
561 if (notify_pid != 0)
562 kill(notify_pid, SIGUSR1);
563 }
564 }
565 }
566
567 /*
568 * Reset background worker crash state.
569 *
570 * We assume that, after a crash-and-restart cycle, background workers without
571 * the never-restart flag should be restarted immediately, instead of waiting
572 * for bgw_restart_time to elapse. On the other hand, workers with that flag
573 * should be forgotten immediately, since we won't ever restart them.
574 *
575 * This function should only be called from the postmaster.
576 */
577 void
ResetBackgroundWorkerCrashTimes(void)578 ResetBackgroundWorkerCrashTimes(void)
579 {
580 slist_mutable_iter iter;
581
582 slist_foreach_modify(iter, &BackgroundWorkerList)
583 {
584 RegisteredBgWorker *rw;
585
586 rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
587
588 if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
589 {
590 /*
591 * Workers marked BGW_NVER_RESTART shouldn't get relaunched after
592 * the crash, so forget about them. (If we wait until after the
593 * crash to forget about them, and they are parallel workers,
594 * parallel_terminate_count will get incremented after we've
595 * already zeroed parallel_register_count, which would be bad.)
596 */
597 ForgetBackgroundWorker(&iter);
598 }
599 else
600 {
601 /*
602 * The accounting which we do via parallel_register_count and
603 * parallel_terminate_count would get messed up if a worker marked
604 * parallel could survive a crash and restart cycle. All such
605 * workers should be marked BGW_NEVER_RESTART, and thus control
606 * should never reach this branch.
607 */
608 Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0);
609
610 /*
611 * Allow this worker to be restarted immediately after we finish
612 * resetting.
613 */
614 rw->rw_crashed_at = 0;
615
616 /*
617 * If there was anyone waiting for it, they're history.
618 */
619 rw->rw_worker.bgw_notify_pid = 0;
620 }
621 }
622 }
623
#ifdef EXEC_BACKEND
/*
 * In EXEC_BACKEND mode, workers call this to fetch their registration
 * details from the shared memory slot with the given number.
 *
 * The result points to a function-static copy of the slot's worker data.
 */
BackgroundWorker *
BackgroundWorkerEntry(int slotno)
{
	static BackgroundWorker entry_copy;
	const BackgroundWorkerSlot *source;

	Assert(slotno < BackgroundWorkerData->total_slots);
	source = &BackgroundWorkerData->slot[slotno];
	Assert(source->in_use);

	/* Copy it out, since we may not intend to retain shmem access. */
	memcpy(&entry_copy, &source->worker, sizeof(BackgroundWorker));

	return &entry_copy;
}
#endif
644
645 /*
646 * Complain about the BackgroundWorker definition using error level elevel.
647 * Return true if it looks ok, false if not (unless elevel >= ERROR, in
648 * which case we won't return at all in the not-OK case).
649 */
650 static bool
SanityCheckBackgroundWorker(BackgroundWorker * worker,int elevel)651 SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
652 {
653 /* sanity check for flags */
654 if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
655 {
656 if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
657 {
658 ereport(elevel,
659 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
660 errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection",
661 worker->bgw_name)));
662 return false;
663 }
664
665 if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
666 {
667 ereport(elevel,
668 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
669 errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
670 worker->bgw_name)));
671 return false;
672 }
673
674 /* XXX other checks? */
675 }
676
677 if ((worker->bgw_restart_time < 0 &&
678 worker->bgw_restart_time != BGW_NEVER_RESTART) ||
679 (worker->bgw_restart_time > USECS_PER_DAY / 1000))
680 {
681 ereport(elevel,
682 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
683 errmsg("background worker \"%s\": invalid restart interval",
684 worker->bgw_name)));
685 return false;
686 }
687
688 /*
689 * Parallel workers may not be configured for restart, because the
690 * parallel_register_count/parallel_terminate_count accounting can't
691 * handle parallel workers lasting through a crash-and-restart cycle.
692 */
693 if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
694 (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
695 {
696 ereport(elevel,
697 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
698 errmsg("background worker \"%s\": parallel workers may not be configured for restart",
699 worker->bgw_name)));
700 return false;
701 }
702
703 return true;
704 }
705
/*
 * Signal handler (installed for SIGQUIT by StartBackgroundWorker) that
 * terminates the process immediately.
 */
static void
bgworker_quickdie(SIGNAL_ARGS)
{
	/*
	 * Deliberately skip proc_exit() and atexit() callbacks.  We get here
	 * because shared memory may be corrupted, so attempting to clean up our
	 * transaction is unwise; besides, those callbacks would not be safe to
	 * run inside a signal handler.
	 *
	 * The exit status is 2 rather than 0 so that the postmaster goes through
	 * a system reset cycle if someone manually sends SIGQUIT to a random
	 * backend; that is required exactly because our shared memory state is
	 * left as-is.  (The "dead man switch" mechanism in pmsignal.c should
	 * make the postmaster treat this as a crash as well, but there is no
	 * harm in being doubly sure.)
	 */
	_exit(2);
}
725
726 /*
727 * Standard SIGTERM handler for background workers
728 */
729 static void
bgworker_die(SIGNAL_ARGS)730 bgworker_die(SIGNAL_ARGS)
731 {
732 PG_SETMASK(&BlockSig);
733
734 ereport(FATAL,
735 (errcode(ERRCODE_ADMIN_SHUTDOWN),
736 errmsg("terminating background worker \"%s\" due to administrator command",
737 MyBgworkerEntry->bgw_name)));
738 }
739
/*
 * Standard SIGUSR1 handler for unconnected workers.
 *
 * Its job is to make sure that even a worker without a database connection
 * still responds to latch activity.
 */
static void
bgworker_sigusr1_handler(SIGNAL_ARGS)
{
	/* Keep errno intact for whatever code this signal interrupted. */
	const int	errno_on_entry = errno;

	latch_sigusr1_handler();

	errno = errno_on_entry;
}
755
756 /*
757 * Start a new background worker
758 *
759 * This is the main entry point for background worker, to be called from
760 * postmaster.
761 */
762 void
StartBackgroundWorker(void)763 StartBackgroundWorker(void)
764 {
765 sigjmp_buf local_sigjmp_buf;
766 char buf[MAXPGPATH];
767 BackgroundWorker *worker = MyBgworkerEntry;
768 bgworker_main_type entrypt;
769
770 if (worker == NULL)
771 elog(FATAL, "unable to find bgworker entry");
772
773 IsBackgroundWorker = true;
774
775 /* Identify myself via ps */
776 snprintf(buf, MAXPGPATH, "bgworker: %s", worker->bgw_name);
777 init_ps_display(buf, "", "", "");
778
779 /*
780 * If we're not supposed to have shared memory access, then detach from
781 * shared memory. If we didn't request shared memory access, the
782 * postmaster won't force a cluster-wide restart if we exit unexpectedly,
783 * so we'd better make sure that we don't mess anything up that would
784 * require that sort of cleanup.
785 */
786 if ((worker->bgw_flags & BGWORKER_SHMEM_ACCESS) == 0)
787 {
788 dsm_detach_all();
789 PGSharedMemoryDetach();
790 }
791
792 SetProcessingMode(InitProcessing);
793
794 /* Apply PostAuthDelay */
795 if (PostAuthDelay > 0)
796 pg_usleep(PostAuthDelay * 1000000L);
797
798 /*
799 * Set up signal handlers.
800 */
801 if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
802 {
803 /*
804 * SIGINT is used to signal canceling the current action
805 */
806 pqsignal(SIGINT, StatementCancelHandler);
807 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
808 pqsignal(SIGFPE, FloatExceptionHandler);
809
810 /* XXX Any other handlers needed here? */
811 }
812 else
813 {
814 pqsignal(SIGINT, SIG_IGN);
815 pqsignal(SIGUSR1, bgworker_sigusr1_handler);
816 pqsignal(SIGFPE, SIG_IGN);
817 }
818 pqsignal(SIGTERM, bgworker_die);
819 pqsignal(SIGHUP, SIG_IGN);
820
821 pqsignal(SIGQUIT, bgworker_quickdie);
822 InitializeTimeouts(); /* establishes SIGALRM handler */
823
824 pqsignal(SIGPIPE, SIG_IGN);
825 pqsignal(SIGUSR2, SIG_IGN);
826 pqsignal(SIGCHLD, SIG_DFL);
827
828 /*
829 * If an exception is encountered, processing resumes here.
830 *
831 * We just need to clean up, report the error, and go away.
832 */
833 if (sigsetjmp(local_sigjmp_buf, 1) != 0)
834 {
835 /* Since not using PG_TRY, must reset error stack by hand */
836 error_context_stack = NULL;
837
838 /* Prevent interrupts while cleaning up */
839 HOLD_INTERRUPTS();
840
841 /*
842 * sigsetjmp will have blocked all signals, but we may need to accept
843 * signals while communicating with our parallel leader. Once we've
844 * done HOLD_INTERRUPTS() it should be safe to unblock signals.
845 */
846 BackgroundWorkerUnblockSignals();
847
848 /* Report the error to the parallel leader and the server log */
849 EmitErrorReport();
850
851 /*
852 * Do we need more cleanup here? For shmem-connected bgworkers, we
853 * will call InitProcess below, which will install ProcKill as exit
854 * callback. That will take care of releasing locks, etc.
855 */
856
857 /* and go away */
858 proc_exit(1);
859 }
860
861 /* We can now handle ereport(ERROR) */
862 PG_exception_stack = &local_sigjmp_buf;
863
864 /*
865 * If the background worker request shared memory access, set that up now;
866 * else, detach all shared memory segments.
867 */
868 if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS)
869 {
870 /*
871 * Early initialization. Some of this could be useful even for
872 * background workers that aren't using shared memory, but they can
873 * call the individual startup routines for those subsystems if
874 * needed.
875 */
876 BaseInit();
877
878 /*
879 * Create a per-backend PGPROC struct in shared memory, except in the
880 * EXEC_BACKEND case where this was done in SubPostmasterMain. We must
881 * do this before we can use LWLocks (and in the EXEC_BACKEND case we
882 * already had to do some stuff with LWLocks).
883 */
884 #ifndef EXEC_BACKEND
885 InitProcess();
886 #endif
887 }
888
889 /*
890 * Look up the entry point function, loading its library if necessary.
891 */
892 entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name,
893 worker->bgw_function_name);
894
895 /*
896 * Note that in normal processes, we would call InitPostgres here. For a
897 * worker, however, we don't know what database to connect to, yet; so we
898 * need to wait until the user code does it via
899 * BackgroundWorkerInitializeConnection().
900 */
901
902 /*
903 * Now invoke the user-defined worker code
904 */
905 entrypt(worker->bgw_main_arg);
906
907 /* ... and if it returns, we're done */
908 proc_exit(0);
909 }
910
911 /*
912 * Register a new static background worker.
913 *
914 * This can only be called directly from postmaster or in the _PG_init
915 * function of a module library that's loaded by shared_preload_libraries;
916 * otherwise it will have no effect.
917 */
918 void
RegisterBackgroundWorker(BackgroundWorker * worker)919 RegisterBackgroundWorker(BackgroundWorker *worker)
920 {
921 RegisteredBgWorker *rw;
922 static int numworkers = 0;
923
924 if (!IsUnderPostmaster)
925 ereport(DEBUG1,
926 (errmsg("registering background worker \"%s\"", worker->bgw_name)));
927
928 if (!process_shared_preload_libraries_in_progress &&
929 strcmp(worker->bgw_library_name, "postgres") != 0)
930 {
931 if (!IsUnderPostmaster)
932 ereport(LOG,
933 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
934 errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
935 worker->bgw_name)));
936 return;
937 }
938
939 if (!SanityCheckBackgroundWorker(worker, LOG))
940 return;
941
942 if (worker->bgw_notify_pid != 0)
943 {
944 ereport(LOG,
945 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
946 errmsg("background worker \"%s\": only dynamic background workers can request notification",
947 worker->bgw_name)));
948 return;
949 }
950
951 /*
952 * Enforce maximum number of workers. Note this is overly restrictive: we
953 * could allow more non-shmem-connected workers, because these don't count
954 * towards the MAX_BACKENDS limit elsewhere. For now, it doesn't seem
955 * important to relax this restriction.
956 */
957 if (++numworkers > max_worker_processes)
958 {
959 ereport(LOG,
960 (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
961 errmsg("too many background workers"),
962 errdetail_plural("Up to %d background worker can be registered with the current settings.",
963 "Up to %d background workers can be registered with the current settings.",
964 max_worker_processes,
965 max_worker_processes),
966 errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
967 return;
968 }
969
970 /*
971 * Copy the registration data into the registered workers list.
972 */
973 rw = malloc(sizeof(RegisteredBgWorker));
974 if (rw == NULL)
975 {
976 ereport(LOG,
977 (errcode(ERRCODE_OUT_OF_MEMORY),
978 errmsg("out of memory")));
979 return;
980 }
981
982 rw->rw_worker = *worker;
983 rw->rw_backend = NULL;
984 rw->rw_pid = 0;
985 rw->rw_child_slot = 0;
986 rw->rw_crashed_at = 0;
987 rw->rw_terminate = false;
988
989 slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
990 }
991
992 /*
993 * Register a new background worker from a regular backend.
994 *
995 * Returns true on success and false on failure. Failure typically indicates
996 * that no background worker slots are currently available.
997 *
998 * If handle != NULL, we'll set *handle to a pointer that can subsequently
999 * be used as an argument to GetBackgroundWorkerPid(). The caller can
1000 * free this pointer using pfree(), if desired.
1001 */
1002 bool
RegisterDynamicBackgroundWorker(BackgroundWorker * worker,BackgroundWorkerHandle ** handle)1003 RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
1004 BackgroundWorkerHandle **handle)
1005 {
1006 int slotno;
1007 bool success = false;
1008 bool parallel;
1009 uint64 generation = 0;
1010
1011 /*
1012 * We can't register dynamic background workers from the postmaster. If
1013 * this is a standalone backend, we're the only process and can't start
1014 * any more. In a multi-process environment, it might be theoretically
1015 * possible, but we don't currently support it due to locking
1016 * considerations; see comments on the BackgroundWorkerSlot data
1017 * structure.
1018 */
1019 if (!IsUnderPostmaster)
1020 return false;
1021
1022 if (!SanityCheckBackgroundWorker(worker, ERROR))
1023 return false;
1024
1025 parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
1026
1027 LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1028
1029 /*
1030 * If this is a parallel worker, check whether there are already too many
1031 * parallel workers; if so, don't register another one. Our view of
1032 * parallel_terminate_count may be slightly stale, but that doesn't really
1033 * matter: we would have gotten the same result if we'd arrived here
1034 * slightly earlier anyway. There's no help for it, either, since the
1035 * postmaster must not take locks; a memory barrier wouldn't guarantee
1036 * anything useful.
1037 */
1038 if (parallel && (BackgroundWorkerData->parallel_register_count -
1039 BackgroundWorkerData->parallel_terminate_count) >=
1040 max_parallel_workers)
1041 {
1042 Assert(BackgroundWorkerData->parallel_register_count -
1043 BackgroundWorkerData->parallel_terminate_count <=
1044 MAX_PARALLEL_WORKER_LIMIT);
1045 LWLockRelease(BackgroundWorkerLock);
1046 return false;
1047 }
1048
1049 /*
1050 * Look for an unused slot. If we find one, grab it.
1051 */
1052 for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
1053 {
1054 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1055
1056 if (!slot->in_use)
1057 {
1058 memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
1059 slot->pid = InvalidPid; /* indicates not started yet */
1060 slot->generation++;
1061 slot->terminate = false;
1062 generation = slot->generation;
1063 if (parallel)
1064 BackgroundWorkerData->parallel_register_count++;
1065
1066 /*
1067 * Make sure postmaster doesn't see the slot as in use before it
1068 * sees the new contents.
1069 */
1070 pg_write_barrier();
1071
1072 slot->in_use = true;
1073 success = true;
1074 break;
1075 }
1076 }
1077
1078 LWLockRelease(BackgroundWorkerLock);
1079
1080 /* If we found a slot, tell the postmaster to notice the change. */
1081 if (success)
1082 SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1083
1084 /*
1085 * If we found a slot and the user has provided a handle, initialize it.
1086 */
1087 if (success && handle)
1088 {
1089 *handle = palloc(sizeof(BackgroundWorkerHandle));
1090 (*handle)->slot = slotno;
1091 (*handle)->generation = generation;
1092 }
1093
1094 return success;
1095 }
1096
1097 /*
1098 * Get the PID of a dynamically-registered background worker.
1099 *
1100 * If the worker is determined to be running, the return value will be
1101 * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1102 * postmaster has not yet attempted to start the worker, the return value will
1103 * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED.
1104 *
1105 * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1106 * (because it is configured for automatic restart and exited non-zero),
1107 * or that the worker is permanently stopped (because it exited with exit
1108 * code 0, or was not configured for automatic restart), or even that the
1109 * worker was unregistered without ever starting (either because startup
1110 * failed and the worker is not configured for automatic restart, or because
1111 * TerminateBackgroundWorker was used before the worker was successfully
1112 * started).
1113 */
1114 BgwHandleStatus
GetBackgroundWorkerPid(BackgroundWorkerHandle * handle,pid_t * pidp)1115 GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
1116 {
1117 BackgroundWorkerSlot *slot;
1118 pid_t pid;
1119
1120 Assert(handle->slot < max_worker_processes);
1121 slot = &BackgroundWorkerData->slot[handle->slot];
1122
1123 /*
1124 * We could probably arrange to synchronize access to data using memory
1125 * barriers only, but for now, let's just keep it simple and grab the
1126 * lock. It seems unlikely that there will be enough traffic here to
1127 * result in meaningful contention.
1128 */
1129 LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1130
1131 /*
1132 * The generation number can't be concurrently changed while we hold the
1133 * lock. The pid, which is updated by the postmaster, can change at any
1134 * time, but we assume such changes are atomic. So the value we read
1135 * won't be garbage, but it might be out of date by the time the caller
1136 * examines it (but that's unavoidable anyway).
1137 *
1138 * The in_use flag could be in the process of changing from true to false,
1139 * but if it is already false then it can't change further.
1140 */
1141 if (handle->generation != slot->generation || !slot->in_use)
1142 pid = 0;
1143 else
1144 pid = slot->pid;
1145
1146 /* All done. */
1147 LWLockRelease(BackgroundWorkerLock);
1148
1149 if (pid == 0)
1150 return BGWH_STOPPED;
1151 else if (pid == InvalidPid)
1152 return BGWH_NOT_YET_STARTED;
1153 *pidp = pid;
1154 return BGWH_STARTED;
1155 }
1156
1157 /*
1158 * Wait for a background worker to start up.
1159 *
1160 * This is like GetBackgroundWorkerPid(), except that if the worker has not
1161 * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1162 * returned. However, if the postmaster has died, we give up and return
1163 * BGWH_POSTMASTER_DIED, since it that case we know that startup will not
1164 * take place.
1165 *
1166 * The caller *must* have set our PID as the worker's bgw_notify_pid,
1167 * else we will not be awoken promptly when the worker's state changes.
1168 */
1169 BgwHandleStatus
WaitForBackgroundWorkerStartup(BackgroundWorkerHandle * handle,pid_t * pidp)1170 WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
1171 {
1172 BgwHandleStatus status;
1173 int rc;
1174
1175 for (;;)
1176 {
1177 pid_t pid;
1178
1179 CHECK_FOR_INTERRUPTS();
1180
1181 status = GetBackgroundWorkerPid(handle, &pid);
1182 if (status == BGWH_STARTED)
1183 *pidp = pid;
1184 if (status != BGWH_NOT_YET_STARTED)
1185 break;
1186
1187 rc = WaitLatch(MyLatch,
1188 WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1189 WAIT_EVENT_BGWORKER_STARTUP);
1190
1191 if (rc & WL_POSTMASTER_DEATH)
1192 {
1193 status = BGWH_POSTMASTER_DIED;
1194 break;
1195 }
1196
1197 ResetLatch(MyLatch);
1198 }
1199
1200 return status;
1201 }
1202
1203 /*
1204 * Wait for a background worker to stop.
1205 *
1206 * If the worker hasn't yet started, or is running, we wait for it to stop
1207 * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1208 * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1209 * notifies us when a worker's state changes.
1210 *
1211 * The caller *must* have set our PID as the worker's bgw_notify_pid,
1212 * else we will not be awoken promptly when the worker's state changes.
1213 */
1214 BgwHandleStatus
WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle * handle)1215 WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
1216 {
1217 BgwHandleStatus status;
1218 int rc;
1219
1220 for (;;)
1221 {
1222 pid_t pid;
1223
1224 CHECK_FOR_INTERRUPTS();
1225
1226 status = GetBackgroundWorkerPid(handle, &pid);
1227 if (status == BGWH_STOPPED)
1228 break;
1229
1230 rc = WaitLatch(MyLatch,
1231 WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1232 WAIT_EVENT_BGWORKER_SHUTDOWN);
1233
1234 if (rc & WL_POSTMASTER_DEATH)
1235 {
1236 status = BGWH_POSTMASTER_DIED;
1237 break;
1238 }
1239
1240 ResetLatch(MyLatch);
1241 }
1242
1243 return status;
1244 }
1245
1246 /*
1247 * Instruct the postmaster to terminate a background worker.
1248 *
1249 * Note that it's safe to do this without regard to whether the worker is
1250 * still running, or even if the worker may already have existed and been
1251 * unregistered.
1252 */
1253 void
TerminateBackgroundWorker(BackgroundWorkerHandle * handle)1254 TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
1255 {
1256 BackgroundWorkerSlot *slot;
1257 bool signal_postmaster = false;
1258
1259 Assert(handle->slot < max_worker_processes);
1260 slot = &BackgroundWorkerData->slot[handle->slot];
1261
1262 /* Set terminate flag in shared memory, unless slot has been reused. */
1263 LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1264 if (handle->generation == slot->generation)
1265 {
1266 slot->terminate = true;
1267 signal_postmaster = true;
1268 }
1269 LWLockRelease(BackgroundWorkerLock);
1270
1271 /* Make sure the postmaster notices the change to shared memory. */
1272 if (signal_postmaster)
1273 SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1274 }
1275
1276 /*
1277 * Look up (and possibly load) a bgworker entry point function.
1278 *
1279 * For functions contained in the core code, we use library name "postgres"
1280 * and consult the InternalBGWorkers array. External functions are
1281 * looked up, and loaded if necessary, using load_external_function().
1282 *
1283 * The point of this is to pass function names as strings across process
1284 * boundaries. We can't pass actual function addresses because of the
1285 * possibility that the function has been loaded at a different address
1286 * in a different process. This is obviously a hazard for functions in
1287 * loadable libraries, but it can happen even for functions in the core code
1288 * on platforms using EXEC_BACKEND (e.g., Windows).
1289 *
1290 * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1291 * in favor of applying load_external_function() for core functions too;
1292 * but that raises portability issues that are not worth addressing now.
1293 */
1294 static bgworker_main_type
LookupBackgroundWorkerFunction(const char * libraryname,const char * funcname)1295 LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1296 {
1297 /*
1298 * If the function is to be loaded from postgres itself, search the
1299 * InternalBGWorkers array.
1300 */
1301 if (strcmp(libraryname, "postgres") == 0)
1302 {
1303 int i;
1304
1305 for (i = 0; i < lengthof(InternalBGWorkers); i++)
1306 {
1307 if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
1308 return InternalBGWorkers[i].fn_addr;
1309 }
1310
1311 /* We can only reach this by programming error. */
1312 elog(ERROR, "internal function \"%s\" not found", funcname);
1313 }
1314
1315 /* Otherwise load from external library. */
1316 return (bgworker_main_type)
1317 load_external_function(libraryname, funcname, true, NULL);
1318 }
1319