1 /*--------------------------------------------------------------------
2 * bgworker.c
3 * POSTGRES pluggable background workers implementation
4 *
5 * Portions Copyright (c) 1996-2018, PostgreSQL Global Development Group
6 *
7 * IDENTIFICATION
8 * src/backend/postmaster/bgworker.c
9 *
10 *-------------------------------------------------------------------------
11 */
12
13 #include "postgres.h"
14
15 #include <unistd.h>
16
17 #include "libpq/pqsignal.h"
18 #include "access/parallel.h"
19 #include "miscadmin.h"
20 #include "pgstat.h"
21 #include "port/atomics.h"
22 #include "postmaster/bgworker_internals.h"
23 #include "postmaster/postmaster.h"
24 #include "replication/logicallauncher.h"
25 #include "replication/logicalworker.h"
26 #include "storage/dsm.h"
27 #include "storage/ipc.h"
28 #include "storage/latch.h"
29 #include "storage/lwlock.h"
30 #include "storage/pg_shmem.h"
31 #include "storage/pmsignal.h"
32 #include "storage/proc.h"
33 #include "storage/procsignal.h"
34 #include "storage/shmem.h"
35 #include "tcop/tcopprot.h"
36 #include "utils/ascii.h"
37 #include "utils/ps_status.h"
38 #include "utils/timeout.h"
39
/*
 * The postmaster's list of registered background workers, in private memory.
 * (Shared-memory state lives in BackgroundWorkerData, below.)
 */
slist_head BackgroundWorkerList = SLIST_STATIC_INIT(BackgroundWorkerList);
44
/*
 * BackgroundWorkerSlots exist in shared memory and can be accessed (via
 * the BackgroundWorkerArray) by both the postmaster and by regular backends.
 * However, the postmaster cannot take locks, even spinlocks, because this
 * might allow it to crash or become wedged if shared memory gets corrupted.
 * Such an outcome is intolerable.  Therefore, we need a lockless protocol
 * for coordinating access to this data.
 *
 * The 'in_use' flag is used to hand off responsibility for the slot between
 * the postmaster and the rest of the system.  When 'in_use' is false,
 * the postmaster will ignore the slot entirely, except for the 'in_use' flag
 * itself, which it may read.  In this state, regular backends may modify the
 * slot.  Once a backend sets 'in_use' to true, the slot becomes the
 * responsibility of the postmaster.  Regular backends may no longer modify it,
 * but the postmaster may examine it.  Thus, a backend initializing a slot
 * must fully initialize the slot - and insert a write memory barrier - before
 * marking it as in use.
 *
 * As an exception, however, even when the slot is in use, regular backends
 * may set the 'terminate' flag for a slot, telling the postmaster not
 * to restart it.  Once the background worker is no longer running, the slot
 * will be released for reuse.
 *
 * In addition to coordinating with the postmaster, backends modifying this
 * data structure must coordinate with each other.  Since they can take locks,
 * this is straightforward: any backend wishing to manipulate a slot must
 * take BackgroundWorkerLock in exclusive mode.  Backends wishing to read
 * data that might get concurrently modified by other backends should take
 * this lock in shared mode.  No matter what, backends reading this data
 * structure must be able to tolerate concurrent modifications by the
 * postmaster.
 */
typedef struct BackgroundWorkerSlot
{
    bool        in_use;         /* ownership hand-off flag; see above */
    bool        terminate;      /* backend's request not to (re)start worker */
    pid_t       pid;            /* InvalidPid = not started yet; 0 = dead */
    uint64      generation;     /* incremented when slot is recycled */
    BackgroundWorker worker;    /* copy of the registration data */
} BackgroundWorkerSlot;
85
/*
 * In order to limit the total number of parallel workers (according to
 * max_parallel_workers GUC), we maintain the number of active parallel
 * workers.  Since the postmaster cannot take locks, two variables are used for
 * this purpose: the number of registered parallel workers (modified by the
 * backends, protected by BackgroundWorkerLock) and the number of terminated
 * parallel workers (modified only by the postmaster, lockless).  The active
 * number of parallel workers is the number of registered workers minus the
 * terminated ones.  These counters can of course overflow, but it's not
 * important here since the subtraction will still give the right number.
 */
typedef struct BackgroundWorkerArray
{
    int         total_slots;    /* always equals max_worker_processes */
    uint32      parallel_register_count;    /* backends bump, under lock */
    uint32      parallel_terminate_count;   /* postmaster bumps, lockless */
    BackgroundWorkerSlot slot[FLEXIBLE_ARRAY_MEMBER];
} BackgroundWorkerArray;
104
/*
 * Opaque handle returned to backends that register dynamic workers: a slot
 * index plus the generation current at registration time, so a stale handle
 * can be detected after the slot has been recycled.
 */
struct BackgroundWorkerHandle
{
    int         slot;           /* index into BackgroundWorkerData->slot[] */
    uint64      generation;     /* must match the slot's generation */
};

/* Pointer to the shared-memory array; set up by BackgroundWorkerShmemInit. */
static BackgroundWorkerArray *BackgroundWorkerData;
112
/*
 * List of internal background worker entry points.  We need this for
 * reasons explained in LookupBackgroundWorkerFunction(), below.
 */
static const struct
{
    const char *fn_name;        /* name given at registration time */
    bgworker_main_type fn_addr; /* corresponding in-core entry point */
}           InternalBGWorkers[] =

{
    {
        "ParallelWorkerMain", ParallelWorkerMain
    },
    {
        "ApplyLauncherMain", ApplyLauncherMain
    },
    {
        "ApplyWorkerMain", ApplyWorkerMain
    }
};

/* Private functions. */
static bgworker_main_type LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname);
137
138
139 /*
140 * Calculate shared memory needed.
141 */
142 Size
BackgroundWorkerShmemSize(void)143 BackgroundWorkerShmemSize(void)
144 {
145 Size size;
146
147 /* Array of workers is variably sized. */
148 size = offsetof(BackgroundWorkerArray, slot);
149 size = add_size(size, mul_size(max_worker_processes,
150 sizeof(BackgroundWorkerSlot)));
151
152 return size;
153 }
154
/*
 * Initialize shared memory.
 *
 * In the postmaster (or a standalone backend), populate the shared array
 * from the private registration list; in an EXEC_BACKEND child, just attach
 * to the already-initialized structure.
 */
void
BackgroundWorkerShmemInit(void)
{
    bool        found;

    BackgroundWorkerData = ShmemInitStruct("Background Worker Data",
                                           BackgroundWorkerShmemSize(),
                                           &found);
    if (!IsUnderPostmaster)
    {
        slist_iter  siter;
        int         slotno = 0;

        BackgroundWorkerData->total_slots = max_worker_processes;
        BackgroundWorkerData->parallel_register_count = 0;
        BackgroundWorkerData->parallel_terminate_count = 0;

        /*
         * Copy contents of worker list into shared memory.  Record the shared
         * memory slot assigned to each worker.  This ensures a 1-to-1
         * correspondence between the postmaster's private list and the array
         * in shared memory.
         */
        slist_foreach(siter, &BackgroundWorkerList)
        {
            BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
            RegisteredBgWorker *rw;

            rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
            Assert(slotno < max_worker_processes);
            slot->in_use = true;
            slot->terminate = false;
            slot->pid = InvalidPid;     /* i.e. not started yet */
            slot->generation = 0;
            rw->rw_shmem_slot = slotno;
            rw->rw_worker.bgw_notify_pid = 0;   /* might be reinit after crash */
            memcpy(&slot->worker, &rw->rw_worker, sizeof(BackgroundWorker));
            ++slotno;
        }

        /*
         * Mark any remaining slots as not in use.
         */
        while (slotno < max_worker_processes)
        {
            BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];

            slot->in_use = false;
            ++slotno;
        }
    }
    else
        Assert(found);          /* postmaster must have created it already */
}
212
213 /*
214 * Search the postmaster's backend-private list of RegisteredBgWorker objects
215 * for the one that maps to the given slot number.
216 */
217 static RegisteredBgWorker *
FindRegisteredWorkerBySlotNumber(int slotno)218 FindRegisteredWorkerBySlotNumber(int slotno)
219 {
220 slist_iter siter;
221
222 slist_foreach(siter, &BackgroundWorkerList)
223 {
224 RegisteredBgWorker *rw;
225
226 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
227 if (rw->rw_shmem_slot == slotno)
228 return rw;
229 }
230
231 return NULL;
232 }
233
234 /*
235 * Notice changes to shared memory made by other backends.
236 * Accept new worker requests only if allow_new_workers is true.
237 *
238 * This code runs in the postmaster, so we must be very careful not to assume
239 * that shared memory contents are sane. Otherwise, a rogue backend could
240 * take out the postmaster.
241 */
242 void
BackgroundWorkerStateChange(bool allow_new_workers)243 BackgroundWorkerStateChange(bool allow_new_workers)
244 {
245 int slotno;
246
247 /*
248 * The total number of slots stored in shared memory should match our
249 * notion of max_worker_processes. If it does not, something is very
250 * wrong. Further down, we always refer to this value as
251 * max_worker_processes, in case shared memory gets corrupted while we're
252 * looping.
253 */
254 if (max_worker_processes != BackgroundWorkerData->total_slots)
255 {
256 elog(LOG,
257 "inconsistent background worker state (max_worker_processes=%d, total_slots=%d",
258 max_worker_processes,
259 BackgroundWorkerData->total_slots);
260 return;
261 }
262
263 /*
264 * Iterate through slots, looking for newly-registered workers or workers
265 * who must die.
266 */
267 for (slotno = 0; slotno < max_worker_processes; ++slotno)
268 {
269 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
270 RegisteredBgWorker *rw;
271
272 if (!slot->in_use)
273 continue;
274
275 /*
276 * Make sure we don't see the in_use flag before the updated slot
277 * contents.
278 */
279 pg_read_barrier();
280
281 /* See whether we already know about this worker. */
282 rw = FindRegisteredWorkerBySlotNumber(slotno);
283 if (rw != NULL)
284 {
285 /*
286 * In general, the worker data can't change after it's initially
287 * registered. However, someone can set the terminate flag.
288 */
289 if (slot->terminate && !rw->rw_terminate)
290 {
291 rw->rw_terminate = true;
292 if (rw->rw_pid != 0)
293 kill(rw->rw_pid, SIGTERM);
294 else
295 {
296 /* Report never-started, now-terminated worker as dead. */
297 ReportBackgroundWorkerPID(rw);
298 }
299 }
300 continue;
301 }
302
303 /*
304 * If we aren't allowing new workers, then immediately mark it for
305 * termination; the next stanza will take care of cleaning it up.
306 * Doing this ensures that any process waiting for the worker will get
307 * awoken, even though the worker will never be allowed to run.
308 */
309 if (!allow_new_workers)
310 slot->terminate = true;
311
312 /*
313 * If the worker is marked for termination, we don't need to add it to
314 * the registered workers list; we can just free the slot. However, if
315 * bgw_notify_pid is set, the process that registered the worker may
316 * need to know that we've processed the terminate request, so be sure
317 * to signal it.
318 */
319 if (slot->terminate)
320 {
321 int notify_pid;
322
323 /*
324 * We need a memory barrier here to make sure that the load of
325 * bgw_notify_pid and the update of parallel_terminate_count
326 * complete before the store to in_use.
327 */
328 notify_pid = slot->worker.bgw_notify_pid;
329 if ((slot->worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
330 BackgroundWorkerData->parallel_terminate_count++;
331 slot->pid = 0;
332
333 pg_memory_barrier();
334 slot->in_use = false;
335
336 if (notify_pid != 0)
337 kill(notify_pid, SIGUSR1);
338
339 continue;
340 }
341
342 /*
343 * Copy the registration data into the registered workers list.
344 */
345 rw = malloc(sizeof(RegisteredBgWorker));
346 if (rw == NULL)
347 {
348 ereport(LOG,
349 (errcode(ERRCODE_OUT_OF_MEMORY),
350 errmsg("out of memory")));
351 return;
352 }
353
354 /*
355 * Copy strings in a paranoid way. If shared memory is corrupted, the
356 * source data might not even be NUL-terminated.
357 */
358 ascii_safe_strlcpy(rw->rw_worker.bgw_name,
359 slot->worker.bgw_name, BGW_MAXLEN);
360 ascii_safe_strlcpy(rw->rw_worker.bgw_type,
361 slot->worker.bgw_type, BGW_MAXLEN);
362 ascii_safe_strlcpy(rw->rw_worker.bgw_library_name,
363 slot->worker.bgw_library_name, BGW_MAXLEN);
364 ascii_safe_strlcpy(rw->rw_worker.bgw_function_name,
365 slot->worker.bgw_function_name, BGW_MAXLEN);
366
367 /*
368 * Copy various fixed-size fields.
369 *
370 * flags, start_time, and restart_time are examined by the postmaster,
371 * but nothing too bad will happen if they are corrupted. The
372 * remaining fields will only be examined by the child process. It
373 * might crash, but we won't.
374 */
375 rw->rw_worker.bgw_flags = slot->worker.bgw_flags;
376 rw->rw_worker.bgw_start_time = slot->worker.bgw_start_time;
377 rw->rw_worker.bgw_restart_time = slot->worker.bgw_restart_time;
378 rw->rw_worker.bgw_main_arg = slot->worker.bgw_main_arg;
379 memcpy(rw->rw_worker.bgw_extra, slot->worker.bgw_extra, BGW_EXTRALEN);
380
381 /*
382 * Copy the PID to be notified about state changes, but only if the
383 * postmaster knows about a backend with that PID. It isn't an error
384 * if the postmaster doesn't know about the PID, because the backend
385 * that requested the worker could have died (or been killed) just
386 * after doing so. Nonetheless, at least until we get some experience
387 * with how this plays out in the wild, log a message at a relative
388 * high debug level.
389 */
390 rw->rw_worker.bgw_notify_pid = slot->worker.bgw_notify_pid;
391 if (!PostmasterMarkPIDForWorkerNotify(rw->rw_worker.bgw_notify_pid))
392 {
393 elog(DEBUG1, "worker notification PID %lu is not valid",
394 (long) rw->rw_worker.bgw_notify_pid);
395 rw->rw_worker.bgw_notify_pid = 0;
396 }
397
398 /* Initialize postmaster bookkeeping. */
399 rw->rw_backend = NULL;
400 rw->rw_pid = 0;
401 rw->rw_child_slot = 0;
402 rw->rw_crashed_at = 0;
403 rw->rw_shmem_slot = slotno;
404 rw->rw_terminate = false;
405
406 /* Log it! */
407 ereport(DEBUG1,
408 (errmsg("registering background worker \"%s\"",
409 rw->rw_worker.bgw_name)));
410
411 slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
412 }
413 }
414
/*
 * Forget about a background worker that's no longer needed.
 *
 * The worker must be identified by passing an slist_mutable_iter that
 * points to it.  This convention allows deletion of workers during
 * searches of the worker list, and saves having to search the list again.
 *
 * Caller is responsible for notifying bgw_notify_pid, if appropriate.
 *
 * This function must be invoked only in the postmaster.
 */
void
ForgetBackgroundWorker(slist_mutable_iter *cur)
{
    RegisteredBgWorker *rw;
    BackgroundWorkerSlot *slot;

    rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);

    Assert(rw->rw_shmem_slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
    Assert(slot->in_use);

    /*
     * We need a memory barrier here to make sure that the update of
     * parallel_terminate_count completes before the store to in_use.
     */
    if ((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
        BackgroundWorkerData->parallel_terminate_count++;

    pg_memory_barrier();
    slot->in_use = false;       /* releases slot back to the backends */

    ereport(DEBUG1,
            (errmsg("unregistering background worker \"%s\"",
                    rw->rw_worker.bgw_name)));

    slist_delete_current(cur);
    free(rw);                   /* was malloc'd at registration time */
}
455
456 /*
457 * Report the PID of a newly-launched background worker in shared memory.
458 *
459 * This function should only be called from the postmaster.
460 */
461 void
ReportBackgroundWorkerPID(RegisteredBgWorker * rw)462 ReportBackgroundWorkerPID(RegisteredBgWorker *rw)
463 {
464 BackgroundWorkerSlot *slot;
465
466 Assert(rw->rw_shmem_slot < max_worker_processes);
467 slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
468 slot->pid = rw->rw_pid;
469
470 if (rw->rw_worker.bgw_notify_pid != 0)
471 kill(rw->rw_worker.bgw_notify_pid, SIGUSR1);
472 }
473
/*
 * Report that the PID of a background worker is now zero because a
 * previously-running background worker has exited.
 *
 * This function should only be called from the postmaster.
 */
void
ReportBackgroundWorkerExit(slist_mutable_iter *cur)
{
    RegisteredBgWorker *rw;
    BackgroundWorkerSlot *slot;
    int         notify_pid;

    rw = slist_container(RegisteredBgWorker, rw_lnode, cur->cur);

    Assert(rw->rw_shmem_slot < max_worker_processes);
    slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];
    slot->pid = rw->rw_pid;     /* caller has set rw_pid to 0 */
    /* capture before ForgetBackgroundWorker() may free rw below */
    notify_pid = rw->rw_worker.bgw_notify_pid;

    /*
     * If this worker is slated for deregistration, do that before notifying
     * the process which started it.  Otherwise, if that process tries to
     * reuse the slot immediately, it might not be available yet.  In theory
     * that could happen anyway if the process checks slot->pid at just the
     * wrong moment, but this makes the window narrower.
     */
    if (rw->rw_terminate ||
        rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
        ForgetBackgroundWorker(cur);

    if (notify_pid != 0)
        kill(notify_pid, SIGUSR1);
}
508
509 /*
510 * Cancel SIGUSR1 notifications for a PID belonging to an exiting backend.
511 *
512 * This function should only be called from the postmaster.
513 */
514 void
BackgroundWorkerStopNotifications(pid_t pid)515 BackgroundWorkerStopNotifications(pid_t pid)
516 {
517 slist_iter siter;
518
519 slist_foreach(siter, &BackgroundWorkerList)
520 {
521 RegisteredBgWorker *rw;
522
523 rw = slist_container(RegisteredBgWorker, rw_lnode, siter.cur);
524 if (rw->rw_worker.bgw_notify_pid == pid)
525 rw->rw_worker.bgw_notify_pid = 0;
526 }
527 }
528
/*
 * Cancel any not-yet-started worker requests that have waiting processes.
 *
 * This is called during a normal ("smart" or "fast") database shutdown.
 * After this point, no new background workers will be started, so anything
 * that might be waiting for them needs to be kicked off its wait.  We do
 * that by cancelling the bgworker registration entirely, which is perhaps
 * overkill, but since we're shutting down it does not matter whether the
 * registration record sticks around.
 *
 * This function should only be called from the postmaster.
 */
void
ForgetUnstartedBackgroundWorkers(void)
{
    slist_mutable_iter iter;

    /* mutable iteration, because ForgetBackgroundWorker deletes entries */
    slist_foreach_modify(iter, &BackgroundWorkerList)
    {
        RegisteredBgWorker *rw;
        BackgroundWorkerSlot *slot;

        rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);
        Assert(rw->rw_shmem_slot < max_worker_processes);
        slot = &BackgroundWorkerData->slot[rw->rw_shmem_slot];

        /* If it's not yet started, and there's someone waiting ... */
        if (slot->pid == InvalidPid &&
            rw->rw_worker.bgw_notify_pid != 0)
        {
            /* ... then zap it, and notify the waiter */
            int         notify_pid = rw->rw_worker.bgw_notify_pid;

            /* save notify_pid first: ForgetBackgroundWorker frees rw */
            ForgetBackgroundWorker(&iter);
            if (notify_pid != 0)
                kill(notify_pid, SIGUSR1);
        }
    }
}
568
/*
 * Reset background worker crash state.
 *
 * We assume that, after a crash-and-restart cycle, background workers without
 * the never-restart flag should be restarted immediately, instead of waiting
 * for bgw_restart_time to elapse.  On the other hand, workers with that flag
 * should be forgotten immediately, since we won't ever restart them.
 *
 * This function should only be called from the postmaster.
 */
void
ResetBackgroundWorkerCrashTimes(void)
{
    slist_mutable_iter iter;

    slist_foreach_modify(iter, &BackgroundWorkerList)
    {
        RegisteredBgWorker *rw;

        rw = slist_container(RegisteredBgWorker, rw_lnode, iter.cur);

        if (rw->rw_worker.bgw_restart_time == BGW_NEVER_RESTART)
        {
            /*
             * Workers marked BGW_NEVER_RESTART shouldn't get relaunched after
             * the crash, so forget about them.  (If we wait until after the
             * crash to forget about them, and they are parallel workers,
             * parallel_terminate_count will get incremented after we've
             * already zeroed parallel_register_count, which would be bad.)
             */
            ForgetBackgroundWorker(&iter);
        }
        else
        {
            /*
             * The accounting which we do via parallel_register_count and
             * parallel_terminate_count would get messed up if a worker marked
             * parallel could survive a crash and restart cycle.  All such
             * workers should be marked BGW_NEVER_RESTART, and thus control
             * should never reach this branch.
             */
            Assert((rw->rw_worker.bgw_flags & BGWORKER_CLASS_PARALLEL) == 0);

            /*
             * Allow this worker to be restarted immediately after we finish
             * resetting.
             */
            rw->rw_crashed_at = 0;

            /*
             * If there was anyone waiting for it, they're history.
             */
            rw->rw_worker.bgw_notify_pid = 0;
        }
    }
}
625
#ifdef EXEC_BACKEND
/*
 * In EXEC_BACKEND mode, workers use this to retrieve their details from
 * shared memory.
 *
 * Returns a pointer to process-local static storage, valid for the life of
 * the process; only one worker entry per process is ever needed.
 */
BackgroundWorker *
BackgroundWorkerEntry(int slotno)
{
    static BackgroundWorker myEntry;
    BackgroundWorkerSlot *slot;

    Assert(slotno < BackgroundWorkerData->total_slots);
    slot = &BackgroundWorkerData->slot[slotno];
    Assert(slot->in_use);

    /* must copy this in case we don't intend to retain shmem access */
    memcpy(&myEntry, &slot->worker, sizeof myEntry);
    return &myEntry;
}
#endif
646
/*
 * Complain about the BackgroundWorker definition using error level elevel.
 * Return true if it looks ok, false if not (unless elevel >= ERROR, in
 * which case we won't return at all in the not-OK case).
 *
 * Note: as a side effect, fills in an empty bgw_type from bgw_name.
 */
static bool
SanityCheckBackgroundWorker(BackgroundWorker *worker, int elevel)
{
    /* sanity check for flags */
    if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
    {
        /* database access requires shared-memory access too */
        if (!(worker->bgw_flags & BGWORKER_SHMEM_ACCESS))
        {
            ereport(elevel,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("background worker \"%s\": must attach to shared memory in order to request a database connection",
                            worker->bgw_name)));
            return false;
        }

        /* database access isn't possible until after recovery etc. */
        if (worker->bgw_start_time == BgWorkerStart_PostmasterStart)
        {
            ereport(elevel,
                    (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                     errmsg("background worker \"%s\": cannot request database access if starting at postmaster start",
                            worker->bgw_name)));
            return false;
        }

        /* XXX other checks? */
    }

    /* restart interval must be BGW_NEVER_RESTART or in [0, 1 day] seconds */
    if ((worker->bgw_restart_time < 0 &&
         worker->bgw_restart_time != BGW_NEVER_RESTART) ||
        (worker->bgw_restart_time > USECS_PER_DAY / 1000))
    {
        ereport(elevel,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("background worker \"%s\": invalid restart interval",
                        worker->bgw_name)));
        return false;
    }

    /*
     * Parallel workers may not be configured for restart, because the
     * parallel_register_count/parallel_terminate_count accounting can't
     * handle parallel workers lasting through a crash-and-restart cycle.
     */
    if (worker->bgw_restart_time != BGW_NEVER_RESTART &&
        (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0)
    {
        ereport(elevel,
                (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
                 errmsg("background worker \"%s\": parallel workers may not be configured for restart",
                        worker->bgw_name)));
        return false;
    }

    /*
     * If bgw_type is not filled in, use bgw_name.
     */
    if (strcmp(worker->bgw_type, "") == 0)
        strcpy(worker->bgw_type, worker->bgw_name);

    return true;
}
713
/*
 * SIGQUIT handler: exit immediately without any cleanup.
 */
static void
bgworker_quickdie(SIGNAL_ARGS)
{
    /*
     * We DO NOT want to run proc_exit() or atexit() callbacks -- we're here
     * because shared memory may be corrupted, so we don't want to try to
     * clean up our transaction.  Just nail the windows shut and get out of
     * town.  The callbacks wouldn't be safe to run from a signal handler,
     * anyway.
     *
     * Note we do _exit(2) not _exit(0).  This is to force the postmaster into
     * a system reset cycle if someone sends a manual SIGQUIT to a random
     * backend.  This is necessary precisely because we don't clean up our
     * shared memory state.  (The "dead man switch" mechanism in pmsignal.c
     * should ensure the postmaster sees this as a crash, too, but no harm in
     * being doubly sure.)
     */
    _exit(2);
}
733
/*
 * Standard SIGTERM handler for background workers
 *
 * Reports FATAL, which (via the error machinery) terminates the process.
 */
static void
bgworker_die(SIGNAL_ARGS)
{
    /* block signals so the FATAL exit path isn't itself interrupted */
    PG_SETMASK(&BlockSig);

    ereport(FATAL,
            (errcode(ERRCODE_ADMIN_SHUTDOWN),
             errmsg("terminating background worker \"%s\" due to administrator command",
                    MyBgworkerEntry->bgw_type)));
}
747
748 /*
749 * Standard SIGUSR1 handler for unconnected workers
750 *
751 * Here, we want to make sure an unconnected worker will at least heed
752 * latch activity.
753 */
754 static void
bgworker_sigusr1_handler(SIGNAL_ARGS)755 bgworker_sigusr1_handler(SIGNAL_ARGS)
756 {
757 int save_errno = errno;
758
759 latch_sigusr1_handler();
760
761 errno = save_errno;
762 }
763
/*
 * Start a new background worker
 *
 * This is the main entry point for background worker, to be called from
 * postmaster.  Sets up signal handling and (optionally) shared-memory
 * access, then transfers control to the worker's entry-point function.
 * Does not return; exits via proc_exit().
 */
void
StartBackgroundWorker(void)
{
    sigjmp_buf  local_sigjmp_buf;
    BackgroundWorker *worker = MyBgworkerEntry;
    bgworker_main_type entrypt;

    if (worker == NULL)
        elog(FATAL, "unable to find bgworker entry");

    IsBackgroundWorker = true;

    /* Identify myself via ps */
    init_ps_display(worker->bgw_name, "", "", "");

    /*
     * If we're not supposed to have shared memory access, then detach from
     * shared memory.  If we didn't request shared memory access, the
     * postmaster won't force a cluster-wide restart if we exit unexpectedly,
     * so we'd better make sure that we don't mess anything up that would
     * require that sort of cleanup.
     */
    if ((worker->bgw_flags & BGWORKER_SHMEM_ACCESS) == 0)
    {
        dsm_detach_all();
        PGSharedMemoryDetach();
    }

    SetProcessingMode(InitProcessing);

    /* Apply PostAuthDelay (debugging aid: time to attach a debugger) */
    if (PostAuthDelay > 0)
        pg_usleep(PostAuthDelay * 1000000L);

    /*
     * Set up signal handlers.
     */
    if (worker->bgw_flags & BGWORKER_BACKEND_DATABASE_CONNECTION)
    {
        /*
         * SIGINT is used to signal canceling the current action
         */
        pqsignal(SIGINT, StatementCancelHandler);
        pqsignal(SIGUSR1, procsignal_sigusr1_handler);
        pqsignal(SIGFPE, FloatExceptionHandler);

        /* XXX Any other handlers needed here? */
    }
    else
    {
        /* unconnected workers only need latch wakeups */
        pqsignal(SIGINT, SIG_IGN);
        pqsignal(SIGUSR1, bgworker_sigusr1_handler);
        pqsignal(SIGFPE, SIG_IGN);
    }
    pqsignal(SIGTERM, bgworker_die);
    pqsignal(SIGHUP, SIG_IGN);

    pqsignal(SIGQUIT, bgworker_quickdie);
    InitializeTimeouts();       /* establishes SIGALRM handler */

    pqsignal(SIGPIPE, SIG_IGN);
    pqsignal(SIGUSR2, SIG_IGN);
    pqsignal(SIGCHLD, SIG_DFL);

    /*
     * If an exception is encountered, processing resumes here.
     *
     * We just need to clean up, report the error, and go away.
     */
    if (sigsetjmp(local_sigjmp_buf, 1) != 0)
    {
        /* Since not using PG_TRY, must reset error stack by hand */
        error_context_stack = NULL;

        /* Prevent interrupts while cleaning up */
        HOLD_INTERRUPTS();

        /*
         * sigsetjmp will have blocked all signals, but we may need to accept
         * signals while communicating with our parallel leader.  Once we've
         * done HOLD_INTERRUPTS() it should be safe to unblock signals.
         */
        BackgroundWorkerUnblockSignals();

        /* Report the error to the parallel leader and the server log */
        EmitErrorReport();

        /*
         * Do we need more cleanup here?  For shmem-connected bgworkers, we
         * will call InitProcess below, which will install ProcKill as exit
         * callback.  That will take care of releasing locks, etc.
         */

        /* and go away */
        proc_exit(1);
    }

    /* We can now handle ereport(ERROR) */
    PG_exception_stack = &local_sigjmp_buf;

    /*
     * If the background worker request shared memory access, set that up now;
     * else, detach all shared memory segments.
     */
    if (worker->bgw_flags & BGWORKER_SHMEM_ACCESS)
    {
        /*
         * Early initialization.  Some of this could be useful even for
         * background workers that aren't using shared memory, but they can
         * call the individual startup routines for those subsystems if
         * needed.
         */
        BaseInit();

        /*
         * Create a per-backend PGPROC struct in shared memory, except in the
         * EXEC_BACKEND case where this was done in SubPostmasterMain.  We must
         * do this before we can use LWLocks (and in the EXEC_BACKEND case we
         * already had to do some stuff with LWLocks).
         */
#ifndef EXEC_BACKEND
        InitProcess();
#endif
    }

    /*
     * Look up the entry point function, loading its library if necessary.
     */
    entrypt = LookupBackgroundWorkerFunction(worker->bgw_library_name,
                                             worker->bgw_function_name);

    /*
     * Note that in normal processes, we would call InitPostgres here.  For a
     * worker, however, we don't know what database to connect to, yet; so we
     * need to wait until the user code does it via
     * BackgroundWorkerInitializeConnection().
     */

    /*
     * Now invoke the user-defined worker code
     */
    entrypt(worker->bgw_main_arg);

    /* ... and if it returns, we're done */
    proc_exit(0);
}
916
/*
 * Register a new static background worker.
 *
 * This can only be called directly from postmaster or in the _PG_init
 * function of a module library that's loaded by shared_preload_libraries;
 * otherwise it will have no effect.
 *
 * Copies *worker into a postmaster-private RegisteredBgWorker; the caller's
 * struct need not outlive the call.
 */
void
RegisterBackgroundWorker(BackgroundWorker *worker)
{
    RegisteredBgWorker *rw;
    static int  numworkers = 0; /* count across all calls in this process */

    if (!IsUnderPostmaster)
        ereport(DEBUG1,
                (errmsg("registering background worker \"%s\"", worker->bgw_name)));

    /* only shared_preload_libraries (or core code) may register statically */
    if (!process_shared_preload_libraries_in_progress &&
        strcmp(worker->bgw_library_name, "postgres") != 0)
    {
        if (!IsUnderPostmaster)
            ereport(LOG,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("background worker \"%s\": must be registered in shared_preload_libraries",
                            worker->bgw_name)));
        return;
    }

    if (!SanityCheckBackgroundWorker(worker, LOG))
        return;

    /* notification only makes sense for dynamic workers */
    if (worker->bgw_notify_pid != 0)
    {
        ereport(LOG,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 errmsg("background worker \"%s\": only dynamic background workers can request notification",
                        worker->bgw_name)));
        return;
    }

    /*
     * Enforce maximum number of workers.  Note this is overly restrictive: we
     * could allow more non-shmem-connected workers, because these don't count
     * towards the MAX_BACKENDS limit elsewhere.  For now, it doesn't seem
     * important to relax this restriction.
     */
    if (++numworkers > max_worker_processes)
    {
        ereport(LOG,
                (errcode(ERRCODE_CONFIGURATION_LIMIT_EXCEEDED),
                 errmsg("too many background workers"),
                 errdetail_plural("Up to %d background worker can be registered with the current settings.",
                                  "Up to %d background workers can be registered with the current settings.",
                                  max_worker_processes,
                                  max_worker_processes),
                 errhint("Consider increasing the configuration parameter \"max_worker_processes\".")));
        return;
    }

    /*
     * Copy the registration data into the registered workers list.
     */
    rw = malloc(sizeof(RegisteredBgWorker));
    if (rw == NULL)
    {
        ereport(LOG,
                (errcode(ERRCODE_OUT_OF_MEMORY),
                 errmsg("out of memory")));
        return;
    }

    rw->rw_worker = *worker;
    rw->rw_backend = NULL;
    rw->rw_pid = 0;
    rw->rw_child_slot = 0;
    rw->rw_crashed_at = 0;
    rw->rw_terminate = false;

    slist_push_head(&BackgroundWorkerList, &rw->rw_lnode);
}
997
998 /*
999 * Register a new background worker from a regular backend.
1000 *
1001 * Returns true on success and false on failure. Failure typically indicates
1002 * that no background worker slots are currently available.
1003 *
1004 * If handle != NULL, we'll set *handle to a pointer that can subsequently
1005 * be used as an argument to GetBackgroundWorkerPid(). The caller can
1006 * free this pointer using pfree(), if desired.
1007 */
1008 bool
RegisterDynamicBackgroundWorker(BackgroundWorker * worker,BackgroundWorkerHandle ** handle)1009 RegisterDynamicBackgroundWorker(BackgroundWorker *worker,
1010 BackgroundWorkerHandle **handle)
1011 {
1012 int slotno;
1013 bool success = false;
1014 bool parallel;
1015 uint64 generation = 0;
1016
1017 /*
1018 * We can't register dynamic background workers from the postmaster. If
1019 * this is a standalone backend, we're the only process and can't start
1020 * any more. In a multi-process environment, it might be theoretically
1021 * possible, but we don't currently support it due to locking
1022 * considerations; see comments on the BackgroundWorkerSlot data
1023 * structure.
1024 */
1025 if (!IsUnderPostmaster)
1026 return false;
1027
1028 if (!SanityCheckBackgroundWorker(worker, ERROR))
1029 return false;
1030
1031 parallel = (worker->bgw_flags & BGWORKER_CLASS_PARALLEL) != 0;
1032
1033 LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1034
1035 /*
1036 * If this is a parallel worker, check whether there are already too many
1037 * parallel workers; if so, don't register another one. Our view of
1038 * parallel_terminate_count may be slightly stale, but that doesn't really
1039 * matter: we would have gotten the same result if we'd arrived here
1040 * slightly earlier anyway. There's no help for it, either, since the
1041 * postmaster must not take locks; a memory barrier wouldn't guarantee
1042 * anything useful.
1043 */
1044 if (parallel && (BackgroundWorkerData->parallel_register_count -
1045 BackgroundWorkerData->parallel_terminate_count) >=
1046 max_parallel_workers)
1047 {
1048 Assert(BackgroundWorkerData->parallel_register_count -
1049 BackgroundWorkerData->parallel_terminate_count <=
1050 MAX_PARALLEL_WORKER_LIMIT);
1051 LWLockRelease(BackgroundWorkerLock);
1052 return false;
1053 }
1054
1055 /*
1056 * Look for an unused slot. If we find one, grab it.
1057 */
1058 for (slotno = 0; slotno < BackgroundWorkerData->total_slots; ++slotno)
1059 {
1060 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1061
1062 if (!slot->in_use)
1063 {
1064 memcpy(&slot->worker, worker, sizeof(BackgroundWorker));
1065 slot->pid = InvalidPid; /* indicates not started yet */
1066 slot->generation++;
1067 slot->terminate = false;
1068 generation = slot->generation;
1069 if (parallel)
1070 BackgroundWorkerData->parallel_register_count++;
1071
1072 /*
1073 * Make sure postmaster doesn't see the slot as in use before it
1074 * sees the new contents.
1075 */
1076 pg_write_barrier();
1077
1078 slot->in_use = true;
1079 success = true;
1080 break;
1081 }
1082 }
1083
1084 LWLockRelease(BackgroundWorkerLock);
1085
1086 /* If we found a slot, tell the postmaster to notice the change. */
1087 if (success)
1088 SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1089
1090 /*
1091 * If we found a slot and the user has provided a handle, initialize it.
1092 */
1093 if (success && handle)
1094 {
1095 *handle = palloc(sizeof(BackgroundWorkerHandle));
1096 (*handle)->slot = slotno;
1097 (*handle)->generation = generation;
1098 }
1099
1100 return success;
1101 }
1102
1103 /*
1104 * Get the PID of a dynamically-registered background worker.
1105 *
1106 * If the worker is determined to be running, the return value will be
1107 * BGWH_STARTED and *pidp will get the PID of the worker process. If the
1108 * postmaster has not yet attempted to start the worker, the return value will
1109 * be BGWH_NOT_YET_STARTED. Otherwise, the return value is BGWH_STOPPED.
1110 *
1111 * BGWH_STOPPED can indicate either that the worker is temporarily stopped
1112 * (because it is configured for automatic restart and exited non-zero),
1113 * or that the worker is permanently stopped (because it exited with exit
1114 * code 0, or was not configured for automatic restart), or even that the
1115 * worker was unregistered without ever starting (either because startup
1116 * failed and the worker is not configured for automatic restart, or because
1117 * TerminateBackgroundWorker was used before the worker was successfully
1118 * started).
1119 */
1120 BgwHandleStatus
GetBackgroundWorkerPid(BackgroundWorkerHandle * handle,pid_t * pidp)1121 GetBackgroundWorkerPid(BackgroundWorkerHandle *handle, pid_t *pidp)
1122 {
1123 BackgroundWorkerSlot *slot;
1124 pid_t pid;
1125
1126 Assert(handle->slot < max_worker_processes);
1127 slot = &BackgroundWorkerData->slot[handle->slot];
1128
1129 /*
1130 * We could probably arrange to synchronize access to data using memory
1131 * barriers only, but for now, let's just keep it simple and grab the
1132 * lock. It seems unlikely that there will be enough traffic here to
1133 * result in meaningful contention.
1134 */
1135 LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1136
1137 /*
1138 * The generation number can't be concurrently changed while we hold the
1139 * lock. The pid, which is updated by the postmaster, can change at any
1140 * time, but we assume such changes are atomic. So the value we read
1141 * won't be garbage, but it might be out of date by the time the caller
1142 * examines it (but that's unavoidable anyway).
1143 *
1144 * The in_use flag could be in the process of changing from true to false,
1145 * but if it is already false then it can't change further.
1146 */
1147 if (handle->generation != slot->generation || !slot->in_use)
1148 pid = 0;
1149 else
1150 pid = slot->pid;
1151
1152 /* All done. */
1153 LWLockRelease(BackgroundWorkerLock);
1154
1155 if (pid == 0)
1156 return BGWH_STOPPED;
1157 else if (pid == InvalidPid)
1158 return BGWH_NOT_YET_STARTED;
1159 *pidp = pid;
1160 return BGWH_STARTED;
1161 }
1162
1163 /*
1164 * Wait for a background worker to start up.
1165 *
1166 * This is like GetBackgroundWorkerPid(), except that if the worker has not
1167 * yet started, we wait for it to do so; thus, BGWH_NOT_YET_STARTED is never
1168 * returned. However, if the postmaster has died, we give up and return
1169 * BGWH_POSTMASTER_DIED, since it that case we know that startup will not
1170 * take place.
1171 *
1172 * The caller *must* have set our PID as the worker's bgw_notify_pid,
1173 * else we will not be awoken promptly when the worker's state changes.
1174 */
1175 BgwHandleStatus
WaitForBackgroundWorkerStartup(BackgroundWorkerHandle * handle,pid_t * pidp)1176 WaitForBackgroundWorkerStartup(BackgroundWorkerHandle *handle, pid_t *pidp)
1177 {
1178 BgwHandleStatus status;
1179 int rc;
1180
1181 for (;;)
1182 {
1183 pid_t pid;
1184
1185 CHECK_FOR_INTERRUPTS();
1186
1187 status = GetBackgroundWorkerPid(handle, &pid);
1188 if (status == BGWH_STARTED)
1189 *pidp = pid;
1190 if (status != BGWH_NOT_YET_STARTED)
1191 break;
1192
1193 rc = WaitLatch(MyLatch,
1194 WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1195 WAIT_EVENT_BGWORKER_STARTUP);
1196
1197 if (rc & WL_POSTMASTER_DEATH)
1198 {
1199 status = BGWH_POSTMASTER_DIED;
1200 break;
1201 }
1202
1203 ResetLatch(MyLatch);
1204 }
1205
1206 return status;
1207 }
1208
1209 /*
1210 * Wait for a background worker to stop.
1211 *
1212 * If the worker hasn't yet started, or is running, we wait for it to stop
1213 * and then return BGWH_STOPPED. However, if the postmaster has died, we give
1214 * up and return BGWH_POSTMASTER_DIED, because it's the postmaster that
1215 * notifies us when a worker's state changes.
1216 *
1217 * The caller *must* have set our PID as the worker's bgw_notify_pid,
1218 * else we will not be awoken promptly when the worker's state changes.
1219 */
1220 BgwHandleStatus
WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle * handle)1221 WaitForBackgroundWorkerShutdown(BackgroundWorkerHandle *handle)
1222 {
1223 BgwHandleStatus status;
1224 int rc;
1225
1226 for (;;)
1227 {
1228 pid_t pid;
1229
1230 CHECK_FOR_INTERRUPTS();
1231
1232 status = GetBackgroundWorkerPid(handle, &pid);
1233 if (status == BGWH_STOPPED)
1234 break;
1235
1236 rc = WaitLatch(MyLatch,
1237 WL_LATCH_SET | WL_POSTMASTER_DEATH, 0,
1238 WAIT_EVENT_BGWORKER_SHUTDOWN);
1239
1240 if (rc & WL_POSTMASTER_DEATH)
1241 {
1242 status = BGWH_POSTMASTER_DIED;
1243 break;
1244 }
1245
1246 ResetLatch(MyLatch);
1247 }
1248
1249 return status;
1250 }
1251
1252 /*
1253 * Instruct the postmaster to terminate a background worker.
1254 *
1255 * Note that it's safe to do this without regard to whether the worker is
1256 * still running, or even if the worker may already have existed and been
1257 * unregistered.
1258 */
1259 void
TerminateBackgroundWorker(BackgroundWorkerHandle * handle)1260 TerminateBackgroundWorker(BackgroundWorkerHandle *handle)
1261 {
1262 BackgroundWorkerSlot *slot;
1263 bool signal_postmaster = false;
1264
1265 Assert(handle->slot < max_worker_processes);
1266 slot = &BackgroundWorkerData->slot[handle->slot];
1267
1268 /* Set terminate flag in shared memory, unless slot has been reused. */
1269 LWLockAcquire(BackgroundWorkerLock, LW_EXCLUSIVE);
1270 if (handle->generation == slot->generation)
1271 {
1272 slot->terminate = true;
1273 signal_postmaster = true;
1274 }
1275 LWLockRelease(BackgroundWorkerLock);
1276
1277 /* Make sure the postmaster notices the change to shared memory. */
1278 if (signal_postmaster)
1279 SendPostmasterSignal(PMSIGNAL_BACKGROUND_WORKER_CHANGE);
1280 }
1281
1282 /*
1283 * Look up (and possibly load) a bgworker entry point function.
1284 *
1285 * For functions contained in the core code, we use library name "postgres"
1286 * and consult the InternalBGWorkers array. External functions are
1287 * looked up, and loaded if necessary, using load_external_function().
1288 *
1289 * The point of this is to pass function names as strings across process
1290 * boundaries. We can't pass actual function addresses because of the
1291 * possibility that the function has been loaded at a different address
1292 * in a different process. This is obviously a hazard for functions in
1293 * loadable libraries, but it can happen even for functions in the core code
1294 * on platforms using EXEC_BACKEND (e.g., Windows).
1295 *
1296 * At some point it might be worthwhile to get rid of InternalBGWorkers[]
1297 * in favor of applying load_external_function() for core functions too;
1298 * but that raises portability issues that are not worth addressing now.
1299 */
1300 static bgworker_main_type
LookupBackgroundWorkerFunction(const char * libraryname,const char * funcname)1301 LookupBackgroundWorkerFunction(const char *libraryname, const char *funcname)
1302 {
1303 /*
1304 * If the function is to be loaded from postgres itself, search the
1305 * InternalBGWorkers array.
1306 */
1307 if (strcmp(libraryname, "postgres") == 0)
1308 {
1309 int i;
1310
1311 for (i = 0; i < lengthof(InternalBGWorkers); i++)
1312 {
1313 if (strcmp(InternalBGWorkers[i].fn_name, funcname) == 0)
1314 return InternalBGWorkers[i].fn_addr;
1315 }
1316
1317 /* We can only reach this by programming error. */
1318 elog(ERROR, "internal function \"%s\" not found", funcname);
1319 }
1320
1321 /* Otherwise load from external library. */
1322 return (bgworker_main_type)
1323 load_external_function(libraryname, funcname, true, NULL);
1324 }
1325
1326 /*
1327 * Given a PID, get the bgw_type of the background worker. Returns NULL if
1328 * not a valid background worker.
1329 *
1330 * The return value is in static memory belonging to this function, so it has
1331 * to be used before calling this function again. This is so that the caller
1332 * doesn't have to worry about the background worker locking protocol.
1333 */
1334 const char *
GetBackgroundWorkerTypeByPid(pid_t pid)1335 GetBackgroundWorkerTypeByPid(pid_t pid)
1336 {
1337 int slotno;
1338 bool found = false;
1339 static char result[BGW_MAXLEN];
1340
1341 LWLockAcquire(BackgroundWorkerLock, LW_SHARED);
1342
1343 for (slotno = 0; slotno < BackgroundWorkerData->total_slots; slotno++)
1344 {
1345 BackgroundWorkerSlot *slot = &BackgroundWorkerData->slot[slotno];
1346
1347 if (slot->pid > 0 && slot->pid == pid)
1348 {
1349 strcpy(result, slot->worker.bgw_type);
1350 found = true;
1351 break;
1352 }
1353 }
1354
1355 LWLockRelease(BackgroundWorkerLock);
1356
1357 if (!found)
1358 return NULL;
1359
1360 return result;
1361 }
1362