1 /*-------------------------------------------------------------------------
2 *
3 * autoprewarm.c
4 * Periodically dump information about the blocks present in
5 * shared_buffers, and reload them on server restart.
6 *
7 * Due to locking considerations, we can't actually begin prewarming
8 * until the server reaches a consistent state. We need the catalogs
9 * to be consistent so that we can figure out which relation to lock,
10 * and we need to lock the relations so that we don't try to prewarm
11 * pages from a relation that is in the process of being dropped.
12 *
13 * While prewarming, autoprewarm will use two workers. There's a
14 * leader worker that reads and sorts the list of blocks to be
15 * prewarmed and then launches a per-database worker for each
16 * relevant database in turn. The former keeps running after the
17 * initial prewarm is complete to update the dump file periodically.
18 *
19 * Copyright (c) 2016-2021, PostgreSQL Global Development Group
20 *
21 * IDENTIFICATION
22 * contrib/pg_prewarm/autoprewarm.c
23 *
24 *-------------------------------------------------------------------------
25 */
26
27 #include "postgres.h"
28
29 #include <unistd.h>
30
31 #include "access/relation.h"
32 #include "access/xact.h"
33 #include "catalog/pg_class.h"
34 #include "catalog/pg_type.h"
35 #include "miscadmin.h"
36 #include "pgstat.h"
37 #include "postmaster/bgworker.h"
38 #include "postmaster/interrupt.h"
39 #include "storage/buf_internals.h"
40 #include "storage/dsm.h"
41 #include "storage/ipc.h"
42 #include "storage/latch.h"
43 #include "storage/lwlock.h"
44 #include "storage/proc.h"
45 #include "storage/procsignal.h"
46 #include "storage/shmem.h"
47 #include "storage/smgr.h"
48 #include "tcop/tcopprot.h"
49 #include "utils/acl.h"
50 #include "utils/datetime.h"
51 #include "utils/guc.h"
52 #include "utils/memutils.h"
53 #include "utils/rel.h"
54 #include "utils/relfilenodemap.h"
55 #include "utils/resowner.h"
56
/*
 * Name of the block dump file.  apw_dump_now() writes "<name>.tmp" and then
 * renames it to this name; apw_load_buffers() reads it back.
 */
#define AUTOPREWARM_FILE "autoprewarm.blocks"
58
/*
 * Metadata for each block we dump.
 *
 * One record is written per line of the dump file as five unsigned decimal
 * fields, in the order the members are declared below.
 */
typedef struct BlockInfoRecord
{
	Oid			database;		/* database OID; InvalidOid marks records that
								 * apw_load_buffers() treats as global objects */
	Oid			tablespace;		/* tablespace OID from the buffer tag */
	Oid			filenode;		/* relfilenode from the buffer tag */
	ForkNumber	forknum;		/* fork the block belongs to */
	BlockNumber blocknum;		/* block number within that fork */
} BlockInfoRecord;
68
/*
 * Shared state information for autoprewarm bgworker.
 *
 * The structure lives in the "autoprewarm" shared memory area set up by
 * apw_init_shmem().
 */
typedef struct AutoPrewarmSharedState
{
	LWLock		lock;			/* mutual exclusion */
	pid_t		bgworker_pid;	/* for main bgworker; InvalidPid when no
								 * leader has registered itself */
	pid_t		pid_using_dumpfile; /* for autoprewarm or block dump;
									 * InvalidPid when the file is free */

	/* Following items are for communication with per-database worker */
	dsm_handle	block_info_handle;	/* DSM segment holding the sorted
									 * BlockInfoRecord array */
	Oid			database;		/* database the next worker connects to */
	int			prewarm_start_idx;	/* first array index for the worker */
	int			prewarm_stop_idx;	/* index just past the worker's last
									 * record (exclusive bound) */
	int			prewarmed_blocks;	/* count of blocks read into buffers,
									 * bumped by the per-database workers */
} AutoPrewarmSharedState;
83
/* Entry points that are looked up by name (module load and bgworker mains). */
void		_PG_init(void);
void		autoprewarm_main(Datum main_arg);
void		autoprewarm_database_main(Datum main_arg);

/* SQL-callable functions. */
PG_FUNCTION_INFO_V1(autoprewarm_start_worker);
PG_FUNCTION_INFO_V1(autoprewarm_dump_now);

/* File-local helpers. */
static void apw_load_buffers(void);
static int	apw_dump_now(bool is_bgworker, bool dump_unlogged);
static void apw_start_leader_worker(void);
static void apw_start_database_worker(void);
static bool apw_init_shmem(void);
static void apw_detach_shmem(int code, Datum arg);
static int	apw_compare_blockinfo(const void *p, const void *q);

/* Pointer to shared-memory state; set by apw_init_shmem(). */
static AutoPrewarmSharedState *apw_state = NULL;

/* GUC variables. */
static bool autoprewarm = true; /* start worker? */
static int	autoprewarm_interval;	/* dump interval, in seconds; zero
									 * disables time-based dumping */
105
106 /*
107 * Module load callback.
108 */
109 void
_PG_init(void)110 _PG_init(void)
111 {
112 DefineCustomIntVariable("pg_prewarm.autoprewarm_interval",
113 "Sets the interval between dumps of shared buffers",
114 "If set to zero, time-based dumping is disabled.",
115 &autoprewarm_interval,
116 300,
117 0, INT_MAX / 1000,
118 PGC_SIGHUP,
119 GUC_UNIT_S,
120 NULL,
121 NULL,
122 NULL);
123
124 if (!process_shared_preload_libraries_in_progress)
125 return;
126
127 /* can't define PGC_POSTMASTER variable after startup */
128 DefineCustomBoolVariable("pg_prewarm.autoprewarm",
129 "Starts the autoprewarm worker.",
130 NULL,
131 &autoprewarm,
132 true,
133 PGC_POSTMASTER,
134 0,
135 NULL,
136 NULL,
137 NULL);
138
139 EmitWarningsOnPlaceholders("pg_prewarm");
140
141 RequestAddinShmemSpace(MAXALIGN(sizeof(AutoPrewarmSharedState)));
142
143 /* Register autoprewarm worker, if enabled. */
144 if (autoprewarm)
145 apw_start_leader_worker();
146 }
147
148 /*
149 * Main entry point for the leader autoprewarm process. Per-database workers
150 * have a separate entry point.
151 */
152 void
autoprewarm_main(Datum main_arg)153 autoprewarm_main(Datum main_arg)
154 {
155 bool first_time = true;
156 bool final_dump_allowed = true;
157 TimestampTz last_dump_time = 0;
158
159 /* Establish signal handlers; once that's done, unblock signals. */
160 pqsignal(SIGTERM, SignalHandlerForShutdownRequest);
161 pqsignal(SIGHUP, SignalHandlerForConfigReload);
162 pqsignal(SIGUSR1, procsignal_sigusr1_handler);
163 BackgroundWorkerUnblockSignals();
164
165 /* Create (if necessary) and attach to our shared memory area. */
166 if (apw_init_shmem())
167 first_time = false;
168
169 /* Set on-detach hook so that our PID will be cleared on exit. */
170 on_shmem_exit(apw_detach_shmem, 0);
171
172 /*
173 * Store our PID in the shared memory area --- unless there's already
174 * another worker running, in which case just exit.
175 */
176 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
177 if (apw_state->bgworker_pid != InvalidPid)
178 {
179 LWLockRelease(&apw_state->lock);
180 ereport(LOG,
181 (errmsg("autoprewarm worker is already running under PID %lu",
182 (unsigned long) apw_state->bgworker_pid)));
183 return;
184 }
185 apw_state->bgworker_pid = MyProcPid;
186 LWLockRelease(&apw_state->lock);
187
188 /*
189 * Preload buffers from the dump file only if we just created the shared
190 * memory region. Otherwise, it's either already been done or shouldn't
191 * be done - e.g. because the old dump file has been overwritten since the
192 * server was started.
193 *
194 * There's not much point in performing a dump immediately after we finish
195 * preloading; so, if we do end up preloading, consider the last dump time
196 * to be equal to the current time.
197 *
198 * If apw_load_buffers() is terminated early by a shutdown request,
199 * prevent dumping out our state below the loop, because we'd effectively
200 * just truncate the saved state to however much we'd managed to preload.
201 */
202 if (first_time)
203 {
204 apw_load_buffers();
205 final_dump_allowed = !ShutdownRequestPending;
206 last_dump_time = GetCurrentTimestamp();
207 }
208
209 /* Periodically dump buffers until terminated. */
210 while (!ShutdownRequestPending)
211 {
212 /* In case of a SIGHUP, just reload the configuration. */
213 if (ConfigReloadPending)
214 {
215 ConfigReloadPending = false;
216 ProcessConfigFile(PGC_SIGHUP);
217 }
218
219 if (autoprewarm_interval <= 0)
220 {
221 /* We're only dumping at shutdown, so just wait forever. */
222 (void) WaitLatch(MyLatch,
223 WL_LATCH_SET | WL_EXIT_ON_PM_DEATH,
224 -1L,
225 PG_WAIT_EXTENSION);
226 }
227 else
228 {
229 TimestampTz next_dump_time;
230 long delay_in_ms;
231
232 /* Compute the next dump time. */
233 next_dump_time =
234 TimestampTzPlusMilliseconds(last_dump_time,
235 autoprewarm_interval * 1000);
236 delay_in_ms =
237 TimestampDifferenceMilliseconds(GetCurrentTimestamp(),
238 next_dump_time);
239
240 /* Perform a dump if it's time. */
241 if (delay_in_ms <= 0)
242 {
243 last_dump_time = GetCurrentTimestamp();
244 apw_dump_now(true, false);
245 continue;
246 }
247
248 /* Sleep until the next dump time. */
249 (void) WaitLatch(MyLatch,
250 WL_LATCH_SET | WL_TIMEOUT | WL_EXIT_ON_PM_DEATH,
251 delay_in_ms,
252 PG_WAIT_EXTENSION);
253 }
254
255 /* Reset the latch, loop. */
256 ResetLatch(MyLatch);
257 }
258
259 /*
260 * Dump one last time. We assume this is probably the result of a system
261 * shutdown, although it's possible that we've merely been terminated.
262 */
263 if (final_dump_allowed)
264 apw_dump_now(true, true);
265 }
266
267 /*
268 * Read the dump file and launch per-database workers one at a time to
269 * prewarm the buffers found there.
270 */
271 static void
apw_load_buffers(void)272 apw_load_buffers(void)
273 {
274 FILE *file = NULL;
275 int num_elements,
276 i;
277 BlockInfoRecord *blkinfo;
278 dsm_segment *seg;
279
280 /*
281 * Skip the prewarm if the dump file is in use; otherwise, prevent any
282 * other process from writing it while we're using it.
283 */
284 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
285 if (apw_state->pid_using_dumpfile == InvalidPid)
286 apw_state->pid_using_dumpfile = MyProcPid;
287 else
288 {
289 LWLockRelease(&apw_state->lock);
290 ereport(LOG,
291 (errmsg("skipping prewarm because block dump file is being written by PID %lu",
292 (unsigned long) apw_state->pid_using_dumpfile)));
293 return;
294 }
295 LWLockRelease(&apw_state->lock);
296
297 /*
298 * Open the block dump file. Exit quietly if it doesn't exist, but report
299 * any other error.
300 */
301 file = AllocateFile(AUTOPREWARM_FILE, "r");
302 if (!file)
303 {
304 if (errno == ENOENT)
305 {
306 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
307 apw_state->pid_using_dumpfile = InvalidPid;
308 LWLockRelease(&apw_state->lock);
309 return; /* No file to load. */
310 }
311 ereport(ERROR,
312 (errcode_for_file_access(),
313 errmsg("could not read file \"%s\": %m",
314 AUTOPREWARM_FILE)));
315 }
316
317 /* First line of the file is a record count. */
318 if (fscanf(file, "<<%d>>\n", &num_elements) != 1)
319 ereport(ERROR,
320 (errcode_for_file_access(),
321 errmsg("could not read from file \"%s\": %m",
322 AUTOPREWARM_FILE)));
323
324 /* Allocate a dynamic shared memory segment to store the record data. */
325 seg = dsm_create(sizeof(BlockInfoRecord) * num_elements, 0);
326 blkinfo = (BlockInfoRecord *) dsm_segment_address(seg);
327
328 /* Read records, one per line. */
329 for (i = 0; i < num_elements; i++)
330 {
331 unsigned forknum;
332
333 if (fscanf(file, "%u,%u,%u,%u,%u\n", &blkinfo[i].database,
334 &blkinfo[i].tablespace, &blkinfo[i].filenode,
335 &forknum, &blkinfo[i].blocknum) != 5)
336 ereport(ERROR,
337 (errmsg("autoprewarm block dump file is corrupted at line %d",
338 i + 1)));
339 blkinfo[i].forknum = forknum;
340 }
341
342 FreeFile(file);
343
344 /* Sort the blocks to be loaded. */
345 pg_qsort(blkinfo, num_elements, sizeof(BlockInfoRecord),
346 apw_compare_blockinfo);
347
348 /* Populate shared memory state. */
349 apw_state->block_info_handle = dsm_segment_handle(seg);
350 apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx = 0;
351 apw_state->prewarmed_blocks = 0;
352
353 /* Get the info position of the first block of the next database. */
354 while (apw_state->prewarm_start_idx < num_elements)
355 {
356 int j = apw_state->prewarm_start_idx;
357 Oid current_db = blkinfo[j].database;
358
359 /*
360 * Advance the prewarm_stop_idx to the first BlockInfoRecord that does
361 * not belong to this database.
362 */
363 j++;
364 while (j < num_elements)
365 {
366 if (current_db != blkinfo[j].database)
367 {
368 /*
369 * Combine BlockInfoRecords for global objects with those of
370 * the database.
371 */
372 if (current_db != InvalidOid)
373 break;
374 current_db = blkinfo[j].database;
375 }
376
377 j++;
378 }
379
380 /*
381 * If we reach this point with current_db == InvalidOid, then only
382 * BlockInfoRecords belonging to global objects exist. We can't
383 * prewarm without a database connection, so just bail out.
384 */
385 if (current_db == InvalidOid)
386 break;
387
388 /* Configure stop point and database for next per-database worker. */
389 apw_state->prewarm_stop_idx = j;
390 apw_state->database = current_db;
391 Assert(apw_state->prewarm_start_idx < apw_state->prewarm_stop_idx);
392
393 /* If we've run out of free buffers, don't launch another worker. */
394 if (!have_free_buffer())
395 break;
396
397 /*
398 * Likewise, don't launch if we've already been told to shut down.
399 * (The launch would fail anyway, but we might as well skip it.)
400 */
401 if (ShutdownRequestPending)
402 break;
403
404 /*
405 * Start a per-database worker to load blocks for this database; this
406 * function will return once the per-database worker exits.
407 */
408 apw_start_database_worker();
409
410 /* Prepare for next database. */
411 apw_state->prewarm_start_idx = apw_state->prewarm_stop_idx;
412 }
413
414 /* Clean up. */
415 dsm_detach(seg);
416 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
417 apw_state->block_info_handle = DSM_HANDLE_INVALID;
418 apw_state->pid_using_dumpfile = InvalidPid;
419 LWLockRelease(&apw_state->lock);
420
421 /* Report our success, if we were able to finish. */
422 if (!ShutdownRequestPending)
423 ereport(LOG,
424 (errmsg("autoprewarm successfully prewarmed %d of %d previously-loaded blocks",
425 apw_state->prewarmed_blocks, num_elements)));
426 }
427
428 /*
429 * Prewarm all blocks for one database (and possibly also global objects, if
430 * those got grouped with this database).
431 */
432 void
autoprewarm_database_main(Datum main_arg)433 autoprewarm_database_main(Datum main_arg)
434 {
435 int pos;
436 BlockInfoRecord *block_info;
437 Relation rel = NULL;
438 BlockNumber nblocks = 0;
439 BlockInfoRecord *old_blk = NULL;
440 dsm_segment *seg;
441
442 /* Establish signal handlers; once that's done, unblock signals. */
443 pqsignal(SIGTERM, die);
444 BackgroundWorkerUnblockSignals();
445
446 /* Connect to correct database and get block information. */
447 apw_init_shmem();
448 seg = dsm_attach(apw_state->block_info_handle);
449 if (seg == NULL)
450 ereport(ERROR,
451 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
452 errmsg("could not map dynamic shared memory segment")));
453 BackgroundWorkerInitializeConnectionByOid(apw_state->database, InvalidOid, 0);
454 block_info = (BlockInfoRecord *) dsm_segment_address(seg);
455 pos = apw_state->prewarm_start_idx;
456
457 /*
458 * Loop until we run out of blocks to prewarm or until we run out of free
459 * buffers.
460 */
461 while (pos < apw_state->prewarm_stop_idx && have_free_buffer())
462 {
463 BlockInfoRecord *blk = &block_info[pos++];
464 Buffer buf;
465
466 CHECK_FOR_INTERRUPTS();
467
468 /*
469 * Quit if we've reached records for another database. If previous
470 * blocks are of some global objects, then continue pre-warming.
471 */
472 if (old_blk != NULL && old_blk->database != blk->database &&
473 old_blk->database != 0)
474 break;
475
476 /*
477 * As soon as we encounter a block of a new relation, close the old
478 * relation. Note that rel will be NULL if try_relation_open failed
479 * previously; in that case, there is nothing to close.
480 */
481 if (old_blk != NULL && old_blk->filenode != blk->filenode &&
482 rel != NULL)
483 {
484 relation_close(rel, AccessShareLock);
485 rel = NULL;
486 CommitTransactionCommand();
487 }
488
489 /*
490 * Try to open each new relation, but only once, when we first
491 * encounter it. If it's been dropped, skip the associated blocks.
492 */
493 if (old_blk == NULL || old_blk->filenode != blk->filenode)
494 {
495 Oid reloid;
496
497 Assert(rel == NULL);
498 StartTransactionCommand();
499 reloid = RelidByRelfilenode(blk->tablespace, blk->filenode);
500 if (OidIsValid(reloid))
501 rel = try_relation_open(reloid, AccessShareLock);
502
503 if (!rel)
504 CommitTransactionCommand();
505 }
506 if (!rel)
507 {
508 old_blk = blk;
509 continue;
510 }
511
512 /* Once per fork, check for fork existence and size. */
513 if (old_blk == NULL ||
514 old_blk->filenode != blk->filenode ||
515 old_blk->forknum != blk->forknum)
516 {
517 RelationOpenSmgr(rel);
518
519 /*
520 * smgrexists is not safe for illegal forknum, hence check whether
521 * the passed forknum is valid before using it in smgrexists.
522 */
523 if (blk->forknum > InvalidForkNumber &&
524 blk->forknum <= MAX_FORKNUM &&
525 smgrexists(rel->rd_smgr, blk->forknum))
526 nblocks = RelationGetNumberOfBlocksInFork(rel, blk->forknum);
527 else
528 nblocks = 0;
529 }
530
531 /* Check whether blocknum is valid and within fork file size. */
532 if (blk->blocknum >= nblocks)
533 {
534 /* Move to next forknum. */
535 old_blk = blk;
536 continue;
537 }
538
539 /* Prewarm buffer. */
540 buf = ReadBufferExtended(rel, blk->forknum, blk->blocknum, RBM_NORMAL,
541 NULL);
542 if (BufferIsValid(buf))
543 {
544 apw_state->prewarmed_blocks++;
545 ReleaseBuffer(buf);
546 }
547
548 old_blk = blk;
549 }
550
551 dsm_detach(seg);
552
553 /* Release lock on previous relation. */
554 if (rel)
555 {
556 relation_close(rel, AccessShareLock);
557 CommitTransactionCommand();
558 }
559 }
560
561 /*
562 * Dump information on blocks in shared buffers. We use a text format here
563 * so that it's easy to understand and even change the file contents if
564 * necessary.
565 * Returns the number of blocks dumped.
566 */
567 static int
apw_dump_now(bool is_bgworker,bool dump_unlogged)568 apw_dump_now(bool is_bgworker, bool dump_unlogged)
569 {
570 int num_blocks;
571 int i;
572 int ret;
573 BlockInfoRecord *block_info_array;
574 BufferDesc *bufHdr;
575 FILE *file;
576 char transient_dump_file_path[MAXPGPATH];
577 pid_t pid;
578
579 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
580 pid = apw_state->pid_using_dumpfile;
581 if (apw_state->pid_using_dumpfile == InvalidPid)
582 apw_state->pid_using_dumpfile = MyProcPid;
583 LWLockRelease(&apw_state->lock);
584
585 if (pid != InvalidPid)
586 {
587 if (!is_bgworker)
588 ereport(ERROR,
589 (errmsg("could not perform block dump because dump file is being used by PID %lu",
590 (unsigned long) apw_state->pid_using_dumpfile)));
591
592 ereport(LOG,
593 (errmsg("skipping block dump because it is already being performed by PID %lu",
594 (unsigned long) apw_state->pid_using_dumpfile)));
595 return 0;
596 }
597
598 block_info_array =
599 (BlockInfoRecord *) palloc(sizeof(BlockInfoRecord) * NBuffers);
600
601 for (num_blocks = 0, i = 0; i < NBuffers; i++)
602 {
603 uint32 buf_state;
604
605 CHECK_FOR_INTERRUPTS();
606
607 bufHdr = GetBufferDescriptor(i);
608
609 /* Lock each buffer header before inspecting. */
610 buf_state = LockBufHdr(bufHdr);
611
612 /*
613 * Unlogged tables will be automatically truncated after a crash or
614 * unclean shutdown. In such cases we need not prewarm them. Dump them
615 * only if requested by caller.
616 */
617 if (buf_state & BM_TAG_VALID &&
618 ((buf_state & BM_PERMANENT) || dump_unlogged))
619 {
620 block_info_array[num_blocks].database = bufHdr->tag.rnode.dbNode;
621 block_info_array[num_blocks].tablespace = bufHdr->tag.rnode.spcNode;
622 block_info_array[num_blocks].filenode = bufHdr->tag.rnode.relNode;
623 block_info_array[num_blocks].forknum = bufHdr->tag.forkNum;
624 block_info_array[num_blocks].blocknum = bufHdr->tag.blockNum;
625 ++num_blocks;
626 }
627
628 UnlockBufHdr(bufHdr, buf_state);
629 }
630
631 snprintf(transient_dump_file_path, MAXPGPATH, "%s.tmp", AUTOPREWARM_FILE);
632 file = AllocateFile(transient_dump_file_path, "w");
633 if (!file)
634 ereport(ERROR,
635 (errcode_for_file_access(),
636 errmsg("could not open file \"%s\": %m",
637 transient_dump_file_path)));
638
639 ret = fprintf(file, "<<%d>>\n", num_blocks);
640 if (ret < 0)
641 {
642 int save_errno = errno;
643
644 FreeFile(file);
645 unlink(transient_dump_file_path);
646 errno = save_errno;
647 ereport(ERROR,
648 (errcode_for_file_access(),
649 errmsg("could not write to file \"%s\": %m",
650 transient_dump_file_path)));
651 }
652
653 for (i = 0; i < num_blocks; i++)
654 {
655 CHECK_FOR_INTERRUPTS();
656
657 ret = fprintf(file, "%u,%u,%u,%u,%u\n",
658 block_info_array[i].database,
659 block_info_array[i].tablespace,
660 block_info_array[i].filenode,
661 (uint32) block_info_array[i].forknum,
662 block_info_array[i].blocknum);
663 if (ret < 0)
664 {
665 int save_errno = errno;
666
667 FreeFile(file);
668 unlink(transient_dump_file_path);
669 errno = save_errno;
670 ereport(ERROR,
671 (errcode_for_file_access(),
672 errmsg("could not write to file \"%s\": %m",
673 transient_dump_file_path)));
674 }
675 }
676
677 pfree(block_info_array);
678
679 /*
680 * Rename transient_dump_file_path to AUTOPREWARM_FILE to make things
681 * permanent.
682 */
683 ret = FreeFile(file);
684 if (ret != 0)
685 {
686 int save_errno = errno;
687
688 unlink(transient_dump_file_path);
689 errno = save_errno;
690 ereport(ERROR,
691 (errcode_for_file_access(),
692 errmsg("could not close file \"%s\": %m",
693 transient_dump_file_path)));
694 }
695
696 (void) durable_rename(transient_dump_file_path, AUTOPREWARM_FILE, ERROR);
697 apw_state->pid_using_dumpfile = InvalidPid;
698
699 ereport(DEBUG1,
700 (errmsg_internal("wrote block details for %d blocks", num_blocks)));
701 return num_blocks;
702 }
703
704 /*
705 * SQL-callable function to launch autoprewarm.
706 */
707 Datum
autoprewarm_start_worker(PG_FUNCTION_ARGS)708 autoprewarm_start_worker(PG_FUNCTION_ARGS)
709 {
710 pid_t pid;
711
712 if (!autoprewarm)
713 ereport(ERROR,
714 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
715 errmsg("autoprewarm is disabled")));
716
717 apw_init_shmem();
718 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
719 pid = apw_state->bgworker_pid;
720 LWLockRelease(&apw_state->lock);
721
722 if (pid != InvalidPid)
723 ereport(ERROR,
724 (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE),
725 errmsg("autoprewarm worker is already running under PID %lu",
726 (unsigned long) pid)));
727
728 apw_start_leader_worker();
729
730 PG_RETURN_VOID();
731 }
732
733 /*
734 * SQL-callable function to perform an immediate block dump.
735 *
736 * Note: this is declared to return int8, as insurance against some
737 * very distant day when we might make NBuffers wider than int.
738 */
739 Datum
autoprewarm_dump_now(PG_FUNCTION_ARGS)740 autoprewarm_dump_now(PG_FUNCTION_ARGS)
741 {
742 int num_blocks;
743
744 apw_init_shmem();
745
746 PG_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
747 {
748 num_blocks = apw_dump_now(false, true);
749 }
750 PG_END_ENSURE_ERROR_CLEANUP(apw_detach_shmem, 0);
751
752 PG_RETURN_INT64((int64) num_blocks);
753 }
754
755 /*
756 * Allocate and initialize autoprewarm related shared memory, if not already
757 * done, and set up backend-local pointer to that state. Returns true if an
758 * existing shared memory segment was found.
759 */
760 static bool
apw_init_shmem(void)761 apw_init_shmem(void)
762 {
763 bool found;
764
765 LWLockAcquire(AddinShmemInitLock, LW_EXCLUSIVE);
766 apw_state = ShmemInitStruct("autoprewarm",
767 sizeof(AutoPrewarmSharedState),
768 &found);
769 if (!found)
770 {
771 /* First time through ... */
772 LWLockInitialize(&apw_state->lock, LWLockNewTrancheId());
773 apw_state->bgworker_pid = InvalidPid;
774 apw_state->pid_using_dumpfile = InvalidPid;
775 }
776 LWLockRelease(AddinShmemInitLock);
777
778 LWLockRegisterTranche(apw_state->lock.tranche, "autoprewarm");
779
780 return found;
781 }
782
783 /*
784 * Clear our PID from autoprewarm shared state.
785 */
786 static void
apw_detach_shmem(int code,Datum arg)787 apw_detach_shmem(int code, Datum arg)
788 {
789 LWLockAcquire(&apw_state->lock, LW_EXCLUSIVE);
790 if (apw_state->pid_using_dumpfile == MyProcPid)
791 apw_state->pid_using_dumpfile = InvalidPid;
792 if (apw_state->bgworker_pid == MyProcPid)
793 apw_state->bgworker_pid = InvalidPid;
794 LWLockRelease(&apw_state->lock);
795 }
796
797 /*
798 * Start autoprewarm leader worker process.
799 */
800 static void
apw_start_leader_worker(void)801 apw_start_leader_worker(void)
802 {
803 BackgroundWorker worker;
804 BackgroundWorkerHandle *handle;
805 BgwHandleStatus status;
806 pid_t pid;
807
808 MemSet(&worker, 0, sizeof(BackgroundWorker));
809 worker.bgw_flags = BGWORKER_SHMEM_ACCESS;
810 worker.bgw_start_time = BgWorkerStart_ConsistentState;
811 strcpy(worker.bgw_library_name, "pg_prewarm");
812 strcpy(worker.bgw_function_name, "autoprewarm_main");
813 strcpy(worker.bgw_name, "autoprewarm leader");
814 strcpy(worker.bgw_type, "autoprewarm leader");
815
816 if (process_shared_preload_libraries_in_progress)
817 {
818 RegisterBackgroundWorker(&worker);
819 return;
820 }
821
822 /* must set notify PID to wait for startup */
823 worker.bgw_notify_pid = MyProcPid;
824
825 if (!RegisterDynamicBackgroundWorker(&worker, &handle))
826 ereport(ERROR,
827 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
828 errmsg("could not register background process"),
829 errhint("You may need to increase max_worker_processes.")));
830
831 status = WaitForBackgroundWorkerStartup(handle, &pid);
832 if (status != BGWH_STARTED)
833 ereport(ERROR,
834 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
835 errmsg("could not start background process"),
836 errhint("More details may be available in the server log.")));
837 }
838
839 /*
840 * Start autoprewarm per-database worker process.
841 */
842 static void
apw_start_database_worker(void)843 apw_start_database_worker(void)
844 {
845 BackgroundWorker worker;
846 BackgroundWorkerHandle *handle;
847
848 MemSet(&worker, 0, sizeof(BackgroundWorker));
849 worker.bgw_flags =
850 BGWORKER_SHMEM_ACCESS | BGWORKER_BACKEND_DATABASE_CONNECTION;
851 worker.bgw_start_time = BgWorkerStart_ConsistentState;
852 worker.bgw_restart_time = BGW_NEVER_RESTART;
853 strcpy(worker.bgw_library_name, "pg_prewarm");
854 strcpy(worker.bgw_function_name, "autoprewarm_database_main");
855 strcpy(worker.bgw_name, "autoprewarm worker");
856 strcpy(worker.bgw_type, "autoprewarm worker");
857
858 /* must set notify PID to wait for shutdown */
859 worker.bgw_notify_pid = MyProcPid;
860
861 if (!RegisterDynamicBackgroundWorker(&worker, &handle))
862 ereport(ERROR,
863 (errcode(ERRCODE_INSUFFICIENT_RESOURCES),
864 errmsg("registering dynamic bgworker autoprewarm failed"),
865 errhint("Consider increasing configuration parameter \"max_worker_processes\".")));
866
867 /*
868 * Ignore return value; if it fails, postmaster has died, but we have
869 * checks for that elsewhere.
870 */
871 WaitForBackgroundWorkerShutdown(handle);
872 }
873
874 /* Compare member elements to check whether they are not equal. */
875 #define cmp_member_elem(fld) \
876 do { \
877 if (a->fld < b->fld) \
878 return -1; \
879 else if (a->fld > b->fld) \
880 return 1; \
881 } while(0)
882
883 /*
884 * apw_compare_blockinfo
885 *
886 * We depend on all records for a particular database being consecutive
887 * in the dump file; each per-database worker will preload blocks until
888 * it sees a block for some other database. Sorting by tablespace,
889 * filenode, forknum, and blocknum isn't critical for correctness, but
890 * helps us get a sequential I/O pattern.
891 */
892 static int
apw_compare_blockinfo(const void * p,const void * q)893 apw_compare_blockinfo(const void *p, const void *q)
894 {
895 const BlockInfoRecord *a = (const BlockInfoRecord *) p;
896 const BlockInfoRecord *b = (const BlockInfoRecord *) q;
897
898 cmp_member_elem(database);
899 cmp_member_elem(tablespace);
900 cmp_member_elem(filenode);
901 cmp_member_elem(forknum);
902 cmp_member_elem(blocknum);
903
904 return 0;
905 }
906