1 /*-------------------------------------------------------------------------
2 *
3 * win32_shmem.c
4 * Implement shared memory using win32 facilities
5 *
6 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/port/win32_shmem.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19
20 /*
21 * Early in a process's life, Windows asynchronously creates threads for the
22 * process's "default thread pool"
23 * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24 * Occasionally, thread creation allocates a stack after
25 * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26 * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
27 * if the allocator preferred the just-released region for allocating the new
28 * thread stack. We observed such failures in some Windows Server 2016
29 * configurations. To give the system another region to prefer, reserve and
30 * release an additional, protective region immediately before reserving or
31 * releasing shared memory. The idea is that, if the allocator handed out
 * REGION1 pages before REGION2 pages on one occasion, it will do so whenever
33 * both regions are free. Windows Server 2016 exhibits that behavior, and a
34 * system behaving differently would have less need to protect
35 * UsedShmemSegAddr. The protective region must be at least large enough for
36 * one thread stack. However, ten times as much is less than 2% of the 32-bit
37 * address space and is negligible relative to the 64-bit address space.
38 */
/* Size of the protective region: room for ten thread stacks. */
#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)

/*
 * Base address of the protective address-space reservation, or NULL if none
 * is held.  PGSharedMemoryCreate() reserves it; PGSharedMemoryReAttach() and
 * PGSharedMemoryDetach() release it.
 */
void *ShmemProtectiveRegion = NULL;

/*
 * Handle of the shared memory file mapping, or INVALID_HANDLE_VALUE when this
 * process holds none.
 */
HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;

/* Address at which the segment is mapped, or NULL when not attached. */
void *UsedShmemSegAddr = NULL;

/*
 * Size of the segment as recorded by PGSharedMemoryCreate(); consulted by
 * pgwin32_ReserveSharedMemoryRegion() when reserving space in a child.
 */
static Size UsedShmemSegSize = 0;

/* Forward declarations of file-local helpers defined further down. */
static bool EnableLockPagesPrivilege(int elevel);
static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
48
49 /*
50 * Generate shared memory segment name. Expand the data directory, to generate
51 * an identifier unique for this data directory. Then replace all backslashes
52 * with forward slashes, since backslashes aren't permitted in global object names.
53 *
54 * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
55 * 2000, but that's all we support for other reasons as well), to make sure you can't
56 * open two postmasters in different sessions against the same data directory.
57 *
58 * XXX: What happens with junctions? It's only someone breaking things on purpose,
59 * and this is still better than before, but we might want to do something about
60 * that sometime in the future.
61 */
62 static char *
GetSharedMemName(void)63 GetSharedMemName(void)
64 {
65 char *retptr;
66 DWORD bufsize;
67 DWORD r;
68 char *cp;
69
70 bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
71 if (bufsize == 0)
72 elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
73 DataDir, GetLastError());
74
75 retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
76 if (retptr == NULL)
77 elog(FATAL, "could not allocate memory for shared memory name");
78
79 strcpy(retptr, "Global\\PostgreSQL:");
80 r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
81 if (r == 0 || r > bufsize)
82 elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
83 DataDir, GetLastError());
84
85 /*
86 * XXX: Intentionally overwriting the Global\ part here. This was not the
87 * original approach, but putting it in the actual Global\ namespace
88 * causes permission errors in a lot of cases, so we leave it in the
89 * default namespace for now.
90 */
91 for (cp = retptr; *cp; cp++)
92 if (*cp == '\\')
93 *cp = '/';
94
95 return retptr;
96 }
97
98
99 /*
100 * PGSharedMemoryIsInUse
101 *
102 * Is a previously-existing shmem segment still existing and in use?
103 *
104 * The point of this exercise is to detect the case where a prior postmaster
105 * crashed, but it left child backends that are still running. Therefore
106 * we only care about shmem segments that are associated with the intended
107 * DataDir. This is an important consideration since accidental matches of
108 * shmem segment IDs are reasonably common.
109 */
110 bool
PGSharedMemoryIsInUse(unsigned long id1,unsigned long id2)111 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
112 {
113 char *szShareMem;
114 HANDLE hmap;
115
116 szShareMem = GetSharedMemName();
117
118 hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
119
120 free(szShareMem);
121
122 if (hmap == NULL)
123 return false;
124
125 CloseHandle(hmap);
126 return true;
127 }
128
129 /*
130 * EnableLockPagesPrivilege
131 *
132 * Try to acquire SeLockMemoryPrivilege so we can use large pages.
133 */
134 static bool
EnableLockPagesPrivilege(int elevel)135 EnableLockPagesPrivilege(int elevel)
136 {
137 HANDLE hToken;
138 TOKEN_PRIVILEGES tp;
139 LUID luid;
140
141 if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
142 {
143 ereport(elevel,
144 (errmsg("could not enable user right \"%s\": error code %lu",
145
146 /*
147 * translator: This is a term from Windows and should be translated to
148 * match the Windows localization.
149 */
150 _("Lock pages in memory"),
151 GetLastError()),
152 errdetail("Failed system call was %s.", "OpenProcessToken")));
153 return FALSE;
154 }
155
156 if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
157 {
158 ereport(elevel,
159 (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
160 errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
161 CloseHandle(hToken);
162 return FALSE;
163 }
164 tp.PrivilegeCount = 1;
165 tp.Privileges[0].Luid = luid;
166 tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
167
168 if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
169 {
170 ereport(elevel,
171 (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
172 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
173 CloseHandle(hToken);
174 return FALSE;
175 }
176
177 if (GetLastError() != ERROR_SUCCESS)
178 {
179 if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
180 ereport(elevel,
181 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
182 errmsg("could not enable user right \"%s\"", _("Lock pages in memory")),
183 errhint("Assign user right \"%s\" to the Windows user account which runs PostgreSQL.",
184 _("Lock pages in memory"))));
185 else
186 ereport(elevel,
187 (errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
188 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
189 CloseHandle(hToken);
190 return FALSE;
191 }
192
193 CloseHandle(hToken);
194
195 return TRUE;
196 }
197
/*
 * PGSharedMemoryCreate
 *
 * Create a shared memory segment of the given size and initialize its
 * standard header.
 *
 * size is the requested segment size in bytes; it must exceed the aligned
 * size of PGShmemHeader.  If huge pages end up being used, the size is
 * rounded up to a multiple of the large-page minimum, and that rounded-up
 * value is what gets recorded in the header.
 *
 * shim is an output parameter: *shim receives the same header pointer that
 * is returned.
 *
 * The segment is a pagefile-backed file mapping whose name comes from
 * GetSharedMemName().  As side effects this routine reserves the protective
 * region (ShmemProtectiveRegion), sets UsedShmemSegAddr, UsedShmemSegSize
 * and UsedShmemSegID, and registers pgwin32_SharedMemoryDelete as an
 * on_shmem_exit callback.
 *
 * Every failure is reported at FATAL, so the function only returns on
 * success.
 */
PGShmemHeader *
PGSharedMemoryCreate(Size size,
					 PGShmemHeader **shim)
{
	void *memAddress;
	PGShmemHeader *hdr;
	HANDLE hmap,
				hmap2;
	char *szShareMem;
	int i;
	DWORD size_high;
	DWORD size_low;
	SIZE_T largePageSize = 0;
	Size orig_size = size;		/* kept for the non-huge-page fallback */
	DWORD flProtect = PAGE_READWRITE;

	/*
	 * Reserve (without committing) the protective region before any shared
	 * memory is set up; see the comment at PROTECTIVE_REGION_SIZE.
	 */
	ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
										 MEM_RESERVE, PAGE_NOACCESS);
	if (ShmemProtectiveRegion == NULL)
		elog(FATAL, "could not reserve memory region: error code %lu",
			 GetLastError());

	/* Room for a header? */
	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));

	szShareMem = GetSharedMemName();

	UsedShmemSegAddr = NULL;

	if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
	{
		/* Does the processor support large pages? */
		largePageSize = GetLargePageMinimum();
		if (largePageSize == 0)
		{
			/*
			 * With huge_pages=on this is FATAL; the DEBUG1 message below is
			 * therefore only reached for huge_pages=try.
			 */
			ereport(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1,
					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
					 errmsg("the processor does not support large pages")));
			ereport(DEBUG1,
					(errmsg_internal("disabling huge pages")));
		}
		else if (!EnableLockPagesPrivilege(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1))
		{
			ereport(DEBUG1,
					(errmsg_internal("disabling huge pages")));
		}
		else
		{
			/* Huge pages available and privilege enabled, so turn on */
			flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;

			/* Round size up as appropriate. */
			if (size % largePageSize != 0)
				size += largePageSize - (size % largePageSize);
		}
	}

	/*
	 * Re-entered via "goto retry" when a large-page mapping could not be
	 * created; size and flProtect have been reset by then, so the size
	 * halves must be recomputed.  The retry loop below also starts over.
	 */
retry:
#ifdef _WIN64
	size_high = size >> 32;
#else
	size_high = 0;
#endif
	size_low = (DWORD) size;

	/*
	 * When recycling a shared memory segment, it may take a short while
	 * before it gets dropped from the global namespace. So re-try after
	 * sleeping for a second, and continue retrying 10 times. (both the 1
	 * second time and the 10 retries are completely arbitrary)
	 */
	for (i = 0; i < 10; i++)
	{
		/*
		 * In case CreateFileMapping() doesn't set the error code to 0 on
		 * success
		 */
		SetLastError(0);

		hmap = CreateFileMapping(INVALID_HANDLE_VALUE,	/* Use the pagefile */
								 NULL,	/* Default security attrs */
								 flProtect,
								 size_high, /* Size Upper 32 Bits */
								 size_low,	/* Size Lower 32 bits */
								 szShareMem);

		if (!hmap)
		{
			/*
			 * Only huge_pages=try with a large-page request falls back to
			 * ordinary pages; every other failure is fatal.
			 */
			if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
				huge_pages == HUGE_PAGES_TRY &&
				(flProtect & SEC_LARGE_PAGES) != 0)
			{
				elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
					 "huge pages disabled",
					 size);

				/*
				 * Use the original size, not the rounded-up value, when
				 * falling back to non-huge pages.
				 */
				size = orig_size;
				flProtect = PAGE_READWRITE;
				goto retry;
			}
			else
				ereport(FATAL,
						(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
						 errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
								   size, szShareMem)));
		}

		/*
		 * If the segment already existed, CreateFileMapping() will return a
		 * handle to the existing one and set ERROR_ALREADY_EXISTS.
		 */
		if (GetLastError() == ERROR_ALREADY_EXISTS)
		{
			CloseHandle(hmap);	/* Close the handle, since we got a valid one
								 * to the previous segment. */
			hmap = NULL;		/* lets the check after the loop see failure */
			Sleep(1000);
			continue;
		}
		break;
	}

	/*
	 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
	 * shared memory segment exists and we assume it belongs to somebody else.
	 */
	if (!hmap)
		ereport(FATAL,
				(errmsg("pre-existing shared memory block is still in use"),
				 errhint("Check if there are any old server processes still running, and terminate them.")));

	/* The name is not needed once the mapping object exists */
	free(szShareMem);

	/*
	 * Make the handle inheritable
	 */
	if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
		ereport(FATAL,
				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
				 errdetail("Failed system call was DuplicateHandle.")));

	/*
	 * Close the old, non-inheritable handle. If this fails we don't really
	 * care.
	 */
	if (!CloseHandle(hmap))
		elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());


	/*
	 * Get a pointer to the new shared memory segment. Map the whole segment
	 * at once, and let the system decide on the initial address.
	 */
	memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
	if (!memAddress)
		ereport(FATAL,
				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
				 errdetail("Failed system call was MapViewOfFileEx.")));



	/*
	 * OK, we created a new segment. Mark it as created by this process. The
	 * order of assignments here is critical so that another Postgres process
	 * can't see the header as valid but belonging to an invalid PID!
	 */
	hdr = (PGShmemHeader *) memAddress;
	hdr->creatorPID = getpid();
	hdr->magic = PGShmemMagic;

	/*
	 * Initialize space allocation status for segment.  Note that size is the
	 * rounded-up value if huge pages are in use.
	 */
	hdr->totalsize = size;
	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
	hdr->dsm_control = 0;

	/* Save info for possible future use */
	UsedShmemSegAddr = memAddress;
	UsedShmemSegSize = size;
	UsedShmemSegID = hmap2;

	/* Register on-exit routine to delete the new segment */
	on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));

	/* Hand the header back both through *shim and as the return value */
	*shim = hdr;
	return hdr;
}
396
397 /*
398 * PGSharedMemoryReAttach
399 *
400 * This is called during startup of a postmaster child process to re-attach to
401 * an already existing shared memory segment, using the handle inherited from
402 * the postmaster.
403 *
404 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
405 * parameters to this routine. The caller must have already restored them to
406 * the postmaster's values.
407 */
408 void
PGSharedMemoryReAttach(void)409 PGSharedMemoryReAttach(void)
410 {
411 PGShmemHeader *hdr;
412 void *origUsedShmemSegAddr = UsedShmemSegAddr;
413
414 Assert(ShmemProtectiveRegion != NULL);
415 Assert(UsedShmemSegAddr != NULL);
416 Assert(IsUnderPostmaster);
417
418 /*
419 * Release memory region reservations made by the postmaster
420 */
421 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
422 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
423 ShmemProtectiveRegion, GetLastError());
424 if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
425 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
426 UsedShmemSegAddr, GetLastError());
427
428 hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
429 if (!hdr)
430 elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
431 UsedShmemSegID, UsedShmemSegAddr, GetLastError());
432 if (hdr != origUsedShmemSegAddr)
433 elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
434 hdr, origUsedShmemSegAddr);
435 if (hdr->magic != PGShmemMagic)
436 elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
437 dsm_set_control_handle(hdr->dsm_control);
438
439 UsedShmemSegAddr = hdr; /* probably redundant */
440 }
441
442 /*
443 * PGSharedMemoryNoReAttach
444 *
445 * This is called during startup of a postmaster child process when we choose
446 * *not* to re-attach to the existing shared memory segment. We must clean up
447 * to leave things in the appropriate state.
448 *
449 * The child process startup logic might or might not call PGSharedMemoryDetach
450 * after this; make sure that it will be a no-op if called.
451 *
452 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
453 * parameters to this routine. The caller must have already restored them to
454 * the postmaster's values.
455 */
456 void
PGSharedMemoryNoReAttach(void)457 PGSharedMemoryNoReAttach(void)
458 {
459 Assert(ShmemProtectiveRegion != NULL);
460 Assert(UsedShmemSegAddr != NULL);
461 Assert(IsUnderPostmaster);
462
463 /*
464 * Under Windows we will not have mapped the segment, so we don't need to
465 * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
466 */
467 UsedShmemSegAddr = NULL;
468
469 /*
470 * We *must* close the inherited shmem segment handle, else Windows will
471 * consider the existence of this process to mean it can't release the
472 * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
473 */
474 PGSharedMemoryDetach();
475 }
476
477 /*
478 * PGSharedMemoryDetach
479 *
480 * Detach from the shared memory segment, if still attached. This is not
481 * intended to be called explicitly by the process that originally created the
482 * segment (it will have an on_shmem_exit callback registered to do that).
483 * Rather, this is for subprocesses that have inherited an attachment and want
484 * to get rid of it.
485 *
486 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
487 * parameters to this routine.
488 */
489 void
PGSharedMemoryDetach(void)490 PGSharedMemoryDetach(void)
491 {
492 /*
493 * Releasing the protective region liberates an unimportant quantity of
494 * address space, but be tidy.
495 */
496 if (ShmemProtectiveRegion != NULL)
497 {
498 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
499 elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
500 ShmemProtectiveRegion, GetLastError());
501
502 ShmemProtectiveRegion = NULL;
503 }
504
505 /* Unmap the view, if it's mapped */
506 if (UsedShmemSegAddr != NULL)
507 {
508 if (!UnmapViewOfFile(UsedShmemSegAddr))
509 elog(LOG, "could not unmap view of shared memory: error code %lu",
510 GetLastError());
511
512 UsedShmemSegAddr = NULL;
513 }
514
515 /* And close the shmem handle, if we have one */
516 if (UsedShmemSegID != INVALID_HANDLE_VALUE)
517 {
518 if (!CloseHandle(UsedShmemSegID))
519 elog(LOG, "could not close handle to shared memory: error code %lu",
520 GetLastError());
521
522 UsedShmemSegID = INVALID_HANDLE_VALUE;
523 }
524 }
525
526
527 /*
528 * pgwin32_SharedMemoryDelete
529 *
530 * Detach from and delete the shared memory segment
531 * (called as an on_shmem_exit callback, hence funny argument list)
532 */
533 static void
pgwin32_SharedMemoryDelete(int status,Datum shmId)534 pgwin32_SharedMemoryDelete(int status, Datum shmId)
535 {
536 Assert(DatumGetPointer(shmId) == UsedShmemSegID);
537 PGSharedMemoryDetach();
538 }
539
540 /*
541 * pgwin32_ReserveSharedMemoryRegion(hChild)
542 *
543 * Reserve the memory region that will be used for shared memory in a child
544 * process. It is called before the child process starts, to make sure the
545 * memory is available.
546 *
547 * Once the child starts, DLLs loading in different order or threads getting
548 * scheduled differently may allocate memory which can conflict with the
549 * address space we need for our shared memory. By reserving the shared
550 * memory region before the child starts, and freeing it only just before we
551 * attempt to get access to the shared memory forces these allocations to
552 * be given different address ranges that don't conflict.
553 *
554 * NOTE! This function executes in the postmaster, and should for this
555 * reason not use elog(FATAL) since that would take down the postmaster.
556 */
557 int
pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)558 pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
559 {
560 void *address;
561
562 Assert(ShmemProtectiveRegion != NULL);
563 Assert(UsedShmemSegAddr != NULL);
564 Assert(UsedShmemSegSize != 0);
565
566 /* ShmemProtectiveRegion */
567 address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
568 PROTECTIVE_REGION_SIZE,
569 MEM_RESERVE, PAGE_NOACCESS);
570 if (address == NULL)
571 {
572 /* Don't use FATAL since we're running in the postmaster */
573 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
574 ShmemProtectiveRegion, hChild, GetLastError());
575 return false;
576 }
577 if (address != ShmemProtectiveRegion)
578 {
579 /*
580 * Should never happen - in theory if allocation granularity causes
581 * strange effects it could, so check just in case.
582 *
583 * Don't use FATAL since we're running in the postmaster.
584 */
585 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
586 address, ShmemProtectiveRegion);
587 return false;
588 }
589
590 /* UsedShmemSegAddr */
591 address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
592 MEM_RESERVE, PAGE_READWRITE);
593 if (address == NULL)
594 {
595 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
596 UsedShmemSegAddr, hChild, GetLastError());
597 return false;
598 }
599 if (address != UsedShmemSegAddr)
600 {
601 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
602 address, UsedShmemSegAddr);
603 return false;
604 }
605
606 return true;
607 }
608