1 /*-------------------------------------------------------------------------
2 *
3 * win32_shmem.c
4 * Implement shared memory using win32 facilities
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/port/win32_shmem.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19
20 /*
21 * Early in a process's life, Windows asynchronously creates threads for the
22 * process's "default thread pool"
23 * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24 * Occasionally, thread creation allocates a stack after
25 * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26 * mapped shared memory at UsedShmemSegAddr. This would cause mapping to fail
27 * if the allocator preferred the just-released region for allocating the new
28 * thread stack. We observed such failures in some Windows Server 2016
29 * configurations. To give the system another region to prefer, reserve and
30 * release an additional, protective region immediately before reserving or
31 * releasing shared memory. The idea is that, if the allocator handed out
32 * REGION1 pages before REGION2 pages at one occasion, it will do so whenever
33 * both regions are free. Windows Server 2016 exhibits that behavior, and a
34 * system behaving differently would have less need to protect
35 * UsedShmemSegAddr. The protective region must be at least large enough for
36 * one thread stack. However, ten times as much is less than 2% of the 32-bit
37 * address space and is negligible relative to the 64-bit address space.
38 */
/* Size, in bytes, of the protective region: ten thread stacks' worth. */
#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
/* Base address of the protective region; NULL while none is reserved. */
void	   *ShmemProtectiveRegion = NULL;

/*
 * Handle of the file mapping backing the shared memory segment;
 * INVALID_HANDLE_VALUE while this process holds none.
 */
HANDLE		UsedShmemSegID = INVALID_HANDLE_VALUE;
/* Address the segment is (or is to be) mapped at; NULL when not attached. */
void	   *UsedShmemSegAddr = NULL;
/* Size of the segment in bytes, as recorded by PGSharedMemoryCreate(). */
static Size UsedShmemSegSize = 0;

static bool EnableLockPagesPrivilege(int elevel);
static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
48
49 /*
50 * Generate shared memory segment name. Expand the data directory, to generate
51 * an identifier unique for this data directory. Then replace all backslashes
52 * with forward slashes, since backslashes aren't permitted in global object names.
53 *
54 * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
55 * 2000, but that's all we support for other reasons as well), to make sure you can't
56 * open two postmasters in different sessions against the same data directory.
57 *
58 * XXX: What happens with junctions? It's only someone breaking things on purpose,
59 * and this is still better than before, but we might want to do something about
60 * that sometime in the future.
61 */
62 static char *
GetSharedMemName(void)63 GetSharedMemName(void)
64 {
65 char *retptr;
66 DWORD bufsize;
67 DWORD r;
68 char *cp;
69
70 bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
71 if (bufsize == 0)
72 elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
73 DataDir, GetLastError());
74
75 retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
76 if (retptr == NULL)
77 elog(FATAL, "could not allocate memory for shared memory name");
78
79 strcpy(retptr, "Global\\PostgreSQL:");
80 r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
81 if (r == 0 || r > bufsize)
82 elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
83 DataDir, GetLastError());
84
85 /*
86 * XXX: Intentionally overwriting the Global\ part here. This was not the
87 * original approach, but putting it in the actual Global\ namespace
88 * causes permission errors in a lot of cases, so we leave it in the
89 * default namespace for now.
90 */
91 for (cp = retptr; *cp; cp++)
92 if (*cp == '\\')
93 *cp = '/';
94
95 return retptr;
96 }
97
98
99 /*
100 * PGSharedMemoryIsInUse
101 *
102 * Is a previously-existing shmem segment still existing and in use?
103 *
104 * The point of this exercise is to detect the case where a prior postmaster
105 * crashed, but it left child backends that are still running. Therefore
106 * we only care about shmem segments that are associated with the intended
107 * DataDir. This is an important consideration since accidental matches of
108 * shmem segment IDs are reasonably common.
109 */
110 bool
PGSharedMemoryIsInUse(unsigned long id1,unsigned long id2)111 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
112 {
113 char *szShareMem;
114 HANDLE hmap;
115
116 szShareMem = GetSharedMemName();
117
118 hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
119
120 free(szShareMem);
121
122 if (hmap == NULL)
123 return false;
124
125 CloseHandle(hmap);
126 return true;
127 }
128
129 /*
130 * EnableLockPagesPrivilege
131 *
132 * Try to acquire SeLockMemoryPrivilege so we can use large pages.
133 */
134 static bool
EnableLockPagesPrivilege(int elevel)135 EnableLockPagesPrivilege(int elevel)
136 {
137 HANDLE hToken;
138 TOKEN_PRIVILEGES tp;
139 LUID luid;
140
141 if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
142 {
143 ereport(elevel,
144 (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
145 errdetail("Failed system call was %s.", "OpenProcessToken")));
146 return FALSE;
147 }
148
149 if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
150 {
151 ereport(elevel,
152 (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
153 errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
154 CloseHandle(hToken);
155 return FALSE;
156 }
157 tp.PrivilegeCount = 1;
158 tp.Privileges[0].Luid = luid;
159 tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
160
161 if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
162 {
163 ereport(elevel,
164 (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
165 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
166 CloseHandle(hToken);
167 return FALSE;
168 }
169
170 if (GetLastError() != ERROR_SUCCESS)
171 {
172 if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
173 ereport(elevel,
174 (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
175 errmsg("could not enable Lock Pages in Memory user right"),
176 errhint("Assign Lock Pages in Memory user right to the Windows user account which runs PostgreSQL.")));
177 else
178 ereport(elevel,
179 (errmsg("could not enable Lock Pages in Memory user right: error code %lu", GetLastError()),
180 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
181 CloseHandle(hToken);
182 return FALSE;
183 }
184
185 CloseHandle(hToken);
186
187 return TRUE;
188 }
189
190 /*
191 * PGSharedMemoryCreate
192 *
193 * Create a shared memory segment of the given size and initialize its
194 * standard header.
195 */
196 PGShmemHeader *
PGSharedMemoryCreate(Size size,int port,PGShmemHeader ** shim)197 PGSharedMemoryCreate(Size size, int port,
198 PGShmemHeader **shim)
199 {
200 void *memAddress;
201 PGShmemHeader *hdr;
202 HANDLE hmap,
203 hmap2;
204 char *szShareMem;
205 int i;
206 DWORD size_high;
207 DWORD size_low;
208 SIZE_T largePageSize = 0;
209 Size orig_size = size;
210 DWORD flProtect = PAGE_READWRITE;
211
212 ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
213 MEM_RESERVE, PAGE_NOACCESS);
214 if (ShmemProtectiveRegion == NULL)
215 elog(FATAL, "could not reserve memory region: error code %lu",
216 GetLastError());
217
218 /* Room for a header? */
219 Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
220
221 szShareMem = GetSharedMemName();
222
223 UsedShmemSegAddr = NULL;
224
225 if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
226 {
227 /* Does the processor support large pages? */
228 largePageSize = GetLargePageMinimum();
229 if (largePageSize == 0)
230 {
231 ereport(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1,
232 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
233 errmsg("the processor does not support large pages")));
234 ereport(DEBUG1,
235 (errmsg("disabling huge pages")));
236 }
237 else if (!EnableLockPagesPrivilege(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1))
238 {
239 ereport(DEBUG1,
240 (errmsg("disabling huge pages")));
241 }
242 else
243 {
244 /* Huge pages available and privilege enabled, so turn on */
245 flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
246
247 /* Round size up as appropriate. */
248 if (size % largePageSize != 0)
249 size += largePageSize - (size % largePageSize);
250 }
251 }
252
253 retry:
254 #ifdef _WIN64
255 size_high = size >> 32;
256 #else
257 size_high = 0;
258 #endif
259 size_low = (DWORD) size;
260
261 /*
262 * When recycling a shared memory segment, it may take a short while
263 * before it gets dropped from the global namespace. So re-try after
264 * sleeping for a second, and continue retrying 10 times. (both the 1
265 * second time and the 10 retries are completely arbitrary)
266 */
267 for (i = 0; i < 10; i++)
268 {
269 /*
270 * In case CreateFileMapping() doesn't set the error code to 0 on
271 * success
272 */
273 SetLastError(0);
274
275 hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
276 NULL, /* Default security attrs */
277 flProtect,
278 size_high, /* Size Upper 32 Bits */
279 size_low, /* Size Lower 32 bits */
280 szShareMem);
281
282 if (!hmap)
283 {
284 if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
285 huge_pages == HUGE_PAGES_TRY &&
286 (flProtect & SEC_LARGE_PAGES) != 0)
287 {
288 elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
289 "huge pages disabled",
290 size);
291
292 /*
293 * Use the original size, not the rounded-up value, when
294 * falling back to non-huge pages.
295 */
296 size = orig_size;
297 flProtect = PAGE_READWRITE;
298 goto retry;
299 }
300 else
301 ereport(FATAL,
302 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
303 errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
304 size, szShareMem)));
305 }
306
307 /*
308 * If the segment already existed, CreateFileMapping() will return a
309 * handle to the existing one and set ERROR_ALREADY_EXISTS.
310 */
311 if (GetLastError() == ERROR_ALREADY_EXISTS)
312 {
313 CloseHandle(hmap); /* Close the handle, since we got a valid one
314 * to the previous segment. */
315 hmap = NULL;
316 Sleep(1000);
317 continue;
318 }
319 break;
320 }
321
322 /*
323 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
324 * shared memory segment exists and we assume it belongs to somebody else.
325 */
326 if (!hmap)
327 ereport(FATAL,
328 (errmsg("pre-existing shared memory block is still in use"),
329 errhint("Check if there are any old server processes still running, and terminate them.")));
330
331 free(szShareMem);
332
333 /*
334 * Make the handle inheritable
335 */
336 if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
337 ereport(FATAL,
338 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
339 errdetail("Failed system call was DuplicateHandle.")));
340
341 /*
342 * Close the old, non-inheritable handle. If this fails we don't really
343 * care.
344 */
345 if (!CloseHandle(hmap))
346 elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
347
348
349 /*
350 * Get a pointer to the new shared memory segment. Map the whole segment
351 * at once, and let the system decide on the initial address.
352 */
353 memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
354 if (!memAddress)
355 ereport(FATAL,
356 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
357 errdetail("Failed system call was MapViewOfFileEx.")));
358
359
360
361 /*
362 * OK, we created a new segment. Mark it as created by this process. The
363 * order of assignments here is critical so that another Postgres process
364 * can't see the header as valid but belonging to an invalid PID!
365 */
366 hdr = (PGShmemHeader *) memAddress;
367 hdr->creatorPID = getpid();
368 hdr->magic = PGShmemMagic;
369
370 /*
371 * Initialize space allocation status for segment.
372 */
373 hdr->totalsize = size;
374 hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
375 hdr->dsm_control = 0;
376
377 /* Save info for possible future use */
378 UsedShmemSegAddr = memAddress;
379 UsedShmemSegSize = size;
380 UsedShmemSegID = hmap2;
381
382 /* Register on-exit routine to delete the new segment */
383 on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));
384
385 *shim = hdr;
386 return hdr;
387 }
388
389 /*
390 * PGSharedMemoryReAttach
391 *
392 * This is called during startup of a postmaster child process to re-attach to
393 * an already existing shared memory segment, using the handle inherited from
394 * the postmaster.
395 *
396 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
397 * parameters to this routine. The caller must have already restored them to
398 * the postmaster's values.
399 */
400 void
PGSharedMemoryReAttach(void)401 PGSharedMemoryReAttach(void)
402 {
403 PGShmemHeader *hdr;
404 void *origUsedShmemSegAddr = UsedShmemSegAddr;
405
406 Assert(ShmemProtectiveRegion != NULL);
407 Assert(UsedShmemSegAddr != NULL);
408 Assert(IsUnderPostmaster);
409
410 /*
411 * Release memory region reservations made by the postmaster
412 */
413 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
414 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
415 ShmemProtectiveRegion, GetLastError());
416 if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
417 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
418 UsedShmemSegAddr, GetLastError());
419
420 hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
421 if (!hdr)
422 elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
423 UsedShmemSegID, UsedShmemSegAddr, GetLastError());
424 if (hdr != origUsedShmemSegAddr)
425 elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
426 hdr, origUsedShmemSegAddr);
427 if (hdr->magic != PGShmemMagic)
428 elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
429 dsm_set_control_handle(hdr->dsm_control);
430
431 UsedShmemSegAddr = hdr; /* probably redundant */
432 }
433
434 /*
435 * PGSharedMemoryNoReAttach
436 *
437 * This is called during startup of a postmaster child process when we choose
438 * *not* to re-attach to the existing shared memory segment. We must clean up
439 * to leave things in the appropriate state.
440 *
441 * The child process startup logic might or might not call PGSharedMemoryDetach
442 * after this; make sure that it will be a no-op if called.
443 *
444 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
445 * parameters to this routine. The caller must have already restored them to
446 * the postmaster's values.
447 */
448 void
PGSharedMemoryNoReAttach(void)449 PGSharedMemoryNoReAttach(void)
450 {
451 Assert(ShmemProtectiveRegion != NULL);
452 Assert(UsedShmemSegAddr != NULL);
453 Assert(IsUnderPostmaster);
454
455 /*
456 * Under Windows we will not have mapped the segment, so we don't need to
457 * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
458 */
459 UsedShmemSegAddr = NULL;
460
461 /*
462 * We *must* close the inherited shmem segment handle, else Windows will
463 * consider the existence of this process to mean it can't release the
464 * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
465 */
466 PGSharedMemoryDetach();
467 }
468
469 /*
470 * PGSharedMemoryDetach
471 *
472 * Detach from the shared memory segment, if still attached. This is not
473 * intended to be called explicitly by the process that originally created the
474 * segment (it will have an on_shmem_exit callback registered to do that).
475 * Rather, this is for subprocesses that have inherited an attachment and want
476 * to get rid of it.
477 *
478 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
479 * parameters to this routine.
480 */
481 void
PGSharedMemoryDetach(void)482 PGSharedMemoryDetach(void)
483 {
484 /*
485 * Releasing the protective region liberates an unimportant quantity of
486 * address space, but be tidy.
487 */
488 if (ShmemProtectiveRegion != NULL)
489 {
490 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
491 elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
492 ShmemProtectiveRegion, GetLastError());
493
494 ShmemProtectiveRegion = NULL;
495 }
496
497 /* Unmap the view, if it's mapped */
498 if (UsedShmemSegAddr != NULL)
499 {
500 if (!UnmapViewOfFile(UsedShmemSegAddr))
501 elog(LOG, "could not unmap view of shared memory: error code %lu",
502 GetLastError());
503
504 UsedShmemSegAddr = NULL;
505 }
506
507 /* And close the shmem handle, if we have one */
508 if (UsedShmemSegID != INVALID_HANDLE_VALUE)
509 {
510 if (!CloseHandle(UsedShmemSegID))
511 elog(LOG, "could not close handle to shared memory: error code %lu",
512 GetLastError());
513
514 UsedShmemSegID = INVALID_HANDLE_VALUE;
515 }
516 }
517
518
519 /*
520 * pgwin32_SharedMemoryDelete
521 *
522 * Detach from and delete the shared memory segment
523 * (called as an on_shmem_exit callback, hence funny argument list)
524 */
525 static void
pgwin32_SharedMemoryDelete(int status,Datum shmId)526 pgwin32_SharedMemoryDelete(int status, Datum shmId)
527 {
528 Assert(DatumGetPointer(shmId) == UsedShmemSegID);
529 PGSharedMemoryDetach();
530 }
531
532 /*
533 * pgwin32_ReserveSharedMemoryRegion(hChild)
534 *
535 * Reserve the memory region that will be used for shared memory in a child
536 * process. It is called before the child process starts, to make sure the
537 * memory is available.
538 *
539 * Once the child starts, DLLs loading in different order or threads getting
540 * scheduled differently may allocate memory which can conflict with the
541 * address space we need for our shared memory. By reserving the shared
542 * memory region before the child starts, and freeing it only just before we
543 * attempt to get access to the shared memory forces these allocations to
544 * be given different address ranges that don't conflict.
545 *
546 * NOTE! This function executes in the postmaster, and should for this
547 * reason not use elog(FATAL) since that would take down the postmaster.
548 */
549 int
pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)550 pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
551 {
552 void *address;
553
554 Assert(ShmemProtectiveRegion != NULL);
555 Assert(UsedShmemSegAddr != NULL);
556 Assert(UsedShmemSegSize != 0);
557
558 /* ShmemProtectiveRegion */
559 address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
560 PROTECTIVE_REGION_SIZE,
561 MEM_RESERVE, PAGE_NOACCESS);
562 if (address == NULL)
563 {
564 /* Don't use FATAL since we're running in the postmaster */
565 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
566 ShmemProtectiveRegion, hChild, GetLastError());
567 return false;
568 }
569 if (address != ShmemProtectiveRegion)
570 {
571 /*
572 * Should never happen - in theory if allocation granularity causes
573 * strange effects it could, so check just in case.
574 *
575 * Don't use FATAL since we're running in the postmaster.
576 */
577 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
578 address, ShmemProtectiveRegion);
579 return false;
580 }
581
582 /* UsedShmemSegAddr */
583 address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
584 MEM_RESERVE, PAGE_READWRITE);
585 if (address == NULL)
586 {
587 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
588 UsedShmemSegAddr, hChild, GetLastError());
589 return false;
590 }
591 if (address != UsedShmemSegAddr)
592 {
593 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
594 address, UsedShmemSegAddr);
595 return false;
596 }
597
598 return true;
599 }
600