1 /*-------------------------------------------------------------------------
2 *
3 * win32_shmem.c
4 * Implement shared memory using win32 facilities
5 *
6 * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7 *
8 * IDENTIFICATION
9 * src/backend/port/win32_shmem.c
10 *
11 *-------------------------------------------------------------------------
12 */
13 #include "postgres.h"
14
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19
/*
 * Early in a process's life, Windows asynchronously creates threads for the
 * process's "default thread pool"
 * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
 * Occasionally, thread creation allocates a stack after
 * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
 * mapped shared memory at UsedShmemSegAddr.  This would cause mapping to fail
 * if the allocator preferred the just-released region for allocating the new
 * thread stack.  We observed such failures in some Windows Server 2016
 * configurations.  To give the system another region to prefer, reserve and
 * release an additional, protective region immediately before reserving or
 * releasing shared memory.  The idea is that, if the allocator handed out
 * REGION1 pages before REGION2 pages on one occasion, it will do so whenever
 * both regions are free.  Windows Server 2016 exhibits that behavior, and a
 * system behaving differently would have less need to protect
 * UsedShmemSegAddr.  The protective region must be at least large enough for
 * one thread stack.  However, ten times as much is less than 2% of the 32-bit
 * address space and is negligible relative to the 64-bit address space.
 */
39 #define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
40 void *ShmemProtectiveRegion = NULL;
41
42 HANDLE UsedShmemSegID = INVALID_HANDLE_VALUE;
43 void *UsedShmemSegAddr = NULL;
44 static Size UsedShmemSegSize = 0;
45
46 static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
47
48 /*
49 * Generate shared memory segment name. Expand the data directory, to generate
50 * an identifier unique for this data directory. Then replace all backslashes
51 * with forward slashes, since backslashes aren't permitted in global object names.
52 *
53 * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
54 * 2000, but that's all we support for other reasons as well), to make sure you can't
55 * open two postmasters in different sessions against the same data directory.
56 *
57 * XXX: What happens with junctions? It's only someone breaking things on purpose,
58 * and this is still better than before, but we might want to do something about
59 * that sometime in the future.
60 */
61 static char *
GetSharedMemName(void)62 GetSharedMemName(void)
63 {
64 char *retptr;
65 DWORD bufsize;
66 DWORD r;
67 char *cp;
68
69 bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
70 if (bufsize == 0)
71 elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
72 DataDir, GetLastError());
73
74 retptr = malloc(bufsize + 18); /* 18 for Global\PostgreSQL: */
75 if (retptr == NULL)
76 elog(FATAL, "could not allocate memory for shared memory name");
77
78 strcpy(retptr, "Global\\PostgreSQL:");
79 r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
80 if (r == 0 || r > bufsize)
81 elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
82 DataDir, GetLastError());
83
84 /*
85 * XXX: Intentionally overwriting the Global\ part here. This was not the
86 * original approach, but putting it in the actual Global\ namespace
87 * causes permission errors in a lot of cases, so we leave it in the
88 * default namespace for now.
89 */
90 for (cp = retptr; *cp; cp++)
91 if (*cp == '\\')
92 *cp = '/';
93
94 return retptr;
95 }
96
97
98 /*
99 * PGSharedMemoryIsInUse
100 *
101 * Is a previously-existing shmem segment still existing and in use?
102 *
103 * The point of this exercise is to detect the case where a prior postmaster
104 * crashed, but it left child backends that are still running. Therefore
105 * we only care about shmem segments that are associated with the intended
106 * DataDir. This is an important consideration since accidental matches of
107 * shmem segment IDs are reasonably common.
108 */
109 bool
PGSharedMemoryIsInUse(unsigned long id1,unsigned long id2)110 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
111 {
112 char *szShareMem;
113 HANDLE hmap;
114
115 szShareMem = GetSharedMemName();
116
117 hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
118
119 free(szShareMem);
120
121 if (hmap == NULL)
122 return false;
123
124 CloseHandle(hmap);
125 return true;
126 }
127
128
129 /*
130 * PGSharedMemoryCreate
131 *
132 * Create a shared memory segment of the given size and initialize its
133 * standard header.
134 */
135 PGShmemHeader *
PGSharedMemoryCreate(Size size,int port,PGShmemHeader ** shim)136 PGSharedMemoryCreate(Size size, int port,
137 PGShmemHeader **shim)
138 {
139 void *memAddress;
140 PGShmemHeader *hdr;
141 HANDLE hmap,
142 hmap2;
143 char *szShareMem;
144 int i;
145 DWORD size_high;
146 DWORD size_low;
147
148 if (huge_pages == HUGE_PAGES_ON)
149 ereport(ERROR,
150 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
151 errmsg("huge pages not supported on this platform")));
152
153 ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
154 MEM_RESERVE, PAGE_NOACCESS);
155 if (ShmemProtectiveRegion == NULL)
156 elog(FATAL, "could not reserve memory region: error code %lu",
157 GetLastError());
158
159 /* Room for a header? */
160 Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
161
162 szShareMem = GetSharedMemName();
163
164 UsedShmemSegAddr = NULL;
165
166 #ifdef _WIN64
167 size_high = size >> 32;
168 #else
169 size_high = 0;
170 #endif
171 size_low = (DWORD) size;
172
173 /*
174 * When recycling a shared memory segment, it may take a short while
175 * before it gets dropped from the global namespace. So re-try after
176 * sleeping for a second, and continue retrying 10 times. (both the 1
177 * second time and the 10 retries are completely arbitrary)
178 */
179 for (i = 0; i < 10; i++)
180 {
181 /*
182 * In case CreateFileMapping() doesn't set the error code to 0 on
183 * success
184 */
185 SetLastError(0);
186
187 hmap = CreateFileMapping(INVALID_HANDLE_VALUE, /* Use the pagefile */
188 NULL, /* Default security attrs */
189 PAGE_READWRITE, /* Memory is Read/Write */
190 size_high, /* Size Upper 32 Bits */
191 size_low, /* Size Lower 32 bits */
192 szShareMem);
193
194 if (!hmap)
195 ereport(FATAL,
196 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
197 errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
198 size, szShareMem)));
199
200 /*
201 * If the segment already existed, CreateFileMapping() will return a
202 * handle to the existing one and set ERROR_ALREADY_EXISTS.
203 */
204 if (GetLastError() == ERROR_ALREADY_EXISTS)
205 {
206 CloseHandle(hmap); /* Close the handle, since we got a valid one
207 * to the previous segment. */
208 hmap = NULL;
209 Sleep(1000);
210 continue;
211 }
212 break;
213 }
214
215 /*
216 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
217 * shared memory segment exists and we assume it belongs to somebody else.
218 */
219 if (!hmap)
220 ereport(FATAL,
221 (errmsg("pre-existing shared memory block is still in use"),
222 errhint("Check if there are any old server processes still running, and terminate them.")));
223
224 free(szShareMem);
225
226 /*
227 * Make the handle inheritable
228 */
229 if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
230 ereport(FATAL,
231 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
232 errdetail("Failed system call was DuplicateHandle.")));
233
234 /*
235 * Close the old, non-inheritable handle. If this fails we don't really
236 * care.
237 */
238 if (!CloseHandle(hmap))
239 elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
240
241
242 /*
243 * Get a pointer to the new shared memory segment. Map the whole segment
244 * at once, and let the system decide on the initial address.
245 */
246 memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
247 if (!memAddress)
248 ereport(FATAL,
249 (errmsg("could not create shared memory segment: error code %lu", GetLastError()),
250 errdetail("Failed system call was MapViewOfFileEx.")));
251
252
253
254 /*
255 * OK, we created a new segment. Mark it as created by this process. The
256 * order of assignments here is critical so that another Postgres process
257 * can't see the header as valid but belonging to an invalid PID!
258 */
259 hdr = (PGShmemHeader *) memAddress;
260 hdr->creatorPID = getpid();
261 hdr->magic = PGShmemMagic;
262
263 /*
264 * Initialize space allocation status for segment.
265 */
266 hdr->totalsize = size;
267 hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
268 hdr->dsm_control = 0;
269
270 /* Save info for possible future use */
271 UsedShmemSegAddr = memAddress;
272 UsedShmemSegSize = size;
273 UsedShmemSegID = hmap2;
274
275 /* Register on-exit routine to delete the new segment */
276 on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));
277
278 *shim = hdr;
279 return hdr;
280 }
281
282 /*
283 * PGSharedMemoryReAttach
284 *
285 * This is called during startup of a postmaster child process to re-attach to
286 * an already existing shared memory segment, using the handle inherited from
287 * the postmaster.
288 *
289 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
290 * parameters to this routine. The caller must have already restored them to
291 * the postmaster's values.
292 */
293 void
PGSharedMemoryReAttach(void)294 PGSharedMemoryReAttach(void)
295 {
296 PGShmemHeader *hdr;
297 void *origUsedShmemSegAddr = UsedShmemSegAddr;
298
299 Assert(ShmemProtectiveRegion != NULL);
300 Assert(UsedShmemSegAddr != NULL);
301 Assert(IsUnderPostmaster);
302
303 /*
304 * Release memory region reservations made by the postmaster
305 */
306 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
307 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
308 ShmemProtectiveRegion, GetLastError());
309 if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
310 elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
311 UsedShmemSegAddr, GetLastError());
312
313 hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
314 if (!hdr)
315 elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
316 UsedShmemSegID, UsedShmemSegAddr, GetLastError());
317 if (hdr != origUsedShmemSegAddr)
318 elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
319 hdr, origUsedShmemSegAddr);
320 if (hdr->magic != PGShmemMagic)
321 elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
322 dsm_set_control_handle(hdr->dsm_control);
323
324 UsedShmemSegAddr = hdr; /* probably redundant */
325 }
326
327 /*
328 * PGSharedMemoryNoReAttach
329 *
330 * This is called during startup of a postmaster child process when we choose
331 * *not* to re-attach to the existing shared memory segment. We must clean up
332 * to leave things in the appropriate state.
333 *
334 * The child process startup logic might or might not call PGSharedMemoryDetach
335 * after this; make sure that it will be a no-op if called.
336 *
337 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
338 * parameters to this routine. The caller must have already restored them to
339 * the postmaster's values.
340 */
341 void
PGSharedMemoryNoReAttach(void)342 PGSharedMemoryNoReAttach(void)
343 {
344 Assert(ShmemProtectiveRegion != NULL);
345 Assert(UsedShmemSegAddr != NULL);
346 Assert(IsUnderPostmaster);
347
348 /*
349 * Under Windows we will not have mapped the segment, so we don't need to
350 * un-map it. Just reset UsedShmemSegAddr to show we're not attached.
351 */
352 UsedShmemSegAddr = NULL;
353
354 /*
355 * We *must* close the inherited shmem segment handle, else Windows will
356 * consider the existence of this process to mean it can't release the
357 * shmem segment yet. We can now use PGSharedMemoryDetach to do that.
358 */
359 PGSharedMemoryDetach();
360 }
361
362 /*
363 * PGSharedMemoryDetach
364 *
365 * Detach from the shared memory segment, if still attached. This is not
366 * intended to be called explicitly by the process that originally created the
367 * segment (it will have an on_shmem_exit callback registered to do that).
368 * Rather, this is for subprocesses that have inherited an attachment and want
369 * to get rid of it.
370 *
371 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
372 * parameters to this routine.
373 */
374 void
PGSharedMemoryDetach(void)375 PGSharedMemoryDetach(void)
376 {
377 /*
378 * Releasing the protective region liberates an unimportant quantity of
379 * address space, but be tidy.
380 */
381 if (ShmemProtectiveRegion != NULL)
382 {
383 if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
384 elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
385 ShmemProtectiveRegion, GetLastError());
386
387 ShmemProtectiveRegion = NULL;
388 }
389
390 /* Unmap the view, if it's mapped */
391 if (UsedShmemSegAddr != NULL)
392 {
393 if (!UnmapViewOfFile(UsedShmemSegAddr))
394 elog(LOG, "could not unmap view of shared memory: error code %lu",
395 GetLastError());
396
397 UsedShmemSegAddr = NULL;
398 }
399
400 /* And close the shmem handle, if we have one */
401 if (UsedShmemSegID != INVALID_HANDLE_VALUE)
402 {
403 if (!CloseHandle(UsedShmemSegID))
404 elog(LOG, "could not close handle to shared memory: error code %lu",
405 GetLastError());
406
407 UsedShmemSegID = INVALID_HANDLE_VALUE;
408 }
409 }
410
411
412 /*
413 * pgwin32_SharedMemoryDelete
414 *
415 * Detach from and delete the shared memory segment
416 * (called as an on_shmem_exit callback, hence funny argument list)
417 */
418 static void
pgwin32_SharedMemoryDelete(int status,Datum shmId)419 pgwin32_SharedMemoryDelete(int status, Datum shmId)
420 {
421 Assert(DatumGetPointer(shmId) == UsedShmemSegID);
422 PGSharedMemoryDetach();
423 }
424
425 /*
426 * pgwin32_ReserveSharedMemoryRegion(hChild)
427 *
428 * Reserve the memory region that will be used for shared memory in a child
429 * process. It is called before the child process starts, to make sure the
430 * memory is available.
431 *
432 * Once the child starts, DLLs loading in different order or threads getting
433 * scheduled differently may allocate memory which can conflict with the
434 * address space we need for our shared memory. By reserving the shared
435 * memory region before the child starts, and freeing it only just before we
436 * attempt to get access to the shared memory forces these allocations to
437 * be given different address ranges that don't conflict.
438 *
439 * NOTE! This function executes in the postmaster, and should for this
440 * reason not use elog(FATAL) since that would take down the postmaster.
441 */
442 int
pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)443 pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
444 {
445 void *address;
446
447 Assert(ShmemProtectiveRegion != NULL);
448 Assert(UsedShmemSegAddr != NULL);
449 Assert(UsedShmemSegSize != 0);
450
451 /* ShmemProtectiveRegion */
452 address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
453 PROTECTIVE_REGION_SIZE,
454 MEM_RESERVE, PAGE_NOACCESS);
455 if (address == NULL)
456 {
457 /* Don't use FATAL since we're running in the postmaster */
458 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
459 ShmemProtectiveRegion, hChild, GetLastError());
460 return false;
461 }
462 if (address != ShmemProtectiveRegion)
463 {
464 /*
465 * Should never happen - in theory if allocation granularity causes
466 * strange effects it could, so check just in case.
467 *
468 * Don't use FATAL since we're running in the postmaster.
469 */
470 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
471 address, ShmemProtectiveRegion);
472 return false;
473 }
474
475 /* UsedShmemSegAddr */
476 address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
477 MEM_RESERVE, PAGE_READWRITE);
478 if (address == NULL)
479 {
480 elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
481 UsedShmemSegAddr, hChild, GetLastError());
482 return false;
483 }
484 if (address != UsedShmemSegAddr)
485 {
486 elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
487 address, UsedShmemSegAddr);
488 return false;
489 }
490
491 return true;
492 }
493