1 /*-------------------------------------------------------------------------
2  *
3  * win32_shmem.c
4  *	  Implement shared memory using win32 facilities
5  *
6  * Portions Copyright (c) 1996-2017, PostgreSQL Global Development Group
7  *
8  * IDENTIFICATION
9  *	  src/backend/port/win32_shmem.c
10  *
11  *-------------------------------------------------------------------------
12  */
13 #include "postgres.h"
14 
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19 
20 /*
21  * Early in a process's life, Windows asynchronously creates threads for the
22  * process's "default thread pool"
23  * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24  * Occasionally, thread creation allocates a stack after
25  * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26  * mapped shared memory at UsedShmemSegAddr.  This would cause mapping to fail
27  * if the allocator preferred the just-released region for allocating the new
28  * thread stack.  We observed such failures in some Windows Server 2016
29  * configurations.  To give the system another region to prefer, reserve and
30  * release an additional, protective region immediately before reserving or
31  * releasing shared memory.  The idea is that, if the allocator handed out
32  * REGION1 pages before REGION2 pages at one occasion, it will do so whenever
33  * both regions are free.  Windows Server 2016 exhibits that behavior, and a
34  * system behaving differently would have less need to protect
35  * UsedShmemSegAddr.  The protective region must be at least large enough for
36  * one thread stack.  However, ten times as much is less than 2% of the 32-bit
37  * address space and is negligible relative to the 64-bit address space.
38  */
/* Size of the protective region: ten times WIN32_STACK_RLIMIT. */
#define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
/*
 * Base address of the protective region.  PGSharedMemoryCreate() reserves it
 * with VirtualAlloc(); PGSharedMemoryDetach() resets this to NULL after
 * releasing it.
 */
void	   *ShmemProtectiveRegion = NULL;

/*
 * Handle of the file mapping that backs the shared memory segment, or
 * INVALID_HANDLE_VALUE when this process holds no such handle.
 */
HANDLE		UsedShmemSegID = INVALID_HANDLE_VALUE;
/* Address of the shared memory segment; NULL when not attached. */
void	   *UsedShmemSegAddr = NULL;
/*
 * Size of the segment as requested from PGSharedMemoryCreate(); read by
 * pgwin32_ReserveSharedMemoryRegion() when reserving space in a child.
 */
static Size UsedShmemSegSize = 0;

/* on_shmem_exit callback registered by PGSharedMemoryCreate() */
static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
47 
48 /*
49  * Generate shared memory segment name. Expand the data directory, to generate
50  * an identifier unique for this data directory. Then replace all backslashes
51  * with forward slashes, since backslashes aren't permitted in global object names.
52  *
53  * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
54  * 2000, but that's all we support for other reasons as well), to make sure you can't
55  * open two postmasters in different sessions against the same data directory.
56  *
57  * XXX: What happens with junctions? It's only someone breaking things on purpose,
58  *		and this is still better than before, but we might want to do something about
59  *		that sometime in the future.
60  */
61 static char *
GetSharedMemName(void)62 GetSharedMemName(void)
63 {
64 	char	   *retptr;
65 	DWORD		bufsize;
66 	DWORD		r;
67 	char	   *cp;
68 
69 	bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
70 	if (bufsize == 0)
71 		elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
72 			 DataDir, GetLastError());
73 
74 	retptr = malloc(bufsize + 18);	/* 18 for Global\PostgreSQL: */
75 	if (retptr == NULL)
76 		elog(FATAL, "could not allocate memory for shared memory name");
77 
78 	strcpy(retptr, "Global\\PostgreSQL:");
79 	r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
80 	if (r == 0 || r > bufsize)
81 		elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
82 			 DataDir, GetLastError());
83 
84 	/*
85 	 * XXX: Intentionally overwriting the Global\ part here. This was not the
86 	 * original approach, but putting it in the actual Global\ namespace
87 	 * causes permission errors in a lot of cases, so we leave it in the
88 	 * default namespace for now.
89 	 */
90 	for (cp = retptr; *cp; cp++)
91 		if (*cp == '\\')
92 			*cp = '/';
93 
94 	return retptr;
95 }
96 
97 
98 /*
99  * PGSharedMemoryIsInUse
100  *
101  * Is a previously-existing shmem segment still existing and in use?
102  *
103  * The point of this exercise is to detect the case where a prior postmaster
104  * crashed, but it left child backends that are still running.  Therefore
105  * we only care about shmem segments that are associated with the intended
106  * DataDir.  This is an important consideration since accidental matches of
107  * shmem segment IDs are reasonably common.
108  */
109 bool
PGSharedMemoryIsInUse(unsigned long id1,unsigned long id2)110 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
111 {
112 	char	   *szShareMem;
113 	HANDLE		hmap;
114 
115 	szShareMem = GetSharedMemName();
116 
117 	hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
118 
119 	free(szShareMem);
120 
121 	if (hmap == NULL)
122 		return false;
123 
124 	CloseHandle(hmap);
125 	return true;
126 }
127 
128 
129 /*
130  * PGSharedMemoryCreate
131  *
132  * Create a shared memory segment of the given size and initialize its
133  * standard header.
134  */
135 PGShmemHeader *
PGSharedMemoryCreate(Size size,int port,PGShmemHeader ** shim)136 PGSharedMemoryCreate(Size size, int port,
137 					 PGShmemHeader **shim)
138 {
139 	void	   *memAddress;
140 	PGShmemHeader *hdr;
141 	HANDLE		hmap,
142 				hmap2;
143 	char	   *szShareMem;
144 	int			i;
145 	DWORD		size_high;
146 	DWORD		size_low;
147 
148 	if (huge_pages == HUGE_PAGES_ON)
149 		ereport(ERROR,
150 				(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
151 				 errmsg("huge pages not supported on this platform")));
152 
153 	ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
154 										 MEM_RESERVE, PAGE_NOACCESS);
155 	if (ShmemProtectiveRegion == NULL)
156 		elog(FATAL, "could not reserve memory region: error code %lu",
157 			 GetLastError());
158 
159 	/* Room for a header? */
160 	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
161 
162 	szShareMem = GetSharedMemName();
163 
164 	UsedShmemSegAddr = NULL;
165 
166 #ifdef _WIN64
167 	size_high = size >> 32;
168 #else
169 	size_high = 0;
170 #endif
171 	size_low = (DWORD) size;
172 
173 	/*
174 	 * When recycling a shared memory segment, it may take a short while
175 	 * before it gets dropped from the global namespace. So re-try after
176 	 * sleeping for a second, and continue retrying 10 times. (both the 1
177 	 * second time and the 10 retries are completely arbitrary)
178 	 */
179 	for (i = 0; i < 10; i++)
180 	{
181 		/*
182 		 * In case CreateFileMapping() doesn't set the error code to 0 on
183 		 * success
184 		 */
185 		SetLastError(0);
186 
187 		hmap = CreateFileMapping(INVALID_HANDLE_VALUE,	/* Use the pagefile */
188 								 NULL,	/* Default security attrs */
189 								 PAGE_READWRITE,	/* Memory is Read/Write */
190 								 size_high, /* Size Upper 32 Bits	*/
191 								 size_low,	/* Size Lower 32 bits */
192 								 szShareMem);
193 
194 		if (!hmap)
195 			ereport(FATAL,
196 					(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
197 					 errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
198 							   size, szShareMem)));
199 
200 		/*
201 		 * If the segment already existed, CreateFileMapping() will return a
202 		 * handle to the existing one and set ERROR_ALREADY_EXISTS.
203 		 */
204 		if (GetLastError() == ERROR_ALREADY_EXISTS)
205 		{
206 			CloseHandle(hmap);	/* Close the handle, since we got a valid one
207 								 * to the previous segment. */
208 			hmap = NULL;
209 			Sleep(1000);
210 			continue;
211 		}
212 		break;
213 	}
214 
215 	/*
216 	 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
217 	 * shared memory segment exists and we assume it belongs to somebody else.
218 	 */
219 	if (!hmap)
220 		ereport(FATAL,
221 				(errmsg("pre-existing shared memory block is still in use"),
222 				 errhint("Check if there are any old server processes still running, and terminate them.")));
223 
224 	free(szShareMem);
225 
226 	/*
227 	 * Make the handle inheritable
228 	 */
229 	if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
230 		ereport(FATAL,
231 				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
232 				 errdetail("Failed system call was DuplicateHandle.")));
233 
234 	/*
235 	 * Close the old, non-inheritable handle. If this fails we don't really
236 	 * care.
237 	 */
238 	if (!CloseHandle(hmap))
239 		elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
240 
241 
242 	/*
243 	 * Get a pointer to the new shared memory segment. Map the whole segment
244 	 * at once, and let the system decide on the initial address.
245 	 */
246 	memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
247 	if (!memAddress)
248 		ereport(FATAL,
249 				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
250 				 errdetail("Failed system call was MapViewOfFileEx.")));
251 
252 
253 
254 	/*
255 	 * OK, we created a new segment.  Mark it as created by this process. The
256 	 * order of assignments here is critical so that another Postgres process
257 	 * can't see the header as valid but belonging to an invalid PID!
258 	 */
259 	hdr = (PGShmemHeader *) memAddress;
260 	hdr->creatorPID = getpid();
261 	hdr->magic = PGShmemMagic;
262 
263 	/*
264 	 * Initialize space allocation status for segment.
265 	 */
266 	hdr->totalsize = size;
267 	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
268 	hdr->dsm_control = 0;
269 
270 	/* Save info for possible future use */
271 	UsedShmemSegAddr = memAddress;
272 	UsedShmemSegSize = size;
273 	UsedShmemSegID = hmap2;
274 
275 	/* Register on-exit routine to delete the new segment */
276 	on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));
277 
278 	*shim = hdr;
279 	return hdr;
280 }
281 
282 /*
283  * PGSharedMemoryReAttach
284  *
285  * This is called during startup of a postmaster child process to re-attach to
286  * an already existing shared memory segment, using the handle inherited from
287  * the postmaster.
288  *
289  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
290  * parameters to this routine.  The caller must have already restored them to
291  * the postmaster's values.
292  */
293 void
PGSharedMemoryReAttach(void)294 PGSharedMemoryReAttach(void)
295 {
296 	PGShmemHeader *hdr;
297 	void	   *origUsedShmemSegAddr = UsedShmemSegAddr;
298 
299 	Assert(ShmemProtectiveRegion != NULL);
300 	Assert(UsedShmemSegAddr != NULL);
301 	Assert(IsUnderPostmaster);
302 
303 	/*
304 	 * Release memory region reservations made by the postmaster
305 	 */
306 	if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
307 		elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
308 			 ShmemProtectiveRegion, GetLastError());
309 	if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
310 		elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
311 			 UsedShmemSegAddr, GetLastError());
312 
313 	hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
314 	if (!hdr)
315 		elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
316 			 UsedShmemSegID, UsedShmemSegAddr, GetLastError());
317 	if (hdr != origUsedShmemSegAddr)
318 		elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
319 			 hdr, origUsedShmemSegAddr);
320 	if (hdr->magic != PGShmemMagic)
321 		elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
322 	dsm_set_control_handle(hdr->dsm_control);
323 
324 	UsedShmemSegAddr = hdr;		/* probably redundant */
325 }
326 
/*
 * PGSharedMemoryNoReAttach
 *
 * This is called during startup of a postmaster child process when we choose
 * *not* to re-attach to the existing shared memory segment.  We must clean up
 * to leave things in the appropriate state.
 *
 * The child process startup logic might or might not call PGSharedMemoryDetach
 * after this; make sure that it will be a no-op if called.
 *
 * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
 * parameters to this routine.  The caller must have already restored them to
 * the postmaster's values.
 *
 * On return UsedShmemSegAddr is NULL, and PGSharedMemoryDetach() has reset
 * ShmemProtectiveRegion and UsedShmemSegID as well.
 */
void
PGSharedMemoryNoReAttach(void)
{
	Assert(ShmemProtectiveRegion != NULL);
	Assert(UsedShmemSegAddr != NULL);
	Assert(IsUnderPostmaster);

	/*
	 * Under Windows we will not have mapped the segment, so we don't need to
	 * un-map it.  Just reset UsedShmemSegAddr to show we're not attached.
	 * Clearing it here also makes PGSharedMemoryDetach() skip its
	 * UnmapViewOfFile() step.
	 */
	UsedShmemSegAddr = NULL;

	/*
	 * We *must* close the inherited shmem segment handle, else Windows will
	 * consider the existence of this process to mean it can't release the
	 * shmem segment yet.  We can now use PGSharedMemoryDetach to do that.
	 */
	PGSharedMemoryDetach();
}
361 
362 /*
363  * PGSharedMemoryDetach
364  *
365  * Detach from the shared memory segment, if still attached.  This is not
366  * intended to be called explicitly by the process that originally created the
367  * segment (it will have an on_shmem_exit callback registered to do that).
368  * Rather, this is for subprocesses that have inherited an attachment and want
369  * to get rid of it.
370  *
371  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
372  * parameters to this routine.
373  */
374 void
PGSharedMemoryDetach(void)375 PGSharedMemoryDetach(void)
376 {
377 	/*
378 	 * Releasing the protective region liberates an unimportant quantity of
379 	 * address space, but be tidy.
380 	 */
381 	if (ShmemProtectiveRegion != NULL)
382 	{
383 		if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
384 			elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
385 				 ShmemProtectiveRegion, GetLastError());
386 
387 		ShmemProtectiveRegion = NULL;
388 	}
389 
390 	/* Unmap the view, if it's mapped */
391 	if (UsedShmemSegAddr != NULL)
392 	{
393 		if (!UnmapViewOfFile(UsedShmemSegAddr))
394 			elog(LOG, "could not unmap view of shared memory: error code %lu",
395 				 GetLastError());
396 
397 		UsedShmemSegAddr = NULL;
398 	}
399 
400 	/* And close the shmem handle, if we have one */
401 	if (UsedShmemSegID != INVALID_HANDLE_VALUE)
402 	{
403 		if (!CloseHandle(UsedShmemSegID))
404 			elog(LOG, "could not close handle to shared memory: error code %lu",
405 				 GetLastError());
406 
407 		UsedShmemSegID = INVALID_HANDLE_VALUE;
408 	}
409 }
410 
411 
/*
 * pgwin32_SharedMemoryDelete
 *
 * Detach from and delete the shared memory segment
 * (called as an on_shmem_exit callback, hence funny argument list)
 *
 * status is not used.  shmId carries the segment handle that was passed to
 * on_shmem_exit() when the callback was registered.
 */
static void
pgwin32_SharedMemoryDelete(int status, Datum shmId)
{
	/* The registered handle is expected to still be the current one */
	Assert(DatumGetPointer(shmId) == UsedShmemSegID);
	/* All of the actual work is done by the detach routine */
	PGSharedMemoryDetach();
}
424 
425 /*
426  * pgwin32_ReserveSharedMemoryRegion(hChild)
427  *
428  * Reserve the memory region that will be used for shared memory in a child
429  * process. It is called before the child process starts, to make sure the
430  * memory is available.
431  *
432  * Once the child starts, DLLs loading in different order or threads getting
433  * scheduled differently may allocate memory which can conflict with the
434  * address space we need for our shared memory. By reserving the shared
435  * memory region before the child starts, and freeing it only just before we
436  * attempt to get access to the shared memory forces these allocations to
437  * be given different address ranges that don't conflict.
438  *
439  * NOTE! This function executes in the postmaster, and should for this
440  * reason not use elog(FATAL) since that would take down the postmaster.
441  */
442 int
pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)443 pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
444 {
445 	void	   *address;
446 
447 	Assert(ShmemProtectiveRegion != NULL);
448 	Assert(UsedShmemSegAddr != NULL);
449 	Assert(UsedShmemSegSize != 0);
450 
451 	/* ShmemProtectiveRegion */
452 	address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
453 							 PROTECTIVE_REGION_SIZE,
454 							 MEM_RESERVE, PAGE_NOACCESS);
455 	if (address == NULL)
456 	{
457 		/* Don't use FATAL since we're running in the postmaster */
458 		elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
459 			 ShmemProtectiveRegion, hChild, GetLastError());
460 		return false;
461 	}
462 	if (address != ShmemProtectiveRegion)
463 	{
464 		/*
465 		 * Should never happen - in theory if allocation granularity causes
466 		 * strange effects it could, so check just in case.
467 		 *
468 		 * Don't use FATAL since we're running in the postmaster.
469 		 */
470 		elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
471 			 address, ShmemProtectiveRegion);
472 		return false;
473 	}
474 
475 	/* UsedShmemSegAddr */
476 	address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
477 							 MEM_RESERVE, PAGE_READWRITE);
478 	if (address == NULL)
479 	{
480 		elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
481 			 UsedShmemSegAddr, hChild, GetLastError());
482 		return false;
483 	}
484 	if (address != UsedShmemSegAddr)
485 	{
486 		elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
487 			 address, UsedShmemSegAddr);
488 		return false;
489 	}
490 
491 	return true;
492 }
493