/*-------------------------------------------------------------------------
 *
 * win32_shmem.c
 *	  Implement shared memory using win32 facilities
 *
 * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
 *
 * IDENTIFICATION
 *	  src/backend/port/win32_shmem.c
 *
 *-------------------------------------------------------------------------
 */
13 #include "postgres.h"
14 
15 #include "miscadmin.h"
16 #include "storage/dsm.h"
17 #include "storage/ipc.h"
18 #include "storage/pg_shmem.h"
19 
20 /*
21  * Early in a process's life, Windows asynchronously creates threads for the
22  * process's "default thread pool"
23  * (https://docs.microsoft.com/en-us/windows/desktop/ProcThread/thread-pools).
24  * Occasionally, thread creation allocates a stack after
25  * PGSharedMemoryReAttach() has released UsedShmemSegAddr and before it has
26  * mapped shared memory at UsedShmemSegAddr.  This would cause mapping to fail
27  * if the allocator preferred the just-released region for allocating the new
28  * thread stack.  We observed such failures in some Windows Server 2016
29  * configurations.  To give the system another region to prefer, reserve and
30  * release an additional, protective region immediately before reserving or
31  * releasing shared memory.  The idea is that, if the allocator handed out
32  * REGION1 pages before REGION2 pages at one occasion, it will do so whenever
33  * both regions are free.  Windows Server 2016 exhibits that behavior, and a
34  * system behaving differently would have less need to protect
35  * UsedShmemSegAddr.  The protective region must be at least large enough for
36  * one thread stack.  However, ten times as much is less than 2% of the 32-bit
37  * address space and is negligible relative to the 64-bit address space.
38  */
39 #define PROTECTIVE_REGION_SIZE (10 * WIN32_STACK_RLIMIT)
40 void	   *ShmemProtectiveRegion = NULL;
41 
42 HANDLE		UsedShmemSegID = INVALID_HANDLE_VALUE;
43 void	   *UsedShmemSegAddr = NULL;
44 static Size UsedShmemSegSize = 0;
45 
46 static bool EnableLockPagesPrivilege(int elevel);
47 static void pgwin32_SharedMemoryDelete(int status, Datum shmId);
48 
49 /*
50  * Generate shared memory segment name. Expand the data directory, to generate
51  * an identifier unique for this data directory. Then replace all backslashes
52  * with forward slashes, since backslashes aren't permitted in global object names.
53  *
54  * Store the shared memory segment in the Global\ namespace (requires NT2 TSE or
55  * 2000, but that's all we support for other reasons as well), to make sure you can't
56  * open two postmasters in different sessions against the same data directory.
57  *
58  * XXX: What happens with junctions? It's only someone breaking things on purpose,
59  *		and this is still better than before, but we might want to do something about
60  *		that sometime in the future.
61  */
62 static char *
GetSharedMemName(void)63 GetSharedMemName(void)
64 {
65 	char	   *retptr;
66 	DWORD		bufsize;
67 	DWORD		r;
68 	char	   *cp;
69 
70 	bufsize = GetFullPathName(DataDir, 0, NULL, NULL);
71 	if (bufsize == 0)
72 		elog(FATAL, "could not get size for full pathname of datadir %s: error code %lu",
73 			 DataDir, GetLastError());
74 
75 	retptr = malloc(bufsize + 18);	/* 18 for Global\PostgreSQL: */
76 	if (retptr == NULL)
77 		elog(FATAL, "could not allocate memory for shared memory name");
78 
79 	strcpy(retptr, "Global\\PostgreSQL:");
80 	r = GetFullPathName(DataDir, bufsize, retptr + 18, NULL);
81 	if (r == 0 || r > bufsize)
82 		elog(FATAL, "could not generate full pathname for datadir %s: error code %lu",
83 			 DataDir, GetLastError());
84 
85 	/*
86 	 * XXX: Intentionally overwriting the Global\ part here. This was not the
87 	 * original approach, but putting it in the actual Global\ namespace
88 	 * causes permission errors in a lot of cases, so we leave it in the
89 	 * default namespace for now.
90 	 */
91 	for (cp = retptr; *cp; cp++)
92 		if (*cp == '\\')
93 			*cp = '/';
94 
95 	return retptr;
96 }
97 
98 
99 /*
100  * PGSharedMemoryIsInUse
101  *
102  * Is a previously-existing shmem segment still existing and in use?
103  *
104  * The point of this exercise is to detect the case where a prior postmaster
105  * crashed, but it left child backends that are still running.  Therefore
106  * we only care about shmem segments that are associated with the intended
107  * DataDir.  This is an important consideration since accidental matches of
108  * shmem segment IDs are reasonably common.
109  */
110 bool
PGSharedMemoryIsInUse(unsigned long id1,unsigned long id2)111 PGSharedMemoryIsInUse(unsigned long id1, unsigned long id2)
112 {
113 	char	   *szShareMem;
114 	HANDLE		hmap;
115 
116 	szShareMem = GetSharedMemName();
117 
118 	hmap = OpenFileMapping(FILE_MAP_READ, FALSE, szShareMem);
119 
120 	free(szShareMem);
121 
122 	if (hmap == NULL)
123 		return false;
124 
125 	CloseHandle(hmap);
126 	return true;
127 }
128 
129 /*
130  * EnableLockPagesPrivilege
131  *
132  * Try to acquire SeLockMemoryPrivilege so we can use large pages.
133  */
134 static bool
EnableLockPagesPrivilege(int elevel)135 EnableLockPagesPrivilege(int elevel)
136 {
137 	HANDLE		hToken;
138 	TOKEN_PRIVILEGES tp;
139 	LUID		luid;
140 
141 	if (!OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &hToken))
142 	{
143 		ereport(elevel,
144 				(errmsg("could not enable user right \"%s\": error code %lu",
145 
146 		/*
147 		 * translator: This is a term from Windows and should be translated to
148 		 * match the Windows localization.
149 		 */
150 						_("Lock pages in memory"),
151 						GetLastError()),
152 				 errdetail("Failed system call was %s.", "OpenProcessToken")));
153 		return FALSE;
154 	}
155 
156 	if (!LookupPrivilegeValue(NULL, SE_LOCK_MEMORY_NAME, &luid))
157 	{
158 		ereport(elevel,
159 				(errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
160 				 errdetail("Failed system call was %s.", "LookupPrivilegeValue")));
161 		CloseHandle(hToken);
162 		return FALSE;
163 	}
164 	tp.PrivilegeCount = 1;
165 	tp.Privileges[0].Luid = luid;
166 	tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED;
167 
168 	if (!AdjustTokenPrivileges(hToken, FALSE, &tp, 0, NULL, NULL))
169 	{
170 		ereport(elevel,
171 				(errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
172 				 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
173 		CloseHandle(hToken);
174 		return FALSE;
175 	}
176 
177 	if (GetLastError() != ERROR_SUCCESS)
178 	{
179 		if (GetLastError() == ERROR_NOT_ALL_ASSIGNED)
180 			ereport(elevel,
181 					(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
182 					 errmsg("could not enable user right \"%s\"", _("Lock pages in memory")),
183 					 errhint("Assign user right \"%s\" to the Windows user account which runs PostgreSQL.",
184 							 _("Lock pages in memory"))));
185 		else
186 			ereport(elevel,
187 					(errmsg("could not enable user right \"%s\": error code %lu", _("Lock pages in memory"), GetLastError()),
188 					 errdetail("Failed system call was %s.", "AdjustTokenPrivileges")));
189 		CloseHandle(hToken);
190 		return FALSE;
191 	}
192 
193 	CloseHandle(hToken);
194 
195 	return TRUE;
196 }
197 
198 /*
199  * PGSharedMemoryCreate
200  *
201  * Create a shared memory segment of the given size and initialize its
202  * standard header.
203  */
204 PGShmemHeader *
PGSharedMemoryCreate(Size size,PGShmemHeader ** shim)205 PGSharedMemoryCreate(Size size,
206 					 PGShmemHeader **shim)
207 {
208 	void	   *memAddress;
209 	PGShmemHeader *hdr;
210 	HANDLE		hmap,
211 				hmap2;
212 	char	   *szShareMem;
213 	int			i;
214 	DWORD		size_high;
215 	DWORD		size_low;
216 	SIZE_T		largePageSize = 0;
217 	Size		orig_size = size;
218 	DWORD		flProtect = PAGE_READWRITE;
219 
220 	ShmemProtectiveRegion = VirtualAlloc(NULL, PROTECTIVE_REGION_SIZE,
221 										 MEM_RESERVE, PAGE_NOACCESS);
222 	if (ShmemProtectiveRegion == NULL)
223 		elog(FATAL, "could not reserve memory region: error code %lu",
224 			 GetLastError());
225 
226 	/* Room for a header? */
227 	Assert(size > MAXALIGN(sizeof(PGShmemHeader)));
228 
229 	szShareMem = GetSharedMemName();
230 
231 	UsedShmemSegAddr = NULL;
232 
233 	if (huge_pages == HUGE_PAGES_ON || huge_pages == HUGE_PAGES_TRY)
234 	{
235 		/* Does the processor support large pages? */
236 		largePageSize = GetLargePageMinimum();
237 		if (largePageSize == 0)
238 		{
239 			ereport(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1,
240 					(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
241 					 errmsg("the processor does not support large pages")));
242 			ereport(DEBUG1,
243 					(errmsg_internal("disabling huge pages")));
244 		}
245 		else if (!EnableLockPagesPrivilege(huge_pages == HUGE_PAGES_ON ? FATAL : DEBUG1))
246 		{
247 			ereport(DEBUG1,
248 					(errmsg_internal("disabling huge pages")));
249 		}
250 		else
251 		{
252 			/* Huge pages available and privilege enabled, so turn on */
253 			flProtect = PAGE_READWRITE | SEC_COMMIT | SEC_LARGE_PAGES;
254 
255 			/* Round size up as appropriate. */
256 			if (size % largePageSize != 0)
257 				size += largePageSize - (size % largePageSize);
258 		}
259 	}
260 
261 retry:
262 #ifdef _WIN64
263 	size_high = size >> 32;
264 #else
265 	size_high = 0;
266 #endif
267 	size_low = (DWORD) size;
268 
269 	/*
270 	 * When recycling a shared memory segment, it may take a short while
271 	 * before it gets dropped from the global namespace. So re-try after
272 	 * sleeping for a second, and continue retrying 10 times. (both the 1
273 	 * second time and the 10 retries are completely arbitrary)
274 	 */
275 	for (i = 0; i < 10; i++)
276 	{
277 		/*
278 		 * In case CreateFileMapping() doesn't set the error code to 0 on
279 		 * success
280 		 */
281 		SetLastError(0);
282 
283 		hmap = CreateFileMapping(INVALID_HANDLE_VALUE,	/* Use the pagefile */
284 								 NULL,	/* Default security attrs */
285 								 flProtect,
286 								 size_high, /* Size Upper 32 Bits	*/
287 								 size_low,	/* Size Lower 32 bits */
288 								 szShareMem);
289 
290 		if (!hmap)
291 		{
292 			if (GetLastError() == ERROR_NO_SYSTEM_RESOURCES &&
293 				huge_pages == HUGE_PAGES_TRY &&
294 				(flProtect & SEC_LARGE_PAGES) != 0)
295 			{
296 				elog(DEBUG1, "CreateFileMapping(%zu) with SEC_LARGE_PAGES failed, "
297 					 "huge pages disabled",
298 					 size);
299 
300 				/*
301 				 * Use the original size, not the rounded-up value, when
302 				 * falling back to non-huge pages.
303 				 */
304 				size = orig_size;
305 				flProtect = PAGE_READWRITE;
306 				goto retry;
307 			}
308 			else
309 				ereport(FATAL,
310 						(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
311 						 errdetail("Failed system call was CreateFileMapping(size=%zu, name=%s).",
312 								   size, szShareMem)));
313 		}
314 
315 		/*
316 		 * If the segment already existed, CreateFileMapping() will return a
317 		 * handle to the existing one and set ERROR_ALREADY_EXISTS.
318 		 */
319 		if (GetLastError() == ERROR_ALREADY_EXISTS)
320 		{
321 			CloseHandle(hmap);	/* Close the handle, since we got a valid one
322 								 * to the previous segment. */
323 			hmap = NULL;
324 			Sleep(1000);
325 			continue;
326 		}
327 		break;
328 	}
329 
330 	/*
331 	 * If the last call in the loop still returned ERROR_ALREADY_EXISTS, this
332 	 * shared memory segment exists and we assume it belongs to somebody else.
333 	 */
334 	if (!hmap)
335 		ereport(FATAL,
336 				(errmsg("pre-existing shared memory block is still in use"),
337 				 errhint("Check if there are any old server processes still running, and terminate them.")));
338 
339 	free(szShareMem);
340 
341 	/*
342 	 * Make the handle inheritable
343 	 */
344 	if (!DuplicateHandle(GetCurrentProcess(), hmap, GetCurrentProcess(), &hmap2, 0, TRUE, DUPLICATE_SAME_ACCESS))
345 		ereport(FATAL,
346 				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
347 				 errdetail("Failed system call was DuplicateHandle.")));
348 
349 	/*
350 	 * Close the old, non-inheritable handle. If this fails we don't really
351 	 * care.
352 	 */
353 	if (!CloseHandle(hmap))
354 		elog(LOG, "could not close handle to shared memory: error code %lu", GetLastError());
355 
356 
357 	/*
358 	 * Get a pointer to the new shared memory segment. Map the whole segment
359 	 * at once, and let the system decide on the initial address.
360 	 */
361 	memAddress = MapViewOfFileEx(hmap2, FILE_MAP_WRITE | FILE_MAP_READ, 0, 0, 0, NULL);
362 	if (!memAddress)
363 		ereport(FATAL,
364 				(errmsg("could not create shared memory segment: error code %lu", GetLastError()),
365 				 errdetail("Failed system call was MapViewOfFileEx.")));
366 
367 
368 
369 	/*
370 	 * OK, we created a new segment.  Mark it as created by this process. The
371 	 * order of assignments here is critical so that another Postgres process
372 	 * can't see the header as valid but belonging to an invalid PID!
373 	 */
374 	hdr = (PGShmemHeader *) memAddress;
375 	hdr->creatorPID = getpid();
376 	hdr->magic = PGShmemMagic;
377 
378 	/*
379 	 * Initialize space allocation status for segment.
380 	 */
381 	hdr->totalsize = size;
382 	hdr->freeoffset = MAXALIGN(sizeof(PGShmemHeader));
383 	hdr->dsm_control = 0;
384 
385 	/* Save info for possible future use */
386 	UsedShmemSegAddr = memAddress;
387 	UsedShmemSegSize = size;
388 	UsedShmemSegID = hmap2;
389 
390 	/* Register on-exit routine to delete the new segment */
391 	on_shmem_exit(pgwin32_SharedMemoryDelete, PointerGetDatum(hmap2));
392 
393 	*shim = hdr;
394 	return hdr;
395 }
396 
397 /*
398  * PGSharedMemoryReAttach
399  *
400  * This is called during startup of a postmaster child process to re-attach to
401  * an already existing shared memory segment, using the handle inherited from
402  * the postmaster.
403  *
404  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
405  * parameters to this routine.  The caller must have already restored them to
406  * the postmaster's values.
407  */
408 void
PGSharedMemoryReAttach(void)409 PGSharedMemoryReAttach(void)
410 {
411 	PGShmemHeader *hdr;
412 	void	   *origUsedShmemSegAddr = UsedShmemSegAddr;
413 
414 	Assert(ShmemProtectiveRegion != NULL);
415 	Assert(UsedShmemSegAddr != NULL);
416 	Assert(IsUnderPostmaster);
417 
418 	/*
419 	 * Release memory region reservations made by the postmaster
420 	 */
421 	if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
422 		elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
423 			 ShmemProtectiveRegion, GetLastError());
424 	if (VirtualFree(UsedShmemSegAddr, 0, MEM_RELEASE) == 0)
425 		elog(FATAL, "failed to release reserved memory region (addr=%p): error code %lu",
426 			 UsedShmemSegAddr, GetLastError());
427 
428 	hdr = (PGShmemHeader *) MapViewOfFileEx(UsedShmemSegID, FILE_MAP_READ | FILE_MAP_WRITE, 0, 0, 0, UsedShmemSegAddr);
429 	if (!hdr)
430 		elog(FATAL, "could not reattach to shared memory (key=%p, addr=%p): error code %lu",
431 			 UsedShmemSegID, UsedShmemSegAddr, GetLastError());
432 	if (hdr != origUsedShmemSegAddr)
433 		elog(FATAL, "reattaching to shared memory returned unexpected address (got %p, expected %p)",
434 			 hdr, origUsedShmemSegAddr);
435 	if (hdr->magic != PGShmemMagic)
436 		elog(FATAL, "reattaching to shared memory returned non-PostgreSQL memory");
437 	dsm_set_control_handle(hdr->dsm_control);
438 
439 	UsedShmemSegAddr = hdr;		/* probably redundant */
440 }
441 
442 /*
443  * PGSharedMemoryNoReAttach
444  *
445  * This is called during startup of a postmaster child process when we choose
446  * *not* to re-attach to the existing shared memory segment.  We must clean up
447  * to leave things in the appropriate state.
448  *
449  * The child process startup logic might or might not call PGSharedMemoryDetach
450  * after this; make sure that it will be a no-op if called.
451  *
452  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
453  * parameters to this routine.  The caller must have already restored them to
454  * the postmaster's values.
455  */
456 void
PGSharedMemoryNoReAttach(void)457 PGSharedMemoryNoReAttach(void)
458 {
459 	Assert(ShmemProtectiveRegion != NULL);
460 	Assert(UsedShmemSegAddr != NULL);
461 	Assert(IsUnderPostmaster);
462 
463 	/*
464 	 * Under Windows we will not have mapped the segment, so we don't need to
465 	 * un-map it.  Just reset UsedShmemSegAddr to show we're not attached.
466 	 */
467 	UsedShmemSegAddr = NULL;
468 
469 	/*
470 	 * We *must* close the inherited shmem segment handle, else Windows will
471 	 * consider the existence of this process to mean it can't release the
472 	 * shmem segment yet.  We can now use PGSharedMemoryDetach to do that.
473 	 */
474 	PGSharedMemoryDetach();
475 }
476 
477 /*
478  * PGSharedMemoryDetach
479  *
480  * Detach from the shared memory segment, if still attached.  This is not
481  * intended to be called explicitly by the process that originally created the
482  * segment (it will have an on_shmem_exit callback registered to do that).
483  * Rather, this is for subprocesses that have inherited an attachment and want
484  * to get rid of it.
485  *
486  * ShmemProtectiveRegion, UsedShmemSegID and UsedShmemSegAddr are implicit
487  * parameters to this routine.
488  */
489 void
PGSharedMemoryDetach(void)490 PGSharedMemoryDetach(void)
491 {
492 	/*
493 	 * Releasing the protective region liberates an unimportant quantity of
494 	 * address space, but be tidy.
495 	 */
496 	if (ShmemProtectiveRegion != NULL)
497 	{
498 		if (VirtualFree(ShmemProtectiveRegion, 0, MEM_RELEASE) == 0)
499 			elog(LOG, "failed to release reserved memory region (addr=%p): error code %lu",
500 				 ShmemProtectiveRegion, GetLastError());
501 
502 		ShmemProtectiveRegion = NULL;
503 	}
504 
505 	/* Unmap the view, if it's mapped */
506 	if (UsedShmemSegAddr != NULL)
507 	{
508 		if (!UnmapViewOfFile(UsedShmemSegAddr))
509 			elog(LOG, "could not unmap view of shared memory: error code %lu",
510 				 GetLastError());
511 
512 		UsedShmemSegAddr = NULL;
513 	}
514 
515 	/* And close the shmem handle, if we have one */
516 	if (UsedShmemSegID != INVALID_HANDLE_VALUE)
517 	{
518 		if (!CloseHandle(UsedShmemSegID))
519 			elog(LOG, "could not close handle to shared memory: error code %lu",
520 				 GetLastError());
521 
522 		UsedShmemSegID = INVALID_HANDLE_VALUE;
523 	}
524 }
525 
526 
527 /*
528  * pgwin32_SharedMemoryDelete
529  *
530  * Detach from and delete the shared memory segment
531  * (called as an on_shmem_exit callback, hence funny argument list)
532  */
533 static void
pgwin32_SharedMemoryDelete(int status,Datum shmId)534 pgwin32_SharedMemoryDelete(int status, Datum shmId)
535 {
536 	Assert(DatumGetPointer(shmId) == UsedShmemSegID);
537 	PGSharedMemoryDetach();
538 }
539 
540 /*
541  * pgwin32_ReserveSharedMemoryRegion(hChild)
542  *
543  * Reserve the memory region that will be used for shared memory in a child
544  * process. It is called before the child process starts, to make sure the
545  * memory is available.
546  *
547  * Once the child starts, DLLs loading in different order or threads getting
548  * scheduled differently may allocate memory which can conflict with the
549  * address space we need for our shared memory. By reserving the shared
550  * memory region before the child starts, and freeing it only just before we
551  * attempt to get access to the shared memory forces these allocations to
552  * be given different address ranges that don't conflict.
553  *
554  * NOTE! This function executes in the postmaster, and should for this
555  * reason not use elog(FATAL) since that would take down the postmaster.
556  */
557 int
pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)558 pgwin32_ReserveSharedMemoryRegion(HANDLE hChild)
559 {
560 	void	   *address;
561 
562 	Assert(ShmemProtectiveRegion != NULL);
563 	Assert(UsedShmemSegAddr != NULL);
564 	Assert(UsedShmemSegSize != 0);
565 
566 	/* ShmemProtectiveRegion */
567 	address = VirtualAllocEx(hChild, ShmemProtectiveRegion,
568 							 PROTECTIVE_REGION_SIZE,
569 							 MEM_RESERVE, PAGE_NOACCESS);
570 	if (address == NULL)
571 	{
572 		/* Don't use FATAL since we're running in the postmaster */
573 		elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
574 			 ShmemProtectiveRegion, hChild, GetLastError());
575 		return false;
576 	}
577 	if (address != ShmemProtectiveRegion)
578 	{
579 		/*
580 		 * Should never happen - in theory if allocation granularity causes
581 		 * strange effects it could, so check just in case.
582 		 *
583 		 * Don't use FATAL since we're running in the postmaster.
584 		 */
585 		elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
586 			 address, ShmemProtectiveRegion);
587 		return false;
588 	}
589 
590 	/* UsedShmemSegAddr */
591 	address = VirtualAllocEx(hChild, UsedShmemSegAddr, UsedShmemSegSize,
592 							 MEM_RESERVE, PAGE_READWRITE);
593 	if (address == NULL)
594 	{
595 		elog(LOG, "could not reserve shared memory region (addr=%p) for child %p: error code %lu",
596 			 UsedShmemSegAddr, hChild, GetLastError());
597 		return false;
598 	}
599 	if (address != UsedShmemSegAddr)
600 	{
601 		elog(LOG, "reserved shared memory region got incorrect address %p, expected %p",
602 			 address, UsedShmemSegAddr);
603 		return false;
604 	}
605 
606 	return true;
607 }
608