/* Copyright (C) 2018 Wildfire Games.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included
 * in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
 * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

/*
 * virtual memory interface. supersedes POSIX mmap; provides support for
 * large pages, autocommit, and specifying protection flags during allocation.
 */
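
// minimal usage sketch (illustrative only, not referenced by this module;
// the GiB/MiB size constants are assumed to be defined elsewhere in lib/):
//
//   void* p = vm::ReserveAddressSpace(GiB, 16*MiB);	// reserve only
//   vm::BeginOnDemandCommits();	// install the fault handler (see below)
//   memset(p, 0, 100*MiB);	// touching pages commits them on demand
//   vm::EndOnDemandCommits();
//   vm::ReleaseAddressSpace(p, GiB);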

#include "precompiled.h"
#include "lib/sysdep/vm.h"

#include "lib/sysdep/os/win/wutil.h"
#include <excpt.h>

#include "lib/timer.h"
#include "lib/bits.h"	// round_down
#include "lib/alignment.h"	// CACHE_ALIGNED
#include "lib/module_init.h"
#include "lib/sysdep/cpu.h"	// cpu_AtomicAdd
#include "lib/sysdep/numa.h"
#include "lib/sysdep/arch/x86_x64/x86_x64.h"	// x86_x64::ApicId
#include "lib/sysdep/arch/x86_x64/apic.h"	// ProcessorFromApicId
#include "lib/sysdep/os/win/wversion.h"
#include "lib/sysdep/os/win/winit.h"
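// register wvm_Init (below) with winit's critical-init group so that the
// function pointers it resolves are available before this module is used.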
WINIT_REGISTER_CRITICAL_INIT(wvm_Init);


//-----------------------------------------------------------------------------
// functions not supported by 32-bit Windows XP

static WUTIL_FUNC(pGetCurrentProcessorNumber, DWORD, (VOID));
static WUTIL_FUNC(pGetNumaProcessorNode, BOOL, (UCHAR, PUCHAR));
static WUTIL_FUNC(pVirtualAllocExNuma, LPVOID, (HANDLE, LPVOID, SIZE_T, DWORD, DWORD, DWORD));

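// fallback when the OS lacks GetCurrentProcessorNumber (e.g. 32-bit XP):
// derive the current processor index from the CPU's APIC ID.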
static DWORD WINAPI EmulateGetCurrentProcessorNumber(VOID)
{
	const ApicId apicId = GetApicId();
	const DWORD processor = (DWORD)ProcessorFromApicId(apicId);
	ASSERT(processor < os_cpu_MaxProcessors);
	return processor;
}

static BOOL WINAPI EmulateGetNumaProcessorNode(UCHAR UNUSED(processor), PUCHAR node)
{
	// given that the system doesn't support GetNumaProcessorNode,
	// it will also lack VirtualAllocExNuma, so the node value we assign
	// is ignored by EmulateVirtualAllocExNuma.
	*node = 0;
	return TRUE;
}

static LPVOID WINAPI EmulateVirtualAllocExNuma(HANDLE UNUSED(hProcess), LPVOID p, SIZE_T size, DWORD allocationType, DWORD protect, DWORD UNUSED(node))
{
	return VirtualAlloc(p, size, allocationType, protect);
}


static Status wvm_Init()
{
	WUTIL_IMPORT_KERNEL32(GetCurrentProcessorNumber, pGetCurrentProcessorNumber);
	WUTIL_IMPORT_KERNEL32(GetNumaProcessorNode, pGetNumaProcessorNode);
	WUTIL_IMPORT_KERNEL32(VirtualAllocExNuma, pVirtualAllocExNuma);

	if(!pGetCurrentProcessorNumber)
		pGetCurrentProcessorNumber = &EmulateGetCurrentProcessorNumber;
	if(!pGetNumaProcessorNode)
		pGetNumaProcessorNode = &EmulateGetNumaProcessorNode;
	if(!pVirtualAllocExNuma)
		pVirtualAllocExNuma = &EmulateVirtualAllocExNuma;

	return INFO::OK;
}


namespace vm {


//-----------------------------------------------------------------------------
// per-processor statistics

// (alignment avoids false sharing)
CACHE_ALIGNED(struct Statistics)	// POD
{
	// thread-safe (required due to concurrent commits)
	void NotifyLargePageCommit()
	{
		cpu_AtomicAdd(&largePageCommits, +1);
	}

	void NotifySmallPageCommit()
	{
		cpu_AtomicAdd(&smallPageCommits, +1);
	}

	intptr_t largePageCommits;
	intptr_t smallPageCommits;
};
static CACHE_ALIGNED(Statistics) statistics[os_cpu_MaxProcessors];

void DumpStatistics()
{
	ENSURE(IsAligned(&statistics[0], cacheLineSize));
	ENSURE(IsAligned(&statistics[1], cacheLineSize));

	size_t smallPageCommits = 0;
	size_t largePageCommits = 0;
	uintptr_t processorsWithNoCommits = 0;
	for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
	{
		const Statistics& s = statistics[processor];
		if(s.smallPageCommits == 0 && s.largePageCommits == 0)
			processorsWithNoCommits |= Bit<uintptr_t>(processor);
		smallPageCommits += s.smallPageCommits;
		largePageCommits += s.largePageCommits;
	}

	const size_t totalCommits = smallPageCommits+largePageCommits;
	if(totalCommits == 0)	// this module wasn't used => don't print debug output
		return;

	const size_t largePageRatio = totalCommits? largePageCommits*100/totalCommits : 0;
	debug_printf("%zu commits (%zu, i.e. %zu%% of them via large pages)\n", totalCommits, largePageCommits, largePageRatio);
	if(processorsWithNoCommits != 0)
		debug_printf("  processors with no commits: %zx\n", (size_t)processorsWithNoCommits);

	if(numa_NumNodes() > 1)
		debug_printf("NUMA factor: %.2f\n", numa_Factor());
}


//-----------------------------------------------------------------------------
// allocator with large-page and NUMA support

static bool largePageAllocationTookTooLong = false;

static bool ShouldUseLargePages(size_t allocationSize, DWORD allocationType, PageType pageType)
{
	// don't even check for large page support.
	if(pageType == kSmall)
		return false;

	// can't use large pages when reserving - VirtualAlloc would fail with
	// ERROR_INVALID_PARAMETER.
	if((allocationType & MEM_COMMIT) == 0)
		return false;

	// OS lacks support for large pages.
	if(os_cpu_LargePageSize() == 0)
		return false;

	// large pages are available and application wants them used.
	if(pageType == kLarge)
		return true;

	// default: use a heuristic.
	{
		// internal fragmentation would be excessive.
		if(allocationSize <= g_LargePageSize / 2)
			return false;

		// a previous attempt already took too long.
		if(largePageAllocationTookTooLong)
			return false;

		// pre-Vista Windows OSes attempt to cope with page fragmentation by
		// trimming the working set of all processes, thus swapping them out,
		// and waiting for contiguous regions to appear. this is terribly
		// slow (multiple seconds), hence the following heuristic:
		if(wversion_Number() < WVERSION_VISTA)
		{
			// if there's not plenty of free memory, then memory is surely
			// already fragmented.
			if(os_cpu_MemoryAvailable() < 2000)	// 2 GB
				return false;
		}
	}

	return true;
}


// used for reserving address space, committing pages, or both.
static void* AllocateLargeOrSmallPages(uintptr_t address, size_t size, DWORD allocationType, PageType pageType = kDefault, int prot = PROT_READ|PROT_WRITE)
{
	const HANDLE hProcess = GetCurrentProcess();
	const DWORD protect = MemoryProtectionFromPosix(prot);

	UCHAR node;
	const DWORD processor = pGetCurrentProcessorNumber();
	WARN_IF_FALSE(pGetNumaProcessorNode((UCHAR)processor, &node));

	if(ShouldUseLargePages(size, allocationType, pageType))
	{
		// MEM_LARGE_PAGES requires aligned addresses and sizes
		const size_t largePageSize = os_cpu_LargePageSize();
		const uintptr_t alignedAddress = round_down(address, largePageSize);
		const size_t alignedSize = round_up(size+largePageSize-1, largePageSize);
		// note: this call can take SECONDS, which is why several checks are
		// undertaken before we even try. these aren't authoritative, so we
		// at least prevent future attempts if it takes too long.
		const double startTime = timer_Time(); COMPILER_FENCE;
		void* largePages = pVirtualAllocExNuma(hProcess, LPVOID(alignedAddress), alignedSize, allocationType|MEM_LARGE_PAGES, protect, node);
		const double elapsedTime = timer_Time() - startTime; COMPILER_FENCE;
		if(elapsedTime > 0.5)
			largePageAllocationTookTooLong = true;	// avoid large pages next time
		if(largePages)
		{
			if((allocationType & MEM_COMMIT) != 0)
				statistics[processor].NotifyLargePageCommit();
			return largePages;
		}
	}

	// try (again) with regular pages
	void* smallPages = pVirtualAllocExNuma(hProcess, LPVOID(address), size, allocationType, protect, node);
	if(smallPages)
	{
		if((allocationType & MEM_COMMIT) != 0)
			statistics[processor].NotifySmallPageCommit();
		return smallPages;
	}
	else
	{
		MEMORY_BASIC_INFORMATION mbi = {0};
		(void)VirtualQuery(LPCVOID(address), &mbi, sizeof(mbi));	// return value is #bytes written in mbi
		debug_printf("Allocation failed: base=%p allocBase=%p allocProt=%lu size=%zu state=%lu prot=%lu type=%lu\n", mbi.BaseAddress, mbi.AllocationBase, mbi.AllocationProtect, mbi.RegionSize, mbi.State, mbi.Protect, mbi.Type);
	}

	return 0;
}


//-----------------------------------------------------------------------------
// address space reservation

// indicates the extent of a range of address space,
// and the parameters for committing large/small pages in it.
//
// this bookkeeping information increases the safety of on-demand commits,
// enables different parameters for separate allocations, and allows
// variable alignment because it retains the original base address.
// (storing this information within the allocated memory would
// require mapping an additional page and may waste an entire
// large page if the base address happens to be aligned already.)
CACHE_ALIGNED(struct AddressRangeDescriptor)	// POD
{
	// attempt to activate this descriptor and reserve address space.
	// side effect: initializes all fields if successful.
	//
	// @param size, commitSize, pageType, prot - see ReserveAddressSpace.
	// @return INFO::SKIPPED if this descriptor is already in use,
	//   INFO::OK on success, otherwise ERR::NO_MEM (after showing an
	//   error message).
	Status Allocate(size_t size, size_t commitSize, PageType pageType, int prot)
	{
		// if this descriptor wasn't yet in use, mark it as busy
		// (double-checking is cheaper than cpu_CAS)
		if(base != 0 || !cpu_CAS(&base, intptr_t(0), intptr_t(this)))
			return INFO::SKIPPED;

		ENSURE(size != 0);		// probably indicates a bug in caller
		ENSURE((commitSize % g_LargePageSize) == 0 || pageType == kSmall);
		ASSERT(pageType == kLarge || pageType == kSmall || pageType == kDefault);
		ASSERT(prot == PROT_NONE || (prot & ~(PROT_READ|PROT_WRITE|PROT_EXEC)) == 0);
		m_CommitSize = commitSize;
		m_PageType = pageType;
		m_Prot = prot;
		m_Alignment = pageType == kSmall ? g_PageSize : g_LargePageSize;
		m_TotalSize = round_up(size + m_Alignment - 1, m_Alignment);

		// NB: it is meaningless to ask for large pages when reserving
		// (see ShouldUseLargePages). pageType only affects subsequent commits.
		base = (intptr_t)AllocateLargeOrSmallPages(0, m_TotalSize, MEM_RESERVE);
		if(!base)
		{
			debug_printf("AllocateLargeOrSmallPages of %llu failed\n", (u64)m_TotalSize);
			DEBUG_DISPLAY_ERROR(ErrorString());
			return ERR::NO_MEM;	// NOWARN (error string is more helpful)
		}

		alignedBase = round_up(uintptr_t(base), m_Alignment);
		alignedEnd = alignedBase + round_up(size, m_Alignment);
		return INFO::OK;
	}

	void Free()
	{
		vm::Free((void*)base, m_TotalSize);
		m_Alignment = alignedBase = alignedEnd = 0;
		m_TotalSize = 0;
		COMPILER_FENCE;
		base = 0;	// release descriptor for subsequent reuse
	}

	bool Contains(uintptr_t address) const
	{
		// safety check: we should never see pointers in the no-man's-land
		// between the original and rounded up base addresses.
		ENSURE(!(uintptr_t(base) <= address && address < alignedBase));

		return (alignedBase <= address && address < alignedEnd);
	}

	bool Commit(uintptr_t address)
	{
		// (safe because Allocate rounded up to alignment)
		const uintptr_t alignedAddress = round_down(address, m_Alignment);
		ENSURE(alignedBase <= alignedAddress && alignedAddress + m_CommitSize <= alignedEnd);
		return vm::Commit(alignedAddress, m_CommitSize, m_PageType, m_Prot);
	}

	// corresponds to the respective page size (Windows requires
	// naturally aligned addresses and sizes when committing large pages).
	// note that VirtualAlloc's alignment defaults to 64 KiB.
	uintptr_t m_Alignment;

	uintptr_t alignedBase;	// multiple of alignment
	uintptr_t alignedEnd;	// "

	// (actual requested size / allocated address is required by
	// ReleaseAddressSpace due to variable alignment.)
	volatile intptr_t base;	// (type is dictated by cpu_CAS)
	size_t m_TotalSize;

	// parameters to be relayed to vm::Commit
	size_t m_CommitSize;
	PageType m_PageType;
	int m_Prot;

//private:
	static const wchar_t* ErrorString()
	{
#if ARCH_IA32
		return L"Out of address space (64-bit OS may help)";
#elif OS_WIN
		// because early AMD64 lacked CMPXCHG16B, the Windows lock-free slist
		// must squeeze the address, ABA tag and list length (a questionable
		// design decision) into 64 bits. that leaves 39 bits for the
		// address, plus 4 implied zero bits due to 16-byte alignment.
		// [http://www.alex-ionescu.com/?p=50]
		return L"Out of address space (Windows only provides 8 TiB)";
#else
		return L"Out of address space";
#endif
	}
};

// (array size governs the max. number of extant allocations)
static AddressRangeDescriptor ranges[2*os_cpu_MaxProcessors];


static AddressRangeDescriptor* FindDescriptor(uintptr_t address)
{
	for(size_t idxRange = 0; idxRange < ARRAY_SIZE(ranges); idxRange++)
	{
		AddressRangeDescriptor& d = ranges[idxRange];
		if(d.Contains(address))
			return &d;
	}

	return 0;	// not contained in any allocated ranges
}


void* ReserveAddressSpace(size_t size, size_t commitSize, PageType pageType, int prot)
{
	for(size_t idxRange = 0; idxRange < ARRAY_SIZE(ranges); idxRange++)
	{
		Status ret = ranges[idxRange].Allocate(size, commitSize, pageType, prot);
		if(ret == INFO::OK)
			return (void*)ranges[idxRange].alignedBase;
		if(ret == ERR::NO_MEM)
			return 0;
		// else: descriptor already in use, try the next one
	}

	// all descriptors are in use; ranges[] was too small
	DEBUG_WARN_ERR(ERR::LIMIT);
	return 0;
}


void ReleaseAddressSpace(void* p, size_t UNUSED(size))
{
	// it is customary to ignore null pointers
	if(!p)
		return;

	AddressRangeDescriptor* d = FindDescriptor(uintptr_t(p));
	if(d)
		d->Free();
	else
	{
		debug_printf("No AddressRangeDescriptor contains %p\n", p);
		ENSURE(0);
	}
}


//-----------------------------------------------------------------------------
// commit/decommit, allocate/free, protect
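// (thin wrappers around VirtualAlloc/VirtualFree/VirtualProtect that accept
// POSIX-style PROT_* flags; Commit and Allocate also update the per-processor
// statistics via AllocateLargeOrSmallPages.)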

TIMER_ADD_CLIENT(tc_commit);

bool Commit(uintptr_t address, size_t size, PageType pageType, int prot)
{
	TIMER_ACCRUE_ATOMIC(tc_commit);

	return AllocateLargeOrSmallPages(address, size, MEM_COMMIT, pageType, prot) != 0;
}


bool Decommit(uintptr_t address, size_t size)
{
	return VirtualFree(LPVOID(address), size, MEM_DECOMMIT) != FALSE;
}


bool Protect(uintptr_t address, size_t size, int prot)
{
	const DWORD protect = MemoryProtectionFromPosix(prot);
	DWORD oldProtect;	// required by VirtualProtect
	const BOOL ok = VirtualProtect(LPVOID(address), size, protect, &oldProtect);
	return ok != FALSE;
}


void* Allocate(size_t size, PageType pageType, int prot)
{
	return AllocateLargeOrSmallPages(0, size, MEM_RESERVE|MEM_COMMIT, pageType, prot);
}


void Free(void* p, size_t UNUSED(size))
{
	if(p)	// otherwise, VirtualFree complains
	{
		const BOOL ok = VirtualFree(p, 0, MEM_RELEASE);
		WARN_IF_FALSE(ok);
	}
}


//-----------------------------------------------------------------------------
// on-demand commit
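//
// mechanism: ReserveAddressSpace only reserves address space. the vectored
// exception handler below intercepts the access violation raised by the
// first access to an uncommitted page, commits the surrounding block using
// the parameters stored in the owning AddressRangeDescriptor, and then
// resumes the faulting instruction.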

// NB: avoid using debug_printf here because OutputDebugString has been
// observed to generate vectored exceptions when running outside the IDE.
static LONG CALLBACK VectoredHandler(const PEXCEPTION_POINTERS ep)
{
	const PEXCEPTION_RECORD er = ep->ExceptionRecord;

	// we only want to handle access violations. (strictly speaking,
	// unmapped memory causes page faults, but Windows reports them
	// with EXCEPTION_ACCESS_VIOLATION.)
	if(er->ExceptionCode != EXCEPTION_ACCESS_VIOLATION)
		return EXCEPTION_CONTINUE_SEARCH;

	// NB: read exceptions are legitimate and occur when updating an
	// accumulator for the first time.

	// get the source/destination of the read/write operation that
	// failed. (NB: don't use er->ExceptionAddress - that's the
	// location of the code that encountered the fault)
	const uintptr_t address = (uintptr_t)er->ExceptionInformation[1];

	// if unknown (e.g. access violation in kernel address space or
	// violation of alignment requirements), we don't want to handle it.
	if(address == ~uintptr_t(0))
		return EXCEPTION_CONTINUE_SEARCH;

	// the address space must have been allocated by ReserveAddressSpace
	// (otherwise we wouldn't know the desired commitSize/pageType/prot).
	AddressRangeDescriptor* d = FindDescriptor(address);
	if(!d)
		return EXCEPTION_CONTINUE_SEARCH;

	// NB: the first access to a page isn't necessarily at offset 0
	// (memcpy isn't guaranteed to copy sequentially). rounding down
	// is safe and necessary - see AddressRangeDescriptor::m_Alignment.
	const uintptr_t alignedAddress = round_down(address, d->m_Alignment);
	bool ok = d->Commit(alignedAddress);
	if(!ok)
	{
		debug_printf("VectoredHandler: Commit(0x%p) failed; address=0x%p\n", (void*)alignedAddress, (void*)address);
		ENSURE(0);
		return EXCEPTION_CONTINUE_SEARCH;
	}

	// continue at (i.e. retry) the same instruction.
	return EXCEPTION_CONTINUE_EXECUTION;
}


static PVOID handler;
static ModuleInitState initState;
static volatile intptr_t references = 0;	// atomic

static Status InitHandler()
{
	ENSURE(handler == 0);
	handler = AddVectoredExceptionHandler(TRUE, VectoredHandler);
	ENSURE(handler != 0);
	return INFO::OK;
}

static void ShutdownHandler()
{
	ENSURE(handler != 0);
	const ULONG ret = RemoveVectoredExceptionHandler(handler);
	ENSURE(ret != 0);
	handler = 0;
}

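// reference-counted installation of the handler: the first call to
// BeginOnDemandCommits installs it (via ModuleInit), and the matching final
// EndOnDemandCommits removes it again.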
void BeginOnDemandCommits()
{
	ModuleInit(&initState, InitHandler);
	cpu_AtomicAdd(&references, +1);
}

void EndOnDemandCommits()
{
	if(cpu_AtomicAdd(&references, -1) == 1)
		ModuleShutdown(&initState, ShutdownHandler);
}

}	// namespace vm