1 /* Copyright (C) 2014 Wildfire Games.
2  *
3  * Permission is hereby granted, free of charge, to any person obtaining
4  * a copy of this software and associated documentation files (the
5  * "Software"), to deal in the Software without restriction, including
6  * without limitation the rights to use, copy, modify, merge, publish,
7  * distribute, sublicense, and/or sell copies of the Software, and to
8  * permit persons to whom the Software is furnished to do so, subject to
9  * the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included
12  * in all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18  * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
23 /*
24  * Windows implementation of sysdep/cpu
25  */
26 
27 #include "precompiled.h"
28 #include "lib/sysdep/os/win/wcpu.h"
29 #include "lib/sysdep/os_cpu.h"
30 
31 #include "lib/bits.h"
32 #include "lib/alignment.h"
33 #include "lib/module_init.h"
34 #include "lib/sysdep/os/win/wutil.h"
35 #include "lib/sysdep/arch/x86_x64/x86_x64.h"
36 
37 
os_cpu_ProcessorMask()38 uintptr_t os_cpu_ProcessorMask()
39 {
40 	static uintptr_t processorMask;
41 
42 	if(!processorMask)
43 	{
44 		const HANDLE hProcess = GetCurrentProcess();
45 		DWORD_PTR processAffinity, systemAffinity;
46 		const BOOL ok = GetProcessAffinityMask(hProcess, &processAffinity, &systemAffinity);
47 		ENSURE(ok);
48 		ENSURE(processAffinity != 0);
49 		processorMask = processAffinity;
50 	}
51 
52 	return processorMask;
53 }
54 
55 
os_cpu_NumProcessors()56 size_t os_cpu_NumProcessors()
57 {
58 	static size_t numProcessors;
59 
60 	if(!numProcessors)
61 	{
62 		numProcessors = PopulationCount(os_cpu_ProcessorMask());
63 
64 		// sanity check
65 		SYSTEM_INFO si;
66 		GetSystemInfo(&si);	// guaranteed to succeed
67 		ENSURE(numProcessors <= (size_t)si.dwNumberOfProcessors);
68 		ENSURE(numProcessors >= 1);
69 	}
70 
71 	return numProcessors;
72 }
73 
74 
75 //-----------------------------------------------------------------------------
76 
wcpu_ReadFrequencyFromRegistry(u32 & freqMhz)77 Status wcpu_ReadFrequencyFromRegistry(u32& freqMhz)
78 {
79 	HKEY hKey;
80 	if(RegOpenKeyExW(HKEY_LOCAL_MACHINE, L"HARDWARE\\DESCRIPTION\\System\\CentralProcessor\\0", 0, KEY_QUERY_VALUE, &hKey) != ERROR_SUCCESS)
81 		return ERR::NOT_SUPPORTED;
82 
83 	DWORD size = sizeof(freqMhz);
84 	LONG ret = RegQueryValueExW(hKey, L"~MHz", 0, 0, (LPBYTE)&freqMhz, &size);
85 
86 	RegCloseKey(hKey);
87 
88 	if(ret != ERROR_SUCCESS)
89 		WARN_RETURN(ERR::FAIL);
90 
91 	return INFO::OK;
92 }
93 
94 
os_cpu_PageSize()95 size_t os_cpu_PageSize()
96 {
97 	static size_t systemPageSize;
98 
99 	if(!systemPageSize)
100 	{
101 		SYSTEM_INFO si;
102 		GetSystemInfo(&si);	// guaranteed to succeed
103 		systemPageSize = (size_t)si.dwPageSize;
104 	}
105 
106 	return systemPageSize;
107 }
108 
109 
os_cpu_LargePageSize()110 size_t os_cpu_LargePageSize()
111 {
112 	static size_t largePageSize = ~(size_t)0;	// "0" has special significance
113 
114 	if(largePageSize == ~(size_t)0)
115 	{
116 		WUTIL_FUNC(pGetLargePageMinimum, SIZE_T, (void));
117 		WUTIL_IMPORT_KERNEL32(GetLargePageMinimum, pGetLargePageMinimum);
118 		if(pGetLargePageMinimum)
119 		{
120 			largePageSize = pGetLargePageMinimum();
121 			// Note: checks disabled due to failing on Vista SP2 with old Xeon CPU
122 			//	see http://trac.wildfiregames.com/ticket/2346
123 			//ENSURE(largePageSize != 0);	// IA-32 and AMD64 definitely support large pages
124 			//ENSURE(largePageSize > os_cpu_PageSize());
125 		}
126 		// no OS support for large pages
127 		else
128 			largePageSize = 0;
129 	}
130 
131 	return largePageSize;
132 }
133 
134 
GetMemoryStatus(MEMORYSTATUSEX & mse)135 static void GetMemoryStatus(MEMORYSTATUSEX& mse)
136 {
137 	// note: we no longer bother dynamically importing GlobalMemoryStatusEx -
138 	// it's available on Win2k and above. this function safely handles
139 	// systems with > 4 GB of memory.
140 	mse.dwLength = sizeof(mse);
141 	const BOOL ok = GlobalMemoryStatusEx(&mse);
142 	WARN_IF_FALSE(ok);
143 }
144 
os_cpu_QueryMemorySize()145 size_t os_cpu_QueryMemorySize()
146 {
147 	MEMORYSTATUSEX mse;
148 	GetMemoryStatus(mse);
149 	DWORDLONG memorySize = mse.ullTotalPhys;
150 
151 	// Richter, "Programming Applications for Windows": the reported
152 	// value doesn't include non-paged pool reserved during boot;
153 	// it's not considered available to the kernel. (the amount is
154 	// 528 KiB on a 512 MiB WinXP/Win2k machine). we'll round up
155 	// to the nearest megabyte to fix this.
156 	memorySize = round_up(memorySize, DWORDLONG(1*MiB));		// (Align<> cannot compute DWORDLONG)
157 
158 	return size_t(memorySize / MiB);
159 }
160 
os_cpu_MemoryAvailable()161 size_t os_cpu_MemoryAvailable()
162 {
163 	MEMORYSTATUSEX mse;
164 	GetMemoryStatus(mse);
165 	const size_t memoryAvailableMiB = size_t(mse.ullAvailPhys / MiB);
166 	return memoryAvailableMiB;
167 }
168 
169 
170 //-----------------------------------------------------------------------------
171 
wcpu_AffinityFromProcessorMask(DWORD_PTR processAffinity,uintptr_t processorMask)172 DWORD_PTR wcpu_AffinityFromProcessorMask(DWORD_PTR processAffinity, uintptr_t processorMask)
173 {
174 	DWORD_PTR affinity = 0;
175 
176 	size_t processor = (size_t)-1;
177 	for(DWORD processorNumber = 0; processorNumber < (DWORD)os_cpu_MaxProcessors; processorNumber++)
178 	{
179 		if(IsBitSet(processAffinity, processorNumber))
180 		{
181 			++processor;	// index among the affinity's set bits
182 
183 			if(IsBitSet(processorMask, processor))
184 				affinity |= DWORD_PTR(1) << processorNumber;
185 		}
186 	}
187 
188 	return affinity;
189 }
190 
wcpu_ProcessorMaskFromAffinity(DWORD_PTR processAffinity,DWORD_PTR affinity)191 uintptr_t wcpu_ProcessorMaskFromAffinity(DWORD_PTR processAffinity, DWORD_PTR affinity)
192 {
193 	uintptr_t processorMask = 0;
194 
195 	size_t processor = (size_t)-1;
196 	for(DWORD processorNumber = 0; processorNumber < (DWORD)os_cpu_MaxProcessors; processorNumber++)
197 	{
198 		if(IsBitSet(processAffinity, processorNumber))
199 		{
200 			++processor;	// now corresponds to processorNumber
201 
202 			if(IsBitSet(affinity, processorNumber))
203 				processorMask |= uintptr_t(1) << processor;
204 		}
205 	}
206 
207 	return processorMask;
208 }
209 
210 
211 //-----------------------------------------------------------------------------
212 
VerifyRunningOnCorrectProcessors(DWORD_PTR affinity)213 static void VerifyRunningOnCorrectProcessors(DWORD_PTR affinity)
214 {
215 	DWORD currentProcessor;
216 
217 	// note: NtGetCurrentProcessorNumber and RtlGetCurrentProcessorNumber aren't
218 	// implemented on WinXP SP2.
219 	WUTIL_FUNC(pGetCurrentProcessorNumber, DWORD, (void));
220 	WUTIL_IMPORT_KERNEL32(GetCurrentProcessorNumber, pGetCurrentProcessorNumber);
221 	if(pGetCurrentProcessorNumber)
222 		currentProcessor = pGetCurrentProcessorNumber();
223 	else
224 	{
225 		// note: searching for the current APIC ID or IDT address in a
226 		// table won't work because initializing the table also requires
227 		// this function. LSL only works on Vista (which already
228 		// has GetCurrentProcessorNumber).
229 		return;
230 	}
231 
232 	ENSURE(IsBitSet(affinity, currentProcessor));
233 }
234 
235 
os_cpu_SetThreadAffinityMask(uintptr_t processorMask)236 uintptr_t os_cpu_SetThreadAffinityMask(uintptr_t processorMask)
237 {
238 	const size_t numProcessors = os_cpu_NumProcessors();
239 	// (avoid undefined result when right shift count >= number of bits)
240 	ENSURE(numProcessors == sizeof(processorMask)*CHAR_BIT || (processorMask >> numProcessors) == 0);
241 
242 	DWORD_PTR processAffinity, systemAffinity;
243 	const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity);
244 	WARN_IF_FALSE(ok);
245 
246 	const DWORD_PTR affinity = wcpu_AffinityFromProcessorMask(processAffinity, processorMask);
247 	const DWORD_PTR previousAffinity = SetThreadAffinityMask(GetCurrentThread(), affinity);
248 	ENSURE(previousAffinity != 0);	// ensure function didn't fail
249 	// (MSDN says SetThreadAffinityMask takes care of rescheduling)
250 	VerifyRunningOnCorrectProcessors(affinity);
251 
252 	const uintptr_t previousProcessorMask = wcpu_ProcessorMaskFromAffinity(processAffinity, previousAffinity);
253 	return previousProcessorMask;
254 }
255 
256 
os_cpu_CallByEachCPU(OsCpuCallback cb,uintptr_t cbData)257 Status os_cpu_CallByEachCPU(OsCpuCallback cb, uintptr_t cbData)
258 {
259 	// abort if we can't run on all system processors
260 	DWORD_PTR processAffinity, systemAffinity;
261 	{
262 		const BOOL ok = GetProcessAffinityMask(GetCurrentProcess(), &processAffinity, &systemAffinity);
263 		WARN_IF_FALSE(ok);
264 		if(processAffinity != systemAffinity)
265 			return ERR::OS_CPU_RESTRICTED_AFFINITY;	// NOWARN
266 	}
267 
268 	const uintptr_t previousAffinity = os_cpu_SetThreadAffinityMask(os_cpu_ProcessorMask());
269 
270 	for(size_t processor = 0; processor < os_cpu_NumProcessors(); processor++)
271 	{
272 		const uintptr_t processorMask = uintptr_t(1) << processor;
273 		os_cpu_SetThreadAffinityMask(processorMask);
274 		cb(processor, cbData);
275 	}
276 
277 	(void)os_cpu_SetThreadAffinityMask(previousAffinity);
278 
279 	return INFO::OK;
280 }
281