1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in
12 // the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google, Inc. nor the names of its contributors
15 // may be used to endorse or promote products derived from this
16 // software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30
31 #include <windows.h>
32 #include <mmsystem.h>
33 #include <process.h>
34
35 #include "nsWindowsDllInterceptor.h"
36 #include "mozilla/StackWalk_windows.h"
37 #include "mozilla/WindowsVersion.h"
38
39 namespace mozilla {
40 namespace baseprofiler {
41
MicrosecondsSince1970()42 static int64_t MicrosecondsSince1970() {
43 int64_t prt;
44 FILETIME ft;
45 SYSTEMTIME st;
46
47 GetSystemTime(&st);
48 SystemTimeToFileTime(&st, &ft);
49 static_assert(sizeof(ft) == sizeof(prt), "Expect FILETIME to be 64 bits");
50 memcpy(&prt, &ft, sizeof(prt));
51 const int64_t epochBias = 116444736000000000LL;
52 prt = (prt - epochBias) / 10;
53
54 return prt;
55 }
56
GetStackTop(void * aGuess)57 void* GetStackTop(void* aGuess) {
58 PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
59 return reinterpret_cast<void*>(pTib->StackBase);
60 }
61
PopulateRegsFromContext(Registers & aRegs,CONTEXT * aContext)62 static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
63 #if defined(GP_ARCH_amd64)
64 aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
65 aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
66 aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
67 #elif defined(GP_ARCH_x86)
68 aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
69 aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
70 aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
71 #elif defined(GP_ARCH_arm64)
72 aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
73 aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
74 aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
75 #else
76 # error "bad arch"
77 #endif
78 aRegs.mLR = 0;
79 }
80
81 // Gets a real (i.e. not pseudo) handle for the current thread, with the
82 // permissions needed for profiling.
83 // @return a real HANDLE for the current thread.
GetRealCurrentThreadHandleForProfiling()84 static HANDLE GetRealCurrentThreadHandleForProfiling() {
85 HANDLE realCurrentThreadHandle;
86 if (!::DuplicateHandle(
87 ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(),
88 &realCurrentThreadHandle,
89 THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION,
90 FALSE, 0)) {
91 return nullptr;
92 }
93
94 return realCurrentThreadHandle;
95 }
96
97 class PlatformData {
98 public:
99 // Get a handle to the calling thread. This is the thread that we are
100 // going to profile. We need a real handle because we are going to use it in
101 // the sampler thread.
PlatformData(BaseProfilerThreadId aThreadId)102 explicit PlatformData(BaseProfilerThreadId aThreadId)
103 : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
104 MOZ_ASSERT(DWORD(aThreadId.ToNumber()) == ::GetCurrentThreadId());
105 }
106
~PlatformData()107 ~PlatformData() {
108 if (mProfiledThread != nullptr) {
109 CloseHandle(mProfiledThread);
110 mProfiledThread = nullptr;
111 }
112 }
113
ProfiledThread()114 HANDLE ProfiledThread() { return mProfiledThread; }
115
116 private:
117 HANDLE mProfiledThread;
118 };
119
#if defined(USE_MOZ_STACK_WALK)
// Exposes the profiled thread's handle stored in aData. Only compiled when
// USE_MOZ_STACK_WALK is defined.
HANDLE GetThreadHandle(PlatformData* aData) {
  const HANDLE profiledThread = aData->ProfiledThread();
  return profiledThread;
}
#endif
124
125 static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
126
127 ////////////////////////////////////////////////////////////////////////
128 // BEGIN Sampler target specifics
129
Sampler(PSLockRef aLock)130 Sampler::Sampler(PSLockRef aLock) {}
131
Disable(PSLockRef aLock)132 void Sampler::Disable(PSLockRef aLock) {}
133
134 template <typename Func>
SuspendAndSampleAndResumeThread(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const TimeStamp & aNow,const Func & aProcessRegs)135 void Sampler::SuspendAndSampleAndResumeThread(
136 PSLockRef aLock, const RegisteredThread& aRegisteredThread,
137 const TimeStamp& aNow, const Func& aProcessRegs) {
138 HANDLE profiled_thread =
139 aRegisteredThread.GetPlatformData()->ProfiledThread();
140 if (profiled_thread == nullptr) {
141 return;
142 }
143
144 // Context used for sampling the register state of the profiled thread.
145 CONTEXT context;
146 memset(&context, 0, sizeof(context));
147
148 //----------------------------------------------------------------//
149 // Suspend the samplee thread and get its context.
150
151 static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
152 if (SuspendThread(profiled_thread) == kSuspendFailed) {
153 return;
154 }
155
156 // SuspendThread is asynchronous, so the thread may still be running.
157 // Call GetThreadContext first to ensure the thread is really suspended.
158 // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
159
160 // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
161 // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
162 #if defined(GP_ARCH_amd64)
163 context.ContextFlags = CONTEXT_FULL;
164 #else
165 context.ContextFlags = CONTEXT_CONTROL;
166 #endif
167 if (!GetThreadContext(profiled_thread, &context)) {
168 ResumeThread(profiled_thread);
169 return;
170 }
171
172 //----------------------------------------------------------------//
173 // Sample the target thread.
174
175 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
176 //
177 // The profiler's "critical section" begins here. We must be very careful
178 // what we do here, or risk deadlock. See the corresponding comment in
179 // platform-linux-android.cpp for details.
180
181 Registers regs;
182 PopulateRegsFromContext(regs, &context);
183 aProcessRegs(regs, aNow);
184
185 //----------------------------------------------------------------//
186 // Resume the target thread.
187
188 ResumeThread(profiled_thread);
189
190 // The profiler's critical section ends here.
191 //
192 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
193 }
194
195 // END Sampler target specifics
196 ////////////////////////////////////////////////////////////////////////
197
198 ////////////////////////////////////////////////////////////////////////
199 // BEGIN SamplerThread target specifics
200
ThreadEntry(void * aArg)201 static unsigned int __stdcall ThreadEntry(void* aArg) {
202 auto thread = static_cast<SamplerThread*>(aArg);
203 thread->Run();
204 return 0;
205 }
206
SamplerThread(PSLockRef aLock,uint32_t aActivityGeneration,double aIntervalMilliseconds,uint32_t aFeatures)207 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
208 double aIntervalMilliseconds, uint32_t aFeatures)
209 : mSampler(aLock),
210 mActivityGeneration(aActivityGeneration),
211 mIntervalMicroseconds(
212 std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
213 mNoTimerResolutionChange(
214 ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
215 if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
216 // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
217 // is not changed. However, if the requested interval is sufficiently low,
218 // the resolution will be adjusted to match. Note that this affects all
219 // timers in Firefox, and could therefore hide issues while profiling. This
220 // change may be prevented with the "notimerresolutionchange" feature.
221 ::timeBeginPeriod(mIntervalMicroseconds / 1000);
222 }
223
224 // Create a new thread. It is important to use _beginthreadex() instead of
225 // the Win32 function CreateThread(), because the CreateThread() does not
226 // initialize thread-specific structures in the C runtime library.
227 mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr,
228 /* stack_size */ 0,
229 ThreadEntry, this,
230 /* initflag */ 0, nullptr));
231 if (mThread == 0) {
232 MOZ_CRASH("_beginthreadex failed");
233 }
234 }
235
~SamplerThread()236 SamplerThread::~SamplerThread() {
237 WaitForSingleObject(mThread, INFINITE);
238
239 // Close our own handle for the thread.
240 if (mThread != kNoThread) {
241 CloseHandle(mThread);
242 }
243 }
244
SleepMicro(uint32_t aMicroseconds)245 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
246 // For now, keep the old behaviour of minimum Sleep(1), even for
247 // smaller-than-usual sleeps after an overshoot, unless the user has
248 // explicitly opted into a sub-millisecond profiler interval.
249 if (mIntervalMicroseconds >= 1000) {
250 ::Sleep(std::max(1u, aMicroseconds / 1000));
251 } else {
252 TimeStamp start = TimeStamp::Now();
253 TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
254
255 // First, sleep for as many whole milliseconds as possible.
256 if (aMicroseconds >= 1000) {
257 ::Sleep(aMicroseconds / 1000);
258 }
259
260 // Then, spin until enough time has passed.
261 while (TimeStamp::Now() < end) {
262 YieldProcessor();
263 }
264 }
265 }
266
Stop(PSLockRef aLock)267 void SamplerThread::Stop(PSLockRef aLock) {
268 if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
269 // Disable any timer resolution changes we've made. Do it now while
270 // gPSMutex is locked, i.e. before any other SamplerThread can be created
271 // and call ::timeBeginPeriod().
272 //
273 // It's safe to do this now even though this SamplerThread is still alive,
274 // because the next time the main loop of Run() iterates it won't get past
275 // the mActivityGeneration check, and so it won't make any more ::Sleep()
276 // calls.
277 ::timeEndPeriod(mIntervalMicroseconds / 1000);
278 }
279
280 mSampler.Disable(aLock);
281 }
282
283 // END SamplerThread target specifics
284 ////////////////////////////////////////////////////////////////////////
285
PlatformInit(PSLockRef aLock)286 static void PlatformInit(PSLockRef aLock) {}
287
#if defined(HAVE_NATIVE_UNWIND)
// Fills this register set from the calling thread's own context, captured
// with RtlCaptureContext().
void Registers::SyncPopulate() {
  CONTEXT callerContext;
  RtlCaptureContext(&callerContext);

  Registers& self = *this;
  PopulateRegsFromContext(self, &callerContext);
}
#endif
295
296 #if defined(GP_PLAT_amd64_windows)
297 static WindowsDllInterceptor NtDllIntercept;
298
299 typedef NTSTATUS(NTAPI* LdrUnloadDll_func)(HMODULE module);
300 static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll;
301
patched_LdrUnloadDll(HMODULE module)302 static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) {
303 // Prevent the stack walker from suspending this thread when LdrUnloadDll
304 // holds the RtlLookupFunctionEntry lock.
305 AutoSuppressStackWalking suppress;
306 return stub_LdrUnloadDll(module);
307 }
308
309 // These pointers are disguised as PVOID to avoid pulling in obscure headers
310 typedef PVOID(WINAPI* LdrResolveDelayLoadedAPI_func)(
311 PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
312 PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags);
313 static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func>
314 stub_LdrResolveDelayLoadedAPI;
315
patched_LdrResolveDelayLoadedAPI(PVOID ParentModuleBase,PVOID DelayloadDescriptor,PVOID FailureDllHook,PVOID FailureSystemHook,PVOID ThunkAddress,ULONG Flags)316 static PVOID WINAPI patched_LdrResolveDelayLoadedAPI(
317 PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
318 PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) {
319 // Prevent the stack walker from suspending this thread when
320 // LdrResolveDelayLoadAPI holds the RtlLookupFunctionEntry lock.
321 AutoSuppressStackWalking suppress;
322 return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor,
323 FailureDllHook, FailureSystemHook,
324 ThunkAddress, Flags);
325 }
326
InitializeWin64ProfilerHooks()327 MFBT_API void InitializeWin64ProfilerHooks() {
328 // This function could be called by both profilers, but we only want to run
329 // it once.
330 static bool ran = false;
331 if (ran) {
332 return;
333 }
334 ran = true;
335
336 NtDllIntercept.Init("ntdll.dll");
337 stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll);
338 if (IsWin8OrLater()) { // LdrResolveDelayLoadedAPI was introduced in Win8
339 stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept,
340 "LdrResolveDelayLoadedAPI",
341 &patched_LdrResolveDelayLoadedAPI);
342 }
343 }
344 #endif // defined(GP_PLAT_amd64_windows)
345
346 } // namespace baseprofiler
347 } // namespace mozilla
348