1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 //  * Redistributions of source code must retain the above copyright
9 //    notice, this list of conditions and the following disclaimer.
10 //  * Redistributions in binary form must reproduce the above copyright
11 //    notice, this list of conditions and the following disclaimer in
12 //    the documentation and/or other materials provided with the
13 //    distribution.
14 //  * Neither the name of Google, Inc. nor the names of its contributors
15 //    may be used to endorse or promote products derived from this
16 //    software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30 
31 #include <windows.h>
32 #include <mmsystem.h>
33 #include <process.h>
34 
35 #include "nsWindowsDllInterceptor.h"
36 #include "mozilla/StackWalk_windows.h"
37 #include "mozilla/WindowsVersion.h"
38 
39 namespace mozilla {
40 namespace baseprofiler {
41 
MicrosecondsSince1970()42 static int64_t MicrosecondsSince1970() {
43   int64_t prt;
44   FILETIME ft;
45   SYSTEMTIME st;
46 
47   GetSystemTime(&st);
48   SystemTimeToFileTime(&st, &ft);
49   static_assert(sizeof(ft) == sizeof(prt), "Expect FILETIME to be 64 bits");
50   memcpy(&prt, &ft, sizeof(prt));
51   const int64_t epochBias = 116444736000000000LL;
52   prt = (prt - epochBias) / 10;
53 
54   return prt;
55 }
56 
GetStackTop(void * aGuess)57 void* GetStackTop(void* aGuess) {
58   PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
59   return reinterpret_cast<void*>(pTib->StackBase);
60 }
61 
PopulateRegsFromContext(Registers & aRegs,CONTEXT * aContext)62 static void PopulateRegsFromContext(Registers& aRegs, CONTEXT* aContext) {
63 #if defined(GP_ARCH_amd64)
64   aRegs.mPC = reinterpret_cast<Address>(aContext->Rip);
65   aRegs.mSP = reinterpret_cast<Address>(aContext->Rsp);
66   aRegs.mFP = reinterpret_cast<Address>(aContext->Rbp);
67 #elif defined(GP_ARCH_x86)
68   aRegs.mPC = reinterpret_cast<Address>(aContext->Eip);
69   aRegs.mSP = reinterpret_cast<Address>(aContext->Esp);
70   aRegs.mFP = reinterpret_cast<Address>(aContext->Ebp);
71 #elif defined(GP_ARCH_arm64)
72   aRegs.mPC = reinterpret_cast<Address>(aContext->Pc);
73   aRegs.mSP = reinterpret_cast<Address>(aContext->Sp);
74   aRegs.mFP = reinterpret_cast<Address>(aContext->Fp);
75 #else
76 #  error "bad arch"
77 #endif
78   aRegs.mLR = 0;
79 }
80 
81 // Gets a real (i.e. not pseudo) handle for the current thread, with the
82 // permissions needed for profiling.
83 // @return a real HANDLE for the current thread.
GetRealCurrentThreadHandleForProfiling()84 static HANDLE GetRealCurrentThreadHandleForProfiling() {
85   HANDLE realCurrentThreadHandle;
86   if (!::DuplicateHandle(
87           ::GetCurrentProcess(), ::GetCurrentThread(), ::GetCurrentProcess(),
88           &realCurrentThreadHandle,
89           THREAD_GET_CONTEXT | THREAD_SUSPEND_RESUME | THREAD_QUERY_INFORMATION,
90           FALSE, 0)) {
91     return nullptr;
92   }
93 
94   return realCurrentThreadHandle;
95 }
96 
97 class PlatformData {
98  public:
99   // Get a handle to the calling thread. This is the thread that we are
100   // going to profile. We need a real handle because we are going to use it in
101   // the sampler thread.
PlatformData(BaseProfilerThreadId aThreadId)102   explicit PlatformData(BaseProfilerThreadId aThreadId)
103       : mProfiledThread(GetRealCurrentThreadHandleForProfiling()) {
104     MOZ_ASSERT(DWORD(aThreadId.ToNumber()) == ::GetCurrentThreadId());
105   }
106 
~PlatformData()107   ~PlatformData() {
108     if (mProfiledThread != nullptr) {
109       CloseHandle(mProfiledThread);
110       mProfiledThread = nullptr;
111     }
112   }
113 
ProfiledThread()114   HANDLE ProfiledThread() { return mProfiledThread; }
115 
116  private:
117   HANDLE mProfiledThread;
118 };
119 
120 #if defined(USE_MOZ_STACK_WALK)
121 HANDLE
GetThreadHandle(PlatformData * aData)122 GetThreadHandle(PlatformData* aData) { return aData->ProfiledThread(); }
123 #endif
124 
// Sentinel HANDLE value meaning "no thread". ~SamplerThread() compares
// mThread against it before calling CloseHandle().
static const HANDLE kNoThread = INVALID_HANDLE_VALUE;
126 
127 ////////////////////////////////////////////////////////////////////////
128 // BEGIN Sampler target specifics
129 
// Constructs the Sampler. The body is empty in this implementation: |aLock|
// is unused and no state is set up here.
Sampler::Sampler(PSLockRef aLock) {}
131 
// Disables the Sampler. The body is empty in this implementation: |aLock| is
// unused and there is nothing to tear down here.
void Sampler::Disable(PSLockRef aLock) {}
133 
134 template <typename Func>
SuspendAndSampleAndResumeThread(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const TimeStamp & aNow,const Func & aProcessRegs)135 void Sampler::SuspendAndSampleAndResumeThread(
136     PSLockRef aLock, const RegisteredThread& aRegisteredThread,
137     const TimeStamp& aNow, const Func& aProcessRegs) {
138   HANDLE profiled_thread =
139       aRegisteredThread.GetPlatformData()->ProfiledThread();
140   if (profiled_thread == nullptr) {
141     return;
142   }
143 
144   // Context used for sampling the register state of the profiled thread.
145   CONTEXT context;
146   memset(&context, 0, sizeof(context));
147 
148   //----------------------------------------------------------------//
149   // Suspend the samplee thread and get its context.
150 
151   static const DWORD kSuspendFailed = static_cast<DWORD>(-1);
152   if (SuspendThread(profiled_thread) == kSuspendFailed) {
153     return;
154   }
155 
156   // SuspendThread is asynchronous, so the thread may still be running.
157   // Call GetThreadContext first to ensure the thread is really suspended.
158   // See https://blogs.msdn.microsoft.com/oldnewthing/20150205-00/?p=44743.
159 
160   // Using only CONTEXT_CONTROL is faster but on 64-bit it causes crashes in
161   // RtlVirtualUnwind (see bug 1120126) so we set all the flags.
162 #if defined(GP_ARCH_amd64)
163   context.ContextFlags = CONTEXT_FULL;
164 #else
165   context.ContextFlags = CONTEXT_CONTROL;
166 #endif
167   if (!GetThreadContext(profiled_thread, &context)) {
168     ResumeThread(profiled_thread);
169     return;
170   }
171 
172   //----------------------------------------------------------------//
173   // Sample the target thread.
174 
175   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
176   //
177   // The profiler's "critical section" begins here.  We must be very careful
178   // what we do here, or risk deadlock.  See the corresponding comment in
179   // platform-linux-android.cpp for details.
180 
181   Registers regs;
182   PopulateRegsFromContext(regs, &context);
183   aProcessRegs(regs, aNow);
184 
185   //----------------------------------------------------------------//
186   // Resume the target thread.
187 
188   ResumeThread(profiled_thread);
189 
190   // The profiler's critical section ends here.
191   //
192   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
193 }
194 
195 // END Sampler target specifics
196 ////////////////////////////////////////////////////////////////////////
197 
198 ////////////////////////////////////////////////////////////////////////
199 // BEGIN SamplerThread target specifics
200 
ThreadEntry(void * aArg)201 static unsigned int __stdcall ThreadEntry(void* aArg) {
202   auto thread = static_cast<SamplerThread*>(aArg);
203   thread->Run();
204   return 0;
205 }
206 
SamplerThread(PSLockRef aLock,uint32_t aActivityGeneration,double aIntervalMilliseconds,uint32_t aFeatures)207 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
208                              double aIntervalMilliseconds, uint32_t aFeatures)
209     : mSampler(aLock),
210       mActivityGeneration(aActivityGeneration),
211       mIntervalMicroseconds(
212           std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))),
213       mNoTimerResolutionChange(
214           ProfilerFeature::HasNoTimerResolutionChange(aFeatures)) {
215   if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
216     // By default the timer resolution (which tends to be 1/64Hz, around 16ms)
217     // is not changed. However, if the requested interval is sufficiently low,
218     // the resolution will be adjusted to match. Note that this affects all
219     // timers in Firefox, and could therefore hide issues while profiling. This
220     // change may be prevented with the "notimerresolutionchange" feature.
221     ::timeBeginPeriod(mIntervalMicroseconds / 1000);
222   }
223 
224   // Create a new thread. It is important to use _beginthreadex() instead of
225   // the Win32 function CreateThread(), because the CreateThread() does not
226   // initialize thread-specific structures in the C runtime library.
227   mThread = reinterpret_cast<HANDLE>(_beginthreadex(nullptr,
228                                                     /* stack_size */ 0,
229                                                     ThreadEntry, this,
230                                                     /* initflag */ 0, nullptr));
231   if (mThread == 0) {
232     MOZ_CRASH("_beginthreadex failed");
233   }
234 }
235 
~SamplerThread()236 SamplerThread::~SamplerThread() {
237   WaitForSingleObject(mThread, INFINITE);
238 
239   // Close our own handle for the thread.
240   if (mThread != kNoThread) {
241     CloseHandle(mThread);
242   }
243 }
244 
SleepMicro(uint32_t aMicroseconds)245 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
246   // For now, keep the old behaviour of minimum Sleep(1), even for
247   // smaller-than-usual sleeps after an overshoot, unless the user has
248   // explicitly opted into a sub-millisecond profiler interval.
249   if (mIntervalMicroseconds >= 1000) {
250     ::Sleep(std::max(1u, aMicroseconds / 1000));
251   } else {
252     TimeStamp start = TimeStamp::Now();
253     TimeStamp end = start + TimeDuration::FromMicroseconds(aMicroseconds);
254 
255     // First, sleep for as many whole milliseconds as possible.
256     if (aMicroseconds >= 1000) {
257       ::Sleep(aMicroseconds / 1000);
258     }
259 
260     // Then, spin until enough time has passed.
261     while (TimeStamp::Now() < end) {
262       YieldProcessor();
263     }
264   }
265 }
266 
Stop(PSLockRef aLock)267 void SamplerThread::Stop(PSLockRef aLock) {
268   if ((!mNoTimerResolutionChange) && (mIntervalMicroseconds < 10 * 1000)) {
269     // Disable any timer resolution changes we've made. Do it now while
270     // gPSMutex is locked, i.e. before any other SamplerThread can be created
271     // and call ::timeBeginPeriod().
272     //
273     // It's safe to do this now even though this SamplerThread is still alive,
274     // because the next time the main loop of Run() iterates it won't get past
275     // the mActivityGeneration check, and so it won't make any more ::Sleep()
276     // calls.
277     ::timeEndPeriod(mIntervalMicroseconds / 1000);
278   }
279 
280   mSampler.Disable(aLock);
281 }
282 
283 // END SamplerThread target specifics
284 ////////////////////////////////////////////////////////////////////////
285 
// Platform-specific initialization hook. The body is empty in this
// implementation and |aLock| is unused.
static void PlatformInit(PSLockRef aLock) {}
287 
288 #if defined(HAVE_NATIVE_UNWIND)
SyncPopulate()289 void Registers::SyncPopulate() {
290   CONTEXT context;
291   RtlCaptureContext(&context);
292   PopulateRegsFromContext(*this, &context);
293 }
294 #endif
295 
296 #if defined(GP_PLAT_amd64_windows)
297 static WindowsDllInterceptor NtDllIntercept;
298 
299 typedef NTSTATUS(NTAPI* LdrUnloadDll_func)(HMODULE module);
300 static WindowsDllInterceptor::FuncHookType<LdrUnloadDll_func> stub_LdrUnloadDll;
301 
patched_LdrUnloadDll(HMODULE module)302 static NTSTATUS NTAPI patched_LdrUnloadDll(HMODULE module) {
303   // Prevent the stack walker from suspending this thread when LdrUnloadDll
304   // holds the RtlLookupFunctionEntry lock.
305   AutoSuppressStackWalking suppress;
306   return stub_LdrUnloadDll(module);
307 }
308 
309 // These pointers are disguised as PVOID to avoid pulling in obscure headers
310 typedef PVOID(WINAPI* LdrResolveDelayLoadedAPI_func)(
311     PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
312     PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags);
313 static WindowsDllInterceptor::FuncHookType<LdrResolveDelayLoadedAPI_func>
314     stub_LdrResolveDelayLoadedAPI;
315 
patched_LdrResolveDelayLoadedAPI(PVOID ParentModuleBase,PVOID DelayloadDescriptor,PVOID FailureDllHook,PVOID FailureSystemHook,PVOID ThunkAddress,ULONG Flags)316 static PVOID WINAPI patched_LdrResolveDelayLoadedAPI(
317     PVOID ParentModuleBase, PVOID DelayloadDescriptor, PVOID FailureDllHook,
318     PVOID FailureSystemHook, PVOID ThunkAddress, ULONG Flags) {
319   // Prevent the stack walker from suspending this thread when
320   // LdrResolveDelayLoadAPI holds the RtlLookupFunctionEntry lock.
321   AutoSuppressStackWalking suppress;
322   return stub_LdrResolveDelayLoadedAPI(ParentModuleBase, DelayloadDescriptor,
323                                        FailureDllHook, FailureSystemHook,
324                                        ThunkAddress, Flags);
325 }
326 
InitializeWin64ProfilerHooks()327 MFBT_API void InitializeWin64ProfilerHooks() {
328   // This function could be called by both profilers, but we only want to run
329   // it once.
330   static bool ran = false;
331   if (ran) {
332     return;
333   }
334   ran = true;
335 
336   NtDllIntercept.Init("ntdll.dll");
337   stub_LdrUnloadDll.Set(NtDllIntercept, "LdrUnloadDll", &patched_LdrUnloadDll);
338   if (IsWin8OrLater()) {  // LdrResolveDelayLoadedAPI was introduced in Win8
339     stub_LdrResolveDelayLoadedAPI.Set(NtDllIntercept,
340                                       "LdrResolveDelayLoadedAPI",
341                                       &patched_LdrResolveDelayLoadedAPI);
342   }
343 }
344 #endif  // defined(GP_PLAT_amd64_windows)
345 
346 }  // namespace baseprofiler
347 }  // namespace mozilla
348