1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 //  * Redistributions of source code must retain the above copyright
9 //    notice, this list of conditions and the following disclaimer.
10 //  * Redistributions in binary form must reproduce the above copyright
11 //    notice, this list of conditions and the following disclaimer in
12 //    the documentation and/or other materials provided with the
13 //    distribution.
14 //  * Neither the name of Google, Inc. nor the names of its contributors
15 //    may be used to endorse or promote products derived from this
16 //    software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30 
31 // This file is used for both Linux and Android.
32 
33 #include <stdio.h>
34 #include <math.h>
35 
36 #include <pthread.h>
37 #include <semaphore.h>
38 #include <signal.h>
39 #include <sys/time.h>
40 #include <sys/resource.h>
41 #include <sys/syscall.h>
42 #include <sys/types.h>
43 #include <stdlib.h>
44 #include <sched.h>
45 #include <ucontext.h>
46 // Ubuntu Dapper requires memory pages to be marked as
47 // executable. Otherwise, OS raises an exception when executing code
48 // in that page.
49 #include <sys/types.h>  // mmap & munmap
50 #include <sys/mman.h>   // mmap & munmap
51 #include <sys/stat.h>   // open
52 #include <fcntl.h>      // open
53 #include <unistd.h>     // sysconf
54 #include <semaphore.h>
55 #ifdef __GLIBC__
56 #include <execinfo.h>  // backtrace, backtrace_symbols
57 #endif                 // def __GLIBC__
58 #include <strings.h>   // index
59 #include <errno.h>
60 #include <stdarg.h>
61 
62 #include "prenv.h"
63 #include "mozilla/LinuxSignal.h"
64 #include "mozilla/PodOperations.h"
65 #include "mozilla/DebugOnly.h"
66 
67 #include <string.h>
68 #include <list>
69 
70 using namespace mozilla;
71 
GetCurrentId()72 /* static */ int Thread::GetCurrentId() { return gettid(); }
73 
// Stack-top hook for this platform: the caller's guess is returned unchanged;
// no attempt is made here to discover the real stack bounds.
void* GetStackTop(void* aGuess) { return aGuess; }
75 
PopulateRegsFromContext(Registers & aRegs,ucontext_t * aContext)76 static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
77   aRegs.mContext = aContext;
78   mcontext_t& mcontext = aContext->uc_mcontext;
79 
80   // Extracting the sample from the context is extremely machine dependent.
81 #if defined(GP_ARCH_x86)
82   aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
83   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
84   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
85   aRegs.mLR = 0;
86 #elif defined(GP_ARCH_amd64)
87   aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
88   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
89   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
90   aRegs.mLR = 0;
91 #elif defined(GP_ARCH_arm)
92   aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
93   aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
94   aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
95   aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
96 #elif defined(GP_ARCH_aarch64)
97   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
98   aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
99   aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
100   aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
101 #elif defined(GP_ARCH_mips64)
102   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
103   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
104   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
105 
106 #else
107 #error "bad platform"
108 #endif
109 }
110 
#if defined(GP_OS_android)
#define SYS_tgkill __NR_tgkill
#endif

// Thin wrapper around the raw tgkill system call: delivers |signalno| to
// thread |tid| inside thread group (process) |tgid|.
//
// Returns whatever syscall() returns, narrowed to int.
int tgkill(pid_t tgid, pid_t tid, int signalno) {
  return static_cast<int>(syscall(SYS_tgkill, tgid, tid, signalno));
}
118 
119 class PlatformData {
120  public:
PlatformData(int aThreadId)121   explicit PlatformData(int aThreadId) { MOZ_COUNT_CTOR(PlatformData); }
122 
~PlatformData()123   ~PlatformData() { MOZ_COUNT_DTOR(PlatformData); }
124 };
125 
////////////////////////////////////////////////////////////////////////
// BEGIN Sampler target specifics

// The only way to reliably interrupt a Linux thread and inspect its register
// and stack state is by sending a signal to it, and doing the work inside the
// signal handler.  But we don't want to run much code inside the signal
// handler, since POSIX severely restricts what we can do in signal handlers.
// So we use a system of semaphores to suspend the thread and allow the
// sampler thread to do all the work of unwinding and copying out whatever
// data it wants.
//
// A four-message protocol is used to reliably suspend and later resume the
// thread to be sampled (the samplee):
//
// Sampler (signal sender) thread              Samplee (thread to be sampled)
//
// Prepare the SigHandlerCoordinator
// and point sSigHandlerCoordinator at it
//
// send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
// wait(mMessage2)                             Copy register state
//                                               into sSigHandlerCoordinator
//                         <------ MSG 2 ----- post(mMessage2)
// Samplee is now suspended.                   wait(mMessage3)
//   Examine its stack/register
//   state at leisure
//
// Release samplee:
//   post(mMessage3)       ------- MSG 3 ----->
// wait(mMessage4)                              Samplee now resumes.  Tell
//                                                the sampler that we are done.
//                         <------ MSG 4 ------ post(mMessage4)
// Now we know the samplee's signal             (leave signal handler)
//   handler has finished using
//   sSigHandlerCoordinator.  We can
//   safely reuse it for some other thread.
//
163 
164 // A type used to coordinate between the sampler (signal sending) thread and
165 // the thread currently being sampled (the samplee, which receives the
166 // signals).
167 //
168 // The first message is sent using a SIGPROF signal delivery.  The subsequent
169 // three are sent using sem_wait/sem_post pairs.  They are named accordingly
170 // in the following struct.
171 struct SigHandlerCoordinator {
SigHandlerCoordinatorSigHandlerCoordinator172   SigHandlerCoordinator() {
173     PodZero(&mUContext);
174     int r = sem_init(&mMessage2, /* pshared */ 0, 0);
175     r |= sem_init(&mMessage3, /* pshared */ 0, 0);
176     r |= sem_init(&mMessage4, /* pshared */ 0, 0);
177     MOZ_ASSERT(r == 0);
178   }
179 
~SigHandlerCoordinatorSigHandlerCoordinator180   ~SigHandlerCoordinator() {
181     int r = sem_destroy(&mMessage2);
182     r |= sem_destroy(&mMessage3);
183     r |= sem_destroy(&mMessage4);
184     MOZ_ASSERT(r == 0);
185   }
186 
187   sem_t mMessage2;       // To sampler: "context is in sSigHandlerCoordinator"
188   sem_t mMessage3;       // To samplee: "resume"
189   sem_t mMessage4;       // To sampler: "finished with sSigHandlerCoordinator"
190   ucontext_t mUContext;  // Context at signal
191 };
192 
193 struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
194 
SigprofHandler(int aSignal,siginfo_t * aInfo,void * aContext)195 static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
196   // Avoid TSan warning about clobbering errno.
197   int savedErrno = errno;
198 
199   MOZ_ASSERT(aSignal == SIGPROF);
200   MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
201 
202   // By sending us this signal, the sampler thread has sent us message 1 in
203   // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
204   // for use, please copy your register context into it."
205   Sampler::sSigHandlerCoordinator->mUContext =
206       *static_cast<ucontext_t*>(aContext);
207 
208   // Send message 2: tell the sampler thread that the context has been copied
209   // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
210   // being interrupted by a signal, so there's no loop around this call.
211   int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
212   MOZ_ASSERT(r == 0);
213 
214   // At this point, the sampler thread assumes we are suspended, so we must
215   // not touch any global state here.
216 
217   // Wait for message 3: the sampler thread tells us to resume.
218   while (true) {
219     r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
220     if (r == -1 && errno == EINTR) {
221       // Interrupted by a signal.  Try again.
222       continue;
223     }
224     // We don't expect any other kind of failure
225     MOZ_ASSERT(r == 0);
226     break;
227   }
228 
229   // Send message 4: tell the sampler thread that we are finished accessing
230   // |sSigHandlerCoordinator|.  After this point it is not safe to touch
231   // |sSigHandlerCoordinator|.
232   r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
233   MOZ_ASSERT(r == 0);
234 
235   errno = savedErrno;
236 }
237 
Sampler(PSLockRef aLock)238 Sampler::Sampler(PSLockRef aLock)
239     : mMyPid(getpid())
240       // We don't know what the sampler thread's ID will be until it runs, so
241       // set mSamplerTid to a dummy value and fill it in for real in
242       // SuspendAndSampleAndResumeThread().
243       ,
244       mSamplerTid(-1) {
245 #if defined(USE_EHABI_STACKWALK)
246   mozilla::EHABIStackWalkInit();
247 #endif
248 
249   // NOTE: We don't initialize LUL here, instead initializing it in
250   // SamplerThread's constructor. This is because with the
251   // profiler_suspend_and_sample_thread entry point, we want to be able to
252   // sample without waiting for LUL to be initialized.
253 
254   // Request profiling signals.
255   struct sigaction sa;
256   sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(SigprofHandler);
257   sigemptyset(&sa.sa_mask);
258   sa.sa_flags = SA_RESTART | SA_SIGINFO;
259   if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
260     MOZ_CRASH("Error installing SIGPROF handler in the profiler");
261   }
262 }
263 
Disable(PSLockRef aLock)264 void Sampler::Disable(PSLockRef aLock) {
265   // Restore old signal handler. This is global state so it's important that
266   // we do it now, while gPSMutex is locked.
267   sigaction(SIGPROF, &mOldSigprofHandler, 0);
268 }
269 
270 template <typename Func>
SuspendAndSampleAndResumeThread(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const Func & aProcessRegs)271 void Sampler::SuspendAndSampleAndResumeThread(
272     PSLockRef aLock, const RegisteredThread& aRegisteredThread,
273     const Func& aProcessRegs) {
274   // Only one sampler thread can be sampling at once.  So we expect to have
275   // complete control over |sSigHandlerCoordinator|.
276   MOZ_ASSERT(!sSigHandlerCoordinator);
277 
278   if (mSamplerTid == -1) {
279     mSamplerTid = gettid();
280   }
281   int sampleeTid = aRegisteredThread.Info()->ThreadId();
282   MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
283 
284   //----------------------------------------------------------------//
285   // Suspend the samplee thread and get its context.
286 
287   SigHandlerCoordinator coord;  // on sampler thread's stack
288   sSigHandlerCoordinator = &coord;
289 
290   // Send message 1 to the samplee (the thread to be sampled), by
291   // signalling at it.
292   int r = tgkill(mMyPid, sampleeTid, SIGPROF);
293   MOZ_ASSERT(r == 0);
294 
295   // Wait for message 2 from the samplee, indicating that the context
296   // is available and that the thread is suspended.
297   while (true) {
298     r = sem_wait(&sSigHandlerCoordinator->mMessage2);
299     if (r == -1 && errno == EINTR) {
300       // Interrupted by a signal.  Try again.
301       continue;
302     }
303     // We don't expect any other kind of failure.
304     MOZ_ASSERT(r == 0);
305     break;
306   }
307 
308   //----------------------------------------------------------------//
309   // Sample the target thread.
310 
311   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
312   //
313   // The profiler's "critical section" begins here.  In the critical section,
314   // we must not do any dynamic memory allocation, nor try to acquire any lock
315   // or any other unshareable resource.  This is because the thread to be
316   // sampled has been suspended at some entirely arbitrary point, and we have
317   // no idea which unsharable resources (locks, essentially) it holds.  So any
318   // attempt to acquire any lock, including the implied locks used by the
319   // malloc implementation, risks deadlock.  This includes TimeStamp::Now(),
320   // which gets a lock on Windows.
321 
322   // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
323   // valid.  We can poke around in it and unwind its stack as we like.
324 
325   // Extract the current register values.
326   Registers regs;
327   PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
328   aProcessRegs(regs);
329 
330   //----------------------------------------------------------------//
331   // Resume the target thread.
332 
333   // Send message 3 to the samplee, which tells it to resume.
334   r = sem_post(&sSigHandlerCoordinator->mMessage3);
335   MOZ_ASSERT(r == 0);
336 
337   // Wait for message 4 from the samplee, which tells us that it has
338   // finished with |sSigHandlerCoordinator|.
339   while (true) {
340     r = sem_wait(&sSigHandlerCoordinator->mMessage4);
341     if (r == -1 && errno == EINTR) {
342       continue;
343     }
344     MOZ_ASSERT(r == 0);
345     break;
346   }
347 
348   // The profiler's critical section ends here.  After this point, none of the
349   // critical section limitations documented above apply.
350   //
351   // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
352 
353   // This isn't strictly necessary, but doing so does help pick up anomalies
354   // in which the signal handler is running when it shouldn't be.
355   sSigHandlerCoordinator = nullptr;
356 }
357 
// END Sampler target specifics
////////////////////////////////////////////////////////////////////////

////////////////////////////////////////////////////////////////////////
// BEGIN SamplerThread target specifics
363 
ThreadEntry(void * aArg)364 static void* ThreadEntry(void* aArg) {
365   auto thread = static_cast<SamplerThread*>(aArg);
366   thread->Run();
367   return nullptr;
368 }
369 
SamplerThread(PSLockRef aLock,uint32_t aActivityGeneration,double aIntervalMilliseconds)370 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
371                              double aIntervalMilliseconds)
372     : Sampler(aLock),
373       mActivityGeneration(aActivityGeneration),
374       mIntervalMicroseconds(
375           std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
376 #if defined(USE_LUL_STACKWALK)
377   lul::LUL* lul = CorePS::Lul(aLock);
378   if (!lul) {
379     CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
380     // Read all the unwind info currently available.
381     lul = CorePS::Lul(aLock);
382     read_procmaps(lul);
383 
384     // Switch into unwind mode. After this point, we can't add or remove any
385     // unwind info to/from this LUL instance. The only thing we can do with
386     // it is Unwind() calls.
387     lul->EnableUnwinding();
388 
389     // Has a test been requested?
390     if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
391       int nTests = 0, nTestsPassed = 0;
392       RunLulUnitTests(&nTests, &nTestsPassed, lul);
393     }
394   }
395 #endif
396 
397   // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
398   // the signal ourselves instead of relying on itimer provides much better
399   // accuracy.
400   if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
401     MOZ_CRASH("pthread_create failed");
402   }
403 }
404 
~SamplerThread()405 SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
406 
SleepMicro(uint32_t aMicroseconds)407 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
408   if (aMicroseconds >= 1000000) {
409     // Use usleep for larger intervals, because the nanosleep
410     // code below only supports intervals < 1 second.
411     MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
412     return;
413   }
414 
415   struct timespec ts;
416   ts.tv_sec = 0;
417   ts.tv_nsec = aMicroseconds * 1000UL;
418 
419   int rv = ::nanosleep(&ts, &ts);
420 
421   while (rv != 0 && errno == EINTR) {
422     // Keep waiting in case of interrupt.
423     // nanosleep puts the remaining time back into ts.
424     rv = ::nanosleep(&ts, &ts);
425   }
426 
427   MOZ_ASSERT(!rv, "nanosleep call failed");
428 }
429 
Stop(PSLockRef aLock)430 void SamplerThread::Stop(PSLockRef aLock) {
431   // Restore old signal handler. This is global state so it's important that
432   // we do it now, while gPSMutex is locked. It's safe to do this now even
433   // though this SamplerThread is still alive, because the next time the main
434   // loop of Run() iterates it won't get past the mActivityGeneration check,
435   // and so won't send any signals.
436   Sampler::Disable(aLock);
437 }
438 
// END SamplerThread target specifics
////////////////////////////////////////////////////////////////////////
441 
442 #if defined(GP_OS_linux)
443 
// We use pthread_atfork() to temporarily disable signal delivery during any
// fork() call. Without that, fork() can be repeatedly interrupted by signal
// delivery, requiring it to be repeatedly restarted, which can lead to *long*
// delays. See bug 837390.
//
// We provide no paf_child() function to run in the child after forking. This
// is fine because we always immediately exec() after fork(), and exec()
// clobbers all process state. (At one point we did have a paf_child()
// function, but it caused problems related to locking gPSMutex. See bug
// 1348374.)
//
// Unfortunately all this is only doable on non-Android because Bionic doesn't
// have pthread_atfork.
457 
458 // In the parent, before the fork, record IsPaused, and then pause.
paf_prepare()459 static void paf_prepare() {
460   MOZ_RELEASE_ASSERT(CorePS::Exists());
461 
462   PSAutoLock lock(gPSMutex);
463 
464   if (ActivePS::Exists(lock)) {
465     ActivePS::SetWasPaused(lock, ActivePS::IsPaused(lock));
466     ActivePS::SetIsPaused(lock, true);
467   }
468 }
469 
470 // In the parent, after the fork, return IsPaused to the pre-fork state.
paf_parent()471 static void paf_parent() {
472   MOZ_RELEASE_ASSERT(CorePS::Exists());
473 
474   PSAutoLock lock(gPSMutex);
475 
476   if (ActivePS::Exists(lock)) {
477     ActivePS::SetIsPaused(lock, ActivePS::WasPaused(lock));
478     ActivePS::SetWasPaused(lock, false);
479   }
480 }
481 
PlatformInit(PSLockRef aLock)482 static void PlatformInit(PSLockRef aLock) {
483   // Set up the fork handlers.
484   pthread_atfork(paf_prepare, paf_parent, nullptr);
485 }
486 
487 #else
488 
PlatformInit(PSLockRef aLock)489 static void PlatformInit(PSLockRef aLock) {}
490 
491 #endif
492 
493 #if defined(HAVE_NATIVE_UNWIND)
// Context used by synchronous samples. It's safe to have a single one because
// only one synchronous sample can be taken at a time (due to
// profiler_get_backtrace()'s PSAutoLock).
ucontext_t sSyncUContext;
498 
SyncPopulate()499 void Registers::SyncPopulate() {
500   if (!getcontext(&sSyncUContext)) {
501     PopulateRegsFromContext(*this, &sSyncUContext);
502   }
503 }
504 #endif
505