1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 //  * Redistributions of source code must retain the above copyright
9 //    notice, this list of conditions and the following disclaimer.
10 //  * Redistributions in binary form must reproduce the above copyright
11 //    notice, this list of conditions and the following disclaimer in
12 //    the documentation and/or other materials provided with the
13 //    distribution.
14 //  * Neither the name of Google, Inc. nor the names of its contributors
15 //    may be used to endorse or promote products derived from this
16 //    software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30 
// This file is used for Linux, Android, and FreeBSD.
32 
33 #include <stdio.h>
34 #include <math.h>
35 
36 #include <pthread.h>
37 #if defined(GP_OS_freebsd)
38 #  include <sys/thr.h>
39 #endif
40 #include <semaphore.h>
41 #include <signal.h>
42 #include <sys/time.h>
43 #include <sys/resource.h>
44 #include <sys/syscall.h>
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <sched.h>
48 #include <ucontext.h>
49 // Ubuntu Dapper requires memory pages to be marked as
50 // executable. Otherwise, OS raises an exception when executing code
51 // in that page.
52 #include <sys/types.h>  // mmap & munmap
53 #include <sys/mman.h>   // mmap & munmap
54 #include <sys/stat.h>   // open
55 #include <fcntl.h>      // open
56 #include <unistd.h>     // sysconf
57 #include <semaphore.h>
58 #ifdef __GLIBC__
59 #  include <execinfo.h>  // backtrace, backtrace_symbols
60 #endif                   // def __GLIBC__
61 #include <strings.h>     // index
62 #include <errno.h>
63 #include <stdarg.h>
64 
65 #include "prenv.h"
66 #include "mozilla/PodOperations.h"
67 #include "mozilla/DebugOnly.h"
68 
69 #include <string.h>
70 #include <list>
71 
72 using namespace mozilla;
73 
74 namespace mozilla {
75 namespace baseprofiler {
76 
// Returns the current wall-clock time reported by gettimeofday(), expressed as
// a count of microseconds since the Unix epoch.
static int64_t MicrosecondsSince1970() {
  constexpr int64_t kMicrosPerSecond = 1000000;

  struct timeval now;
  gettimeofday(&now, nullptr);

  // Widen both fields before combining them so the multiplication is done in
  // 64 bits regardless of the platform's time_t width.
  const int64_t wholeSeconds = static_cast<int64_t>(now.tv_sec);
  const int64_t extraMicros = static_cast<int64_t>(now.tv_usec);
  return wholeSeconds * kMicrosPerSecond + extraMicros;
}
82 
// Returns the caller-supplied guess unchanged; this implementation performs no
// lookup of its own.
void* GetStackTop(void* aGuess) {
  return aGuess;
}
84 
PopulateRegsFromContext(Registers & aRegs,ucontext_t * aContext)85 static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
86   aRegs.mContext = aContext;
87   mcontext_t& mcontext = aContext->uc_mcontext;
88 
89   // Extracting the sample from the context is extremely machine dependent.
90 #if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
91   aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
92   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
93   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
94   aRegs.mLR = 0;
95 #elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
96   aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
97   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
98   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
99   aRegs.mLR = 0;
100 #elif defined(GP_PLAT_amd64_freebsd)
101   aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
102   aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
103   aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
104   aRegs.mLR = 0;
105 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
106   aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
107   aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
108   aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
109   aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
110 #elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
111   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
112   aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
113   aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
114   aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
115 #elif defined(GP_PLAT_arm64_freebsd)
116   aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
117   aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
118   aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
119   aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
120 #elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
121   aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
122   aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
123   aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
124 
125 #else
126 #  error "bad platform"
127 #endif
128 }
129 
// On Android, make sure SYS_tgkill names the tgkill syscall number. The alias
// is only installed when the libc headers have not already defined it, so an
// existing definition is never redefined.
#if defined(GP_OS_android) && !defined(SYS_tgkill)
#  define SYS_tgkill __NR_tgkill
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android)
// Sends signal |signalno| to thread |tid| of thread group (process) |tgid| by
// invoking the tgkill syscall directly. Returns the syscall's result.
int tgkill(pid_t tgid, pid_t tid, int signalno) {
  // syscall() returns long; make the narrowing to this wrapper's int return
  // type explicit.
  return static_cast<int>(syscall(SYS_tgkill, tgid, tid, signalno));
}
#endif

// On FreeBSD, route tgkill() calls in this file to thr_kill2().
#if defined(GP_OS_freebsd)
#  define tgkill thr_kill2
#endif
143 
144 class PlatformData {
145  public:
PlatformData(BaseProfilerThreadId aThreadId)146   explicit PlatformData(BaseProfilerThreadId aThreadId) {}
147 
~PlatformData()148   ~PlatformData() {}
149 };
150 
151 ////////////////////////////////////////////////////////////////////////
152 // BEGIN Sampler target specifics
153 
154 // The only way to reliably interrupt a Linux thread and inspect its register
155 // and stack state is by sending a signal to it, and doing the work inside the
156 // signal handler.  But we don't want to run much code inside the signal
157 // handler, since POSIX severely restricts what we can do in signal handlers.
158 // So we use a system of semaphores to suspend the thread and allow the
159 // sampler thread to do all the work of unwinding and copying out whatever
160 // data it wants.
161 //
162 // A four-message protocol is used to reliably suspend and later resume the
163 // thread to be sampled (the samplee):
164 //
165 // Sampler (signal sender) thread              Samplee (thread to be sampled)
166 //
167 // Prepare the SigHandlerCoordinator
168 // and point sSigHandlerCoordinator at it
169 //
170 // send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
171 // wait(mMessage2)                             Copy register state
172 //                                               into sSigHandlerCoordinator
173 //                         <------ MSG 2 ----- post(mMessage2)
174 // Samplee is now suspended.                   wait(mMessage3)
175 //   Examine its stack/register
176 //   state at leisure
177 //
178 // Release samplee:
179 //   post(mMessage3)       ------- MSG 3 ----->
180 // wait(mMessage4)                              Samplee now resumes.  Tell
181 //                                                the sampler that we are done.
182 //                         <------ MSG 4 ------ post(mMessage4)
183 // Now we know the samplee's signal             (leave signal handler)
184 //   handler has finished using
185 //   sSigHandlerCoordinator.  We can
186 //   safely reuse it for some other thread.
187 //
188 
189 // A type used to coordinate between the sampler (signal sending) thread and
190 // the thread currently being sampled (the samplee, which receives the
191 // signals).
192 //
193 // The first message is sent using a SIGPROF signal delivery.  The subsequent
194 // three are sent using sem_wait/sem_post pairs.  They are named accordingly
195 // in the following struct.
196 struct SigHandlerCoordinator {
SigHandlerCoordinatormozilla::baseprofiler::SigHandlerCoordinator197   SigHandlerCoordinator() {
198     PodZero(&mUContext);
199     int r = sem_init(&mMessage2, /* pshared */ 0, 0);
200     r |= sem_init(&mMessage3, /* pshared */ 0, 0);
201     r |= sem_init(&mMessage4, /* pshared */ 0, 0);
202     MOZ_ASSERT(r == 0);
203     (void)r;
204   }
205 
~SigHandlerCoordinatormozilla::baseprofiler::SigHandlerCoordinator206   ~SigHandlerCoordinator() {
207     int r = sem_destroy(&mMessage2);
208     r |= sem_destroy(&mMessage3);
209     r |= sem_destroy(&mMessage4);
210     MOZ_ASSERT(r == 0);
211     (void)r;
212   }
213 
214   sem_t mMessage2;       // To sampler: "context is in sSigHandlerCoordinator"
215   sem_t mMessage3;       // To samplee: "resume"
216   sem_t mMessage4;       // To sampler: "finished with sSigHandlerCoordinator"
217   ucontext_t mUContext;  // Context at signal
218 };
219 
220 struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
221 
SigprofHandler(int aSignal,siginfo_t * aInfo,void * aContext)222 static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
223   // Avoid TSan warning about clobbering errno.
224   int savedErrno = errno;
225 
226   MOZ_ASSERT(aSignal == SIGPROF);
227   MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
228 
229   // By sending us this signal, the sampler thread has sent us message 1 in
230   // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
231   // for use, please copy your register context into it."
232   Sampler::sSigHandlerCoordinator->mUContext =
233       *static_cast<ucontext_t*>(aContext);
234 
235   // Send message 2: tell the sampler thread that the context has been copied
236   // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
237   // being interrupted by a signal, so there's no loop around this call.
238   int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
239   MOZ_ASSERT(r == 0);
240 
241   // At this point, the sampler thread assumes we are suspended, so we must
242   // not touch any global state here.
243 
244   // Wait for message 3: the sampler thread tells us to resume.
245   while (true) {
246     r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
247     if (r == -1 && errno == EINTR) {
248       // Interrupted by a signal.  Try again.
249       continue;
250     }
251     // We don't expect any other kind of failure
252     MOZ_ASSERT(r == 0);
253     break;
254   }
255 
256   // Send message 4: tell the sampler thread that we are finished accessing
257   // |sSigHandlerCoordinator|.  After this point it is not safe to touch
258   // |sSigHandlerCoordinator|.
259   r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
260   MOZ_ASSERT(r == 0);
261 
262   errno = savedErrno;
263 }
264 
Sampler(PSLockRef aLock)265 Sampler::Sampler(PSLockRef aLock) : mMyPid(profiler_current_process_id()) {
266 #if defined(USE_EHABI_STACKWALK)
267   EHABIStackWalkInit();
268 #endif
269 
270   // NOTE: We don't initialize LUL here, instead initializing it in
271   // SamplerThread's constructor. This is because with the
272   // profiler_suspend_and_sample_thread entry point, we want to be able to
273   // sample without waiting for LUL to be initialized.
274 
275   // Request profiling signals.
276   struct sigaction sa;
277   sa.sa_sigaction = SigprofHandler;
278   sigemptyset(&sa.sa_mask);
279   sa.sa_flags = SA_RESTART | SA_SIGINFO;
280   if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
281     MOZ_CRASH("Error installing SIGPROF handler in the profiler");
282   }
283 }
284 
Disable(PSLockRef aLock)285 void Sampler::Disable(PSLockRef aLock) {
286   // Restore old signal handler. This is global state so it's important that
287   // we do it now, while gPSMutex is locked.
288   sigaction(SIGPROF, &mOldSigprofHandler, 0);
289 }
290 
291 template <typename Func>
SuspendAndSampleAndResumeThread(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const TimeStamp & aNow,const Func & aProcessRegs)292 void Sampler::SuspendAndSampleAndResumeThread(
293     PSLockRef aLock, const RegisteredThread& aRegisteredThread,
294     const TimeStamp& aNow, const Func& aProcessRegs) {
295   // Only one sampler thread can be sampling at once.  So we expect to have
296   // complete control over |sSigHandlerCoordinator|.
297   MOZ_ASSERT(!sSigHandlerCoordinator);
298 
299   if (!mSamplerTid.IsSpecified()) {
300     mSamplerTid = profiler_current_thread_id();
301   }
302   BaseProfilerThreadId sampleeTid = aRegisteredThread.Info()->ThreadId();
303   MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
304 
305   //----------------------------------------------------------------//
306   // Suspend the samplee thread and get its context.
307 
308   SigHandlerCoordinator coord;  // on sampler thread's stack
309   sSigHandlerCoordinator = &coord;
310 
311   // Send message 1 to the samplee (the thread to be sampled), by
312   // signalling at it.
313   // This could fail if the thread doesn't exist anymore.
314   int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF);
315   if (r == 0) {
316     // Wait for message 2 from the samplee, indicating that the context
317     // is available and that the thread is suspended.
318     while (true) {
319       r = sem_wait(&sSigHandlerCoordinator->mMessage2);
320       if (r == -1 && errno == EINTR) {
321         // Interrupted by a signal.  Try again.
322         continue;
323       }
324       // We don't expect any other kind of failure.
325       MOZ_ASSERT(r == 0);
326       break;
327     }
328 
329     //----------------------------------------------------------------//
330     // Sample the target thread.
331 
332     // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
333     //
334     // The profiler's "critical section" begins here.  In the critical section,
335     // we must not do any dynamic memory allocation, nor try to acquire any lock
336     // or any other unshareable resource.  This is because the thread to be
337     // sampled has been suspended at some entirely arbitrary point, and we have
338     // no idea which unsharable resources (locks, essentially) it holds.  So any
339     // attempt to acquire any lock, including the implied locks used by the
340     // malloc implementation, risks deadlock.  This includes TimeStamp::Now(),
341     // which gets a lock on Windows.
342 
343     // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
344     // valid.  We can poke around in it and unwind its stack as we like.
345 
346     // Extract the current register values.
347     Registers regs;
348     PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
349     aProcessRegs(regs, aNow);
350 
351     //----------------------------------------------------------------//
352     // Resume the target thread.
353 
354     // Send message 3 to the samplee, which tells it to resume.
355     r = sem_post(&sSigHandlerCoordinator->mMessage3);
356     MOZ_ASSERT(r == 0);
357 
358     // Wait for message 4 from the samplee, which tells us that it has
359     // finished with |sSigHandlerCoordinator|.
360     while (true) {
361       r = sem_wait(&sSigHandlerCoordinator->mMessage4);
362       if (r == -1 && errno == EINTR) {
363         continue;
364       }
365       MOZ_ASSERT(r == 0);
366       break;
367     }
368 
369     // The profiler's critical section ends here.  After this point, none of the
370     // critical section limitations documented above apply.
371     //
372     // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
373   }
374 
375   // This isn't strictly necessary, but doing so does help pick up anomalies
376   // in which the signal handler is running when it shouldn't be.
377   sSigHandlerCoordinator = nullptr;
378 }
379 
380 // END Sampler target specifics
381 ////////////////////////////////////////////////////////////////////////
382 
383 ////////////////////////////////////////////////////////////////////////
384 // BEGIN SamplerThread target specifics
385 
ThreadEntry(void * aArg)386 static void* ThreadEntry(void* aArg) {
387   auto thread = static_cast<SamplerThread*>(aArg);
388   thread->Run();
389   return nullptr;
390 }
391 
SamplerThread(PSLockRef aLock,uint32_t aActivityGeneration,double aIntervalMilliseconds,uint32_t aFeatures)392 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
393                              double aIntervalMilliseconds, uint32_t aFeatures)
394     : mSampler(aLock),
395       mActivityGeneration(aActivityGeneration),
396       mIntervalMicroseconds(
397           std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
398 #if defined(USE_LUL_STACKWALK)
399   lul::LUL* lul = CorePS::Lul(aLock);
400   if (!lul && ProfilerFeature::HasStackWalkEnabled(aFeatures)) {
401     CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
402     // Read all the unwind info currently available.
403     lul = CorePS::Lul(aLock);
404     read_procmaps(lul);
405 
406     // Switch into unwind mode. After this point, we can't add or remove any
407     // unwind info to/from this LUL instance. The only thing we can do with
408     // it is Unwind() calls.
409     lul->EnableUnwinding();
410 
411     // Has a test been requested?
412     if (getenv("MOZ_PROFILER_LUL_TEST")) {
413       int nTests = 0, nTestsPassed = 0;
414       RunLulUnitTests(&nTests, &nTestsPassed, lul);
415     }
416   }
417 #endif
418 
419   // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
420   // the signal ourselves instead of relying on itimer provides much better
421   // accuracy.
422   if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
423     MOZ_CRASH("pthread_create failed");
424   }
425 }
426 
~SamplerThread()427 SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
428 
SleepMicro(uint32_t aMicroseconds)429 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
430   if (aMicroseconds >= 1000000) {
431     // Use usleep for larger intervals, because the nanosleep
432     // code below only supports intervals < 1 second.
433     MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
434     return;
435   }
436 
437   struct timespec ts;
438   ts.tv_sec = 0;
439   ts.tv_nsec = aMicroseconds * 1000UL;
440 
441   int rv = ::nanosleep(&ts, &ts);
442 
443   while (rv != 0 && errno == EINTR) {
444     // Keep waiting in case of interrupt.
445     // nanosleep puts the remaining time back into ts.
446     rv = ::nanosleep(&ts, &ts);
447   }
448 
449   MOZ_ASSERT(!rv, "nanosleep call failed");
450 }
451 
Stop(PSLockRef aLock)452 void SamplerThread::Stop(PSLockRef aLock) {
453   // Restore old signal handler. This is global state so it's important that
454   // we do it now, while gPSMutex is locked. It's safe to do this now even
455   // though this SamplerThread is still alive, because the next time the main
456   // loop of Run() iterates it won't get past the mActivityGeneration check,
457   // and so won't send any signals.
458   mSampler.Disable(aLock);
459 }
460 
461 // END SamplerThread target specifics
462 ////////////////////////////////////////////////////////////////////////
463 
464 #if defined(GP_OS_linux) || defined(GP_OS_freebsd)
465 
466 // We use pthread_atfork() to temporarily disable signal delivery during any
467 // fork() call. Without that, fork() can be repeatedly interrupted by signal
468 // delivery, requiring it to be repeatedly restarted, which can lead to *long*
469 // delays. See bug 837390.
470 //
471 // We provide no paf_child() function to run in the child after forking. This
472 // is fine because we always immediately exec() after fork(), and exec()
473 // clobbers all process state. Also, we don't want the sampler to resume in the
474 // child process between fork() and exec(), it would be wasteful.
475 //
476 // Unfortunately all this is only doable on non-Android because Bionic doesn't
477 // have pthread_atfork.
478 
479 // In the parent, before the fork, increase gSkipSampling to ensure that
480 // profiler sampling loops will be skipped. There could be one in progress now,
481 // causing a small delay, but further sampling will be skipped, allowing `fork`
482 // to complete.
paf_prepare()483 static void paf_prepare() { ++gSkipSampling; }
484 
485 // In the parent, after the fork, decrease gSkipSampling to let the sampler
486 // resume sampling (unless other places have made it non-zero as well).
paf_parent()487 static void paf_parent() { --gSkipSampling; }
488 
PlatformInit(PSLockRef aLock)489 static void PlatformInit(PSLockRef aLock) {
490   // Set up the fork handlers.
491   pthread_atfork(paf_prepare, paf_parent, nullptr);
492 }
493 
494 #else
495 
PlatformInit(PSLockRef aLock)496 static void PlatformInit(PSLockRef aLock) {}
497 
498 #endif
499 
500 #if defined(HAVE_NATIVE_UNWIND)
501 // Context used by synchronous samples. It's safe to have a single one because
502 // only one synchronous sample can be taken at a time (due to
503 // profiler_get_backtrace()'s PSAutoLock).
504 // ucontext_t sSyncUContext;
505 
// Meant to fill this Registers object with the calling thread's own register
// state. That is not implemented in this file: the function unconditionally
// crashes via MOZ_CRASH.
void Registers::SyncPopulate() {
  // TODO port getcontext from breakpad, if profiler_get_backtrace is needed.
  MOZ_CRASH("profiler_get_backtrace() unsupported");
  // Disabled sketch of the intended implementation, kept for reference:
  // if (!getcontext(&sSyncUContext)) {
  //   PopulateRegsFromContext(*this, &sSyncUContext);
  // }
}
513 #endif
514 
515 }  // namespace baseprofiler
516 }  // namespace mozilla
517