1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in
12 // the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google, Inc. nor the names of its contributors
15 // may be used to endorse or promote products derived from this
16 // software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30
31 // This file is used for both Linux and Android.
32
33 #include <stdio.h>
34 #include <math.h>
35
36 #include <pthread.h>
37 #include <semaphore.h>
38 #include <signal.h>
39 #include <sys/time.h>
40 #include <sys/resource.h>
41 #include <sys/syscall.h>
42 #include <sys/types.h>
43 #include <stdlib.h>
44 #include <sched.h>
45 #include <ucontext.h>
46 // Ubuntu Dapper requires memory pages to be marked as
47 // executable. Otherwise, OS raises an exception when executing code
48 // in that page.
49 #include <sys/types.h> // mmap & munmap
50 #include <sys/mman.h> // mmap & munmap
51 #include <sys/stat.h> // open
52 #include <fcntl.h> // open
53 #include <unistd.h> // sysconf
54 #include <semaphore.h>
55 #ifdef __GLIBC__
56 #include <execinfo.h> // backtrace, backtrace_symbols
57 #endif // def __GLIBC__
58 #include <strings.h> // index
59 #include <errno.h>
60 #include <stdarg.h>
61
62 #include "prenv.h"
63 #include "mozilla/LinuxSignal.h"
64 #include "mozilla/PodOperations.h"
65 #include "mozilla/DebugOnly.h"
66
67 #include <string.h>
68 #include <list>
69
70 using namespace mozilla;
71
GetCurrentId()72 /* static */ int Thread::GetCurrentId() { return gettid(); }
73
// Returns the caller-supplied guess unchanged; no stack lookup of any kind is
// performed here.
void* GetStackTop(void* aGuess) {
  void* const stackTop = aGuess;
  return stackTop;
}
75
PopulateRegsFromContext(Registers & aRegs,ucontext_t * aContext)76 static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
77 aRegs.mContext = aContext;
78 mcontext_t& mcontext = aContext->uc_mcontext;
79
80 // Extracting the sample from the context is extremely machine dependent.
81 #if defined(GP_ARCH_x86)
82 aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
83 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
84 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
85 aRegs.mLR = 0;
86 #elif defined(GP_ARCH_amd64)
87 aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
88 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
89 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
90 aRegs.mLR = 0;
91 #elif defined(GP_ARCH_arm)
92 aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
93 aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
94 aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
95 aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
96 #elif defined(GP_ARCH_aarch64)
97 aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
98 aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
99 aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
100 aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
101 #elif defined(GP_ARCH_mips64)
102 aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
103 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
104 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
105
106 #else
107 #error "bad platform"
108 #endif
109 }
110
111 #if defined(GP_OS_android)
112 #define SYS_tgkill __NR_tgkill
113 #endif
114
// Thin wrapper that issues the SYS_tgkill system call with the given thread
// group ID, thread ID and signal number, and returns the syscall() result
// converted to int.
int tgkill(pid_t tgid, pid_t tid, int signalno) {
  const long syscallResult = syscall(SYS_tgkill, tgid, tid, signalno);
  return static_cast<int>(syscallResult);
}
118
119 class PlatformData {
120 public:
PlatformData(int aThreadId)121 explicit PlatformData(int aThreadId) { MOZ_COUNT_CTOR(PlatformData); }
122
~PlatformData()123 ~PlatformData() { MOZ_COUNT_DTOR(PlatformData); }
124 };
125
126 ////////////////////////////////////////////////////////////////////////
127 // BEGIN Sampler target specifics
128
129 // The only way to reliably interrupt a Linux thread and inspect its register
130 // and stack state is by sending a signal to it, and doing the work inside the
131 // signal handler. But we don't want to run much code inside the signal
132 // handler, since POSIX severely restricts what we can do in signal handlers.
133 // So we use a system of semaphores to suspend the thread and allow the
134 // sampler thread to do all the work of unwinding and copying out whatever
135 // data it wants.
136 //
137 // A four-message protocol is used to reliably suspend and later resume the
138 // thread to be sampled (the samplee):
139 //
140 // Sampler (signal sender) thread Samplee (thread to be sampled)
141 //
142 // Prepare the SigHandlerCoordinator
143 // and point sSigHandlerCoordinator at it
144 //
145 // send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
146 // wait(mMessage2) Copy register state
147 // into sSigHandlerCoordinator
148 // <------ MSG 2 ----- post(mMessage2)
149 // Samplee is now suspended. wait(mMessage3)
150 // Examine its stack/register
151 // state at leisure
152 //
153 // Release samplee:
154 // post(mMessage3) ------- MSG 3 ----->
155 // wait(mMessage4) Samplee now resumes. Tell
156 // the sampler that we are done.
157 // <------ MSG 4 ------ post(mMessage4)
158 // Now we know the samplee's signal (leave signal handler)
159 // handler has finished using
160 // sSigHandlerCoordinator. We can
161 // safely reuse it for some other thread.
162 //
163
164 // A type used to coordinate between the sampler (signal sending) thread and
165 // the thread currently being sampled (the samplee, which receives the
166 // signals).
167 //
168 // The first message is sent using a SIGPROF signal delivery. The subsequent
169 // three are sent using sem_wait/sem_post pairs. They are named accordingly
170 // in the following struct.
171 struct SigHandlerCoordinator {
SigHandlerCoordinatorSigHandlerCoordinator172 SigHandlerCoordinator() {
173 PodZero(&mUContext);
174 int r = sem_init(&mMessage2, /* pshared */ 0, 0);
175 r |= sem_init(&mMessage3, /* pshared */ 0, 0);
176 r |= sem_init(&mMessage4, /* pshared */ 0, 0);
177 MOZ_ASSERT(r == 0);
178 }
179
~SigHandlerCoordinatorSigHandlerCoordinator180 ~SigHandlerCoordinator() {
181 int r = sem_destroy(&mMessage2);
182 r |= sem_destroy(&mMessage3);
183 r |= sem_destroy(&mMessage4);
184 MOZ_ASSERT(r == 0);
185 }
186
187 sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
188 sem_t mMessage3; // To samplee: "resume"
189 sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
190 ucontext_t mUContext; // Context at signal
191 };
192
193 struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
194
SigprofHandler(int aSignal,siginfo_t * aInfo,void * aContext)195 static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
196 // Avoid TSan warning about clobbering errno.
197 int savedErrno = errno;
198
199 MOZ_ASSERT(aSignal == SIGPROF);
200 MOZ_ASSERT(Sampler::sSigHandlerCoordinator);
201
202 // By sending us this signal, the sampler thread has sent us message 1 in
203 // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
204 // for use, please copy your register context into it."
205 Sampler::sSigHandlerCoordinator->mUContext =
206 *static_cast<ucontext_t*>(aContext);
207
208 // Send message 2: tell the sampler thread that the context has been copied
209 // into |sSigHandlerCoordinator->mUContext|. sem_post can never fail by
210 // being interrupted by a signal, so there's no loop around this call.
211 int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
212 MOZ_ASSERT(r == 0);
213
214 // At this point, the sampler thread assumes we are suspended, so we must
215 // not touch any global state here.
216
217 // Wait for message 3: the sampler thread tells us to resume.
218 while (true) {
219 r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
220 if (r == -1 && errno == EINTR) {
221 // Interrupted by a signal. Try again.
222 continue;
223 }
224 // We don't expect any other kind of failure
225 MOZ_ASSERT(r == 0);
226 break;
227 }
228
229 // Send message 4: tell the sampler thread that we are finished accessing
230 // |sSigHandlerCoordinator|. After this point it is not safe to touch
231 // |sSigHandlerCoordinator|.
232 r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
233 MOZ_ASSERT(r == 0);
234
235 errno = savedErrno;
236 }
237
Sampler(PSLockRef aLock)238 Sampler::Sampler(PSLockRef aLock)
239 : mMyPid(getpid())
240 // We don't know what the sampler thread's ID will be until it runs, so
241 // set mSamplerTid to a dummy value and fill it in for real in
242 // SuspendAndSampleAndResumeThread().
243 ,
244 mSamplerTid(-1) {
245 #if defined(USE_EHABI_STACKWALK)
246 mozilla::EHABIStackWalkInit();
247 #endif
248
249 // NOTE: We don't initialize LUL here, instead initializing it in
250 // SamplerThread's constructor. This is because with the
251 // profiler_suspend_and_sample_thread entry point, we want to be able to
252 // sample without waiting for LUL to be initialized.
253
254 // Request profiling signals.
255 struct sigaction sa;
256 sa.sa_sigaction = MOZ_SIGNAL_TRAMPOLINE(SigprofHandler);
257 sigemptyset(&sa.sa_mask);
258 sa.sa_flags = SA_RESTART | SA_SIGINFO;
259 if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
260 MOZ_CRASH("Error installing SIGPROF handler in the profiler");
261 }
262 }
263
Disable(PSLockRef aLock)264 void Sampler::Disable(PSLockRef aLock) {
265 // Restore old signal handler. This is global state so it's important that
266 // we do it now, while gPSMutex is locked.
267 sigaction(SIGPROF, &mOldSigprofHandler, 0);
268 }
269
270 template <typename Func>
SuspendAndSampleAndResumeThread(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const Func & aProcessRegs)271 void Sampler::SuspendAndSampleAndResumeThread(
272 PSLockRef aLock, const RegisteredThread& aRegisteredThread,
273 const Func& aProcessRegs) {
274 // Only one sampler thread can be sampling at once. So we expect to have
275 // complete control over |sSigHandlerCoordinator|.
276 MOZ_ASSERT(!sSigHandlerCoordinator);
277
278 if (mSamplerTid == -1) {
279 mSamplerTid = gettid();
280 }
281 int sampleeTid = aRegisteredThread.Info()->ThreadId();
282 MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);
283
284 //----------------------------------------------------------------//
285 // Suspend the samplee thread and get its context.
286
287 SigHandlerCoordinator coord; // on sampler thread's stack
288 sSigHandlerCoordinator = &coord;
289
290 // Send message 1 to the samplee (the thread to be sampled), by
291 // signalling at it.
292 int r = tgkill(mMyPid, sampleeTid, SIGPROF);
293 MOZ_ASSERT(r == 0);
294
295 // Wait for message 2 from the samplee, indicating that the context
296 // is available and that the thread is suspended.
297 while (true) {
298 r = sem_wait(&sSigHandlerCoordinator->mMessage2);
299 if (r == -1 && errno == EINTR) {
300 // Interrupted by a signal. Try again.
301 continue;
302 }
303 // We don't expect any other kind of failure.
304 MOZ_ASSERT(r == 0);
305 break;
306 }
307
308 //----------------------------------------------------------------//
309 // Sample the target thread.
310
311 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
312 //
313 // The profiler's "critical section" begins here. In the critical section,
314 // we must not do any dynamic memory allocation, nor try to acquire any lock
315 // or any other unshareable resource. This is because the thread to be
316 // sampled has been suspended at some entirely arbitrary point, and we have
317 // no idea which unsharable resources (locks, essentially) it holds. So any
318 // attempt to acquire any lock, including the implied locks used by the
319 // malloc implementation, risks deadlock. This includes TimeStamp::Now(),
320 // which gets a lock on Windows.
321
322 // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
323 // valid. We can poke around in it and unwind its stack as we like.
324
325 // Extract the current register values.
326 Registers regs;
327 PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
328 aProcessRegs(regs);
329
330 //----------------------------------------------------------------//
331 // Resume the target thread.
332
333 // Send message 3 to the samplee, which tells it to resume.
334 r = sem_post(&sSigHandlerCoordinator->mMessage3);
335 MOZ_ASSERT(r == 0);
336
337 // Wait for message 4 from the samplee, which tells us that it has
338 // finished with |sSigHandlerCoordinator|.
339 while (true) {
340 r = sem_wait(&sSigHandlerCoordinator->mMessage4);
341 if (r == -1 && errno == EINTR) {
342 continue;
343 }
344 MOZ_ASSERT(r == 0);
345 break;
346 }
347
348 // The profiler's critical section ends here. After this point, none of the
349 // critical section limitations documented above apply.
350 //
351 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
352
353 // This isn't strictly necessary, but doing so does help pick up anomalies
354 // in which the signal handler is running when it shouldn't be.
355 sSigHandlerCoordinator = nullptr;
356 }
357
358 // END Sampler target specifics
359 ////////////////////////////////////////////////////////////////////////
360
361 ////////////////////////////////////////////////////////////////////////
362 // BEGIN SamplerThread target specifics
363
ThreadEntry(void * aArg)364 static void* ThreadEntry(void* aArg) {
365 auto thread = static_cast<SamplerThread*>(aArg);
366 thread->Run();
367 return nullptr;
368 }
369
SamplerThread(PSLockRef aLock,uint32_t aActivityGeneration,double aIntervalMilliseconds)370 SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
371 double aIntervalMilliseconds)
372 : Sampler(aLock),
373 mActivityGeneration(aActivityGeneration),
374 mIntervalMicroseconds(
375 std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
376 #if defined(USE_LUL_STACKWALK)
377 lul::LUL* lul = CorePS::Lul(aLock);
378 if (!lul) {
379 CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
380 // Read all the unwind info currently available.
381 lul = CorePS::Lul(aLock);
382 read_procmaps(lul);
383
384 // Switch into unwind mode. After this point, we can't add or remove any
385 // unwind info to/from this LUL instance. The only thing we can do with
386 // it is Unwind() calls.
387 lul->EnableUnwinding();
388
389 // Has a test been requested?
390 if (PR_GetEnv("MOZ_PROFILER_LUL_TEST")) {
391 int nTests = 0, nTestsPassed = 0;
392 RunLulUnitTests(&nTests, &nTestsPassed, lul);
393 }
394 }
395 #endif
396
397 // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
398 // the signal ourselves instead of relying on itimer provides much better
399 // accuracy.
400 if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
401 MOZ_CRASH("pthread_create failed");
402 }
403 }
404
~SamplerThread()405 SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
406
SleepMicro(uint32_t aMicroseconds)407 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
408 if (aMicroseconds >= 1000000) {
409 // Use usleep for larger intervals, because the nanosleep
410 // code below only supports intervals < 1 second.
411 MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
412 return;
413 }
414
415 struct timespec ts;
416 ts.tv_sec = 0;
417 ts.tv_nsec = aMicroseconds * 1000UL;
418
419 int rv = ::nanosleep(&ts, &ts);
420
421 while (rv != 0 && errno == EINTR) {
422 // Keep waiting in case of interrupt.
423 // nanosleep puts the remaining time back into ts.
424 rv = ::nanosleep(&ts, &ts);
425 }
426
427 MOZ_ASSERT(!rv, "nanosleep call failed");
428 }
429
Stop(PSLockRef aLock)430 void SamplerThread::Stop(PSLockRef aLock) {
431 // Restore old signal handler. This is global state so it's important that
432 // we do it now, while gPSMutex is locked. It's safe to do this now even
433 // though this SamplerThread is still alive, because the next time the main
434 // loop of Run() iterates it won't get past the mActivityGeneration check,
435 // and so won't send any signals.
436 Sampler::Disable(aLock);
437 }
438
439 // END SamplerThread target specifics
440 ////////////////////////////////////////////////////////////////////////
441
442 #if defined(GP_OS_linux)
443
444 // We use pthread_atfork() to temporarily disable signal delivery during any
445 // fork() call. Without that, fork() can be repeatedly interrupted by signal
446 // delivery, requiring it to be repeatedly restarted, which can lead to *long*
447 // delays. See bug 837390.
448 //
449 // We provide no paf_child() function to run in the child after forking. This
450 // is fine because we always immediately exec() after fork(), and exec()
451 // clobbers all process state. (At one point we did have a paf_child()
452 // function, but it caused problems related to locking gPSMutex. See bug
453 // 1348374.)
454 //
455 // Unfortunately all this is only doable on non-Android because Bionic doesn't
456 // have pthread_atfork.
457
458 // In the parent, before the fork, record IsPaused, and then pause.
paf_prepare()459 static void paf_prepare() {
460 MOZ_RELEASE_ASSERT(CorePS::Exists());
461
462 PSAutoLock lock(gPSMutex);
463
464 if (ActivePS::Exists(lock)) {
465 ActivePS::SetWasPaused(lock, ActivePS::IsPaused(lock));
466 ActivePS::SetIsPaused(lock, true);
467 }
468 }
469
470 // In the parent, after the fork, return IsPaused to the pre-fork state.
paf_parent()471 static void paf_parent() {
472 MOZ_RELEASE_ASSERT(CorePS::Exists());
473
474 PSAutoLock lock(gPSMutex);
475
476 if (ActivePS::Exists(lock)) {
477 ActivePS::SetIsPaused(lock, ActivePS::WasPaused(lock));
478 ActivePS::SetWasPaused(lock, false);
479 }
480 }
481
PlatformInit(PSLockRef aLock)482 static void PlatformInit(PSLockRef aLock) {
483 // Set up the fork handlers.
484 pthread_atfork(paf_prepare, paf_parent, nullptr);
485 }
486
487 #else
488
PlatformInit(PSLockRef aLock)489 static void PlatformInit(PSLockRef aLock) {}
490
491 #endif
492
493 #if defined(HAVE_NATIVE_UNWIND)
494 // Context used by synchronous samples. It's safe to have a single one because
495 // only one synchronous sample can be taken at a time (due to
496 // profiler_get_backtrace()'s PSAutoLock).
497 ucontext_t sSyncUContext;
498
SyncPopulate()499 void Registers::SyncPopulate() {
500 if (!getcontext(&sSyncUContext)) {
501 PopulateRegsFromContext(*this, &sSyncUContext);
502 }
503 }
504 #endif
505