1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 // Copyright (c) 2006-2011 The Chromium Authors. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions
7 // are met:
8 // * Redistributions of source code must retain the above copyright
9 // notice, this list of conditions and the following disclaimer.
10 // * Redistributions in binary form must reproduce the above copyright
11 // notice, this list of conditions and the following disclaimer in
12 // the documentation and/or other materials provided with the
13 // distribution.
14 // * Neither the name of Google, Inc. nor the names of its contributors
15 // may be used to endorse or promote products derived from this
16 // software without specific prior written permission.
17 //
18 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
21 // FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
22 // COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
23 // INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
24 // BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
25 // OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
26 // AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
28 // OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 // SUCH DAMAGE.
30
31 // This file is used for both Linux and Android.
32
33 #include <stdio.h>
34 #include <math.h>
35
36 #include <pthread.h>
37 #if defined(GP_OS_freebsd)
38 # include <sys/thr.h>
39 #endif
40 #include <semaphore.h>
41 #include <signal.h>
42 #include <sys/time.h>
43 #include <sys/resource.h>
44 #include <sys/syscall.h>
45 #include <sys/types.h>
46 #include <stdlib.h>
47 #include <sched.h>
48 #include <ucontext.h>
49 // Ubuntu Dapper requires memory pages to be marked as
50 // executable. Otherwise, OS raises an exception when executing code
51 // in that page.
52 #include <sys/types.h> // mmap & munmap
53 #include <sys/mman.h> // mmap & munmap
54 #include <sys/stat.h> // open
55 #include <fcntl.h> // open
56 #include <unistd.h> // sysconf
57 #include <semaphore.h>
58 #ifdef __GLIBC__
59 # include <execinfo.h> // backtrace, backtrace_symbols
60 #endif // def __GLIBC__
61 #include <strings.h> // index
62 #include <errno.h>
63 #include <stdarg.h>
64
65 #include "prenv.h"
66 #include "mozilla/PodOperations.h"
67 #include "mozilla/DebugOnly.h"
68
69 #include <string.h>
70 #include <list>
71
72 using namespace mozilla;
73
74 namespace mozilla {
75 namespace baseprofiler {
76
// Returns the current wall-clock time expressed as microseconds elapsed
// since the Unix epoch (1970-01-01T00:00:00Z).
static int64_t MicrosecondsSince1970() {
  struct timeval now;
  gettimeofday(&now, nullptr);
  const int64_t seconds = now.tv_sec;
  const int64_t micros = now.tv_usec;
  return seconds * 1000000 + micros;
}
82
GetStackTop(void * aGuess)83 void* GetStackTop(void* aGuess) { return aGuess; }
84
PopulateRegsFromContext(Registers & aRegs,ucontext_t * aContext)85 static void PopulateRegsFromContext(Registers& aRegs, ucontext_t* aContext) {
86 aRegs.mContext = aContext;
87 mcontext_t& mcontext = aContext->uc_mcontext;
88
89 // Extracting the sample from the context is extremely machine dependent.
90 #if defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
91 aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_EIP]);
92 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_ESP]);
93 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_EBP]);
94 aRegs.mLR = 0;
95 #elif defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
96 aRegs.mPC = reinterpret_cast<Address>(mcontext.gregs[REG_RIP]);
97 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[REG_RSP]);
98 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[REG_RBP]);
99 aRegs.mLR = 0;
100 #elif defined(GP_PLAT_amd64_freebsd)
101 aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_rip);
102 aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_rsp);
103 aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_rbp);
104 aRegs.mLR = 0;
105 #elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
106 aRegs.mPC = reinterpret_cast<Address>(mcontext.arm_pc);
107 aRegs.mSP = reinterpret_cast<Address>(mcontext.arm_sp);
108 aRegs.mFP = reinterpret_cast<Address>(mcontext.arm_fp);
109 aRegs.mLR = reinterpret_cast<Address>(mcontext.arm_lr);
110 #elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
111 aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
112 aRegs.mSP = reinterpret_cast<Address>(mcontext.sp);
113 aRegs.mFP = reinterpret_cast<Address>(mcontext.regs[29]);
114 aRegs.mLR = reinterpret_cast<Address>(mcontext.regs[30]);
115 #elif defined(GP_PLAT_arm64_freebsd)
116 aRegs.mPC = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_elr);
117 aRegs.mSP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_sp);
118 aRegs.mFP = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_x[29]);
119 aRegs.mLR = reinterpret_cast<Address>(mcontext.mc_gpregs.gp_lr);
120 #elif defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_mips64_android)
121 aRegs.mPC = reinterpret_cast<Address>(mcontext.pc);
122 aRegs.mSP = reinterpret_cast<Address>(mcontext.gregs[29]);
123 aRegs.mFP = reinterpret_cast<Address>(mcontext.gregs[30]);
124
125 #else
126 # error "bad platform"
127 #endif
128 }
129
130 #if defined(GP_OS_android)
131 # define SYS_tgkill __NR_tgkill
132 #endif
133
#if defined(GP_OS_linux) || defined(GP_OS_android)
// Glibc/Bionic don't provide a wrapper for the tgkill syscall, so invoke it
// directly: deliver |signalno| to the specific thread |tid| belonging to
// thread group (process) |tgid|.
int tgkill(pid_t tgid, pid_t tid, int signalno) {
  return syscall(SYS_tgkill, tgid, tid, signalno);
}
#endif
139
140 #if defined(GP_OS_freebsd)
141 # define tgkill thr_kill2
142 #endif
143
144 class PlatformData {
145 public:
PlatformData(BaseProfilerThreadId aThreadId)146 explicit PlatformData(BaseProfilerThreadId aThreadId) {}
147
~PlatformData()148 ~PlatformData() {}
149 };
150
151 ////////////////////////////////////////////////////////////////////////
152 // BEGIN Sampler target specifics
153
154 // The only way to reliably interrupt a Linux thread and inspect its register
155 // and stack state is by sending a signal to it, and doing the work inside the
156 // signal handler. But we don't want to run much code inside the signal
157 // handler, since POSIX severely restricts what we can do in signal handlers.
158 // So we use a system of semaphores to suspend the thread and allow the
159 // sampler thread to do all the work of unwinding and copying out whatever
160 // data it wants.
161 //
162 // A four-message protocol is used to reliably suspend and later resume the
163 // thread to be sampled (the samplee):
164 //
165 // Sampler (signal sender) thread Samplee (thread to be sampled)
166 //
167 // Prepare the SigHandlerCoordinator
168 // and point sSigHandlerCoordinator at it
169 //
170 // send SIGPROF to samplee ------- MSG 1 ----> (enter signal handler)
171 // wait(mMessage2) Copy register state
172 // into sSigHandlerCoordinator
173 // <------ MSG 2 ----- post(mMessage2)
174 // Samplee is now suspended. wait(mMessage3)
175 // Examine its stack/register
176 // state at leisure
177 //
178 // Release samplee:
179 // post(mMessage3) ------- MSG 3 ----->
180 // wait(mMessage4) Samplee now resumes. Tell
181 // the sampler that we are done.
182 // <------ MSG 4 ------ post(mMessage4)
183 // Now we know the samplee's signal (leave signal handler)
184 // handler has finished using
185 // sSigHandlerCoordinator. We can
186 // safely reuse it for some other thread.
187 //
188
189 // A type used to coordinate between the sampler (signal sending) thread and
190 // the thread currently being sampled (the samplee, which receives the
191 // signals).
192 //
193 // The first message is sent using a SIGPROF signal delivery. The subsequent
194 // three are sent using sem_wait/sem_post pairs. They are named accordingly
195 // in the following struct.
196 struct SigHandlerCoordinator {
SigHandlerCoordinatormozilla::baseprofiler::SigHandlerCoordinator197 SigHandlerCoordinator() {
198 PodZero(&mUContext);
199 int r = sem_init(&mMessage2, /* pshared */ 0, 0);
200 r |= sem_init(&mMessage3, /* pshared */ 0, 0);
201 r |= sem_init(&mMessage4, /* pshared */ 0, 0);
202 MOZ_ASSERT(r == 0);
203 (void)r;
204 }
205
~SigHandlerCoordinatormozilla::baseprofiler::SigHandlerCoordinator206 ~SigHandlerCoordinator() {
207 int r = sem_destroy(&mMessage2);
208 r |= sem_destroy(&mMessage3);
209 r |= sem_destroy(&mMessage4);
210 MOZ_ASSERT(r == 0);
211 (void)r;
212 }
213
214 sem_t mMessage2; // To sampler: "context is in sSigHandlerCoordinator"
215 sem_t mMessage3; // To samplee: "resume"
216 sem_t mMessage4; // To sampler: "finished with sSigHandlerCoordinator"
217 ucontext_t mUContext; // Context at signal
218 };
219
220 struct SigHandlerCoordinator* Sampler::sSigHandlerCoordinator = nullptr;
221
// SIGPROF handler, run on the samplee. Implements the samplee's half of the
// four-message protocol described above. It must remain async-signal-safe:
// it only copies the delivered context and uses sem_post/sem_wait, which
// POSIX lists as safe to call from a signal handler.
static void SigprofHandler(int aSignal, siginfo_t* aInfo, void* aContext) {
  // Avoid TSan warning about clobbering errno.
  int savedErrno = errno;

  MOZ_ASSERT(aSignal == SIGPROF);
  MOZ_ASSERT(Sampler::sSigHandlerCoordinator);

  // By sending us this signal, the sampler thread has sent us message 1 in
  // the comment above, with the meaning "|sSigHandlerCoordinator| is ready
  // for use, please copy your register context into it."
  Sampler::sSigHandlerCoordinator->mUContext =
      *static_cast<ucontext_t*>(aContext);

  // Send message 2: tell the sampler thread that the context has been copied
  // into |sSigHandlerCoordinator->mUContext|.  sem_post can never fail by
  // being interrupted by a signal, so there's no loop around this call.
  int r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage2);
  MOZ_ASSERT(r == 0);

  // At this point, the sampler thread assumes we are suspended, so we must
  // not touch any global state here.

  // Wait for message 3: the sampler thread tells us to resume.
  while (true) {
    r = sem_wait(&Sampler::sSigHandlerCoordinator->mMessage3);
    if (r == -1 && errno == EINTR) {
      // Interrupted by a signal.  Try again.
      continue;
    }
    // We don't expect any other kind of failure
    MOZ_ASSERT(r == 0);
    break;
  }

  // Send message 4: tell the sampler thread that we are finished accessing
  // |sSigHandlerCoordinator|.  After this point it is not safe to touch
  // |sSigHandlerCoordinator|.
  r = sem_post(&Sampler::sSigHandlerCoordinator->mMessage4);
  MOZ_ASSERT(r == 0);

  errno = savedErrno;
}
264
Sampler(PSLockRef aLock)265 Sampler::Sampler(PSLockRef aLock) : mMyPid(profiler_current_process_id()) {
266 #if defined(USE_EHABI_STACKWALK)
267 EHABIStackWalkInit();
268 #endif
269
270 // NOTE: We don't initialize LUL here, instead initializing it in
271 // SamplerThread's constructor. This is because with the
272 // profiler_suspend_and_sample_thread entry point, we want to be able to
273 // sample without waiting for LUL to be initialized.
274
275 // Request profiling signals.
276 struct sigaction sa;
277 sa.sa_sigaction = SigprofHandler;
278 sigemptyset(&sa.sa_mask);
279 sa.sa_flags = SA_RESTART | SA_SIGINFO;
280 if (sigaction(SIGPROF, &sa, &mOldSigprofHandler) != 0) {
281 MOZ_CRASH("Error installing SIGPROF handler in the profiler");
282 }
283 }
284
Disable(PSLockRef aLock)285 void Sampler::Disable(PSLockRef aLock) {
286 // Restore old signal handler. This is global state so it's important that
287 // we do it now, while gPSMutex is locked.
288 sigaction(SIGPROF, &mOldSigprofHandler, 0);
289 }
290
// Suspends |aRegisteredThread| via SIGPROF, captures its register state,
// invokes |aProcessRegs(regs, aNow)| while the thread is paused, then
// resumes it. Runs on the sampler thread and drives the sampler's half of
// the four-message protocol diagrammed above. |aProcessRegs| executes
// inside the profiler's "critical section": it must not allocate memory or
// acquire locks.
template <typename Func>
void Sampler::SuspendAndSampleAndResumeThread(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
    const TimeStamp& aNow, const Func& aProcessRegs) {
  // Only one sampler thread can be sampling at once. So we expect to have
  // complete control over |sSigHandlerCoordinator|.
  MOZ_ASSERT(!sSigHandlerCoordinator);

  if (!mSamplerTid.IsSpecified()) {
    mSamplerTid = profiler_current_thread_id();
  }
  BaseProfilerThreadId sampleeTid = aRegisteredThread.Info()->ThreadId();
  // Signalling ourselves would deadlock in the wait loops below.
  MOZ_RELEASE_ASSERT(sampleeTid != mSamplerTid);

  //----------------------------------------------------------------//
  // Suspend the samplee thread and get its context.

  SigHandlerCoordinator coord;  // on sampler thread's stack
  sSigHandlerCoordinator = &coord;

  // Send message 1 to the samplee (the thread to be sampled), by
  // signalling at it.
  // This could fail if the thread doesn't exist anymore.
  int r = tgkill(mMyPid.ToNumber(), sampleeTid.ToNumber(), SIGPROF);
  if (r == 0) {
    // Wait for message 2 from the samplee, indicating that the context
    // is available and that the thread is suspended.
    while (true) {
      r = sem_wait(&sSigHandlerCoordinator->mMessage2);
      if (r == -1 && errno == EINTR) {
        // Interrupted by a signal.  Try again.
        continue;
      }
      // We don't expect any other kind of failure.
      MOZ_ASSERT(r == 0);
      break;
    }

    //----------------------------------------------------------------//
    // Sample the target thread.

    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
    //
    // The profiler's "critical section" begins here.  In the critical section,
    // we must not do any dynamic memory allocation, nor try to acquire any lock
    // or any other unshareable resource.  This is because the thread to be
    // sampled has been suspended at some entirely arbitrary point, and we have
    // no idea which unsharable resources (locks, essentially) it holds.  So any
    // attempt to acquire any lock, including the implied locks used by the
    // malloc implementation, risks deadlock.  This includes TimeStamp::Now(),
    // which gets a lock on Windows.

    // The samplee thread is now frozen and sSigHandlerCoordinator->mUContext is
    // valid.  We can poke around in it and unwind its stack as we like.

    // Extract the current register values.
    Registers regs;
    PopulateRegsFromContext(regs, &sSigHandlerCoordinator->mUContext);
    aProcessRegs(regs, aNow);

    //----------------------------------------------------------------//
    // Resume the target thread.

    // Send message 3 to the samplee, which tells it to resume.
    r = sem_post(&sSigHandlerCoordinator->mMessage3);
    MOZ_ASSERT(r == 0);

    // Wait for message 4 from the samplee, which tells us that it has
    // finished with |sSigHandlerCoordinator|.
    while (true) {
      r = sem_wait(&sSigHandlerCoordinator->mMessage4);
      if (r == -1 && errno == EINTR) {
        continue;
      }
      MOZ_ASSERT(r == 0);
      break;
    }

    // The profiler's critical section ends here.  After this point, none of the
    // critical section limitations documented above apply.
    //
    // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
  }

  // This isn't strictly necessary, but doing so does help pick up anomalies
  // in which the signal handler is running when it shouldn't be.
  sSigHandlerCoordinator = nullptr;
}
379
380 // END Sampler target specifics
381 ////////////////////////////////////////////////////////////////////////
382
383 ////////////////////////////////////////////////////////////////////////
384 // BEGIN SamplerThread target specifics
385
ThreadEntry(void * aArg)386 static void* ThreadEntry(void* aArg) {
387 auto thread = static_cast<SamplerThread*>(aArg);
388 thread->Run();
389 return nullptr;
390 }
391
// Creates the sampler thread. Converts the requested interval to whole
// microseconds (rounded to nearest, minimum 1), optionally initializes the
// LUL stack unwinder, and starts the pthread that runs the sampling loop.
SamplerThread::SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                             double aIntervalMilliseconds, uint32_t aFeatures)
    : mSampler(aLock),
      mActivityGeneration(aActivityGeneration),
      mIntervalMicroseconds(
          std::max(1, int(floor(aIntervalMilliseconds * 1000 + 0.5)))) {
#if defined(USE_LUL_STACKWALK)
  lul::LUL* lul = CorePS::Lul(aLock);
  if (!lul && ProfilerFeature::HasStackWalkEnabled(aFeatures)) {
    CorePS::SetLul(aLock, MakeUnique<lul::LUL>(logging_sink_for_LUL));
    // Read all the unwind info currently available.
    lul = CorePS::Lul(aLock);
    read_procmaps(lul);

    // Switch into unwind mode. After this point, we can't add or remove any
    // unwind info to/from this LUL instance. The only thing we can do with
    // it is Unwind() calls.
    lul->EnableUnwinding();

    // Has a test been requested?
    if (getenv("MOZ_PROFILER_LUL_TEST")) {
      int nTests = 0, nTestsPassed = 0;
      RunLulUnitTests(&nTests, &nTestsPassed, lul);
    }
  }
#endif

  // Start the sampling thread. It repeatedly sends a SIGPROF signal. Sending
  // the signal ourselves instead of relying on itimer provides much better
  // accuracy.
  if (pthread_create(&mThread, nullptr, ThreadEntry, this) != 0) {
    MOZ_CRASH("pthread_create failed");
  }
}
426
~SamplerThread()427 SamplerThread::~SamplerThread() { pthread_join(mThread, nullptr); }
428
SleepMicro(uint32_t aMicroseconds)429 void SamplerThread::SleepMicro(uint32_t aMicroseconds) {
430 if (aMicroseconds >= 1000000) {
431 // Use usleep for larger intervals, because the nanosleep
432 // code below only supports intervals < 1 second.
433 MOZ_ALWAYS_TRUE(!::usleep(aMicroseconds));
434 return;
435 }
436
437 struct timespec ts;
438 ts.tv_sec = 0;
439 ts.tv_nsec = aMicroseconds * 1000UL;
440
441 int rv = ::nanosleep(&ts, &ts);
442
443 while (rv != 0 && errno == EINTR) {
444 // Keep waiting in case of interrupt.
445 // nanosleep puts the remaining time back into ts.
446 rv = ::nanosleep(&ts, &ts);
447 }
448
449 MOZ_ASSERT(!rv, "nanosleep call failed");
450 }
451
// Stops sampling by uninstalling the SIGPROF handler. The thread itself is
// joined later, in the destructor.
void SamplerThread::Stop(PSLockRef aLock) {
  // Restore old signal handler. This is global state so it's important that
  // we do it now, while gPSMutex is locked. It's safe to do this now even
  // though this SamplerThread is still alive, because the next time the main
  // loop of Run() iterates it won't get past the mActivityGeneration check,
  // and so won't send any signals.
  mSampler.Disable(aLock);
}
460
461 // END SamplerThread target specifics
462 ////////////////////////////////////////////////////////////////////////
463
464 #if defined(GP_OS_linux) || defined(GP_OS_freebsd)
465
466 // We use pthread_atfork() to temporarily disable signal delivery during any
467 // fork() call. Without that, fork() can be repeatedly interrupted by signal
468 // delivery, requiring it to be repeatedly restarted, which can lead to *long*
469 // delays. See bug 837390.
470 //
471 // We provide no paf_child() function to run in the child after forking. This
472 // is fine because we always immediately exec() after fork(), and exec()
473 // clobbers all process state. Also, we don't want the sampler to resume in the
474 // child process between fork() and exec(), it would be wasteful.
475 //
476 // Unfortunately all this is only doable on non-Android because Bionic doesn't
477 // have pthread_atfork.
478
479 // In the parent, before the fork, increase gSkipSampling to ensure that
480 // profiler sampling loops will be skipped. There could be one in progress now,
481 // causing a small delay, but further sampling will be skipped, allowing `fork`
482 // to complete.
paf_prepare()483 static void paf_prepare() { ++gSkipSampling; }
484
485 // In the parent, after the fork, decrease gSkipSampling to let the sampler
486 // resume sampling (unless other places have made it non-zero as well).
paf_parent()487 static void paf_parent() { --gSkipSampling; }
488
// One-time platform setup: register the fork handlers above. No child
// handler is registered; see the rationale in the comment block above.
static void PlatformInit(PSLockRef aLock) {
  // Set up the fork handlers.
  pthread_atfork(paf_prepare, paf_parent, nullptr);
}
493
494 #else
495
PlatformInit(PSLockRef aLock)496 static void PlatformInit(PSLockRef aLock) {}
497
498 #endif
499
500 #if defined(HAVE_NATIVE_UNWIND)
501 // Context used by synchronous samples. It's safe to have a single one because
502 // only one synchronous sample can be taken at a time (due to
503 // profiler_get_backtrace()'s PSAutoLock).
504 // ucontext_t sSyncUContext;
505
// Synchronous (same-thread) register capture, used by synchronous sampling.
// Not implemented on this platform: deliberately crashes if reached, with
// the intended getcontext-based implementation left commented out below.
void Registers::SyncPopulate() {
  // TODO port getcontext from breakpad, if profiler_get_backtrace is needed.
  MOZ_CRASH("profiler_get_backtrace() unsupported");
  // if (!getcontext(&sSyncUContext)) {
  //   PopulateRegsFromContext(*this, &sSyncUContext);
  // }
}
513 #endif
514
515 } // namespace baseprofiler
516 } // namespace mozilla
517