1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 // There are three kinds of samples done by the profiler.
8 //
9 // - A "periodic" sample is the most complex kind. It is done in response to a
10 // timer while the profiler is active. It involves writing a stack trace plus
11 // a variety of other values (memory measurements, responsiveness
12 // measurements, markers, etc.) into the main ProfileBuffer. The sampling is
13 // done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
14 // get the register values.
15 //
16 // - A "synchronous" sample is a simpler kind. It is done in response to an API
17 // call (profiler_get_backtrace()). It involves writing a stack trace and
18 // little else into a temporary ProfileBuffer, and wrapping that up in a
19 // ProfilerBacktrace that can be subsequently used in a marker. The sampling
20 // is done on-thread, and so Registers::SyncPopulate() is used to get the
21 // register values.
22 //
23 // - A "backtrace" sample is the simplest kind. It is done in response to an
24 // API call (profiler_suspend_and_sample_thread()). It involves getting a
25 // stack trace via a ProfilerStackCollector; it does not write to a
26 // ProfileBuffer. The sampling is done from off-thread, and so uses
27 // SuspendAndSampleAndResumeThread() to get the register values.
28
29 #include "platform.h"
30
31 #include <algorithm>
32 #include <errno.h>
33 #include <fstream>
34 #include <ostream>
35 #include <sstream>
36
37 // #include "memory_hooks.h"
38 #include "mozilla/ArrayUtils.h"
39 #include "mozilla/Atomics.h"
40 #include "mozilla/AutoProfilerLabel.h"
41 #include "mozilla/BaseProfilerDetail.h"
42 #include "mozilla/DoubleConversion.h"
43 #include "mozilla/Printf.h"
44 #include "mozilla/ProfileBufferChunkManagerSingle.h"
45 #include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
46 #include "mozilla/ProfileChunkedBuffer.h"
47 #include "mozilla/Services.h"
48 #include "mozilla/Span.h"
49 #include "mozilla/StackWalk.h"
50 #include "mozilla/StaticPtr.h"
51 #include "mozilla/ThreadLocal.h"
52 #include "mozilla/TimeStamp.h"
53 #include "mozilla/Tuple.h"
54 #include "mozilla/UniquePtr.h"
55 #include "mozilla/Vector.h"
56 #include "prdtoa.h"
57 #include "prtime.h"
58
59 #include "BaseProfiler.h"
60 #include "PageInformation.h"
61 #include "ProfiledThreadData.h"
62 #include "ProfilerBacktrace.h"
63 #include "ProfileBuffer.h"
64 #include "BaseProfilerMarkerPayload.h"
65 #include "RegisteredThread.h"
66 #include "BaseProfilerSharedLibraries.h"
67 #include "ThreadInfo.h"
68 #include "VTuneProfiler.h"
69
70 // Win32 builds always have frame pointers, so FramePointerStackWalk() always
71 // works.
72 #if defined(GP_PLAT_x86_windows)
73 # define HAVE_NATIVE_UNWIND
74 # define USE_FRAME_POINTER_STACK_WALK
75 #endif
76
77 // Win64 builds always omit frame pointers, so we use the slower
78 // MozStackWalk(), which works in that case.
79 #if defined(GP_PLAT_amd64_windows)
80 # define HAVE_NATIVE_UNWIND
81 # define USE_MOZ_STACK_WALK
82 #endif
83
84 // AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
85 // MozStackWalk().
86 #if defined(GP_PLAT_arm64_windows)
87 # define HAVE_NATIVE_UNWIND
88 # define USE_MOZ_STACK_WALK
89 #endif
90
91 // Mac builds only have frame pointers when MOZ_PROFILING is specified, so
92 // FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
93 // on Mac.
94 #if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
95 # define HAVE_NATIVE_UNWIND
96 # define USE_FRAME_POINTER_STACK_WALK
97 #endif
98
99 // Android builds use the ARM Exception Handling ABI to unwind.
100 #if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
101 # define HAVE_NATIVE_UNWIND
102 # define USE_EHABI_STACKWALK
103 # include "EHABIStackWalk.h"
104 #endif
105
106 // Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
107 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) || \
108 defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) || \
109 defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) || \
110 defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
111 defined(GP_PLAT_arm64_freebsd)
112 # define HAVE_NATIVE_UNWIND
113 # define USE_LUL_STACKWALK
114 # include "lul/LulMain.h"
115 # include "lul/platform-linux-lul.h"
116
117 // On linux we use LUL for periodic samples and synchronous samples, but we use
118 // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
119 // (See the comment at the top of the file for a definition of
120 // periodic/synchronous/backtrace.).
121 //
122 // FramePointerStackWalk can produce incomplete stacks when the current entry is
123 // in a shared library without framepointers, however LUL can take a long time
124 // to initialize, which is undesirable for consumers of
125 // profiler_suspend_and_sample_thread like the Background Hang Reporter.
126 # if defined(MOZ_PROFILING)
127 # define USE_FRAME_POINTER_STACK_WALK
128 # endif
129 #endif
130
131 // We can only stackwalk without expensive initialization on platforms which
132 // support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
133 // initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
134 // which can be expensive.
135 #if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
136 # define HAVE_FASTINIT_NATIVE_UNWIND
137 #endif
138
139 #ifdef MOZ_VALGRIND
140 # include <valgrind/memcheck.h>
141 #else
142 # define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
143 #endif
144
145 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
146 # include <ucontext.h>
147 #endif
148
149 namespace mozilla {
150 namespace baseprofiler {
151
152 using detail::RacyFeatures;
153
// Returns true if logging is enabled for the given verbosity level.
// The maximum enabled level is computed once, from the environment:
// 5 if MOZ_BASE_PROFILER_VERBOSE_LOGGING is set, else 4 if
// MOZ_BASE_PROFILER_DEBUG_LOGGING is set, else 3 if
// MOZ_BASE_PROFILER_LOGGING is set, otherwise 0 (logging disabled).
bool LogTest(int aLevelToTest) {
  static const int maxLevel = [] {
    if (getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING")) {
      return 5;
    }
    if (getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING")) {
      return 4;
    }
    if (getenv("MOZ_BASE_PROFILER_LOGGING")) {
      return 3;
    }
    return 0;
  }();
  return aLevelToTest <= maxLevel;
}
163
// Writes a printf-style message to the profiler's logging destination:
// the Android system log on Android builds, stderr everywhere else.
// aFmt is a printf format string; the variadic arguments must match it.
void PrintToConsole(const char* aFmt, ...) {
  va_list args;
  va_start(args, aFmt);
#if defined(ANDROID)
  __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, args);
#else
  vfprintf(stderr, aFmt, args);
#endif
  va_end(args);
}
174
// Compile-time check that the features listed in
// BASE_PROFILER_FOR_EACH_FEATURE are numbered 0, 1, 2, ... with no gaps or
// duplicates. Returns false at the first feature whose number is unexpected.
constexpr static bool ValidateFeatures() {
  int expectedFeatureNumber = 0;

  // Feature numbers should start at 0 and increase by 1 each.
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
  if ((n_) != expectedFeatureNumber) {        \
    return false;                             \
  }                                           \
  ++expectedFeatureNumber;

  BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)

#undef CHECK_FEATURE

  return true;
}

static_assert(ValidateFeatures(), "Feature list is invalid");
193
194 // Return all features that are available on this platform.
// Return all features that are available on this platform.
// Starts from the full feature set, then clears features that the base
// profiler never supports (Java, JS, Screenshots, TaskTracer, JSTracer) and
// those the current build cannot support (StackWalk without native unwind).
static uint32_t AvailableFeatures() {
  uint32_t features = 0;

#define ADD_FEATURE(n_, str_, Name_, desc_) \
  ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

#undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
  ProfilerFeature::ClearJava(features);
  ProfilerFeature::ClearJS(features);
  ProfilerFeature::ClearScreenshots(features);
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
  ProfilerFeature::ClearTaskTracer(features);
  ProfilerFeature::ClearJSTracer(features);

  return features;
}
218
219 // Default features common to all contexts (even if not available).
DefaultFeatures()220 static uint32_t DefaultFeatures() {
221 return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
222 ProfilerFeature::StackWalk | ProfilerFeature::Threads;
223 }
224
225 // Extra default features when MOZ_PROFILER_STARTUP is set (even if not
226 // available).
// Extra features added to the defaults when MOZ_PROFILER_STARTUP is set
// (even if not available on this platform).
static uint32_t StartupExtraDefaultFeatures() {
  // Enable mainthreadio by default for startup profiles as startup is heavy on
  // I/O operations, and main thread I/O is really important to see there.
  return ProfilerFeature::MainThreadIO;
}
232
233 // The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
234 // Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
235 // External profilers may use this same lock for their own data, but as the lock
236 // is non-recursive, *only* `f(PSLockRef, ...)` functions below should be
237 // called, to avoid double-locking.
// RAII lock guard for the profiler's global state mutex (gPSMutex); see the
// comment above for usage rules.
class MOZ_RAII PSAutoLock {
 public:
  // Acquire gPSMutex for the lifetime of this object.
  PSAutoLock() { gPSMutex.Lock(); }

  ~PSAutoLock() { gPSMutex.Unlock(); }

  // Non-copyable: the mutex is non-recursive, and a copy would unlock twice.
  PSAutoLock(const PSAutoLock&) = delete;
  void operator=(const PSAutoLock&) = delete;

  // Diagnostic helper: true if the calling thread currently holds gPSMutex.
  [[nodiscard]] static bool IsLockedOnCurrentThread() {
    return gPSMutex.IsLockedOnCurrentThread();
  }

 private:
  static detail::BaseProfilerMutex gPSMutex;
};
254
// Out-of-line definition of PSAutoLock's static mutex.
detail::BaseProfilerMutex PSAutoLock::gPSMutex;

// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields. Holding a PSAutoLock (and passing it by const reference) is the
// compile-time proof that gPSMutex is locked.
typedef const PSAutoLock& PSLockRef;
260
// Generates a static getter `name_(PSLockRef)` returning member `m<name_>`;
// the PSLockRef parameter is proof that gPSMutex is held.
#define PS_GET(type_, name_) \
  static type_ name_(PSLockRef) { \
    MOZ_ASSERT(sInstance); \
    return sInstance->m##name_; \
  }

// Like PS_GET, but for members that are safe to read without holding
// gPSMutex (immutable or internally thread-safe members only).
#define PS_GET_LOCKLESS(type_, name_) \
  static type_ name_() { \
    MOZ_ASSERT(sInstance); \
    return sInstance->m##name_; \
  }

// Generates both the locked getter and a locked setter `Set<name_>()`.
#define PS_GET_AND_SET(type_, name_) \
  PS_GET(type_, name_) \
  static void Set##name_(PSLockRef, type_ a##name_) { \
    MOZ_ASSERT(sInstance); \
    sInstance->m##name_ = a##name_; \
  }
279
280 // All functions in this file can run on multiple threads unless they have an
281 // NS_IsMainThread() assertion.
282
283 // This class contains the profiler's core global state, i.e. that which is
284 // valid even when the profiler is not active. Most profile operations can't do
285 // anything useful when this class is not instantiated, so we release-assert
286 // its non-nullness in all such operations.
287 //
288 // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
289 // PSAutoLock reference as an argument as proof that the gPSMutex is currently
290 // locked. This makes it clear when gPSMutex is locked and helps avoid
291 // accidental unlocked accesses to global state. There are ways to circumvent
292 // this mechanism, but please don't do so without *very* good reason and a
293 // detailed explanation.
294 //
295 // The exceptions to this rule:
296 //
297 // - mProcessStartTime, because it's immutable;
298 //
299 // - each thread's RacyRegisteredThread object is accessible without locking via
300 // TLSRegisteredThread::RacyRegisteredThread().
class CorePS {
 private:
  CorePS()
      : mMainThreadId(profiler_current_thread_id()),
        mProcessStartTime(TimeStamp::ProcessCreation()),
        // This needs its own mutex, because it is used concurrently from
        // functions guarded by gPSMutex as well as others without safety (e.g.,
        // profiler_add_marker). It is *not* used inside the critical section of
        // the sampler, because mutexes cannot be used there.
        mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
#ifdef USE_LUL_STACKWALK
        ,
        mLul(nullptr)
#endif
  {
  }

  ~CorePS() {}

 public:
  // Instantiate the singleton. Must be called exactly once, with gPSMutex
  // held.
  static void Create(PSLockRef aLock) {
    MOZ_ASSERT(!sInstance);
    sInstance = new CorePS();
  }

  // Tear down the singleton, with gPSMutex held.
  static void Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    delete sInstance;
    sInstance = nullptr;
  }

  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
  // being locked. This is because CorePS is instantiated so early on the main
  // thread that we don't have to worry about it being racy.
  static bool Exists() { return !!sInstance; }

  // True if the calling thread is the one CorePS was created on.
  static bool IsMainThread() {
    MOZ_ASSERT(sInstance);
    return profiler_current_thread_id() == sInstance->mMainThreadId;
  }

  // Accumulates the malloc-measured size of the profiler core into aProfSize,
  // and of LUL (when built with LUL stack walking) into aLulSize.
  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                        size_t& aProfSize, size_t& aLulSize) {
    MOZ_ASSERT(sInstance);

    aProfSize += aMallocSizeOf(sInstance);

    for (auto& registeredThread : sInstance->mRegisteredThreads) {
      aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
    }

    for (auto& registeredPage : sInstance->mRegisteredPages) {
      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Measurement of the following things may be added later if DMD finds it
    // is worthwhile:
    // - CorePS::mRegisteredThreads itself (its elements' children are
    // measured above)
    // - CorePS::mRegisteredPages itself (its elements' children are
    // measured above)
    // - CorePS::mInterposeObserver

#if defined(USE_LUL_STACKWALK)
    if (sInstance->mLul) {
      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
    }
#endif
  }

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(int, MainThreadId)

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)

  // No PSLockRef is needed for this field because it's thread-safe.
  PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)

  PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)

  // Takes ownership of a newly-registered thread's bookkeeping object.
  static void AppendRegisteredThread(
      PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
  }

  // Removes (and thereby destroys) the RegisteredThread for an unregistering
  // thread. No-op if aRegisteredThread is not found.
  static void RemoveRegisteredThread(PSLockRef,
                                     RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    // Remove aRegisteredThread from mRegisteredThreads.
    for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
      if (rt.get() == aRegisteredThread) {
        sInstance->mRegisteredThreads.erase(&rt);
        return;
      }
    }
  }

  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)

  // Registers a page, replacing a matching "about:blank" placeholder entry
  // and ignoring exact duplicates.
  static void AppendRegisteredPage(PSLockRef,
                                   RefPtr<PageInformation>&& aRegisteredPage) {
    MOZ_ASSERT(sInstance);
    struct RegisteredPageComparator {
      PageInformation* aA;
      bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
    };

    auto foundPageIter = std::find_if(
        sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
        RegisteredPageComparator{aRegisteredPage.get()});

    if (foundPageIter != sInstance->mRegisteredPages.end()) {
      if ((*foundPageIter)->Url() == "about:blank") {
        // When a BrowsingContext is loaded, the first url loaded in it will be
        // about:blank, and if the principal matches, the first document loaded
        // in it will share an inner window. That's why we should delete the
        // intermittent about:blank if they share the inner window.
        sInstance->mRegisteredPages.erase(foundPageIter);
      } else {
        // Do not register the same page again.
        return;
      }
    }
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
  }

  // Drops every registered page whose inner window ID matches.
  static void RemoveRegisteredPage(PSLockRef,
                                   uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    // Remove RegisteredPage from mRegisteredPages by given inner window ID.
    sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
      return rd->InnerWindowID() == aRegisteredInnerWindowID;
    });
  }

  static void ClearRegisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mRegisteredPages.clear();
  }

  PS_GET(const Vector<BaseProfilerCount*>&, Counters)

  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
    MOZ_ASSERT(sInstance);
    // we don't own the counter; they may be stored in static objects
    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
  }

  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
    // we may be called to remove a counter after the profiler is stopped or
    // late in shutdown.
    if (sInstance) {
      auto* counter = std::find(sInstance->mCounters.begin(),
                                sInstance->mCounters.end(), aCounter);
      // The counter must have been registered; asserting catches mismatched
      // Append/Remove pairs.
      MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
      sInstance->mCounters.erase(counter);
    }
  }

#ifdef USE_LUL_STACKWALK
  // Returns the LUL unwinder state; null until SetLul() is first called.
  static lul::LUL* Lul(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLul.get();
  }
  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
    MOZ_ASSERT(sInstance);
    sInstance->mLul = std::move(aLul);
  }
#endif

  PS_GET_AND_SET(const std::string&, ProcessName)

 private:
  // The singleton instance
  static CorePS* sInstance;

  // ID of the main thread (assuming CorePS was started on the main thread).
  const int mMainThreadId;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // The thread-safe blocks-oriented buffer into which all profiling data is
  // recorded.
  // ActivePS controls the lifetime of the underlying contents buffer: When
  // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
  // see ActivePS for further details.
  // Note: This needs to live here outside of ActivePS, because some producers
  // are indirectly controlled (e.g., by atomic flags) and therefore may still
  // attempt to write some data shortly after ActivePS has shutdown and deleted
  // the underlying buffer in memory.
  ProfileChunkedBuffer mCoreBuffer;

  // Info on all the registered threads.
  // ThreadIds in mRegisteredThreads are unique.
  Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;

  // Info on all the registered pages.
  // InnerWindowIDs in mRegisteredPages are unique.
  Vector<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters
  Vector<BaseProfilerCount*> mCounters;

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  UniquePtr<lul::LUL> mLul;
#endif

  // Process name, provided by child process initialization code.
  std::string mProcessName;
};
517
CorePS* CorePS::sInstance = nullptr;

class SamplerThread;

// Forward declaration: creates the periodic-sampling thread. The definition
// is platform-specific and not in this excerpt.
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
                                       double aInterval);
524
// Pairs a registered thread (non-owning pointer) with its owned profiling
// data, for threads currently being profiled (see
// ActivePS::mLiveProfiledThreads).
struct LiveProfiledThreadData {
  RegisteredThread* mRegisteredThread;
  UniquePtr<ProfiledThreadData> mProfiledThreadData;
};
529
// The buffer size is provided as a number of "entries", this is their size in
// bytes. Used below to convert user-supplied entry counts into byte sizes.
constexpr static uint32_t scBytesPerEntry = 8;

// Expected maximum size needed to store one stack sample.
constexpr static uint32_t scExpectedMaximumStackSize = 64 * 1024;
536
537 // This class contains the profiler's global state that is valid only when the
538 // profiler is active. When not instantiated, the profiler is inactive.
539 //
540 // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
541 // CorePS.
542 //
class ActivePS {
 private:
  // We need to decide how many chunks of what size we want to fit in the given
  // total maximum capacity for this process, in the (likely) context of
  // multiple processes doing the same choice and having an inter-process
  // mechanism to control the overall memory limit.

  // Minimum chunk size allowed, enough for at least one stack.
  constexpr static uint32_t scMinimumChunkSize = 2 * scExpectedMaximumStackSize;

  // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
  // next), and 2 released chunks (so that one can be recycled when old, leaving
  // one with some data).
  constexpr static uint32_t scMinimumNumberOfChunks = 4;

  // And we want to limit chunks to a maximum size, which is a compromise
  // between:
  // - A big size, which helps with reducing the rate of allocations and IPCs.
  // - A small size, which helps with equalizing the duration of recorded data
  //   (as the inter-process controller will discard the oldest chunks in all
  //   Firefox processes).
  constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;

 public:
  // We should be able to store at least the minimum number of the smallest-
  // possible chunks.
  constexpr static uint32_t scMinimumBufferSize =
      scMinimumNumberOfChunks * scMinimumChunkSize;
  constexpr static uint32_t scMinimumBufferEntries =
      scMinimumBufferSize / scBytesPerEntry;

  // Limit to 2GiB.
  constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
  constexpr static uint32_t scMaximumBufferEntries =
      scMaximumBufferSize / scBytesPerEntry;
578
ClampToAllowedEntries(uint32_t aEntries)579 constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
580 if (aEntries <= scMinimumBufferEntries) {
581 return scMinimumBufferEntries;
582 }
583 if (aEntries >= scMaximumBufferEntries) {
584 return scMaximumBufferEntries;
585 }
586 return aEntries;
587 }
588
589 private:
  // Size of one chunk, in bytes: the clamped total buffer size spread over
  // the minimum number of chunks, capped at scMaximumChunkSize. The
  // computation is done in size_t and narrowed back to uint32_t at the end.
  constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
    return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
                                 scBytesPerEntry / scMinimumNumberOfChunks,
                             size_t(scMaximumChunkSize)));
  }
595
  // Normalizes the caller-requested feature set:
  // - drops features unavailable on this platform/configuration,
  // - forces Threads on when a thread filter list was given,
  // - expands FileIO features into the features they imply.
  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
    // Filter out any features unavailable in this platform/configuration.
    aFeatures &= AvailableFeatures();

    // Always enable ProfilerFeature::Threads if we have a filter, because
    // users sometimes ask to filter by a list of threads but forget to
    // explicitly specify ProfilerFeature::Threads.
    if (aFilterCount > 0) {
      aFeatures |= ProfilerFeature::Threads;
    }

    // Some features imply others.
    if (aFeatures & ProfilerFeature::FileIOAll) {
      aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
    } else if (aFeatures & ProfilerFeature::FileIO) {
      aFeatures |= ProfilerFeature::MainThreadIO;
    }

    return aFeatures;
  }
616
  // Constructs the active-session state: sizes a chunk manager from
  // aCapacity, attaches it to the global core buffer (so writes start being
  // accepted), deep-copies the thread name filters, and creates (but does not
  // yet start) the sampler thread.
  ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
           const Maybe<double>& aDuration)
      : mGeneration(sNextGeneration++),
        mCapacity(aCapacity),
        mDuration(aDuration),
        mInterval(aInterval),
        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
        mProfileBufferChunkManager(
            size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
            ChunkSizeForEntries(aCapacity.Value())),
        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
          CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
          return CorePS::CoreBuffer();
        }()),
        // The new sampler thread doesn't start sampling immediately because the
        // main loop within Run() is blocked until this function's caller
        // unlocks gPSMutex.
        mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
        // NOTE(review): this #undef looks like a leftover; HAS_FEATURE is not
        // defined anywhere in this excerpt — confirm before removing.
#undef HAS_FEATURE
        ,
        mIsPaused(false)
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
        ,
        mWasPaused(false)
#endif
  {
    // Deep copy aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
    }
  }
650
~ActivePS()651 ~ActivePS() { CorePS::CoreBuffer().ResetChunkManager(); }
652
ThreadSelected(const char * aThreadName)653 bool ThreadSelected(const char* aThreadName) {
654 if (mFilters.empty()) {
655 return true;
656 }
657
658 std::string name = aThreadName;
659 std::transform(name.begin(), name.end(), name.begin(), ::tolower);
660
661 for (uint32_t i = 0; i < mFilters.length(); ++i) {
662 std::string filter = mFilters[i];
663
664 if (filter == "*") {
665 return true;
666 }
667
668 std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
669
670 // Crude, non UTF-8 compatible, case insensitive substring search
671 if (name.find(filter) != std::string::npos) {
672 return true;
673 }
674
675 // If the filter starts with pid:, check for a pid match
676 if (filter.find("pid:") == 0) {
677 std::string mypid = std::to_string(profiler_current_process_id());
678 if (filter.compare(4, std::string::npos, mypid) == 0) {
679 return true;
680 }
681 }
682 }
683
684 return false;
685 }
686
687 public:
  // Instantiate the active-profiler singleton, starting a session with the
  // given buffer capacity, sampling interval, features, thread filters, and
  // optional maximum duration. Must only be called when no session exists.
  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, const Maybe<double>& aDuration) {
    MOZ_ASSERT(!sInstance);
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
                             aFilterCount, aDuration);
  }
695
  // Tear down the active session. Returns the sampler thread so the caller
  // can finish shutting it down — presumably after releasing gPSMutex, since
  // the sampler thread is not stopped here; confirm with the call sites.
  [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }
704
Exists(PSLockRef)705 static bool Exists(PSLockRef) { return !!sInstance; }
706
  // Returns true if the given session parameters match the current session's,
  // including an element-wise string comparison of the thread filter list.
  // NOTE: mFeatures went through AdjustFeatures() at construction, so a
  // caller passing the original unadjusted feature set may compare unequal.
  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
                     const Maybe<double>& aDuration, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount) {
    MOZ_ASSERT(sInstance);
    if (sInstance->mCapacity != aCapacity ||
        sInstance->mDuration != aDuration ||
        sInstance->mInterval != aInterval ||
        sInstance->mFeatures != aFeatures ||
        sInstance->mFilters.length() != aFilterCount) {
      return false;
    }

    for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
      if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
        return false;
      }
    }
    return true;
  }
727
  // Malloc-measured size of the active-session state, including the profile
  // buffer's contents (but see the exclusions listed below).
  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
    MOZ_ASSERT(sInstance);

    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }
743
  // A thread is profiled if it is the main thread or the Threads feature is
  // enabled — and, in either case, it passes the name filter list.
  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
    MOZ_ASSERT(sInstance);
    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }
749
  PS_GET(uint32_t, Generation)

  PS_GET(PowerOfTwo32, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

// Generates one `static bool Feature<Name>(PSLockRef)` getter per profiler
// feature, reporting whether that feature is enabled in the current session.
#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
  static bool Feature##Name_(PSLockRef) {                     \
    MOZ_ASSERT(sInstance);                                    \
    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
  }

  BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

#undef PS_GET_FEATURE

  PS_GET(const Vector<std::string>&, Filters)
771
  // Forwards to the chunk manager to satisfy any pending chunk requests.
  static void FulfillChunkRequests(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
  }
776
  // The session's ProfileBuffer, which wraps CorePS::CoreBuffer().
  static ProfileBuffer& Buffer(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mProfileBuffer;
  }
781
  // Threads that are currently registered and being profiled.
  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLiveProfiledThreads;
  }
786
787 // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
788 // for all threads that should be included in a profile, both for threads
789 // that are still registered, and for threads that have been unregistered but
790 // still have data in the buffer.
791 // For threads that have already been unregistered, the RegisteredThread
792 // pointer will be null.
793 // The returned array is sorted by thread register time.
794 // Do not hold on to the return value across thread registration or profiler
795 // restarts.
  static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
  ProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
    // Reserve room for both live and dead entries up front.
    MOZ_RELEASE_ASSERT(
        array.initCapacity(sInstance->mLiveProfiledThreads.length() +
                           sInstance->mDeadProfiledThreads.length()));
    for (auto& t : sInstance->mLiveProfiledThreads) {
      MOZ_RELEASE_ASSERT(array.append(
          std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
    }
    // Dead threads have no RegisteredThread anymore, hence the null pointer.
    for (auto& t : sInstance->mDeadProfiledThreads) {
      MOZ_RELEASE_ASSERT(
          array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
    }

    // Sort by thread registration time, as documented above.
    std::sort(array.begin(), array.end(),
              [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
                 const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
                return a.second->Info()->RegisterTime() <
                       b.second->Info()->RegisterTime();
              });
    return array;
  }
820
  // All pages to include in the profile: currently-registered pages plus
  // pages that died during the session.
  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    Vector<RefPtr<PageInformation>> array;
    for (auto& d : CorePS::RegisteredPages(aLock)) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    for (auto& d : sInstance->mDeadProfiledPages) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    // We don't need to sort the pages like threads since we won't show them
    // as a list.
    return array;
  }
834
835 // Do a linear search through mLiveProfiledThreads to find the
836 // ProfiledThreadData object for a RegisteredThread.
GetProfiledThreadData(PSLockRef,RegisteredThread * aRegisteredThread)837 static ProfiledThreadData* GetProfiledThreadData(
838 PSLockRef, RegisteredThread* aRegisteredThread) {
839 MOZ_ASSERT(sInstance);
840 for (const LiveProfiledThreadData& thread :
841 sInstance->mLiveProfiledThreads) {
842 if (thread.mRegisteredThread == aRegisteredThread) {
843 return thread.mProfiledThreadData.get();
844 }
845 }
846 return nullptr;
847 }
848
  // Starts profiling a registered thread: records the (non-owning)
  // RegisteredThread alongside its (owned) ProfiledThreadData.
  static ProfiledThreadData* AddLiveProfiledThread(
      PSLockRef, RegisteredThread* aRegisteredThread,
      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
            aRegisteredThread, std::move(aProfiledThreadData)}));

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
  }
860
UnregisterThread(PSLockRef aLockRef,RegisteredThread * aRegisteredThread)861 static void UnregisterThread(PSLockRef aLockRef,
862 RegisteredThread* aRegisteredThread) {
863 MOZ_ASSERT(sInstance);
864
865 DiscardExpiredDeadProfiledThreads(aLockRef);
866
867 // Find the right entry in the mLiveProfiledThreads array and remove the
868 // element, moving the ProfiledThreadData object for the thread into the
869 // mDeadProfiledThreads array.
870 // The thread's RegisteredThread object gets destroyed here.
871 for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
872 LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
873 if (thread.mRegisteredThread == aRegisteredThread) {
874 thread.mProfiledThreadData->NotifyUnregistered(
875 sInstance->mProfileBuffer.BufferRangeEnd());
876 MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
877 std::move(thread.mProfiledThreadData)));
878 sInstance->mLiveProfiledThreads.erase(
879 &sInstance->mLiveProfiledThreads[i]);
880 return;
881 }
882 }
883 }
884
  // Generates IsPaused() / SetIsPaused() accessors for mIsPaused.
  PS_GET_AND_SET(bool, IsPaused)

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  // Generates WasPaused() / SetWasPaused() accessors for mWasPaused (used
  // around fork(); see the mWasPaused member below).
  PS_GET_AND_SET(bool, WasPaused)
#endif
890
891 static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
892 MOZ_ASSERT(sInstance);
893 uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
894 // Discard any dead threads that were unregistered before bufferRangeStart.
895 sInstance->mDeadProfiledThreads.eraseIf(
896 [bufferRangeStart](
897 const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
898 Maybe<uint64_t> bufferPosition =
899 aProfiledThreadData->BufferPositionWhenUnregistered();
900 MOZ_RELEASE_ASSERT(bufferPosition,
901 "should have unregistered this thread");
902 return *bufferPosition < bufferRangeStart;
903 });
904 }
905
UnregisterPage(PSLockRef aLock,uint64_t aRegisteredInnerWindowID)906 static void UnregisterPage(PSLockRef aLock,
907 uint64_t aRegisteredInnerWindowID) {
908 MOZ_ASSERT(sInstance);
909 auto& registeredPages = CorePS::RegisteredPages(aLock);
910 for (size_t i = 0; i < registeredPages.length(); i++) {
911 RefPtr<PageInformation>& page = registeredPages[i];
912 if (page->InnerWindowID() == aRegisteredInnerWindowID) {
913 page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
914 MOZ_RELEASE_ASSERT(
915 sInstance->mDeadProfiledPages.append(std::move(page)));
916 registeredPages.erase(®isteredPages[i--]);
917 }
918 }
919 }
920
DiscardExpiredPages(PSLockRef)921 static void DiscardExpiredPages(PSLockRef) {
922 MOZ_ASSERT(sInstance);
923 uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
924 // Discard any dead pages that were unregistered before
925 // bufferRangeStart.
926 sInstance->mDeadProfiledPages.eraseIf(
927 [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
928 Maybe<uint64_t> bufferPosition =
929 aProfiledPage->BufferPositionWhenUnregistered();
930 MOZ_RELEASE_ASSERT(bufferPosition,
931 "should have unregistered this page");
932 return *bufferPosition < bufferRangeStart;
933 });
934 }
935
  // Unconditionally discards all dead pages (cf. DiscardExpiredPages(), which
  // only discards the expired ones).
  static void ClearUnregisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mDeadProfiledPages.clear();
  }
940
ClearExpiredExitProfiles(PSLockRef)941 static void ClearExpiredExitProfiles(PSLockRef) {
942 MOZ_ASSERT(sInstance);
943 uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
944 // Discard exit profiles that were gathered before our buffer RangeStart.
945 sInstance->mExitProfiles.eraseIf(
946 [bufferRangeStart](const ExitProfile& aExitProfile) {
947 return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
948 });
949 }
950
AddExitProfile(PSLockRef aLock,const std::string & aExitProfile)951 static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) {
952 MOZ_ASSERT(sInstance);
953
954 ClearExpiredExitProfiles(aLock);
955
956 MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
957 ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
958 }
959
MoveExitProfiles(PSLockRef aLock)960 static Vector<std::string> MoveExitProfiles(PSLockRef aLock) {
961 MOZ_ASSERT(sInstance);
962
963 ClearExpiredExitProfiles(aLock);
964
965 Vector<std::string> profiles;
966 MOZ_RELEASE_ASSERT(
967 profiles.initCapacity(sInstance->mExitProfiles.length()));
968 for (auto& profile : sInstance->mExitProfiles) {
969 MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
970 }
971 sInstance->mExitProfiles.clear();
972 return profiles;
973 }
974
 private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of 8-byte entries in mProfileBuffer.
  const PowerOfTwo32 mCapacity;

  // The maximum duration of entries in mProfileBuffer, in seconds.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled.
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // The chunk manager used by `mProfileBuffer` below.
  ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;

  // The buffer into which all samples are recorded.
  ProfileBuffer mProfileBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  // - mLiveProfiledThreads contains all threads that are still registered, and
  // - mDeadProfiledThreads contains all threads that have already been
  //   unregistered but for which there is still data in the profile buffer.
  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are being moved to this array after unregistration.
  // We are keeping them in case we need them in the profile data.
  // We are removing them when we ensure that we won't need them anymore.
  Vector<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // Is the profiler paused?
  bool mIsPaused;

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  // Used to record whether the profiler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasPaused;
#endif

  // An exit profile (JSON text) plus the buffer position at which it was
  // gathered; the position is used by ClearExpiredExitProfiles() to expire it.
  struct ExitProfile {
    std::string mJSON;
    uint64_t mBufferPositionAtGatherTime;
  };
  Vector<ExitProfile> mExitProfiles;
};
1054
ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

// Packs the profiler's Active/Paused state bits and the enabled feature bits
// into a single relaxed atomic word, so they can be read without gPSMutex.
Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
1063
/* static */
// Marks the profiler active and records the feature bits, in one atomic store.
void RacyFeatures::SetActive(uint32_t aFeatures) {
  sActiveAndFeatures = Active | aFeatures;
}
1068
/* static */
// Marks the profiler inactive and clears all feature bits in one atomic store.
void RacyFeatures::SetInactive() { sActiveAndFeatures = 0; }
1071
1072 /* static */
IsActive()1073 bool RacyFeatures::IsActive() { return uint32_t(sActiveAndFeatures) & Active; }
1074
/* static */
// Atomically sets the Paused bit (fetch-or on the atomic word).
void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; }
1077
/* static */
// Atomically clears the Paused bit (fetch-and on the atomic word).
void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; }
1080
1081 /* static */
IsActiveWithFeature(uint32_t aFeature)1082 bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
1083 uint32_t af = sActiveAndFeatures; // copy it first
1084 return (af & Active) && (af & aFeature);
1085 }
1086
1087 /* static */
IsActiveAndUnpaused()1088 bool RacyFeatures::IsActiveAndUnpaused() {
1089 uint32_t af = sActiveAndFeatures; // copy it first
1090 return (af & Active) && !(af & Paused);
1091 }
1092
1093 // Each live thread has a RegisteredThread, and we store a reference to it in
1094 // TLS. This class encapsulates that TLS.
1095 class TLSRegisteredThread {
1096 public:
Init(PSLockRef)1097 static bool Init(PSLockRef) {
1098 bool ok1 = sRegisteredThread.init();
1099 bool ok2 = AutoProfilerLabel::sProfilingStack.init();
1100 return ok1 && ok2;
1101 }
1102
1103 // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
RegisteredThread(PSLockRef)1104 static class RegisteredThread* RegisteredThread(PSLockRef) {
1105 return sRegisteredThread.get();
1106 }
1107
1108 // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
RacyRegisteredThread()1109 static class RacyRegisteredThread* RacyRegisteredThread() {
1110 class RegisteredThread* registeredThread = sRegisteredThread.get();
1111 return registeredThread ? ®isteredThread->RacyRegisteredThread()
1112 : nullptr;
1113 }
1114
1115 // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
1116 // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
1117 // is marginally slower because it requires an extra pointer indirection.
Stack()1118 static ProfilingStack* Stack() {
1119 return AutoProfilerLabel::sProfilingStack.get();
1120 }
1121
SetRegisteredThread(PSLockRef,class RegisteredThread * aRegisteredThread)1122 static void SetRegisteredThread(PSLockRef,
1123 class RegisteredThread* aRegisteredThread) {
1124 sRegisteredThread.set(aRegisteredThread);
1125 AutoProfilerLabel::sProfilingStack.set(
1126 aRegisteredThread
1127 ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack()
1128 : nullptr);
1129 }
1130
1131 private:
1132 // This is a non-owning reference to the RegisteredThread;
1133 // CorePS::mRegisteredThreads is the owning reference. On thread
1134 // deregistration, this reference is cleared and the RegisteredThread is
1135 // destroyed.
1136 static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
1137 };
1138
MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

/* static */
// Out-of-line accessor for the per-thread ProfilingStack TLS slot.
ProfilingStack* AutoProfilerLabel::GetProfilingStack() {
  return sProfilingStack.get();
}

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in BaseProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   BaseProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;

// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";
1165
1166 ////////////////////////////////////////////////////////////////////////
1167 // BEGIN sampling/unwinding code
1168
1169 // The registers used for stack unwinding and a few other sampling purposes.
1170 // The ctor does nothing; users are responsible for filling in the fields.
1171 class Registers {
1172 public:
Registers()1173 Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
1174
1175 #if defined(HAVE_NATIVE_UNWIND)
1176 // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
1177 void SyncPopulate();
1178 #endif
1179
Clear()1180 void Clear() { memset(this, 0, sizeof(*this)); }
1181
1182 // These fields are filled in by
1183 // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
1184 // samples, and by SyncPopulate() for synchronous samples.
1185 Address mPC; // Instruction pointer.
1186 Address mSP; // Stack pointer.
1187 Address mFP; // Frame pointer.
1188 Address mLR; // ARM link register.
1189 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
1190 // This contains all the registers, which means it duplicates the four fields
1191 // above. This is ok.
1192 ucontext_t* mContext; // The context from the signal handler.
1193 #endif
1194 };
1195
// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;

// A fixed-capacity native stack trace: parallel arrays of program counters
// and stack pointers (youngest frame first), zero-initialized on creation.
struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES] = {};
  void* mSPs[MAX_NATIVE_FRAMES] = {};
  size_t mCount = 0;  // Number of frames filled.

  NativeStack() = default;
};
1207
// Merges the profiling (label) stack and the native stack into one
// oldest-to-youngest frame sequence, outputting the details to aCollector.
// Frames from the two sources are interleaved by comparing stack addresses;
// when both describe the same address, the profiling-stack frame wins.
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
                        const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, const NativeStack& aNativeStack,
                        ProfilerStackCollector& aCollector) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();
  const ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
  uint32_t profilingStackFrameCount = profilingStack.stackSize();

  // NOTE(review): samplePosInBuffer is computed but not used below in this
  // (JS-less) version of the merge; presumably kept for parity with the
  // Gecko-profiler variant -- confirm before removing.
  Maybe<uint64_t> samplePosInBuffer;
  if (!aIsSynchronous) {
    // aCollector.SamplePositionInBuffer() will return Nothing() when
    // profiler_suspend_and_sample_thread is called from the background hang
    // reporter.
    samplePosInBuffer = aCollector.SamplePositionInBuffer();
  }
  // While the profiling stack array is ordered oldest-to-youngest, the native
  // array is ordered youngest-to-oldest. We must add frames to the collector
  // oldest-to-youngest. Thus, iterate over the profiling stack forwards and
  // the native array backwards. Note: this means the terminating condition
  // for nativeIndex is being < 0.
  uint32_t profilingStackIndex = 0;
  int32_t nativeIndex = aNativeStack.mCount - 1;

  uint8_t* lastLabelFrameStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) {
    // There are 1 or 2 frames available. Find and add the oldest.
    uint8_t* profilingStackAddr = nullptr;
    uint8_t* nativeStackAddr = nullptr;

    if (profilingStackIndex != profilingStackFrameCount) {
      const ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      if (profilingStackFrame.isLabelFrame() ||
          profilingStackFrame.isSpMarkerFrame()) {
        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
      }

      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
      // To avoid both the profiling stack frame and jit frame being recorded
      // (and showing up twice), the interpreter marks the interpreter
      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
      if (profilingStackFrame.isOSRFrame()) {
        profilingStackIndex++;
        continue;
      }

      MOZ_ASSERT(lastLabelFrameStackAddr);
      profilingStackAddr = lastLabelFrameStackAddr;
    }

    if (nativeIndex >= 0) {
      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
    }

    // If there's a native stack frame which has the same SP as a profiling
    // stack frame, pretend we didn't see the native stack frame. Ditto for a
    // native stack frame which has the same SP as a JS stack frame. In effect
    // this means profiling stack frames or JS frames trump conflicting native
    // frames.
    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(profilingStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr);

    // Check to see if profiling stack frame is top-most.
    if (profilingStackAddr > nativeStackAddr) {
      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
      const ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      // Sp marker frames are just annotations and should not be recorded in
      // the profile.
      if (!profilingStackFrame.isSpMarkerFrame()) {
        aCollector.CollectProfilingStackFrame(profilingStackFrame);
      }
      profilingStackIndex++;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // greatest frame.
    if (nativeStackAddr) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }
}
1315
#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
// Forward declaration; the definition lives in the Windows-specific part of
// this file. Used by DoMozStackWalkBacktrace() below.
static HANDLE GetThreadHandle(PlatformData* aData);
#endif
1319
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
// Appends one frame (PC/SP pair) to the NativeStack passed via aClosure.
// aFrameNumber is unused.
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
  NativeStack* stack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(stack->mCount < MAX_NATIVE_FRAMES);
  const size_t frame = stack->mCount;
  stack->mPCs[frame] = aPC;
  stack->mSPs[frame] = aSP;
  stack->mCount = frame + 1;
}
#endif
1330
#if defined(USE_FRAME_POINTER_STACK_WALK)
// Walks the native stack by following frame pointers, filling aNativeStack.
static void DoFramePointerBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Record the current PC/SP as frame 0. FramePointerStackWalk() below will
  // number its frames 1..N, which is a bit weird but harmless because
  // StackWalkCallback() ignores the frame number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  const uint32_t framesLeft = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  // Only walk if the frame pointer lies within the sampled thread's stack
  // bounds; otherwise the walk would chase a bogus pointer.
  const void* stackEnd = aRegisteredThread.StackTop();
  if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
    FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, framesLeft,
                          &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
                          const_cast<void*>(stackEnd));
  }
}
#endif
1356
#if defined(USE_MOZ_STACK_WALK)
// Walks the native stack via MozStackWalkThread(), filling aNativeStack.
static void DoMozStackWalkBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Record the current PC/SP as frame 0. MozStackWalkThread() below will
  // number its frames 1..N, which is a bit weird but harmless because
  // StackWalkCallback() ignores the frame number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  const uint32_t framesLeft = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
  MOZ_ASSERT(thread);
  MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, framesLeft,
                     &aNativeStack, thread, /* context */ nullptr);
}
#endif
1380
#ifdef USE_EHABI_STACKWALK
// Walks the native stack using the ARM EHABI unwinder, filling aNativeStack
// in one call (mCount is set to the number of frames obtained).
static void DoEHABIBacktrace(PSLockRef aLock,
                             const RegisteredThread& aRegisteredThread,
                             const Registers& aRegs,
                             NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  aNativeStack.mCount =
      EHABIStackWalk(aRegs.mContext->uc_mcontext,
                     const_cast<void*>(aRegisteredThread.StackTop()),
                     aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES);
}
#endif
1396
1397 #ifdef USE_LUL_STACKWALK
1398
// See the comment at the callsite for why this function is necessary.
# if defined(MOZ_HAVE_ASAN_BLACKLIST)
// A memcpy() replacement that ASAN does not instrument, used for copying the
// sampled thread's stack without triggering false positives.
MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
                                           size_t aLen) {
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* dst = static_cast<char*>(aDst);
  const char* src = static_cast<const char*>(aSrc);
  const char* const srcEnd = src + aLen;

  while (src != srcEnd) {
    *dst++ = *src++;
  }
}
# endif
1415
// Walks the native stack with the LUL (CFI-based) unwinder: seeds the unwind
// registers from the signal-handler context, copies a bounded snapshot of the
// sampled thread's stack, and lets LUL unwind from the copy.
static void DoLULBacktrace(PSLockRef aLock,
                           const RegisteredThread& aRegisteredThread,
                           const Registers& aRegs, NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const mcontext_t* mc = &aRegs.mContext->uc_mcontext;

  lul::UnwindRegs startRegs;
  memset(&startRegs, 0, sizeof(startRegs));

  // Extract the per-architecture registers LUL needs from the mcontext.
# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
# elif defined(GP_PLAT_amd64_freebsd)
  startRegs.xip = lul::TaggedUWord(mc->mc_rip);
  startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
  startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
  startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
  startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
  startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
  startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
  startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
  startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
  startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
  startRegs.sp = lul::TaggedUWord(mc->sp);
# elif defined(GP_PLAT_arm64_freebsd)
  startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
  startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
  startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
  startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
  startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
  startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
  startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
# elif defined(GP_PLAT_mips64_linux)
  startRegs.pc = lul::TaggedUWord(mc->pc);
  startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
  startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
# else
#  error "Unknown plat"
# endif

  // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
  // stack's registered top point. Do some basic sanity checks too. This
  // assumes that the TaggedUWord holding the stack pointer value is valid, but
  // it should be, since it was constructed that way in the code just above.

  // We could construct |stackImg| so that LUL reads directly from the stack in
  // question, rather than from a copy of it. That would reduce overhead and
  // space use a bit. However, it gives a problem with dynamic analysis tools
  // (ASan, TSan, Valgrind) which is that such tools will report invalid or
  // racing memory accesses, and such accesses will be reported deep inside LUL.
  // By taking a copy here, we can either sanitise the copy (for Valgrind) or
  // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
  // to try and suppress errors inside LUL.
  //
  // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
  // observed in some minutes of testing, whilst keeping the size of this
  // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in
  // practice are small, 4KB or less, and so the copy costs are insignificant
  // compared to other profiler overhead.
  //
  // |stackImg| is allocated on this (the sampling thread's) stack. That
  // implies that the frame for this function is at least N_STACK_BYTES large.
  // In general it would be considered unacceptable to have such a large frame
  // on a stack, but it only exists for the unwinder thread, and so is not
  // expected to be a problem. Allocating it on the heap is troublesome because
  // this function runs whilst the sampled thread is suspended, so any heap
  // allocation risks deadlock. Allocating it as a global variable is not
  // thread safe, which would be a problem if we ever allow multiple sampler
  // threads. Hence allocating it on the stack seems to be the least-worst
  // option.

  lul::StackImage stackImg;

  {
    // Per-architecture stack-pointer extraction, with the x86-64 ABI red zone
    // (128 bytes below rsp) included in the snapshot.
# if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
     defined(GP_PLAT_amd64_freebsd)
    uintptr_t rEDZONE_SIZE = 128;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
     defined(GP_PLAT_arm64_freebsd)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
# elif defined(GP_PLAT_mips64_linux)
    uintptr_t rEDZONE_SIZE = 0;
    uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
# else
#  error "Unknown plat"
# endif
    uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
    uintptr_t ws = sizeof(void*);
    // Align both bounds down to a word boundary before computing the copy
    // length, capped at N_STACK_BYTES.
    start &= ~(ws - 1);
    end &= ~(ws - 1);
    uintptr_t nToCopy = 0;
    if (start < end) {
      nToCopy = end - start;
      if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
    }
    MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
    stackImg.mLen = nToCopy;
    stackImg.mStartAvma = start;
    if (nToCopy > 0) {
      // If this is a vanilla memcpy(), ASAN makes the following complaint:
      //
      //   ERROR: AddressSanitizer: stack-buffer-underflow ...
      //   ...
      //   HINT: this may be a false positive if your program uses some custom
      //   stack unwind mechanism or swapcontext
      //
      // This code is very much a custom stack unwind mechanism! So we use an
      // alternative memcpy() implementation that is ignored by ASAN.
# if defined(MOZ_HAVE_ASAN_BLACKLIST)
      ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# else
      memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
# endif
      (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
    }
  }

  size_t framePointerFramesAcquired = 0;
  lul::LUL* lul = CorePS::Lul(aLock);
  lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
              reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
              &aNativeStack.mCount, &framePointerFramesAcquired,
              MAX_NATIVE_FRAMES, &startRegs, &stackImg);

  // Update stats in the LUL stats object. Unfortunately this requires
  // three global memory operations.
  lul->mStats.mContext += 1;
  lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
  lul->mStats.mFP += framePointerFramesAcquired;
}
1563
1564 #endif
1565
#ifdef HAVE_NATIVE_UNWIND
// Dispatches to the platform's native stack walker, filling aNativeStack.
static void DoNativeBacktrace(PSLockRef aLock,
                              const RegisteredThread& aRegisteredThread,
                              const Registers& aRegs,
                              NativeStack& aNativeStack) {
  // This method determines which stackwalker is used for periodic and
  // synchronous samples. (Backtrace samples are treated differently, see
  // profiler_suspend_and_sample_thread() for details). The only part of the
  // ordering that matters is that LUL must precede FRAME_POINTER, because on
  // Linux they can both be present.
# if defined(USE_LUL_STACKWALK)
  DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_EHABI_STACKWALK)
  DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_FRAME_POINTER_STACK_WALK)
  DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# elif defined(USE_MOZ_STACK_WALK)
  DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
# else
#  error "Invalid configuration"
# endif
}
#endif
1589
// Writes some components shared by periodic and synchronous profiles to
// ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
// and DoPeriodicSample().)
//
// The grammar for entry sequences is in a comment above
// ProfileBuffer::StreamSamplesToJSON.
static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
                                  RegisteredThread& aRegisteredThread,
                                  const Registers& aRegs, uint64_t aSamplePos,
                                  ProfileBuffer& aBuffer) {
  // WARNING: this function runs within the profiler's "critical section".

  // A thread-safe buffer would imply a mutex; taking any lock here could
  // deadlock while the samplee thread is suspended.
  MOZ_ASSERT(!aBuffer.IsThreadSafe(),
             "Mutexes cannot be used inside this critical section");

  MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));

  ProfileBufferCollector collector(aBuffer, aSamplePos);
  NativeStack nativeStack;
#if defined(HAVE_NATIVE_UNWIND)
  if (ActivePS::FeatureStackWalk(aLock)) {
    // Native unwinding requested: collect native frames first, then merge
    // them with the pseudo-stack frames.
    DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);

    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);
  } else
#endif
  {
    // No native unwind available/enabled: merge with an empty nativeStack.
    MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
                aRegs, nativeStack, collector);

    // We can't walk the whole native stack, but we can record the top frame.
    if (ActivePS::FeatureLeaf(aLock)) {
      aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
    }
  }
}
1627
1628 // Writes the components of a synchronous sample to the given ProfileBuffer.
DoSyncSample(PSLockRef aLock,RegisteredThread & aRegisteredThread,const TimeStamp & aNow,const Registers & aRegs,ProfileBuffer & aBuffer)1629 static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
1630 const TimeStamp& aNow, const Registers& aRegs,
1631 ProfileBuffer& aBuffer) {
1632 // WARNING: this function runs within the profiler's "critical section".
1633
1634 uint64_t samplePos =
1635 aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
1636
1637 TimeDuration delta = aNow - CorePS::ProcessStartTime();
1638 aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
1639
1640 DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
1641 samplePos, aBuffer);
1642 }
1643
// Writes the components of a periodic sample to ActivePS's ProfileBuffer.
// The ThreadId entry is already written in the main ProfileBuffer, its location
// is `aSamplePos`, we can write the rest to `aBuffer` (which may be different).
static void DoPeriodicSample(PSLockRef aLock,
                             RegisteredThread& aRegisteredThread,
                             ProfiledThreadData& aProfiledThreadData,
                             const Registers& aRegs, uint64_t aSamplePos,
                             ProfileBuffer& aBuffer) {
  // WARNING: this function runs within the profiler's "critical section".

  // NOTE(review): aProfiledThreadData is not used in this body; it appears to
  // be kept in the signature for the caller's convenience — confirm before
  // removing.
  DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
                 aSamplePos, aBuffer);
}
1657
1658 // END sampling/unwinding code
1659 ////////////////////////////////////////////////////////////////////////
1660
1661 ////////////////////////////////////////////////////////////////////////
1662 // BEGIN saving/streaming code
1663
// The largest integer exactly representable in a JS Number (IEEE-754 double):
// 2^53 - 1. Larger values would silently lose precision when streamed.
// (Was `const static` with a redundant unary `+`; `static constexpr` is the
// idiomatic spelling for a compile-time constant.)
static constexpr uint64_t kJS_MAX_SAFE_UINTEGER = 9007199254740991ULL;

// Converts aValue to a signed integer that is safe to emit as a JS Number.
// Returns -1 as a sentinel when aValue exceeds the JS safe-integer range.
static int64_t SafeJSInteger(uint64_t aValue) {
  return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
}
1669
// Streams one shared library as a JSON object with properties:
// start, end, offset (numeric, clamped to -1 when outside JS's safe-integer
// range — see SafeJSInteger), and name/path/debugName/debugPath/breakpadId/
// arch (strings).
static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
                                         const SharedLibrary& aLib) {
  aWriter.StartObjectElement();
  aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
  aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
  aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
  aWriter.StringProperty("name", aLib.GetModuleName().c_str());
  aWriter.StringProperty("path", aLib.GetModulePath().c_str());
  aWriter.StringProperty("debugName", aLib.GetDebugName().c_str());
  aWriter.StringProperty("debugPath", aLib.GetDebugPath().c_str());
  aWriter.StringProperty("breakpadId", aLib.GetBreakpadId().c_str());
  aWriter.StringProperty("arch", aLib.GetArch().c_str());
  aWriter.EndObject();
}
1684
AppendSharedLibraries(JSONWriter & aWriter)1685 void AppendSharedLibraries(JSONWriter& aWriter) {
1686 SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
1687 info.SortByAddress();
1688 for (size_t i = 0; i < info.GetSize(); i++) {
1689 AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
1690 }
1691 }
1692
// Streams the profiling category list as a JSON array, one object per
// category, in the same order as the ProfilingCategory enum (the front-end
// indexes into this array by category number).
static void StreamCategories(SpliceableJSONWriter& aWriter) {
  // Same order as ProfilingCategory. Format:
  // [
  //   {
  //     name: "Idle",
  //     color: "transparent",
  //     subcategories: ["Other"],
  //   },
  //   {
  //     name: "Other",
  //     color: "grey",
  //     subcategories: [
  //       "JSM loading",
  //       "Subprocess launching",
  //       "DLL loading"
  //     ]
  //   },
  //   ...
  // ]

  // Each category expands to: begin element, "name"/"color" properties, and
  // open a "subcategories" array; each subcategory adds one string element;
  // the end macro closes the array and the object.
  // NOTE(review): aWriter.Start() presumably begins an object element here —
  // confirm against the SpliceableJSONWriter API.
#define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
  aWriter.Start();                                               \
  aWriter.StringProperty("name", labelAsString);                 \
  aWriter.StringProperty("color", color);                        \
  aWriter.StartArrayProperty("subcategories");
#define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
  aWriter.StringElement(labelAsString);
#define CATEGORY_JSON_END_CATEGORY \
  aWriter.EndArray();              \
  aWriter.EndObject();

  BASE_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
                               CATEGORY_JSON_SUBCATEGORY,
                               CATEGORY_JSON_END_CATEGORY)

#undef CATEGORY_JSON_BEGIN_CATEGORY
#undef CATEGORY_JSON_SUBCATEGORY
#undef CATEGORY_JSON_END_CATEGORY
}
1732
1733 static int64_t MicrosecondsSince1970();
1734
// Writes the properties of the profile's "meta" JSON object.
// Off the main thread, only version/startTime/shutdownTime/categories are
// written (see the early return below); the remaining properties are
// main-thread-only.
static void StreamMetaJSCustomObject(PSLockRef aLock,
                                     SpliceableJSONWriter& aWriter,
                                     bool aIsShuttingDown) {
  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  // Profile JSON format version understood by consumers.
  aWriter.IntProperty("version", 19);

  // The "startTime" field holds the number of milliseconds since midnight
  // January 1, 1970 GMT. This grotty code computes (Now - (Now -
  // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
  TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
  aWriter.DoubleProperty(
      "startTime", MicrosecondsSince1970() / 1000.0 - delta.ToMilliseconds());

  // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
  // absolute time stamp: It's relative to startTime. This is consistent with
  // all other (non-"startTime") times anywhere in the profile JSON.
  if (aIsShuttingDown) {
    aWriter.DoubleProperty("shutdownTime", profiler_time());
  } else {
    aWriter.NullProperty("shutdownTime");
  }

  aWriter.StartArrayProperty("categories");
  StreamCategories(aWriter);
  aWriter.EndArray();

  if (!CorePS::IsMainThread()) {
    // Leave the rest of the properties out if we're not on the main thread.
    // At the moment, the only case in which this function is called on a
    // background thread is if we're in a content process and are going to
    // send this profile to the parent process. In that case, the parent
    // process profile's "meta" object already has the rest of the properties,
    // and the parent process profile is dumped on that process's main thread.
    return;
  }

  aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
  aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));

#ifdef DEBUG
  aWriter.IntProperty("debug", 1);
#else
  aWriter.IntProperty("debug", 0);
#endif

  // Hard-coded values: this code path never varies these fields.
  aWriter.IntProperty("gcpoison", 0);

  aWriter.IntProperty("asyncstack", 0);

  aWriter.IntProperty("processType", 0);
}
1787
StreamPages(PSLockRef aLock,SpliceableJSONWriter & aWriter)1788 static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
1789 MOZ_RELEASE_ASSERT(CorePS::Exists());
1790 ActivePS::DiscardExpiredPages(aLock);
1791 for (const auto& page : ActivePS::ProfiledPages(aLock)) {
1792 page->StreamJSON(aWriter);
1793 }
1794 }
1795
// Streams this process's profile JSON into aWriter, while the profiler lock
// is held. When aOnlyThreads is true, only the items of the "threads" array
// are emitted (no surrounding properties); otherwise libs/meta/pages/
// overhead/counters/threads/pausedRanges are all written.
static void locked_profiler_stream_json_for_this_process(
    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
    bool aIsShuttingDown, bool aOnlyThreads = false) {
  LOG("locked_profiler_stream_json_for_this_process");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process);

  const double collectionStartMs = profiler_time();

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  // If there is a set "Window length", discard older data.
  Maybe<double> durationS = ActivePS::Duration(aLock);
  if (durationS.isSome()) {
    const double durationStartMs = collectionStartMs - *durationS * 1000;
    buffer.DiscardSamplesBeforeTime(durationStartMs);
  }

  if (!aOnlyThreads) {
    // Put shared library info
    aWriter.StartArrayProperty("libs");
    AppendSharedLibraries(aWriter);
    aWriter.EndArray();

    // Put meta data
    aWriter.StartObjectProperty("meta");
    { StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown); }
    aWriter.EndObject();

    // Put page data
    aWriter.StartArrayProperty("pages");
    { StreamPages(aLock, aWriter); }
    aWriter.EndArray();

    buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(),
                                        aSinceTime);
    buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(),
                                aSinceTime);

    // Lists the samples for each thread profile
    aWriter.StartArrayProperty("threads");
  }

  // if aOnlyThreads is true, the only output will be the threads array items.
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads =
        ActivePS::ProfiledThreads(aLock);
    for (auto& thread : threads) {
      ProfiledThreadData* profiledThreadData = thread.second;
      profiledThreadData->StreamJSON(buffer, aWriter,
                                     CorePS::ProcessName(aLock),
                                     CorePS::ProcessStartTime(), aSinceTime);
    }
  }

  if (!aOnlyThreads) {
    aWriter.EndArray();

    aWriter.StartArrayProperty("pausedRanges");
    { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); }
    aWriter.EndArray();
  }

  const double collectionEndMs = profiler_time();

  // Record timestamps for the collection into the buffer, so that consumers
  // know why we didn't collect any samples for its duration.
  // We put these entries into the buffer after we've collected the profile,
  // so they'll be visible for the *next* profile collection (if they haven't
  // been overwritten due to buffer wraparound by then).
  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
}
1872
profiler_stream_json_for_this_process(SpliceableJSONWriter & aWriter,double aSinceTime,bool aIsShuttingDown,bool aOnlyThreads)1873 bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
1874 double aSinceTime,
1875 bool aIsShuttingDown,
1876 bool aOnlyThreads) {
1877 LOG("profiler_stream_json_for_this_process");
1878
1879 MOZ_RELEASE_ASSERT(CorePS::Exists());
1880
1881 PSAutoLock lock;
1882
1883 if (!ActivePS::Exists(lock)) {
1884 return false;
1885 }
1886
1887 locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
1888 aIsShuttingDown, aOnlyThreads);
1889 return true;
1890 }
1891
1892 // END saving/streaming code
1893 ////////////////////////////////////////////////////////////////////////
1894
FeatureCategory(uint32_t aFeature)1895 static char FeatureCategory(uint32_t aFeature) {
1896 if (aFeature & DefaultFeatures()) {
1897 if (aFeature & AvailableFeatures()) {
1898 return 'D';
1899 }
1900 return 'd';
1901 }
1902
1903 if (aFeature & StartupExtraDefaultFeatures()) {
1904 if (aFeature & AvailableFeatures()) {
1905 return 'S';
1906 }
1907 return 's';
1908 }
1909
1910 if (aFeature & AvailableFeatures()) {
1911 return '-';
1912 }
1913 return 'x';
1914 }
1915
PrintUsageThenExit(int aExitCode)1916 static void PrintUsageThenExit(int aExitCode) {
1917 PrintToConsole(
1918 "\n"
1919 "Profiler environment variable usage:\n"
1920 "\n"
1921 " MOZ_BASE_PROFILER_HELP\n"
1922 " If set to any value, prints this message.\n"
1923 " (Only BaseProfiler features are known here; Use MOZ_PROFILER_HELP\n"
1924 " for Gecko Profiler help, with more features).\n"
1925 "\n"
1926 " MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n"
1927 " Enables BaseProfiler logging to stdout. The levels of logging\n"
1928 " available are MOZ_BASE_PROFILER_LOGGING' (least verbose),\n"
1929 " '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n"
1930 "\n"
1931 " MOZ_PROFILER_STARTUP\n"
1932 " If set to any value other than '' or '0'/'N'/'n', starts the\n"
1933 " profiler immediately on start-up.\n"
1934 " Useful if you want profile code that runs very early.\n"
1935 "\n"
1936 " MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
1937 " If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n"
1938 " per process in the profiler's circular buffer when the profiler is\n"
1939 " first started.\n"
1940 " If unset, the platform default is used:\n"
1941 " %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
1942 " (%u bytes per entry -> %u or %u total bytes per process)\n"
1943 "\n"
1944 " MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
1945 " If MOZ_PROFILER_STARTUP is set, specifies the maximum life time\n"
1946 " of entries in the the profiler's circular buffer when the profiler\n"
1947 " is first started, in seconds.\n"
1948 " If unset, the life time of the entries will only be restricted by\n"
1949 " MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
1950 " additional time duration restriction will be applied.\n"
1951 "\n"
1952 " MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
1953 " If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
1954 " measured in milliseconds, when the profiler is first started.\n"
1955 " If unset, the platform default is used.\n"
1956 "\n"
1957 " MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
1958 " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
1959 " features, as the integer value of the features bitfield.\n"
1960 " If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
1961 "\n"
1962 " MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
1963 " If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
1964 " features, as a comma-separated list of strings.\n"
1965 " Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
1966 " If unset, the platform default is used.\n"
1967 "\n"
1968 " Features: (x=unavailable, D/d=default/unavailable,\n"
1969 " S/s=MOZ_PROFILER_STARTUP extra "
1970 "default/unavailable)\n",
1971 unsigned(ActivePS::scMinimumBufferEntries),
1972 unsigned(ActivePS::scMaximumBufferEntries),
1973 unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()),
1974 unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
1975 unsigned(scBytesPerEntry),
1976 unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
1977 unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() *
1978 scBytesPerEntry));
1979
1980 #define PRINT_FEATURE(n_, str_, Name_, desc_) \
1981 PrintToConsole(" %c %7u: \"%s\" (%s)\n", \
1982 FeatureCategory(ProfilerFeature::Name_), \
1983 ProfilerFeature::Name_, str_, desc_);
1984
1985 BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
1986
1987 #undef PRINT_FEATURE
1988
1989 PrintToConsole(
1990 " - \"default\" (All above D+S defaults)\n"
1991 "\n"
1992 " MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
1993 " If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as "
1994 "a\n"
1995 " comma-separated list of strings. A given thread will be sampled if\n"
1996 " any of the filters is a case-insensitive substring of the thread\n"
1997 " name. If unset, a default is used.\n"
1998 "\n"
1999 " MOZ_PROFILER_SHUTDOWN\n"
2000 " If set, the profiler saves a profile to the named file on shutdown.\n"
2001 "\n"
2002 " MOZ_PROFILER_SYMBOLICATE\n"
2003 " If set, the profiler will pre-symbolicate profiles.\n"
2004 " *Note* This will add a significant pause when gathering data, and\n"
2005 " is intended mainly for local development.\n"
2006 "\n"
2007 " MOZ_PROFILER_LUL_TEST\n"
2008 " If set to any value, runs LUL unit tests at startup.\n"
2009 "\n"
2010 " This platform %s native unwinding.\n"
2011 "\n",
2012 #if defined(HAVE_NATIVE_UNWIND)
2013 "supports"
2014 #else
2015 "does not support"
2016 #endif
2017 );
2018
2019 exit(aExitCode);
2020 }
2021
2022 ////////////////////////////////////////////////////////////////////////
2023 // BEGIN Sampler
2024
2025 #if defined(GP_OS_linux) || defined(GP_OS_android)
2026 struct SigHandlerCoordinator;
2027 #endif
2028
2029 // Sampler performs setup and teardown of the state required to sample with the
2030 // profiler. Sampler may exist when ActivePS is not present.
2031 //
2032 // SuspendAndSampleAndResumeThread must only be called from a single thread,
2033 // and must not sample the thread it is being called from. A separate Sampler
2034 // instance must be used for each thread which wants to capture samples.
2035
2036 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
2037 //
2038 // With the exception of SamplerThread, all Sampler objects must be Disable-d
2039 // before releasing the lock which was used to create them. This avoids races
2040 // on linux with the SIGPROF signal handler.
2041
class Sampler {
 public:
  // Sets up the profiler such that it can begin sampling.
  explicit Sampler(PSLockRef aLock);

  // Disable the sampler, restoring it to its previous state. This must be
  // called once, and only once, before the Sampler is destroyed.
  void Disable(PSLockRef aLock);

  // This method suspends and resumes the samplee thread. It calls the passed-in
  // function-like object aProcessRegs (passing it a populated |const
  // Registers&| arg) while the samplee thread is suspended.
  //
  // Func must be a function-like object of type `void()`.
  // NOTE(review): callers in this file invoke aProcessRegs with
  // (const Registers&, const TimeStamp&) — the `void()` note above looks
  // stale; confirm against the per-platform implementations.
  template <typename Func>
  void SuspendAndSampleAndResumeThread(
      PSLockRef aLock, const RegisteredThread& aRegisteredThread,
      const TimeStamp& aNow, const Func& aProcessRegs);

 private:
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
  // Used to restore the SIGPROF handler when ours is removed.
  struct sigaction mOldSigprofHandler;

  // This process' ID. Needed as an argument for tgkill in
  // SuspendAndSampleAndResumeThread.
  int mMyPid;

  // The sampler thread's ID. Used to assert that it is not sampling itself,
  // which would lead to deadlock.
  int mSamplerTid;

 public:
  // This is the one-and-only variable used to communicate between the sampler
  // thread and the samplee thread's signal handler. It's static because the
  // samplee thread's signal handler is static.
  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
#endif
};
2081
2082 // END Sampler
2083 ////////////////////////////////////////////////////////////////////////
2084
2085 ////////////////////////////////////////////////////////////////////////
2086 // BEGIN SamplerThread
2087
2088 // The sampler thread controls sampling and runs whenever the profiler is
2089 // active. It periodically runs through all registered threads, finds those
2090 // that should be sampled, then pauses and samples them.
2091
class SamplerThread {
 public:
  // Creates a sampler thread, but doesn't start it.
  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                double aIntervalMilliseconds);
  ~SamplerThread();

  // This runs on (is!) the sampler thread.
  void Run();

  // This runs on the main thread.
  void Stop(PSLockRef aLock);

 private:
  // This suspends the calling thread for the given number of microseconds.
  // Best effort timing.
  void SleepMicro(uint32_t aMicroseconds);

  // The sampler used to suspend and sample threads.
  Sampler mSampler;

  // The activity generation, for detecting when the sampler thread must stop.
  // Compared against ActivePS::Generation() on every loop iteration of Run().
  const uint32_t mActivityGeneration;

  // The interval between samples, measured in microseconds.
  const int mIntervalMicroseconds;

  // The OS-specific handle for the sampler thread.
#if defined(GP_OS_windows)
  HANDLE mThread;
#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
    defined(GP_OS_android) || defined(GP_OS_freebsd)
  pthread_t mThread;
#endif

  // Not copyable: the thread handle and Sampler must have a single owner.
  SamplerThread(const SamplerThread&) = delete;
  void operator=(const SamplerThread&) = delete;
};
2130
// This function is required because we need to create a SamplerThread within
// ActivePS's constructor, but SamplerThread is defined after ActivePS. It
// could probably be removed by moving some code around.
// Ownership of the returned SamplerThread passes to the caller.
static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
                                       double aInterval) {
  return new SamplerThread(aLock, aGeneration, aInterval);
}
2138
// This function is the sampler thread. This implementation is used for all
// targets.
void SamplerThread::Run() {
  // TODO: If possible, name this thread later on, after NSPR becomes available.
  // PR_SetCurrentThreadName("SamplerThread");

  // Features won't change during this SamplerThread's lifetime, so we can
  // determine now whether stack sampling is required.
  const bool noStackSampling = []() {
    PSAutoLock lock;
    if (!ActivePS::Exists(lock)) {
      // If there is no active profiler, it doesn't matter what we return,
      // because this thread will exit before any stack sampling is attempted.
      return false;
    }
    return ActivePS::FeatureNoStackSampling(lock);
  }();

  // Use local BlocksRingBuffer&ProfileBuffer to capture the stack.
  // (This is to avoid touching the CorePS::CoreBuffer lock while
  // a thread is suspended, because that thread could be working with
  // the CorePS::CoreBuffer as well.)
  ProfileBufferChunkManagerSingle localChunkManager(scExpectedMaximumStackSize);
  ProfileChunkedBuffer localBuffer(
      ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
  ProfileBuffer localProfileBuffer(localBuffer);

  // Will be kept between collections, to know what each collection does.
  auto previousState = localBuffer.GetState();

  // This will be positive if we are running behind schedule (sampling less
  // frequently than desired) and negative if we are ahead of schedule.
  TimeDuration lastSleepOvershoot = 0;
  TimeStamp sampleStart = TimeStamp::NowUnfuzzed();

  while (true) {
    // This scope is for |lock|. It ends before we sleep below.
    {
      PSAutoLock lock;
      TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();

      if (!ActivePS::Exists(lock)) {
        return;
      }

      // At this point profiler_stop() might have been called, and
      // profiler_start() might have been called on another thread. If this
      // happens the generation won't match.
      if (ActivePS::Generation(lock) != mActivityGeneration) {
        return;
      }

      ActivePS::ClearExpiredExitProfiles(lock);

      TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();

      if (!ActivePS::IsPaused(lock)) {
        TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
        ProfileBuffer& buffer = ActivePS::Buffer(lock);

        // handle per-process generic counters
        const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
        for (auto& counter : counters) {
          // create Buffer entries for each counter
          buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
          buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
          // XXX support keyed maps of counts
          // In the future, we'll support keyed counters - for example, counters
          // with a key which is a thread ID. For "simple" counters we'll just
          // use a key of 0.
          int64_t count;
          uint64_t number;
          counter->Sample(count, number);
          buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
          buffer.AddEntry(ProfileBufferEntry::Count(count));
          // A zero "number" is not written, saving buffer space.
          if (number) {
            buffer.AddEntry(ProfileBufferEntry::Number(number));
          }
        }
        TimeStamp countersSampled = TimeStamp::NowUnfuzzed();

        if (!noStackSampling) {
          const Vector<LiveProfiledThreadData>& liveThreads =
              ActivePS::LiveProfiledThreads(lock);

          for (auto& thread : liveThreads) {
            RegisteredThread* registeredThread = thread.mRegisteredThread;
            ProfiledThreadData* profiledThreadData =
                thread.mProfiledThreadData.get();
            RefPtr<ThreadInfo> info = registeredThread->Info();

            // If the thread is asleep and has been sampled before in the same
            // sleep episode, find and copy the previous sample, as that's
            // cheaper than taking a new sample.
            if (registeredThread->RacyRegisteredThread()
                    .CanDuplicateLastSampleDueToSleep()) {
              bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
                  info->ThreadId(), CorePS::ProcessStartTime(),
                  profiledThreadData->LastSample());
              if (dup_ok) {
                continue;
              }
            }

            AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);

            TimeStamp now = TimeStamp::NowUnfuzzed();

            // Add the thread ID now, so we know its position in the main
            // buffer, which is used by some JS data. (DoPeriodicSample only
            // knows about the temporary local buffer.)
            uint64_t samplePos =
                buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
            profiledThreadData->LastSample() = Some(samplePos);

            // Also add the time, so it's always there after the thread ID, as
            // expected by the parser. (Other stack data is optional.)
            TimeDuration delta = now - CorePS::ProcessStartTime();
            buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));

            // The callback runs while the samplee thread is suspended, and
            // writes only into the local buffer (see comment above).
            mSampler.SuspendAndSampleAndResumeThread(
                lock, *registeredThread, now,
                [&](const Registers& aRegs, const TimeStamp& aNow) {
                  DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
                                   aRegs, samplePos, localProfileBuffer);
                });

            // If data is complete, copy it into the global buffer.
            auto state = localBuffer.GetState();
            if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
              LOG("Stack sample too big for local storage, needed %u bytes",
                  unsigned(state.mRangeEnd - previousState.mRangeEnd));
            } else if (state.mRangeEnd - previousState.mRangeEnd >=
                       *CorePS::CoreBuffer().BufferLength()) {
              LOG("Stack sample too big for profiler storage, needed %u bytes",
                  unsigned(state.mRangeEnd - previousState.mRangeEnd));
            } else {
              CorePS::CoreBuffer().AppendContents(localBuffer);
            }

            // Clean up for the next run.
            localBuffer.Clear();
            previousState = localBuffer.GetState();
          }
        }

#if defined(USE_LUL_STACKWALK)
        // The LUL unwind object accumulates frame statistics. Periodically we
        // should poke it to give it a chance to print those statistics. This
        // involves doing I/O (fprintf, __android_log_print, etc.) and so
        // can't safely be done from the critical section inside
        // SuspendAndSampleAndResumeThread, which is why it is done here.
        CorePS::Lul(lock)->MaybeShowStats();
#endif
        TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();

        {
          AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
          ActivePS::FulfillChunkRequests(lock);
        }

        buffer.CollectOverheadStats(delta, lockAcquired - sampleStart,
                                    expiredMarkersCleaned - lockAcquired,
                                    countersSampled - expiredMarkersCleaned,
                                    threadsSampled - countersSampled);
      }
    }
    // gPSMutex is not held after this point.

    // Calculate how long a sleep to request. After the sleep, measure how
    // long we actually slept and take the difference into account when
    // calculating the sleep interval for the next iteration. This is an
    // attempt to keep "to schedule" in the presence of inaccuracy of the
    // actual sleep intervals.
    TimeStamp targetSleepEndTime =
        sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
    TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
    TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
    double sleepTime = std::max(
        0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds());
    SleepMicro(static_cast<uint32_t>(sleepTime));
    sampleStart = TimeStamp::NowUnfuzzed();
    lastSleepOvershoot =
        sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
  }
}
2325
2326 // Temporary closing namespaces from enclosing platform.cpp.
2327 } // namespace baseprofiler
2328 } // namespace mozilla
2329
2330 // We #include these files directly because it means those files can use
2331 // declarations from this file trivially. These provide target-specific
2332 // implementations of all SamplerThread methods except Run().
2333 #if defined(GP_OS_windows)
2334 # include "platform-win32.cpp"
2335 #elif defined(GP_OS_darwin)
2336 # include "platform-macos.cpp"
2337 #elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
2338 # include "platform-linux-android.cpp"
2339 #else
2340 # error "bad platform"
2341 #endif
2342
2343 namespace mozilla {
2344 namespace baseprofiler {
2345
// Creates the OS-specific per-thread data for aThreadId, wrapped so that
// PlatformDataDestructor deletes it when the owner goes away.
UniquePlatformData AllocPlatformData(int aThreadId) {
  return UniquePlatformData(new PlatformData(aThreadId));
}
2349
// Deleter used by UniquePlatformData; simply frees the PlatformData.
void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; }
2351
2352 // END SamplerThread
2353 ////////////////////////////////////////////////////////////////////////
2354
2355 ////////////////////////////////////////////////////////////////////////
2356 // BEGIN externally visible functions
2357
// Parses a single feature name into its feature bit.
// "default" expands to the default feature set (plus the startup extras when
// aIsStartup is true), masked by what is available on this platform.
// NOTE(review): on an unrecognized name, the comment below says "don't exit",
// but PrintUsageThenExit(0) does call exit(0) — confirm the intended behavior.
static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
  if (strcmp(aFeature, "default") == 0) {
    return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
                       : DefaultFeatures()) &
           AvailableFeatures();
  }

  // Expands to one strcmp-and-return per known feature.
#define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
  if (strcmp(aFeature, str_) == 0) {              \
    return ProfilerFeature::Name_;                \
  }

  BASE_PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)

#undef PARSE_FEATURE_BIT

  PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature);
  // Since we may have an old feature we don't implement anymore, don't exit
  PrintUsageThenExit(0);
  return 0;
}
2379
ParseFeaturesFromStringArray(const char ** aFeatures,uint32_t aFeatureCount,bool aIsStartup)2380 uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
2381 uint32_t aFeatureCount,
2382 bool aIsStartup /* = false */) {
2383 uint32_t features = 0;
2384 for (size_t i = 0; i < aFeatureCount; i++) {
2385 features |= ParseFeature(aFeatures[i], aIsStartup);
2386 }
2387 return features;
2388 }
2389
2390 // Find the RegisteredThread for the current thread. This should only be called
2391 // in places where TLSRegisteredThread can't be used.
FindCurrentThreadRegisteredThread(PSLockRef aLock)2392 static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) {
2393 int id = profiler_current_thread_id();
2394 const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
2395 CorePS::RegisteredThreads(aLock);
2396 for (auto& registeredThread : registeredThreads) {
2397 if (registeredThread->Info()->ThreadId() == id) {
2398 return registeredThread.get();
2399 }
2400 }
2401
2402 return nullptr;
2403 }
2404
locked_register_thread(PSLockRef aLock,const char * aName,void * aStackTop)2405 static ProfilingStack* locked_register_thread(PSLockRef aLock,
2406 const char* aName,
2407 void* aStackTop) {
2408 MOZ_RELEASE_ASSERT(CorePS::Exists());
2409
2410 MOZ_RELEASE_ASSERT(!FindCurrentThreadRegisteredThread(aLock));
2411
2412 VTUNE_REGISTER_THREAD(aName);
2413
2414 if (!TLSRegisteredThread::Init(aLock)) {
2415 return nullptr;
2416 }
2417
2418 RefPtr<ThreadInfo> info = new ThreadInfo(aName, profiler_current_thread_id(),
2419 CorePS::IsMainThread());
2420 UniquePtr<RegisteredThread> registeredThread =
2421 MakeUnique<RegisteredThread>(info, aStackTop);
2422
2423 TLSRegisteredThread::SetRegisteredThread(aLock, registeredThread.get());
2424
2425 if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) {
2426 registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
2427 ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
2428 MakeUnique<ProfiledThreadData>(info));
2429 }
2430
2431 ProfilingStack* profilingStack =
2432 ®isteredThread->RacyRegisteredThread().ProfilingStack();
2433
2434 CorePS::AppendRegisteredThread(aLock, std::move(registeredThread));
2435
2436 return profilingStack;
2437 }
2438
2439 static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
2440 double aInterval, uint32_t aFeatures,
2441 const char** aFilters, uint32_t aFilterCount,
2442 const Maybe<double>& aDuration);
2443
SplitAtCommas(const char * aString,UniquePtr<char[]> & aStorage)2444 static Vector<const char*> SplitAtCommas(const char* aString,
2445 UniquePtr<char[]>& aStorage) {
2446 size_t len = strlen(aString);
2447 aStorage = MakeUnique<char[]>(len + 1);
2448 PodCopy(aStorage.get(), aString, len + 1);
2449
2450 // Iterate over all characters in aStorage and split at commas, by
2451 // overwriting commas with the null char.
2452 Vector<const char*> array;
2453 size_t currentElementStart = 0;
2454 for (size_t i = 0; i <= len; i++) {
2455 if (aStorage[i] == ',') {
2456 aStorage[i] = '\0';
2457 }
2458 if (aStorage[i] == '\0') {
2459 MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
2460 currentElementStart = i + 1;
2461 }
2462 }
2463 return array;
2464 }
2465
// Initializes the profiler core (CorePS) and registers the main thread.
// Must be called once before any other profiler API. If MOZ_PROFILER_STARTUP
// is set (to anything other than "", "0", "N" or "n"), also starts a
// profiling session immediately, with settings taken from the other
// MOZ_PROFILER_STARTUP_* environment variables (falling back to defaults for
// any that are unset; invalid values terminate via PrintUsageThenExit).
void profiler_init(void* aStackTop) {
  LOG("profiler_init");

  VTUNE_INIT();

  MOZ_RELEASE_ASSERT(!CorePS::Exists());

  if (getenv("MOZ_BASE_PROFILER_HELP")) {
    PrintUsageThenExit(0);  // terminates execution
  }

  SharedLibraryInfo::Initialize();

  uint32_t features = DefaultFeatures() & AvailableFeatures();

  UniquePtr<char[]> filterStorage;

  // The main thread is always in the filter list by default.
  Vector<const char*> filters;
  MOZ_RELEASE_ASSERT(filters.append(kMainThreadName));

  PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = Nothing();
  double interval = BASE_PROFILER_DEFAULT_INTERVAL;

  {
    PSAutoLock lock;

    // We've passed the possible failure point. Instantiate CorePS, which
    // indicates that the profiler has initialized successfully.
    CorePS::Create(lock);

    locked_register_thread(lock, kMainThreadName, aStackTop);

    // Platform-specific initialization.
    PlatformInit(lock);

    // (Linux-only) We could create CorePS::mLul and read unwind info into it
    // at this point. That would match the lifetime implied by destruction of
    // it in profiler_shutdown() just below. However, that gives a big delay on
    // startup, even if no profiling is actually to be done. So, instead, it is
    // created on demand at the first call to PlatformStart().

    // An unset, empty, "0", "N" or "n" value means "don't start at startup".
    const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
    if (!startupEnv || startupEnv[0] == '\0' ||
        ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
          startupEnv[0] == 'n') &&
         startupEnv[1] == '\0')) {
      return;
    }

    LOG("- MOZ_PROFILER_STARTUP is set");

    // Startup default capacity may be different.
    capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;

    const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
    if (startupCapacity && startupCapacity[0] != '\0') {
      errno = 0;
      long capacityLong = strtol(startupCapacity, nullptr, 10);
      // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
      // the maximum 32-bit signed number (as more than that is clamped down to
      // 2^31 anyway).
      if (errno == 0 && capacityLong > 0 &&
          static_cast<uint64_t>(capacityLong) <=
              static_cast<uint64_t>(INT32_MAX)) {
        capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
            static_cast<uint32_t>(capacityLong)));
        LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
      } else {
        PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
                       startupCapacity);
        PrintUsageThenExit(1);
      }
    }

    const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
    if (startupDuration && startupDuration[0] != '\0') {
      // The duration is a floating point number. Use StringToDouble rather than
      // strtod, so that "." is used as the decimal separator regardless of OS
      // locale.
      auto durationVal = StringToDouble(std::string(startupDuration));
      if (durationVal && *durationVal >= 0.0) {
        // A zero duration means "unlimited", represented by leaving `duration`
        // as Nothing().
        if (*durationVal > 0.0) {
          duration = Some(*durationVal);
        }
        LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal);
      } else {
        PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
                       startupDuration);
        PrintUsageThenExit(1);
      }
    }

    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
    if (startupInterval && startupInterval[0] != '\0') {
      // The interval is a floating point number. Use StringToDouble rather than
      // strtod, so that "." is used as the decimal separator regardless of OS
      // locale.
      auto intervalValue = StringToDouble(MakeStringSpan(startupInterval));
      // Intervals are limited to (0, 1000] milliseconds.
      if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) {
        interval = *intervalValue;
        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
      } else {
        PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
                       startupInterval);
        PrintUsageThenExit(1);
      }
    }

    features |= StartupExtraDefaultFeatures() & AvailableFeatures();

    // A numeric bitfield takes precedence over the named-feature list below.
    const char* startupFeaturesBitfield =
        getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
      errno = 0;
      features = strtol(startupFeaturesBitfield, nullptr, 10);
      if (errno == 0 && features != 0) {
        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
      } else {
        PrintToConsole(
            "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
            startupFeaturesBitfield);
        PrintUsageThenExit(1);
      }
    } else {
      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
      if (startupFeatures && startupFeatures[0] != '\0') {
        // Interpret startupFeatures as a list of feature strings, separated by
        // commas.
        UniquePtr<char[]> featureStringStorage;
        Vector<const char*> featureStringArray =
            SplitAtCommas(startupFeatures, featureStringStorage);
        features = ParseFeaturesFromStringArray(featureStringArray.begin(),
                                                featureStringArray.length(),
                                                /* aIsStartup */ true);
        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
      }
    }

    // filterStorage (function-scope) keeps the split strings alive until
    // locked_profiler_start below has consumed them.
    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
    if (startupFilters && startupFilters[0] != '\0') {
      filters = SplitAtCommas(startupFilters, filterStorage);
      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
    }

    locked_profiler_start(lock, capacity, interval, features, filters.begin(),
                          filters.length(), duration);
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // start counting memory allocations (outside of lock because this may
  //   call
  //   // profiler_add_sampled_counter which would attempt to take the lock.)
  //   mozilla::profiler::install_memory_counter(true);
  // #endif
}
2623
2624 static void locked_profiler_save_profile_to_file(PSLockRef aLock,
2625 const char* aFilename,
2626 bool aIsShuttingDown);
2627
2628 static SamplerThread* locked_profiler_stop(PSLockRef aLock);
2629
// Tears down the profiler: stops any active session (first saving the profile
// to the file named by MOZ_PROFILER_SHUTDOWN, if that variable is set), then
// destroys CorePS. Must be called on the main thread, after profiler_init().
void profiler_shutdown() {
  LOG("profiler_shutdown");

  VTUNE_SHUTDOWN();

  MOZ_RELEASE_ASSERT(CorePS::IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // If the profiler is active we must get a handle to the SamplerThread before
  // ActivePS is destroyed, in order to delete it.
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Save the profile on shutdown if requested.
    if (ActivePS::Exists(lock)) {
      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
      if (filename) {
        locked_profiler_save_profile_to_file(lock, filename,
                                             /* aIsShuttingDown */ true);
      }

      samplerThread = locked_profiler_stop(lock);
    }

    CorePS::Destroy(lock);

    // We just destroyed CorePS and the ThreadInfos it contains, so we can
    // clear this thread's TLSRegisteredThread.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}
2668
WriteProfileToJSONWriter(SpliceableChunkedJSONWriter & aWriter,double aSinceTime,bool aIsShuttingDown,bool aOnlyThreads=false)2669 static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
2670 double aSinceTime, bool aIsShuttingDown,
2671 bool aOnlyThreads = false) {
2672 LOG("WriteProfileToJSONWriter");
2673
2674 MOZ_RELEASE_ASSERT(CorePS::Exists());
2675
2676 if (!aOnlyThreads) {
2677 aWriter.Start();
2678 {
2679 if (!profiler_stream_json_for_this_process(
2680 aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads)) {
2681 return false;
2682 }
2683
2684 // Don't include profiles from other processes because this is a
2685 // synchronous function.
2686 aWriter.StartArrayProperty("processes");
2687 aWriter.EndArray();
2688 }
2689 aWriter.End();
2690 } else {
2691 aWriter.StartBareList();
2692 if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
2693 aIsShuttingDown, aOnlyThreads)) {
2694 return false;
2695 }
2696 aWriter.EndBareList();
2697 }
2698 return true;
2699 }
2700
// Records this process's name in CorePS so it can be included in the profile.
void profiler_set_process_name(const std::string& aProcessName) {
  LOG("profiler_set_process_name(\"%s\")", aProcessName.c_str());
  PSAutoLock lock;
  CorePS::SetProcessName(lock, aProcessName);
}
2706
// Returns the current profile for this process as a heap-allocated JSON
// string, or nullptr if streaming failed. Only this process's data is
// included (see WriteProfileToJSONWriter).
UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown,
                                       bool aOnlyThreads) {
  LOG("profiler_get_profile");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, aOnlyThreads)) {
    return nullptr;
  }
  return b.WriteFunc()->CopyData();
}
2717
// Like profiler_get_profile(), but copies the JSON into a buffer obtained
// from aAllocator (called with the required size). If streaming fails,
// returns without invoking aAllocator.
void profiler_get_profile_json_into_lazily_allocated_buffer(
    const std::function<char*(size_t)>& aAllocator, double aSinceTime,
    bool aIsShuttingDown) {
  LOG("profiler_get_profile_json_into_lazily_allocated_buffer");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown)) {
    return;
  }

  b.WriteFunc()->CopyDataIntoLazilyAllocatedBuffer(aAllocator);
}
2730
profiler_get_start_params(int * aCapacity,Maybe<double> * aDuration,double * aInterval,uint32_t * aFeatures,Vector<const char * > * aFilters)2731 void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
2732 double* aInterval, uint32_t* aFeatures,
2733 Vector<const char*>* aFilters) {
2734 MOZ_RELEASE_ASSERT(CorePS::Exists());
2735
2736 if (!aCapacity || !aDuration || !aInterval || !aFeatures || !aFilters) {
2737 return;
2738 }
2739
2740 PSAutoLock lock;
2741
2742 if (!ActivePS::Exists(lock)) {
2743 *aCapacity = 0;
2744 *aDuration = Nothing();
2745 *aInterval = 0;
2746 *aFeatures = 0;
2747 aFilters->clear();
2748 return;
2749 }
2750
2751 *aCapacity = ActivePS::Capacity(lock).Value();
2752 *aDuration = ActivePS::Duration(lock);
2753 *aInterval = ActivePS::Interval(lock);
2754 *aFeatures = ActivePS::Features(lock);
2755
2756 const Vector<std::string>& filters = ActivePS::Filters(lock);
2757 MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
2758 for (uint32_t i = 0; i < filters.length(); ++i) {
2759 (*aFilters)[i] = filters[i].c_str();
2760 }
2761 }
2762
GetProfilerEnvVarsForChildProcess(std::function<void (const char * key,const char * value)> && aSetEnv)2763 void GetProfilerEnvVarsForChildProcess(
2764 std::function<void(const char* key, const char* value)>&& aSetEnv) {
2765 MOZ_RELEASE_ASSERT(CorePS::Exists());
2766
2767 PSAutoLock lock;
2768
2769 if (!ActivePS::Exists(lock)) {
2770 aSetEnv("MOZ_PROFILER_STARTUP", "");
2771 return;
2772 }
2773
2774 aSetEnv("MOZ_PROFILER_STARTUP", "1");
2775 auto capacityString =
2776 Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
2777 aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
2778
2779 // Use AppendFloat instead of Smprintf with %f because the decimal
2780 // separator used by %f is locale-dependent. But the string we produce needs
2781 // to be parseable by strtod, which only accepts the period character as a
2782 // decimal separator. AppendFloat always uses the period character.
2783 std::string intervalString = std::to_string(ActivePS::Interval(lock));
2784 aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str());
2785
2786 auto featuresString = Smprintf("%d", ActivePS::Features(lock));
2787 aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
2788
2789 std::string filtersString;
2790 const Vector<std::string>& filters = ActivePS::Filters(lock);
2791 for (uint32_t i = 0; i < filters.length(); ++i) {
2792 filtersString += filters[i];
2793 if (i != filters.length() - 1) {
2794 filtersString += ",";
2795 }
2796 }
2797 aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
2798 }
2799
// Stores an exit profile (presumably received from an exiting child process —
// confirm with callers) in the active session, so it can later be spliced
// into this process's profile. Dropped if the profiler is inactive.
void profiler_received_exit_profile(const std::string& aExitProfile) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock;
  if (!ActivePS::Exists(lock)) {
    return;
  }
  ActivePS::AddExitProfile(lock, aExitProfile);
}
2808
// Takes ownership of all stored exit profiles, removing them from the active
// session. Returns an empty vector if the profiler is inactive.
Vector<std::string> profiler_move_exit_profiles() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock;
  Vector<std::string> profiles;
  if (ActivePS::Exists(lock)) {
    profiles = ActivePS::MoveExitProfiles(lock);
  }
  return profiles;
}
2818
// Writes the full JSON profile (including any stored exit profiles, which are
// consumed in the process) to aFilename. Silently does nothing if the file
// cannot be opened. The profiler mutex must be held and the profiler active.
static void locked_profiler_save_profile_to_file(PSLockRef aLock,
                                                 const char* aFilename,
                                                 bool aIsShuttingDown = false) {
  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  std::ofstream stream;
  stream.open(aFilename);
  if (stream.is_open()) {
    SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
    w.Start();
    {
      locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
                                                   aIsShuttingDown);

      // Splice the stored exit profiles into the "processes" array.
      w.StartArrayProperty("processes");
      Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock);
      for (auto& exitProfile : exitProfiles) {
        if (!exitProfile.empty()) {
          w.Splice(exitProfile.c_str());
        }
      }
      w.EndArray();
    }
    w.End();

    stream.close();
  }
}
2849
// Public wrapper around locked_profiler_save_profile_to_file(). Does nothing
// if the profiler is not active.
void profiler_save_profile_to_file(const char* aFilename) {
  LOG("profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return;
  }

  locked_profiler_save_profile_to_file(lock, aFilename);
}
2863
// Returns the bit-set of features supported on this platform/build.
uint32_t profiler_get_available_features() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  return AvailableFeatures();
}
2868
// Returns information about the active session's sample buffer, or Nothing()
// if the profiler is not active.
Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return Nothing();
  }

  return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
}
2880
2881 // This basically duplicates AutoProfilerLabel's constructor.
MozGlueBaseLabelEnter(const char * aLabel,const char * aDynamicString,void * aSp)2882 static void* MozGlueBaseLabelEnter(const char* aLabel,
2883 const char* aDynamicString, void* aSp) {
2884 ProfilingStack* profilingStack = AutoProfilerLabel::sProfilingStack.get();
2885 if (profilingStack) {
2886 profilingStack->pushLabelFrame(aLabel, aDynamicString, aSp,
2887 ProfilingCategoryPair::OTHER);
2888 }
2889 return profilingStack;
2890 }
2891
2892 // This basically duplicates AutoProfilerLabel's destructor.
MozGlueBaseLabelExit(void * sProfilingStack)2893 static void MozGlueBaseLabelExit(void* sProfilingStack) {
2894 if (sProfilingStack) {
2895 reinterpret_cast<ProfilingStack*>(sProfilingStack)->pop();
2896 }
2897 }
2898
// Starts a profiling session: validates/clamps the parameters, creates
// ActivePS, marks matching registered threads as profiled, installs the
// mozglue label hooks, and finally publishes the state through RacyFeatures.
// The profiler mutex must be held, CorePS must exist, and no session may
// already be active.
static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
                                  double aInterval, uint32_t aFeatures,
                                  const char** aFilters, uint32_t aFilterCount,
                                  const Maybe<double>& aDuration) {
  if (LOG_TEST) {
    LOG("locked_profiler_start");
    LOG("- capacity = %d", int(aCapacity.Value()));
    LOG("- duration = %.2f", aDuration ? *aDuration : -1);
    LOG("- interval = %.2f", aInterval);

#define LOG_FEATURE(n_, str_, Name_, desc_)     \
  if (ProfilerFeature::Has##Name_(aFeatures)) { \
    LOG("- feature = %s", str_);                \
  }

    BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)

#undef LOG_FEATURE

    for (uint32_t i = 0; i < aFilterCount; i++) {
      LOG("- threads = %s", aFilters[i]);
    }
  }

  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));

#if defined(GP_PLAT_amd64_windows)
  InitializeWin64ProfilerHooks();
#endif

  // Fall back to the default values if the passed-in values are unreasonable.
  // Less than 8192 entries (65536 bytes) may not be enough for the most complex
  // stack, so we should be able to store at least one full stack.
  // TODO: Review magic numbers.
  PowerOfTwo32 capacity =
      (aCapacity.Value() >= 8192u) ? aCapacity : BASE_PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = aDuration;

  // A non-positive duration means "unlimited".
  if (aDuration && *aDuration <= 0) {
    duration = Nothing();
  }
  double interval = aInterval > 0 ? aInterval : BASE_PROFILER_DEFAULT_INTERVAL;

  ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
                   duration);

  // Set up profiling for each registered thread, if appropriate.
  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(aLock);
  for (auto& registeredThread : registeredThreads) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    if (ActivePS::ShouldProfileThread(aLock, info)) {
      registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
      ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
                                      MakeUnique<ProfiledThreadData>(info));
      registeredThread->RacyRegisteredThread().ReinitializeOnResume();
    }
  }

  // Setup support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit);

  // At the very end, set up RacyFeatures.
  RacyFeatures::SetActive(ActivePS::Features(aLock));
}
2965
// Starts the profiler with the given settings, stopping any already-active
// session first (its settings are discarded). Initializes the profiler core
// if profiler_init() has not run yet.
void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
                    uint32_t aFeatures, const char** aFilters,
                    uint32_t aFilterCount, const Maybe<double>& aDuration) {
  LOG("profiler_start");

  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Initialize if necessary.
    // NOTE(review): profiler_init() itself constructs a PSAutoLock; calling
    // it here while `lock` is held looks like a re-entrant acquisition of
    // gPSMutex — confirm this path is unreachable or the mutex tolerates it.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    // Reset the current state if the profiler is running.
    if (ActivePS::Exists(lock)) {
      samplerThread = locked_profiler_stop(lock);
    }

    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                          aFilterCount, aDuration);
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // start counting memory allocations (outside of lock because this may
  //   call
  //   // profiler_add_sampled_counter which would attempt to take the lock.)
  //   mozilla::profiler::install_memory_counter(true);
  // #endif

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}
3003
// Like profiler_start(), but if a session with exactly these settings is
// already active it is left running untouched; a session with different
// settings is stopped and restarted with the new ones.
void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
                             uint32_t aFeatures, const char** aFilters,
                             uint32_t aFilterCount,
                             const Maybe<double>& aDuration) {
  LOG("profiler_ensure_started");

  // bool startedProfiler = false; (See TODO below)
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Initialize if necessary.
    // NOTE(review): as in profiler_start(), profiler_init() takes its own
    // PSAutoLock while `lock` is held here — confirm this path is safe.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    if (ActivePS::Exists(lock)) {
      // The profiler is active.
      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
                            aFilters, aFilterCount)) {
        // Stop and restart with different settings.
        samplerThread = locked_profiler_stop(lock);
        locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                              aFilterCount, aDuration);
        // startedProfiler = true; (See TODO below)
      }
    } else {
      // The profiler is stopped.
      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                            aFilterCount, aDuration);
      // startedProfiler = true; (See TODO below)
    }
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // start counting memory allocations (outside of lock because this may
  //   // call profiler_add_sampled_counter which would attempt to take the
  //   // lock.)
  //   mozilla::profiler::install_memory_counter(true);
  // #endif

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}
3052
// Stops the active session: clears RacyFeatures first (so fast-path checks
// see the profiler as inactive immediately), removes the mozglue label hooks,
// unmarks profiled threads, and destroys ActivePS. Returns the SamplerThread,
// which the caller MUST delete after releasing the profiler mutex (see
// profiler_stop() for why deleting under the lock would deadlock).
[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
  LOG("locked_profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  // At the very start, clear RacyFeatures.
  RacyFeatures::SetInactive();

  // TODO: Uninstall memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   mozilla::profiler::install_memory_counter(false);
  // #endif

  // Remove support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(nullptr, nullptr);

  // Stop sampling live threads.
  const Vector<LiveProfiledThreadData>& liveProfiledThreads =
      ActivePS::LiveProfiledThreads(aLock);
  for (auto& thread : liveProfiledThreads) {
    RegisteredThread* registeredThread = thread.mRegisteredThread;
    registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
  }

  // The Stop() call doesn't actually stop Run(); that happens in this
  // function's caller when the sampler thread is destroyed. Stop() just gives
  // the SamplerThread a chance to do some cleanup with gPSMutex locked.
  SamplerThread* samplerThread = ActivePS::Destroy(aLock);
  samplerThread->Stop(aLock);

  return samplerThread;
}
3085
// Stops the profiler if it is running; a no-op otherwise. The SamplerThread
// is deleted only after the profiler mutex has been released (see below).
void profiler_stop() {
  LOG("profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  SamplerThread* samplerThread;
  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    samplerThread = locked_profiler_stop(lock);
  }

  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
  // return so the join operation within the destructor can complete, but Run()
  // needs to lock gPSMutex to return.
  //
  // Because this call occurs with gPSMutex unlocked, it -- including the final
  // iteration of Run()'s loop -- must be able detect deactivation and return
  // in a way that's safe with respect to other gPSMutex-locking operations
  // that may have occurred in the meantime.
  delete samplerThread;
}
3113
// Returns whether the active session is paused; false if the profiler is not
// active at all.
bool profiler_is_paused() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return false;
  }

  return ActivePS::IsPaused(lock);
}
3125
// Pauses sampling in the active session (a no-op if inactive) and records a
// Pause entry in the buffer so the pause shows up in the profile timeline.
void profiler_pause() {
  LOG("profiler_pause");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    // Racy state is flipped first, then the locked state, then the buffer
    // entry — the mirror image of profiler_resume()'s ordering.
    RacyFeatures::SetPaused();
    ActivePS::SetIsPaused(lock, true);
    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
  }
}
3143
// Resumes sampling in the active session (a no-op if inactive), recording a
// Resume entry in the buffer first.
void profiler_resume() {
  LOG("profiler_resume");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    // Buffer entry first, then locked state, then racy state — the exact
    // reverse of profiler_pause()'s ordering.
    ActivePS::Buffer(lock).AddEntry(
        ProfileBufferEntry::Resume(profiler_time()));
    ActivePS::SetIsPaused(lock, false);
    RacyFeatures::SetUnpaused();
  }
}
3162
// Returns whether the profiler is active with the given feature enabled.
// Lock-free: reads RacyFeatures instead of taking gPSMutex.
bool profiler_feature_active(uint32_t aFeature) {
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  return RacyFeatures::IsActiveWithFeature(aFeature);
}
3171
// Registers aCounter to be sampled by the periodic sampler. The caller
// retains ownership of the counter object.
void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock;
  CorePS::AppendCounter(lock, aCounter);
}
3177
// Unregisters a counter previously added with profiler_add_sampled_counter().
void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock;
  // Note: we don't enforce a final sample, though we could do so if the
  // profiler was active
  CorePS::RemoveCounter(lock, aCounter);
}
3185
// Registers the calling thread with the profiler under aName.
// aGuessStackTop is a hint used to locate the top of this thread's stack.
// Returns the thread's ProfilingStack (see locked_register_thread).
ProfilingStack* profiler_register_thread(const char* aName,
                                         void* aGuessStackTop) {
  DEBUG_LOG("profiler_register_thread(%s)", aName);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  void* stackTop = GetStackTop(aGuessStackTop);
  return locked_register_thread(lock, aName, stackTop);
}
3197
// Unregisters the calling thread, destroying its RegisteredThread and
// clearing its TLS slot. Safe to call after shutdown, and tolerant of being
// called more than once for the same thread.
void profiler_unregister_thread() {
  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  PSAutoLock lock;

  RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock);
  // The list entry and the TLS slot must always agree.
  MOZ_RELEASE_ASSERT(registeredThread ==
                     TLSRegisteredThread::RegisteredThread(lock));
  if (registeredThread) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    DEBUG_LOG("profiler_unregister_thread: %s", info->Name());

    if (ActivePS::Exists(lock)) {
      ActivePS::UnregisterThread(lock, registeredThread);
    }

    // Clear the pointer to the RegisteredThread object that we're about to
    // destroy.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);

    // Remove the thread from the list of registered threads. This deletes the
    // registeredThread object.
    CorePS::RemoveRegisteredThread(lock, registeredThread);
  } else {
    // There are two ways FindCurrentThreadRegisteredThread() might have failed.
    //
    // - TLSRegisteredThread::Init() failed in locked_register_thread().
    //
    // - We've already called profiler_unregister_thread() for this thread.
    //   (Whether or not it should, this does happen in practice.)
    //
    // Either way, TLSRegisteredThread should be empty.
    MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
  }
}
3237
// Registers a page (window) so its markers can be attributed in the profile.
// aInnerWindowID identifies the specific load within the browsing context;
// aEmbedderInnerWindowID identifies the embedding (parent) window, if any.
void profiler_register_page(uint64_t aBrowsingContextID,
                            uint64_t aInnerWindowID, const std::string& aUrl,
                            uint64_t aEmbedderInnerWindowID) {
  DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
            aBrowsingContextID, aInnerWindowID, aUrl.c_str(),
            aEmbedderInnerWindowID);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  // When a Browsing context is first loaded, the first url loaded in it will be
  // about:blank. Because of that, this call keeps the first non-about:blank
  // registration of window and discards the previous one.
  RefPtr<PageInformation> pageInfo = new PageInformation(
      aBrowsingContextID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
  CorePS::AppendRegisteredPage(lock, std::move(pageInfo));

  // After appending the given page to CorePS, look for the expired
  // pages and remove them if there are any.
  if (ActivePS::Exists(lock)) {
    ActivePS::DiscardExpiredPages(lock);
  }
}
3262
profiler_unregister_page(uint64_t aRegisteredInnerWindowID)3263 void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
3264 if (!CorePS::Exists()) {
3265 // This function can be called after the main thread has already shut down.
3266 return;
3267 }
3268
3269 PSAutoLock lock;
3270
3271 // During unregistration, if the profiler is active, we have to keep the
3272 // page information since there may be some markers associated with the given
3273 // page. But if profiler is not active. we have no reason to keep the
3274 // page information here because there can't be any marker associated with it.
3275 if (ActivePS::Exists(lock)) {
3276 ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
3277 } else {
3278 CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
3279 }
3280 }
3281
profiler_clear_all_pages()3282 void profiler_clear_all_pages() {
3283 if (!CorePS::Exists()) {
3284 // This function can be called after the main thread has already shut down.
3285 return;
3286 }
3287
3288 {
3289 PSAutoLock lock;
3290 CorePS::ClearRegisteredPages(lock);
3291 if (ActivePS::Exists(lock)) {
3292 ActivePS::ClearUnregisteredPages(lock);
3293 }
3294 }
3295 }
3296
profiler_thread_sleep()3297 void profiler_thread_sleep() {
3298 // This function runs both on and off the main thread.
3299
3300 MOZ_RELEASE_ASSERT(CorePS::Exists());
3301
3302 RacyRegisteredThread* racyRegisteredThread =
3303 TLSRegisteredThread::RacyRegisteredThread();
3304 if (!racyRegisteredThread) {
3305 return;
3306 }
3307
3308 racyRegisteredThread->SetSleeping();
3309 }
3310
profiler_thread_wake()3311 void profiler_thread_wake() {
3312 // This function runs both on and off the main thread.
3313
3314 MOZ_RELEASE_ASSERT(CorePS::Exists());
3315
3316 RacyRegisteredThread* racyRegisteredThread =
3317 TLSRegisteredThread::RacyRegisteredThread();
3318 if (!racyRegisteredThread) {
3319 return;
3320 }
3321
3322 racyRegisteredThread->SetAwake();
3323 }
3324
IsThreadBeingProfiled()3325 bool detail::IsThreadBeingProfiled() {
3326 MOZ_RELEASE_ASSERT(CorePS::Exists());
3327
3328 const RacyRegisteredThread* racyRegisteredThread =
3329 TLSRegisteredThread::RacyRegisteredThread();
3330 return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled();
3331 }
3332
profiler_thread_is_sleeping()3333 bool profiler_thread_is_sleeping() {
3334 MOZ_RELEASE_ASSERT(CorePS::IsMainThread());
3335 MOZ_RELEASE_ASSERT(CorePS::Exists());
3336
3337 RacyRegisteredThread* racyRegisteredThread =
3338 TLSRegisteredThread::RacyRegisteredThread();
3339 if (!racyRegisteredThread) {
3340 return false;
3341 }
3342 return racyRegisteredThread->IsSleeping();
3343 }
3344
profiler_time()3345 double profiler_time() {
3346 MOZ_RELEASE_ASSERT(CorePS::Exists());
3347
3348 TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
3349 return delta.ToMilliseconds();
3350 }
3351
profiler_get_backtrace()3352 UniqueProfilerBacktrace profiler_get_backtrace() {
3353 MOZ_RELEASE_ASSERT(CorePS::Exists());
3354
3355 PSAutoLock lock;
3356
3357 if (!ActivePS::Exists(lock)) {
3358 return nullptr;
3359 }
3360
3361 RegisteredThread* registeredThread =
3362 TLSRegisteredThread::RegisteredThread(lock);
3363 if (!registeredThread) {
3364 MOZ_ASSERT(registeredThread);
3365 return nullptr;
3366 }
3367
3368 int tid = profiler_current_thread_id();
3369
3370 TimeStamp now = TimeStamp::NowUnfuzzed();
3371
3372 Registers regs;
3373 #if defined(HAVE_NATIVE_UNWIND)
3374 regs.SyncPopulate();
3375 #else
3376 regs.Clear();
3377 #endif
3378
3379 auto bufferManager = MakeUnique<ProfileChunkedBuffer>(
3380 ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
3381 MakeUnique<ProfileBufferChunkManagerSingle>(scExpectedMaximumStackSize));
3382 auto buffer = MakeUnique<ProfileBuffer>(*bufferManager);
3383
3384 DoSyncSample(lock, *registeredThread, now, regs, *buffer.get());
3385
3386 return UniqueProfilerBacktrace(new ProfilerBacktrace(
3387 "SyncProfile", tid, std::move(bufferManager), std::move(buffer)));
3388 }
3389
operator ()(ProfilerBacktrace * aBacktrace)3390 void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
3391 delete aBacktrace;
3392 }
3393
profiler_is_locked_on_current_thread()3394 bool profiler_is_locked_on_current_thread() {
3395 // This function is used to help users avoid calling `profiler_...` functions
3396 // when the profiler may already have a lock in place, which would prevent a
3397 // 2nd recursive lock (resulting in a crash or a never-ending wait).
3398 // So we must return `true` for any of:
3399 // - The main profiler mutex, used by most functions, and/or
3400 // - The buffer mutex, used directly in some functions without locking the
3401 // main mutex, e.g., marker-related functions.
3402 return PSAutoLock::IsLockedOnCurrentThread() ||
3403 CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread();
3404 }
3405
racy_profiler_add_marker(const char * aMarkerName,ProfilingCategoryPair aCategoryPair,const ProfilerMarkerPayload * aPayload)3406 static void racy_profiler_add_marker(const char* aMarkerName,
3407 ProfilingCategoryPair aCategoryPair,
3408 const ProfilerMarkerPayload* aPayload) {
3409 MOZ_RELEASE_ASSERT(CorePS::Exists());
3410
3411 // This function is hot enough that we use RacyFeatures, not ActivePS.
3412 if (!profiler_can_accept_markers()) {
3413 return;
3414 }
3415
3416 // Note that it's possible that the above test would change again before we
3417 // actually record the marker. Because of this imprecision it's possible to
3418 // miss a marker or record one we shouldn't. Either way is not a big deal.
3419
3420 RacyRegisteredThread* racyRegisteredThread =
3421 TLSRegisteredThread::RacyRegisteredThread();
3422 if (!racyRegisteredThread || !racyRegisteredThread->IsBeingProfiled()) {
3423 return;
3424 }
3425
3426 TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull())
3427 ? aPayload->GetStartTime()
3428 : TimeStamp::NowUnfuzzed();
3429 TimeDuration delta = origin - CorePS::ProcessStartTime();
3430 CorePS::CoreBuffer().PutObjects(
3431 ProfileBufferEntry::Kind::MarkerData, racyRegisteredThread->ThreadId(),
3432 WrapProfileBufferUnownedCString(aMarkerName),
3433 static_cast<uint32_t>(aCategoryPair), aPayload, delta.ToMilliseconds());
3434 }
3435
profiler_add_marker(const char * aMarkerName,ProfilingCategoryPair aCategoryPair,const ProfilerMarkerPayload & aPayload)3436 void profiler_add_marker(const char* aMarkerName,
3437 ProfilingCategoryPair aCategoryPair,
3438 const ProfilerMarkerPayload& aPayload) {
3439 racy_profiler_add_marker(aMarkerName, aCategoryPair, &aPayload);
3440 }
3441
profiler_add_marker(const char * aMarkerName,ProfilingCategoryPair aCategoryPair)3442 void profiler_add_marker(const char* aMarkerName,
3443 ProfilingCategoryPair aCategoryPair) {
3444 racy_profiler_add_marker(aMarkerName, aCategoryPair, nullptr);
3445 }
3446
3447 // This is a simplified version of profiler_add_marker that can be easily passed
3448 // into the JS engine.
profiler_add_js_marker(const char * aMarkerName)3449 void profiler_add_js_marker(const char* aMarkerName) {
3450 AUTO_PROFILER_STATS(base_add_marker);
3451 profiler_add_marker(aMarkerName, ProfilingCategoryPair::JS);
3452 }
3453
3454 // This logic needs to add a marker for a different thread, so we actually need
3455 // to lock here.
profiler_add_marker_for_thread(int aThreadId,ProfilingCategoryPair aCategoryPair,const char * aMarkerName,const ProfilerMarkerPayload & aPayload)3456 void profiler_add_marker_for_thread(int aThreadId,
3457 ProfilingCategoryPair aCategoryPair,
3458 const char* aMarkerName,
3459 const ProfilerMarkerPayload& aPayload) {
3460 MOZ_RELEASE_ASSERT(CorePS::Exists());
3461
3462 if (!profiler_can_accept_markers()) {
3463 return;
3464 }
3465
3466 #ifdef DEBUG
3467 {
3468 PSAutoLock lock;
3469 if (!ActivePS::Exists(lock)) {
3470 return;
3471 }
3472
3473 // Assert that our thread ID makes sense
3474 bool realThread = false;
3475 const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
3476 CorePS::RegisteredThreads(lock);
3477 for (auto& thread : registeredThreads) {
3478 RefPtr<ThreadInfo> info = thread->Info();
3479 if (info->ThreadId() == aThreadId) {
3480 realThread = true;
3481 break;
3482 }
3483 }
3484 MOZ_ASSERT(realThread, "Invalid thread id");
3485 }
3486 #endif
3487
3488 // Insert the marker into the buffer
3489 TimeStamp origin = (!aPayload.GetStartTime().IsNull())
3490 ? aPayload.GetStartTime()
3491 : TimeStamp::NowUnfuzzed();
3492 TimeDuration delta = origin - CorePS::ProcessStartTime();
3493 CorePS::CoreBuffer().PutObjects(
3494 ProfileBufferEntry::Kind::MarkerData, aThreadId,
3495 WrapProfileBufferUnownedCString(aMarkerName),
3496 static_cast<uint32_t>(aCategoryPair), &aPayload, delta.ToMilliseconds());
3497 }
3498
profiler_add_marker_for_mainthread(ProfilingCategoryPair aCategoryPair,const char * aMarkerName,const ProfilerMarkerPayload & aPayload)3499 void profiler_add_marker_for_mainthread(ProfilingCategoryPair aCategoryPair,
3500 const char* aMarkerName,
3501 const ProfilerMarkerPayload& aPayload) {
3502 profiler_add_marker_for_thread(CorePS::MainThreadId(), aCategoryPair,
3503 aMarkerName, aPayload);
3504 }
3505
profiler_tracing_marker(const char * aCategoryString,const char * aMarkerName,ProfilingCategoryPair aCategoryPair,TracingKind aKind,const Maybe<uint64_t> & aInnerWindowID)3506 void profiler_tracing_marker(const char* aCategoryString,
3507 const char* aMarkerName,
3508 ProfilingCategoryPair aCategoryPair,
3509 TracingKind aKind,
3510 const Maybe<uint64_t>& aInnerWindowID) {
3511 MOZ_RELEASE_ASSERT(CorePS::Exists());
3512
3513 VTUNE_TRACING(aMarkerName, aKind);
3514
3515 // This function is hot enough that we use RacyFeatures, notActivePS.
3516 if (!profiler_can_accept_markers()) {
3517 return;
3518 }
3519
3520 AUTO_PROFILER_STATS(base_add_marker_with_TracingMarkerPayload);
3521 profiler_add_marker(
3522 aMarkerName, aCategoryPair,
3523 TracingMarkerPayload(aCategoryString, aKind, aInnerWindowID));
3524 }
3525
profiler_tracing_marker(const char * aCategoryString,const char * aMarkerName,ProfilingCategoryPair aCategoryPair,TracingKind aKind,UniqueProfilerBacktrace aCause,const Maybe<uint64_t> & aInnerWindowID)3526 void profiler_tracing_marker(const char* aCategoryString,
3527 const char* aMarkerName,
3528 ProfilingCategoryPair aCategoryPair,
3529 TracingKind aKind, UniqueProfilerBacktrace aCause,
3530 const Maybe<uint64_t>& aInnerWindowID) {
3531 MOZ_RELEASE_ASSERT(CorePS::Exists());
3532
3533 VTUNE_TRACING(aMarkerName, aKind);
3534
3535 // This function is hot enough that we use RacyFeatures, notActivePS.
3536 if (!profiler_can_accept_markers()) {
3537 return;
3538 }
3539
3540 AUTO_PROFILER_STATS(base_add_marker_with_TracingMarkerPayload);
3541 profiler_add_marker(aMarkerName, aCategoryPair,
3542 TracingMarkerPayload(aCategoryString, aKind,
3543 aInnerWindowID, std::move(aCause)));
3544 }
3545
profiler_add_text_marker(const char * aMarkerName,const std::string & aText,ProfilingCategoryPair aCategoryPair,const TimeStamp & aStartTime,const TimeStamp & aEndTime,const Maybe<uint64_t> & aInnerWindowID,UniqueProfilerBacktrace aCause)3546 void profiler_add_text_marker(const char* aMarkerName, const std::string& aText,
3547 ProfilingCategoryPair aCategoryPair,
3548 const TimeStamp& aStartTime,
3549 const TimeStamp& aEndTime,
3550 const Maybe<uint64_t>& aInnerWindowID,
3551 UniqueProfilerBacktrace aCause) {
3552 AUTO_PROFILER_STATS(base_add_marker_with_TextMarkerPayload);
3553 profiler_add_marker(aMarkerName, aCategoryPair,
3554 TextMarkerPayload(aText, aStartTime, aEndTime,
3555 aInnerWindowID, std::move(aCause)));
3556 }
3557
3558 // NOTE: aCollector's methods will be called while the target thread is paused.
3559 // Doing things in those methods like allocating -- which may try to claim
3560 // locks -- is a surefire way to deadlock.
profiler_suspend_and_sample_thread(int aThreadId,uint32_t aFeatures,ProfilerStackCollector & aCollector,bool aSampleNative)3561 void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
3562 ProfilerStackCollector& aCollector,
3563 bool aSampleNative /* = true */) {
3564 // Lock the profiler mutex
3565 PSAutoLock lock;
3566
3567 const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
3568 CorePS::RegisteredThreads(lock);
3569 for (auto& thread : registeredThreads) {
3570 RefPtr<ThreadInfo> info = thread->Info();
3571 RegisteredThread& registeredThread = *thread.get();
3572
3573 if (info->ThreadId() == aThreadId) {
3574 if (info->IsMainThread()) {
3575 aCollector.SetIsMainThread();
3576 }
3577
3578 // Allocate the space for the native stack
3579 NativeStack nativeStack;
3580
3581 // Suspend, sample, and then resume the target thread.
3582 Sampler sampler(lock);
3583 TimeStamp now = TimeStamp::NowUnfuzzed();
3584 sampler.SuspendAndSampleAndResumeThread(
3585 lock, registeredThread, now,
3586 [&](const Registers& aRegs, const TimeStamp& aNow) {
3587 // The target thread is now suspended. Collect a native
3588 // backtrace, and call the callback.
3589 bool isSynchronous = false;
3590 #if defined(HAVE_FASTINIT_NATIVE_UNWIND)
3591 if (aSampleNative) {
3592 // We can only use FramePointerStackWalk or MozStackWalk from
3593 // suspend_and_sample_thread as other stackwalking methods may not be
3594 // initialized.
3595 # if defined(USE_FRAME_POINTER_STACK_WALK)
3596 DoFramePointerBacktrace(lock, registeredThread, aRegs,
3597 nativeStack);
3598 # elif defined(USE_MOZ_STACK_WALK)
3599 DoMozStackWalkBacktrace(lock, registeredThread, aRegs,
3600 nativeStack);
3601 # else
3602 # error "Invalid configuration"
3603 # endif
3604
3605 MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
3606 nativeStack, aCollector);
3607 } else
3608 #endif
3609 {
3610 MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
3611 nativeStack, aCollector);
3612
3613 if (ProfilerFeature::HasLeaf(aFeatures)) {
3614 aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
3615 }
3616 }
3617 });
3618
3619 // NOTE: Make sure to disable the sampler before it is destroyed, in case
3620 // the profiler is running at the same time.
3621 sampler.Disable(lock);
3622 break;
3623 }
3624 }
3625 }
3626
3627 // END externally visible functions
3628 ////////////////////////////////////////////////////////////////////////
3629
3630 } // namespace baseprofiler
3631 } // namespace mozilla
3632