1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 // There are three kinds of samples done by the profiler.
8 //
9 // - A "periodic" sample is the most complex kind. It is done in response to a
10 // timer while the profiler is active. It involves writing a stack trace plus
11 // a variety of other values (memory measurements, responsiveness
12 // measurements, markers, etc.) into the main ProfileBuffer. The sampling is
13 // done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
14 // get the register values.
15 //
16 // - A "synchronous" sample is a simpler kind. It is done in response to an API
17 // call (profiler_get_backtrace()). It involves writing a stack trace and
18 // little else into a temporary ProfileBuffer, and wrapping that up in a
19 // ProfilerBacktrace that can be subsequently used in a marker. The sampling
20 // is done on-thread, and so Registers::SyncPopulate() is used to get the
21 // register values.
22 //
23 // - A "backtrace" sample is the simplest kind. It is done in response to an
24 // API call (profiler_suspend_and_sample_thread()). It involves getting a
25 // stack trace via a ProfilerStackCollector; it does not write to a
26 // ProfileBuffer. The sampling is done from off-thread, and so uses
27 // SuspendAndSampleAndResumeThread() to get the register values.
28
29 #include "platform.h"
30
31 #include "GeckoProfiler.h"
32 #include "GeckoProfilerReporter.h"
33 #include "PageInformation.h"
34 #include "ProfileBuffer.h"
35 #include "ProfiledThreadData.h"
36 #include "ProfilerBacktrace.h"
37 #include "ProfilerChild.h"
38 #include "ProfilerCodeAddressService.h"
39 #include "ProfilerIOInterposeObserver.h"
40 #include "ProfilerParent.h"
41 #include "RegisteredThread.h"
42 #include "shared-libraries.h"
43 #include "ThreadInfo.h"
44 #include "VTuneProfiler.h"
45
46 #include "js/TraceLoggerAPI.h"
47 #include "js/ProfilingFrameIterator.h"
48 #include "memory_hooks.h"
49 #include "mozilla/ArrayUtils.h"
50 #include "mozilla/Atomics.h"
51 #include "mozilla/AutoProfilerLabel.h"
52 #include "mozilla/ExtensionPolicyService.h"
53 #include "mozilla/extensions/WebExtensionPolicy.h"
54 #include "mozilla/net/HttpBaseChannel.h" // for net::TimingStruct
55 #include "mozilla/Printf.h"
56 #include "mozilla/ProfileBufferChunkManagerSingle.h"
57 #include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
58 #include "mozilla/ProfileChunkedBuffer.h"
59 #include "mozilla/SchedulerGroup.h"
60 #include "mozilla/Services.h"
61 #include "mozilla/StackWalk.h"
62 #ifdef XP_WIN
63 # include "mozilla/StackWalkThread.h"
64 #endif
65 #include "mozilla/StaticPtr.h"
66 #include "mozilla/ThreadLocal.h"
67 #include "mozilla/TimeStamp.h"
68 #include "mozilla/Tuple.h"
69 #include "mozilla/UniquePtr.h"
70 #include "mozilla/Vector.h"
71 #include "BaseProfiler.h"
72 #include "nsDirectoryServiceDefs.h"
73 #include "nsDirectoryServiceUtils.h"
74 #include "nsIChannelEventSink.h"
75 #include "nsIDocShell.h"
76 #include "nsIHttpProtocolHandler.h"
77 #include "nsIObserverService.h"
78 #include "nsIPropertyBag2.h"
79 #include "nsIXULAppInfo.h"
80 #include "nsIXULRuntime.h"
81 #include "nsJSPrincipals.h"
82 #include "nsMemoryReporterManager.h"
83 #include "nsProfilerStartParams.h"
84 #include "nsScriptSecurityManager.h"
85 #include "nsSystemInfo.h"
86 #include "nsThreadUtils.h"
87 #include "nsXULAppAPI.h"
88 #include "Tracing.h"
89 #include "prdtoa.h"
90 #include "prtime.h"
91
92 #include <algorithm>
93 #include <errno.h>
94 #include <fstream>
95 #include <ostream>
96 #include <set>
97 #include <sstream>
98 #include <type_traits>
99
100 #if defined(GP_OS_android)
101 # include "mozilla/java/GeckoJavaSamplerNatives.h"
102 #endif
103
// Native stack-walking configuration, per platform.

// x86 Windows builds always have frame pointers, so the fast
// FramePointerStackWalk() is always usable.
#if defined(GP_PLAT_x86_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// x86-64 Windows builds always omit frame pointers, so we fall back to the
// slower MozStackWalk(), which still works in that case.
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Windows does not appear to keep frame pointers either, so the
// slower MozStackWalk() is used there as well.
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds only keep frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. MozStackWalk() is not used
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// ARM Android/Linux builds unwind via the ARM Exception Handling ABI.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_EHABI_STACKWALK
#  include "EHABIStackWalk.h"
#endif

// Linux/BSD builds use LUL, which unwinds stacks using DWARF info.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
    defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
    defined(GP_PLAT_arm64_freebsd)
#  define HAVE_NATIVE_UNWIND
#  define USE_LUL_STACKWALK
#  include "lul/LulMain.h"
#  include "lul/platform-linux-lul.h"

// On Linux, LUL handles periodic and synchronous samples, but backtrace
// samples use FramePointerStackWalk when MOZ_PROFILING is enabled. (See the
// comment at the top of the file for the definitions of periodic /
// synchronous / backtrace samples.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library built without frame pointers; however LUL can take a
// long time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread such as the Background Hang Reporter.
#  if defined(MOZ_PROFILING)
#    define USE_FRAME_POINTER_STACK_WALK
#  endif
#endif

// Stack walking without expensive initialization is only possible on
// platforms supporting FramePointerStackWalk or MozStackWalk. LUL requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif

#ifdef MOZ_VALGRIND
#  include <valgrind/memcheck.h>
#else
#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
#  include <ucontext.h>
#endif
182
183 using namespace mozilla;
184 using mozilla::profiler::detail::RacyFeatures;
185
186 LazyLogModule gProfilerLog("prof");
187
188 // Statically initialized to 0, then set once from profiler_init(), which should
189 // be called from the main thread before any other use of the profiler.
190 int scProfilerMainThreadId;
191
#if defined(GP_OS_android)
// Native-method bridge allowing the Java-side sampler to query the C++
// profiler.
class GeckoJavaSampler
    : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
 private:
  GeckoJavaSampler();

 public:
  // Returns the current profiler time, or 0.0 when the profiler is inactive.
  static double GetProfilerTime() {
    return profiler_is_active() ? profiler_time() : 0.0;
  }
};
#endif
207
ValidateFeatures()208 constexpr static bool ValidateFeatures() {
209 int expectedFeatureNumber = 0;
210
211 // Feature numbers should start at 0 and increase by 1 each.
212 #define CHECK_FEATURE(n_, str_, Name_, desc_) \
213 if ((n_) != expectedFeatureNumber) { \
214 return false; \
215 } \
216 ++expectedFeatureNumber;
217
218 PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
219
220 #undef CHECK_FEATURE
221
222 return true;
223 }
224
225 static_assert(ValidateFeatures(), "Feature list is invalid");
226
227 // Return all features that are available on this platform.
AvailableFeatures()228 static uint32_t AvailableFeatures() {
229 uint32_t features = 0;
230
231 #define ADD_FEATURE(n_, str_, Name_, desc_) \
232 ProfilerFeature::Set##Name_(features);
233
234 // Add all the possible features.
235 PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
236
237 #undef ADD_FEATURE
238
239 // Now remove features not supported on this platform/configuration.
240 #if !defined(GP_OS_android)
241 ProfilerFeature::ClearJava(features);
242 #endif
243 #if !defined(HAVE_NATIVE_UNWIND)
244 ProfilerFeature::ClearStackWalk(features);
245 #endif
246 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
247 if (getenv("XPCOM_MEM_BLOAT_LOG")) {
248 NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
249 // The memory hooks are available, but the bloat log is enabled, which is
250 // not compatible with the native allocations tracking. See the comment in
251 // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
252 // more information.
253 ProfilerFeature::ClearNativeAllocations(features);
254 }
255 #else
256 // The memory hooks are not available.
257 ProfilerFeature::ClearNativeAllocations(features);
258 #endif
259 if (!JS::TraceLoggerSupported()) {
260 ProfilerFeature::ClearJSTracer(features);
261 }
262 #if !defined(GP_OS_windows)
263 ProfilerFeature::ClearNoTimerResolutionChange(features);
264 #endif
265
266 return features;
267 }
268
269 // Default features common to all contexts (even if not available).
DefaultFeatures()270 static uint32_t DefaultFeatures() {
271 return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
272 ProfilerFeature::StackWalk | ProfilerFeature::Threads |
273 ProfilerFeature::CPUUtilization | ProfilerFeature::Screenshots;
274 }
275
276 // Extra default features when MOZ_PROFILER_STARTUP is set (even if not
277 // available).
StartupExtraDefaultFeatures()278 static uint32_t StartupExtraDefaultFeatures() {
279 // Enable file I/Os by default for startup profiles as startup is heavy on
280 // I/O operations.
281 return ProfilerFeature::FileIOAll;
282 }
283
284 // The class is a thin shell around mozglue PlatformMutex. It does not preserve
285 // behavior in JS record/replay. It provides a mechanism to determine if it is
286 // locked or not in order for memory hooks to avoid re-entering the profiler
287 // locked state.
288 class PSMutex : private ::mozilla::detail::MutexImpl {
289 public:
PSMutex()290 PSMutex() : ::mozilla::detail::MutexImpl() {}
291
Lock()292 void Lock() {
293 const int tid = profiler_current_thread_id();
294 MOZ_ASSERT(tid != 0);
295
296 // This is only designed to catch recursive locking:
297 // - If the current thread doesn't own the mutex, `mOwningThreadId` must be
298 // zero or a different thread id written by another thread; it may change
299 // again at any time, but never to the current thread's id.
300 // - If the current thread owns the mutex, `mOwningThreadId` must be its id.
301 MOZ_ASSERT(mOwningThreadId != tid);
302
303 ::mozilla::detail::MutexImpl::lock();
304
305 // We now hold the mutex, it should have been in the unlocked state before.
306 MOZ_ASSERT(mOwningThreadId == 0);
307 // And we can write our own thread id.
308 mOwningThreadId = tid;
309 }
310
TryLock()311 [[nodiscard]] bool TryLock() {
312 const int tid = profiler_current_thread_id();
313 MOZ_ASSERT(tid != 0);
314
315 // This is only designed to catch recursive locking:
316 // - If the current thread doesn't own the mutex, `mOwningThreadId` must be
317 // zero or a different thread id written by another thread; it may change
318 // again at any time, but never to the current thread's id.
319 // - If the current thread owns the mutex, `mOwningThreadId` must be its id.
320 MOZ_ASSERT(mOwningThreadId != tid);
321
322 if (!::mozilla::detail::MutexImpl::tryLock()) {
323 // Failed to lock, nothing more to do.
324 return false;
325 }
326
327 // We now hold the mutex, it should have been in the unlocked state before.
328 MOZ_ASSERT(mOwningThreadId == 0);
329 // And we can write our own thread id.
330 mOwningThreadId = tid;
331
332 return true;
333 }
334
Unlock()335 void Unlock() {
336 // This should never trigger! But check just in case something has gone
337 // very wrong (e.g., memory corruption).
338 AssertCurrentThreadOwns();
339
340 // We're still holding the mutex here, so it's safe to just reset
341 // `mOwningThreadId`.
342 mOwningThreadId = 0;
343
344 ::mozilla::detail::MutexImpl::unlock();
345 }
346
347 // Does the current thread own this mutex?
348 // False positive or false negatives are not possible:
349 // - If `true`, the current thread owns the mutex, it has written its own
350 // `mOwningThreadId` when taking the lock, and no-one else can modify it
351 // until the current thread itself unlocks the mutex.
352 // - If `false`, the current thread does not own the mutex, therefore either
353 // `mOwningThreadId` is zero (unlocked), or it is a different thread id
354 // written by another thread, but it can never be the current thread's id
355 // until the current thread itself locks the mutex.
IsLockedOnCurrentThread() const356 bool IsLockedOnCurrentThread() const {
357 return mOwningThreadId == profiler_current_thread_id();
358 }
359
AssertCurrentThreadOwns() const360 void AssertCurrentThreadOwns() const {
361 MOZ_ASSERT(IsLockedOnCurrentThread());
362 }
363
AssertCurrentThreadDoesNotOwn() const364 void AssertCurrentThreadDoesNotOwn() const {
365 MOZ_ASSERT(!IsLockedOnCurrentThread());
366 }
367
368 private:
369 // Zero when unlocked, or the thread id of the owning thread.
370 // This should only be used to compare with the current thread id; any other
371 // number (0 or other id) could change at any time because the current thread
372 // wouldn't own the lock.
373 Atomic<int, MemoryOrdering::SequentiallyConsistent> mOwningThreadId{0};
374 };
375
376 // RAII class to lock the profiler mutex.
377 class MOZ_RAII PSAutoLock {
378 public:
PSAutoLock(PSMutex & aMutex)379 explicit PSAutoLock(PSMutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
~PSAutoLock()380 ~PSAutoLock() { mMutex.Unlock(); }
381
382 private:
383 // Allow PSAutoTryLock to call the following `PSAutoLock(PSMutex&, int)`
384 // constructor through `Maybe<const PSAutoLock>::emplace()`.
385 friend class Maybe<const PSAutoLock>;
386
387 // Special constructor taking an already-locked mutex. The `int` parameter is
388 // necessary to distinguish it from the main constructor.
PSAutoLock(PSMutex & aAlreadyLockedMutex,int)389 PSAutoLock(PSMutex& aAlreadyLockedMutex, int) : mMutex(aAlreadyLockedMutex) {
390 mMutex.AssertCurrentThreadOwns();
391 }
392
393 PSMutex& mMutex;
394 };
395
396 // RAII class that attempts to lock the profiler mutex. Example usage:
397 // PSAutoTryLock tryLock(gPSMutex);
398 // if (tryLock.IsLocked()) { locked_foo(tryLock.LockRef()); }
399 class MOZ_RAII PSAutoTryLock {
400 public:
PSAutoTryLock(PSMutex & aMutex)401 explicit PSAutoTryLock(PSMutex& aMutex) {
402 if (aMutex.TryLock()) {
403 mMaybePSAutoLock.emplace(aMutex, 0);
404 }
405 }
406
407 // Return true if the mutex was aquired and locked.
IsLocked() const408 [[nodiscard]] bool IsLocked() const { return mMaybePSAutoLock.isSome(); }
409
410 // Assuming the mutex is locked, return a reference to a `PSAutoLock` for that
411 // mutex, which can be passed as proof-of-lock.
LockRef() const412 [[nodiscard]] const PSAutoLock& LockRef() const {
413 MOZ_ASSERT(IsLocked());
414 return mMaybePSAutoLock.ref();
415 }
416
417 private:
418 // `mMaybePSAutoLock` is `Nothing` if locking failed, otherwise it contains a
419 // `const PSAutoLock` holding the locked mutex, and whose reference may be
420 // passed to functions expecting a proof-of-lock.
421 Maybe<const PSAutoLock> mMaybePSAutoLock;
422 };
423
424 // Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
425 // fields.
426 typedef const PSAutoLock& PSLockRef;
427
428 #define PS_GET(type_, name_) \
429 static type_ name_(PSLockRef) { \
430 MOZ_ASSERT(sInstance); \
431 return sInstance->m##name_; \
432 }
433
434 #define PS_GET_LOCKLESS(type_, name_) \
435 static type_ name_() { \
436 MOZ_ASSERT(sInstance); \
437 return sInstance->m##name_; \
438 }
439
440 #define PS_GET_AND_SET(type_, name_) \
441 PS_GET(type_, name_) \
442 static void Set##name_(PSLockRef, type_ a##name_) { \
443 MOZ_ASSERT(sInstance); \
444 sInstance->m##name_ = a##name_; \
445 }
446
447 static const size_t MAX_JS_FRAMES = 1024;
448 using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];
449
450 // All functions in this file can run on multiple threads unless they have an
451 // NS_IsMainThread() assertion.
452
453 // This class contains the profiler's core global state, i.e. that which is
454 // valid even when the profiler is not active. Most profile operations can't do
455 // anything useful when this class is not instantiated, so we release-assert
456 // its non-nullness in all such operations.
457 //
458 // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
459 // PSAutoLock reference as an argument as proof that the gPSMutex is currently
460 // locked. This makes it clear when gPSMutex is locked and helps avoid
461 // accidental unlocked accesses to global state. There are ways to circumvent
462 // this mechanism, but please don't do so without *very* good reason and a
463 // detailed explanation.
464 //
465 // The exceptions to this rule:
466 //
467 // - mProcessStartTime, because it's immutable;
468 //
469 // - each thread's RacyRegisteredThread object is accessible without locking via
470 // TLSRegisteredThread::RacyRegisteredThread().
471 class CorePS {
472 private:
CorePS()473 CorePS()
474 : mProcessStartTime(TimeStamp::ProcessCreation()),
475 // This needs its own mutex, because it is used concurrently from
476 // functions guarded by gPSMutex as well as others without safety (e.g.,
477 // profiler_add_marker). It is *not* used inside the critical section of
478 // the sampler, because mutexes cannot be used there.
479 mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
480 #ifdef USE_LUL_STACKWALK
481 ,
482 mLul(nullptr)
483 #endif
484 {
485 MOZ_ASSERT(NS_IsMainThread(),
486 "CorePS must be created from the main thread");
487 }
488
~CorePS()489 ~CorePS() {}
490
491 public:
Create(PSLockRef aLock)492 static void Create(PSLockRef aLock) {
493 MOZ_ASSERT(!sInstance);
494 sInstance = new CorePS();
495 }
496
Destroy(PSLockRef aLock)497 static void Destroy(PSLockRef aLock) {
498 MOZ_ASSERT(sInstance);
499 delete sInstance;
500 sInstance = nullptr;
501 }
502
503 // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
504 // being locked. This is because CorePS is instantiated so early on the main
505 // thread that we don't have to worry about it being racy.
Exists()506 static bool Exists() { return !!sInstance; }
507
AddSizeOf(PSLockRef,MallocSizeOf aMallocSizeOf,size_t & aProfSize,size_t & aLulSize)508 static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
509 size_t& aProfSize, size_t& aLulSize) {
510 MOZ_ASSERT(sInstance);
511
512 aProfSize += aMallocSizeOf(sInstance);
513
514 for (auto& registeredThread : sInstance->mRegisteredThreads) {
515 aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
516 }
517
518 for (auto& registeredPage : sInstance->mRegisteredPages) {
519 aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
520 }
521
522 // Measurement of the following things may be added later if DMD finds it
523 // is worthwhile:
524 // - CorePS::mRegisteredThreads itself (its elements' children are
525 // measured above)
526 // - CorePS::mRegisteredPages itself (its elements' children are
527 // measured above)
528 // - CorePS::mInterposeObserver
529
530 #if defined(USE_LUL_STACKWALK)
531 if (sInstance->mLul) {
532 aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
533 }
534 #endif
535 }
536
537 // No PSLockRef is needed for this field because it's immutable.
PS_GET_LOCKLESS(TimeStamp,ProcessStartTime)538 PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
539
540 // No PSLockRef is needed for this field because it's thread-safe.
541 PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)
542
543 PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
544
545 PS_GET(JsFrameBuffer&, JsFrames)
546
547 static void AppendRegisteredThread(
548 PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
549 MOZ_ASSERT(sInstance);
550 MOZ_RELEASE_ASSERT(
551 sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
552 }
553
RemoveRegisteredThread(PSLockRef,RegisteredThread * aRegisteredThread)554 static void RemoveRegisteredThread(PSLockRef,
555 RegisteredThread* aRegisteredThread) {
556 MOZ_ASSERT(sInstance);
557 // Remove aRegisteredThread from mRegisteredThreads.
558 for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
559 if (rt.get() == aRegisteredThread) {
560 sInstance->mRegisteredThreads.erase(&rt);
561 return;
562 }
563 }
564 }
565
PS_GET(Vector<RefPtr<PageInformation>> &,RegisteredPages)566 PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
567
568 static void AppendRegisteredPage(PSLockRef,
569 RefPtr<PageInformation>&& aRegisteredPage) {
570 MOZ_ASSERT(sInstance);
571 struct RegisteredPageComparator {
572 PageInformation* aA;
573 bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
574 };
575
576 auto foundPageIter = std::find_if(
577 sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
578 RegisteredPageComparator{aRegisteredPage.get()});
579
580 if (foundPageIter != sInstance->mRegisteredPages.end()) {
581 if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
582 // When a BrowsingContext is loaded, the first url loaded in it will be
583 // about:blank, and if the principal matches, the first document loaded
584 // in it will share an inner window. That's why we should delete the
585 // intermittent about:blank if they share the inner window.
586 sInstance->mRegisteredPages.erase(foundPageIter);
587 } else {
588 // Do not register the same page again.
589 return;
590 }
591 }
592
593 MOZ_RELEASE_ASSERT(
594 sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
595 }
596
RemoveRegisteredPage(PSLockRef,uint64_t aRegisteredInnerWindowID)597 static void RemoveRegisteredPage(PSLockRef,
598 uint64_t aRegisteredInnerWindowID) {
599 MOZ_ASSERT(sInstance);
600 // Remove RegisteredPage from mRegisteredPages by given inner window ID.
601 sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
602 return rd->InnerWindowID() == aRegisteredInnerWindowID;
603 });
604 }
605
ClearRegisteredPages(PSLockRef)606 static void ClearRegisteredPages(PSLockRef) {
607 MOZ_ASSERT(sInstance);
608 sInstance->mRegisteredPages.clear();
609 }
610
PS_GET(const Vector<BaseProfilerCount * > &,Counters)611 PS_GET(const Vector<BaseProfilerCount*>&, Counters)
612
613 static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
614 MOZ_ASSERT(sInstance);
615 // we don't own the counter; they may be stored in static objects
616 MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
617 }
618
RemoveCounter(PSLockRef,BaseProfilerCount * aCounter)619 static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
620 // we may be called to remove a counter after the profiler is stopped or
621 // late in shutdown.
622 if (sInstance) {
623 auto* counter = std::find(sInstance->mCounters.begin(),
624 sInstance->mCounters.end(), aCounter);
625 MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
626 sInstance->mCounters.erase(counter);
627 }
628 }
629
630 #ifdef USE_LUL_STACKWALK
Lul(PSLockRef)631 static lul::LUL* Lul(PSLockRef) {
632 MOZ_ASSERT(sInstance);
633 return sInstance->mLul.get();
634 }
SetLul(PSLockRef,UniquePtr<lul::LUL> aLul)635 static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
636 MOZ_ASSERT(sInstance);
637 sInstance->mLul = std::move(aLul);
638 }
639 #endif
640
641 PS_GET_AND_SET(const nsACString&, ProcessName)
642 PS_GET_AND_SET(const nsACString&, ETLDplus1)
643
644 private:
645 // The singleton instance
646 static CorePS* sInstance;
647
648 // The time that the process started.
649 const TimeStamp mProcessStartTime;
650
651 // The thread-safe blocks-oriented buffer into which all profiling data is
652 // recorded.
653 // ActivePS controls the lifetime of the underlying contents buffer: When
654 // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
655 // see ActivePS for further details.
656 // Note: This needs to live here outside of ActivePS, because some producers
657 // are indirectly controlled (e.g., by atomic flags) and therefore may still
658 // attempt to write some data shortly after ActivePS has shutdown and deleted
659 // the underlying buffer in memory.
660 ProfileChunkedBuffer mCoreBuffer;
661
662 // Info on all the registered threads.
663 // ThreadIds in mRegisteredThreads are unique.
664 Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
665
666 // Info on all the registered pages.
667 // InnerWindowIDs in mRegisteredPages are unique.
668 Vector<RefPtr<PageInformation>> mRegisteredPages;
669
670 // Non-owning pointers to all active counters
671 Vector<BaseProfilerCount*> mCounters;
672
673 #ifdef USE_LUL_STACKWALK
674 // LUL's state. Null prior to the first activation, non-null thereafter.
675 UniquePtr<lul::LUL> mLul;
676 #endif
677
678 // Process name, provided by child process initialization code.
679 nsAutoCString mProcessName;
680 // Private name, provided by child process initialization code (eTLD+1 in
681 // fission)
682 nsAutoCString mETLDplus1;
683
684 // This memory buffer is used by the MergeStacks mechanism. Previously it was
685 // stack allocated, but this led to a stack overflow, as it was too much
686 // memory. Here the buffer can be pre-allocated, and shared with the
687 // MergeStacks feature as needed. MergeStacks is only run while holding the
688 // lock, so it is safe to have only one instance allocated for all of the
689 // threads.
690 JsFrameBuffer mJsFrames;
691 };
692
693 CorePS* CorePS::sInstance = nullptr;
694
profiler_get_core_buffer()695 ProfileChunkedBuffer& profiler_get_core_buffer() {
696 MOZ_ASSERT(CorePS::Exists());
697 return CorePS::CoreBuffer();
698 }
699
700 class SamplerThread;
701
702 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
703 double aInterval, bool aStackWalkEnabled,
704 bool aNoTimerResolutionChange);
705
706 struct LiveProfiledThreadData {
707 RegisteredThread* mRegisteredThread;
708 UniquePtr<ProfiledThreadData> mProfiledThreadData;
709 };
710
711 // The buffer size is provided as a number of "entries", this is their size in
712 // bytes.
713 constexpr static uint32_t scBytesPerEntry = 8;
714
715 // This class contains the profiler's global state that is valid only when the
716 // profiler is active. When not instantiated, the profiler is inactive.
717 //
718 // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
719 // CorePS.
720 //
721 class ActivePS {
722 private:
723 // We need to decide how many chunks of what size we want to fit in the given
724 // total maximum capacity for this process, in the (likely) context of
725 // multiple processes doing the same choice and having an inter-process
726 // mechanism to control the overal memory limit.
727
728 // Minimum chunk size allowed, enough for at least one stack.
729 constexpr static uint32_t scMinimumChunkSize =
730 2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
731
732 // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
733 // next), and 2 released chunks (so that one can be recycled when old, leaving
734 // one with some data).
735 constexpr static uint32_t scMinimumNumberOfChunks = 4;
736
737 // And we want to limit chunks to a maximum size, which is a compromise
738 // between:
739 // - A big size, which helps with reducing the rate of allocations and IPCs.
740 // - A small size, which helps with equalizing the duration of recorded data
741 // (as the inter-process controller will discard the oldest chunks in all
742 // Firefox processes).
743 constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
744
745 public:
746 // We should be able to store at least the minimum number of the smallest-
747 // possible chunks.
748 constexpr static uint32_t scMinimumBufferSize =
749 scMinimumNumberOfChunks * scMinimumChunkSize;
750 // Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
751 // https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
752 constexpr static uint32_t scMinimumBufferEntries =
753 scMinimumBufferSize / scBytesPerEntry;
754
755 // Limit to 2GiB.
756 constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
757 constexpr static uint32_t scMaximumBufferEntries =
758 scMaximumBufferSize / scBytesPerEntry;
759
ClampToAllowedEntries(uint32_t aEntries)760 constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
761 if (aEntries <= scMinimumBufferEntries) {
762 return scMinimumBufferEntries;
763 }
764 if (aEntries >= scMaximumBufferEntries) {
765 return scMaximumBufferEntries;
766 }
767 return aEntries;
768 }
769
770 private:
ChunkSizeForEntries(uint32_t aEntries)771 constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
772 return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
773 scBytesPerEntry / scMinimumNumberOfChunks,
774 size_t(scMaximumChunkSize)));
775 }
776
AdjustFeatures(uint32_t aFeatures,uint32_t aFilterCount)777 static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
778 // Filter out any features unavailable in this platform/configuration.
779 aFeatures &= AvailableFeatures();
780
781 // Always enable ProfilerFeature::Threads if we have a filter, because
782 // users sometimes ask to filter by a list of threads but forget to
783 // explicitly specify ProfilerFeature::Threads.
784 if (aFilterCount > 0) {
785 aFeatures |= ProfilerFeature::Threads;
786 }
787
788 // Some features imply others.
789 if (aFeatures & ProfilerFeature::FileIOAll) {
790 aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
791 } else if (aFeatures & ProfilerFeature::FileIO) {
792 aFeatures |= ProfilerFeature::MainThreadIO;
793 }
794
795 return aFeatures;
796 }
797
  // Constructs the singleton activity state; called only from Create() (which
  // asserts no instance exists). aFilters is deep-copied below, so the caller
  // retains ownership of the array.
  ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
           uint64_t aActiveTabID, const Maybe<double>& aDuration)
      : mGeneration(sNextGeneration++),
        mCapacity(aCapacity),
        mDuration(aDuration),
        mInterval(aInterval),
        // mFeatures stores the *adjusted* feature set; note that later
        // initializers below deliberately test the raw aFeatures argument.
        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
        mActiveTabID(aActiveTabID),
        mProfileBufferChunkManager(
            size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
            ChunkSizeForEntries(aCapacity.Value())),
        // Attach our chunk manager to the core buffer, then use that buffer;
        // the manager is detached again in ~ActivePS().
        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
          CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
          return CorePS::CoreBuffer();
        }()),
        // The new sampler thread doesn't start sampling immediately because the
        // main loop within Run() is blocked until this function's caller
        // unlocks gPSMutex.
        mSamplerThread(NewSamplerThread(
            aLock, mGeneration, aInterval,
            ProfilerFeature::HasStackWalk(aFeatures),
            ProfilerFeature::HasNoTimerResolutionChange(aFeatures))),
        // Only create the I/O interposer observer if an I/O feature was
        // requested; otherwise it stays null.
        mInterposeObserver((ProfilerFeature::HasMainThreadIO(aFeatures) ||
                            ProfilerFeature::HasFileIO(aFeatures) ||
                            ProfilerFeature::HasFileIOAll(aFeatures))
                               ? new ProfilerIOInterposeObserver()
                               : nullptr),
        mIsPaused(false),
        mIsSamplingPaused(false)
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
        ,
        mWasSamplingPaused(false)
#endif
  {
    // Deep copy aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
    }

#if !defined(RELEASE_OR_BETA)
    if (mInterposeObserver) {
      // We need to register the observer on the main thread, because we want
      // to observe IO that happens on the main thread.
      // IOInterposer needs to be initialized before calling
      // IOInterposer::Register or our observer will be silently dropped.
      if (NS_IsMainThread()) {
        IOInterposer::Init();
        IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
      } else {
        // Copy the RefPtr so the lambda keeps the observer alive until the
        // main thread runs it.
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
              IOInterposer::Init();
              IOInterposer::Register(IOInterposeObserver::OpAll, observer);
            }));
      }
    }
#endif
  }
859
  // Tears down the activity state. Note: the SamplerThread is NOT destroyed
  // here; Destroy() hands it back to the caller (see mSamplerThread's comment).
  ~ActivePS() {
#if !defined(RELEASE_OR_BETA)
    if (mInterposeObserver) {
      // We need to unregister the observer on the main thread, because that's
      // where we've registered it.
      if (NS_IsMainThread()) {
        IOInterposer::Unregister(IOInterposeObserver::OpAll,
                                 mInterposeObserver);
      } else {
        // Copy the RefPtr so the lambda keeps the observer alive until the
        // main thread runs it.
        RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
        NS_DispatchToMainThread(
            NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
              IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
            }));
      }
    }
#endif
    // Detach our chunk manager (a member about to be destroyed with this
    // object) from the core buffer; it was attached in the constructor.
    CorePS::CoreBuffer().ResetChunkManager();
  }
879
ThreadSelected(const char * aThreadName)880 bool ThreadSelected(const char* aThreadName) {
881 if (mFilters.empty()) {
882 return true;
883 }
884
885 std::string name = aThreadName;
886 std::transform(name.begin(), name.end(), name.begin(), ::tolower);
887
888 for (uint32_t i = 0; i < mFilters.length(); ++i) {
889 std::string filter = mFilters[i];
890
891 if (filter == "*") {
892 return true;
893 }
894
895 std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
896
897 // Crude, non UTF-8 compatible, case insensitive substring search
898 if (name.find(filter) != std::string::npos) {
899 return true;
900 }
901
902 // If the filter starts with pid:, check for a pid match
903 if (filter.find("pid:") == 0) {
904 std::string mypid = std::to_string(profiler_current_process_id());
905 if (filter.compare(4, std::string::npos, mypid) == 0) {
906 return true;
907 }
908 }
909 }
910
911 return false;
912 }
913
 public:
  // Instantiates the singleton. Asserts that the profiler is not already
  // active. aFilters is deep-copied by the ActivePS constructor, so the caller
  // keeps ownership of the array.
  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, uint64_t aActiveTabID,
                     const Maybe<double>& aDuration) {
    MOZ_ASSERT(!sInstance);
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
                             aFilterCount, aActiveTabID, aDuration);
  }
923
  // Destroys the singleton and returns the SamplerThread, which the caller
  // must destroy itself — typically after releasing gPSMutex, because deleting
  // it performs a join (see the generation-tracking comment below).
  [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    // Grab the sampler thread before `delete sInstance` so it can be handed
    // back to the caller.
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }
932
Exists(PSLockRef)933 static bool Exists(PSLockRef) { return !!sInstance; }
934
Equals(PSLockRef,PowerOfTwo32 aCapacity,const Maybe<double> & aDuration,double aInterval,uint32_t aFeatures,const char ** aFilters,uint32_t aFilterCount,uint64_t aActiveTabID)935 static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
936 const Maybe<double>& aDuration, double aInterval,
937 uint32_t aFeatures, const char** aFilters,
938 uint32_t aFilterCount, uint64_t aActiveTabID) {
939 MOZ_ASSERT(sInstance);
940 if (sInstance->mCapacity != aCapacity ||
941 sInstance->mDuration != aDuration ||
942 sInstance->mInterval != aInterval ||
943 sInstance->mFeatures != aFeatures ||
944 sInstance->mFilters.length() != aFilterCount ||
945 sInstance->mActiveTabID != aActiveTabID) {
946 return false;
947 }
948
949 for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
950 if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
951 return false;
952 }
953 }
954 return true;
955 }
956
  // Memory-reporter helper: approximate heap size of the ActivePS object plus
  // its profile buffer.
  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
    MOZ_ASSERT(sInstance);

    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }
972
  // A thread is profiled if it is the main thread or the Threads feature is
  // on, AND its name passes the filter check in ThreadSelected().
  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
    MOZ_ASSERT(sInstance);
    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }

  // Defined out-of-line. NOTE(review): presumably queues aCallback to run
  // after a sampling pass — confirm against the definition elsewhere.
  [[nodiscard]] static bool AppendPostSamplingCallback(
      PSLockRef, PostSamplingCallback&& aCallback);
981
  // Writes out the current active configuration of the profile as JSON:
  // features, thread filters, interval, capacity, optional duration, and the
  // active tab ID. Emitted as a named property when aPropertyName is
  // non-empty, otherwise as an array element; writes `null` when inactive.
  static void WriteActiveConfiguration(
      PSLockRef aLock, JSONWriter& aWriter,
      const Span<const char>& aPropertyName = MakeStringSpan("")) {
    if (!sInstance) {
      // Profiler not active: emit null in the requested position.
      if (!aPropertyName.empty()) {
        aWriter.NullProperty(aPropertyName);
      } else {
        aWriter.NullElement();
      }
      return;
    };

    if (!aPropertyName.empty()) {
      aWriter.StartObjectProperty(aPropertyName);
    } else {
      aWriter.StartObjectElement();
    }

    {
      // "features": names of all currently-active profiler features.
      aWriter.StartArrayProperty("features", aWriter.SingleLineStyle);
#define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_) \
  if (profiler_feature_active(ProfilerFeature::Name_)) { \
    aWriter.StringElement(str_); \
  }

      PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
#undef WRITE_ACTIVE_FEATURES
      aWriter.EndArray();
    }
    {
      // "threads": the thread-name filters the profiler was started with.
      aWriter.StartArrayProperty("threads", aWriter.SingleLineStyle);
      for (const auto& filter : sInstance->mFilters) {
        aWriter.StringElement(filter);
      }
      aWriter.EndArray();
    }
    {
      // Now write all the simple values.

      // The interval is also available on profile.meta.interval
      aWriter.DoubleProperty("interval", sInstance->mInterval);
      aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
      if (sInstance->mDuration) {
        aWriter.DoubleProperty("duration", sInstance->mDuration.value());
      }
      // Here, we are converting uint64_t to double. Tab IDs are
      // being created using `nsContentUtils::GenerateProcessSpecificId`, which
      // is specifically designed to only use 53 of the 64 bits to be lossless
      // when passed into and out of JS as a double.
      aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
    }
    aWriter.EndObject();
  }
1036
  // Simple locked getters for the activation parameters (expanded by the
  // PS_GET macro defined earlier in this file).
  PS_GET(uint32_t, Generation)

  PS_GET(PowerOfTwo32, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

  PS_GET(uint64_t, ActiveTabID)

  // Generates one `static bool Feature<Name>(PSLockRef)` getter per profiler
  // feature, testing the corresponding bit of the adjusted mFeatures.
#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
  static bool Feature##Name_(PSLockRef) {                     \
    MOZ_ASSERT(sInstance);                                    \
    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
  }

  PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

#undef PS_GET_FEATURE
1058
1059 static uint32_t JSFlags(PSLockRef aLock) {
1060 uint32_t Flags = 0;
1061 Flags |=
1062 FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
1063 Flags |= FeatureJSTracer(aLock)
1064 ? uint32_t(JSInstrumentationFlags::TraceLogging)
1065 : 0;
1066 Flags |= FeatureJSAllocations(aLock)
1067 ? uint32_t(JSInstrumentationFlags::Allocations)
1068 : 0;
1069 return Flags;
1070 }
1071
  // Locked getter for the deep-copied thread-name filters.
  PS_GET(const Vector<std::string>&, Filters)

  // Not using PS_GET, because only the "Controlled" interface of
  // `mProfileBufferChunkManager` should be exposed here.
  static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
      PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mProfileBufferChunkManager;
  }

  // Forwards to the chunk manager to fulfill its pending chunk requests.
  static void FulfillChunkRequests(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
  }

  // The buffer into which all samples are recorded.
  static ProfileBuffer& Buffer(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mProfileBuffer;
  }

  // Threads that are currently registered AND being profiled.
  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLiveProfiledThreads;
  }
1096
1097 // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
1098 // for all threads that should be included in a profile, both for threads
1099 // that are still registered, and for threads that have been unregistered but
1100 // still have data in the buffer.
1101 // For threads that have already been unregistered, the RegisteredThread
1102 // pointer will be null.
1103 // The returned array is sorted by thread register time.
1104 // Do not hold on to the return value across thread registration or profiler
1105 // restarts.
1106 static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
ProfiledThreads(PSLockRef)1107 ProfiledThreads(PSLockRef) {
1108 MOZ_ASSERT(sInstance);
1109 Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
1110 MOZ_RELEASE_ASSERT(
1111 array.initCapacity(sInstance->mLiveProfiledThreads.length() +
1112 sInstance->mDeadProfiledThreads.length()));
1113 for (auto& t : sInstance->mLiveProfiledThreads) {
1114 MOZ_RELEASE_ASSERT(array.append(
1115 std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
1116 }
1117 for (auto& t : sInstance->mDeadProfiledThreads) {
1118 MOZ_RELEASE_ASSERT(
1119 array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
1120 }
1121
1122 std::sort(array.begin(), array.end(),
1123 [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
1124 const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
1125 return a.second->Info()->RegisterTime() <
1126 b.second->Info()->RegisterTime();
1127 });
1128 return array;
1129 }
1130
ProfiledPages(PSLockRef aLock)1131 static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
1132 MOZ_ASSERT(sInstance);
1133 Vector<RefPtr<PageInformation>> array;
1134 for (auto& d : CorePS::RegisteredPages(aLock)) {
1135 MOZ_RELEASE_ASSERT(array.append(d));
1136 }
1137 for (auto& d : sInstance->mDeadProfiledPages) {
1138 MOZ_RELEASE_ASSERT(array.append(d));
1139 }
1140 // We don't need to sort the pages like threads since we won't show them
1141 // as a list.
1142 return array;
1143 }
1144
1145 // Do a linear search through mLiveProfiledThreads to find the
1146 // ProfiledThreadData object for a RegisteredThread.
GetProfiledThreadData(PSLockRef,RegisteredThread * aRegisteredThread)1147 static ProfiledThreadData* GetProfiledThreadData(
1148 PSLockRef, RegisteredThread* aRegisteredThread) {
1149 MOZ_ASSERT(sInstance);
1150 for (const LiveProfiledThreadData& thread :
1151 sInstance->mLiveProfiledThreads) {
1152 if (thread.mRegisteredThread == aRegisteredThread) {
1153 return thread.mProfiledThreadData.get();
1154 }
1155 }
1156 return nullptr;
1157 }
1158
  // Starts profiling aRegisteredThread: takes ownership of its
  // ProfiledThreadData, records the (thread, data) pair in
  // mLiveProfiledThreads, and returns a non-owning pointer to the stored data.
  static ProfiledThreadData* AddLiveProfiledThread(
      PSLockRef, RegisteredThread* aRegisteredThread,
      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
            aRegisteredThread, std::move(aProfiledThreadData)}));

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
  }
1170
  // Called when a profiled thread goes away: moves its ProfiledThreadData to
  // the dead list (its buffer data may still be needed for the profile) and
  // records the buffer position at unregistration time.
  static void UnregisterThread(PSLockRef aLockRef,
                               RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);

    // Opportunistically drop dead threads whose buffer data has expired.
    DiscardExpiredDeadProfiledThreads(aLockRef);

    // Find the right entry in the mLiveProfiledThreads array and remove the
    // element, moving the ProfiledThreadData object for the thread into the
    // mDeadProfiledThreads array.
    // The thread's RegisteredThread object gets destroyed here.
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        thread.mProfiledThreadData->NotifyUnregistered(
            sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
            std::move(thread.mProfiledThreadData)));
        sInstance->mLiveProfiledThreads.erase(
            &sInstance->mLiveProfiledThreads[i]);
        return;
      }
    }
  }
1194
  // Whole-profiler pause flag; IsSamplingPaused() below also honors it.
  PS_GET_AND_SET(bool, IsPaused)

  // True if sampling is paused (through generic `SetIsPaused()` or specific
  // `SetIsSamplingPaused()`).
  static bool IsSamplingPaused(PSLockRef lock) {
    MOZ_ASSERT(sInstance);
    return IsPaused(lock) || sInstance->mIsSamplingPaused;
  }

  // Pauses/resumes only the periodic sampling, not the whole profiler.
  static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
    MOZ_ASSERT(sInstance);
    sInstance->mIsSamplingPaused = aIsSamplingPaused;
  }

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  // Accessors for the around-fork sampling state (see mWasSamplingPaused).
  PS_GET_AND_SET(bool, WasSamplingPaused)
#endif
1212
1213 static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
1214 MOZ_ASSERT(sInstance);
1215 uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
1216 // Discard any dead threads that were unregistered before bufferRangeStart.
1217 sInstance->mDeadProfiledThreads.eraseIf(
1218 [bufferRangeStart](
1219 const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
1220 Maybe<uint64_t> bufferPosition =
1221 aProfiledThreadData->BufferPositionWhenUnregistered();
1222 MOZ_RELEASE_ASSERT(bufferPosition,
1223 "should have unregistered this thread");
1224 return *bufferPosition < bufferRangeStart;
1225 });
1226 }
1227
  // Moves the page(s) matching aRegisteredInnerWindowID from CorePS's
  // registered pages into mDeadProfiledPages, recording the buffer position
  // at unregistration time.
  static void UnregisterPage(PSLockRef aLock,
                             uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    auto& registeredPages = CorePS::RegisteredPages(aLock);
    for (size_t i = 0; i < registeredPages.length(); i++) {
      RefPtr<PageInformation>& page = registeredPages[i];
      if (page->InnerWindowID() == aRegisteredInnerWindowID) {
        page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(
            sInstance->mDeadProfiledPages.append(std::move(page)));
        // `i--` compensates for the erase so the element shifted into slot i
        // is not skipped on the next iteration.
        registeredPages.erase(&registeredPages[i--]);
      }
    }
  }
1242
DiscardExpiredPages(PSLockRef)1243 static void DiscardExpiredPages(PSLockRef) {
1244 MOZ_ASSERT(sInstance);
1245 uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
1246 // Discard any dead pages that were unregistered before
1247 // bufferRangeStart.
1248 sInstance->mDeadProfiledPages.eraseIf(
1249 [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
1250 Maybe<uint64_t> bufferPosition =
1251 aProfiledPage->BufferPositionWhenUnregistered();
1252 MOZ_RELEASE_ASSERT(bufferPosition,
1253 "should have unregistered this page");
1254 return *bufferPosition < bufferRangeStart;
1255 });
1256 }
1257
  // Drops all dead pages, regardless of whether their data is still in range.
  static void ClearUnregisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mDeadProfiledPages.clear();
  }
1262
  // Discards the base profile and any exit profiles whose gather position has
  // been overwritten in the circular buffer.
  static void ClearExpiredExitProfiles(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard exit profiles that were gathered before our buffer RangeStart.
    // If we have started to overwrite our data from when the Base profile was
    // added, we should get rid of that Base profile because it's now older than
    // our oldest Gecko profile data.
    //
    // When adding: (In practice the starting buffer should be empty)
    // v Start == End
    // |                 <-- Buffer range, initially empty.
    // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
    //
    // Later, still in range:
    // v Start   v End
    // |=========|       <-- Buffer range growing.
    // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
    //
    // Even later, now out of range:
    //   v Start      v End
    //   |============|  <-- Buffer range full and sliding.
    // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
    if (sInstance->mBaseProfileThreads &&
        sInstance->mGeckoIndexWhenBaseProfileAdded
                .ConvertToProfileBufferIndex() <
            CorePS::CoreBuffer().GetState().mRangeStart) {
      DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
                sInstance->mBaseProfileThreads.get());
      sInstance->mBaseProfileThreads.reset();
    }
    // Exit profiles expire the same way, based on their gather position.
    sInstance->mExitProfiles.eraseIf(
        [bufferRangeStart](const ExitProfile& aExitProfile) {
          return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
        });
  }
1298
  // Stores the startup ("base") profile threads from BaseProfiler, and
  // remembers the buffer's end position at that moment so the profile can be
  // expired later (see ClearExpiredExitProfiles()).
  static void AddBaseProfileThreads(PSLockRef aLock,
                                    UniquePtr<char[]> aBaseProfileThreads) {
    MOZ_ASSERT(sInstance);
    DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
    sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
    sInstance->mGeckoIndexWhenBaseProfileAdded =
        ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
            CorePS::CoreBuffer().GetState().mRangeEnd);
  }
1308
  // Transfers ownership of the stored base profile to the caller. May return
  // null if none was added or it has already expired.
  static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);

    // Expire first, so an out-of-range base profile is not handed out.
    ClearExpiredExitProfiles(aLock);

    DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
              sInstance->mBaseProfileThreads.get());
    return std::move(sInstance->mBaseProfileThreads);
  }
1318
  // Stores a gathered exit profile's JSON, tagged with the current buffer end
  // position so it can be expired once the buffer wraps past it.
  static void AddExitProfile(PSLockRef aLock, const nsCString& aExitProfile) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
        ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
  }
1327
MoveExitProfiles(PSLockRef aLock)1328 static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
1329 MOZ_ASSERT(sInstance);
1330
1331 ClearExpiredExitProfiles(aLock);
1332
1333 Vector<nsCString> profiles;
1334 MOZ_RELEASE_ASSERT(
1335 profiles.initCapacity(sInstance->mExitProfiles.length()));
1336 for (auto& profile : sInstance->mExitProfiles) {
1337 MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
1338 }
1339 sInstance->mExitProfiles.clear();
1340 return profiles;
1341 }
1342
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  // Records which counter is the memory counter, so it can be recognized by
  // IsMemoryCounter() below.
  static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
    MOZ_ASSERT(sInstance);

    sInstance->mMemoryCounter = aMemoryCounter;
  }

  // True if aMemoryCounter is the counter registered via SetMemoryCounter().
  static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
    MOZ_ASSERT(sInstance);

    return sInstance->mMemoryCounter == aMemoryCounter;
  }
#endif
1356
 private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of entries in mProfileBuffer.
  const PowerOfTwo32 mCapacity;

  // The maximum duration of entries in mProfileBuffer, in seconds.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled, as adjusted by AdjustFeatures().
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // ID of the active browser screen's active tab.
  // It's being used to determine the profiled tab. It's "0" if we failed to
  // get the ID.
  const uint64_t mActiveTabID;

  // The chunk manager used by `mProfileBuffer` below.
  ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;

  // The buffer into which all samples are recorded.
  ProfileBuffer mProfileBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  // - mLiveProfiledThreads contains all threads that are still registered, and
  // - mDeadProfiledThreads contains all threads that have already been
  //   unregistered but for which there is still data in the profile buffer.
  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are being moved to this array after unregistration.
  // We are keeping them in case we need them in the profile data.
  // We are removing them when we ensure that we won't need them anymore.
  Vector<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // The interposer that records main thread I/O.
  // Null when no I/O feature was requested (see the constructor).
  RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;

  // Is the profiler fully paused?
  bool mIsPaused;

  // Is the profiler periodic sampling paused?
  bool mIsSamplingPaused;

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  // Used to record whether the sampler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasSamplingPaused;
#endif

  // Optional startup profile thread array from BaseProfiler.
  UniquePtr<char[]> mBaseProfileThreads;
  // Buffer position when the base profile was added; used to expire it in
  // ClearExpiredExitProfiles().
  ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded;

  // An exit profile's JSON plus the buffer position when it was gathered,
  // which determines when it expires.
  struct ExitProfile {
    nsCString mJSON;
    uint64_t mBufferPositionAtGatherTime;
  };
  Vector<ExitProfile> mExitProfiles;

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  // The counter registered via SetMemoryCounter().
  Atomic<const BaseProfilerCount*> mMemoryCounter;
#endif
};
1455
// Out-of-class definitions of ActivePS's static storage.
ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

// The PS_GET* helper macros are only needed by the classes above.
#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

// The mutex that guards accesses to CorePS and ActivePS.
static PSMutex gPSMutex;

// Guards the profiling-state-change callback list below. Separate from
// gPSMutex; the callback functions assert gPSMutex is NOT held.
static PSMutex gProfilerStateChangeMutex;
1467
1468 struct IdentifiedProfilingStateChangeCallback {
1469 ProfilingStateSet mProfilingStateSet;
1470 ProfilingStateChangeCallback mProfilingStateChangeCallback;
1471 uintptr_t mUniqueIdentifier;
1472
IdentifiedProfilingStateChangeCallbackIdentifiedProfilingStateChangeCallback1473 explicit IdentifiedProfilingStateChangeCallback(
1474 ProfilingStateSet aProfilingStateSet,
1475 ProfilingStateChangeCallback&& aProfilingStateChangeCallback,
1476 uintptr_t aUniqueIdentifier)
1477 : mProfilingStateSet(aProfilingStateSet),
1478 mProfilingStateChangeCallback(aProfilingStateChangeCallback),
1479 mUniqueIdentifier(aUniqueIdentifier) {}
1480 };
using IdentifiedProfilingStateChangeCallbackUPtr =
    UniquePtr<IdentifiedProfilingStateChangeCallback>;

// All registered profiling-state-change callbacks, guarded by
// gProfilerStateChangeMutex. NOTE(review): despite the `m` prefix this is a
// file-scope static, not a member.
static Vector<IdentifiedProfilingStateChangeCallbackUPtr>
    mIdentifiedProfilingStateChangeCallbacks;
1486
// Registers aCallback to be invoked whenever the profiler enters any state in
// aProfilingStateSet. A non-zero aUniqueIdentifier allows later removal via
// profiler_remove_state_change_callback().
void profiler_add_state_change_callback(
    ProfilingStateSet aProfilingStateSet,
    ProfilingStateChangeCallback&& aCallback,
    uintptr_t aUniqueIdentifier /* = 0 */) {
  gPSMutex.AssertCurrentThreadDoesNotOwn();
  PSAutoLock lock(gProfilerStateChangeMutex);

#ifdef DEBUG
  // Check that a non-zero id is not already used. But forgive it in non-DEBUG
  // builds; in the worst case they may get removed too early.
  if (aUniqueIdentifier != 0) {
    for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
         mIdentifiedProfilingStateChangeCallbacks) {
      MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier);
    }
  }
#endif  // DEBUG

  // Invoke immediately if the callback asked for AlreadyActive and the
  // profiler is currently active.
  if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) &&
      profiler_is_active()) {
    aCallback(ProfilingState::AlreadyActive);
  }

  // Append failure is deliberately ignored; the callback is simply dropped.
  (void)mIdentifiedProfilingStateChangeCallbacks.append(
      MakeUnique<IdentifiedProfilingStateChangeCallback>(
          aProfilingStateSet, std::move(aCallback), aUniqueIdentifier));
}
1514
1515 // Remove the callback with the given identifier.
profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier)1516 void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
1517 MOZ_ASSERT(aUniqueIdentifier != 0);
1518 if (aUniqueIdentifier == 0) {
1519 // Forgive zero in non-DEBUG builds.
1520 return;
1521 }
1522
1523 gPSMutex.AssertCurrentThreadDoesNotOwn();
1524 PSAutoLock lock(gProfilerStateChangeMutex);
1525
1526 mIdentifiedProfilingStateChangeCallbacks.eraseIf(
1527 [aUniqueIdentifier](
1528 const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) {
1529 if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) {
1530 return false;
1531 }
1532 if (aIdedCallback->mProfilingStateSet.contains(
1533 ProfilingState::RemovingCallback)) {
1534 aIdedCallback->mProfilingStateChangeCallback(
1535 ProfilingState::RemovingCallback);
1536 }
1537 return true;
1538 });
1539 }
1540
invoke_profiler_state_change_callbacks(ProfilingState aProfilingState)1541 static void invoke_profiler_state_change_callbacks(
1542 ProfilingState aProfilingState) {
1543 gPSMutex.AssertCurrentThreadDoesNotOwn();
1544 PSAutoLock lock(gProfilerStateChangeMutex);
1545
1546 for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
1547 mIdentifiedProfilingStateChangeCallbacks) {
1548 if (idedCallback->mProfilingStateSet.contains(aProfilingState)) {
1549 idedCallback->mProfilingStateChangeCallback(aProfilingState);
1550 }
1551 }
1552 }
1553
// Out-of-class definition of RacyFeatures' atomic state word, zero-initialized
// (inactive, no features).
Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
1555
1556 // Each live thread has a RegisteredThread, and we store a reference to it in
1557 // TLS. This class encapsulates that TLS, and also handles the associated
1558 // profiling stack used by AutoProfilerLabel.
1559 class TLSRegisteredThread {
1560 public:
1561 // This should only be called once before any other access.
1562 // In this case it's called from `profiler_init()` on the main thread, before
1563 // the main thread registers itself.
Init()1564 static void Init() {
1565 MOZ_ASSERT(sState == State::Uninitialized, "Already initialized");
1566 AutoProfilerLabel::ProfilingStackOwnerTLS::Init();
1567 MOZ_ASSERT(
1568 AutoProfilerLabel::ProfilingStackOwnerTLS::sState !=
1569 AutoProfilerLabel::ProfilingStackOwnerTLS::State::Uninitialized,
1570 "Unexpected ProfilingStackOwnerTLS::sState after "
1571 "ProfilingStackOwnerTLS::Init()");
1572 sState =
1573 (AutoProfilerLabel::ProfilingStackOwnerTLS::sState ==
1574 AutoProfilerLabel::ProfilingStackOwnerTLS::State::Initialized &&
1575 sRegisteredThread.init())
1576 ? State::Initialized
1577 : State::Unavailable;
1578 }
1579
IsTLSInited()1580 static bool IsTLSInited() {
1581 MOZ_ASSERT(sState != State::Uninitialized,
1582 "TLSRegisteredThread should only be accessed after Init()");
1583 return sState == State::Initialized;
1584 }
1585
1586 // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
RegisteredThread(PSLockRef)1587 static class RegisteredThread* RegisteredThread(PSLockRef) {
1588 if (!IsTLSInited()) {
1589 return nullptr;
1590 }
1591 return sRegisteredThread.get();
1592 }
1593
1594 // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
RacyRegisteredThread()1595 static class RacyRegisteredThread* RacyRegisteredThread() {
1596 if (!IsTLSInited()) {
1597 return nullptr;
1598 }
1599 class RegisteredThread* registeredThread = sRegisteredThread.get();
1600 return registeredThread ? ®isteredThread->RacyRegisteredThread()
1601 : nullptr;
1602 }
1603
1604 // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
1605 // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
1606 // is marginally slower because it requires an extra pointer indirection.
Stack()1607 static ProfilingStack* Stack() {
1608 if (!IsTLSInited()) {
1609 return nullptr;
1610 }
1611 ProfilingStackOwner* profilingStackOwner =
1612 AutoProfilerLabel::ProfilingStackOwnerTLS::Get();
1613 if (!profilingStackOwner) {
1614 return nullptr;
1615 }
1616 return &profilingStackOwner->ProfilingStack();
1617 }
1618
SetRegisteredThreadAndAutoProfilerLabelProfilingStack(PSLockRef,class RegisteredThread * aRegisteredThread)1619 static void SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
1620 PSLockRef, class RegisteredThread* aRegisteredThread) {
1621 if (!IsTLSInited()) {
1622 return;
1623 }
1624 MOZ_RELEASE_ASSERT(
1625 aRegisteredThread,
1626 "Use ResetRegisteredThread() instead of SetRegisteredThread(nullptr)");
1627 sRegisteredThread.set(aRegisteredThread);
1628 ProfilingStackOwner& profilingStackOwner =
1629 aRegisteredThread->RacyRegisteredThread().ProfilingStackOwner();
1630 profilingStackOwner.AddRef();
1631 AutoProfilerLabel::ProfilingStackOwnerTLS::Set(&profilingStackOwner);
1632 }
1633
1634 // Only reset the registered thread. The AutoProfilerLabel's ProfilingStack
1635 // is kept, because the thread may not have unregistered itself yet, so it may
1636 // still push/pop labels even after the profiler has shut down.
ResetRegisteredThread(PSLockRef)1637 static void ResetRegisteredThread(PSLockRef) {
1638 if (!IsTLSInited()) {
1639 return;
1640 }
1641 sRegisteredThread.set(nullptr);
1642 }
1643
1644 // Reset the AutoProfilerLabels' ProfilingStack, because the thread is
1645 // unregistering itself.
ResetAutoProfilerLabelProfilingStack(PSLockRef)1646 static void ResetAutoProfilerLabelProfilingStack(PSLockRef) {
1647 if (!IsTLSInited()) {
1648 return;
1649 }
1650 MOZ_RELEASE_ASSERT(
1651 AutoProfilerLabel::ProfilingStackOwnerTLS::Get(),
1652 "ResetAutoProfilerLabelProfilingStack should only be called once");
1653 AutoProfilerLabel::ProfilingStackOwnerTLS::Get()->Release();
1654 AutoProfilerLabel::ProfilingStackOwnerTLS::Set(nullptr);
1655 }
1656
 private:
  // TLS-initialization state. Only written once from `profiler_init` calling
  // `TLSRegisteredThread::Init()`; all reads should only happen after
  // `Init()`, so there is no need to make it atomic.
  // Zero-initialization yields Uninitialized.
  enum class State { Uninitialized = 0, Initialized, Unavailable };
  static State sState;

  // This is a non-owning reference to the RegisteredThread;
  // CorePS::mRegisteredThreads is the owning reference. On thread
  // deregistration, this reference is cleared and the RegisteredThread is
  // destroyed.
  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};
1670
// Zero-initialized to State::Uninitialized.
/* static */
TLSRegisteredThread::State TLSRegisteredThread::sState;

/* static */
MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

// Only written once from `profiler_init` (through `TLSRegisteredThread::Init()`
// and `AutoProfilerLabel::ProfilingStackOwnerTLS::Init()`); all reads should
// only happen after `Init()`, so there is no need to make it atomic.
// Zero-initialized to State::Uninitialized.
/* static */
AutoProfilerLabel::ProfilingStackOwnerTLS::State
    AutoProfilerLabel::ProfilingStackOwnerTLS::sState;

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in GeckoProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   GeckoProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
//
// The (Racy)RegisteredThread and AutoProfilerLabel::ProfilingStackOwnerTLS
// co-own the thread's ProfilingStack, so whichever is reset second is
// responsible for destroying the ProfilingStack; because MOZ_THREAD_LOCAL
// doesn't support RefPtr, AddRef & Release are done explicitly in
// TLSRegisteredThread.
/* static */
MOZ_THREAD_LOCAL(ProfilingStackOwner*)
AutoProfilerLabel::ProfilingStackOwnerTLS::sProfilingStackOwnerTLS;
1710
1711 /* static */
Init()1712 void AutoProfilerLabel::ProfilingStackOwnerTLS::Init() {
1713 MOZ_ASSERT(sState == State::Uninitialized, "Already initialized");
1714 sState =
1715 sProfilingStackOwnerTLS.init() ? State::Initialized : State::Unavailable;
1716 }
1717
DumpStackAndCrash() const1718 void ProfilingStackOwner::DumpStackAndCrash() const {
1719 fprintf(stderr,
1720 "ProfilingStackOwner::DumpStackAndCrash() thread id: %d, size: %u\n",
1721 profiler_current_thread_id(), unsigned(mProfilingStack.stackSize()));
1722 js::ProfilingStackFrame* allFrames = mProfilingStack.frames;
1723 for (uint32_t i = 0; i < mProfilingStack.stackSize(); i++) {
1724 js::ProfilingStackFrame& frame = allFrames[i];
1725 if (frame.isLabelFrame()) {
1726 fprintf(stderr, "%u: label frame, sp=%p, label='%s' (%s)\n", unsigned(i),
1727 frame.stackAddress(), frame.label(),
1728 frame.dynamicString() ? frame.dynamicString() : "-");
1729 } else {
1730 fprintf(stderr, "%u: non-label frame\n", unsigned(i));
1731 }
1732 }
1733
1734 MOZ_CRASH("Non-empty stack!");
1735 }
1736
// The name used for the main thread.
static const char* const kMainThreadName = "GeckoMain";
1739
1740 ////////////////////////////////////////////////////////////////////////
1741 // BEGIN sampling/unwinding code
1742
1743 // The registers used for stack unwinding and a few other sampling purposes.
1744 // The ctor does nothing; users are responsible for filling in the fields.
1745 class Registers {
1746 public:
Registers()1747 Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
1748
1749 #if defined(HAVE_NATIVE_UNWIND)
1750 // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
1751 void SyncPopulate();
1752 #endif
1753
Clear()1754 void Clear() { memset(this, 0, sizeof(*this)); }
1755
1756 // These fields are filled in by
1757 // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
1758 // samples, and by SyncPopulate() for synchronous samples.
1759 Address mPC; // Instruction pointer.
1760 Address mSP; // Stack pointer.
1761 Address mFP; // Frame pointer.
1762 Address mLR; // ARM link register.
1763 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
1764 // This contains all the registers, which means it duplicates the four fields
1765 // above. This is ok.
1766 ucontext_t* mContext; // The context from the signal handler.
1767 #endif
1768 };
1769
// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;

// Fixed-capacity record of a walked native stack: parallel arrays of program
// counters and stack pointers, with mCount valid entries. All storage starts
// zeroed.
struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES] = {};
  void* mSPs[MAX_NATIVE_FRAMES] = {};
  size_t mCount = 0;  // Number of frames filled.

  NativeStack() {}
};
1781
// True while some thread is walking the JS stack. Only one thread may walk at
// a time; AutoWalkJSStack below enforces this via compareExchange.
Atomic<bool> WALKING_JS_STACK(false);

// RAII guard that tries to claim the global JS-stack-walking permission.
// walkAllowed is true iff this instance won the compareExchange; only the
// winner clears the flag again on destruction.
struct AutoWalkJSStack {
  bool walkAllowed;

  AutoWalkJSStack() : walkAllowed(false) {
    // Atomically flip false->true; fails (leaving walkAllowed false) if some
    // other walker currently holds the flag.
    walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
  }

  ~AutoWalkJSStack() {
    if (walkAllowed) {
      WALKING_JS_STACK = false;
    }
  }
};
1797
// Optional control information for native stack walkers: a small fixed set of
// "resume points" where a walker that ran off the end of a stack segment can
// restart with known-good register values. Per the #if below, this is only
// functional with frame-pointer or Moz stack walking on amd64; elsewhere the
// methods are declared but not defined (scIsSupported is false).
class StackWalkControl {
 public:
  struct ResumePoint {
    // If lost, the stack walker should resume at these values.
    void* resumeSp;  // If null, stop the walker here, don't resume again.
    void* resumeBp;
    void* resumePc;
  };

#if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \
     defined(GP_ARCH_amd64))
 public:
  static constexpr bool scIsSupported = true;

  // Forget all stored resume points.
  void Clear() { mResumePointCount = 0; }

  size_t ResumePointCount() const { return mResumePointCount; }

  static constexpr size_t MaxResumePointCount() {
    return scMaxResumePointCount;
  }

  // Add a resume point. Note that adding anything past MaxResumePointCount()
  // would silently fail. In practice this means that stack walking may still
  // lose native frames.
  void AddResumePoint(ResumePoint&& aResumePoint) {
    // If SP is null, we expect BP and PC to also be null.
    MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp);
    MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc);

    // If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to
    // be null even if SP is not null.)
    MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp);
    MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp);

    if (mResumePointCount < scMaxResumePointCount) {
      mResumePoint[mResumePointCount] = std::move(aResumePoint);
      ++mResumePointCount;
    }
  }

  // Only allow non-modifying range-for loops.
  const ResumePoint* begin() const { return &mResumePoint[0]; }
  const ResumePoint* end() const { return &mResumePoint[mResumePointCount]; }

  // Find the next resume point that would be a caller of the function with the
  // given SP; i.e., the resume point with the closest resumeSp > aSp.
  // Returns nullptr if no stored resume point is above aSp.
  const ResumePoint* GetResumePointCallingSp(void* aSp) const {
    const ResumePoint* callingResumePoint = nullptr;
    for (const ResumePoint& resumePoint : *this) {
      if (resumePoint.resumeSp &&  // This is a potential resume point.
          resumePoint.resumeSp > aSp &&  // It is a caller of the given SP.
          (!callingResumePoint ||  // This is the first candidate.
           resumePoint.resumeSp < callingResumePoint->resumeSp)  // Or better.
      ) {
        callingResumePoint = &resumePoint;
      }
    }
    return callingResumePoint;
  }

 private:
  size_t mResumePointCount = 0;
  static constexpr size_t scMaxResumePointCount = 32;
  ResumePoint mResumePoint[scMaxResumePointCount];

#else
 public:
  static constexpr bool scIsSupported = false;
  // Discarded constexpr-if statements are still checked during compilation,
  // these declarations are necessary for that, even if not actually used.
  void Clear();
  size_t ResumePointCount();
  static constexpr size_t MaxResumePointCount();
  void AddResumePoint(ResumePoint&& aResumePoint);
  const ResumePoint* begin() const;
  const ResumePoint* end() const;
  const ResumePoint* GetResumePointCallingSp(void* aSp) const;
#endif
};
1878
// Make a copy of the JS stack into a JSFrame array, and return the number of
// copied frames.
// This copy is necessary since, like the native stack, the JS stack is iterated
// youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks.
static uint32_t ExtractJsFrames(
    bool aIsSynchronous, const RegisteredThread& aRegisteredThread,
    const Registers& aRegs, ProfilerStackCollector& aCollector,
    JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
  uint32_t jsFramesCount = 0;

  // Only walk jit stack if profiling frame iterator is turned on.
  JSContext* context = aRegisteredThread.GetJSContext();
  if (context && JS::IsProfilingEnabledForContext(context)) {
    AutoWalkJSStack autoWalkJSStack;

    if (autoWalkJSStack.walkAllowed) {
      // Seed the JS frame iterator with the sampled register values.
      JS::ProfilingFrameIterator::RegisterState registerState;
      registerState.pc = aRegs.mPC;
      registerState.sp = aRegs.mSP;
      registerState.lr = aRegs.mLR;
      registerState.fp = aRegs.mFP;

      // Non-periodic sampling passes Nothing() as the buffer write position to
      // ProfilingFrameIterator to avoid incorrectly resetting the buffer
      // position of sampled JIT frames inside the JS engine.
      Maybe<uint64_t> samplePosInBuffer;
      if (!aIsSynchronous) {
        // aCollector.SamplePositionInBuffer() will return Nothing() when
        // profiler_suspend_and_sample_thread is called from the background hang
        // reporter.
        samplePosInBuffer = aCollector.SamplePositionInBuffer();
      }

      // Walk youngest-to-oldest, stopping when the fixed-size output buffer
      // is full.
      for (JS::ProfilingFrameIterator jsIter(context, registerState,
                                             samplePosInBuffer);
           !jsIter.done(); ++jsIter) {
        if (aIsSynchronous || jsIter.isWasm()) {
          // Synchronous samples and wasm frames are fully extracted here.
          jsFramesCount +=
              jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES);
          if (jsFramesCount == MAX_JS_FRAMES) {
            break;
          }
        } else {
          // Periodic samples record only the physical frame; labels for
          // non-wasm JIT frames are resolved later (see MergeStacks).
          Maybe<JS::ProfilingFrameIterator::Frame> frame =
              jsIter.getPhysicalFrameWithoutLabel();
          if (frame.isSome()) {
            aJsFrames[jsFramesCount++] = std::move(frame).ref();
            if (jsFramesCount == MAX_JS_FRAMES) {
              break;
            }
          }
        }

        if constexpr (StackWalkControl::scIsSupported) {
          // Where supported, record where the native stack walker could
          // resume after this JIT stack segment.
          if (aStackWalkControlIfSupported) {
            jsIter.getCppEntryRegisters().apply(
                [&](const JS::ProfilingFrameIterator::RegisterState&
                        aCppEntry) {
                  StackWalkControl::ResumePoint resumePoint;
                  resumePoint.resumeSp = aCppEntry.sp;
                  resumePoint.resumeBp = aCppEntry.fp;
                  resumePoint.resumePc = aCppEntry.pc;
                  aStackWalkControlIfSupported->AddResumePoint(
                      std::move(resumePoint));
                });
          }
        } else {
          MOZ_ASSERT(!aStackWalkControlIfSupported,
                     "aStackWalkControlIfSupported should be null when "
                     "!StackWalkControl::scIsSupported");
          (void)aStackWalkControlIfSupported;
        }
      }
    }
  }

  return jsFramesCount;
}
1957
// Merges the profiling stack, native stack, and JS stack, outputting the
// details to aCollector.
// (aFeatures is currently unused in this function body.)
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
                        const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, const NativeStack& aNativeStack,
                        ProfilerStackCollector& aCollector,
                        JsFrameBuffer aJsFrames, uint32_t aJsFramesCount) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();
  const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
  uint32_t profilingStackFrameCount = profilingStack.stackSize();

  // While the profiling stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to aInfo
  // oldest-to-youngest. Thus, iterate over the profiling stack forwards and JS
  // and native arrays backwards. Note: this means the terminating condition
  // jsIndex and nativeIndex is being < 0.
  uint32_t profilingStackIndex = 0;
  int32_t jsIndex = aJsFramesCount - 1;
  int32_t nativeIndex = aNativeStack.mCount - 1;

  // Stack addresses used to order frames from the three sources: the last
  // label/sp-marker frame seen, and the end of the last JIT frame seen.
  uint8_t* lastLabelFrameStackAddr = nullptr;
  uint8_t* jitEndStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
         nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.
    uint8_t* profilingStackAddr = nullptr;
    uint8_t* jsStackAddr = nullptr;
    uint8_t* nativeStackAddr = nullptr;
    uint8_t* jsActivationAddr = nullptr;

    if (profilingStackIndex != profilingStackFrameCount) {
      const js::ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      if (profilingStackFrame.isLabelFrame() ||
          profilingStackFrame.isSpMarkerFrame()) {
        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
      }

      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
      // To avoid both the profiling stack frame and jit frame being recorded
      // (and showing up twice), the interpreter marks the interpreter
      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
      if (profilingStackFrame.isOSRFrame()) {
        profilingStackIndex++;
        continue;
      }

      MOZ_ASSERT(lastLabelFrameStackAddr);
      profilingStackAddr = lastLabelFrameStackAddr;
    }

    if (jsIndex >= 0) {
      jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress;
      jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation;
    }

    if (nativeIndex >= 0) {
      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
    }

    // If there's a native stack frame which has the same SP as a profiling
    // stack frame, pretend we didn't see the native stack frame. Ditto for a
    // native stack frame which has the same SP as a JS stack frame. In effect
    // this means profiling stack frames or JS frames trump conflicting native
    // frames.
    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
                            jsStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(profilingStackAddr || jsStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(profilingStackAddr,
                  profilingStackAddr != jsStackAddr &&
                      profilingStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
                                   jsStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
                                       nativeStackAddr != jsStackAddr);

    // Check to see if profiling stack frame is top-most.
    if (profilingStackAddr > jsStackAddr &&
        profilingStackAddr > nativeStackAddr) {
      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
      const js::ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      // Sp marker frames are just annotations and should not be recorded in
      // the profile.
      if (!profilingStackFrame.isSpMarkerFrame()) {
        // The JIT only allows the top-most frame to have a nullptr pc.
        MOZ_ASSERT_IF(
            profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
                !profilingStackFrame.pc(),
            &profilingStackFrame ==
                &profilingStack.frames[profilingStack.stackSize() - 1]);
        if (aIsSynchronous && profilingStackFrame.categoryPair() ==
                                  JS::ProfilingCategoryPair::PROFILER) {
          // For stacks captured synchronously (ie. marker stacks), stop
          // walking the stack as soon as we enter the profiler category,
          // to avoid showing profiler internal code in marker stacks.
          return;
        }
        aCollector.CollectProfilingStackFrame(profilingStackFrame);
      }
      profilingStackIndex++;
      continue;
    }

    // Check to see if JS jit stack frame is top-most.
    if (jsStackAddr > nativeStackAddr) {
      MOZ_ASSERT(jsIndex >= 0);
      const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
      jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
      // Stringifying non-wasm JIT frames is delayed until streaming time. To
      // re-lookup the entry in the JitcodeGlobalTable, we need to store the
      // JIT code address (OptInfoAddr) in the circular buffer.
      //
      // Note that we cannot do this when we are synchronously sampling the
      // current thread; that is, when called from profiler_get_backtrace. The
      // captured backtrace is usually externally stored for an indeterminate
      // amount of time, such as in nsRefreshDriver. Problematically, the
      // stored backtrace may be alive across a GC during which the profiler
      // itself is disabled. In that case, the JS engine is free to discard its
      // JIT code. This means that if we inserted such OptInfoAddr entries into
      // the buffer, nsRefreshDriver would now be holding on to a backtrace
      // with stale JIT code return addresses.
      if (aIsSynchronous ||
          jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
        aCollector.CollectWasmFrame(jsFrame.label);
      } else if (jsFrame.kind ==
                 JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
        // Materialize a ProfilingStackFrame similar to the C++ Interpreter. We
        // also set the IS_BLINTERP_FRAME flag to differentiate though.
        JSScript* script = jsFrame.interpreterScript;
        jsbytecode* pc = jsFrame.interpreterPC();
        js::ProfilingStackFrame stackFrame;
        constexpr uint32_t ExtraFlags =
            uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME);
        stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret,
                               ExtraFlags>("", jsFrame.label, script, pc,
                                           jsFrame.realmID);
        aCollector.CollectProfilingStackFrame(stackFrame);
      } else {
        MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
                   jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
        aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
      }

      jsIndex--;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // greatest frame.
    if (nativeStackAddr &&
        // If the latest JS frame was JIT, this could be the native frame that
        // corresponds to it. In that case, skip the native frame, because
        // there's no need for the same frame to be present twice in the stack.
        // The JS frame can be considered the symbolicated version of the native
        // frame.
        (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
        // This might still be a JIT operation, check to make sure that is not
        // in range of the NEXT JavaScript's stacks' activation address.
        (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }

  // Update the JS context with the current profile sample buffer generation.
  //
  // Only do this for periodic samples. We don't want to do this for
  // synchronous samples, and we also don't want to do it for calls to
  // profiler_suspend_and_sample_thread() from the background hang reporter -
  // in that case, aCollector.BufferRangeStart() will return Nothing().
  if (!aIsSynchronous) {
    aCollector.BufferRangeStart().apply(
        [&aRegisteredThread](uint64_t aBufferRangeStart) {
          JSContext* context = aRegisteredThread.GetJSContext();
          if (context) {
            JS::SetJSContextProfilerSampleBufferRangeStart(context,
                                                           aBufferRangeStart);
          }
        });
  }
}
2159
#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
// Forward declaration (definition is elsewhere); used by
// DoMozStackWalkBacktrace() below to obtain the target thread's HANDLE.
static HANDLE GetThreadHandle(PlatformData* aData);
#endif
2163
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
// Per-frame callback for the native stack walkers: appends the frame's PC and
// SP to the NativeStack passed through aClosure. aFrameNumber is unused.
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
  auto* stack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(stack->mCount < MAX_NATIVE_FRAMES);
  const size_t index = stack->mCount;
  stack->mSPs[index] = aSP;
  stack->mPCs[index] = aPC;
  stack->mCount = index + 1;
}
#endif
2174
#if defined(USE_FRAME_POINTER_STACK_WALK)
// Walk the native stack by following frame pointers, appending frames to
// aNativeStack. Where StackWalkControl is supported, resume points collected
// from JS JIT frames allow the walk to restart past JIT stack segments.
static void DoFramePointerBacktrace(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread, Registers aRegs,
    NativeStack& aNativeStack, StackWalkControl* aStackWalkControlIfSupported) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
  // but it doesn't matter because StackWalkCallback() doesn't use the frame
  // number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  const void* const stackEnd = aRegisteredThread.StackTop();

  // This is to check forward-progress after using a resume point.
  void* previousResumeSp = nullptr;

  for (;;) {
    // Sanity check: only walk while SP <= FP <= registered stack top.
    if (!(aRegs.mSP && aRegs.mSP <= aRegs.mFP && aRegs.mFP <= stackEnd)) {
      break;
    }
    FramePointerStackWalk(StackWalkCallback,
                          uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
                          &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
                          const_cast<void*>(stackEnd));

    if constexpr (!StackWalkControl::scIsSupported) {
      // Without resume-point support there is nothing more to do.
      break;
    } else {
      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
        // No room to add more frames.
        break;
      }
      if (!aStackWalkControlIfSupported ||
          aStackWalkControlIfSupported->ResumePointCount() == 0) {
        // No resume information.
        break;
      }
      void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
      if (previousResumeSp &&
          ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
        // No progress after the previous resume point.
        break;
      }
      const StackWalkControl::ResumePoint* resumePoint =
          aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
      if (!resumePoint) {
        break;
      }
      void* sp = resumePoint->resumeSp;
      if (!sp) {
        // Null SP in a resume point means we stop here.
        break;
      }
      void* pc = resumePoint->resumePc;
      StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
                        &aNativeStack);
      // NOTE(review): StackWalkCallback() already increments mCount; this
      // extra increment appears to advance past an unwritten slot — verify
      // against upstream whether it is intentional.
      ++aNativeStack.mCount;
      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
        break;
      }
      // Prepare context to resume stack walking.
      aRegs.mPC = (Address)pc;
      aRegs.mSP = (Address)sp;
      aRegs.mFP = (Address)resumePoint->resumeBp;

      previousResumeSp = sp;
    }
  }
}
#endif
2248
#if defined(USE_MOZ_STACK_WALK)
// Walk the native stack with MozStackWalkThread(), appending frames to
// aNativeStack. Where StackWalkControl is supported (amd64), resume points
// collected from JS JIT frames allow the walk to restart past JIT segments by
// rebuilding a CONTEXT from the resume registers.
static void DoMozStackWalkBacktrace(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
    const Registers& aRegs, NativeStack& aNativeStack,
    StackWalkControl* aStackWalkControlIfSupported) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
  // it doesn't matter because StackWalkCallback() doesn't use the frame number
  // argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
  MOZ_ASSERT(thread);

  // When resume points are supported, seed an explicit CONTEXT from the
  // sampled registers; otherwise let MozStackWalkThread use its default.
  CONTEXT context_buf;
  CONTEXT* context = nullptr;
  if constexpr (StackWalkControl::scIsSupported) {
    context = &context_buf;
    memset(&context_buf, 0, sizeof(CONTEXT));
    context_buf.ContextFlags = CONTEXT_FULL;
# if defined(_M_AMD64)
    context_buf.Rsp = (DWORD64)aRegs.mSP;
    context_buf.Rbp = (DWORD64)aRegs.mFP;
    context_buf.Rip = (DWORD64)aRegs.mPC;
# else
    static_assert(!StackWalkControl::scIsSupported,
                  "Mismatched support between StackWalkControl and "
                  "DoMozStackWalkBacktrace");
# endif
  } else {
    context = nullptr;
  }

  // This is to check forward-progress after using a resume point.
  void* previousResumeSp = nullptr;

  for (;;) {
    MozStackWalkThread(StackWalkCallback,
                       uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
                       &aNativeStack, thread, context);

    if constexpr (!StackWalkControl::scIsSupported) {
      // Without resume-point support there is nothing more to do.
      break;
    } else {
      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
        // No room to add more frames.
        break;
      }
      if (!aStackWalkControlIfSupported ||
          aStackWalkControlIfSupported->ResumePointCount() == 0) {
        // No resume information.
        break;
      }
      void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
      if (previousResumeSp &&
          ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
        // No progress after the previous resume point.
        break;
      }
      const StackWalkControl::ResumePoint* resumePoint =
          aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
      if (!resumePoint) {
        break;
      }
      void* sp = resumePoint->resumeSp;
      if (!sp) {
        // Null SP in a resume point means we stop here.
        break;
      }
      void* pc = resumePoint->resumePc;
      StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
                        &aNativeStack);
      // NOTE(review): StackWalkCallback() already increments mCount; this
      // extra increment appears to advance past an unwritten slot — verify
      // against upstream whether it is intentional.
      ++aNativeStack.mCount;
      if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
        break;
      }
      // Prepare context to resume stack walking.
      memset(&context_buf, 0, sizeof(CONTEXT));
      context_buf.ContextFlags = CONTEXT_FULL;
# if defined(_M_AMD64)
      context_buf.Rsp = (DWORD64)sp;
      context_buf.Rbp = (DWORD64)resumePoint->resumeBp;
      context_buf.Rip = (DWORD64)pc;
# else
      static_assert(!StackWalkControl::scIsSupported,
                    "Mismatched support between StackWalkControl and "
                    "DoMozStackWalkBacktrace");
# endif
      previousResumeSp = sp;
    }
  }
}
#endif
2346
#ifdef USE_EHABI_STACKWALK
// ARM EHABI-based native stack walk: unwinds from the signal-handler context
// directly into aNativeStack's SP/PC arrays. Resume points are not used here.
static void DoEHABIBacktrace(PSLockRef aLock,
                             const RegisteredThread& aRegisteredThread,
                             const Registers& aRegs, NativeStack& aNativeStack,
                             StackWalkControl* aStackWalkControlIfSupported) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  // cannot rely on ActivePS.

  aNativeStack.mCount =
      EHABIStackWalk(aRegs.mContext->uc_mcontext,
                     const_cast<void*>(aRegisteredThread.StackTop()),
                     aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES);
  (void)aStackWalkControlIfSupported;  // TODO: Implement.
}
#endif
2363
2364 #ifdef USE_LUL_STACKWALK
2365
2366 // See the comment at the callsite for why this function is necessary.
// See the comment at the callsite for why this function is necessary.
# if defined(MOZ_HAVE_ASAN_BLACKLIST)
// Uninstrumented byte copy for use under ASAN.
MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
                                           size_t aLen) {
  // The obvious thing to do here is call memcpy(). However, although
  // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
  // false positive still manifests! So we must implement memcpy() ourselves
  // within this function.
  char* out = static_cast<char*>(aDst);
  const char* in = static_cast<const char*>(aSrc);
  const char* const end = in + aLen;
  while (in != end) {
    *out++ = *in++;
  }
}
# endif
2382
DoLULBacktrace(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const Registers & aRegs,NativeStack & aNativeStack,StackWalkControl * aStackWalkControlIfSupported)2383 static void DoLULBacktrace(PSLockRef aLock,
2384 const RegisteredThread& aRegisteredThread,
2385 const Registers& aRegs, NativeStack& aNativeStack,
2386 StackWalkControl* aStackWalkControlIfSupported) {
2387 // WARNING: this function runs within the profiler's "critical section".
2388 // WARNING: this function might be called while the profiler is inactive, and
2389 // cannot rely on ActivePS.
2390
2391 (void)aStackWalkControlIfSupported; // TODO: Implement.
2392
2393 const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
2394
2395 lul::UnwindRegs startRegs;
2396 memset(&startRegs, 0, sizeof(startRegs));
2397
2398 # if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
2399 startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
2400 startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
2401 startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
2402 # elif defined(GP_PLAT_amd64_freebsd)
2403 startRegs.xip = lul::TaggedUWord(mc->mc_rip);
2404 startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
2405 startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
2406 # elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
2407 startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
2408 startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
2409 startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
2410 startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
2411 startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
2412 startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
2413 # elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
2414 startRegs.pc = lul::TaggedUWord(mc->pc);
2415 startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
2416 startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
2417 startRegs.sp = lul::TaggedUWord(mc->sp);
2418 # elif defined(GP_PLAT_arm64_freebsd)
2419 startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
2420 startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
2421 startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
2422 startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
2423 # elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
2424 startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
2425 startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
2426 startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
2427 # elif defined(GP_PLAT_mips64_linux)
2428 startRegs.pc = lul::TaggedUWord(mc->pc);
2429 startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
2430 startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
2431 # else
2432 # error "Unknown plat"
2433 # endif
2434
2435 // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
2436 // stack's registered top point. Do some basic sanity checks too. This
2437 // assumes that the TaggedUWord holding the stack pointer value is valid, but
2438 // it should be, since it was constructed that way in the code just above.
2439
2440 // We could construct |stackImg| so that LUL reads directly from the stack in
2441 // question, rather than from a copy of it. That would reduce overhead and
2442 // space use a bit. However, it gives a problem with dynamic analysis tools
2443 // (ASan, TSan, Valgrind) which is that such tools will report invalid or
2444 // racing memory accesses, and such accesses will be reported deep inside LUL.
2445 // By taking a copy here, we can either sanitise the copy (for Valgrind) or
2446 // copy it using an unchecked memcpy (for ASan, TSan). That way we don't have
2447 // to try and suppress errors inside LUL.
2448 //
2449 // N_STACK_BYTES is set to 160KB. This is big enough to hold all stacks
2450 // observed in some minutes of testing, whilst keeping the size of this
2451 // function (DoNativeBacktrace)'s frame reasonable. Most stacks observed in
2452 // practice are small, 4KB or less, and so the copy costs are insignificant
2453 // compared to other profiler overhead.
2454 //
2455 // |stackImg| is allocated on this (the sampling thread's) stack. That
2456 // implies that the frame for this function is at least N_STACK_BYTES large.
2457 // In general it would be considered unacceptable to have such a large frame
2458 // on a stack, but it only exists for the unwinder thread, and so is not
2459 // expected to be a problem. Allocating it on the heap is troublesome because
2460 // this function runs whilst the sampled thread is suspended, so any heap
2461 // allocation risks deadlock. Allocating it as a global variable is not
2462 // thread safe, which would be a problem if we ever allow multiple sampler
2463 // threads. Hence allocating it on the stack seems to be the least-worst
2464 // option.
2465
2466 lul::StackImage stackImg;
2467
2468 {
2469 # if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
2470 defined(GP_PLAT_amd64_freebsd)
2471 uintptr_t rEDZONE_SIZE = 128;
2472 uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
2473 # elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
2474 uintptr_t rEDZONE_SIZE = 0;
2475 uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
2476 # elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
2477 defined(GP_PLAT_arm64_freebsd)
2478 uintptr_t rEDZONE_SIZE = 0;
2479 uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
2480 # elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
2481 uintptr_t rEDZONE_SIZE = 0;
2482 uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
2483 # elif defined(GP_PLAT_mips64_linux)
2484 uintptr_t rEDZONE_SIZE = 0;
2485 uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
2486 # else
2487 # error "Unknown plat"
2488 # endif
2489 uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
2490 uintptr_t ws = sizeof(void*);
2491 start &= ~(ws - 1);
2492 end &= ~(ws - 1);
2493 uintptr_t nToCopy = 0;
2494 if (start < end) {
2495 nToCopy = end - start;
2496 if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
2497 }
2498 MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
2499 stackImg.mLen = nToCopy;
2500 stackImg.mStartAvma = start;
2501 if (nToCopy > 0) {
2502 // If this is a vanilla memcpy(), ASAN makes the following complaint:
2503 //
2504 // ERROR: AddressSanitizer: stack-buffer-underflow ...
2505 // ...
2506 // HINT: this may be a false positive if your program uses some custom
2507 // stack unwind mechanism or swapcontext
2508 //
2509 // This code is very much a custom stack unwind mechanism! So we use an
2510 // alternative memcpy() implementation that is ignored by ASAN.
2511 # if defined(MOZ_HAVE_ASAN_BLACKLIST)
2512 ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
2513 # else
2514 memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
2515 # endif
2516 (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
2517 }
2518 }
2519
2520 size_t framePointerFramesAcquired = 0;
2521 lul::LUL* lul = CorePS::Lul(aLock);
2522 lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
2523 reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
2524 &aNativeStack.mCount, &framePointerFramesAcquired,
2525 MAX_NATIVE_FRAMES, &startRegs, &stackImg);
2526
2527 // Update stats in the LUL stats object. Unfortunately this requires
2528 // three global memory operations.
2529 lul->mStats.mContext += 1;
2530 lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
2531 lul->mStats.mFP += framePointerFramesAcquired;
2532 }
2533
2534 #endif
2535
2536 #ifdef HAVE_NATIVE_UNWIND
DoNativeBacktrace(PSLockRef aLock,const RegisteredThread & aRegisteredThread,const Registers & aRegs,NativeStack & aNativeStack,StackWalkControl * aStackWalkControlIfSupported)2537 static void DoNativeBacktrace(PSLockRef aLock,
2538 const RegisteredThread& aRegisteredThread,
2539 const Registers& aRegs, NativeStack& aNativeStack,
2540 StackWalkControl* aStackWalkControlIfSupported) {
2541 // This method determines which stackwalker is used for periodic and
2542 // synchronous samples. (Backtrace samples are treated differently, see
2543 // profiler_suspend_and_sample_thread() for details). The only part of the
2544 // ordering that matters is that LUL must precede FRAME_POINTER, because on
2545 // Linux they can both be present.
2546 # if defined(USE_LUL_STACKWALK)
2547 DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2548 aStackWalkControlIfSupported);
2549 # elif defined(USE_EHABI_STACKWALK)
2550 DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2551 aStackWalkControlIfSupported);
2552 # elif defined(USE_FRAME_POINTER_STACK_WALK)
2553 DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2554 aStackWalkControlIfSupported);
2555 # elif defined(USE_MOZ_STACK_WALK)
2556 DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2557 aStackWalkControlIfSupported);
2558 # else
2559 # error "Invalid configuration"
2560 # endif
2561 }
2562 #endif
2563
2564 // Writes some components shared by periodic and synchronous profiles to
2565 // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
2566 // and DoPeriodicSample().)
2567 //
2568 // The grammar for entry sequences is in a comment above
2569 // ProfileBuffer::StreamSamplesToJSON.
DoSharedSample(PSLockRef aLock,bool aIsSynchronous,RegisteredThread & aRegisteredThread,const Registers & aRegs,uint64_t aSamplePos,uint64_t aBufferRangeStart,ProfileBuffer & aBuffer,StackCaptureOptions aCaptureOptions=StackCaptureOptions::Full)2570 static inline void DoSharedSample(
2571 PSLockRef aLock, bool aIsSynchronous, RegisteredThread& aRegisteredThread,
2572 const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
2573 ProfileBuffer& aBuffer,
2574 StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
2575 // WARNING: this function runs within the profiler's "critical section".
2576
2577 MOZ_ASSERT(!aBuffer.IsThreadSafe(),
2578 "Mutexes cannot be used inside this critical section");
2579
2580 MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
2581
2582 ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
2583 JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
2584 StackWalkControl* stackWalkControlIfSupported = nullptr;
2585 #if defined(HAVE_NATIVE_UNWIND)
2586 const bool captureNative = ActivePS::FeatureStackWalk(aLock) &&
2587 aCaptureOptions == StackCaptureOptions::Full;
2588 StackWalkControl stackWalkControl;
2589 if constexpr (StackWalkControl::scIsSupported) {
2590 if (captureNative) {
2591 stackWalkControlIfSupported = &stackWalkControl;
2592 }
2593 }
2594 #endif // defined(HAVE_NATIVE_UNWIND)
2595 const uint32_t jsFramesCount =
2596 ExtractJsFrames(aIsSynchronous, aRegisteredThread, aRegs, collector,
2597 jsFrames, stackWalkControlIfSupported);
2598 NativeStack nativeStack;
2599 #if defined(HAVE_NATIVE_UNWIND)
2600 if (captureNative) {
2601 DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack,
2602 stackWalkControlIfSupported);
2603
2604 MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
2605 aRegs, nativeStack, collector, jsFrames, jsFramesCount);
2606 } else
2607 #endif
2608 {
2609 MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
2610 aRegs, nativeStack, collector, jsFrames, jsFramesCount);
2611
2612 // We can't walk the whole native stack, but we can record the top frame.
2613 if (ActivePS::FeatureLeaf(aLock) &&
2614 aCaptureOptions == StackCaptureOptions::Full) {
2615 aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
2616 }
2617 }
2618 }
2619
2620 // Writes the components of a synchronous sample to the given ProfileBuffer.
DoSyncSample(PSLockRef aLock,RegisteredThread & aRegisteredThread,const TimeStamp & aNow,const Registers & aRegs,ProfileBuffer & aBuffer,StackCaptureOptions aCaptureOptions)2621 static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
2622 const TimeStamp& aNow, const Registers& aRegs,
2623 ProfileBuffer& aBuffer,
2624 StackCaptureOptions aCaptureOptions) {
2625 // WARNING: this function runs within the profiler's "critical section".
2626
2627 MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack,
2628 "DoSyncSample should not be called when no capture is needed");
2629
2630 const uint64_t bufferRangeStart = aBuffer.BufferRangeStart();
2631
2632 const uint64_t samplePos =
2633 aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
2634
2635 TimeDuration delta = aNow - CorePS::ProcessStartTime();
2636 aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
2637
2638 DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
2639 samplePos, bufferRangeStart, aBuffer, aCaptureOptions);
2640 }
2641
2642 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
2643 // The ThreadId entry is already written in the main ProfileBuffer, its location
2644 // is `aSamplePos`, we can write the rest to `aBuffer` (which may be different).
DoPeriodicSample(PSLockRef aLock,RegisteredThread & aRegisteredThread,const Registers & aRegs,uint64_t aSamplePos,uint64_t aBufferRangeStart,ProfileBuffer & aBuffer)2645 static inline void DoPeriodicSample(PSLockRef aLock,
2646 RegisteredThread& aRegisteredThread,
2647 const Registers& aRegs, uint64_t aSamplePos,
2648 uint64_t aBufferRangeStart,
2649 ProfileBuffer& aBuffer) {
2650 // WARNING: this function runs within the profiler's "critical section".
2651
2652 DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
2653 aSamplePos, aBufferRangeStart, aBuffer);
2654 }
2655
2656 // END sampling/unwinding code
2657 ////////////////////////////////////////////////////////////////////////
2658
2659 ////////////////////////////////////////////////////////////////////////
2660 // BEGIN saving/streaming code
2661
// 2^53 - 1: the largest integer exactly representable in an IEEE-754 double,
// i.e. JavaScript's Number.MAX_SAFE_INTEGER.
static const uint64_t kJS_MAX_SAFE_UINTEGER = (uint64_t(1) << 53) - 1;

// Converts aValue to a signed integer that JS can represent exactly.
// Values outside the JS safe-integer range are mapped to -1.
static int64_t SafeJSInteger(uint64_t aValue) {
  if (aValue > kJS_MAX_SAFE_UINTEGER) {
    return -1;
  }
  return static_cast<int64_t>(aValue);
}
2667
AddSharedLibraryInfoToStream(JSONWriter & aWriter,const SharedLibrary & aLib)2668 static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
2669 const SharedLibrary& aLib) {
2670 aWriter.StartObjectElement();
2671 aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
2672 aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
2673 aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
2674 aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName()));
2675 aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath()));
2676 aWriter.StringProperty("debugName",
2677 NS_ConvertUTF16toUTF8(aLib.GetDebugName()));
2678 aWriter.StringProperty("debugPath",
2679 NS_ConvertUTF16toUTF8(aLib.GetDebugPath()));
2680 aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
2681 aWriter.StringProperty("arch", aLib.GetArch());
2682 aWriter.EndObject();
2683 }
2684
AppendSharedLibraries(JSONWriter & aWriter)2685 void AppendSharedLibraries(JSONWriter& aWriter) {
2686 SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
2687 info.SortByAddress();
2688 for (size_t i = 0; i < info.GetSize(); i++) {
2689 AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
2690 }
2691 }
2692
StreamCategories(SpliceableJSONWriter & aWriter)2693 static void StreamCategories(SpliceableJSONWriter& aWriter) {
2694 // Same order as ProfilingCategory. Format:
2695 // [
2696 // {
2697 // name: "Idle",
2698 // color: "transparent",
2699 // subcategories: ["Other"],
2700 // },
2701 // {
2702 // name: "Other",
2703 // color: "grey",
2704 // subcategories: [
2705 // "JSM loading",
2706 // "Subprocess launching",
2707 // "DLL loading"
2708 // ]
2709 // },
2710 // ...
2711 // ]
2712
2713 #define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
2714 aWriter.Start(); \
2715 aWriter.StringProperty("name", labelAsString); \
2716 aWriter.StringProperty("color", color); \
2717 aWriter.StartArrayProperty("subcategories");
2718 #define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
2719 aWriter.StringElement(labelAsString);
2720 #define CATEGORY_JSON_END_CATEGORY \
2721 aWriter.EndArray(); \
2722 aWriter.EndObject();
2723
2724 MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
2725 CATEGORY_JSON_SUBCATEGORY,
2726 CATEGORY_JSON_END_CATEGORY)
2727
2728 #undef CATEGORY_JSON_BEGIN_CATEGORY
2729 #undef CATEGORY_JSON_SUBCATEGORY
2730 #undef CATEGORY_JSON_END_CATEGORY
2731 }
2732
StreamMarkerSchema(SpliceableJSONWriter & aWriter)2733 static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
2734 // Get an array view with all registered marker-type-specific functions.
2735 Span<const base_profiler_markers_detail::Streaming::MarkerTypeFunctions>
2736 markerTypeFunctionsArray =
2737 base_profiler_markers_detail::Streaming::MarkerTypeFunctionsArray();
2738 // List of streamed marker names, this is used to spot duplicates.
2739 std::set<std::string> names;
2740 // Stream the display schema for each different one. (Duplications may come
2741 // from the same code potentially living in different libraries.)
2742 for (const auto& markerTypeFunctions : markerTypeFunctionsArray) {
2743 auto name = markerTypeFunctions.mMarkerTypeNameFunction();
2744 // std::set.insert(T&&) returns a pair, its `second` is true if the element
2745 // was actually inserted (i.e., it was not there yet.)
2746 const bool didInsert =
2747 names.insert(std::string(name.data(), name.size())).second;
2748 if (didInsert) {
2749 markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name);
2750 }
2751 }
2752 }
2753
2754 // Some meta information that is better recorded before streaming the profile.
2755 // This is *not* intended to be cached, as some values could change between
2756 // profiling sessions.
// Some meta information that is better recorded before streaming the profile.
// This is *not* intended to be cached, as some values could change between
// profiling sessions.
struct PreRecordedMetaInformation {
  // Value of the "javascript.options.asyncstack" preference.
  bool mAsyncStacks;

  // This struct should only live on the stack, so it's fine to use Auto
  // strings.

  // From nsIHttpProtocolHandler: platform, OS/CPU, and misc strings.
  nsAutoCString mHttpPlatform;
  nsAutoCString mHttpOscpu;
  nsAutoCString mHttpMisc;

  // From nsIXULRuntime: XPCOM ABI and widget toolkit names.
  nsAutoCString mRuntimeABI;
  nsAutoCString mRuntimeToolkit;

  // From nsIXULAppInfo: product name, build ID, and source repository URL.
  nsAutoCString mAppInfoProduct;
  nsAutoCString mAppInfoAppBuildID;
  nsAutoCString mAppInfoSourceURL;

  // Logical (cpuCount) and physical (cpuCores) CPU counts; 0 if unknown.
  int32_t mProcessInfoCpuCount;
  int32_t mProcessInfoCpuCores;
};
2776
2777 // This function should be called out of the profiler lock.
2778 // It gathers non-trivial data that doesn't require the profiler to stop, or for
2779 // which the request could theoretically deadlock if the profiler is locked.
PreRecordMetaInformation()2780 static PreRecordedMetaInformation PreRecordMetaInformation() {
2781 gPSMutex.AssertCurrentThreadDoesNotOwn();
2782
2783 PreRecordedMetaInformation info = {}; // Aggregate-init all fields.
2784
2785 if (!NS_IsMainThread()) {
2786 // Leave these properties out if we're not on the main thread.
2787 // At the moment, the only case in which this function is called on a
2788 // background thread is if we're in a content process and are going to
2789 // send this profile to the parent process. In that case, the parent
2790 // process profile's "meta" object already has the rest of the properties,
2791 // and the parent process profile is dumped on that process's main thread.
2792 return info;
2793 }
2794
2795 info.mAsyncStacks = Preferences::GetBool("javascript.options.asyncstack");
2796
2797 nsresult res;
2798
2799 if (nsCOMPtr<nsIHttpProtocolHandler> http =
2800 do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
2801 !NS_FAILED(res) && http) {
2802 Unused << http->GetPlatform(info.mHttpPlatform);
2803 Unused << http->GetOscpu(info.mHttpOscpu);
2804 Unused << http->GetMisc(info.mHttpMisc);
2805 }
2806
2807 if (nsCOMPtr<nsIXULRuntime> runtime =
2808 do_GetService("@mozilla.org/xre/runtime;1");
2809 runtime) {
2810 Unused << runtime->GetXPCOMABI(info.mRuntimeABI);
2811 Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit);
2812 }
2813
2814 if (nsCOMPtr<nsIXULAppInfo> appInfo =
2815 do_GetService("@mozilla.org/xre/app-info;1");
2816 appInfo) {
2817 Unused << appInfo->GetName(info.mAppInfoProduct);
2818 Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID);
2819 Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL);
2820 }
2821
2822 ProcessInfo processInfo = {}; // Aggregate-init all fields to false/zeroes.
2823 if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) {
2824 info.mProcessInfoCpuCount = processInfo.cpuCount;
2825 info.mProcessInfoCpuCores = processInfo.cpuCores;
2826 }
2827
2828 return info;
2829 }
2830
2831 // Implemented in platform-specific cpps, to add object properties describing
2832 // the units of CPU measurements in samples.
2833 static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
2834 SpliceableJSONWriter& aWriter);
2835
StreamMetaJSCustomObject(PSLockRef aLock,SpliceableJSONWriter & aWriter,bool aIsShuttingDown,const PreRecordedMetaInformation & aPreRecordedMetaInformation)2836 static void StreamMetaJSCustomObject(
2837 PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown,
2838 const PreRecordedMetaInformation& aPreRecordedMetaInformation) {
2839 MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
2840
2841 aWriter.IntProperty("version", 23);
2842
2843 // The "startTime" field holds the number of milliseconds since midnight
2844 // January 1, 1970 GMT. This grotty code computes (Now - (Now -
2845 // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
2846 TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
2847 aWriter.DoubleProperty(
2848 "startTime",
2849 static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));
2850
2851 // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
2852 // absolute time stamp: It's relative to startTime. This is consistent with
2853 // all other (non-"startTime") times anywhere in the profile JSON.
2854 if (aIsShuttingDown) {
2855 aWriter.DoubleProperty("shutdownTime", profiler_time());
2856 } else {
2857 aWriter.NullProperty("shutdownTime");
2858 }
2859
2860 aWriter.StartArrayProperty("categories");
2861 StreamCategories(aWriter);
2862 aWriter.EndArray();
2863
2864 aWriter.StartArrayProperty("markerSchema");
2865 StreamMarkerSchema(aWriter);
2866 aWriter.EndArray();
2867
2868 ActivePS::WriteActiveConfiguration(aLock, aWriter,
2869 MakeStringSpan("configuration"));
2870
2871 if (!NS_IsMainThread()) {
2872 // Leave the rest of the properties out if we're not on the main thread.
2873 // At the moment, the only case in which this function is called on a
2874 // background thread is if we're in a content process and are going to
2875 // send this profile to the parent process. In that case, the parent
2876 // process profile's "meta" object already has the rest of the properties,
2877 // and the parent process profile is dumped on that process's main thread.
2878 return;
2879 }
2880
2881 aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
2882 aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
2883
2884 #ifdef DEBUG
2885 aWriter.IntProperty("debug", 1);
2886 #else
2887 aWriter.IntProperty("debug", 0);
2888 #endif
2889
2890 aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);
2891
2892 aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks);
2893
2894 aWriter.IntProperty("processType", XRE_GetProcessType());
2895
2896 aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL));
2897
2898 if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) {
2899 aWriter.StringProperty("platform",
2900 aPreRecordedMetaInformation.mHttpPlatform);
2901 }
2902 if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) {
2903 aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu);
2904 }
2905 if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) {
2906 aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc);
2907 }
2908
2909 if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) {
2910 aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI);
2911 }
2912 if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) {
2913 aWriter.StringProperty("toolkit",
2914 aPreRecordedMetaInformation.mRuntimeToolkit);
2915 }
2916
2917 if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) {
2918 aWriter.StringProperty("product",
2919 aPreRecordedMetaInformation.mAppInfoProduct);
2920 }
2921 if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) {
2922 aWriter.StringProperty("appBuildID",
2923 aPreRecordedMetaInformation.mAppInfoAppBuildID);
2924 }
2925 if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) {
2926 aWriter.StringProperty("sourceURL",
2927 aPreRecordedMetaInformation.mAppInfoSourceURL);
2928 }
2929
2930 if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) {
2931 aWriter.IntProperty("physicalCPUs",
2932 aPreRecordedMetaInformation.mProcessInfoCpuCores);
2933 }
2934 if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) {
2935 aWriter.IntProperty("logicalCPUs",
2936 aPreRecordedMetaInformation.mProcessInfoCpuCount);
2937 }
2938
2939 #if defined(GP_OS_android)
2940 jni::String::LocalRef deviceInformation =
2941 java::GeckoJavaSampler::GetDeviceInformation();
2942 aWriter.StringProperty("device", deviceInformation->ToCString());
2943 #endif
2944
2945 aWriter.StartObjectProperty("sampleUnits");
2946 {
2947 aWriter.StringProperty("time", "ms");
2948 aWriter.StringProperty("eventDelay", "ms");
2949 StreamMetaPlatformSampleUnits(aLock, aWriter);
2950 }
2951 aWriter.EndObject();
2952
2953 // We should avoid collecting extension metadata for profiler when there is no
2954 // observer service, since a ExtensionPolicyService could not be created then.
2955 if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
2956 aWriter.StartObjectProperty("extensions");
2957 {
2958 {
2959 JSONSchemaWriter schema(aWriter);
2960 schema.WriteField("id");
2961 schema.WriteField("name");
2962 schema.WriteField("baseURL");
2963 }
2964
2965 aWriter.StartArrayProperty("data");
2966 {
2967 nsTArray<RefPtr<WebExtensionPolicy>> exts;
2968 ExtensionPolicyService::GetSingleton().GetAll(exts);
2969
2970 for (auto& ext : exts) {
2971 aWriter.StartArrayElement(JSONWriter::SingleLineStyle);
2972
2973 nsAutoString id;
2974 ext->GetId(id);
2975 aWriter.StringElement(NS_ConvertUTF16toUTF8(id));
2976
2977 aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name()));
2978
2979 auto url = ext->GetURL(u""_ns);
2980 if (url.isOk()) {
2981 aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap()));
2982 }
2983
2984 aWriter.EndArray();
2985 }
2986 }
2987 aWriter.EndArray();
2988 }
2989 aWriter.EndObject();
2990 }
2991 }
2992
StreamPages(PSLockRef aLock,SpliceableJSONWriter & aWriter)2993 static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
2994 MOZ_RELEASE_ASSERT(CorePS::Exists());
2995 ActivePS::DiscardExpiredPages(aLock);
2996 for (const auto& page : ActivePS::ProfiledPages(aLock)) {
2997 page->StreamJSON(aWriter);
2998 }
2999 }
3000
3001 #if defined(GP_OS_android)
3002 template <int N>
StartsWith(const nsACString & string,const char (& prefix)[N])3003 static bool StartsWith(const nsACString& string, const char (&prefix)[N]) {
3004 if (N - 1 > string.Length()) {
3005 return false;
3006 }
3007 return memcmp(string.Data(), prefix, N - 1) == 0;
3008 }
3009
InferJavaCategory(nsACString & aName)3010 static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) {
3011 if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) {
3012 return JS::ProfilingCategoryPair::IDLE;
3013 }
3014 if (aName.EqualsLiteral("java.lang.Object.wait()")) {
3015 return JS::ProfilingCategoryPair::JAVA_BLOCKED;
3016 }
3017 if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) {
3018 return JS::ProfilingCategoryPair::JAVA_ANDROID;
3019 }
3020 if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) {
3021 return JS::ProfilingCategoryPair::JAVA_MOZILLA;
3022 }
3023 if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") ||
3024 StartsWith(aName, "com.sun.")) {
3025 return JS::ProfilingCategoryPair::JAVA_LANGUAGE;
3026 }
3027 if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) {
3028 return JS::ProfilingCategoryPair::JAVA_KOTLIN;
3029 }
3030 if (StartsWith(aName, "androidx.")) {
3031 return JS::ProfilingCategoryPair::JAVA_ANDROIDX;
3032 }
3033 return JS::ProfilingCategoryPair::OTHER;
3034 }
3035
CollectJavaThreadProfileData(ProfileBuffer & aProfileBuffer)3036 static void CollectJavaThreadProfileData(ProfileBuffer& aProfileBuffer) {
3037 // locked_profiler_start uses sample count is 1000 for Java thread.
3038 // This entry size is enough now, but we might have to estimate it
3039 // if we can customize it
3040
3041 // Pass the samples
3042 // FIXME(bug 1618560): We are currently only profiling the Android UI thread.
3043 constexpr int threadId = 0;
3044 int sampleId = 0;
3045 while (true) {
3046 // Gets the data from the Android UI thread only.
3047 double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId);
3048 if (sampleTime == 0.0) {
3049 break;
3050 }
3051
3052 aProfileBuffer.AddThreadIdEntry(threadId);
3053 aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime));
3054 int frameId = 0;
3055 while (true) {
3056 jni::String::LocalRef frameName =
3057 java::GeckoJavaSampler::GetFrameName(sampleId, frameId++);
3058 if (!frameName) {
3059 break;
3060 }
3061 nsCString frameNameString = frameName->ToCString();
3062
3063 auto categoryPair = InferJavaCategory(frameNameString);
3064 aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0,
3065 Nothing(), Nothing(),
3066 Some(categoryPair));
3067 }
3068 sampleId++;
3069 }
3070
3071 // Pass the markers now
3072 while (true) {
3073 // Gets the data from the Android UI thread only.
3074 java::GeckoJavaSampler::Marker::LocalRef marker =
3075 java::GeckoJavaSampler::PollNextMarker();
3076 if (!marker) {
3077 // All markers are transferred.
3078 break;
3079 }
3080
3081 // Get all the marker information from the Java thread using JNI.
3082 nsCString markerName = marker->GetMarkerName()->ToCString();
3083 jni::String::LocalRef text = marker->GetMarkerText();
3084 TimeStamp startTime =
3085 CorePS::ProcessStartTime() +
3086 TimeDuration::FromMilliseconds(marker->GetStartTime());
3087
3088 double endTimeMs = marker->GetEndTime();
3089 // A marker can be either a duration with start and end, or a point in time
3090 // with only startTime. If endTime is 0, this means it's a point in time.
3091 TimeStamp endTime = endTimeMs == 0
3092 ? startTime
3093 : CorePS::ProcessStartTime() +
3094 TimeDuration::FromMilliseconds(endTimeMs);
3095 MarkerTiming timing = endTimeMs == 0
3096 ? MarkerTiming::InstantAt(startTime)
3097 : MarkerTiming::Interval(startTime, endTime);
3098
3099 if (!text) {
3100 // This marker doesn't have a text.
3101 AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
3102 geckoprofiler::category::JAVA_ANDROID,
3103 {MarkerThreadId(threadId), std::move(timing)});
3104 } else {
3105 // This marker has a text.
3106 AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
3107 geckoprofiler::category::JAVA_ANDROID,
3108 {MarkerThreadId(threadId), std::move(timing)},
3109 geckoprofiler::markers::TextMarker{},
3110 text->ToCString());
3111 }
3112 }
3113 }
3114 #endif
3115
3116 UniquePtr<ProfilerCodeAddressService>
profiler_code_address_service_for_presymbolication()3117 profiler_code_address_service_for_presymbolication() {
3118 static const bool preSymbolicate = []() {
3119 const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
3120 return symbolicate && symbolicate[0] != '\0';
3121 }();
3122 return preSymbolicate ? MakeUnique<ProfilerCodeAddressService>() : nullptr;
3123 }
3124
// Streams this process's whole profile (libs, meta, pages, overhead,
// counters, per-thread samples, paused ranges) as JSON into aWriter.
// Preconditions: the profiler mutex is held (witnessed by aLock) and
// ActivePS exists; callers must check both first (see
// profiler_stream_json_for_this_process()).
static void locked_profiler_stream_json_for_this_process(
    PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
    bool aIsShuttingDown, ProfilerCodeAddressService* aService) {
  LOG("locked_profiler_stream_json_for_this_process");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process);

  const double collectionStartMs = profiler_time();

  ProfileBuffer& buffer = ActivePS::Buffer(aLock);

  // If there is a set "Window length", discard older data.
  Maybe<double> durationS = ActivePS::Duration(aLock);
  if (durationS.isSome()) {
    const double durationStartMs = collectionStartMs - *durationS * 1000;
    buffer.DiscardSamplesBeforeTime(durationStartMs);
  }

  // Put shared library info
  aWriter.StartArrayProperty("libs");
  AppendSharedLibraries(aWriter);
  aWriter.EndArray();

  // Put meta data
  aWriter.StartObjectProperty("meta");
  {
    StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown,
                             aPreRecordedMetaInformation);
  }
  aWriter.EndObject();

  // Put page data
  aWriter.StartArrayProperty("pages");
  { StreamPages(aLock, aWriter); }
  aWriter.EndArray();

  buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(),
                                      aSinceTime);
  buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);

  // Lists the samples for each thread profile
  aWriter.StartArrayProperty("threads");
  {
    ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads =
        ActivePS::ProfiledThreads(aLock);
    for (auto& thread : threads) {
      // registeredThread may be null for dead threads; in that case there is
      // no JSContext to stream JS data from.
      RegisteredThread* registeredThread = thread.first;
      JSContext* cx =
          registeredThread ? registeredThread->GetJSContext() : nullptr;
      ProfiledThreadData* profiledThreadData = thread.second;
      profiledThreadData->StreamJSON(
          buffer, cx, aWriter, CorePS::ProcessName(aLock),
          CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
          ActivePS::FeatureJSTracer(aLock), aService);
    }

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(aLock)) {
      // We are allocating it chunk by chunk. So this will not allocate 64 MiB
      // at once. This size should be more than enough for java threads.
      // This buffer is being created for each process but Android has
      // relatively less processes compared to desktop, so it's okay here.
      mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager(
          64 * 1024 * 1024, 1024 * 1024);
      ProfileChunkedBuffer bufferManager(
          ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
      ProfileBuffer javaBuffer(bufferManager);
      CollectJavaThreadProfileData(javaBuffer);

      // Set the thread id of the Android UI thread to be 0.
      // We are profiling the Android UI thread twice: Both from the C++ side
      // (as a regular C++ profiled thread with the name "AndroidUI"), and from
      // the Java side. The thread's actual ID is mozilla::jni::GetUIThreadId(),
      // but since we're using that ID for the C++ side, we need to pick another
      // tid that doesn't conflict with it for the Java side. So we just use 0.
      // Once we add support for profiling of other java threads, we'll have to
      // get their thread id and name via JNI.
      RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
          "AndroidUI (JVM)", 0, false, CorePS::ProcessStartTime());
      ProfiledThreadData profiledThreadData(threadInfo, nullptr);
      profiledThreadData.StreamJSON(
          javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock),
          CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
          ActivePS::FeatureJSTracer(aLock), nullptr);
    }
#endif

    // Splice in any thread JSON that was captured before this profiler took
    // over (moved out of ActivePS, so it is only emitted once).
    UniquePtr<char[]> baseProfileThreads =
        ActivePS::MoveBaseProfileThreads(aLock);
    if (baseProfileThreads) {
      aWriter.Splice(MakeStringSpan(baseProfileThreads.get()));
    }
  }
  aWriter.EndArray();

  if (ActivePS::FeatureJSTracer(aLock)) {
    aWriter.StartArrayProperty("jsTracerDictionary");
    {
      JS::AutoTraceLoggerLockGuard lockGuard;
      // Collect Event Dictionary
      JS::TraceLoggerDictionaryBuffer collectionBuffer(lockGuard);
      while (collectionBuffer.NextChunk()) {
        aWriter.StringElement(
            MakeStringSpan(collectionBuffer.internalBuffer()));
      }
    }
    aWriter.EndArray();
  }

  aWriter.StartArrayProperty("pausedRanges");
  { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); }
  aWriter.EndArray();

  const double collectionEndMs = profiler_time();

  // Record timestamps for the collection into the buffer, so that consumers
  // know why we didn't collect any samples for its duration.
  // We put these entries into the buffer after we've collected the profile,
  // so they'll be visible for the *next* profile collection (if they haven't
  // been overwritten due to buffer wraparound by then).
  buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
  buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
}
3252
profiler_stream_json_for_this_process(SpliceableJSONWriter & aWriter,double aSinceTime,bool aIsShuttingDown,ProfilerCodeAddressService * aService)3253 bool profiler_stream_json_for_this_process(
3254 SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
3255 ProfilerCodeAddressService* aService) {
3256 LOG("profiler_stream_json_for_this_process");
3257
3258 MOZ_RELEASE_ASSERT(CorePS::Exists());
3259
3260 const auto preRecordedMetaInformation = PreRecordMetaInformation();
3261
3262 if (profiler_is_active()) {
3263 invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile);
3264 }
3265
3266 PSAutoLock lock(gPSMutex);
3267
3268 if (!ActivePS::Exists(lock)) {
3269 return false;
3270 }
3271
3272 locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
3273 preRecordedMetaInformation,
3274 aIsShuttingDown, aService);
3275 return true;
3276 }
3277
3278 // END saving/streaming code
3279 ////////////////////////////////////////////////////////////////////////
3280
FeatureCategory(uint32_t aFeature)3281 static char FeatureCategory(uint32_t aFeature) {
3282 if (aFeature & DefaultFeatures()) {
3283 if (aFeature & AvailableFeatures()) {
3284 return 'D';
3285 }
3286 return 'd';
3287 }
3288
3289 if (aFeature & StartupExtraDefaultFeatures()) {
3290 if (aFeature & AvailableFeatures()) {
3291 return 'S';
3292 }
3293 return 's';
3294 }
3295
3296 if (aFeature & AvailableFeatures()) {
3297 return '-';
3298 }
3299 return 'x';
3300 }
3301
// Doesn't exit if aExitCode is 0
static void PrintUsageThenExit(int aExitCode) {
  MOZ_RELEASE_ASSERT(NS_IsMainThread());

  // First part of the help text, with numeric placeholders filled in from the
  // buffer-size and interval constants below.
  printf(
      "\n"
      "Profiler environment variable usage:\n"
      "\n"
      "  MOZ_PROFILER_HELP\n"
      "  If set to any value, prints this message.\n"
      "  Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n"
      "\n"
      "  MOZ_LOG\n"
      "  Enables logging. The levels of logging available are\n"
      "  'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
      "\n"
      "  MOZ_PROFILER_STARTUP\n"
      "  If set to any value other than '' or '0'/'N'/'n', starts the\n"
      "  profiler immediately on start-up.\n"
      "  Useful if you want profile code that runs very early.\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n"
      "  process in the profiler's circular buffer when the profiler is first\n"
      "  started.\n"
      "  If unset, the platform default is used:\n"
      "  %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
      "  (%u bytes per entry -> %u or %u total bytes per process)\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n"
      "  entries in the the profiler's circular buffer when the profiler is\n"
      "  first started, in seconds.\n"
      "  If unset, the life time of the entries will only be restricted by\n"
      "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
      "  additional time duration restriction will be applied.\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
      "  measured in milliseconds, when the profiler is first started.\n"
      "  If unset, the platform default is used.\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
      "  the integer value of the features bitfield.\n"
      "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
      "  a comma-separated list of strings.\n"
      "  Ignored if  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
      "  If unset, the platform default is used.\n"
      "\n"
      "    Features: (x=unavailable, D/d=default/unavailable,\n"
      "    S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n",
      unsigned(ActivePS::scMinimumBufferEntries),
      unsigned(ActivePS::scMaximumBufferEntries),
      unsigned(PROFILER_DEFAULT_ENTRIES.Value()),
      unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
      unsigned(scBytesPerEntry),
      unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
      unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry),
      PROFILER_MAX_INTERVAL);

  // Print one line per known feature, prefixed by its category character
  // (see FeatureCategory()).
#define PRINT_FEATURE(n_, str_, Name_, desc_)                                  \
  printf("    %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \
         ProfilerFeature::Name_, str_, desc_);

  PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)

#undef PRINT_FEATURE

  printf(
      "    -        \"default\" (All above D+S defaults)\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
      "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n"
      "  comma-separated list of strings. A given thread will be sampled if\n"
      "  any of the filters is a case-insensitive substring of the thread\n"
      "  name. If unset, a default is used.\n"
      "\n"
      "  MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n"
      "  This variable is used to propagate the activeTabID of\n"
      "  the profiler init params to subprocesses.\n"
      "\n"
      "  MOZ_PROFILER_SHUTDOWN\n"
      "  If set, the profiler saves a profile to the named file on shutdown.\n"
      "\n"
      "  MOZ_PROFILER_SYMBOLICATE\n"
      "  If set, the profiler will pre-symbolicate profiles.\n"
      "  *Note* This will add a significant pause when gathering data, and\n"
      "  is intended mainly for local development.\n"
      "\n"
      "  MOZ_PROFILER_LUL_TEST\n"
      "  If set to any value, runs LUL unit tests at startup.\n"
      "\n"
      "  This platform %s native unwinding.\n"
      "\n",
#if defined(HAVE_NATIVE_UNWIND)
      "supports"
#else
      "does not support"
#endif
  );

  // Only terminate the process when a non-zero exit code was requested.
  if (aExitCode != 0) {
    exit(aExitCode);
  }
}
3411
3412 ////////////////////////////////////////////////////////////////////////
3413 // BEGIN Sampler
3414
3415 #if defined(GP_OS_linux) || defined(GP_OS_android)
3416 struct SigHandlerCoordinator;
3417 #endif
3418
3419 // Sampler performs setup and teardown of the state required to sample with the
3420 // profiler. Sampler may exist when ActivePS is not present.
3421 //
3422 // SuspendAndSampleAndResumeThread must only be called from a single thread,
3423 // and must not sample the thread it is being called from. A separate Sampler
3424 // instance must be used for each thread which wants to capture samples.
3425
3426 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
3427 //
3428 // With the exception of SamplerThread, all Sampler objects must be Disable-d
3429 // before releasing the lock which was used to create them. This avoids races
3430 // on linux with the SIGPROF signal handler.
3431
class Sampler {
 public:
  // Sets up the profiler such that it can begin sampling.
  explicit Sampler(PSLockRef aLock);

  // Disable the sampler, restoring it to its previous state. This must be
  // called once, and only once, before the Sampler is destroyed.
  void Disable(PSLockRef aLock);

  // This method suspends and resumes the samplee thread. It calls the passed-in
  // function-like object aProcessRegs (passing it a populated |const
  // Registers&| arg) while the samplee thread is suspended. Note that
  // the aProcessRegs function must be very careful not to do anything that
  // requires a lock, since we may have interrupted the thread at any point.
  // As an example, you can't call TimeStamp::Now() since on windows it
  // takes a lock on the performance counter.
  //
  // Func must be a function-like object of type `void()`.
  template <typename Func>
  void SuspendAndSampleAndResumeThread(
      PSLockRef aLock, const RegisteredThread& aRegisteredThread,
      const TimeStamp& aNow, const Func& aProcessRegs);

 private:
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
  // Used to restore the SIGPROF handler when ours is removed.
  struct sigaction mOldSigprofHandler;

  // This process' ID. Needed as an argument for tgkill in
  // SuspendAndSampleAndResumeThread.
  int mMyPid;

  // The sampler thread's ID. Used to assert that it is not sampling itself,
  // which would lead to deadlock.
  int mSamplerTid;

 public:
  // This is the one-and-only variable used to communicate between the sampler
  // thread and the samplee thread's signal handler. It's static because the
  // samplee thread's signal handler is static.
  static struct SigHandlerCoordinator* sSigHandlerCoordinator;
#endif
};
3475
3476 // END Sampler
3477 ////////////////////////////////////////////////////////////////////////
3478
// Platform-specific functions that retrieve, and reset, per-thread CPU
// measurements (each platform provides its own definitions).
static RunningTimes GetThreadRunningTimesDiff(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread);
static void ClearThreadRunningTimes(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread);
3484
3485 // Template function to be used by `GetThreadRunningTimesDiff()` (unless some
3486 // platform has a better way to achieve this).
// It helps perform CPU measurements and tie them to a timestamp, such that the
3488 // measurements and timestamp are very close together.
3489 // This is necessary, because the relative CPU usage is computed by dividing
3490 // consecutive CPU measurements by their timestamp difference; if there was an
3491 // unexpected big gap, it could skew this computation and produce impossible
3492 // spikes that would hide the rest of the data. See bug 1685938 for more info.
3493 // Note that this may call the measurement function more than once; it is
3494 // assumed to normally be fast.
3495 // This was verified experimentally, but there is currently no regression
3496 // testing for it; see follow-up bug 1687402.
template <typename GetCPURunningTimesFunction>
RunningTimes GetRunningTimesWithTightTimestamp(
    GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) {
  // Once per process, compute a threshold over which running times and their
  // timestamp is considered too far apart.
  // NOTE: Because this is a function-local static, the threshold is computed
  // using the measurement function passed to the *first* call only; later
  // calls (possibly with a different callable) reuse the cached threshold.
  static const TimeDuration scMaxRunningTimesReadDuration = [&]() {
    // Run the main CPU measurements + timestamp a number of times and capture
    // their durations.
    constexpr int loops = 128;
    TimeDuration durations[loops];
    RunningTimes runningTimes;
    TimeStamp before = TimeStamp::NowUnfuzzed();
    for (int i = 0; i < loops; ++i) {
      AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
      aGetCPURunningTimesFunction(runningTimes);
      const TimeStamp after = TimeStamp::NowUnfuzzed();
      durations[i] = after - before;
      before = after;
    }
    // Move median duration to the middle.
    std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]);
    // Use median*8 as cut-off point.
    // Typical durations should be around a microsecond, the cut-off should then
    // be around 10 microseconds, well below the expected minimum inter-sample
    // interval (observed as a few milliseconds), so overall this should keep
    // cpu/interval spikes to a minimum.
    return durations[loops / 2] * 8;
  }();

  // Record CPU measurements between two timestamps.
  RunningTimes runningTimes;
  TimeStamp before = TimeStamp::NowUnfuzzed();
  aGetCPURunningTimesFunction(runningTimes);
  TimeStamp after = TimeStamp::NowUnfuzzed();
  // In most cases, the above should be quick enough. But if not, repeat:
  while (MOZ_UNLIKELY(after - before > scMaxRunningTimesReadDuration)) {
    AUTO_PROFILER_STATS(GetRunningTimes_REDO);
    before = after;
    aGetCPURunningTimesFunction(runningTimes);
    after = TimeStamp::NowUnfuzzed();
  }
  // Finally, record the closest timestamp just after the final measurement was
  // done. This must stay *after* the CPU measurements.
  runningTimes.SetPostMeasurementTimeStamp(after);

  return runningTimes;
}
3544
3545 ////////////////////////////////////////////////////////////////////////
3546 // BEGIN SamplerThread
3547
3548 // The sampler thread controls sampling and runs whenever the profiler is
3549 // active. It periodically runs through all registered threads, finds those
3550 // that should be sampled, then pauses and samples them.
3551
class SamplerThread {
 public:
  // Creates a sampler thread, but doesn't start it.
  SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
                double aIntervalMilliseconds, bool aStackWalkEnabled,
                bool aNoTimerResolutionChange);
  ~SamplerThread();

  // This runs on (is!) the sampler thread.
  void Run();

  // This runs on the main thread.
  void Stop(PSLockRef aLock);

  // Queues aCallback to run on the sampler thread after its next full
  // sampling loop. Requires the profiler lock (witnessed by the PSLockRef).
  void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) {
    // We are under lock, so it's safe to just modify the list pointer.
    // Also this means the sampler has not started its run yet, so any callback
    // added now will be invoked at the end of the next loop; this guarantees
    // that the callback will be invoked after at least one full sampling loop.
    mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
        std::move(mPostSamplingCallbackList), std::move(aCallback));
  }

 private:
  // Item containing a post-sampling callback, and a tail-list of more items.
  // Using a linked list means no need to move items when adding more, and
  // "stealing" the whole list is one pointer move.
  struct PostSamplingCallbackListItem {
    UniquePtr<PostSamplingCallbackListItem> mPrev;
    PostSamplingCallback mCallback;

    PostSamplingCallbackListItem(UniquePtr<PostSamplingCallbackListItem> aPrev,
                                 PostSamplingCallback&& aCallback)
        : mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {}
  };

  // Detaches and returns the whole pending-callback list; the member is left
  // null. Requires the profiler lock.
  [[nodiscard]] UniquePtr<PostSamplingCallbackListItem>
  TakePostSamplingCallbacks(PSLockRef) {
    return std::move(mPostSamplingCallbackList);
  }

  // Invokes all callbacks in aCallbacks in FIFO order (oldest first), then
  // destroys the list.
  static void InvokePostSamplingCallbacks(
      UniquePtr<PostSamplingCallbackListItem> aCallbacks,
      SamplingState aSamplingState) {
    if (!aCallbacks) {
      return;
    }
    // We want to drill down to the last element in this list, which is the
    // oldest one, so that we invoke them in FIFO order.
    // We don't expect many callbacks, so it's safe to recurse. Note that we're
    // moving-from the UniquePtr, so the tail will implicitly get destroyed.
    InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState);
    // We are going to destroy this item, so we can safely move-from the
    // callback before calling it (in case it has an rvalue-ref-qualified call
    // operator).
    std::move(aCallbacks->mCallback)(aSamplingState);
    // It may be tempting for a future maintainer to change aCallbacks into an
    // rvalue reference; this will remind them not to do that!
    static_assert(
        std::is_same_v<decltype(aCallbacks),
                       UniquePtr<PostSamplingCallbackListItem>>,
        "We need to capture the list by-value, to implicitly destroy it");
  }

  // This suspends the calling thread for the given number of microseconds.
  // Best effort timing.
  void SleepMicro(uint32_t aMicroseconds);

  // The sampler used to suspend and sample threads.
  Sampler mSampler;

  // The activity generation, for detecting when the sampler thread must stop.
  const uint32_t mActivityGeneration;

  // The interval between samples, measured in microseconds.
  const int mIntervalMicroseconds;

  // The OS-specific handle for the sampler thread.
#if defined(GP_OS_windows)
  HANDLE mThread;
#elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
    defined(GP_OS_android) || defined(GP_OS_freebsd)
  pthread_t mThread;
#endif

  // Post-sampling callbacks are kept in a simple linked list, which will be
  // stolen by the sampler thread at the end of its next run.
  UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;

#if defined(GP_OS_windows)
  // Whether to leave the system timer resolution untouched (Windows only).
  bool mNoTimerResolutionChange = true;
#endif

  SamplerThread(const SamplerThread&) = delete;
  void operator=(const SamplerThread&) = delete;
};
3648
3649 // [[nodiscard]] static
AppendPostSamplingCallback(PSLockRef aLock,PostSamplingCallback && aCallback)3650 bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock,
3651 PostSamplingCallback&& aCallback) {
3652 if (!sInstance || !sInstance->mSamplerThread) {
3653 return false;
3654 }
3655 sInstance->mSamplerThread->AppendPostSamplingCallback(aLock,
3656 std::move(aCallback));
3657 return true;
3658 }
3659
3660 // This function is required because we need to create a SamplerThread within
3661 // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
3662 // could probably be removed by moving some code around.
NewSamplerThread(PSLockRef aLock,uint32_t aGeneration,double aInterval,bool aStackWalkEnabled,bool aNoTimerResolutionChange)3663 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
3664 double aInterval, bool aStackWalkEnabled,
3665 bool aNoTimerResolutionChange) {
3666 return new SamplerThread(aLock, aGeneration, aInterval, aStackWalkEnabled,
3667 aNoTimerResolutionChange);
3668 }
3669
3670 // This function is the sampler thread. This implementation is used for all
3671 // targets.
Run()3672 void SamplerThread::Run() {
3673 PR_SetCurrentThreadName("SamplerThread");
3674
3675 // Features won't change during this SamplerThread's lifetime, so we can read
3676 // them once and store them locally.
3677 const uint32_t features = []() -> uint32_t {
3678 PSAutoLock lock(gPSMutex);
3679 if (!ActivePS::Exists(lock)) {
3680 // If there is no active profiler, it doesn't matter what we return,
3681 // because this thread will exit before any feature is used.
3682 return 0;
3683 }
3684 return ActivePS::Features(lock);
3685 }();
3686
3687 // Not *no*-stack-sampling means we do want stack sampling.
3688 const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
3689
3690 const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features);
3691
3692 // Use local ProfileBuffer and underlying buffer to capture the stack.
3693 // (This is to avoid touching the CorePS::CoreBuffer lock while a thread is
3694 // suspended, because that thread could be working with the CorePS::CoreBuffer
3695 // as well.)
3696 mozilla::ProfileBufferChunkManagerSingle localChunkManager(
3697 ProfileBufferChunkManager::scExpectedMaximumStackSize);
3698 ProfileChunkedBuffer localBuffer(
3699 ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
3700 ProfileBuffer localProfileBuffer(localBuffer);
3701
3702 // Will be kept between collections, to know what each collection does.
3703 auto previousState = localBuffer.GetState();
3704
3705 // This will be set inside the loop, from inside the lock scope, to capture
3706 // all callbacks added before that, but none after the lock is released.
3707 UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks;
3708 // This will be set inside the loop, before invoking callbacks outside.
3709 SamplingState samplingState{};
3710
3711 const TimeDuration sampleInterval =
3712 TimeDuration::FromMicroseconds(mIntervalMicroseconds);
3713 const uint32_t minimumIntervalSleepUs =
3714 static_cast<uint32_t>(mIntervalMicroseconds / 4);
3715
3716 // This is the scheduled time at which each sampling loop should start.
3717 // It will determine the ideal next sampling start by adding the expected
3718 // interval, unless when sampling runs late -- See end of while() loop.
3719 TimeStamp scheduledSampleStart = TimeStamp::NowUnfuzzed();
3720
3721 while (true) {
3722 const TimeStamp sampleStart = TimeStamp::NowUnfuzzed();
3723
3724 // This scope is for |lock|. It ends before we sleep below.
3725 {
3726 // There should be no local callbacks left from a previous loop.
3727 MOZ_ASSERT(!postSamplingCallbacks);
3728
3729 PSAutoLock lock(gPSMutex);
3730 TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();
3731
3732 // Move all the post-sampling callbacks locally, so that new ones cannot
3733 // sneak in between the end of the lock scope and the invocation after it.
3734 postSamplingCallbacks = TakePostSamplingCallbacks(lock);
3735
3736 if (!ActivePS::Exists(lock)) {
3737 // Exit the `while` loop, including the lock scope, before invoking
3738 // callbacks and returning.
3739 samplingState = SamplingState::JustStopped;
3740 break;
3741 }
3742
3743 // At this point profiler_stop() might have been called, and
3744 // profiler_start() might have been called on another thread. If this
3745 // happens the generation won't match.
3746 if (ActivePS::Generation(lock) != mActivityGeneration) {
3747 samplingState = SamplingState::JustStopped;
3748 // Exit the `while` loop, including the lock scope, before invoking
3749 // callbacks and returning.
3750 break;
3751 }
3752
3753 ActivePS::ClearExpiredExitProfiles(lock);
3754
3755 TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();
3756
3757 if (!ActivePS::IsSamplingPaused(lock)) {
3758 double sampleStartDeltaMs =
3759 (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds();
3760 ProfileBuffer& buffer = ActivePS::Buffer(lock);
3761
3762 // handle per-process generic counters
3763 const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
3764 for (auto& counter : counters) {
3765 // create Buffer entries for each counter
3766 buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
3767 buffer.AddEntry(ProfileBufferEntry::Time(sampleStartDeltaMs));
3768 // XXX support keyed maps of counts
3769 // In the future, we'll support keyed counters - for example, counters
3770 // with a key which is a thread ID. For "simple" counters we'll just
3771 // use a key of 0.
3772 int64_t count;
3773 uint64_t number;
3774 counter->Sample(count, number);
3775 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
3776 if (ActivePS::IsMemoryCounter(counter)) {
// For the memory counter, subtract the size of our buffer to avoid
3778 // giving the misleading impression that the memory use keeps on
3779 // growing when it's just the profiler session that's using a larger
3780 // buffer as it gets longer.
3781 count -= static_cast<int64_t>(
3782 ActivePS::ControlledChunkManager(lock).TotalSize());
3783 }
3784 #endif
3785 buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
3786 buffer.AddEntry(ProfileBufferEntry::Count(count));
3787 if (number) {
3788 buffer.AddEntry(ProfileBufferEntry::Number(number));
3789 }
3790 }
3791 TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
3792
3793 if (stackSampling || cpuUtilization) {
3794 samplingState = SamplingState::SamplingCompleted;
3795
3796 const Vector<LiveProfiledThreadData>& liveThreads =
3797 ActivePS::LiveProfiledThreads(lock);
3798
3799 for (auto& thread : liveThreads) {
3800 RegisteredThread* registeredThread = thread.mRegisteredThread;
3801 ProfiledThreadData* profiledThreadData =
3802 thread.mProfiledThreadData.get();
3803 RefPtr<ThreadInfo> info = registeredThread->Info();
3804
3805 const RunningTimes runningTimesDiff = [&]() {
3806 if (!cpuUtilization) {
3807 // If we don't need CPU measurements, we only need a timestamp.
3808 return RunningTimes(TimeStamp::NowUnfuzzed());
3809 }
3810 return GetThreadRunningTimesDiff(lock, *registeredThread);
3811 }();
3812
3813 const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp();
3814 double threadSampleDeltaMs =
3815 (now - CorePS::ProcessStartTime()).ToMilliseconds();
3816
3817 // If the thread is asleep and has been sampled before in the same
3818 // sleep episode, or otherwise(*) if there was zero CPU activity
3819 // since the previous sampling, find and copy the previous sample,
3820 // as that's cheaper than taking a new sample.
3821 // (*) Tech note: The asleep check is done first and always, because
3822 // it is more reliable, and knows if it's the first asleep
3823 // sample, which cannot be duplicated; if the test was the other
3824 // way around, it could find zero CPU and then short-circuit
3825 // that state-changing second-asleep-check operation, which
3826 // could result in an unneeded sample.
3827 // However we're using current running times (instead of copying the
3828 // old ones) because some work could have happened.
3829 if (registeredThread->RacyRegisteredThread()
3830 .CanDuplicateLastSampleDueToSleep() ||
3831 runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0))) {
3832 const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
3833 info->ThreadId(), threadSampleDeltaMs,
3834 profiledThreadData->LastSample(), runningTimesDiff);
3835 if (dup_ok) {
3836 continue;
3837 }
3838 }
3839
3840 AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample);
3841
3842 // Record the global profiler buffer's range start now, before
3843 // adding the first entry for this thread's sample.
3844 const uint64_t bufferRangeStart = buffer.BufferRangeStart();
3845
3846 // Add the thread ID now, so we know its position in the main
3847 // buffer, which is used by some JS data.
3848 // (DoPeriodicSample only knows about the temporary local buffer.)
3849 const uint64_t samplePos =
3850 buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
3851 profiledThreadData->LastSample() = Some(samplePos);
3852
3853 // Also add the time, so it's always there after the thread ID, as
3854 // expected by the parser. (Other stack data is optional.)
3855 buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack(
3856 threadSampleDeltaMs));
3857
3858 Maybe<double> unresponsiveDuration_ms;
3859
3860 // If we have RunningTimes data, store it before the CompactStack.
3861 // Note: It is not stored inside the CompactStack so that it doesn't
3862 // get incorrectly duplicated when the thread is sleeping.
3863 if (!runningTimesDiff.IsEmpty()) {
3864 CorePS::CoreBuffer().PutObjects(
3865 ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff);
3866 }
3867
3868 if (stackSampling) {
3869 // Suspend the thread and collect its stack data in the local
3870 // buffer.
3871 mSampler.SuspendAndSampleAndResumeThread(
3872 lock, *registeredThread, now,
3873 [&](const Registers& aRegs, const TimeStamp& aNow) {
3874 DoPeriodicSample(lock, *registeredThread, aRegs, samplePos,
3875 bufferRangeStart, localProfileBuffer);
3876
3877 // For "eventDelay", we want the input delay - but if
3878 // there are no events in the input queue (or even if there
3879 // are), we're interested in how long the delay *would* be
3880 // for an input event now, which would be the time to finish
3881 // the current event + the delay caused by any events
3882 // already in the input queue (plus any High priority
3883 // events). Events at lower priorities (in a
3884 // PrioritizedEventQueue) than Input count for input delay
3885 // only for the duration that they're running, since when
3886 // they finish, any queued input event would run.
3887 //
3888 // Unless we record the time state of all events and queue
3889 // states at all times, this is hard to precisely calculate,
3890 // but we can approximate it well in post-processing with
3891 // RunningEventDelay and RunningEventStart.
3892 //
3893 // RunningEventDelay is the time duration the event was
3894 // queued before starting execution. RunningEventStart is
3895 // the time the event started. (Note: since we care about
3896 // Input event delays on MainThread, for
3897 // PrioritizedEventQueues we return 0 for RunningEventDelay
3898 // if the currently running event has a lower priority than
3899 // Input (since Input events won't queue behind them).
3900 //
3901 // To directly measure this we would need to record the time
3902 // at which the newest event currently in each queue at time
3903 // X (the sample time) finishes running. This of course
3904 // would require looking into the future, or recording all
3905 // this state and then post-processing it later. If we were
3906 // to trace every event start and end we could do this, but
3907 // it would have significant overhead to do so (and buffer
3908 // usage). From a recording of RunningEventDelays and
3909 // RunningEventStarts we can infer the actual delay:
3910 //
3911 // clang-format off
3912 // Event queue: <tail> D : C : B : A <head>
3913 // Time inserted (ms): 40 : 20 : 10 : 0
3914 // Run Time (ms): 30 : 100 : 40 : 30
3915 //
3916 // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170
3917 // [A||||||||||||]
3918 // ----------[B|||||||||||||||||]
3919 // -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
3920 // -----------------------------------------------------------------[D|||||||||...]
3921 //
3922 // Calculate the delay of a new event added at time t: (run every sample)
3923 // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
3924 // effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
3925 // delta = (now - last_sample_time);
3926 // last_sample_time = now;
3927 // for (t=effective_submission to now) {
3928 // delay[t] += delta;
3929 // }
3930 //
3931 // Can be reduced in overhead by:
3932 // TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
3933 // effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
3934 // if (effective_submission != last_submission) {
              // delta = (now - last_submission);
3936 // // this loop should be made to match each sample point in the range
              // // instead of assuming 1ms sampling as this pseudocode does
3938 // for (t=last_submission to effective_submission-1) {
3939 // delay[t] += delta;
3940 // delta -= 1; // assumes 1ms; adjust as needed to match for()
3941 // }
3942 // last_submission = effective_submission;
3943 // }
3944 //
3945 // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final
3946 // hypothetical Submission Running @ result
3947 // event E
3948 // 0 Empty A 0 30 0 0 @0=10 30
3949 // 10 B A 0 60 0 0 @0=20, @10=10 60
3950 // 20 B A 0 150 0 0 @0=30, @10=20, @20=10 150
3951 // 30 C B 20 140 10 30 @10=20, @20=10, @30=0 140
3952 // 40 C B 20 160 @10=30, @20=20... 160
3953 // 50 C B 20 150 150
3954 // 60 C B 20 140 @10=50, @20=40... 140
3955 // 70 D C 50 130 20 70 @20=50, @30=40... 130
3956 // ...
3957 // 160 D C 50 40 @20=140, @30=130... 40
3958 // 170 <empty> D 140 30 40 @40=140, @50=130... (rounding) 30
3959 // 180 <empty> D 140 20 40 @40=150 20
3960 // 190 <empty> D 140 10 40 @40=160 10
3961 // 200 <empty> <empty> 0 0 NA 0
3962 //
3963 // Function Delay(t) = the time between t and the time at which a hypothetical
3964 // event e would start executing, if e was enqueued at time t.
3965 //
3966 // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
3967 // // instantly.
3968 // Delay(0) = 30 // The hypothetical event e got enqueued just after A got
3969 // // enqueued. It can start running at 30, when A is done.
3970 // Delay(5) = 25
3971 // Delay(10) = 60 // Can start running at 70, after both A and B are done.
3972 // Delay(19) = 51
3973 // Delay(20) = 150 // Can start running at 170, after A, B & C.
3974 // Delay(25) = 145
3975 // Delay(30) = 170 // Can start running at 200, after A, B, C & D.
3976 // Delay(120) = 80
3977 // Delay(200) = 0 // (assuming nothing was enqueued after D)
3978 //
3979 // For every event that gets enqueued, the Delay time will go up by the
3980 // event's running time at the time at which the event is enqueued.
3981 // The Delay function will be a sawtooth of the following shape:
3982 //
3983 // |\ |...
3984 // | \ |
3985 // |\ | \ |
3986 // | \ | \ |
3987 // |\ | \ | \ |
3988 // |\ | \| \| \ |
3989 // | \| \ |
3990 // _| \____|
3991 //
3992 //
3993 // A more complex example with a PrioritizedEventQueue:
3994 //
3995 // Event queue: <tail> D : C : B : A <head>
3996 // Time inserted (ms): 40 : 20 : 10 : 0
3997 // Run Time (ms): 30 : 100 : 40 : 30
3998 // Priority: Input: Norm: Norm: Norm
3999 //
4000 // 0 10 20 30 40 50 60 70 80 90 100 110 120 130 140 150 160 170
4001 // [A||||||||||||]
4002 // ----------[B|||||||||||||||||]
4003 // ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
4004 // ---------------[D||||||||||||]
4005 //
4006 //
4007 // Time Head of queue Running Event RunningEventDelay Delay of Effective Started Calc (submission->now add 10ms) Final
4008 // hypothetical Submission Running @ result
4009 // event
4010 // 0 Empty A 0 30 0 0 @0=10 30
4011 // 10 B A 0 20 0 0 @0=20, @10=10 20
4012 // 20 B A 0 10 0 0 @0=30, @10=20, @20=10 10
4013 // 30 C B 0 40 30 30 @30=10 40
4014 // 40 C B 0 60 30 @40=10, @30=20 60
4015 // 50 C B 0 50 30 @50=10, @40=20, @30=30 50
4016 // 60 C B 0 40 30 @60=10, @50=20, @40=30, @30=40 40
4017 // 70 C D 30 30 40 70 @60=20, @50=30, @40=40 30
4018 // 80 C D 30 20 40 70 ...@50=40, @40=50 20
4019 // 90 C D 30 10 40 70 ...@60=40, @50=50, @40=60 10
4020 // 100 <empty> C 0 100 100 100 @100=10 100
4021 // 110 <empty> C 0 90 100 100 @110=10, @100=20 90
4022
4023 //
4024 // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority.
4025 // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
4026 // // instantly.
4027 // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got
4028 // // enqueued. It can start running at 30, when A is done.
4029 // Delay(5) = 25
4030 // Delay(10) = 20
4031 // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not.
4032 // // So e can start running at 30, when A is done.
4033 // Delay(30) = 40 // Can start running at 70, after B is done.
4034 // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority)
4035 // Delay(80) = 20
4036 // Delay(100) = 100 // Wait for C to finish
4037
4038 // clang-format on
4039 //
4040 // Alternatively we could insert (recycled instead of
4041 // allocated/freed) input events at every sample period
4042 // (1ms...), and use them to back-calculate the delay. This
4043 // might also be somewhat expensive, and would require
4044 // guessing at the maximum delay, which would likely be in
4045 // the seconds, and so you'd need 1000's of pre-allocated
4046 // events per queue per thread - so there would be a memory
4047 // impact as well.
4048
4049 TimeDuration currentEventDelay;
4050 TimeDuration currentEventRunning;
4051 registeredThread->GetRunningEventDelay(
4052 aNow, currentEventDelay, currentEventRunning);
4053
4054 // Note: eventDelay is a different definition of
4055 // responsiveness than the 16ms event injection.
4056
4057 // Don't suppress 0's for now; that can be a future
4058 // optimization. We probably want one zero to be stored
4059 // before we start suppressing, which would be more
4060 // complex.
4061 unresponsiveDuration_ms =
4062 Some(currentEventDelay.ToMilliseconds() +
4063 currentEventRunning.ToMilliseconds());
4064 });
4065
4066 // If we got eventDelay data, store it before the CompactStack.
4067 // Note: It is not stored inside the CompactStack so that it
4068 // doesn't get incorrectly duplicated when the thread is sleeping.
4069 if (unresponsiveDuration_ms.isSome()) {
4070 CorePS::CoreBuffer().PutObjects(
4071 ProfileBufferEntry::Kind::UnresponsiveDurationMs,
4072 *unresponsiveDuration_ms);
4073 }
4074 }
4075
4076 // There *must* be a CompactStack after a TimeBeforeCompactStack;
4077 // but note that other entries may have been concurrently inserted
4078 // between the TimeBeforeCompactStack above and now. If the captured
4079 // sample from `DoPeriodicSample` is complete, copy it into the
4080 // global buffer, otherwise add an empty one to satisfy the parser
4081 // that expects one.
4082 auto state = localBuffer.GetState();
4083 if (NS_WARN_IF(state.mFailedPutBytes !=
4084 previousState.mFailedPutBytes)) {
4085 LOG("Stack sample too big for local storage, failed to store %u "
4086 "bytes",
4087 unsigned(state.mFailedPutBytes -
4088 previousState.mFailedPutBytes));
4089 // There *must* be a CompactStack after a TimeBeforeCompactStack,
4090 // even an empty one.
4091 CorePS::CoreBuffer().PutObjects(
4092 ProfileBufferEntry::Kind::CompactStack,
4093 UniquePtr<ProfileChunkedBuffer>(nullptr));
4094 } else if (state.mRangeEnd - previousState.mRangeEnd >=
4095 *CorePS::CoreBuffer().BufferLength()) {
4096 LOG("Stack sample too big for profiler storage, needed %u bytes",
4097 unsigned(state.mRangeEnd - previousState.mRangeEnd));
4098 // There *must* be a CompactStack after a TimeBeforeCompactStack,
4099 // even an empty one.
4100 CorePS::CoreBuffer().PutObjects(
4101 ProfileBufferEntry::Kind::CompactStack,
4102 UniquePtr<ProfileChunkedBuffer>(nullptr));
4103 } else {
4104 CorePS::CoreBuffer().PutObjects(
4105 ProfileBufferEntry::Kind::CompactStack, localBuffer);
4106 }
4107
4108 // Clean up for the next run.
4109 localBuffer.Clear();
4110 previousState = localBuffer.GetState();
4111 }
4112 } else {
4113 samplingState = SamplingState::NoStackSamplingCompleted;
4114 }
4115
4116 #if defined(USE_LUL_STACKWALK)
4117 // The LUL unwind object accumulates frame statistics. Periodically we
4118 // should poke it to give it a chance to print those statistics. This
4119 // involves doing I/O (fprintf, __android_log_print, etc.) and so
4120 // can't safely be done from the critical section inside
4121 // SuspendAndSampleAndResumeThread, which is why it is done here.
4122 lul::LUL* lul = CorePS::Lul(lock);
4123 if (lul) {
4124 lul->MaybeShowStats();
4125 }
4126 #endif
4127 TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();
4128
4129 {
4130 AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
4131 ActivePS::FulfillChunkRequests(lock);
4132 }
4133
4134 buffer.CollectOverheadStats(sampleStartDeltaMs,
4135 lockAcquired - sampleStart,
4136 expiredMarkersCleaned - lockAcquired,
4137 countersSampled - expiredMarkersCleaned,
4138 threadsSampled - countersSampled);
4139 } else {
4140 samplingState = SamplingState::SamplingPaused;
4141 }
4142 }
4143 // gPSMutex is not held after this point.
4144
4145 // Invoke end-of-sampling callbacks outside of the locked scope.
4146 InvokePostSamplingCallbacks(std::move(postSamplingCallbacks),
4147 samplingState);
4148
4149 ProfilerChild::ProcessPendingUpdate();
4150
4151 // We expect the next sampling loop to start `sampleInterval` after this
4152 // loop here was scheduled to start.
4153 scheduledSampleStart += sampleInterval;
4154
4155 // Try to sleep until we reach that next scheduled time.
4156 const TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
4157 if (scheduledSampleStart >= beforeSleep) {
4158 // There is still time before the next scheduled sample time.
4159 const uint32_t sleepTimeUs = static_cast<uint32_t>(
4160 (scheduledSampleStart - beforeSleep).ToMicroseconds());
4161 if (sleepTimeUs >= minimumIntervalSleepUs) {
4162 SleepMicro(sleepTimeUs);
4163 } else {
4164 // If we're too close to that time, sleep the minimum amount of time.
4165 // Note that the next scheduled start is not shifted, so at the end of
4166 // the next loop, sleep may again be adjusted to get closer to schedule.
4167 SleepMicro(minimumIntervalSleepUs);
4168 }
4169 } else {
4170 // This sampling loop ended after the next sampling should have started!
4171 // There is little point to try and keep up to schedule now, it would
4172 // require more work, while it's likely we're late because the system is
4173 // already busy. Try and restart a normal schedule from now.
4174 scheduledSampleStart = beforeSleep + sampleInterval;
4175 SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds()));
4176 }
4177 }
4178
4179 // End of `while` loop. We can only be here from a `break` inside the loop.
4180 InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState);
4181 }
4182
4183 // We #include these files directly because it means those files can use
4184 // declarations from this file trivially. These provide target-specific
4185 // implementations of all SamplerThread methods except Run().
4186 #if defined(GP_OS_windows)
4187 # include "platform-win32.cpp"
4188 #elif defined(GP_OS_darwin)
4189 # include "platform-macos.cpp"
4190 #elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
4191 # include "platform-linux-android.cpp"
4192 #else
4193 # error "bad platform"
4194 #endif
4195
AllocPlatformData(int aThreadId)4196 UniquePlatformData AllocPlatformData(int aThreadId) {
4197 return UniquePlatformData(new PlatformData(aThreadId));
4198 }
4199
// Deleter invoked by UniquePlatformData; simply destroys the PlatformData.
void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; }
4201
4202 // END SamplerThread
4203 ////////////////////////////////////////////////////////////////////////
4204
4205 ////////////////////////////////////////////////////////////////////////
4206 // BEGIN externally visible functions
4207
MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)4208 MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)
4209
4210 NS_IMETHODIMP
4211 GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
4212 nsISupports* aData, bool aAnonymize) {
4213 MOZ_RELEASE_ASSERT(NS_IsMainThread());
4214
4215 size_t profSize = 0;
4216 size_t lulSize = 0;
4217
4218 {
4219 PSAutoLock lock(gPSMutex);
4220
4221 if (CorePS::Exists()) {
4222 CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
4223 }
4224
4225 if (ActivePS::Exists(lock)) {
4226 profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
4227 }
4228 }
4229
4230 MOZ_COLLECT_REPORT(
4231 "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
4232 "Memory used by the Gecko Profiler's global state (excluding memory used "
4233 "by LUL).");
4234
4235 #if defined(USE_LUL_STACKWALK)
4236 MOZ_COLLECT_REPORT(
4237 "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
4238 "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
4239 #endif
4240
4241 return NS_OK;
4242 }
4243
NS_IMPL_ISUPPORTS(GeckoProfilerReporter,nsIMemoryReporter)4244 NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)
4245
4246 static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
4247 if (strcmp(aFeature, "default") == 0) {
4248 return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
4249 : DefaultFeatures()) &
4250 AvailableFeatures();
4251 }
4252
4253 #define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
4254 if (strcmp(aFeature, str_) == 0) { \
4255 return ProfilerFeature::Name_; \
4256 }
4257
4258 PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)
4259
4260 #undef PARSE_FEATURE_BIT
4261
4262 printf("\nUnrecognized feature \"%s\".\n\n", aFeature);
4263 // Since we may have an old feature we don't implement anymore, don't exit
4264 PrintUsageThenExit(0);
4265 return 0;
4266 }
4267
ParseFeaturesFromStringArray(const char ** aFeatures,uint32_t aFeatureCount,bool aIsStartup)4268 uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
4269 uint32_t aFeatureCount,
4270 bool aIsStartup /* = false */) {
4271 uint32_t features = 0;
4272 for (size_t i = 0; i < aFeatureCount; i++) {
4273 features |= ParseFeature(aFeatures[i], aIsStartup);
4274 }
4275 return features;
4276 }
4277
IsRegisteredThreadInRegisteredThreadsList(PSLockRef aLock,RegisteredThread * aThread)4278 static bool IsRegisteredThreadInRegisteredThreadsList(
4279 PSLockRef aLock, RegisteredThread* aThread) {
4280 const auto& registeredThreads = CorePS::RegisteredThreads(aLock);
4281 for (const auto& registeredThread : registeredThreads) {
4282 if (registeredThread.get() == aThread) {
4283 return true;
4284 }
4285 }
4286
4287 return false;
4288 }
4289
locked_register_thread(PSLockRef aLock,const char * aName,void * aStackTop)4290 static ProfilingStack* locked_register_thread(PSLockRef aLock,
4291 const char* aName,
4292 void* aStackTop) {
4293 MOZ_RELEASE_ASSERT(CorePS::Exists());
4294
4295 VTUNE_REGISTER_THREAD(aName);
4296
4297 if (!TLSRegisteredThread::IsTLSInited()) {
4298 return nullptr;
4299 }
4300
4301 RefPtr<ThreadInfo> info =
4302 new ThreadInfo(aName, profiler_current_thread_id(), NS_IsMainThread());
4303 UniquePtr<RegisteredThread> registeredThread = MakeUnique<RegisteredThread>(
4304 info, NS_GetCurrentThreadNoCreate(), aStackTop);
4305
4306 TLSRegisteredThread::SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
4307 aLock, registeredThread.get());
4308
4309 if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) {
4310 registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
4311 nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
4312 ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
4313 aLock, registeredThread.get(),
4314 MakeUnique<ProfiledThreadData>(info, eventTarget));
4315
4316 if (ActivePS::FeatureJS(aLock)) {
4317 // This StartJSSampling() call is on-thread, so we can poll manually to
4318 // start JS sampling immediately.
4319 registeredThread->StartJSSampling(ActivePS::JSFlags(aLock));
4320 registeredThread->PollJSSampling();
4321 if (registeredThread->GetJSContext()) {
4322 profiledThreadData->NotifyReceivedJSContext(
4323 ActivePS::Buffer(aLock).BufferRangeEnd());
4324 }
4325 }
4326 }
4327
4328 MOZ_RELEASE_ASSERT(TLSRegisteredThread::RegisteredThread(aLock),
4329 "TLS should be set when registering thread");
4330 MOZ_RELEASE_ASSERT(
4331 registeredThread == TLSRegisteredThread::RegisteredThread(aLock),
4332 "TLS should be set as expected when registering thread");
4333
4334 ProfilingStack* profilingStack =
4335 ®isteredThread->RacyRegisteredThread().ProfilingStack();
4336
4337 CorePS::AppendRegisteredThread(aLock, std::move(registeredThread));
4338
4339 return profilingStack;
4340 }
4341
NotifyObservers(const char * aTopic,nsISupports * aSubject=nullptr)4342 static void NotifyObservers(const char* aTopic,
4343 nsISupports* aSubject = nullptr) {
4344 if (!NS_IsMainThread()) {
4345 // Dispatch a task to the main thread that notifies observers.
4346 // If NotifyObservers is called both on and off the main thread within a
4347 // short time, the order of the notifications can be different from the
4348 // order of the calls to NotifyObservers.
4349 // Getting the order 100% right isn't that important at the moment, because
4350 // these notifications are only observed in the parent process, where the
4351 // profiler_* functions are currently only called on the main thread.
4352 nsCOMPtr<nsISupports> subject = aSubject;
4353 NS_DispatchToMainThread(NS_NewRunnableFunction(
4354 "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
4355 return;
4356 }
4357
4358 if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
4359 os->NotifyObservers(aSubject, aTopic, nullptr);
4360 }
4361 }
4362
NotifyProfilerStarted(const PowerOfTwo32 & aCapacity,const Maybe<double> & aDuration,double aInterval,uint32_t aFeatures,const char ** aFilters,uint32_t aFilterCount,uint64_t aActiveTabID)4363 static void NotifyProfilerStarted(const PowerOfTwo32& aCapacity,
4364 const Maybe<double>& aDuration,
4365 double aInterval, uint32_t aFeatures,
4366 const char** aFilters, uint32_t aFilterCount,
4367 uint64_t aActiveTabID) {
4368 nsTArray<nsCString> filtersArray;
4369 for (size_t i = 0; i < aFilterCount; ++i) {
4370 filtersArray.AppendElement(aFilters[i]);
4371 }
4372
4373 nsCOMPtr<nsIProfilerStartParams> params = new nsProfilerStartParams(
4374 aCapacity.Value(), aDuration, aInterval, aFeatures,
4375 std::move(filtersArray), aActiveTabID);
4376
4377 ProfilerParent::ProfilerStarted(params);
4378 NotifyObservers("profiler-started", params);
4379 }
4380
4381 static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
4382 double aInterval, uint32_t aFeatures,
4383 const char** aFilters, uint32_t aFilterCount,
4384 uint64_t aActiveTabID,
4385 const Maybe<double>& aDuration);
4386
4387 // This basically duplicates AutoProfilerLabel's constructor.
MozGlueLabelEnter(const char * aLabel,const char * aDynamicString,void * aSp)4388 static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString,
4389 void* aSp) {
4390 ProfilingStackOwner* profilingStackOwner =
4391 AutoProfilerLabel::ProfilingStackOwnerTLS::Get();
4392 if (profilingStackOwner) {
4393 profilingStackOwner->ProfilingStack().pushLabelFrame(
4394 aLabel, aDynamicString, aSp, JS::ProfilingCategoryPair::OTHER);
4395 }
4396 return profilingStackOwner;
4397 }
4398
4399 // This basically duplicates AutoProfilerLabel's destructor.
MozGlueLabelExit(void * aProfilingStackOwner)4400 static void MozGlueLabelExit(void* aProfilingStackOwner) {
4401 if (aProfilingStackOwner) {
4402 reinterpret_cast<ProfilingStackOwner*>(aProfilingStackOwner)
4403 ->ProfilingStack()
4404 .pop();
4405 }
4406 }
4407
SplitAtCommas(const char * aString,UniquePtr<char[]> & aStorage)4408 static Vector<const char*> SplitAtCommas(const char* aString,
4409 UniquePtr<char[]>& aStorage) {
4410 size_t len = strlen(aString);
4411 aStorage = MakeUnique<char[]>(len + 1);
4412 PodCopy(aStorage.get(), aString, len + 1);
4413
4414 // Iterate over all characters in aStorage and split at commas, by
4415 // overwriting commas with the null char.
4416 Vector<const char*> array;
4417 size_t currentElementStart = 0;
4418 for (size_t i = 0; i <= len; i++) {
4419 if (aStorage[i] == ',') {
4420 aStorage[i] = '\0';
4421 }
4422 if (aStorage[i] == '\0') {
4423 MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
4424 currentElementStart = i + 1;
4425 }
4426 }
4427 return array;
4428 }
4429
profiler_init_threadmanager()4430 void profiler_init_threadmanager() {
4431 LOG("profiler_init_threadmanager");
4432
4433 PSAutoLock lock(gPSMutex);
4434 RegisteredThread* registeredThread =
4435 TLSRegisteredThread::RegisteredThread(lock);
4436 if (registeredThread && !registeredThread->GetEventTarget()) {
4437 registeredThread->ResetMainThread(NS_GetCurrentThreadNoCreate());
4438 }
4439 }
4440
profiler_init(void * aStackTop)4441 void profiler_init(void* aStackTop) {
4442 LOG("profiler_init");
4443
4444 scProfilerMainThreadId = profiler_current_thread_id();
4445
4446 VTUNE_INIT();
4447
4448 MOZ_RELEASE_ASSERT(!CorePS::Exists());
4449
4450 if (getenv("MOZ_PROFILER_HELP")) {
4451 PrintUsageThenExit(1); // terminates execution
4452 }
4453
4454 // This must be before any TLS access (e.g.: Thread registration, labels...).
4455 TLSRegisteredThread::Init();
4456
4457 SharedLibraryInfo::Initialize();
4458
4459 uint32_t features = DefaultFeatures() & AvailableFeatures();
4460
4461 UniquePtr<char[]> filterStorage;
4462
4463 Vector<const char*> filters;
4464 MOZ_RELEASE_ASSERT(filters.append("GeckoMain"));
4465 MOZ_RELEASE_ASSERT(filters.append("Compositor"));
4466 MOZ_RELEASE_ASSERT(filters.append("Renderer"));
4467 MOZ_RELEASE_ASSERT(filters.append("DOM Worker"));
4468
4469 PowerOfTwo32 capacity = PROFILER_DEFAULT_ENTRIES;
4470 Maybe<double> duration = Nothing();
4471 double interval = PROFILER_DEFAULT_INTERVAL;
4472 uint64_t activeTabID = PROFILER_DEFAULT_ACTIVE_TAB_ID;
4473
4474 {
4475 PSAutoLock lock(gPSMutex);
4476
4477 // We've passed the possible failure point. Instantiate CorePS, which
4478 // indicates that the profiler has initialized successfully.
4479 CorePS::Create(lock);
4480
4481 // profiler_init implicitly registers this thread as main thread.
4482 Unused << locked_register_thread(lock, kMainThreadName, aStackTop);
4483
4484 // Platform-specific initialization.
4485 PlatformInit(lock);
4486
4487 #if defined(GP_OS_android)
4488 if (jni::IsAvailable()) {
4489 GeckoJavaSampler::Init();
4490 }
4491 #endif
4492
4493 // (Linux-only) We could create CorePS::mLul and read unwind info into it
4494 // at this point. That would match the lifetime implied by destruction of
4495 // it in profiler_shutdown() just below. However, that gives a big delay on
4496 // startup, even if no profiling is actually to be done. So, instead, it is
4497 // created on demand at the first call to PlatformStart().
4498
4499 const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
4500 if (!startupEnv || startupEnv[0] == '\0' ||
4501 ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
4502 startupEnv[0] == 'n') &&
4503 startupEnv[1] == '\0')) {
4504 return;
4505 }
4506
4507 LOG("- MOZ_PROFILER_STARTUP is set");
4508
4509 // Startup default capacity may be different.
4510 capacity = PROFILER_DEFAULT_STARTUP_ENTRIES;
4511
4512 const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
4513 if (startupCapacity && startupCapacity[0] != '\0') {
4514 errno = 0;
4515 long capacityLong = strtol(startupCapacity, nullptr, 10);
4516 // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
4517 // the maximum 32-bit signed number (as more than that is clamped down to
4518 // 2^31 anyway).
4519 if (errno == 0 && capacityLong > 0 &&
4520 static_cast<uint64_t>(capacityLong) <=
4521 static_cast<uint64_t>(INT32_MAX)) {
4522 capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
4523 static_cast<uint32_t>(capacityLong)));
4524 LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
4525 } else {
4526 LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
4527 startupCapacity);
4528 PrintUsageThenExit(1);
4529 }
4530 }
4531
4532 const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
4533 if (startupDuration && startupDuration[0] != '\0') {
4534 errno = 0;
4535 double durationVal = PR_strtod(startupDuration, nullptr);
4536 if (errno == 0 && durationVal >= 0.0) {
4537 if (durationVal > 0.0) {
4538 duration = Some(durationVal);
4539 }
4540 LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal);
4541 } else {
4542 LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
4543 startupDuration);
4544 PrintUsageThenExit(1);
4545 }
4546 }
4547
4548 const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
4549 if (startupInterval && startupInterval[0] != '\0') {
4550 errno = 0;
4551 interval = PR_strtod(startupInterval, nullptr);
4552 if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) {
4553 LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
4554 } else {
4555 LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
4556 startupInterval);
4557 PrintUsageThenExit(1);
4558 }
4559 }
4560
4561 features |= StartupExtraDefaultFeatures() & AvailableFeatures();
4562
4563 const char* startupFeaturesBitfield =
4564 getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
4565 if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
4566 errno = 0;
4567 features = strtol(startupFeaturesBitfield, nullptr, 10);
4568 if (errno == 0 && features != 0) {
4569 LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
4570 } else {
4571 LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
4572 startupFeaturesBitfield);
4573 PrintUsageThenExit(1);
4574 }
4575 } else {
4576 const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
4577 if (startupFeatures && startupFeatures[0] != '\0') {
4578 // Interpret startupFeatures as a list of feature strings, separated by
4579 // commas.
4580 UniquePtr<char[]> featureStringStorage;
4581 Vector<const char*> featureStringArray =
4582 SplitAtCommas(startupFeatures, featureStringStorage);
4583 features = ParseFeaturesFromStringArray(featureStringArray.begin(),
4584 featureStringArray.length(),
4585 /* aIsStartup */ true);
4586 LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
4587 }
4588 }
4589
4590 const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
4591 if (startupFilters && startupFilters[0] != '\0') {
4592 filters = SplitAtCommas(startupFilters, filterStorage);
4593 LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
4594 }
4595
4596 const char* startupActiveTabID =
4597 getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID");
4598 if (startupActiveTabID && startupActiveTabID[0] != '\0') {
4599 std::istringstream iss(startupActiveTabID);
4600 iss >> activeTabID;
4601 if (!iss.fail()) {
4602 LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID);
4603 } else {
4604 LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid "
4605 "uint64_t: %s",
4606 startupActiveTabID);
4607 PrintUsageThenExit(1);
4608 }
4609 }
4610
4611 locked_profiler_start(lock, capacity, interval, features, filters.begin(),
4612 filters.length(), activeTabID, duration);
4613 }
4614
4615 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
4616 // Start counting memory allocations (outside of lock because this may call
4617 // profiler_add_sampled_counter which would attempt to take the lock.)
4618 ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
4619 #endif
4620
4621 invoke_profiler_state_change_callbacks(ProfilingState::Started);
4622
4623 // We do this with gPSMutex unlocked. The comment in profiler_stop() explains
4624 // why.
4625 NotifyProfilerStarted(capacity, duration, interval, features, filters.begin(),
4626 filters.length(), 0);
4627 }
4628
4629 static void locked_profiler_save_profile_to_file(
4630 PSLockRef aLock, const char* aFilename,
4631 const PreRecordedMetaInformation& aPreRecordedMetaInformation,
4632 bool aIsShuttingDown);
4633
4634 static SamplerThread* locked_profiler_stop(PSLockRef aLock);
4635
// Tear down the profiler at process shutdown: optionally save the profile to
// the file named by MOZ_PROFILER_SHUTDOWN, stop sampling, destroy CorePS, and
// clear this thread's TLS. Must be called on the main thread, after
// profiler_init. With IsFastShutdown::Yes, returns as early as possible
// (after any requested save) without tearing down state.
void profiler_shutdown(IsFastShutdown aIsFastShutdown) {
  LOG("profiler_shutdown");

  VTUNE_SHUTDOWN();

  MOZ_RELEASE_ASSERT(NS_IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // Notify listeners: "Stopping" only if actually active, "ShuttingDown"
  // unconditionally.
  if (profiler_is_active()) {
    invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
  }
  invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);

  // Collected before taking the lock; used by the optional profile save below.
  const auto preRecordedMetaInformation = PreRecordMetaInformation();

  ProfilerParent::ProfilerWillStopIfStarted();

  // If the profiler is active we must get a handle to the SamplerThread before
  // ActivePS is destroyed, in order to delete it.
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Save the profile on shutdown if requested.
    if (ActivePS::Exists(lock)) {
      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
      if (filename) {
        locked_profiler_save_profile_to_file(lock, filename,
                                             preRecordedMetaInformation,
                                             /* aIsShuttingDown */ true);
      }
      // Fast shutdown: skip the orderly stop/destroy below entirely.
      if (aIsFastShutdown == IsFastShutdown::Yes) {
        return;
      }

      samplerThread = locked_profiler_stop(lock);
    } else if (aIsFastShutdown == IsFastShutdown::Yes) {
      return;
    }

    CorePS::Destroy(lock);

    // We just destroyed CorePS and the ThreadInfos it contains, so we can
    // clear this thread's TLSRegisteredThread.
    TLSRegisteredThread::ResetRegisteredThread(lock);
    // We can also clear the AutoProfilerLabel's ProfilingStack because the
    // main thread should not use labels after profiler_shutdown.
    TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }
}
4694
WriteProfileToJSONWriter(SpliceableChunkedJSONWriter & aWriter,double aSinceTime,bool aIsShuttingDown,ProfilerCodeAddressService * aService)4695 static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
4696 double aSinceTime, bool aIsShuttingDown,
4697 ProfilerCodeAddressService* aService) {
4698 LOG("WriteProfileToJSONWriter");
4699
4700 MOZ_RELEASE_ASSERT(CorePS::Exists());
4701
4702 aWriter.Start();
4703 {
4704 if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
4705 aIsShuttingDown, aService)) {
4706 return false;
4707 }
4708
4709 // Don't include profiles from other processes because this is a
4710 // synchronous function.
4711 aWriter.StartArrayProperty("processes");
4712 aWriter.EndArray();
4713 }
4714 aWriter.End();
4715 return true;
4716 }
4717
profiler_set_process_name(const nsACString & aProcessName,const nsACString * aETLDplus1)4718 void profiler_set_process_name(const nsACString& aProcessName,
4719 const nsACString* aETLDplus1) {
4720 LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.Data(),
4721 aETLDplus1 ? aETLDplus1->Data() : "<none>");
4722 PSAutoLock lock(gPSMutex);
4723 CorePS::SetProcessName(lock, aProcessName);
4724 if (aETLDplus1) {
4725 CorePS::SetETLDplus1(lock, *aETLDplus1);
4726 }
4727 }
4728
profiler_get_profile(double aSinceTime,bool aIsShuttingDown)4729 UniquePtr<char[]> profiler_get_profile(double aSinceTime,
4730 bool aIsShuttingDown) {
4731 LOG("profiler_get_profile");
4732
4733 UniquePtr<ProfilerCodeAddressService> service =
4734 profiler_code_address_service_for_presymbolication();
4735
4736 SpliceableChunkedJSONWriter b;
4737 if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown,
4738 service.get())) {
4739 return nullptr;
4740 }
4741 return b.ChunkedWriteFunc().CopyData();
4742 }
4743
profiler_get_profile_json_into_lazily_allocated_buffer(const std::function<char * (size_t)> & aAllocator,double aSinceTime,bool aIsShuttingDown)4744 void profiler_get_profile_json_into_lazily_allocated_buffer(
4745 const std::function<char*(size_t)>& aAllocator, double aSinceTime,
4746 bool aIsShuttingDown) {
4747 LOG("profiler_get_profile_json_into_lazily_allocated_buffer");
4748
4749 UniquePtr<ProfilerCodeAddressService> service =
4750 profiler_code_address_service_for_presymbolication();
4751
4752 SpliceableChunkedJSONWriter b;
4753 if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown,
4754 service.get())) {
4755 return;
4756 }
4757
4758 b.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(aAllocator);
4759 }
4760
profiler_get_start_params(int * aCapacity,Maybe<double> * aDuration,double * aInterval,uint32_t * aFeatures,Vector<const char * > * aFilters,uint64_t * aActiveTabID)4761 void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
4762 double* aInterval, uint32_t* aFeatures,
4763 Vector<const char*>* aFilters,
4764 uint64_t* aActiveTabID) {
4765 MOZ_RELEASE_ASSERT(CorePS::Exists());
4766
4767 if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) ||
4768 NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) ||
4769 NS_WARN_IF(!aFilters)) {
4770 return;
4771 }
4772
4773 PSAutoLock lock(gPSMutex);
4774
4775 if (!ActivePS::Exists(lock)) {
4776 *aCapacity = 0;
4777 *aDuration = Nothing();
4778 *aInterval = 0;
4779 *aFeatures = 0;
4780 *aActiveTabID = 0;
4781 aFilters->clear();
4782 return;
4783 }
4784
4785 *aCapacity = ActivePS::Capacity(lock).Value();
4786 *aDuration = ActivePS::Duration(lock);
4787 *aInterval = ActivePS::Interval(lock);
4788 *aFeatures = ActivePS::Features(lock);
4789 *aActiveTabID = ActivePS::ActiveTabID(lock);
4790
4791 const Vector<std::string>& filters = ActivePS::Filters(lock);
4792 MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
4793 for (uint32_t i = 0; i < filters.length(); ++i) {
4794 (*aFilters)[i] = filters[i].c_str();
4795 }
4796 }
4797
// Returns the active session's controlled chunk manager, or nullptr (with an
// NS_WARN_IF warning) when the profiler is not currently active.
ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock(gPSMutex);
  if (NS_WARN_IF(!ActivePS::Exists(lock))) {
    return nullptr;
  }
  return &ActivePS::ControlledChunkManager(lock);
}
4806
4807 namespace mozilla {
4808
GetProfilerEnvVarsForChildProcess(std::function<void (const char * key,const char * value)> && aSetEnv)4809 void GetProfilerEnvVarsForChildProcess(
4810 std::function<void(const char* key, const char* value)>&& aSetEnv) {
4811 MOZ_RELEASE_ASSERT(CorePS::Exists());
4812
4813 PSAutoLock lock(gPSMutex);
4814
4815 if (!ActivePS::Exists(lock)) {
4816 aSetEnv("MOZ_PROFILER_STARTUP", "");
4817 return;
4818 }
4819
4820 aSetEnv("MOZ_PROFILER_STARTUP", "1");
4821
4822 // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
4823 // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
4824 // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
4825 if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
4826 aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1");
4827 }
4828
4829 auto capacityString =
4830 Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
4831 aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
4832
4833 // Use AppendFloat instead of Smprintf with %f because the decimal
4834 // separator used by %f is locale-dependent. But the string we produce needs
4835 // to be parseable by strtod, which only accepts the period character as a
4836 // decimal separator. AppendFloat always uses the period character.
4837 nsCString intervalString;
4838 intervalString.AppendFloat(ActivePS::Interval(lock));
4839 aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get());
4840
4841 auto featuresString = Smprintf("%d", ActivePS::Features(lock));
4842 aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
4843
4844 std::string filtersString;
4845 const Vector<std::string>& filters = ActivePS::Filters(lock);
4846 for (uint32_t i = 0; i < filters.length(); ++i) {
4847 if (i != 0) {
4848 filtersString += ",";
4849 }
4850 filtersString += filters[i];
4851 }
4852 aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
4853
4854 auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock));
4855 aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get());
4856 }
4857
4858 } // namespace mozilla
4859
profiler_received_exit_profile(const nsCString & aExitProfile)4860 void profiler_received_exit_profile(const nsCString& aExitProfile) {
4861 MOZ_RELEASE_ASSERT(CorePS::Exists());
4862 PSAutoLock lock(gPSMutex);
4863 if (!ActivePS::Exists(lock)) {
4864 return;
4865 }
4866 ActivePS::AddExitProfile(lock, aExitProfile);
4867 }
4868
profiler_move_exit_profiles()4869 Vector<nsCString> profiler_move_exit_profiles() {
4870 MOZ_RELEASE_ASSERT(CorePS::Exists());
4871 PSAutoLock lock(gPSMutex);
4872 Vector<nsCString> profiles;
4873 if (ActivePS::Exists(lock)) {
4874 profiles = ActivePS::MoveExitProfiles(lock);
4875 }
4876 return profiles;
4877 }
4878
// Writes the current profile — this process's data plus any stored child
// exit profiles under "processes" — as JSON to aFilename. Fails silently if
// the file cannot be opened. Requires gPSMutex held and an active session.
static void locked_profiler_save_profile_to_file(
    PSLockRef aLock, const char* aFilename,
    const PreRecordedMetaInformation& aPreRecordedMetaInformation,
    bool aIsShuttingDown = false) {
  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  std::ofstream stream;
  stream.open(aFilename);
  if (stream.is_open()) {
    SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
    w.Start();
    {
      locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
                                                   aPreRecordedMetaInformation,
                                                   aIsShuttingDown, nullptr);

      // Splice child-process exit profiles into the "processes" array.
      // Note: MoveExitProfiles transfers ownership, emptying the stored list.
      w.StartArrayProperty("processes");
      Vector<nsCString> exitProfiles = ActivePS::MoveExitProfiles(aLock);
      for (auto& exitProfile : exitProfiles) {
        if (!exitProfile.IsEmpty()) {
          w.Splice(exitProfile);
        }
      }
      w.EndArray();
    }
    w.End();

    stream.close();
  }
}
4911
// Public entry point: saves the current profile as JSON to aFilename.
// Does nothing when the profiler is inactive.
void profiler_save_profile_to_file(const char* aFilename) {
  LOG("profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // Collected before taking gPSMutex — same pattern as profiler_shutdown().
  const auto preRecordedMetaInformation = PreRecordMetaInformation();

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock)) {
    return;
  }

  locked_profiler_save_profile_to_file(lock, aFilename,
                                       preRecordedMetaInformation);
}
4928
// Returns the bit set of profiler features available in this build/platform
// (as computed by AvailableFeatures()).
uint32_t profiler_get_available_features() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  return AvailableFeatures();
}
4933
profiler_get_buffer_info()4934 Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
4935 MOZ_RELEASE_ASSERT(CorePS::Exists());
4936
4937 PSAutoLock lock(gPSMutex);
4938
4939 if (!ActivePS::Exists(lock)) {
4940 return Nothing();
4941 }
4942
4943 return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
4944 }
4945
PollJSSamplingForCurrentThread()4946 static void PollJSSamplingForCurrentThread() {
4947 MOZ_RELEASE_ASSERT(CorePS::Exists());
4948
4949 PSAutoLock lock(gPSMutex);
4950
4951 RegisteredThread* registeredThread =
4952 TLSRegisteredThread::RegisteredThread(lock);
4953 if (!registeredThread) {
4954 return;
4955 }
4956
4957 registeredThread->PollJSSampling();
4958 }
4959
4960 // When the profiler is started on a background thread, we can't synchronously
4961 // call PollJSSampling on the main thread's ThreadInfo. And the next regular
4962 // call to PollJSSampling on the main thread would only happen once the main
4963 // thread triggers a JS interrupt callback.
4964 // This means that all the JS execution between profiler_start() and the first
4965 // JS interrupt would happen with JS sampling disabled, and we wouldn't get any
4966 // JS function information for that period of time.
4967 // So in order to start JS sampling as soon as possible, we dispatch a runnable
4968 // to the main thread which manually calls PollJSSamplingForCurrentThread().
4969 // In some cases this runnable will lose the race with the next JS interrupt.
4970 // That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls.
TriggerPollJSSamplingOnMainThread()4971 static void TriggerPollJSSamplingOnMainThread() {
4972 nsCOMPtr<nsIThread> mainThread;
4973 nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread));
4974 if (NS_SUCCEEDED(rv) && mainThread) {
4975 nsCOMPtr<nsIRunnable> task =
4976 NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread",
4977 []() { PollJSSamplingForCurrentThread(); });
4978 SchedulerGroup::Dispatch(TaskCategory::Other, task.forget());
4979 }
4980 }
4981
// Returns true if aString is non-null and contains at least aMinimumLength
// characters before its terminating NUL. A null aString always fails, even
// for aMinimumLength == 0.
static bool HasMinimumLength(const char* aString, size_t aMinimumLength) {
  if (!aString) {
    return false;
  }
  size_t seen = 0;
  while (seen < aMinimumLength && aString[seen] != '\0') {
    ++seen;
  }
  return seen == aMinimumLength;
}
4993
// Starts a profiling session. Requires gPSMutex held, CorePS to exist, and no
// session to be active. Captures and stops any still-running Base Profiler,
// creates ActivePS, enables profiling on each eligible registered thread,
// and finally publishes the feature set via RacyFeatures.
// Unreasonable aCapacity/aInterval/aDuration values fall back to defaults.
static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
                                  double aInterval, uint32_t aFeatures,
                                  const char** aFilters, uint32_t aFilterCount,
                                  uint64_t aActiveTabID,
                                  const Maybe<double>& aDuration) {
  if (LOG_TEST) {
    LOG("locked_profiler_start");
    LOG("- capacity = %u", unsigned(aCapacity.Value()));
    LOG("- duration = %.2f", aDuration ? *aDuration : -1);
    LOG("- interval = %.2f", aInterval);
    LOG("- tab ID = %" PRIu64, aActiveTabID);

#define LOG_FEATURE(n_, str_, Name_, desc_)     \
  if (ProfilerFeature::Has##Name_(aFeatures)) { \
    LOG("- feature = %s", str_);                \
  }

    PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)

#undef LOG_FEATURE

    for (uint32_t i = 0; i < aFilterCount; i++) {
      LOG("- threads = %s", aFilters[i]);
    }
  }

  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));

  UniquePtr<char[]> baseprofile;
  if (baseprofiler::profiler_is_active()) {
    // Note that we still hold the lock, so the sampler cannot run yet and
    // interact negatively with the still-active BaseProfiler sampler.
    // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP.
    // Capture the Base Profiler startup profile threads (if any).
    baseprofile = baseprofiler::profiler_get_profile(
        /* aSinceTime */ 0, /* aIsShuttingDown */ false,
        /* aOnlyThreads */ true);

    // Now stop Base Profiler (BP), as further recording will be ignored anyway,
    // and so that it won't clash with Gecko Profiler (GP) sampling starting
    // after the lock is dropped.
    // On Linux this is especially important to do before creating the GP
    // sampler, because the BP sampler may send a signal (to stop threads to be
    // sampled), which the GP would intercept before its own initialization is
    // complete and ready to handle such signals.
    // Note that even though `profiler_stop()` doesn't immediately destroy and
    // join the sampler thread, it safely deactivates it in such a way that the
    // thread will soon exit without doing any actual work.
    // TODO: Allow non-sampling profiling to continue.
    // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown.
    baseprofiler::profiler_stop();
  }

#if defined(GP_PLAT_amd64_windows)
  InitializeWin64ProfilerHooks();
#endif

  // Fall back to the default values if the passed-in values are unreasonable.
  // We want to be able to store at least one full stack.
  PowerOfTwo32 capacity =
      (aCapacity.Value() >=
       ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
          ? aCapacity
          : PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = aDuration;

  // A non-positive requested duration means "no duration limit".
  if (aDuration && *aDuration <= 0) {
    duration = Nothing();
  }

  double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;

  ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
                   aActiveTabID, duration);

  // ActivePS::Create can only succeed or crash.
  MOZ_ASSERT(ActivePS::Exists(aLock));

  // An "empty" profile string may in fact contain 1 character (a newline), so
  // we want at least 2 characters to register a profile.
  if (HasMinimumLength(baseprofile.get(), 2)) {
    // The BaseProfiler startup profile will be stored as a separate "process"
    // in the Gecko Profiler profile, and shown as a new track under the
    // corresponding Gecko Profiler thread.
    ActivePS::AddBaseProfileThreads(aLock, std::move(baseprofile));
  }

  // Set up profiling for each registered thread, if appropriate.
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  bool isMainThreadBeingProfiled = false;
#endif
  int tid = profiler_current_thread_id();
  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(aLock);
  for (auto& registeredThread : registeredThreads) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    if (ActivePS::ShouldProfileThread(aLock, info)) {
      registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
      nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
      ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
          aLock, registeredThread.get(),
          MakeUnique<ProfiledThreadData>(info, eventTarget));
      ClearThreadRunningTimes(aLock, *registeredThread);
      if (ActivePS::FeatureJS(aLock)) {
        registeredThread->StartJSSampling(ActivePS::JSFlags(aLock));
        if (info->ThreadId() == tid) {
          // We can manually poll the current thread so it starts sampling
          // immediately.
          registeredThread->PollJSSampling();
        } else if (info->IsMainThread()) {
          // Dispatch a runnable to the main thread to call PollJSSampling(),
          // so that we don't have wait for the next JS interrupt callback in
          // order to start profiling JS.
          TriggerPollJSSamplingOnMainThread();
        }
      }
#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
      if (info->IsMainThread()) {
        isMainThreadBeingProfiled = true;
      }
#endif
      registeredThread->RacyRegisteredThread().ReinitializeOnResume();
      if (registeredThread->GetJSContext()) {
        profiledThreadData->NotifyReceivedJSContext(0);
      }
    }
  }

  // Setup support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);

#if defined(GP_OS_android)
  if (ActivePS::FeatureJava(aLock)) {
    int javaInterval = interval;
    // Java sampling doesn't accurately keep up with the sampling rate that is
    // lower than 1ms.
    if (javaInterval < 1) {
      javaInterval = 1;
    }
    // Send the interval-relative entry count, but we have 100000 hard cap in
    // the java code, it can't be more than that.
    java::GeckoJavaSampler::Start(
        javaInterval, std::round((double)(capacity.Value()) * interval /
                                 (double)(javaInterval)));
  }
#endif

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  if (ActivePS::FeatureNativeAllocations(aLock)) {
    if (isMainThreadBeingProfiled) {
      mozilla::profiler::enable_native_allocations();
    } else {
      NS_WARNING(
          "The nativeallocations feature is turned on, but the main thread is "
          "not being profiled. The allocations are only stored on the main "
          "thread.");
    }
  }
#endif

  if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) {
    StartAudioCallbackTracing();
  }

  // At the very end, set up RacyFeatures.
  RacyFeatures::SetActive(ActivePS::Features(aLock));
}
5162
// Public entry point: starts the profiler, initializing CorePS if needed and
// restarting (stop + start) if a session is already running. Observer and
// IPC notifications happen after gPSMutex is released, for the reasons
// documented in profiler_stop().
void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
                    uint32_t aFeatures, const char** aFilters,
                    uint32_t aFilterCount, uint64_t aActiveTabID,
                    const Maybe<double>& aDuration) {
  LOG("profiler_start");

  ProfilerParent::ProfilerWillStopIfStarted();

  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    // Reset the current state if the profiler is running.
    if (ActivePS::Exists(lock)) {
      // Note: Not invoking callbacks with ProfilingState::Stopping, because
      // we're under lock, and also it would not be useful: Any profiling data
      // will be discarded, and we're immediately restarting the profiler below
      // and then notifying ProfilingState::Started.
      samplerThread = locked_profiler_stop(lock);
    }

    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                          aFilterCount, aActiveTabID, aDuration);
  }

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  // Start counting memory allocations (outside of lock because this may call
  // profiler_add_sampled_counter which would attempt to take the lock.)
  ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
#endif

  invoke_profiler_state_change_callbacks(ProfilingState::Started);

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }
  NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, aFilters,
                        aFilterCount, aActiveTabID);
}
5211
// Like profiler_start(), but a no-op when a session with exactly the same
// settings is already running; otherwise it (re)starts with the given
// settings. Started-state callbacks and notifications only fire when a
// (re)start actually happened.
void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
                             uint32_t aFeatures, const char** aFilters,
                             uint32_t aFilterCount, uint64_t aActiveTabID,
                             const Maybe<double>& aDuration) {
  LOG("profiler_ensure_started");

  ProfilerParent::ProfilerWillStopIfStarted();

  bool startedProfiler = false;
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock(gPSMutex);

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    if (ActivePS::Exists(lock)) {
      // The profiler is active.
      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
                            aFilters, aFilterCount, aActiveTabID)) {
        // Stop and restart with different settings.
        // Note: Not invoking callbacks with ProfilingState::Stopping, because
        // we're under lock, and also it would not be useful: Any profiling data
        // will be discarded, and we're immediately restarting the profiler
        // below and then notifying ProfilingState::Started.
        samplerThread = locked_profiler_stop(lock);
        locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                              aFilterCount, aActiveTabID, aDuration);
        startedProfiler = true;
      }
    } else {
      // The profiler is stopped.
      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                            aFilterCount, aActiveTabID, aDuration);
      startedProfiler = true;
    }
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    ProfilerParent::ProfilerStopped();
    NotifyObservers("profiler-stopped");
    delete samplerThread;
  }

  if (startedProfiler) {
    invoke_profiler_state_change_callbacks(ProfilingState::Started);

    NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, aFilters,
                          aFilterCount, aActiveTabID);
  }
}
5267
// Stops the current profiling session. Requires gPSMutex held and an active
// session. Clears RacyFeatures first, unwinds the per-thread profiling state,
// destroys ActivePS, and returns the (deactivated) SamplerThread; the caller
// must delete it AFTER releasing gPSMutex (see profiler_stop()).
[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
  LOG("locked_profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  // At the very start, clear RacyFeatures.
  RacyFeatures::SetInactive();

  if (ActivePS::FeatureAudioCallbackTracing(aLock)) {
    StopAudioCallbackTracing();
  }

#if defined(GP_OS_android)
  if (ActivePS::FeatureJava(aLock)) {
    java::GeckoJavaSampler::Stop();
  }
#endif

  // Remove support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(nullptr, nullptr);

  // Stop sampling live threads.
  int tid = profiler_current_thread_id();
  const Vector<LiveProfiledThreadData>& liveProfiledThreads =
      ActivePS::LiveProfiledThreads(aLock);
  for (auto& thread : liveProfiledThreads) {
    RegisteredThread* registeredThread = thread.mRegisteredThread;
    registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
    if (ActivePS::FeatureJS(aLock)) {
      registeredThread->StopJSSampling();
      RefPtr<ThreadInfo> info = registeredThread->Info();
      if (info->ThreadId() == tid) {
        // We can manually poll the current thread so it stops profiling
        // immediately.
        registeredThread->PollJSSampling();
      } else if (info->IsMainThread()) {
        // Dispatch a runnable to the main thread to call PollJSSampling(),
        // so that we don't have wait for the next JS interrupt callback in
        // order to start profiling JS.
        TriggerPollJSSamplingOnMainThread();
      }
    }
  }

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  if (ActivePS::FeatureNativeAllocations(aLock)) {
    mozilla::profiler::disable_native_allocations();
  }
#endif

  // The Stop() call doesn't actually stop Run(); that happens in this
  // function's caller when the sampler thread is destroyed. Stop() just gives
  // the SamplerThread a chance to do some cleanup with gPSMutex locked.
  SamplerThread* samplerThread = ActivePS::Destroy(aLock);
  samplerThread->Stop(aLock);

  return samplerThread;
}
5326
// Public entry point: stops the profiler if it is running (no-op otherwise),
// then — with gPSMutex released — notifies observers and deletes the sampler
// thread. The unlock-before-notify/delete ordering is load-bearing; see the
// inline comments below.
void profiler_stop() {
  LOG("profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (profiler_is_active()) {
    invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
  }

  ProfilerParent::ProfilerWillStopIfStarted();

#if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  // Remove the hooks early, as native allocations (if they are on) can be
  // quite expensive.
  mozilla::profiler::remove_memory_hooks();
#endif

  SamplerThread* samplerThread;
  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    samplerThread = locked_profiler_stop(lock);
  }

  // We notify observers with gPSMutex unlocked. Otherwise we might get a
  // deadlock, if code run by these functions calls a profiler function that
  // locks gPSMutex, for example when it wants to insert a marker.
  // (This has been seen in practise in bug 1346356, when we were still firing
  // these notifications synchronously.)
  ProfilerParent::ProfilerStopped();
  NotifyObservers("profiler-stopped");

  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
  // return so the join operation within the destructor can complete, but Run()
  // needs to lock gPSMutex to return.
  //
  // Because this call occurs with gPSMutex unlocked, it -- including the final
  // iteration of Run()'s loop -- must be able detect deactivation and return
  // in a way that's safe with respect to other gPSMutex-locking operations
  // that may have occurred in the meantime.
  delete samplerThread;
}
5374
profiler_is_paused()5375 bool profiler_is_paused() {
5376 MOZ_RELEASE_ASSERT(CorePS::Exists());
5377
5378 PSAutoLock lock(gPSMutex);
5379
5380 if (!ActivePS::Exists(lock)) {
5381 return false;
5382 }
5383
5384 return ActivePS::IsPaused(lock);
5385 }
5386
// Registers aCallback to be invoked after the next sampling pass.
// Returns the result of ActivePS::AppendPostSamplingCallback; presumably
// false means the callback could not be registered (e.g. no active
// sampler) — exact conditions are defined there, not visible here.
/* [[nodiscard]] */ bool profiler_callback_after_sampling(
    PostSamplingCallback&& aCallback) {
  LOG("profiler_callback_after_sampling");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback));
}
5397
// Pauses the whole profiler (sampling and recording). Records a Pause entry
// in the buffer and notifies observers after gPSMutex is released. No-op —
// including no notifications — when the profiler is inactive.
void profiler_pause() {
  LOG("profiler_pause");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  invoke_profiler_state_change_callbacks(ProfilingState::Pausing);

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
      // Not paused yet, so this is the first pause, let Java know.
      // TODO: Distinguish Pause and PauseSampling in Java.
      java::GeckoJavaSampler::PauseSampling();
    }
#endif

    // Note: the mirror-image steps in profiler_resume() run in the reverse
    // order (buffer entry, ActivePS flag, then RacyFeatures).
    RacyFeatures::SetPaused();
    ActivePS::SetIsPaused(lock, true);
    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerPaused();
  NotifyObservers("profiler-paused");
}
5429
// Resumes a fully-paused profiler. Performs the exact reverse of
// profiler_pause()'s steps (buffer entry, ActivePS flag, RacyFeatures), then
// notifies observers with gPSMutex released. No-op when inactive.
void profiler_resume() {
  LOG("profiler_resume");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    ActivePS::Buffer(lock).AddEntry(
        ProfileBufferEntry::Resume(profiler_time()));
    ActivePS::SetIsPaused(lock, false);
    RacyFeatures::SetUnpaused();

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
      // Not paused anymore, so this is the last unpause, let Java know.
      // TODO: Distinguish Unpause and UnpauseSampling in Java.
      java::GeckoJavaSampler::UnpauseSampling();
    }
#endif
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerResumed();
  NotifyObservers("profiler-resumed");

  invoke_profiler_state_change_callbacks(ProfilingState::Resumed);
}
5462
profiler_is_sampling_paused()5463 bool profiler_is_sampling_paused() {
5464 MOZ_RELEASE_ASSERT(CorePS::Exists());
5465
5466 PSAutoLock lock(gPSMutex);
5467
5468 if (!ActivePS::Exists(lock)) {
5469 return false;
5470 }
5471
5472 return ActivePS::IsSamplingPaused(lock);
5473 }
5474
// Pauses only periodic sampling (markers and other recording continue).
// Records a PauseSampling buffer entry and notifies observers after gPSMutex
// is released. No-op — including no notifications — when inactive.
void profiler_pause_sampling() {
  LOG("profiler_pause_sampling");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
      // Not paused yet, so this is the first pause, let Java know.
      // TODO: Distinguish Pause and PauseSampling in Java.
      java::GeckoJavaSampler::PauseSampling();
    }
#endif

    // Same step ordering as profiler_pause(): RacyFeatures, ActivePS flag,
    // then the buffer entry.
    RacyFeatures::SetSamplingPaused();
    ActivePS::SetIsSamplingPaused(lock, true);
    ActivePS::Buffer(lock).AddEntry(
        ProfileBufferEntry::PauseSampling(profiler_time()));
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerPausedSampling();
  NotifyObservers("profiler-paused-sampling");
}
5505
// Resumes periodic sampling after profiler_pause_sampling(). Performs the
// reverse of its steps (buffer entry, ActivePS flag, RacyFeatures), then
// notifies observers with gPSMutex released. No-op when inactive.
void profiler_resume_sampling() {
  LOG("profiler_resume_sampling");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock(gPSMutex);

    if (!ActivePS::Exists(lock)) {
      return;
    }

    ActivePS::Buffer(lock).AddEntry(
        ProfileBufferEntry::ResumeSampling(profiler_time()));
    ActivePS::SetIsSamplingPaused(lock, false);
    RacyFeatures::SetSamplingUnpaused();

#if defined(GP_OS_android)
    if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
      // Not paused anymore, so this is the last unpause, let Java know.
      // TODO: Distinguish Unpause and UnpauseSampling in Java.
      java::GeckoJavaSampler::UnpauseSampling();
    }
#endif
  }

  // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
  ProfilerParent::ProfilerResumedSampling();
  NotifyObservers("profiler-resumed-sampling");
}
5536
// Returns true if the profiler is active AND aFeature is enabled in the
// current session. Lock-free: reads RacyFeatures instead of ActivePS.
bool profiler_feature_active(uint32_t aFeature) {
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  return RacyFeatures::IsActiveWithFeature(aFeature);
}
5545
// Writes the active profiler configuration as JSON into aWriter.
// Note: unlike most getters here, this does not check ActivePS::Exists()
// first; presumably WriteActiveConfiguration handles the inactive case
// itself — TODO confirm.
void profiler_write_active_configuration(JSONWriter& aWriter) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock(gPSMutex);
  ActivePS::WriteActiveConfiguration(lock, aWriter);
}
5551
profiler_add_sampled_counter(BaseProfilerCount * aCounter)5552 void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
5553 DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
5554 PSAutoLock lock(gPSMutex);
5555 CorePS::AppendCounter(lock, aCounter);
5556 }
5557
profiler_remove_sampled_counter(BaseProfilerCount * aCounter)5558 void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
5559 DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
5560 PSAutoLock lock(gPSMutex);
5561 // Note: we don't enforce a final sample, though we could do so if the
5562 // profiler was active
5563 CorePS::RemoveCounter(lock, aCounter);
5564 }
5565
// Registers the calling thread with the profiler under the given name, and
// returns its ProfilingStack (nullptr if profiler TLS is not initialized).
// If the thread's TLS shows it is already registered, the existing
// ProfilingStack is returned and the name is NOT updated (see TODO below);
// a marker is recorded on the main thread to surface the re-registration.
ProfilingStack* profiler_register_thread(const char* aName,
                                         void* aGuessStackTop) {
  DEBUG_LOG("profiler_register_thread(%s)", aName);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // Make sure we have a nsThread wrapper for the current thread, and that NSPR
  // knows its name.
  (void)NS_GetCurrentThread();
  NS_SetCurrentThreadName(aName);

  if (!TLSRegisteredThread::IsTLSInited()) {
    return nullptr;
  }

  PSAutoLock lock(gPSMutex);

  if (RegisteredThread* thread = TLSRegisteredThread::RegisteredThread(lock)) {
    // Non-null TLS means this thread was registered before; sanity-check that
    // its registration data is still consistent.
    MOZ_RELEASE_ASSERT(IsRegisteredThreadInRegisteredThreadsList(lock, thread),
                       "Thread being re-registered is not in registered thread "
                       "list even though its TLS is non-null");
    MOZ_RELEASE_ASSERT(
        thread->Info()->ThreadId() == profiler_current_thread_id(),
        "Thread being re-registered has changed its TID");
    LOG("profiler_register_thread(%s) - thread %d already registered as %s",
        aName, profiler_current_thread_id(), thread->Info()->Name());
    // TODO: Use new name. This is currently not possible because the
    // RegisteredThread's ThreadInfo cannot be changed.
    // In the meantime, we record a marker that could be used in the frontend.
    nsCString text("Thread ");
    text.AppendInt(profiler_current_thread_id());
    text.AppendLiteral(" \"");
    text.AppendASCII(thread->Info()->Name());
    text.AppendLiteral("\" attempted to re-register as \"");
    text.AppendASCII(aName);
    text.AppendLiteral("\"");
    PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling,
                         MarkerThreadId::MainThread(), text);

    return &thread->RacyRegisteredThread().ProfilingStack();
  }

  // First registration for this thread: compute its stack top and register.
  void* stackTop = GetStackTop(aGuessStackTop);
  return locked_register_thread(lock, aName, stackTop);
}
5611
// Unregisters the calling thread from the profiler, releasing its
// RegisteredThread object and clearing its TLS. Safe to call after profiler
// shutdown, and tolerant of double-unregistration (which is logged and, off
// the main thread, reported via a marker on the main thread).
void profiler_unregister_thread() {
  PSAutoLock lock(gPSMutex);

  if (!TLSRegisteredThread::IsTLSInited()) {
    return;
  }

  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    // We want to reset the AutoProfilerLabel's ProfilingStack pointer (if
    // needed), because a thread could stay registered after the profiler has
    // shut down.
    TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);
    return;
  }

  // We don't call RegisteredThread::StopJSSampling() here; there's no point
  // doing that for a JS thread that is in the process of disappearing.

  if (RegisteredThread* registeredThread =
          TLSRegisteredThread::RegisteredThread(lock)) {
    // Sanity-check the registration data before tearing it down.
    MOZ_RELEASE_ASSERT(
        IsRegisteredThreadInRegisteredThreadsList(lock, registeredThread),
        "Thread being unregistered is not in registered thread list even "
        "though its TLS is non-null");
    MOZ_RELEASE_ASSERT(
        registeredThread->Info()->ThreadId() == profiler_current_thread_id(),
        "Thread being unregistered has changed its TID");
    // Keep the ThreadInfo alive past the RegisteredThread's destruction below.
    RefPtr<ThreadInfo> info = registeredThread->Info();

    DEBUG_LOG("profiler_unregister_thread: %s", info->Name());

    if (ActivePS::Exists(lock)) {
      ActivePS::UnregisterThread(lock, registeredThread);
    }

    // Clear the pointer to the RegisteredThread object that we're about to
    // destroy, as well as the AutoProfilerLabel's ProfilingStack because the
    // thread is unregistering itself and won't need the ProfilingStack anymore.
    TLSRegisteredThread::ResetRegisteredThread(lock);
    TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);

    // Remove the thread from the list of registered threads. This deletes the
    // registeredThread object.
    CorePS::RemoveRegisteredThread(lock, registeredThread);

    MOZ_RELEASE_ASSERT(
        !IsRegisteredThreadInRegisteredThreadsList(lock, registeredThread),
        "After unregistering, thread should no longer be in the registered "
        "thread list");
    MOZ_RELEASE_ASSERT(
        !TLSRegisteredThread::RegisteredThread(lock),
        "TLS should have been reset after un-registering thread");
  } else {
    // There are two ways TLSRegisteredThread::RegisteredThread() might be
    // empty.
    //
    // - TLSRegisteredThread::Init() failed in locked_register_thread().
    //
    // - We've already called profiler_unregister_thread() for this thread.
    //   (Whether or not it should, this does happen in practice.)
    LOG("profiler_unregister_thread() - thread %d already unregistered",
        profiler_current_thread_id());
    // We cannot record a marker on this thread because it was already
    // unregistered. Send it to the main thread (unless this *is* already the
    // main thread, which has been unregistered); this may be useful to catch
    // mismatched register/unregister pairs in Firefox.
    if (int tid = profiler_current_thread_id();
        tid != profiler_main_thread_id()) {
      nsCString threadIdString;
      threadIdString.AppendInt(tid);
      PROFILER_MARKER_TEXT("profiler_unregister_thread again", OTHER_Profiling,
                           MarkerThreadId::MainThread(), threadIdString);
    }
  }
}
5688
profiler_register_page(uint64_t aTabID,uint64_t aInnerWindowID,const nsCString & aUrl,uint64_t aEmbedderInnerWindowID)5689 void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
5690 const nsCString& aUrl,
5691 uint64_t aEmbedderInnerWindowID) {
5692 DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
5693 aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID);
5694
5695 MOZ_RELEASE_ASSERT(CorePS::Exists());
5696
5697 PSAutoLock lock(gPSMutex);
5698
5699 // When a Browsing context is first loaded, the first url loaded in it will be
5700 // about:blank. Because of that, this call keeps the first non-about:blank
5701 // registration of window and discards the previous one.
5702 RefPtr<PageInformation> pageInfo =
5703 new PageInformation(aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
5704 CorePS::AppendRegisteredPage(lock, std::move(pageInfo));
5705
5706 // After appending the given page to CorePS, look for the expired
5707 // pages and remove them if there are any.
5708 if (ActivePS::Exists(lock)) {
5709 ActivePS::DiscardExpiredPages(lock);
5710 }
5711 }
5712
profiler_unregister_page(uint64_t aRegisteredInnerWindowID)5713 void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
5714 PSAutoLock lock(gPSMutex);
5715
5716 if (!CorePS::Exists()) {
5717 // This function can be called after the main thread has already shut down.
5718 return;
5719 }
5720
5721 // During unregistration, if the profiler is active, we have to keep the
5722 // page information since there may be some markers associated with the given
5723 // page. But if profiler is not active. we have no reason to keep the
5724 // page information here because there can't be any marker associated with it.
5725 if (ActivePS::Exists(lock)) {
5726 ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
5727 } else {
5728 CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
5729 }
5730 }
5731
profiler_clear_all_pages()5732 void profiler_clear_all_pages() {
5733 {
5734 PSAutoLock lock(gPSMutex);
5735
5736 if (!CorePS::Exists()) {
5737 // This function can be called after the main thread has already shut
5738 // down.
5739 return;
5740 }
5741
5742 CorePS::ClearRegisteredPages(lock);
5743 if (ActivePS::Exists(lock)) {
5744 ActivePS::ClearUnregisteredPages(lock);
5745 }
5746 }
5747
5748 // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5749 ProfilerParent::ClearAllPages();
5750 }
5751
5752 namespace geckoprofiler::markers::detail {
5753
profiler_get_inner_window_id_from_docshell(nsIDocShell * aDocshell)5754 Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
5755 nsIDocShell* aDocshell) {
5756 Maybe<uint64_t> innerWindowID = Nothing();
5757 if (aDocshell) {
5758 auto outerWindow = aDocshell->GetWindow();
5759 if (outerWindow) {
5760 auto innerWindow = outerWindow->GetCurrentInnerWindow();
5761 if (innerWindow) {
5762 innerWindowID = Some(innerWindow->WindowID());
5763 }
5764 }
5765 }
5766 return innerWindowID;
5767 }
5768
5769 } // namespace geckoprofiler::markers::detail
5770
profiler_thread_sleep()5771 void profiler_thread_sleep() {
5772 // This function runs both on and off the main thread.
5773
5774 MOZ_RELEASE_ASSERT(CorePS::Exists());
5775
5776 RacyRegisteredThread* racyRegisteredThread =
5777 TLSRegisteredThread::RacyRegisteredThread();
5778 if (!racyRegisteredThread) {
5779 return;
5780 }
5781
5782 racyRegisteredThread->SetSleeping();
5783 }
5784
profiler_thread_wake()5785 void profiler_thread_wake() {
5786 // This function runs both on and off the main thread.
5787
5788 MOZ_RELEASE_ASSERT(CorePS::Exists());
5789
5790 RacyRegisteredThread* racyRegisteredThread =
5791 TLSRegisteredThread::RacyRegisteredThread();
5792 if (!racyRegisteredThread) {
5793 return;
5794 }
5795
5796 racyRegisteredThread->SetAwake();
5797 }
5798
IsThreadBeingProfiled()5799 bool mozilla::profiler::detail::IsThreadBeingProfiled() {
5800 MOZ_RELEASE_ASSERT(CorePS::Exists());
5801
5802 const RacyRegisteredThread* racyRegisteredThread =
5803 TLSRegisteredThread::RacyRegisteredThread();
5804 return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled();
5805 }
5806
IsThreadRegistered()5807 bool mozilla::profiler::detail::IsThreadRegistered() {
5808 MOZ_RELEASE_ASSERT(CorePS::Exists());
5809
5810 const RacyRegisteredThread* racyRegisteredThread =
5811 TLSRegisteredThread::RacyRegisteredThread();
5812 // The simple presence of this TLS pointer is proof that the thread is
5813 // registered.
5814 return !!racyRegisteredThread;
5815 }
5816
profiler_thread_is_sleeping()5817 bool profiler_thread_is_sleeping() {
5818 MOZ_RELEASE_ASSERT(NS_IsMainThread());
5819 MOZ_RELEASE_ASSERT(CorePS::Exists());
5820
5821 RacyRegisteredThread* racyRegisteredThread =
5822 TLSRegisteredThread::RacyRegisteredThread();
5823 if (!racyRegisteredThread) {
5824 return false;
5825 }
5826 return racyRegisteredThread->IsSleeping();
5827 }
5828
// JS engine interrupt hook: polls pending JS-sampling state changes for the
// calling thread.
void profiler_js_interrupt_callback() {
  // This function runs on JS threads being sampled.
  PollJSSamplingForCurrentThread();
}
5833
profiler_time()5834 double profiler_time() {
5835 MOZ_RELEASE_ASSERT(CorePS::Exists());
5836
5837 TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
5838 return delta.ToMilliseconds();
5839 }
5840
// Captures a synchronous stack sample of the current thread into
// aChunkedBuffer. Returns false without writing anything if the profiler is
// inactive, if aCaptureOptions requests no stack, or if the current thread is
// not registered with the profiler.
bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer,
                                     StackCaptureOptions aCaptureOptions) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  if (!ActivePS::Exists(lock) ||
      aCaptureOptions == StackCaptureOptions::NoStack) {
    return false;
  }

  RegisteredThread* registeredThread =
      TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    // If this was called from a non-registered thread, return false and do no
    // more work. This can happen from a memory hook. Before the allocation
    // tracking there was a MOZ_ASSERT() here checking for the existence of a
    // registeredThread.
    return false;
  }

  // Wrap the caller's chunked buffer so DoSyncSample can write entries to it.
  ProfileBuffer profileBuffer(aChunkedBuffer);

  // Sampling is on-thread, so registers are populated synchronously (when
  // native unwinding is available; otherwise they are simply cleared).
  Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
  regs.SyncPopulate();
#else
  regs.Clear();
#endif

  DoSyncSample(lock, *registeredThread, TimeStamp::NowUnfuzzed(), regs,
               profileBuffer, aCaptureOptions);

  return true;
}
5876
profiler_capture_backtrace()5877 UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
5878 MOZ_RELEASE_ASSERT(CorePS::Exists());
5879 AUTO_PROFILER_LABEL("profiler_capture_backtrace", PROFILER);
5880
5881 // Quick is-active check before allocating a buffer.
5882 if (!profiler_is_active()) {
5883 return nullptr;
5884 }
5885
5886 auto buffer = MakeUnique<ProfileChunkedBuffer>(
5887 ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
5888 MakeUnique<ProfileBufferChunkManagerSingle>(
5889 ProfileBufferChunkManager::scExpectedMaximumStackSize));
5890
5891 if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) {
5892 return nullptr;
5893 }
5894
5895 return buffer;
5896 }
5897
profiler_get_backtrace()5898 UniqueProfilerBacktrace profiler_get_backtrace() {
5899 UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace();
5900
5901 if (!buffer) {
5902 return nullptr;
5903 }
5904
5905 return UniqueProfilerBacktrace(
5906 new ProfilerBacktrace("SyncProfile", std::move(buffer)));
5907 }
5908
// Deleter for UniqueProfilerBacktrace; defined here because ProfilerBacktrace
// is only a forward declaration where UniqueProfilerBacktrace is declared.
void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
  delete aBacktrace;
}
5912
// This is a simplified version of profiler_add_marker that can be easily passed
// into the JS engine.
// Records a text marker in the JS category with the given name and text; both
// strings must be null-terminated.
void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) {
  PROFILER_MARKER_TEXT(
      ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {},
      ProfilerString8View::WrapNullTerminatedString(aMarkerText));
}
5920
// Records a "JS allocation" marker (with a captured stack) from the JS
// engine's allocation-sampling hook. No-op when markers are not accepted.
void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info) {
  if (!profiler_can_accept_markers()) {
    return;
  }

  // Marker type describing one sampled JS allocation; streams the allocation's
  // type/class names, coarse type, size, and nursery-ness as JSON properties.
  struct JsAllocationMarker {
    static constexpr mozilla::Span<const char> MarkerTypeName() {
      return mozilla::MakeStringSpan("JS allocation");
    }
    static void StreamJSONMarkerData(
        mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
        const mozilla::ProfilerString16View& aTypeName,
        const mozilla::ProfilerString8View& aClassName,
        const mozilla::ProfilerString16View& aDescriptiveTypeName,
        const mozilla::ProfilerString8View& aCoarseType, uint64_t aSize,
        bool aInNursery) {
      // The string properties are emitted only when non-empty; the 16-bit
      // strings are converted to UTF-8 for JSON output.
      if (aClassName.Length() != 0) {
        aWriter.StringProperty("className", aClassName);
      }
      if (aTypeName.Length() != 0) {
        aWriter.StringProperty(
            "typeName",
            NS_ConvertUTF16toUTF8(aTypeName.Data(), aTypeName.Length()));
      }
      if (aDescriptiveTypeName.Length() != 0) {
        aWriter.StringProperty(
            "descriptiveTypeName",
            NS_ConvertUTF16toUTF8(aDescriptiveTypeName.Data(),
                                  aDescriptiveTypeName.Length()));
      }
      aWriter.StringProperty("coarseType", aCoarseType);
      aWriter.IntProperty("size", aSize);
      aWriter.BoolProperty("inNursery", aInNursery);
    }
    static mozilla::MarkerSchema MarkerTypeDisplay() {
      return mozilla::MarkerSchema::SpecialFrontendLocation{};
    }
  };

  profiler_add_marker(
      "JS allocation", geckoprofiler::category::JS, MarkerStack::Capture(),
      JsAllocationMarker{},
      ProfilerString16View::WrapNullTerminatedString(info.typeName),
      ProfilerString8View::WrapNullTerminatedString(info.className),
      ProfilerString16View::WrapNullTerminatedString(info.descriptiveTypeName),
      ProfilerString8View::WrapNullTerminatedString(info.coarseType), info.size,
      info.inNursery);
}
5969
profiler_is_locked_on_current_thread()5970 bool profiler_is_locked_on_current_thread() {
5971 // This function is used to help users avoid calling `profiler_...` functions
5972 // when the profiler may already have a lock in place, which would prevent a
5973 // 2nd recursive lock (resulting in a crash or a never-ending wait), or a
5974 // deadlock between any two mutexes. So we must return `true` for any of:
5975 // - The main profiler mutex, used by most functions, and/or
5976 // - The buffer mutex, used directly in some functions without locking the
5977 // main mutex, e.g., marker-related functions.
5978 // - The ProfilerParent or ProfilerChild mutex, used to store and process
5979 // buffer chunk updates.
5980 return gPSMutex.IsLockedOnCurrentThread() ||
5981 CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread() ||
5982 ProfilerParent::IsLockedOnCurrentThread() ||
5983 ProfilerChild::IsLockedOnCurrentThread();
5984 }
5985
// Default-constructed TimingStruct used by profiler_add_network_marker when
// the caller provides no timings.
static constexpr net::TimingStruct scEmptyNetTimingStruct;
5987
// Records a "Network" marker describing one phase (start/stop/redirect) of a
// network load, including timings, cache disposition, and redirect details.
// No-op when markers are not accepted. aTimings may be null (zeros are used);
// aSource optionally carries a pre-captured backtrace.
void profiler_add_network_marker(
    nsIURI* aURI, const nsACString& aRequestMethod, int32_t aPriority,
    uint64_t aChannelId, NetworkLoadType aType, mozilla::TimeStamp aStart,
    mozilla::TimeStamp aEnd, int64_t aCount,
    mozilla::net::CacheDisposition aCacheDisposition, uint64_t aInnerWindowID,
    const mozilla::net::TimingStruct* aTimings,
    UniquePtr<ProfileChunkedBuffer> aSource,
    const Maybe<nsDependentCString>& aContentType, nsIURI* aRedirectURI,
    uint32_t aRedirectFlags, uint64_t aRedirectChannelId) {
  if (!profiler_can_accept_markers()) {
    return;
  }

  // Marker name is "Load <low 32 bits of channel id>[: <URI>]".
  nsAutoCStringN<2048> name;
  name.AppendASCII("Load ");
  // top 32 bits are process id of the load
  name.AppendInt(aChannelId & 0xFFFFFFFFu);

  // These can do allocations/frees/etc; avoid if not active
  nsAutoCStringN<2048> spec;
  if (aURI) {
    aURI->GetAsciiSpec(spec);
    name.AppendASCII(": ");
    name.Append(spec);
  }

  nsAutoCString redirect_spec;
  if (aRedirectURI) {
    aRedirectURI->GetAsciiSpec(redirect_spec);
  }

  // Marker type streaming the network payload consumed by the frontend.
  struct NetworkMarker {
    static constexpr Span<const char> MarkerTypeName() {
      return MakeStringSpan("Network");
    }
    static void StreamJSONMarkerData(
        baseprofiler::SpliceableJSONWriter& aWriter, mozilla::TimeStamp aStart,
        mozilla::TimeStamp aEnd, int64_t aID, const ProfilerString8View& aURI,
        const ProfilerString8View& aRequestMethod, NetworkLoadType aType,
        int32_t aPri, int64_t aCount, net::CacheDisposition aCacheDisposition,
        const net::TimingStruct& aTimings,
        const ProfilerString8View& aRedirectURI,
        const ProfilerString8View& aContentType, uint32_t aRedirectFlags,
        int64_t aRedirectChannelId) {
      // This payload still streams a startTime and endTime property because it
      // made the migration to MarkerTiming on the front-end easier.
      aWriter.TimeProperty("startTime", aStart);
      aWriter.TimeProperty("endTime", aEnd);

      aWriter.IntProperty("id", aID);
      aWriter.StringProperty("status", GetNetworkState(aType));
      if (Span<const char> cacheString = GetCacheState(aCacheDisposition);
          !cacheString.IsEmpty()) {
        aWriter.StringProperty("cache", cacheString);
      }
      aWriter.IntProperty("pri", aPri);
      if (aCount > 0) {
        aWriter.IntProperty("count", aCount);
      }
      if (aURI.Length() != 0) {
        aWriter.StringProperty("URI", aURI);
      }
      // Redirect-only properties.
      if (aRedirectURI.Length() != 0) {
        aWriter.StringProperty("RedirectURI", aRedirectURI);
        aWriter.StringProperty("redirectType", getRedirectType(aRedirectFlags));
        aWriter.BoolProperty(
            "isHttpToHttpsRedirect",
            aRedirectFlags & nsIChannelEventSink::REDIRECT_STS_UPGRADE);

        MOZ_ASSERT(
            aRedirectChannelId != 0,
            "aRedirectChannelId should be non-zero for a redirected request");
        aWriter.IntProperty("redirectId", aRedirectChannelId);
      }

      aWriter.StringProperty("requestMethod", aRequestMethod);

      if (aContentType.Length() != 0) {
        aWriter.StringProperty("contentType", aContentType);
      } else {
        aWriter.NullProperty("contentType");
      }

      // Detailed phase timings are not meaningful on the start marker.
      if (aType != NetworkLoadType::LOAD_START) {
        aWriter.TimeProperty("domainLookupStart", aTimings.domainLookupStart);
        aWriter.TimeProperty("domainLookupEnd", aTimings.domainLookupEnd);
        aWriter.TimeProperty("connectStart", aTimings.connectStart);
        aWriter.TimeProperty("tcpConnectEnd", aTimings.tcpConnectEnd);
        aWriter.TimeProperty("secureConnectionStart",
                             aTimings.secureConnectionStart);
        aWriter.TimeProperty("connectEnd", aTimings.connectEnd);
        aWriter.TimeProperty("requestStart", aTimings.requestStart);
        aWriter.TimeProperty("responseStart", aTimings.responseStart);
        aWriter.TimeProperty("responseEnd", aTimings.responseEnd);
      }
    }
    static MarkerSchema MarkerTypeDisplay() {
      return MarkerSchema::SpecialFrontendLocation{};
    }

   private:
    // Maps the load type to the "status" string the frontend expects.
    static Span<const char> GetNetworkState(NetworkLoadType aType) {
      switch (aType) {
        case NetworkLoadType::LOAD_START:
          return MakeStringSpan("STATUS_START");
        case NetworkLoadType::LOAD_STOP:
          return MakeStringSpan("STATUS_STOP");
        case NetworkLoadType::LOAD_REDIRECT:
          return MakeStringSpan("STATUS_REDIRECT");
        default:
          MOZ_ASSERT(false, "Unexpected NetworkLoadType enum value.");
          return MakeStringSpan("");
      }
    }

    // Maps the cache disposition to the "cache" string; empty means "omit the
    // property" (see the IsEmpty() check above).
    static Span<const char> GetCacheState(
        net::CacheDisposition aCacheDisposition) {
      switch (aCacheDisposition) {
        case net::kCacheUnresolved:
          return MakeStringSpan("Unresolved");
        case net::kCacheHit:
          return MakeStringSpan("Hit");
        case net::kCacheHitViaReval:
          return MakeStringSpan("HitViaReval");
        case net::kCacheMissedViaReval:
          return MakeStringSpan("MissedViaReval");
        case net::kCacheMissed:
          return MakeStringSpan("Missed");
        case net::kCacheUnknown:
          return MakeStringSpan("");
        default:
          MOZ_ASSERT(false, "Unexpected CacheDisposition enum value.");
          return MakeStringSpan("");
      }
    }

    // Maps redirect flags to the "redirectType" string; flags are checked in
    // priority order (temporary, permanent, internal).
    static Span<const char> getRedirectType(uint32_t aRedirectFlags) {
      MOZ_ASSERT(aRedirectFlags != 0, "aRedirectFlags should be non-zero");
      if (aRedirectFlags & nsIChannelEventSink::REDIRECT_TEMPORARY) {
        return MakeStringSpan("Temporary");
      }
      if (aRedirectFlags & nsIChannelEventSink::REDIRECT_PERMANENT) {
        return MakeStringSpan("Permanent");
      }
      if (aRedirectFlags & nsIChannelEventSink::REDIRECT_INTERNAL) {
        return MakeStringSpan("Internal");
      }
      MOZ_ASSERT(false, "Couldn't find a redirect type from aRedirectFlags");
      return MakeStringSpan("");
    }
  };

  profiler_add_marker(
      name, geckoprofiler::category::NETWORK,
      {MarkerTiming::Interval(aStart, aEnd),
       MarkerStack::TakeBacktrace(std::move(aSource)),
       MarkerInnerWindowId(aInnerWindowID)},
      NetworkMarker{}, aStart, aEnd, static_cast<int64_t>(aChannelId), spec,
      aRequestMethod, aType, aPriority, aCount, aCacheDisposition,
      aTimings ? *aTimings : scEmptyNetTimingStruct, redirect_spec,
      aContentType ? ProfilerString8View(*aContentType) : ProfilerString8View(),
      aRedirectFlags, aRedirectChannelId);
}
6151
// Records a "Native allocation" marker (with a captured stack) on the main
// thread's track for a sampled allocation or deallocation. Returns true if a
// marker was recorded, false if markers are not accepted or the profiler
// mutex is already held on this thread.
bool profiler_add_native_allocation_marker(int64_t aSize,
                                           uintptr_t aMemoryAddress) {
  if (!profiler_can_accept_markers()) {
    return false;
  }

  // Because native allocations may be intercepted anywhere, blocking while
  // locking the profiler mutex here could end up causing a deadlock if another
  // mutex is taken, which the profiler may indirectly need elsewhere.
  // See bug 1642726 for such a scenario.
  // So instead we bail out if the mutex is already locked. Native allocations
  // are statistically sampled anyway, so missing a few because of this is
  // acceptable.
  if (gPSMutex.IsLockedOnCurrentThread()) {
    return false;
  }

  // Marker type streaming the allocation size, address, and originating
  // thread id as JSON properties.
  struct NativeAllocationMarker {
    static constexpr mozilla::Span<const char> MarkerTypeName() {
      return mozilla::MakeStringSpan("Native allocation");
    }
    static void StreamJSONMarkerData(
        mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize,
        uintptr_t aMemoryAddress, int aThreadId) {
      aWriter.IntProperty("size", aSize);
      aWriter.IntProperty("memoryAddress",
                          static_cast<int64_t>(aMemoryAddress));
      aWriter.IntProperty("threadId", aThreadId);
    }
    static mozilla::MarkerSchema MarkerTypeDisplay() {
      return mozilla::MarkerSchema::SpecialFrontendLocation{};
    }
  };

  profiler_add_marker("Native allocation", geckoprofiler::category::OTHER,
                      {MarkerThreadId::MainThread(), MarkerStack::Capture()},
                      NativeAllocationMarker{}, aSize, aMemoryAddress,
                      profiler_current_thread_id());
  return true;
}
6192
// Attaches the given JSContext to the calling thread's RegisteredThread and,
// since we are on-thread, immediately polls to start JS sampling if needed.
// If the thread is being profiled, its ProfiledThreadData is told where in
// the buffer the context arrived.
void profiler_set_js_context(JSContext* aCx) {
  MOZ_ASSERT(aCx);

  PSAutoLock lock(gPSMutex);

  RegisteredThread* registeredThread =
      TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    return;
  }

  registeredThread->SetJSContext(aCx);

  // This call is on-thread, so we can call PollJSSampling() to start JS
  // sampling immediately.
  registeredThread->PollJSSampling();

  if (ActivePS::Exists(lock)) {
    ProfiledThreadData* profiledThreadData =
        ActivePS::GetProfiledThreadData(lock, registeredThread);
    if (profiledThreadData) {
      profiledThreadData->NotifyReceivedJSContext(
          ActivePS::Buffer(lock).BufferRangeEnd());
    }
  }
}
6219
// Detaches the calling thread's JSContext from its RegisteredThread. If the
// thread is being JS-profiled, sampling is stopped and polled before the
// context is cleared (so pending JS data is flushed), then re-requested for
// any future JSContext. No-op if no context is attached.
void profiler_clear_js_context() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock(gPSMutex);

  RegisteredThread* registeredThread =
      TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    return;
  }

  JSContext* cx = registeredThread->GetJSContext();
  if (!cx) {
    return;
  }

  if (ActivePS::Exists(lock) && ActivePS::FeatureJS(lock)) {
    ProfiledThreadData* profiledThreadData =
        ActivePS::GetProfiledThreadData(lock, registeredThread);
    if (profiledThreadData) {
      profiledThreadData->NotifyAboutToLoseJSContext(
          cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock));

      // Notify the JS context that profiling for this context has stopped.
      // Do this by calling StopJSSampling and PollJSSampling before
      // nulling out the JSContext.
      registeredThread->StopJSSampling();
      registeredThread->PollJSSampling();

      registeredThread->ClearJSContext();

      // Tell the thread that we'd like to have JS sampling on this
      // thread again, once it gets a new JSContext (if ever).
      registeredThread->StartJSSampling(ActivePS::JSFlags(lock));
      return;
    }
  }

  // Not being JS-profiled: just drop the context.
  registeredThread->ClearJSContext();
}
6260
// NOTE: aCollector's methods will be called while the target thread is paused.
// Doing things in those methods like allocating -- which may try to claim
// locks -- is a surefire way to deadlock.
//
// Samples the thread with id aThreadId (0 or the current thread's id means
// "sample myself", done without suspension) into aCollector. Does nothing if
// no registered thread has that id.
void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
                                        ProfilerStackCollector& aCollector,
                                        bool aSampleNative /* = true */) {
  // "Synchronous" here means we are sampling the calling thread itself, in
  // which case it must NOT be suspended.
  const bool isSynchronous = [&aThreadId]() {
    const int currentThreadId = profiler_current_thread_id();
    if (aThreadId == 0) {
      aThreadId = currentThreadId;
      return true;
    }
    return aThreadId == currentThreadId;
  }();

  // Lock the profiler mutex
  PSAutoLock lock(gPSMutex);

  // Linear scan for the registered thread with the requested id.
  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(lock);
  for (auto& thread : registeredThreads) {
    RefPtr<ThreadInfo> info = thread->Info();
    RegisteredThread& registeredThread = *thread.get();

    if (info->ThreadId() == aThreadId) {
      if (info->IsMainThread()) {
        aCollector.SetIsMainThread();
      }

      // Allocate the space for the native stack
      NativeStack nativeStack;

      // Invoked with the target thread's registers (either while it is
      // suspended, or synchronously for the current thread).
      auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
        // The target thread is now suspended. Collect a native backtrace,
        // and call the callback.
        JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
        StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
        StackWalkControl stackWalkControl;
        if constexpr (StackWalkControl::scIsSupported) {
          if (aSampleNative) {
            stackWalkControlIfSupported = &stackWalkControl;
          }
        }
#endif
        const uint32_t jsFramesCount =
            ExtractJsFrames(isSynchronous, registeredThread, aRegs, aCollector,
                            jsFrames, stackWalkControlIfSupported);

#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
        if (aSampleNative) {
          // We can only use FramePointerStackWalk or MozStackWalk from
          // suspend_and_sample_thread as other stackwalking methods may not be
          // initialized.
#  if defined(USE_FRAME_POINTER_STACK_WALK)
          DoFramePointerBacktrace(lock, registeredThread, aRegs, nativeStack,
                                  stackWalkControlIfSupported);
#  elif defined(USE_MOZ_STACK_WALK)
          DoMozStackWalkBacktrace(lock, registeredThread, aRegs, nativeStack,
                                  stackWalkControlIfSupported);
#  else
#    error "Invalid configuration"
#  endif

          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector, jsFrames, jsFramesCount);
        } else
#endif
        {
          // No native stack was walked; merge with the (empty) nativeStack.
          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector, jsFrames, jsFramesCount);

          if (ProfilerFeature::HasLeaf(aFeatures)) {
            aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
          }
        }
      };

      if (isSynchronous) {
        // Sampling the current thread, do NOT suspend it!
        Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
        regs.SyncPopulate();
#else
        regs.Clear();
#endif
        collectStack(regs, TimeStamp::Now());
      } else {
        // Suspend, sample, and then resume the target thread.
        Sampler sampler(lock);
        TimeStamp now = TimeStamp::Now();
        sampler.SuspendAndSampleAndResumeThread(lock, registeredThread, now,
                                                collectStack);

        // NOTE: Make sure to disable the sampler before it is destroyed, in
        // case the profiler is running at the same time.
        sampler.Disable(lock);
      }
      break;
    }
  }
}
6363
6364 // END externally visible functions
6365 ////////////////////////////////////////////////////////////////////////
6366