1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 // There are three kinds of samples done by the profiler.
8 //
9 // - A "periodic" sample is the most complex kind. It is done in response to a
10 //   timer while the profiler is active. It involves writing a stack trace plus
11 //   a variety of other values (memory measurements, responsiveness
12 //   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
13 //   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
14 //   get the register values.
15 //
16 // - A "synchronous" sample is a simpler kind. It is done in response to an API
17 //   call (profiler_get_backtrace()). It involves writing a stack trace and
18 //   little else into a temporary ProfileBuffer, and wrapping that up in a
19 //   ProfilerBacktrace that can be subsequently used in a marker. The sampling
20 //   is done on-thread, and so Registers::SyncPopulate() is used to get the
21 //   register values.
22 //
23 // - A "backtrace" sample is the simplest kind. It is done in response to an
24 //   API call (profiler_suspend_and_sample_thread()). It involves getting a
25 //   stack trace via a ProfilerStackCollector; it does not write to a
26 //   ProfileBuffer. The sampling is done from off-thread, and so uses
27 //   SuspendAndSampleAndResumeThread() to get the register values.
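//
// As a minimal sketch of the "synchronous" kind described above, a caller
// could do something like the following (illustrative only; exactly how the
// backtrace is later attached to a marker depends on the marker API):
//
//   UniquePtr<ProfilerBacktrace> backtrace = profiler_get_backtrace();
//   // ... later, pass `backtrace` along when recording a marker ...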
28 
29 #include "platform.h"
30 
31 #include "GeckoProfiler.h"
32 #include "GeckoProfilerReporter.h"
33 #include "PageInformation.h"
34 #include "ProfileBuffer.h"
35 #include "ProfiledThreadData.h"
36 #include "ProfilerBacktrace.h"
37 #include "ProfilerChild.h"
38 #include "ProfilerCodeAddressService.h"
39 #include "ProfilerIOInterposeObserver.h"
40 #include "ProfilerParent.h"
41 #include "RegisteredThread.h"
42 #include "shared-libraries.h"
43 #include "ThreadInfo.h"
44 #include "VTuneProfiler.h"
45 
46 #include "js/TraceLoggerAPI.h"
47 #include "js/ProfilingFrameIterator.h"
48 #include "memory_hooks.h"
49 #include "mozilla/ArrayUtils.h"
50 #include "mozilla/Atomics.h"
51 #include "mozilla/AutoProfilerLabel.h"
52 #include "mozilla/ExtensionPolicyService.h"
53 #include "mozilla/extensions/WebExtensionPolicy.h"
54 #include "mozilla/net/HttpBaseChannel.h"  // for net::TimingStruct
55 #include "mozilla/Printf.h"
56 #include "mozilla/ProfileBufferChunkManagerSingle.h"
57 #include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
58 #include "mozilla/ProfileChunkedBuffer.h"
59 #include "mozilla/SchedulerGroup.h"
60 #include "mozilla/Services.h"
61 #include "mozilla/StackWalk.h"
62 #ifdef XP_WIN
63 #  include "mozilla/StackWalkThread.h"
64 #endif
65 #include "mozilla/StaticPtr.h"
66 #include "mozilla/ThreadLocal.h"
67 #include "mozilla/TimeStamp.h"
68 #include "mozilla/Tuple.h"
69 #include "mozilla/UniquePtr.h"
70 #include "mozilla/Vector.h"
71 #include "BaseProfiler.h"
72 #include "nsDirectoryServiceDefs.h"
73 #include "nsDirectoryServiceUtils.h"
74 #include "nsIChannelEventSink.h"
75 #include "nsIDocShell.h"
76 #include "nsIHttpProtocolHandler.h"
77 #include "nsIObserverService.h"
78 #include "nsIPropertyBag2.h"
79 #include "nsIXULAppInfo.h"
80 #include "nsIXULRuntime.h"
81 #include "nsJSPrincipals.h"
82 #include "nsMemoryReporterManager.h"
83 #include "nsProfilerStartParams.h"
84 #include "nsScriptSecurityManager.h"
85 #include "nsSystemInfo.h"
86 #include "nsThreadUtils.h"
87 #include "nsXULAppAPI.h"
88 #include "Tracing.h"
89 #include "prdtoa.h"
90 #include "prtime.h"
91 
92 #include <algorithm>
93 #include <errno.h>
94 #include <fstream>
95 #include <ostream>
96 #include <set>
97 #include <sstream>
98 #include <type_traits>
99 
100 #if defined(GP_OS_android)
101 #  include "mozilla/java/GeckoJavaSamplerNatives.h"
102 #endif
103 
104 // Win32 builds always have frame pointers, so FramePointerStackWalk() always
105 // works.
106 #if defined(GP_PLAT_x86_windows)
107 #  define HAVE_NATIVE_UNWIND
108 #  define USE_FRAME_POINTER_STACK_WALK
109 #endif
110 
111 // Win64 builds always omit frame pointers, so we use the slower
112 // MozStackWalk(), which works in that case.
113 #if defined(GP_PLAT_amd64_windows)
114 #  define HAVE_NATIVE_UNWIND
115 #  define USE_MOZ_STACK_WALK
116 #endif
117 
118 // AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
119 // MozStackWalk().
120 #if defined(GP_PLAT_arm64_windows)
121 #  define HAVE_NATIVE_UNWIND
122 #  define USE_MOZ_STACK_WALK
123 #endif
124 
125 // Mac builds only have frame pointers when MOZ_PROFILING is specified, so
126 // FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
127 // on Mac.
128 #if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
129 #  define HAVE_NATIVE_UNWIND
130 #  define USE_FRAME_POINTER_STACK_WALK
131 #endif
132 
133 // Android builds use the ARM Exception Handling ABI to unwind.
134 #if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
135 #  define HAVE_NATIVE_UNWIND
136 #  define USE_EHABI_STACKWALK
137 #  include "EHABIStackWalk.h"
138 #endif
139 
140 // Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
141 #if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
142     defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
143     defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
144     defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
145     defined(GP_PLAT_arm64_freebsd)
146 #  define HAVE_NATIVE_UNWIND
147 #  define USE_LUL_STACKWALK
148 #  include "lul/LulMain.h"
149 #  include "lul/platform-linux-lul.h"
150 
151 // On Linux we use LUL for periodic samples and synchronous samples, but we use
152 // FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
153 // (See the comment at the top of the file for a definition of
154 // periodic/synchronous/backtrace.)
155 //
156 // FramePointerStackWalk can produce incomplete stacks when the current entry is
157 // in a shared library without frame pointers; however, LUL can take a long time
158 // to initialize, which is undesirable for consumers of
159 // profiler_suspend_and_sample_thread() like the Background Hang Reporter.
160 #  if defined(MOZ_PROFILING)
161 #    define USE_FRAME_POINTER_STACK_WALK
162 #  endif
163 #endif
164 
165 // We can only stackwalk without expensive initialization on platforms which
166 // support FramePointerStackWalk or MozStackWalk. LUL Stackwalking requires
167 // initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
168 // which can be expensive.
169 #if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
170 #  define HAVE_FASTINIT_NATIVE_UNWIND
171 #endif
172 
173 #ifdef MOZ_VALGRIND
174 #  include <valgrind/memcheck.h>
175 #else
176 #  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
177 #endif
178 
179 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
180 #  include <ucontext.h>
181 #endif
182 
183 using namespace mozilla;
184 using mozilla::profiler::detail::RacyFeatures;
185 
186 LazyLogModule gProfilerLog("prof");
187 
188 // Statically initialized to 0, then set once from profiler_init(), which should
189 // be called from the main thread before any other use of the profiler.
190 int scProfilerMainThreadId;
191 
192 #if defined(GP_OS_android)
193 class GeckoJavaSampler
194     : public java::GeckoJavaSampler::Natives<GeckoJavaSampler> {
195  private:
196   GeckoJavaSampler();
197 
198  public:
199   static double GetProfilerTime() {
200     if (!profiler_is_active()) {
201       return 0.0;
202     }
203     return profiler_time();
204   };
205 };
206 #endif
207 
208 constexpr static bool ValidateFeatures() {
209   int expectedFeatureNumber = 0;
210 
211   // Feature numbers should start at 0 and increase by 1 for each feature.
212 #define CHECK_FEATURE(n_, str_, Name_, desc_) \
213   if ((n_) != expectedFeatureNumber) {        \
214     return false;                             \
215   }                                           \
216   ++expectedFeatureNumber;
217 
218   PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)
219 
220 #undef CHECK_FEATURE
221 
222   return true;
223 }
224 
225 static_assert(ValidateFeatures(), "Feature list is invalid");
226 
227 // Return all features that are available on this platform.
228 static uint32_t AvailableFeatures() {
229   uint32_t features = 0;
230 
231 #define ADD_FEATURE(n_, str_, Name_, desc_) \
232   ProfilerFeature::Set##Name_(features);
233 
234   // Add all the possible features.
235   PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)
236 
237 #undef ADD_FEATURE
238 
239   // Now remove features not supported on this platform/configuration.
240 #if !defined(GP_OS_android)
241   ProfilerFeature::ClearJava(features);
242 #endif
243 #if !defined(HAVE_NATIVE_UNWIND)
244   ProfilerFeature::ClearStackWalk(features);
245 #endif
246 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
247   if (getenv("XPCOM_MEM_BLOAT_LOG")) {
248     NS_WARNING("XPCOM_MEM_BLOAT_LOG is set, disabling native allocations.");
249     // The memory hooks are available, but the bloat log is enabled, which is
250     // not compatible with the native allocations tracking. See the comment in
251     // enable_native_allocations() (tools/profiler/core/memory_hooks.cpp) for
252     // more information.
253     ProfilerFeature::ClearNativeAllocations(features);
254   }
255 #else
256   // The memory hooks are not available.
257   ProfilerFeature::ClearNativeAllocations(features);
258 #endif
259   if (!JS::TraceLoggerSupported()) {
260     ProfilerFeature::ClearJSTracer(features);
261   }
262 #if !defined(GP_OS_windows)
263   ProfilerFeature::ClearNoTimerResolutionChange(features);
264 #endif
265 
266   return features;
267 }
268 
269 // Default features common to all contexts (even if not available).
270 static uint32_t DefaultFeatures() {
271   return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
272          ProfilerFeature::StackWalk | ProfilerFeature::Threads |
273          ProfilerFeature::CPUUtilization | ProfilerFeature::Screenshots;
274 }
275 
276 // Extra default features when MOZ_PROFILER_STARTUP is set (even if not
277 // available).
278 static uint32_t StartupExtraDefaultFeatures() {
279   // Enable file I/Os by default for startup profiles as startup is heavy on
280   // I/O operations.
281   return ProfilerFeature::FileIOAll;
282 }
283 
284 // This class is a thin shell around mozglue PlatformMutex. It does not
285 // preserve behavior in JS record/replay. It provides a mechanism to determine
286 // whether it is currently locked, so that memory hooks can avoid re-entering
287 // the profiler while this thread already holds the lock.
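//
// For illustration, a memory hook could guard itself roughly like this
// (hypothetical caller, not the actual hook code):
//
//   void OnMalloc(size_t aSize) {
//     if (gPSMutex.IsLockedOnCurrentThread()) {
//       // The profiler already holds its lock on this thread; recording
//       // anything now could re-enter the profiler, so skip it.
//       return;
//     }
//     // ... record the allocation ...
//   }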
288 class PSMutex : private ::mozilla::detail::MutexImpl {
289  public:
290   PSMutex() : ::mozilla::detail::MutexImpl() {}
291 
292   void Lock() {
293     const int tid = profiler_current_thread_id();
294     MOZ_ASSERT(tid != 0);
295 
296     // This is only designed to catch recursive locking:
297     // - If the current thread doesn't own the mutex, `mOwningThreadId` must be
298     //   zero or a different thread id written by another thread; it may change
299     //   again at any time, but never to the current thread's id.
300     // - If the current thread owns the mutex, `mOwningThreadId` must be its id.
301     MOZ_ASSERT(mOwningThreadId != tid);
302 
303     ::mozilla::detail::MutexImpl::lock();
304 
305     // We now hold the mutex, it should have been in the unlocked state before.
306     MOZ_ASSERT(mOwningThreadId == 0);
307     // And we can write our own thread id.
308     mOwningThreadId = tid;
309   }
310 
311   [[nodiscard]] bool TryLock() {
312     const int tid = profiler_current_thread_id();
313     MOZ_ASSERT(tid != 0);
314 
315     // This is only designed to catch recursive locking:
316     // - If the current thread doesn't own the mutex, `mOwningThreadId` must be
317     //   zero or a different thread id written by another thread; it may change
318     //   again at any time, but never to the current thread's id.
319     // - If the current thread owns the mutex, `mOwningThreadId` must be its id.
320     MOZ_ASSERT(mOwningThreadId != tid);
321 
322     if (!::mozilla::detail::MutexImpl::tryLock()) {
323       // Failed to lock, nothing more to do.
324       return false;
325     }
326 
327     // We now hold the mutex, it should have been in the unlocked state before.
328     MOZ_ASSERT(mOwningThreadId == 0);
329     // And we can write our own thread id.
330     mOwningThreadId = tid;
331 
332     return true;
333   }
334 
335   void Unlock() {
336     // This should never trigger! But check just in case something has gone
337     // very wrong (e.g., memory corruption).
338     AssertCurrentThreadOwns();
339 
340     // We're still holding the mutex here, so it's safe to just reset
341     // `mOwningThreadId`.
342     mOwningThreadId = 0;
343 
344     ::mozilla::detail::MutexImpl::unlock();
345   }
346 
347   // Does the current thread own this mutex?
348   // False positives or false negatives are not possible:
349   // - If `true`, the current thread owns the mutex, it has written its own
350   //   `mOwningThreadId` when taking the lock, and no-one else can modify it
351   //   until the current thread itself unlocks the mutex.
352   // - If `false`, the current thread does not own the mutex, therefore either
353   //   `mOwningThreadId` is zero (unlocked), or it is a different thread id
354   //   written by another thread, but it can never be the current thread's id
355   //   until the current thread itself locks the mutex.
356   bool IsLockedOnCurrentThread() const {
357     return mOwningThreadId == profiler_current_thread_id();
358   }
359 
360   void AssertCurrentThreadOwns() const {
361     MOZ_ASSERT(IsLockedOnCurrentThread());
362   }
363 
364   void AssertCurrentThreadDoesNotOwn() const {
365     MOZ_ASSERT(!IsLockedOnCurrentThread());
366   }
367 
368  private:
369   // Zero when unlocked, or the thread id of the owning thread.
370   // This should only be used to compare with the current thread id; any other
371   // number (0 or other id) could change at any time because the current thread
372   // wouldn't own the lock.
373   Atomic<int, MemoryOrdering::SequentiallyConsistent> mOwningThreadId{0};
374 };
375 
376 // RAII class to lock the profiler mutex.
377 class MOZ_RAII PSAutoLock {
378  public:
379   explicit PSAutoLock(PSMutex& aMutex) : mMutex(aMutex) { mMutex.Lock(); }
380   ~PSAutoLock() { mMutex.Unlock(); }
381 
382  private:
383   // Allow PSAutoTryLock to call the following `PSAutoLock(PSMutex&, int)`
384   // constructor through `Maybe<const PSAutoLock>::emplace()`.
385   friend class Maybe<const PSAutoLock>;
386 
387   // Special constructor taking an already-locked mutex. The `int` parameter is
388   // necessary to distinguish it from the main constructor.
389   PSAutoLock(PSMutex& aAlreadyLockedMutex, int) : mMutex(aAlreadyLockedMutex) {
390     mMutex.AssertCurrentThreadOwns();
391   }
392 
393   PSMutex& mMutex;
394 };
395 
396 // RAII class that attempts to lock the profiler mutex. Example usage:
397 //   PSAutoTryLock tryLock(gPSMutex);
398 //   if (tryLock.IsLocked()) { locked_foo(tryLock.LockRef()); }
399 class MOZ_RAII PSAutoTryLock {
400  public:
401   explicit PSAutoTryLock(PSMutex& aMutex) {
402     if (aMutex.TryLock()) {
403       mMaybePSAutoLock.emplace(aMutex, 0);
404     }
405   }
406 
407   // Return true if the mutex was acquired and locked.
408   [[nodiscard]] bool IsLocked() const { return mMaybePSAutoLock.isSome(); }
409 
410   // Assuming the mutex is locked, return a reference to a `PSAutoLock` for that
411   // mutex, which can be passed as proof-of-lock.
412   [[nodiscard]] const PSAutoLock& LockRef() const {
413     MOZ_ASSERT(IsLocked());
414     return mMaybePSAutoLock.ref();
415   }
416 
417  private:
418   // `mMaybePSAutoLock` is `Nothing` if locking failed, otherwise it contains a
419   // `const PSAutoLock` holding the locked mutex, and whose reference may be
420   // passed to functions expecting a proof-of-lock.
421   Maybe<const PSAutoLock> mMaybePSAutoLock;
422 };
423 
424 // Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
425 // fields.
426 typedef const PSAutoLock& PSLockRef;
427 
428 #define PS_GET(type_, name_)      \
429   static type_ name_(PSLockRef) { \
430     MOZ_ASSERT(sInstance);        \
431     return sInstance->m##name_;   \
432   }
433 
434 #define PS_GET_LOCKLESS(type_, name_) \
435   static type_ name_() {              \
436     MOZ_ASSERT(sInstance);            \
437     return sInstance->m##name_;       \
438   }
439 
440 #define PS_GET_AND_SET(type_, name_)                  \
441   PS_GET(type_, name_)                                \
442   static void Set##name_(PSLockRef, type_ a##name_) { \
443     MOZ_ASSERT(sInstance);                            \
444     sInstance->m##name_ = a##name_;                   \
445   }
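// For example, `PS_GET(double, Interval)` expands to roughly:
//
//   static double Interval(PSLockRef) {
//     MOZ_ASSERT(sInstance);
//     return sInstance->mInterval;
//   }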
446 
447 static const size_t MAX_JS_FRAMES = 1024;
448 using JsFrameBuffer = JS::ProfilingFrameIterator::Frame[MAX_JS_FRAMES];
449 
450 // All functions in this file can run on multiple threads unless they have an
451 // NS_IsMainThread() assertion.
452 
453 // This class contains the profiler's core global state, i.e. that which is
454 // valid even when the profiler is not active. Most profile operations can't do
455 // anything useful when this class is not instantiated, so we release-assert
456 // its non-nullness in all such operations.
457 //
458 // Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
459 // PSAutoLock reference as an argument as proof that the gPSMutex is currently
460 // locked. This makes it clear when gPSMutex is locked and helps avoid
461 // accidental unlocked accesses to global state. There are ways to circumvent
462 // this mechanism, but please don't do so without *very* good reason and a
463 // detailed explanation.
464 //
465 // The exceptions to this rule:
466 //
467 // - mProcessStartTime, because it's immutable;
468 //
469 // - each thread's RacyRegisteredThread object is accessible without locking via
470 //   TLSRegisteredThread::RacyRegisteredThread().
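//
// The typical access pattern therefore looks like this (sketch):
//
//   PSAutoLock lock(gPSMutex);
//   if (CorePS::Exists()) {
//     const auto& threads = CorePS::RegisteredThreads(lock);  // proof-of-lock
//     // ... use `threads` while `lock` is held ...
//   }
//
// Lockless getters such as CorePS::ProcessStartTime() need no PSLockRef.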
471 class CorePS {
472  private:
473   CorePS()
474       : mProcessStartTime(TimeStamp::ProcessCreation()),
475         // This needs its own mutex, because it is used concurrently from
476         // functions guarded by gPSMutex as well as others without safety (e.g.,
477         // profiler_add_marker). It is *not* used inside the critical section of
478         // the sampler, because mutexes cannot be used there.
479         mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
480 #ifdef USE_LUL_STACKWALK
481         ,
482         mLul(nullptr)
483 #endif
484   {
485     MOZ_ASSERT(NS_IsMainThread(),
486                "CorePS must be created from the main thread");
487   }
488 
489   ~CorePS() {}
490 
491  public:
492   static void Create(PSLockRef aLock) {
493     MOZ_ASSERT(!sInstance);
494     sInstance = new CorePS();
495   }
496 
497   static void Destroy(PSLockRef aLock) {
498     MOZ_ASSERT(sInstance);
499     delete sInstance;
500     sInstance = nullptr;
501   }
502 
503   // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
504   // being locked. This is because CorePS is instantiated so early on the main
505   // thread that we don't have to worry about it being racy.
506   static bool Exists() { return !!sInstance; }
507 
508   static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
509                         size_t& aProfSize, size_t& aLulSize) {
510     MOZ_ASSERT(sInstance);
511 
512     aProfSize += aMallocSizeOf(sInstance);
513 
514     for (auto& registeredThread : sInstance->mRegisteredThreads) {
515       aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
516     }
517 
518     for (auto& registeredPage : sInstance->mRegisteredPages) {
519       aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
520     }
521 
522     // Measurement of the following things may be added later if DMD finds it
523     // is worthwhile:
524     // - CorePS::mRegisteredThreads itself (its elements' children are
525     // measured above)
526     // - CorePS::mRegisteredPages itself (its elements' children are
527     // measured above)
528     // - CorePS::mInterposeObserver
529 
530 #if defined(USE_LUL_STACKWALK)
531     if (sInstance->mLul) {
532       aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
533     }
534 #endif
535   }
536 
537   // No PSLockRef is needed for this field because it's immutable.
538   PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)
539 
540   // No PSLockRef is needed for this field because it's thread-safe.
541   PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)
542 
543   PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)
544 
545   PS_GET(JsFrameBuffer&, JsFrames)
546 
547   static void AppendRegisteredThread(
548       PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
549     MOZ_ASSERT(sInstance);
550     MOZ_RELEASE_ASSERT(
551         sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
552   }
553 
554   static void RemoveRegisteredThread(PSLockRef,
555                                      RegisteredThread* aRegisteredThread) {
556     MOZ_ASSERT(sInstance);
557     // Remove aRegisteredThread from mRegisteredThreads.
558     for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
559       if (rt.get() == aRegisteredThread) {
560         sInstance->mRegisteredThreads.erase(&rt);
561         return;
562       }
563     }
564   }
565 
566   PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)
567 
568   static void AppendRegisteredPage(PSLockRef,
569                                    RefPtr<PageInformation>&& aRegisteredPage) {
570     MOZ_ASSERT(sInstance);
571     struct RegisteredPageComparator {
572       PageInformation* aA;
573       bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
574     };
575 
576     auto foundPageIter = std::find_if(
577         sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
578         RegisteredPageComparator{aRegisteredPage.get()});
579 
580     if (foundPageIter != sInstance->mRegisteredPages.end()) {
581       if ((*foundPageIter)->Url().EqualsLiteral("about:blank")) {
582         // When a BrowsingContext is loaded, the first url loaded in it will be
583         // about:blank, and if the principal matches, the first document loaded
584         // in it will share an inner window. That's why we should delete the
585         // intermittent about:blank if they share the inner window.
586         sInstance->mRegisteredPages.erase(foundPageIter);
587       } else {
588         // Do not register the same page again.
589         return;
590       }
591     }
592 
593     MOZ_RELEASE_ASSERT(
594         sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
595   }
596 
597   static void RemoveRegisteredPage(PSLockRef,
598                                    uint64_t aRegisteredInnerWindowID) {
599     MOZ_ASSERT(sInstance);
600     // Remove RegisteredPage from mRegisteredPages by given inner window ID.
601     sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
602       return rd->InnerWindowID() == aRegisteredInnerWindowID;
603     });
604   }
605 
606   static void ClearRegisteredPages(PSLockRef) {
607     MOZ_ASSERT(sInstance);
608     sInstance->mRegisteredPages.clear();
609   }
610 
611   PS_GET(const Vector<BaseProfilerCount*>&, Counters)
612 
613   static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
614     MOZ_ASSERT(sInstance);
615     // We don't own the counters; they may be stored in static objects.
616     MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
617   }
618 
619   static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
620     // We may be called to remove a counter after the profiler is stopped or
621     // late in shutdown.
622     if (sInstance) {
623       auto* counter = std::find(sInstance->mCounters.begin(),
624                                 sInstance->mCounters.end(), aCounter);
625       MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
626       sInstance->mCounters.erase(counter);
627     }
628   }
629 
630 #ifdef USE_LUL_STACKWALK
631   static lul::LUL* Lul(PSLockRef) {
632     MOZ_ASSERT(sInstance);
633     return sInstance->mLul.get();
634   }
635   static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
636     MOZ_ASSERT(sInstance);
637     sInstance->mLul = std::move(aLul);
638   }
639 #endif
640 
641   PS_GET_AND_SET(const nsACString&, ProcessName)
642   PS_GET_AND_SET(const nsACString&, ETLDplus1)
643 
644  private:
645   // The singleton instance
646   static CorePS* sInstance;
647 
648   // The time that the process started.
649   const TimeStamp mProcessStartTime;
650 
651   // The thread-safe blocks-oriented buffer into which all profiling data is
652   // recorded.
653   // ActivePS controls the lifetime of the underlying contents buffer: When
654   // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
655   // see ActivePS for further details.
656   // Note: This needs to live here outside of ActivePS, because some producers
657   // are indirectly controlled (e.g., by atomic flags) and therefore may still
658   // attempt to write some data shortly after ActivePS has shutdown and deleted
659   // the underlying buffer in memory.
660   ProfileChunkedBuffer mCoreBuffer;
661 
662   // Info on all the registered threads.
663   // ThreadIds in mRegisteredThreads are unique.
664   Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;
665 
666   // Info on all the registered pages.
667   // InnerWindowIDs in mRegisteredPages are unique.
668   Vector<RefPtr<PageInformation>> mRegisteredPages;
669 
670   // Non-owning pointers to all active counters
671   Vector<BaseProfilerCount*> mCounters;
672 
673 #ifdef USE_LUL_STACKWALK
674   // LUL's state. Null prior to the first activation, non-null thereafter.
675   UniquePtr<lul::LUL> mLul;
676 #endif
677 
678   // Process name, provided by child process initialization code.
679   nsAutoCString mProcessName;
680   // Private name, provided by child process initialization code (eTLD+1 in
681   // Fission).
682   nsAutoCString mETLDplus1;
683 
684   // This memory buffer is used by the MergeStacks mechanism. Previously it was
685   // stack allocated, but this led to a stack overflow, as it was too much
686   // memory. Here the buffer can be pre-allocated, and shared with the
687   // MergeStacks feature as needed. MergeStacks is only run while holding the
688   // lock, so it is safe to have only one instance allocated for all of the
689   // threads.
690   JsFrameBuffer mJsFrames;
691 };
692 
693 CorePS* CorePS::sInstance = nullptr;
694 
695 ProfileChunkedBuffer& profiler_get_core_buffer() {
696   MOZ_ASSERT(CorePS::Exists());
697   return CorePS::CoreBuffer();
698 }
699 
700 class SamplerThread;
701 
702 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
703                                        double aInterval, bool aStackWalkEnabled,
704                                        bool aNoTimerResolutionChange);
705 
706 struct LiveProfiledThreadData {
707   RegisteredThread* mRegisteredThread;
708   UniquePtr<ProfiledThreadData> mProfiledThreadData;
709 };
710 
711 // The buffer size is provided as a number of "entries"; this is the size of
712 // each entry in bytes.
713 constexpr static uint32_t scBytesPerEntry = 8;
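// For example, a capacity of 128 * 1024 entries corresponds to
// 128 * 1024 * 8 = 1 MiB of buffer space.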
714 
715 // This class contains the profiler's global state that is valid only when the
716 // profiler is active. When not instantiated, the profiler is inactive.
717 //
718 // Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
719 // CorePS.
720 //
721 class ActivePS {
722  private:
723   // We need to decide how many chunks of what size we want to fit in the given
724   // total maximum capacity for this process, in the (likely) context of
725   // multiple processes making the same choice, with an inter-process
726   // mechanism to control the overall memory limit.
727 
728   // Minimum chunk size allowed, enough for at least one stack.
729   constexpr static uint32_t scMinimumChunkSize =
730       2 * ProfileBufferChunkManager::scExpectedMaximumStackSize;
731 
732   // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
733   // next), and 2 released chunks (so that one can be recycled when old, leaving
734   // one with some data).
735   constexpr static uint32_t scMinimumNumberOfChunks = 4;
736 
737   // And we want to limit chunks to a maximum size, which is a compromise
738   // between:
739   // - A big size, which helps with reducing the rate of allocations and IPCs.
740   // - A small size, which helps with equalizing the duration of recorded data
741   //   (as the inter-process controller will discard the oldest chunks in all
742   //   Firefox processes).
743   constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;
744 
745  public:
746   // We should be able to store at least the minimum number of the smallest-
747   // possible chunks.
748   constexpr static uint32_t scMinimumBufferSize =
749       scMinimumNumberOfChunks * scMinimumChunkSize;
750   // Note: Keep in sync with GeckoThread.maybeStartGeckoProfiler:
751   // https://searchfox.org/mozilla-central/source/mobile/android/geckoview/src/main/java/org/mozilla/gecko/GeckoThread.java
752   constexpr static uint32_t scMinimumBufferEntries =
753       scMinimumBufferSize / scBytesPerEntry;
754 
755   // Limit to 2GiB.
756   constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
757   constexpr static uint32_t scMaximumBufferEntries =
758       scMaximumBufferSize / scBytesPerEntry;
759 
760   constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
761     if (aEntries <= scMinimumBufferEntries) {
762       return scMinimumBufferEntries;
763     }
764     if (aEntries >= scMaximumBufferEntries) {
765       return scMaximumBufferEntries;
766     }
767     return aEntries;
768   }
769 
770  private:
771   constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
772     return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
773                                  scBytesPerEntry / scMinimumNumberOfChunks,
774                              size_t(scMaximumChunkSize)));
775   }
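  // Worked example (assuming the request is within the clamped range):
  //   ChunkSizeForEntries(8 * 1024 * 1024) = min(8Mi * 8 / 4, 1MiB)
  //                                        = min(16MiB, 1MiB) = 1MiB,
  // i.e. large capacities end up with chunks capped at scMaximumChunkSize.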
776 
777   static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
778     // Filter out any features unavailable in this platform/configuration.
779     aFeatures &= AvailableFeatures();
780 
781     // Always enable ProfilerFeature::Threads if we have a filter, because
782     // users sometimes ask to filter by a list of threads but forget to
783     // explicitly specify ProfilerFeature::Threads.
784     if (aFilterCount > 0) {
785       aFeatures |= ProfilerFeature::Threads;
786     }
787 
788     // Some features imply others.
789     if (aFeatures & ProfilerFeature::FileIOAll) {
790       aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
791     } else if (aFeatures & ProfilerFeature::FileIO) {
792       aFeatures |= ProfilerFeature::MainThreadIO;
793     }
794 
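    // For example, AdjustFeatures(ProfilerFeature::FileIOAll, /* filters */ 1)
    // yields FileIOAll | MainThreadIO | FileIO | Threads, assuming those
    // features are all available on this platform/configuration.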
795     return aFeatures;
796   }
797 
798   ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
799            uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
800            uint64_t aActiveTabID, const Maybe<double>& aDuration)
801       : mGeneration(sNextGeneration++),
802         mCapacity(aCapacity),
803         mDuration(aDuration),
804         mInterval(aInterval),
805         mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
806         mActiveTabID(aActiveTabID),
807         mProfileBufferChunkManager(
808             size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
809             ChunkSizeForEntries(aCapacity.Value())),
810         mProfileBuffer([this]() -> ProfileChunkedBuffer& {
811           CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
812           return CorePS::CoreBuffer();
813         }()),
814         // The new sampler thread doesn't start sampling immediately because the
815         // main loop within Run() is blocked until this function's caller
816         // unlocks gPSMutex.
817         mSamplerThread(NewSamplerThread(
818             aLock, mGeneration, aInterval,
819             ProfilerFeature::HasStackWalk(aFeatures),
820             ProfilerFeature::HasNoTimerResolutionChange(aFeatures))),
821         mInterposeObserver((ProfilerFeature::HasMainThreadIO(aFeatures) ||
822                             ProfilerFeature::HasFileIO(aFeatures) ||
823                             ProfilerFeature::HasFileIOAll(aFeatures))
824                                ? new ProfilerIOInterposeObserver()
825                                : nullptr),
826         mIsPaused(false),
827         mIsSamplingPaused(false)
828 #if defined(GP_OS_linux) || defined(GP_OS_freebsd)
829         ,
830         mWasSamplingPaused(false)
831 #endif
832   {
833     // Deep copy aFilters.
834     MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
835     for (uint32_t i = 0; i < aFilterCount; ++i) {
836       mFilters[i] = aFilters[i];
837     }
838 
839 #if !defined(RELEASE_OR_BETA)
840     if (mInterposeObserver) {
841       // We need to register the observer on the main thread, because we want
842       // to observe IO that happens on the main thread.
843       // IOInterposer needs to be initialized before calling
844       // IOInterposer::Register or our observer will be silently dropped.
845       if (NS_IsMainThread()) {
846         IOInterposer::Init();
847         IOInterposer::Register(IOInterposeObserver::OpAll, mInterposeObserver);
848       } else {
849         RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
850         NS_DispatchToMainThread(
851             NS_NewRunnableFunction("ActivePS::ActivePS", [=]() {
852               IOInterposer::Init();
853               IOInterposer::Register(IOInterposeObserver::OpAll, observer);
854             }));
855       }
856     }
857 #endif
858   }
859 
860   ~ActivePS() {
861 #if !defined(RELEASE_OR_BETA)
862     if (mInterposeObserver) {
863       // We need to unregister the observer on the main thread, because that's
864       // where we've registered it.
865       if (NS_IsMainThread()) {
866         IOInterposer::Unregister(IOInterposeObserver::OpAll,
867                                  mInterposeObserver);
868       } else {
869         RefPtr<ProfilerIOInterposeObserver> observer = mInterposeObserver;
870         NS_DispatchToMainThread(
871             NS_NewRunnableFunction("ActivePS::~ActivePS", [=]() {
872               IOInterposer::Unregister(IOInterposeObserver::OpAll, observer);
873             }));
874       }
875     }
876 #endif
877     CorePS::CoreBuffer().ResetChunkManager();
878   }
879 
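  // Examples of how the filters below are interpreted (illustrative values):
  //   - "GeckoMain" selects any thread whose name contains "geckomain",
  //     case-insensitively;
  //   - "*" selects all threads;
  //   - "pid:1234" selects every thread, but only in the process whose id
  //     is 1234.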
880   bool ThreadSelected(const char* aThreadName) {
881     if (mFilters.empty()) {
882       return true;
883     }
884 
885     std::string name = aThreadName;
886     std::transform(name.begin(), name.end(), name.begin(), ::tolower);
887 
888     for (uint32_t i = 0; i < mFilters.length(); ++i) {
889       std::string filter = mFilters[i];
890 
891       if (filter == "*") {
892         return true;
893       }
894 
895       std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);
896 
897       // Crude, non-UTF-8-compatible, case-insensitive substring search.
898       if (name.find(filter) != std::string::npos) {
899         return true;
900       }
901 
902       // If the filter starts with pid:, check for a pid match
903       if (filter.find("pid:") == 0) {
904         std::string mypid = std::to_string(profiler_current_process_id());
905         if (filter.compare(4, std::string::npos, mypid) == 0) {
906           return true;
907         }
908       }
909     }
910 
911     return false;
912   }
913 
914  public:
915   static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
916                      uint32_t aFeatures, const char** aFilters,
917                      uint32_t aFilterCount, uint64_t aActiveTabID,
918                      const Maybe<double>& aDuration) {
919     MOZ_ASSERT(!sInstance);
920     sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
921                              aFilterCount, aActiveTabID, aDuration);
922   }
923 
924   [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
925     MOZ_ASSERT(sInstance);
926     auto samplerThread = sInstance->mSamplerThread;
927     delete sInstance;
928     sInstance = nullptr;
929 
930     return samplerThread;
931   }
932 
933   static bool Exists(PSLockRef) { return !!sInstance; }
934 
935   static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
936                      const Maybe<double>& aDuration, double aInterval,
937                      uint32_t aFeatures, const char** aFilters,
938                      uint32_t aFilterCount, uint64_t aActiveTabID) {
939     MOZ_ASSERT(sInstance);
940     if (sInstance->mCapacity != aCapacity ||
941         sInstance->mDuration != aDuration ||
942         sInstance->mInterval != aInterval ||
943         sInstance->mFeatures != aFeatures ||
944         sInstance->mFilters.length() != aFilterCount ||
945         sInstance->mActiveTabID != aActiveTabID) {
946       return false;
947     }
948 
949     for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
950       if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
951         return false;
952       }
953     }
954     return true;
955   }
956 
957   static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
958     MOZ_ASSERT(sInstance);
959 
960     size_t n = aMallocSizeOf(sInstance);
961 
962     n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);
963 
964     // Measurement of the following members may be added later if DMD finds it
965     // is worthwhile:
966     // - mLiveProfiledThreads (both the array itself, and the contents)
967     // - mDeadProfiledThreads (both the array itself, and the contents)
968     //
969 
970     return n;
971   }
972 
973   static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
974     MOZ_ASSERT(sInstance);
975     return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
976             sInstance->ThreadSelected(aInfo->Name()));
977   }
978 
979   [[nodiscard]] static bool AppendPostSamplingCallback(
980       PSLockRef, PostSamplingCallback&& aCallback);
981 
982   // Writes out the current active configuration of the profile.
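  // The written object looks roughly like this (illustrative values; the
  // feature and thread strings depend on PROFILER_FOR_EACH_FEATURE and the
  // active filters):
  //
  //   {
  //     "features": ["js", "stackwalk"],
  //     "threads": ["GeckoMain"],
  //     "interval": 1.0,
  //     "capacity": 131072,
  //     "activeTabID": 0
  //   }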
983   static void WriteActiveConfiguration(
984       PSLockRef aLock, JSONWriter& aWriter,
985       const Span<const char>& aPropertyName = MakeStringSpan("")) {
986     if (!sInstance) {
987       if (!aPropertyName.empty()) {
988         aWriter.NullProperty(aPropertyName);
989       } else {
990         aWriter.NullElement();
991       }
992       return;
993     };
994 
995     if (!aPropertyName.empty()) {
996       aWriter.StartObjectProperty(aPropertyName);
997     } else {
998       aWriter.StartObjectElement();
999     }
1000 
1001     {
1002       aWriter.StartArrayProperty("features", aWriter.SingleLineStyle);
1003 #define WRITE_ACTIVE_FEATURES(n_, str_, Name_, desc_)    \
1004   if (profiler_feature_active(ProfilerFeature::Name_)) { \
1005     aWriter.StringElement(str_);                         \
1006   }
1007 
1008       PROFILER_FOR_EACH_FEATURE(WRITE_ACTIVE_FEATURES)
1009 #undef WRITE_ACTIVE_FEATURES
1010       aWriter.EndArray();
1011     }
1012     {
1013       aWriter.StartArrayProperty("threads", aWriter.SingleLineStyle);
1014       for (const auto& filter : sInstance->mFilters) {
1015         aWriter.StringElement(filter);
1016       }
1017       aWriter.EndArray();
1018     }
1019     {
1020       // Now write all the simple values.
1021 
1022       // The interval is also available on profile.meta.interval
1023       aWriter.DoubleProperty("interval", sInstance->mInterval);
1024       aWriter.IntProperty("capacity", sInstance->mCapacity.Value());
1025       if (sInstance->mDuration) {
1026         aWriter.DoubleProperty("duration", sInstance->mDuration.value());
1027       }
1028       // Here, we are converting uint64_t to double. Tab IDs are
1029       // being created using `nsContentUtils::GenerateProcessSpecificId`, which
1030       // is specifically designed to only use 53 of the 64 bits to be lossless
1031       // when passed into and out of JS as a double.
1032       aWriter.DoubleProperty("activeTabID", sInstance->mActiveTabID);
1033     }
1034     aWriter.EndObject();
1035   }
1036 
1037   PS_GET(uint32_t, Generation)
1038 
1039   PS_GET(PowerOfTwo32, Capacity)
1040 
1041   PS_GET(Maybe<double>, Duration)
1042 
1043   PS_GET(double, Interval)
1044 
1045   PS_GET(uint32_t, Features)
1046 
1047   PS_GET(uint64_t, ActiveTabID)
1048 
1049 #define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
1050   static bool Feature##Name_(PSLockRef) {                     \
1051     MOZ_ASSERT(sInstance);                                    \
1052     return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
1053   }
1054 
1055   PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)
1056 
1057 #undef PS_GET_FEATURE
1058 
1059   static uint32_t JSFlags(PSLockRef aLock) {
1060     uint32_t Flags = 0;
1061     Flags |=
1062         FeatureJS(aLock) ? uint32_t(JSInstrumentationFlags::StackSampling) : 0;
1063     Flags |= FeatureJSTracer(aLock)
1064                  ? uint32_t(JSInstrumentationFlags::TraceLogging)
1065                  : 0;
1066     Flags |= FeatureJSAllocations(aLock)
1067                  ? uint32_t(JSInstrumentationFlags::Allocations)
1068                  : 0;
1069     return Flags;
1070   }
1071 
1072   PS_GET(const Vector<std::string>&, Filters)
1073 
1074   // Not using PS_GET, because only the "Controlled" interface of
1075   // `mProfileBufferChunkManager` should be exposed here.
1076   static ProfileBufferChunkManagerWithLocalLimit& ControlledChunkManager(
1077       PSLockRef) {
1078     MOZ_ASSERT(sInstance);
1079     return sInstance->mProfileBufferChunkManager;
1080   }
1081 
1082   static void FulfillChunkRequests(PSLockRef) {
1083     MOZ_ASSERT(sInstance);
1084     sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
1085   }
1086 
1087   static ProfileBuffer& Buffer(PSLockRef) {
1088     MOZ_ASSERT(sInstance);
1089     return sInstance->mProfileBuffer;
1090   }
1091 
1092   static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
1093     MOZ_ASSERT(sInstance);
1094     return sInstance->mLiveProfiledThreads;
1095   }
1096 
1097   // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
1098   // for all threads that should be included in a profile, both for threads
1099   // that are still registered, and for threads that have been unregistered but
1100   // still have data in the buffer.
1101   // For threads that have already been unregistered, the RegisteredThread
1102   // pointer will be null.
1103   // The returned array is sorted by thread register time.
1104   // Do not hold on to the return value across thread registration or profiler
1105   // restarts.
1106   static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
1107   ProfiledThreads(PSLockRef) {
1108     MOZ_ASSERT(sInstance);
1109     Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
1110     MOZ_RELEASE_ASSERT(
1111         array.initCapacity(sInstance->mLiveProfiledThreads.length() +
1112                            sInstance->mDeadProfiledThreads.length()));
1113     for (auto& t : sInstance->mLiveProfiledThreads) {
1114       MOZ_RELEASE_ASSERT(array.append(
1115           std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
1116     }
1117     for (auto& t : sInstance->mDeadProfiledThreads) {
1118       MOZ_RELEASE_ASSERT(
1119           array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
1120     }
1121 
1122     std::sort(array.begin(), array.end(),
1123               [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
1124                  const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
1125                 return a.second->Info()->RegisterTime() <
1126                        b.second->Info()->RegisterTime();
1127               });
1128     return array;
1129   }
1130 
1131   static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
1132     MOZ_ASSERT(sInstance);
1133     Vector<RefPtr<PageInformation>> array;
1134     for (auto& d : CorePS::RegisteredPages(aLock)) {
1135       MOZ_RELEASE_ASSERT(array.append(d));
1136     }
1137     for (auto& d : sInstance->mDeadProfiledPages) {
1138       MOZ_RELEASE_ASSERT(array.append(d));
1139     }
1140     // We don't need to sort the pages as we do for threads, since we won't
1141     // show them as a list.
1142     return array;
1143   }
1144 
1145   // Do a linear search through mLiveProfiledThreads to find the
1146   // ProfiledThreadData object for a RegisteredThread.
1147   static ProfiledThreadData* GetProfiledThreadData(
1148       PSLockRef, RegisteredThread* aRegisteredThread) {
1149     MOZ_ASSERT(sInstance);
1150     for (const LiveProfiledThreadData& thread :
1151          sInstance->mLiveProfiledThreads) {
1152       if (thread.mRegisteredThread == aRegisteredThread) {
1153         return thread.mProfiledThreadData.get();
1154       }
1155     }
1156     return nullptr;
1157   }
1158 
1159   static ProfiledThreadData* AddLiveProfiledThread(
1160       PSLockRef, RegisteredThread* aRegisteredThread,
1161       UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
1162     MOZ_ASSERT(sInstance);
1163     MOZ_RELEASE_ASSERT(
1164         sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
1165             aRegisteredThread, std::move(aProfiledThreadData)}));
1166 
1167     // Return a weak pointer to the ProfiledThreadData object.
1168     return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
1169   }
1170 
1171   static void UnregisterThread(PSLockRef aLockRef,
1172                                RegisteredThread* aRegisteredThread) {
1173     MOZ_ASSERT(sInstance);
1174 
1175     DiscardExpiredDeadProfiledThreads(aLockRef);
1176 
1177     // Find the right entry in the mLiveProfiledThreads array and remove the
1178     // element, moving the ProfiledThreadData object for the thread into the
1179     // mDeadProfiledThreads array.
1180     // The thread's RegisteredThread object gets destroyed here.
1181     for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
1182       LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
1183       if (thread.mRegisteredThread == aRegisteredThread) {
1184         thread.mProfiledThreadData->NotifyUnregistered(
1185             sInstance->mProfileBuffer.BufferRangeEnd());
1186         MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
1187             std::move(thread.mProfiledThreadData)));
1188         sInstance->mLiveProfiledThreads.erase(
1189             &sInstance->mLiveProfiledThreads[i]);
1190         return;
1191       }
1192     }
1193   }
1194 
1195   PS_GET_AND_SET(bool, IsPaused)
1196 
1197   // True if sampling is paused (through the generic `SetIsPaused()` or the
1198   // specific `SetIsSamplingPaused()`).
1199   static bool IsSamplingPaused(PSLockRef lock) {
1200     MOZ_ASSERT(sInstance);
1201     return IsPaused(lock) || sInstance->mIsSamplingPaused;
1202   }
1203 
1204   static void SetIsSamplingPaused(PSLockRef, bool aIsSamplingPaused) {
1205     MOZ_ASSERT(sInstance);
1206     sInstance->mIsSamplingPaused = aIsSamplingPaused;
1207   }
1208 
1209 #if defined(GP_OS_linux) || defined(GP_OS_freebsd)
1210   PS_GET_AND_SET(bool, WasSamplingPaused)
1211 #endif
1212 
1213   static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
1214     MOZ_ASSERT(sInstance);
1215     uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
1216     // Discard any dead threads that were unregistered before bufferRangeStart.
1217     sInstance->mDeadProfiledThreads.eraseIf(
1218         [bufferRangeStart](
1219             const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
1220           Maybe<uint64_t> bufferPosition =
1221               aProfiledThreadData->BufferPositionWhenUnregistered();
1222           MOZ_RELEASE_ASSERT(bufferPosition,
1223                              "should have unregistered this thread");
1224           return *bufferPosition < bufferRangeStart;
1225         });
1226   }
1227 
1228   static void UnregisterPage(PSLockRef aLock,
1229                              uint64_t aRegisteredInnerWindowID) {
1230     MOZ_ASSERT(sInstance);
1231     auto& registeredPages = CorePS::RegisteredPages(aLock);
1232     for (size_t i = 0; i < registeredPages.length(); i++) {
1233       RefPtr<PageInformation>& page = registeredPages[i];
1234       if (page->InnerWindowID() == aRegisteredInnerWindowID) {
1235         page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
1236         MOZ_RELEASE_ASSERT(
1237             sInstance->mDeadProfiledPages.append(std::move(page)));
1238         registeredPages.erase(&registeredPages[i--]);
1239       }
1240     }
1241   }
1242 
1243   static void DiscardExpiredPages(PSLockRef) {
1244     MOZ_ASSERT(sInstance);
1245     uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
1246     // Discard any dead pages that were unregistered before
1247     // bufferRangeStart.
1248     sInstance->mDeadProfiledPages.eraseIf(
1249         [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
1250           Maybe<uint64_t> bufferPosition =
1251               aProfiledPage->BufferPositionWhenUnregistered();
1252           MOZ_RELEASE_ASSERT(bufferPosition,
1253                              "should have unregistered this page");
1254           return *bufferPosition < bufferRangeStart;
1255         });
1256   }
1257 
1258   static void ClearUnregisteredPages(PSLockRef) {
1259     MOZ_ASSERT(sInstance);
1260     sInstance->mDeadProfiledPages.clear();
1261   }
1262 
1263   static void ClearExpiredExitProfiles(PSLockRef) {
1264     MOZ_ASSERT(sInstance);
1265     uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
1266     // Discard exit profiles that were gathered before our buffer RangeStart.
1267     // If we have started to overwrite our data from when the Base profile was
1268     // added, we should get rid of that Base profile because it's now older than
1269     // our oldest Gecko profile data.
1270     //
1271     // When adding: (In practice the starting buffer should be empty)
1272     // v Start == End
1273     // |                 <-- Buffer range, initially empty.
1274     // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
1275     //
1276     // Later, still in range:
1277     // v Start   v End
1278     // |=========|       <-- Buffer range growing.
1279     // ^ mGeckoIndexWhenBaseProfileAdded < Start FALSE -> keep it
1280     //
1281     // Even later, now out of range:
1282     //       v Start      v End
1283     //       |============|       <-- Buffer range full and sliding.
1284     // ^ mGeckoIndexWhenBaseProfileAdded < Start TRUE! -> Discard it
1285     if (sInstance->mBaseProfileThreads &&
1286         sInstance->mGeckoIndexWhenBaseProfileAdded
1287                 .ConvertToProfileBufferIndex() <
1288             CorePS::CoreBuffer().GetState().mRangeStart) {
1289       DEBUG_LOG("ClearExpiredExitProfiles() - Discarding base profile %p",
1290                 sInstance->mBaseProfileThreads.get());
1291       sInstance->mBaseProfileThreads.reset();
1292     }
1293     sInstance->mExitProfiles.eraseIf(
1294         [bufferRangeStart](const ExitProfile& aExitProfile) {
1295           return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
1296         });
1297   }
1298 
1299   static void AddBaseProfileThreads(PSLockRef aLock,
1300                                     UniquePtr<char[]> aBaseProfileThreads) {
1301     MOZ_ASSERT(sInstance);
1302     DEBUG_LOG("AddBaseProfileThreads(%p)", aBaseProfileThreads.get());
1303     sInstance->mBaseProfileThreads = std::move(aBaseProfileThreads);
1304     sInstance->mGeckoIndexWhenBaseProfileAdded =
1305         ProfileBufferBlockIndex::CreateFromProfileBufferIndex(
1306             CorePS::CoreBuffer().GetState().mRangeEnd);
1307   }
1308 
1309   static UniquePtr<char[]> MoveBaseProfileThreads(PSLockRef aLock) {
1310     MOZ_ASSERT(sInstance);
1311 
1312     ClearExpiredExitProfiles(aLock);
1313 
1314     DEBUG_LOG("MoveBaseProfileThreads() - Consuming base profile %p",
1315               sInstance->mBaseProfileThreads.get());
1316     return std::move(sInstance->mBaseProfileThreads);
1317   }
1318 
1319   static void AddExitProfile(PSLockRef aLock, const nsCString& aExitProfile) {
1320     MOZ_ASSERT(sInstance);
1321 
1322     ClearExpiredExitProfiles(aLock);
1323 
1324     MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
1325         ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
1326   }
1327 
1328   static Vector<nsCString> MoveExitProfiles(PSLockRef aLock) {
1329     MOZ_ASSERT(sInstance);
1330 
1331     ClearExpiredExitProfiles(aLock);
1332 
1333     Vector<nsCString> profiles;
1334     MOZ_RELEASE_ASSERT(
1335         profiles.initCapacity(sInstance->mExitProfiles.length()));
1336     for (auto& profile : sInstance->mExitProfiles) {
1337       MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
1338     }
1339     sInstance->mExitProfiles.clear();
1340     return profiles;
1341   }
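  // Flow note (descriptive only, no new behavior): exit profiles (typically
  // JSON handed over by exiting child processes) arrive via AddExitProfile(),
  // are discarded by ClearExpiredExitProfiles() once the buffer position
  // recorded at gather time falls behind BufferRangeStart(), and are drained
  // all at once by MoveExitProfiles().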
1342 
1343 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  static void SetMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
1345     MOZ_ASSERT(sInstance);
1346 
1347     sInstance->mMemoryCounter = aMemoryCounter;
1348   }
1349 
  static bool IsMemoryCounter(const BaseProfilerCount* aMemoryCounter) {
1351     MOZ_ASSERT(sInstance);
1352 
1353     return sInstance->mMemoryCounter == aMemoryCounter;
1354   }
1355 #endif
1356 
1357  private:
1358   // The singleton instance.
1359   static ActivePS* sInstance;
1360 
1361   // We need to track activity generations. If we didn't we could have the
1362   // following scenario.
1363   //
1364   // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
1365   //   gPSMutex, deletes the SamplerThread (which does a join).
1366   //
1367   // - profiler_start() runs on a different thread, locks gPSMutex,
1368   //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
1369   //   completes.
1370   //
1371   // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
1372   //   and continues as if the start/stop pair didn't occur. Also
1373   //   profiler_stop() is stuck, unable to finish.
1374   //
1375   // By checking ActivePS *and* the generation, we can avoid this scenario.
1376   // sNextGeneration is used to track the next generation number; it is static
1377   // because it must persist across different ActivePS instantiations.
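  //
  // Illustrative sketch (accessor and member names here are partly assumed,
  // not taken from this section): a sampler loop iteration could guard
  // against the stale-ActivePS scenario above by capturing its generation
  // once and re-checking it under gPSMutex:
  //
  //   if (!ActivePS::Exists(lock) ||
  //       ActivePS::Generation(lock) != mActivityGeneration) {
  //     return;  // A stop (and possibly a new start) happened; bail out.
  //   }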
1378   const uint32_t mGeneration;
1379   static uint32_t sNextGeneration;
1380 
1381   // The maximum number of entries in mProfileBuffer.
1382   const PowerOfTwo32 mCapacity;
1383 
1384   // The maximum duration of entries in mProfileBuffer, in seconds.
1385   const Maybe<double> mDuration;
1386 
1387   // The interval between samples, measured in milliseconds.
1388   const double mInterval;
1389 
1390   // The profile features that are enabled.
1391   const uint32_t mFeatures;
1392 
1393   // Substrings of names of threads we want to profile.
1394   Vector<std::string> mFilters;
1395 
1396   // ID of the active browser screen's active tab.
1397   // It's being used to determine the profiled tab. It's "0" if we failed to
1398   // get the ID.
1399   const uint64_t mActiveTabID;
1400 
1401   // The chunk manager used by `mProfileBuffer` below.
1402   ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;
1403 
1404   // The buffer into which all samples are recorded.
1405   ProfileBuffer mProfileBuffer;
1406 
1407   // ProfiledThreadData objects for any threads that were profiled at any point
1408   // during this run of the profiler:
1409   //  - mLiveProfiledThreads contains all threads that are still registered, and
1410   //  - mDeadProfiledThreads contains all threads that have already been
1411   //    unregistered but for which there is still data in the profile buffer.
1412   Vector<LiveProfiledThreadData> mLiveProfiledThreads;
1413   Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;
1414 
1415   // Info on all the dead pages.
  // Registered pages are moved to this array after unregistration.
  // We keep them in case they are needed in the profile data, and remove them
  // once we are sure they won't be needed anymore.
1419   Vector<RefPtr<PageInformation>> mDeadProfiledPages;
1420 
1421   // The current sampler thread. This class is not responsible for destroying
1422   // the SamplerThread object; the Destroy() method returns it so the caller
1423   // can destroy it.
1424   SamplerThread* const mSamplerThread;
1425 
1426   // The interposer that records main thread I/O.
1427   RefPtr<ProfilerIOInterposeObserver> mInterposeObserver;
1428 
1429   // Is the profiler fully paused?
1430   bool mIsPaused;
1431 
1432   // Is the profiler periodic sampling paused?
1433   bool mIsSamplingPaused;
1434 
1435 #if defined(GP_OS_linux) || defined(GP_OS_freebsd)
1436   // Used to record whether the sampler was paused just before forking. False
1437   // at all times except just before/after forking.
1438   bool mWasSamplingPaused;
1439 #endif
1440 
1441   // Optional startup profile thread array from BaseProfiler.
1442   UniquePtr<char[]> mBaseProfileThreads;
1443   ProfileBufferBlockIndex mGeckoIndexWhenBaseProfileAdded;
1444 
1445   struct ExitProfile {
1446     nsCString mJSON;
1447     uint64_t mBufferPositionAtGatherTime;
1448   };
1449   Vector<ExitProfile> mExitProfiles;
1450 
1451 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
1452   Atomic<const BaseProfilerCount*> mMemoryCounter;
1453 #endif
1454 };
1455 
1456 ActivePS* ActivePS::sInstance = nullptr;
1457 uint32_t ActivePS::sNextGeneration = 0;
1458 
1459 #undef PS_GET
1460 #undef PS_GET_LOCKLESS
1461 #undef PS_GET_AND_SET
1462 
1463 // The mutex that guards accesses to CorePS and ActivePS.
1464 static PSMutex gPSMutex;
1465 
1466 static PSMutex gProfilerStateChangeMutex;
1467 
1468 struct IdentifiedProfilingStateChangeCallback {
1469   ProfilingStateSet mProfilingStateSet;
1470   ProfilingStateChangeCallback mProfilingStateChangeCallback;
1471   uintptr_t mUniqueIdentifier;
1472 
  explicit IdentifiedProfilingStateChangeCallback(
      ProfilingStateSet aProfilingStateSet,
      ProfilingStateChangeCallback&& aProfilingStateChangeCallback,
      uintptr_t aUniqueIdentifier)
1477       : mProfilingStateSet(aProfilingStateSet),
1478         mProfilingStateChangeCallback(aProfilingStateChangeCallback),
1479         mUniqueIdentifier(aUniqueIdentifier) {}
1480 };
1481 using IdentifiedProfilingStateChangeCallbackUPtr =
1482     UniquePtr<IdentifiedProfilingStateChangeCallback>;
1483 
1484 static Vector<IdentifiedProfilingStateChangeCallbackUPtr>
1485     mIdentifiedProfilingStateChangeCallbacks;
1486 
void profiler_add_state_change_callback(
    ProfilingStateSet aProfilingStateSet,
    ProfilingStateChangeCallback&& aCallback,
    uintptr_t aUniqueIdentifier /* = 0 */) {
1491   gPSMutex.AssertCurrentThreadDoesNotOwn();
1492   PSAutoLock lock(gProfilerStateChangeMutex);
1493 
1494 #ifdef DEBUG
  // Check that a non-zero id is not already in use. But forgive duplicates in
  // non-DEBUG builds; in the worst case the callbacks may get removed too
  // early.
1497   if (aUniqueIdentifier != 0) {
1498     for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
1499          mIdentifiedProfilingStateChangeCallbacks) {
1500       MOZ_ASSERT(idedCallback->mUniqueIdentifier != aUniqueIdentifier);
1501     }
1502   }
1503 #endif  // DEBUG
1504 
1505   if (aProfilingStateSet.contains(ProfilingState::AlreadyActive) &&
1506       profiler_is_active()) {
1507     aCallback(ProfilingState::AlreadyActive);
1508   }
1509 
1510   (void)mIdentifiedProfilingStateChangeCallbacks.append(
1511       MakeUnique<IdentifiedProfilingStateChangeCallback>(
1512           aProfilingStateSet, std::move(aCallback), aUniqueIdentifier));
1513 }
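// Usage sketch (hypothetical caller; the ProfilingStateSet construction below
// assumes an EnumSet-like interface and is illustrative only):
//
//   ProfilingStateSet states;
//   states += ProfilingState::AlreadyActive;
//   states += ProfilingState::RemovingCallback;
//   profiler_add_state_change_callback(
//       states,
//       [](ProfilingState aState) { /* react to the state change */ },
//       reinterpret_cast<uintptr_t>(this));
//   // ... later, e.g. from the owner's destructor:
//   profiler_remove_state_change_callback(reinterpret_cast<uintptr_t>(this));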
1514 
1515 // Remove the callback with the given identifier.
void profiler_remove_state_change_callback(uintptr_t aUniqueIdentifier) {
1517   MOZ_ASSERT(aUniqueIdentifier != 0);
1518   if (aUniqueIdentifier == 0) {
1519     // Forgive zero in non-DEBUG builds.
1520     return;
1521   }
1522 
1523   gPSMutex.AssertCurrentThreadDoesNotOwn();
1524   PSAutoLock lock(gProfilerStateChangeMutex);
1525 
1526   mIdentifiedProfilingStateChangeCallbacks.eraseIf(
1527       [aUniqueIdentifier](
1528           const IdentifiedProfilingStateChangeCallbackUPtr& aIdedCallback) {
1529         if (aIdedCallback->mUniqueIdentifier != aUniqueIdentifier) {
1530           return false;
1531         }
1532         if (aIdedCallback->mProfilingStateSet.contains(
1533                 ProfilingState::RemovingCallback)) {
1534           aIdedCallback->mProfilingStateChangeCallback(
1535               ProfilingState::RemovingCallback);
1536         }
1537         return true;
1538       });
1539 }
1540 
static void invoke_profiler_state_change_callbacks(
    ProfilingState aProfilingState) {
1543   gPSMutex.AssertCurrentThreadDoesNotOwn();
1544   PSAutoLock lock(gProfilerStateChangeMutex);
1545 
1546   for (const IdentifiedProfilingStateChangeCallbackUPtr& idedCallback :
1547        mIdentifiedProfilingStateChangeCallbacks) {
1548     if (idedCallback->mProfilingStateSet.contains(aProfilingState)) {
1549       idedCallback->mProfilingStateChangeCallback(aProfilingState);
1550     }
1551   }
1552 }
1553 
1554 Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);
1555 
1556 // Each live thread has a RegisteredThread, and we store a reference to it in
1557 // TLS. This class encapsulates that TLS, and also handles the associated
1558 // profiling stack used by AutoProfilerLabel.
1559 class TLSRegisteredThread {
1560  public:
1561   // This should only be called once before any other access.
1562   // In this case it's called from `profiler_init()` on the main thread, before
1563   // the main thread registers itself.
  static void Init() {
1565     MOZ_ASSERT(sState == State::Uninitialized, "Already initialized");
1566     AutoProfilerLabel::ProfilingStackOwnerTLS::Init();
1567     MOZ_ASSERT(
1568         AutoProfilerLabel::ProfilingStackOwnerTLS::sState !=
1569             AutoProfilerLabel::ProfilingStackOwnerTLS::State::Uninitialized,
1570         "Unexpected ProfilingStackOwnerTLS::sState after "
1571         "ProfilingStackOwnerTLS::Init()");
1572     sState =
1573         (AutoProfilerLabel::ProfilingStackOwnerTLS::sState ==
1574              AutoProfilerLabel::ProfilingStackOwnerTLS::State::Initialized &&
1575          sRegisteredThread.init())
1576             ? State::Initialized
1577             : State::Unavailable;
1578   }
1579 
  static bool IsTLSInited() {
1581     MOZ_ASSERT(sState != State::Uninitialized,
1582                "TLSRegisteredThread should only be accessed after Init()");
1583     return sState == State::Initialized;
1584   }
1585 
1586   // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
  static class RegisteredThread* RegisteredThread(PSLockRef) {
1588     if (!IsTLSInited()) {
1589       return nullptr;
1590     }
1591     return sRegisteredThread.get();
1592   }
1593 
1594   // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
  static class RacyRegisteredThread* RacyRegisteredThread() {
1596     if (!IsTLSInited()) {
1597       return nullptr;
1598     }
1599     class RegisteredThread* registeredThread = sRegisteredThread.get();
1600     return registeredThread ? &registeredThread->RacyRegisteredThread()
1601                             : nullptr;
1602   }
1603 
1604   // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
1605   // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
1606   // is marginally slower because it requires an extra pointer indirection.
  static ProfilingStack* Stack() {
1608     if (!IsTLSInited()) {
1609       return nullptr;
1610     }
1611     ProfilingStackOwner* profilingStackOwner =
1612         AutoProfilerLabel::ProfilingStackOwnerTLS::Get();
1613     if (!profilingStackOwner) {
1614       return nullptr;
1615     }
1616     return &profilingStackOwner->ProfilingStack();
1617   }
1618 
  static void SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
      PSLockRef, class RegisteredThread* aRegisteredThread) {
1621     if (!IsTLSInited()) {
1622       return;
1623     }
1624     MOZ_RELEASE_ASSERT(
1625         aRegisteredThread,
1626         "Use ResetRegisteredThread() instead of SetRegisteredThread(nullptr)");
1627     sRegisteredThread.set(aRegisteredThread);
1628     ProfilingStackOwner& profilingStackOwner =
1629         aRegisteredThread->RacyRegisteredThread().ProfilingStackOwner();
1630     profilingStackOwner.AddRef();
1631     AutoProfilerLabel::ProfilingStackOwnerTLS::Set(&profilingStackOwner);
1632   }
1633 
1634   // Only reset the registered thread. The AutoProfilerLabel's ProfilingStack
1635   // is kept, because the thread may not have unregistered itself yet, so it may
1636   // still push/pop labels even after the profiler has shut down.
  static void ResetRegisteredThread(PSLockRef) {
1638     if (!IsTLSInited()) {
1639       return;
1640     }
1641     sRegisteredThread.set(nullptr);
1642   }
1643 
1644   // Reset the AutoProfilerLabels' ProfilingStack, because the thread is
1645   // unregistering itself.
  static void ResetAutoProfilerLabelProfilingStack(PSLockRef) {
1647     if (!IsTLSInited()) {
1648       return;
1649     }
1650     MOZ_RELEASE_ASSERT(
1651         AutoProfilerLabel::ProfilingStackOwnerTLS::Get(),
1652         "ResetAutoProfilerLabelProfilingStack should only be called once");
1653     AutoProfilerLabel::ProfilingStackOwnerTLS::Get()->Release();
1654     AutoProfilerLabel::ProfilingStackOwnerTLS::Set(nullptr);
1655   }
1656 
1657  private:
1658   // Only written once from `profiler_init` calling
1659   // `TLSRegisteredThread::Init()`; all reads should only happen after `Init()`,
1660   // so there is no need to make it atomic.
1661   enum class State { Uninitialized = 0, Initialized, Unavailable };
1662   static State sState;
1663 
1664   // This is a non-owning reference to the RegisteredThread;
1665   // CorePS::mRegisteredThreads is the owning reference. On thread
1666   // deregistration, this reference is cleared and the RegisteredThread is
1667   // destroyed.
1668   static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
1669 };
1670 
1671 // Zero-initialized to State::Uninitialized.
1672 /* static */
1673 TLSRegisteredThread::State TLSRegisteredThread::sState;
1674 
1675 /* static */
1676 MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;
1677 
1678 // Only written once from `profiler_init` (through `TLSRegisteredThread::Init()`
1679 // and `AutoProfilerLabel::ProfilingStackOwnerTLS::Init()`); all reads should
1680 // only happen after `Init()`, so there is no need to make it atomic.
1681 // Zero-initialized to State::Uninitialized.
1682 /* static */
1683 AutoProfilerLabel::ProfilingStackOwnerTLS::State
1684     AutoProfilerLabel::ProfilingStackOwnerTLS::sState;
1685 
1686 // Although you can access a thread's ProfilingStack via
1687 // TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
1688 // directly to the ProfilingStack. Here's why.
1689 //
1690 // - We need to be able to push to and pop from the ProfilingStack in
1691 //   AutoProfilerLabel.
1692 //
1693 // - The class functions are hot and must be defined in GeckoProfiler.h so they
1694 //   can be inlined.
1695 //
1696 // - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
1697 //   GeckoProfiler.h.
1698 //
1699 // This second pointer isn't ideal, but does provide a way to satisfy those
1700 // constraints. TLSRegisteredThread is responsible for updating it.
1701 //
1702 // The (Racy)RegisteredThread and AutoProfilerLabel::ProfilingStackOwnerTLS
1703 // co-own the thread's ProfilingStack, so whichever is reset second, is
1704 // responsible for destroying the ProfilingStack; Because MOZ_THREAD_LOCAL
1705 // doesn't support RefPtr, AddRef&Release are done explicitly in
1706 // TLSRegisteredThread.
1707 /* static */
1708 MOZ_THREAD_LOCAL(ProfilingStackOwner*)
1709 AutoProfilerLabel::ProfilingStackOwnerTLS::sProfilingStackOwnerTLS;
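// Ownership sketch (descriptive of the code above, no new behavior):
// SetRegisteredThreadAndAutoProfilerLabelProfilingStack() AddRef()s the
// ProfilingStackOwner before storing it in this TLS slot, and
// ResetAutoProfilerLabelProfilingStack() Release()s it when the thread
// unregisters; the RegisteredThread side holds the other reference, so
// whichever of the two lets go last destroys the ProfilingStack.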
1710 
1711 /* static */
void AutoProfilerLabel::ProfilingStackOwnerTLS::Init() {
1713   MOZ_ASSERT(sState == State::Uninitialized, "Already initialized");
1714   sState =
1715       sProfilingStackOwnerTLS.init() ? State::Initialized : State::Unavailable;
1716 }
1717 
void ProfilingStackOwner::DumpStackAndCrash() const {
1719   fprintf(stderr,
1720           "ProfilingStackOwner::DumpStackAndCrash() thread id: %d, size: %u\n",
1721           profiler_current_thread_id(), unsigned(mProfilingStack.stackSize()));
1722   js::ProfilingStackFrame* allFrames = mProfilingStack.frames;
1723   for (uint32_t i = 0; i < mProfilingStack.stackSize(); i++) {
1724     js::ProfilingStackFrame& frame = allFrames[i];
1725     if (frame.isLabelFrame()) {
1726       fprintf(stderr, "%u: label frame, sp=%p, label='%s' (%s)\n", unsigned(i),
1727               frame.stackAddress(), frame.label(),
1728               frame.dynamicString() ? frame.dynamicString() : "-");
1729     } else {
1730       fprintf(stderr, "%u: non-label frame\n", unsigned(i));
1731     }
1732   }
1733 
1734   MOZ_CRASH("Non-empty stack!");
1735 }
1736 
1737 // The name of the main thread.
1738 static const char* const kMainThreadName = "GeckoMain";
1739 
1740 ////////////////////////////////////////////////////////////////////////
1741 // BEGIN sampling/unwinding code
1742 
1743 // The registers used for stack unwinding and a few other sampling purposes.
1744 // The ctor does nothing; users are responsible for filling in the fields.
1745 class Registers {
1746  public:
  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}
1748 
1749 #if defined(HAVE_NATIVE_UNWIND)
1750   // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
1751   void SyncPopulate();
1752 #endif
1753 
  void Clear() { memset(this, 0, sizeof(*this)); }
1755 
1756   // These fields are filled in by
1757   // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
1758   // samples, and by SyncPopulate() for synchronous samples.
1759   Address mPC;  // Instruction pointer.
1760   Address mSP;  // Stack pointer.
1761   Address mFP;  // Frame pointer.
1762   Address mLR;  // ARM link register.
1763 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
1764   // This contains all the registers, which means it duplicates the four fields
1765   // above. This is ok.
1766   ucontext_t* mContext;  // The context from the signal handler.
1767 #endif
1768 };
1769 
1770 // Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
1771 // looping on corrupted stacks.
1772 static const size_t MAX_NATIVE_FRAMES = 1024;
1773 
1774 struct NativeStack {
1775   void* mPCs[MAX_NATIVE_FRAMES];
1776   void* mSPs[MAX_NATIVE_FRAMES];
1777   size_t mCount;  // Number of frames filled.
1778 
  NativeStack() : mPCs(), mSPs(), mCount(0) {}
1780 };
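// Size note (simple arithmetic on the constant above): with
// MAX_NATIVE_FRAMES == 1024, a NativeStack holds two arrays of 1024 pointers,
// i.e. 2 * 1024 * 8 bytes = 16 KiB on 64-bit platforms. It is stack-allocated
// once per sample in DoSharedSample() below.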
1781 
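// At most one thread may walk a JS stack at any given time. AutoWalkJSStack
// below claims this flag with a compare-exchange and clears it in its
// destructor; if the claim fails, ExtractJsFrames() simply skips JS stack
// walking for that sample.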
1782 Atomic<bool> WALKING_JS_STACK(false);
1783 
1784 struct AutoWalkJSStack {
1785   bool walkAllowed;
1786 
  AutoWalkJSStack() : walkAllowed(false) {
1788     walkAllowed = WALKING_JS_STACK.compareExchange(false, true);
1789   }
1790 
  ~AutoWalkJSStack() {
1792     if (walkAllowed) {
1793       WALKING_JS_STACK = false;
1794     }
1795   }
1796 };
1797 
1798 class StackWalkControl {
1799  public:
1800   struct ResumePoint {
1801     // If lost, the stack walker should resume at these values.
1802     void* resumeSp;  // If null, stop the walker here, don't resume again.
1803     void* resumeBp;
1804     void* resumePc;
1805   };
1806 
1807 #if ((defined(USE_MOZ_STACK_WALK) || defined(USE_FRAME_POINTER_STACK_WALK)) && \
1808      defined(GP_ARCH_amd64))
1809  public:
1810   static constexpr bool scIsSupported = true;
1811 
  void Clear() { mResumePointCount = 0; }
1813 
  size_t ResumePointCount() const { return mResumePointCount; }
1815 
  static constexpr size_t MaxResumePointCount() {
1817     return scMaxResumePointCount;
1818   }
1819 
1820   // Add a resume point. Note that adding anything past MaxResumePointCount()
1821   // would silently fail. In practice this means that stack walking may still
1822   // lose native frames.
  void AddResumePoint(ResumePoint&& aResumePoint) {
1824     // If SP is null, we expect BP and PC to also be null.
1825     MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumeBp);
1826     MOZ_ASSERT_IF(!aResumePoint.resumeSp, !aResumePoint.resumePc);
1827 
1828     // If BP and/or PC are not null, SP must not be null. (But we allow BP/PC to
1829     // be null even if SP is not null.)
1830     MOZ_ASSERT_IF(aResumePoint.resumeBp, aResumePoint.resumeSp);
1831     MOZ_ASSERT_IF(aResumePoint.resumePc, aResumePoint.resumeSp);
1832 
1833     if (mResumePointCount < scMaxResumePointCount) {
1834       mResumePoint[mResumePointCount] = std::move(aResumePoint);
1835       ++mResumePointCount;
1836     }
1837   }
1838 
1839   // Only allow non-modifying range-for loops.
  const ResumePoint* begin() const { return &mResumePoint[0]; }
  const ResumePoint* end() const { return &mResumePoint[mResumePointCount]; }
1842 
1843   // Find the next resume point that would be a caller of the function with the
1844   // given SP; i.e., the resume point with the closest resumeSp > aSp.
  const ResumePoint* GetResumePointCallingSp(void* aSp) const {
1846     const ResumePoint* callingResumePoint = nullptr;
1847     for (const ResumePoint& resumePoint : *this) {
1848       if (resumePoint.resumeSp &&        // This is a potential resume point.
1849           resumePoint.resumeSp > aSp &&  // It is a caller of the given SP.
1850           (!callingResumePoint ||        // This is the first candidate.
1851            resumePoint.resumeSp < callingResumePoint->resumeSp)  // Or better.
1852       ) {
1853         callingResumePoint = &resumePoint;
1854       }
1855     }
1856     return callingResumePoint;
1857   }
1858 
1859  private:
1860   size_t mResumePointCount = 0;
1861   static constexpr size_t scMaxResumePointCount = 32;
1862   ResumePoint mResumePoint[scMaxResumePointCount];
1863 
1864 #else
1865  public:
1866   static constexpr bool scIsSupported = false;
  // Discarded constexpr-if statements are still checked during compilation, so
  // these declarations are necessary even if they are never actually used.
1869   void Clear();
1870   size_t ResumePointCount();
1871   static constexpr size_t MaxResumePointCount();
1872   void AddResumePoint(ResumePoint&& aResumePoint);
1873   const ResumePoint* begin() const;
1874   const ResumePoint* end() const;
1875   const ResumePoint* GetResumePointCallingSp(void* aSp) const;
1876 #endif
1877 };
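// Usage sketch (descriptive of the functions below): ExtractJsFrames() records
// a ResumePoint for each C++ re-entry point reported by the JS
// ProfilingFrameIterator (via getCppEntryRegisters()), and the native stack
// walkers call GetResumePointCallingSp(lastSP) when a walk ends, so they can
// hop over JIT-owned stack ranges and resume walking the calling native
// frames.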
1878 
1879 // Make a copy of the JS stack into a JSFrame array, and return the number of
1880 // copied frames.
1881 // This copy is necessary since, like the native stack, the JS stack is iterated
1882 // youngest-to-oldest and we need to iterate oldest-to-youngest in MergeStacks.
static uint32_t ExtractJsFrames(
    bool aIsSynchronous, const RegisteredThread& aRegisteredThread,
    const Registers& aRegs, ProfilerStackCollector& aCollector,
    JsFrameBuffer aJsFrames, StackWalkControl* aStackWalkControlIfSupported) {
1887   uint32_t jsFramesCount = 0;
1888 
1889   // Only walk jit stack if profiling frame iterator is turned on.
1890   JSContext* context = aRegisteredThread.GetJSContext();
1891   if (context && JS::IsProfilingEnabledForContext(context)) {
1892     AutoWalkJSStack autoWalkJSStack;
1893 
1894     if (autoWalkJSStack.walkAllowed) {
1895       JS::ProfilingFrameIterator::RegisterState registerState;
1896       registerState.pc = aRegs.mPC;
1897       registerState.sp = aRegs.mSP;
1898       registerState.lr = aRegs.mLR;
1899       registerState.fp = aRegs.mFP;
1900 
1901       // Non-periodic sampling passes Nothing() as the buffer write position to
1902       // ProfilingFrameIterator to avoid incorrectly resetting the buffer
1903       // position of sampled JIT frames inside the JS engine.
1904       Maybe<uint64_t> samplePosInBuffer;
1905       if (!aIsSynchronous) {
1906         // aCollector.SamplePositionInBuffer() will return Nothing() when
1907         // profiler_suspend_and_sample_thread is called from the background hang
1908         // reporter.
1909         samplePosInBuffer = aCollector.SamplePositionInBuffer();
1910       }
1911 
1912       for (JS::ProfilingFrameIterator jsIter(context, registerState,
1913                                              samplePosInBuffer);
1914            !jsIter.done(); ++jsIter) {
1915         if (aIsSynchronous || jsIter.isWasm()) {
1916           jsFramesCount +=
1917               jsIter.extractStack(aJsFrames, jsFramesCount, MAX_JS_FRAMES);
1918           if (jsFramesCount == MAX_JS_FRAMES) {
1919             break;
1920           }
1921         } else {
1922           Maybe<JS::ProfilingFrameIterator::Frame> frame =
1923               jsIter.getPhysicalFrameWithoutLabel();
1924           if (frame.isSome()) {
1925             aJsFrames[jsFramesCount++] = std::move(frame).ref();
1926             if (jsFramesCount == MAX_JS_FRAMES) {
1927               break;
1928             }
1929           }
1930         }
1931 
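        // On supported platforms, also remember where native (C++) execution
        // re-enters above this JS frame, so the native walker can later resume
        // from there.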
1932         if constexpr (StackWalkControl::scIsSupported) {
1933           if (aStackWalkControlIfSupported) {
1934             jsIter.getCppEntryRegisters().apply(
1935                 [&](const JS::ProfilingFrameIterator::RegisterState&
1936                         aCppEntry) {
1937                   StackWalkControl::ResumePoint resumePoint;
1938                   resumePoint.resumeSp = aCppEntry.sp;
1939                   resumePoint.resumeBp = aCppEntry.fp;
1940                   resumePoint.resumePc = aCppEntry.pc;
1941                   aStackWalkControlIfSupported->AddResumePoint(
1942                       std::move(resumePoint));
1943                 });
1944           }
1945         } else {
1946           MOZ_ASSERT(!aStackWalkControlIfSupported,
1947                      "aStackWalkControlIfSupported should be null when "
1948                      "!StackWalkControl::scIsSupported");
1949           (void)aStackWalkControlIfSupported;
1950         }
1951       }
1952     }
1953   }
1954 
1955   return jsFramesCount;
1956 }
1957 
1958 // Merges the profiling stack, native stack, and JS stack, outputting the
1959 // details to aCollector.
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
                        const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, const NativeStack& aNativeStack,
                        ProfilerStackCollector& aCollector,
                        JsFrameBuffer aJsFrames, uint32_t aJsFramesCount) {
1965   // WARNING: this function runs within the profiler's "critical section".
1966   // WARNING: this function might be called while the profiler is inactive, and
1967   //          cannot rely on ActivePS.
1968 
1969   const ProfilingStack& profilingStack =
1970       aRegisteredThread.RacyRegisteredThread().ProfilingStack();
1971   const js::ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
1972   uint32_t profilingStackFrameCount = profilingStack.stackSize();
1973 
  // While the profiling stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to
  // aCollector oldest-to-youngest. Thus, iterate over the profiling stack
  // forwards and over the JS and native arrays backwards. Note: this means the
  // terminating condition for jsIndex and nativeIndex is that they drop below
  // 0.
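  //
  // For example, if the profiling stack was captured as [A (oldest), B] and
  // the native stack as [n3 (youngest), n2, n1], each loop iteration below
  // compares the candidates' stack addresses and emits the one with the
  // greatest address (the oldest remaining frame), so callers always precede
  // their callees in the output.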
1979   uint32_t profilingStackIndex = 0;
1980   int32_t jsIndex = aJsFramesCount - 1;
1981   int32_t nativeIndex = aNativeStack.mCount - 1;
1982 
1983   uint8_t* lastLabelFrameStackAddr = nullptr;
1984   uint8_t* jitEndStackAddr = nullptr;
1985 
1986   // Iterate as long as there is at least one frame remaining.
1987   while (profilingStackIndex != profilingStackFrameCount || jsIndex >= 0 ||
1988          nativeIndex >= 0) {
1989     // There are 1 to 3 frames available. Find and add the oldest.
1990     uint8_t* profilingStackAddr = nullptr;
1991     uint8_t* jsStackAddr = nullptr;
1992     uint8_t* nativeStackAddr = nullptr;
1993     uint8_t* jsActivationAddr = nullptr;
1994 
1995     if (profilingStackIndex != profilingStackFrameCount) {
1996       const js::ProfilingStackFrame& profilingStackFrame =
1997           profilingStackFrames[profilingStackIndex];
1998 
1999       if (profilingStackFrame.isLabelFrame() ||
2000           profilingStackFrame.isSpMarkerFrame()) {
2001         lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
2002       }
2003 
2004       // Skip any JS_OSR frames. Such frames are used when the JS interpreter
2005       // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
2006       // To avoid both the profiling stack frame and jit frame being recorded
2007       // (and showing up twice), the interpreter marks the interpreter
2008       // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
2009       if (profilingStackFrame.isOSRFrame()) {
2010         profilingStackIndex++;
2011         continue;
2012       }
2013 
2014       MOZ_ASSERT(lastLabelFrameStackAddr);
2015       profilingStackAddr = lastLabelFrameStackAddr;
2016     }
2017 
2018     if (jsIndex >= 0) {
2019       jsStackAddr = (uint8_t*)aJsFrames[jsIndex].stackAddress;
2020       jsActivationAddr = (uint8_t*)aJsFrames[jsIndex].activation;
2021     }
2022 
2023     if (nativeIndex >= 0) {
2024       nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
2025     }
2026 
2027     // If there's a native stack frame which has the same SP as a profiling
2028     // stack frame, pretend we didn't see the native stack frame.  Ditto for a
2029     // native stack frame which has the same SP as a JS stack frame.  In effect
2030     // this means profiling stack frames or JS frames trump conflicting native
2031     // frames.
2032     if (nativeStackAddr && (profilingStackAddr == nativeStackAddr ||
2033                             jsStackAddr == nativeStackAddr)) {
2034       nativeStackAddr = nullptr;
2035       nativeIndex--;
2036       MOZ_ASSERT(profilingStackAddr || jsStackAddr);
2037     }
2038 
2039     // Sanity checks.
2040     MOZ_ASSERT_IF(profilingStackAddr,
2041                   profilingStackAddr != jsStackAddr &&
2042                       profilingStackAddr != nativeStackAddr);
2043     MOZ_ASSERT_IF(jsStackAddr, jsStackAddr != profilingStackAddr &&
2044                                    jsStackAddr != nativeStackAddr);
2045     MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr &&
2046                                        nativeStackAddr != jsStackAddr);
2047 
2048     // Check to see if profiling stack frame is top-most.
2049     if (profilingStackAddr > jsStackAddr &&
2050         profilingStackAddr > nativeStackAddr) {
2051       MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
2052       const js::ProfilingStackFrame& profilingStackFrame =
2053           profilingStackFrames[profilingStackIndex];
2054 
2055       // Sp marker frames are just annotations and should not be recorded in
2056       // the profile.
2057       if (!profilingStackFrame.isSpMarkerFrame()) {
2058         // The JIT only allows the top-most frame to have a nullptr pc.
2059         MOZ_ASSERT_IF(
2060             profilingStackFrame.isJsFrame() && profilingStackFrame.script() &&
2061                 !profilingStackFrame.pc(),
2062             &profilingStackFrame ==
2063                 &profilingStack.frames[profilingStack.stackSize() - 1]);
2064         if (aIsSynchronous && profilingStackFrame.categoryPair() ==
2065                                   JS::ProfilingCategoryPair::PROFILER) {
2066           // For stacks captured synchronously (ie. marker stacks), stop
2067           // walking the stack as soon as we enter the profiler category,
2068           // to avoid showing profiler internal code in marker stacks.
2069           return;
2070         }
2071         aCollector.CollectProfilingStackFrame(profilingStackFrame);
2072       }
2073       profilingStackIndex++;
2074       continue;
2075     }
2076 
2077     // Check to see if JS jit stack frame is top-most
2078     if (jsStackAddr > nativeStackAddr) {
2079       MOZ_ASSERT(jsIndex >= 0);
2080       const JS::ProfilingFrameIterator::Frame& jsFrame = aJsFrames[jsIndex];
2081       jitEndStackAddr = (uint8_t*)jsFrame.endStackAddress;
2082       // Stringifying non-wasm JIT frames is delayed until streaming time. To
2083       // re-lookup the entry in the JitcodeGlobalTable, we need to store the
2084       // JIT code address (OptInfoAddr) in the circular buffer.
2085       //
      // Note that we cannot do this when we are synchronously sampling the
2087       // current thread; that is, when called from profiler_get_backtrace. The
2088       // captured backtrace is usually externally stored for an indeterminate
2089       // amount of time, such as in nsRefreshDriver. Problematically, the
2090       // stored backtrace may be alive across a GC during which the profiler
2091       // itself is disabled. In that case, the JS engine is free to discard its
2092       // JIT code. This means that if we inserted such OptInfoAddr entries into
2093       // the buffer, nsRefreshDriver would now be holding on to a backtrace
2094       // with stale JIT code return addresses.
2095       if (aIsSynchronous ||
2096           jsFrame.kind == JS::ProfilingFrameIterator::Frame_Wasm) {
2097         aCollector.CollectWasmFrame(jsFrame.label);
2098       } else if (jsFrame.kind ==
2099                  JS::ProfilingFrameIterator::Frame_BaselineInterpreter) {
2100         // Materialize a ProfilingStackFrame similar to the C++ Interpreter. We
2101         // also set the IS_BLINTERP_FRAME flag to differentiate though.
2102         JSScript* script = jsFrame.interpreterScript;
2103         jsbytecode* pc = jsFrame.interpreterPC();
2104         js::ProfilingStackFrame stackFrame;
2105         constexpr uint32_t ExtraFlags =
2106             uint32_t(js::ProfilingStackFrame::Flags::IS_BLINTERP_FRAME);
2107         stackFrame.initJsFrame<JS::ProfilingCategoryPair::JS_BaselineInterpret,
2108                                ExtraFlags>("", jsFrame.label, script, pc,
2109                                            jsFrame.realmID);
2110         aCollector.CollectProfilingStackFrame(stackFrame);
2111       } else {
2112         MOZ_ASSERT(jsFrame.kind == JS::ProfilingFrameIterator::Frame_Ion ||
2113                    jsFrame.kind == JS::ProfilingFrameIterator::Frame_Baseline);
2114         aCollector.CollectJitReturnAddr(jsFrame.returnAddress());
2115       }
2116 
2117       jsIndex--;
2118       continue;
2119     }
2120 
2121     // If we reach here, there must be a native stack frame and it must be the
2122     // greatest frame.
2123     if (nativeStackAddr &&
2124         // If the latest JS frame was JIT, this could be the native frame that
2125         // corresponds to it. In that case, skip the native frame, because
2126         // there's no need for the same frame to be present twice in the stack.
2127         // The JS frame can be considered the symbolicated version of the native
2128         // frame.
2129         (!jitEndStackAddr || nativeStackAddr < jitEndStackAddr) &&
        // This might still be a JIT operation; check to make sure it is not
        // within the range of the next JS stack's activation address.
2132         (!jsActivationAddr || nativeStackAddr > jsActivationAddr)) {
2133       MOZ_ASSERT(nativeIndex >= 0);
2134       void* addr = (void*)aNativeStack.mPCs[nativeIndex];
2135       aCollector.CollectNativeLeafAddr(addr);
2136     }
2137     if (nativeIndex >= 0) {
2138       nativeIndex--;
2139     }
2140   }
2141 
2142   // Update the JS context with the current profile sample buffer generation.
2143   //
2144   // Only do this for periodic samples. We don't want to do this for
2145   // synchronous samples, and we also don't want to do it for calls to
2146   // profiler_suspend_and_sample_thread() from the background hang reporter -
2147   // in that case, aCollector.BufferRangeStart() will return Nothing().
2148   if (!aIsSynchronous) {
2149     aCollector.BufferRangeStart().apply(
2150         [&aRegisteredThread](uint64_t aBufferRangeStart) {
2151           JSContext* context = aRegisteredThread.GetJSContext();
2152           if (context) {
2153             JS::SetJSContextProfilerSampleBufferRangeStart(context,
2154                                                            aBufferRangeStart);
2155           }
2156         });
2157   }
2158 }
2159 
2160 #if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
2161 static HANDLE GetThreadHandle(PlatformData* aData);
2162 #endif
2163 
2164 #if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
2167   NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
2168   MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
2169   nativeStack->mSPs[nativeStack->mCount] = aSP;
2170   nativeStack->mPCs[nativeStack->mCount] = aPC;
2171   nativeStack->mCount++;
2172 }
2173 #endif
2174 
2175 #if defined(USE_FRAME_POINTER_STACK_WALK)
static void DoFramePointerBacktrace(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread, Registers aRegs,
    NativeStack& aNativeStack, StackWalkControl* aStackWalkControlIfSupported) {
2179   // WARNING: this function runs within the profiler's "critical section".
2180   // WARNING: this function might be called while the profiler is inactive, and
2181   //          cannot rely on ActivePS.
2182 
2183   // Start with the current function. We use 0 as the frame number here because
2184   // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
2185   // but it doesn't matter because StackWalkCallback() doesn't use the frame
2186   // number argument.
2187   StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
2188 
2189   const void* const stackEnd = aRegisteredThread.StackTop();
2190 
2191   // This is to check forward-progress after using a resume point.
2192   void* previousResumeSp = nullptr;
2193 
2194   for (;;) {
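    // Give up if the current SP/FP pair doesn't describe a plausible frame
    // within this thread's stack bounds.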
2195     if (!(aRegs.mSP && aRegs.mSP <= aRegs.mFP && aRegs.mFP <= stackEnd)) {
2196       break;
2197     }
2198     FramePointerStackWalk(StackWalkCallback,
2199                           uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
2200                           &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
2201                           const_cast<void*>(stackEnd));
2202 
2203     if constexpr (!StackWalkControl::scIsSupported) {
2204       break;
2205     } else {
2206       if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
2207         // No room to add more frames.
2208         break;
2209       }
2210       if (!aStackWalkControlIfSupported ||
2211           aStackWalkControlIfSupported->ResumePointCount() == 0) {
2212         // No resume information.
2213         break;
2214       }
2215       void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
2216       if (previousResumeSp &&
2217           ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
2218         // No progress after the previous resume point.
2219         break;
2220       }
2221       const StackWalkControl::ResumePoint* resumePoint =
2222           aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
2223       if (!resumePoint) {
2224         break;
2225       }
2226       void* sp = resumePoint->resumeSp;
2227       if (!sp) {
2228         // Null SP in a resume point means we stop here.
2229         break;
2230       }
2231       void* pc = resumePoint->resumePc;
2232       StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
2233                         &aNativeStack);
2234       ++aNativeStack.mCount;
2235       if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
2236         break;
2237       }
2238       // Prepare context to resume stack walking.
2239       aRegs.mPC = (Address)pc;
2240       aRegs.mSP = (Address)sp;
2241       aRegs.mFP = (Address)resumePoint->resumeBp;
2242 
2243       previousResumeSp = sp;
2244     }
2245   }
2246 }
2247 #endif
2248 
2249 #if defined(USE_MOZ_STACK_WALK)
static void DoMozStackWalkBacktrace(
    PSLockRef aLock, const RegisteredThread& aRegisteredThread,
    const Registers& aRegs, NativeStack& aNativeStack,
    StackWalkControl* aStackWalkControlIfSupported) {
2254   // WARNING: this function runs within the profiler's "critical section".
2255   // WARNING: this function might be called while the profiler is inactive, and
2256   //          cannot rely on ActivePS.
2257 
2258   // Start with the current function. We use 0 as the frame number here because
2259   // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
2260   // it doesn't matter because StackWalkCallback() doesn't use the frame number
2261   // argument.
2262   StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
2263 
2264   HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
2265   MOZ_ASSERT(thread);
2266 
2267   CONTEXT context_buf;
2268   CONTEXT* context = nullptr;
2269   if constexpr (StackWalkControl::scIsSupported) {
2270     context = &context_buf;
2271     memset(&context_buf, 0, sizeof(CONTEXT));
2272     context_buf.ContextFlags = CONTEXT_FULL;
2273 #  if defined(_M_AMD64)
2274     context_buf.Rsp = (DWORD64)aRegs.mSP;
2275     context_buf.Rbp = (DWORD64)aRegs.mFP;
2276     context_buf.Rip = (DWORD64)aRegs.mPC;
2277 #  else
2278     static_assert(!StackWalkControl::scIsSupported,
2279                   "Mismatched support between StackWalkControl and "
2280                   "DoMozStackWalkBacktrace");
2281 #  endif
2282   } else {
2283     context = nullptr;
2284   }
2285 
2286   // This is to check forward-progress after using a resume point.
2287   void* previousResumeSp = nullptr;
2288 
2289   for (;;) {
2290     MozStackWalkThread(StackWalkCallback,
2291                        uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount),
2292                        &aNativeStack, thread, context);
2293 
2294     if constexpr (!StackWalkControl::scIsSupported) {
2295       break;
2296     } else {
2297       if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
2298         // No room to add more frames.
2299         break;
2300       }
2301       if (!aStackWalkControlIfSupported ||
2302           aStackWalkControlIfSupported->ResumePointCount() == 0) {
2303         // No resume information.
2304         break;
2305       }
2306       void* lastSP = aNativeStack.mSPs[aNativeStack.mCount - 1];
2307       if (previousResumeSp &&
2308           ((uintptr_t)lastSP <= (uintptr_t)previousResumeSp)) {
2309         // No progress after the previous resume point.
2310         break;
2311       }
2312       const StackWalkControl::ResumePoint* resumePoint =
2313           aStackWalkControlIfSupported->GetResumePointCallingSp(lastSP);
2314       if (!resumePoint) {
2315         break;
2316       }
2317       void* sp = resumePoint->resumeSp;
2318       if (!sp) {
2319         // Null SP in a resume point means we stop here.
2320         break;
2321       }
2322       void* pc = resumePoint->resumePc;
2323       StackWalkCallback(/* frameNum */ aNativeStack.mCount, pc, sp,
2324                         &aNativeStack);
2325       ++aNativeStack.mCount;
2326       if (aNativeStack.mCount >= MAX_NATIVE_FRAMES) {
2327         break;
2328       }
2329       // Prepare context to resume stack walking.
2330       memset(&context_buf, 0, sizeof(CONTEXT));
2331       context_buf.ContextFlags = CONTEXT_FULL;
2332 #  if defined(_M_AMD64)
2333       context_buf.Rsp = (DWORD64)sp;
2334       context_buf.Rbp = (DWORD64)resumePoint->resumeBp;
2335       context_buf.Rip = (DWORD64)pc;
2336 #  else
2337       static_assert(!StackWalkControl::scIsSupported,
2338                     "Mismatched support between StackWalkControl and "
2339                     "DoMozStackWalkBacktrace");
2340 #  endif
2341       previousResumeSp = sp;
2342     }
2343   }
2344 }
2345 #endif
2346 
2347 #ifdef USE_EHABI_STACKWALK
static void DoEHABIBacktrace(PSLockRef aLock,
                             const RegisteredThread& aRegisteredThread,
                             const Registers& aRegs, NativeStack& aNativeStack,
                             StackWalkControl* aStackWalkControlIfSupported) {
2352   // WARNING: this function runs within the profiler's "critical section".
2353   // WARNING: this function might be called while the profiler is inactive, and
2354   //          cannot rely on ActivePS.
2355 
2356   aNativeStack.mCount =
2357       EHABIStackWalk(aRegs.mContext->uc_mcontext,
2358                      const_cast<void*>(aRegisteredThread.StackTop()),
2359                      aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES);
2360   (void)aStackWalkControlIfSupported;  // TODO: Implement.
2361 }
2362 #endif
2363 
2364 #ifdef USE_LUL_STACKWALK
2365 
2366 // See the comment at the callsite for why this function is necessary.
2367 #  if defined(MOZ_HAVE_ASAN_BLACKLIST)
MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
                                           size_t aLen) {
2370   // The obvious thing to do here is call memcpy(). However, although
2371   // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
2372   // false positive still manifests! So we must implement memcpy() ourselves
2373   // within this function.
2374   char* dst = static_cast<char*>(aDst);
2375   const char* src = static_cast<const char*>(aSrc);
2376 
2377   for (size_t i = 0; i < aLen; i++) {
2378     dst[i] = src[i];
2379   }
2380 }
2381 #  endif
2382 
static void DoLULBacktrace(PSLockRef aLock,
                           const RegisteredThread& aRegisteredThread,
                           const Registers& aRegs, NativeStack& aNativeStack,
                           StackWalkControl* aStackWalkControlIfSupported) {
2387   // WARNING: this function runs within the profiler's "critical section".
2388   // WARNING: this function might be called while the profiler is inactive, and
2389   //          cannot rely on ActivePS.
2390 
2391   (void)aStackWalkControlIfSupported;  // TODO: Implement.
2392 
2393   const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
2394 
2395   lul::UnwindRegs startRegs;
2396   memset(&startRegs, 0, sizeof(startRegs));
2397 
2398 #  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
2399   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
2400   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
2401   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
2402 #  elif defined(GP_PLAT_amd64_freebsd)
2403   startRegs.xip = lul::TaggedUWord(mc->mc_rip);
2404   startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
2405   startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
2406 #  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
2407   startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
2408   startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
2409   startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
2410   startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
2411   startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
2412   startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
2413 #  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
2414   startRegs.pc = lul::TaggedUWord(mc->pc);
2415   startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
2416   startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
2417   startRegs.sp = lul::TaggedUWord(mc->sp);
2418 #  elif defined(GP_PLAT_arm64_freebsd)
2419   startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
2420   startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
2421   startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
2422   startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
2423 #  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
2424   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
2425   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
2426   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
2427 #  elif defined(GP_PLAT_mips64_linux)
2428   startRegs.pc = lul::TaggedUWord(mc->pc);
2429   startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
2430   startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
2431 #  else
2432 #    error "Unknown plat"
2433 #  endif
2434 
2435   // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
2436   // stack's registered top point.  Do some basic sanity checks too.  This
2437   // assumes that the TaggedUWord holding the stack pointer value is valid, but
2438   // it should be, since it was constructed that way in the code just above.
2439 
2440   // We could construct |stackImg| so that LUL reads directly from the stack in
2441   // question, rather than from a copy of it.  That would reduce overhead and
2442   // space use a bit.  However, it gives a problem with dynamic analysis tools
2443   // (ASan, TSan, Valgrind) which is that such tools will report invalid or
2444   // racing memory accesses, and such accesses will be reported deep inside LUL.
2445   // By taking a copy here, we can either sanitise the copy (for Valgrind) or
2446   // copy it using an unchecked memcpy (for ASan, TSan).  That way we don't have
2447   // to try and suppress errors inside LUL.
2448   //
2449   // N_STACK_BYTES is set to 160KB.  This is big enough to hold all stacks
2450   // observed in some minutes of testing, whilst keeping the size of this
2451   // function (DoNativeBacktrace)'s frame reasonable.  Most stacks observed in
2452   // practice are small, 4KB or less, and so the copy costs are insignificant
2453   // compared to other profiler overhead.
2454   //
2455   // |stackImg| is allocated on this (the sampling thread's) stack.  That
2456   // implies that the frame for this function is at least N_STACK_BYTES large.
2457   // In general it would be considered unacceptable to have such a large frame
2458   // on a stack, but it only exists for the unwinder thread, and so is not
2459   // expected to be a problem.  Allocating it on the heap is troublesome because
2460   // this function runs whilst the sampled thread is suspended, so any heap
2461   // allocation risks deadlock.  Allocating it as a global variable is not
2462   // thread safe, which would be a problem if we ever allow multiple sampler
2463   // threads.  Hence allocating it on the stack seems to be the least-worst
2464   // option.
2465 
2466   lul::StackImage stackImg;
2467 
2468   {
2469 #  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
2470       defined(GP_PLAT_amd64_freebsd)
2471     uintptr_t rEDZONE_SIZE = 128;
2472     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
2473 #  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
2474     uintptr_t rEDZONE_SIZE = 0;
2475     uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
2476 #  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
2477       defined(GP_PLAT_arm64_freebsd)
2478     uintptr_t rEDZONE_SIZE = 0;
2479     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
2480 #  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
2481     uintptr_t rEDZONE_SIZE = 0;
2482     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
2483 #  elif defined(GP_PLAT_mips64_linux)
2484     uintptr_t rEDZONE_SIZE = 0;
2485     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
2486 #  else
2487 #    error "Unknown plat"
2488 #  endif
2489     uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
2490     uintptr_t ws = sizeof(void*);
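    // Round both bounds down to a word boundary so the copy below is
    // word-aligned and covers a whole number of words.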
2491     start &= ~(ws - 1);
2492     end &= ~(ws - 1);
2493     uintptr_t nToCopy = 0;
2494     if (start < end) {
2495       nToCopy = end - start;
2496       if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
2497     }
2498     MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
2499     stackImg.mLen = nToCopy;
2500     stackImg.mStartAvma = start;
2501     if (nToCopy > 0) {
2502       // If this is a vanilla memcpy(), ASAN makes the following complaint:
2503       //
2504       //   ERROR: AddressSanitizer: stack-buffer-underflow ...
2505       //   ...
2506       //   HINT: this may be a false positive if your program uses some custom
2507       //   stack unwind mechanism or swapcontext
2508       //
2509       // This code is very much a custom stack unwind mechanism! So we use an
2510       // alternative memcpy() implementation that is ignored by ASAN.
2511 #  if defined(MOZ_HAVE_ASAN_BLACKLIST)
2512       ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
2513 #  else
2514       memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
2515 #  endif
2516       (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
2517     }
2518   }
2519 
2520   size_t framePointerFramesAcquired = 0;
2521   lul::LUL* lul = CorePS::Lul(aLock);
2522   lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
2523               reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
2524               &aNativeStack.mCount, &framePointerFramesAcquired,
2525               MAX_NATIVE_FRAMES, &startRegs, &stackImg);
2526 
2527   // Update stats in the LUL stats object.  Unfortunately this requires
2528   // three global memory operations.
2529   lul->mStats.mContext += 1;
2530   lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
2531   lul->mStats.mFP += framePointerFramesAcquired;
2532 }
2533 
2534 #endif
2535 
2536 #ifdef HAVE_NATIVE_UNWIND
static void DoNativeBacktrace(PSLockRef aLock,
                              const RegisteredThread& aRegisteredThread,
                              const Registers& aRegs, NativeStack& aNativeStack,
                              StackWalkControl* aStackWalkControlIfSupported) {
2541   // This method determines which stackwalker is used for periodic and
2542   // synchronous samples. (Backtrace samples are treated differently, see
2543   // profiler_suspend_and_sample_thread() for details). The only part of the
2544   // ordering that matters is that LUL must precede FRAME_POINTER, because on
2545   // Linux they can both be present.
2546 #  if defined(USE_LUL_STACKWALK)
2547   DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2548                  aStackWalkControlIfSupported);
2549 #  elif defined(USE_EHABI_STACKWALK)
2550   DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2551                    aStackWalkControlIfSupported);
2552 #  elif defined(USE_FRAME_POINTER_STACK_WALK)
2553   DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2554                           aStackWalkControlIfSupported);
2555 #  elif defined(USE_MOZ_STACK_WALK)
2556   DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack,
2557                           aStackWalkControlIfSupported);
2558 #  else
2559 #    error "Invalid configuration"
2560 #  endif
2561 }
2562 #endif
2563 
2564 // Writes some components shared by periodic and synchronous profiles to
2565 // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
2566 // and DoPeriodicSample().)
2567 //
2568 // The grammar for entry sequences is in a comment above
2569 // ProfileBuffer::StreamSamplesToJSON.
2570 static inline void DoSharedSample(
2571     PSLockRef aLock, bool aIsSynchronous, RegisteredThread& aRegisteredThread,
2572     const Registers& aRegs, uint64_t aSamplePos, uint64_t aBufferRangeStart,
2573     ProfileBuffer& aBuffer,
2574     StackCaptureOptions aCaptureOptions = StackCaptureOptions::Full) {
2575   // WARNING: this function runs within the profiler's "critical section".
2576 
2577   MOZ_ASSERT(!aBuffer.IsThreadSafe(),
2578              "Mutexes cannot be used inside this critical section");
2579 
2580   MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
2581 
2582   ProfileBufferCollector collector(aBuffer, aSamplePos, aBufferRangeStart);
2583   JsFrameBuffer& jsFrames = CorePS::JsFrames(aLock);
2584   StackWalkControl* stackWalkControlIfSupported = nullptr;
2585 #if defined(HAVE_NATIVE_UNWIND)
2586   const bool captureNative = ActivePS::FeatureStackWalk(aLock) &&
2587                              aCaptureOptions == StackCaptureOptions::Full;
2588   StackWalkControl stackWalkControl;
2589   if constexpr (StackWalkControl::scIsSupported) {
2590     if (captureNative) {
2591       stackWalkControlIfSupported = &stackWalkControl;
2592     }
2593   }
2594 #endif  // defined(HAVE_NATIVE_UNWIND)
2595   const uint32_t jsFramesCount =
2596       ExtractJsFrames(aIsSynchronous, aRegisteredThread, aRegs, collector,
2597                       jsFrames, stackWalkControlIfSupported);
2598   NativeStack nativeStack;
2599 #if defined(HAVE_NATIVE_UNWIND)
2600   if (captureNative) {
2601     DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack,
2602                       stackWalkControlIfSupported);
2603 
2604     MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
2605                 aRegs, nativeStack, collector, jsFrames, jsFramesCount);
2606   } else
2607 #endif
2608   {
2609     MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
2610                 aRegs, nativeStack, collector, jsFrames, jsFramesCount);
2611 
2612     // We can't walk the whole native stack, but we can record the top frame.
2613     if (ActivePS::FeatureLeaf(aLock) &&
2614         aCaptureOptions == StackCaptureOptions::Full) {
2615       aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
2616     }
2617   }
2618 }
2619 
2620 // Writes the components of a synchronous sample to the given ProfileBuffer.
2621 static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
2622                          const TimeStamp& aNow, const Registers& aRegs,
2623                          ProfileBuffer& aBuffer,
2624                          StackCaptureOptions aCaptureOptions) {
2625   // WARNING: this function runs within the profiler's "critical section".
2626 
2627   MOZ_ASSERT(aCaptureOptions != StackCaptureOptions::NoStack,
2628              "DoSyncSample should not be called when no capture is needed");
2629 
2630   const uint64_t bufferRangeStart = aBuffer.BufferRangeStart();
2631 
2632   const uint64_t samplePos =
2633       aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
2634 
2635   TimeDuration delta = aNow - CorePS::ProcessStartTime();
2636   aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
2637 
2638   DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
2639                  samplePos, bufferRangeStart, aBuffer, aCaptureOptions);
2640 }
2641 
2642 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
2643 // The ThreadId entry has already been written to the main ProfileBuffer at
2644 // `aSamplePos`; we can write the rest to `aBuffer` (which may be different).
2645 static inline void DoPeriodicSample(PSLockRef aLock,
2646                                     RegisteredThread& aRegisteredThread,
2647                                     const Registers& aRegs, uint64_t aSamplePos,
2648                                     uint64_t aBufferRangeStart,
2649                                     ProfileBuffer& aBuffer) {
2650   // WARNING: this function runs within the profiler's "critical section".
2651 
2652   DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
2653                  aSamplePos, aBufferRangeStart, aBuffer);
2654 }
2655 
2656 // END sampling/unwinding code
2657 ////////////////////////////////////////////////////////////////////////
2658 
2659 ////////////////////////////////////////////////////////////////////////
2660 // BEGIN saving/streaming code
2661 
2662 const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;
2663 
2664 static int64_t SafeJSInteger(uint64_t aValue) {
2665   return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
2666 }
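// For example (illustrative values only): a typical library start address such
// as 0x00007f1234560000 (~1.4e14) is below 2^53-1 and is passed through
// unchanged, whereas a value like 0xffffffffffffff00 is not exactly
// representable as a JS number, so it is reported as -1 instead.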
2667 
2668 static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
2669                                          const SharedLibrary& aLib) {
2670   aWriter.StartObjectElement();
2671   aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
2672   aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
2673   aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
2674   aWriter.StringProperty("name", NS_ConvertUTF16toUTF8(aLib.GetModuleName()));
2675   aWriter.StringProperty("path", NS_ConvertUTF16toUTF8(aLib.GetModulePath()));
2676   aWriter.StringProperty("debugName",
2677                          NS_ConvertUTF16toUTF8(aLib.GetDebugName()));
2678   aWriter.StringProperty("debugPath",
2679                          NS_ConvertUTF16toUTF8(aLib.GetDebugPath()));
2680   aWriter.StringProperty("breakpadId", aLib.GetBreakpadId());
2681   aWriter.StringProperty("arch", aLib.GetArch());
2682   aWriter.EndObject();
2683 }
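// For reference, each entry streamed above produces a JSON object of roughly
// this shape (illustrative values, not taken from a real profile):
//
//   {
//     "start": 140200000000000,
//     "end": 140200001000000,
//     "offset": 0,
//     "name": "libxul.so",
//     "path": "/usr/lib/firefox/libxul.so",
//     "debugName": "libxul.so",
//     "debugPath": "/usr/lib/firefox/libxul.so",
//     "breakpadId": "A1B2C3D4E5F6A1B2C3D4E5F6A1B2C3D40",
//     "arch": "x86_64"
//   }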
2684 
2685 void AppendSharedLibraries(JSONWriter& aWriter) {
2686   SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
2687   info.SortByAddress();
2688   for (size_t i = 0; i < info.GetSize(); i++) {
2689     AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
2690   }
2691 }
2692 
2693 static void StreamCategories(SpliceableJSONWriter& aWriter) {
2694   // Same order as ProfilingCategory. Format:
2695   // [
2696   //   {
2697   //     name: "Idle",
2698   //     color: "transparent",
2699   //     subcategories: ["Other"],
2700   //   },
2701   //   {
2702   //     name: "Other",
2703   //     color: "grey",
2704   //     subcategories: [
2705   //       "JSM loading",
2706   //       "Subprocess launching",
2707   //       "DLL loading"
2708   //     ]
2709   //   },
2710   //   ...
2711   // ]
2712 
2713 #define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
2714   aWriter.Start();                                               \
2715   aWriter.StringProperty("name", labelAsString);                 \
2716   aWriter.StringProperty("color", color);                        \
2717   aWriter.StartArrayProperty("subcategories");
2718 #define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
2719   aWriter.StringElement(labelAsString);
2720 #define CATEGORY_JSON_END_CATEGORY \
2721   aWriter.EndArray();              \
2722   aWriter.EndObject();
2723 
2724   MOZ_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
2725                               CATEGORY_JSON_SUBCATEGORY,
2726                               CATEGORY_JSON_END_CATEGORY)
2727 
2728 #undef CATEGORY_JSON_BEGIN_CATEGORY
2729 #undef CATEGORY_JSON_SUBCATEGORY
2730 #undef CATEGORY_JSON_END_CATEGORY
2731 }
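// As a concrete illustration, for the "Other" category shown in the format
// comment above, one round of the macro expansion is roughly equivalent to:
//
//   aWriter.Start();
//   aWriter.StringProperty("name", "Other");
//   aWriter.StringProperty("color", "grey");
//   aWriter.StartArrayProperty("subcategories");
//   aWriter.StringElement("JSM loading");
//   aWriter.StringElement("Subprocess launching");
//   aWriter.StringElement("DLL loading");
//   aWriter.EndArray();
//   aWriter.EndObject();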
2732 
2733 static void StreamMarkerSchema(SpliceableJSONWriter& aWriter) {
2734   // Get an array view with all registered marker-type-specific functions.
2735   Span<const base_profiler_markers_detail::Streaming::MarkerTypeFunctions>
2736       markerTypeFunctionsArray =
2737           base_profiler_markers_detail::Streaming::MarkerTypeFunctionsArray();
2738   // List of streamed marker names, this is used to spot duplicates.
2739   std::set<std::string> names;
2740   // Stream the display schema for each different one. (Duplications may come
2741   // from the same code potentially living in different libraries.)
2742   for (const auto& markerTypeFunctions : markerTypeFunctionsArray) {
2743     auto name = markerTypeFunctions.mMarkerTypeNameFunction();
2744     // std::set.insert(T&&) returns a pair, its `second` is true if the element
2745     // was actually inserted (i.e., it was not there yet.)
2746     const bool didInsert =
2747         names.insert(std::string(name.data(), name.size())).second;
2748     if (didInsert) {
2749       markerTypeFunctions.mMarkerSchemaFunction().Stream(aWriter, name);
2750     }
2751   }
2752 }
2753 
2754 // Some meta information that is better recorded before streaming the profile.
2755 // This is *not* intended to be cached, as some values could change between
2756 // profiling sessions.
2757 struct PreRecordedMetaInformation {
2758   bool mAsyncStacks;
2759 
2760   // This struct should only live on the stack, so it's fine to use Auto
2761   // strings.
2762   nsAutoCString mHttpPlatform;
2763   nsAutoCString mHttpOscpu;
2764   nsAutoCString mHttpMisc;
2765 
2766   nsAutoCString mRuntimeABI;
2767   nsAutoCString mRuntimeToolkit;
2768 
2769   nsAutoCString mAppInfoProduct;
2770   nsAutoCString mAppInfoAppBuildID;
2771   nsAutoCString mAppInfoSourceURL;
2772 
2773   int32_t mProcessInfoCpuCount;
2774   int32_t mProcessInfoCpuCores;
2775 };
2776 
2777 // This function should be called out of the profiler lock.
2778 // It gathers non-trivial data that doesn't require the profiler to stop, or for
2779 // which the request could theoretically deadlock if the profiler is locked.
2780 static PreRecordedMetaInformation PreRecordMetaInformation() {
2781   gPSMutex.AssertCurrentThreadDoesNotOwn();
2782 
2783   PreRecordedMetaInformation info = {};  // Aggregate-init all fields.
2784 
2785   if (!NS_IsMainThread()) {
2786     // Leave these properties out if we're not on the main thread.
2787     // At the moment, the only case in which this function is called on a
2788     // background thread is if we're in a content process and are going to
2789     // send this profile to the parent process. In that case, the parent
2790     // process profile's "meta" object already has the rest of the properties,
2791     // and the parent process profile is dumped on that process's main thread.
2792     return info;
2793   }
2794 
2795   info.mAsyncStacks = Preferences::GetBool("javascript.options.asyncstack");
2796 
2797   nsresult res;
2798 
2799   if (nsCOMPtr<nsIHttpProtocolHandler> http =
2800           do_GetService(NS_NETWORK_PROTOCOL_CONTRACTID_PREFIX "http", &res);
2801       !NS_FAILED(res) && http) {
2802     Unused << http->GetPlatform(info.mHttpPlatform);
2803     Unused << http->GetOscpu(info.mHttpOscpu);
2804     Unused << http->GetMisc(info.mHttpMisc);
2805   }
2806 
2807   if (nsCOMPtr<nsIXULRuntime> runtime =
2808           do_GetService("@mozilla.org/xre/runtime;1");
2809       runtime) {
2810     Unused << runtime->GetXPCOMABI(info.mRuntimeABI);
2811     Unused << runtime->GetWidgetToolkit(info.mRuntimeToolkit);
2812   }
2813 
2814   if (nsCOMPtr<nsIXULAppInfo> appInfo =
2815           do_GetService("@mozilla.org/xre/app-info;1");
2816       appInfo) {
2817     Unused << appInfo->GetName(info.mAppInfoProduct);
2818     Unused << appInfo->GetAppBuildID(info.mAppInfoAppBuildID);
2819     Unused << appInfo->GetSourceURL(info.mAppInfoSourceURL);
2820   }
2821 
2822   ProcessInfo processInfo = {};  // Aggregate-init all fields to false/zeroes.
2823   if (NS_SUCCEEDED(CollectProcessInfo(processInfo))) {
2824     info.mProcessInfoCpuCount = processInfo.cpuCount;
2825     info.mProcessInfoCpuCores = processInfo.cpuCores;
2826   }
2827 
2828   return info;
2829 }
2830 
2831 // Implemented in platform-specific cpps, to add object properties describing
2832 // the units of CPU measurements in samples.
2833 static void StreamMetaPlatformSampleUnits(PSLockRef aLock,
2834                                           SpliceableJSONWriter& aWriter);
2835 
2836 static void StreamMetaJSCustomObject(
2837     PSLockRef aLock, SpliceableJSONWriter& aWriter, bool aIsShuttingDown,
2838     const PreRecordedMetaInformation& aPreRecordedMetaInformation) {
2839   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
2840 
2841   aWriter.IntProperty("version", 23);
2842 
2843   // The "startTime" field holds the number of milliseconds since midnight
2844   // January 1, 1970 GMT. This grotty code computes (Now - (Now -
2845   // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
2846   TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
2847   aWriter.DoubleProperty(
2848       "startTime",
2849       static_cast<double>(PR_Now() / 1000.0 - delta.ToMilliseconds()));
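  // Worked example (illustrative numbers): if PR_Now() is
  // 1,600,000,000,000,000 us (i.e. 1,600,000,000,000 ms since the epoch) and
  // the process started 5,000 ms ago, then delta is 5,000 ms and "startTime"
  // is written as 1,600,000,000,000 - 5,000 = 1,599,999,995,000 ms.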
2850 
2851   // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
2852   // absolute time stamp: It's relative to startTime. This is consistent with
2853   // all other (non-"startTime") times anywhere in the profile JSON.
2854   if (aIsShuttingDown) {
2855     aWriter.DoubleProperty("shutdownTime", profiler_time());
2856   } else {
2857     aWriter.NullProperty("shutdownTime");
2858   }
2859 
2860   aWriter.StartArrayProperty("categories");
2861   StreamCategories(aWriter);
2862   aWriter.EndArray();
2863 
2864   aWriter.StartArrayProperty("markerSchema");
2865   StreamMarkerSchema(aWriter);
2866   aWriter.EndArray();
2867 
2868   ActivePS::WriteActiveConfiguration(aLock, aWriter,
2869                                      MakeStringSpan("configuration"));
2870 
2871   if (!NS_IsMainThread()) {
2872     // Leave the rest of the properties out if we're not on the main thread.
2873     // At the moment, the only case in which this function is called on a
2874     // background thread is if we're in a content process and are going to
2875     // send this profile to the parent process. In that case, the parent
2876     // process profile's "meta" object already has the rest of the properties,
2877     // and the parent process profile is dumped on that process's main thread.
2878     return;
2879   }
2880 
2881   aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
2882   aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
2883 
2884 #ifdef DEBUG
2885   aWriter.IntProperty("debug", 1);
2886 #else
2887   aWriter.IntProperty("debug", 0);
2888 #endif
2889 
2890   aWriter.IntProperty("gcpoison", JS::IsGCPoisoning() ? 1 : 0);
2891 
2892   aWriter.IntProperty("asyncstack", aPreRecordedMetaInformation.mAsyncStacks);
2893 
2894   aWriter.IntProperty("processType", XRE_GetProcessType());
2895 
2896   aWriter.StringProperty("updateChannel", MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL));
2897 
2898   if (!aPreRecordedMetaInformation.mHttpPlatform.IsEmpty()) {
2899     aWriter.StringProperty("platform",
2900                            aPreRecordedMetaInformation.mHttpPlatform);
2901   }
2902   if (!aPreRecordedMetaInformation.mHttpOscpu.IsEmpty()) {
2903     aWriter.StringProperty("oscpu", aPreRecordedMetaInformation.mHttpOscpu);
2904   }
2905   if (!aPreRecordedMetaInformation.mHttpMisc.IsEmpty()) {
2906     aWriter.StringProperty("misc", aPreRecordedMetaInformation.mHttpMisc);
2907   }
2908 
2909   if (!aPreRecordedMetaInformation.mRuntimeABI.IsEmpty()) {
2910     aWriter.StringProperty("abi", aPreRecordedMetaInformation.mRuntimeABI);
2911   }
2912   if (!aPreRecordedMetaInformation.mRuntimeToolkit.IsEmpty()) {
2913     aWriter.StringProperty("toolkit",
2914                            aPreRecordedMetaInformation.mRuntimeToolkit);
2915   }
2916 
2917   if (!aPreRecordedMetaInformation.mAppInfoProduct.IsEmpty()) {
2918     aWriter.StringProperty("product",
2919                            aPreRecordedMetaInformation.mAppInfoProduct);
2920   }
2921   if (!aPreRecordedMetaInformation.mAppInfoAppBuildID.IsEmpty()) {
2922     aWriter.StringProperty("appBuildID",
2923                            aPreRecordedMetaInformation.mAppInfoAppBuildID);
2924   }
2925   if (!aPreRecordedMetaInformation.mAppInfoSourceURL.IsEmpty()) {
2926     aWriter.StringProperty("sourceURL",
2927                            aPreRecordedMetaInformation.mAppInfoSourceURL);
2928   }
2929 
2930   if (aPreRecordedMetaInformation.mProcessInfoCpuCores > 0) {
2931     aWriter.IntProperty("physicalCPUs",
2932                         aPreRecordedMetaInformation.mProcessInfoCpuCores);
2933   }
2934   if (aPreRecordedMetaInformation.mProcessInfoCpuCount > 0) {
2935     aWriter.IntProperty("logicalCPUs",
2936                         aPreRecordedMetaInformation.mProcessInfoCpuCount);
2937   }
2938 
2939 #if defined(GP_OS_android)
2940   jni::String::LocalRef deviceInformation =
2941       java::GeckoJavaSampler::GetDeviceInformation();
2942   aWriter.StringProperty("device", deviceInformation->ToCString());
2943 #endif
2944 
2945   aWriter.StartObjectProperty("sampleUnits");
2946   {
2947     aWriter.StringProperty("time", "ms");
2948     aWriter.StringProperty("eventDelay", "ms");
2949     StreamMetaPlatformSampleUnits(aLock, aWriter);
2950   }
2951   aWriter.EndObject();
2952 
2953   // Avoid collecting extension metadata for the profiler when there is no
2954   // observer service, since an ExtensionPolicyService cannot be created then.
2955   if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
2956     aWriter.StartObjectProperty("extensions");
2957     {
2958       {
2959         JSONSchemaWriter schema(aWriter);
2960         schema.WriteField("id");
2961         schema.WriteField("name");
2962         schema.WriteField("baseURL");
2963       }
2964 
2965       aWriter.StartArrayProperty("data");
2966       {
2967         nsTArray<RefPtr<WebExtensionPolicy>> exts;
2968         ExtensionPolicyService::GetSingleton().GetAll(exts);
2969 
2970         for (auto& ext : exts) {
2971           aWriter.StartArrayElement(JSONWriter::SingleLineStyle);
2972 
2973           nsAutoString id;
2974           ext->GetId(id);
2975           aWriter.StringElement(NS_ConvertUTF16toUTF8(id));
2976 
2977           aWriter.StringElement(NS_ConvertUTF16toUTF8(ext->Name()));
2978 
2979           auto url = ext->GetURL(u""_ns);
2980           if (url.isOk()) {
2981             aWriter.StringElement(NS_ConvertUTF16toUTF8(url.unwrap()));
2982           }
2983 
2984           aWriter.EndArray();
2985         }
2986       }
2987       aWriter.EndArray();
2988     }
2989     aWriter.EndObject();
2990   }
2991 }
2992 
2993 static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
2994   MOZ_RELEASE_ASSERT(CorePS::Exists());
2995   ActivePS::DiscardExpiredPages(aLock);
2996   for (const auto& page : ActivePS::ProfiledPages(aLock)) {
2997     page->StreamJSON(aWriter);
2998   }
2999 }
3000 
3001 #if defined(GP_OS_android)
3002 template <int N>
3003 static bool StartsWith(const nsACString& string, const char (&prefix)[N]) {
3004   if (N - 1 > string.Length()) {
3005     return false;
3006   }
3007   return memcmp(string.Data(), prefix, N - 1) == 0;
3008 }
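// Usage sketch only (the real callers are in InferJavaCategory below):
// StartsWith("android.os.MessageQueue.nativePollOnce()"_ns, "android.") is
// true, while StartsWith("org"_ns, "org.mozilla.") is false because the
// prefix is longer than the string.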
3009 
3010 static JS::ProfilingCategoryPair InferJavaCategory(nsACString& aName) {
3011   if (aName.EqualsLiteral("android.os.MessageQueue.nativePollOnce()")) {
3012     return JS::ProfilingCategoryPair::IDLE;
3013   }
3014   if (aName.EqualsLiteral("java.lang.Object.wait()")) {
3015     return JS::ProfilingCategoryPair::JAVA_BLOCKED;
3016   }
3017   if (StartsWith(aName, "android.") || StartsWith(aName, "com.android.")) {
3018     return JS::ProfilingCategoryPair::JAVA_ANDROID;
3019   }
3020   if (StartsWith(aName, "mozilla.") || StartsWith(aName, "org.mozilla.")) {
3021     return JS::ProfilingCategoryPair::JAVA_MOZILLA;
3022   }
3023   if (StartsWith(aName, "java.") || StartsWith(aName, "sun.") ||
3024       StartsWith(aName, "com.sun.")) {
3025     return JS::ProfilingCategoryPair::JAVA_LANGUAGE;
3026   }
3027   if (StartsWith(aName, "kotlin.") || StartsWith(aName, "kotlinx.")) {
3028     return JS::ProfilingCategoryPair::JAVA_KOTLIN;
3029   }
3030   if (StartsWith(aName, "androidx.")) {
3031     return JS::ProfilingCategoryPair::JAVA_ANDROIDX;
3032   }
3033   return JS::ProfilingCategoryPair::OTHER;
3034 }
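// For example (hypothetical frame names): "android.os.Looper.loop()" maps to
// JAVA_ANDROID, "org.mozilla.gecko.GeckoThread.run()" maps to JAVA_MOZILLA,
// and "kotlinx.coroutines.EventLoop.run()" maps to JAVA_KOTLIN; anything not
// matching the prefixes above falls back to OTHER.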
3035 
3036 static void CollectJavaThreadProfileData(ProfileBuffer& aProfileBuffer) {
3037   // locked_profiler_start uses a sample count of 1000 for the Java thread.
3038   // This entry size is enough for now, but we might have to estimate it
3039   // if we make it customizable.
3040 
3041   // Pass the samples
3042   // FIXME(bug 1618560): We are currently only profiling the Android UI thread.
3043   constexpr int threadId = 0;
3044   int sampleId = 0;
3045   while (true) {
3046     // Gets the data from the Android UI thread only.
3047     double sampleTime = java::GeckoJavaSampler::GetSampleTime(sampleId);
3048     if (sampleTime == 0.0) {
3049       break;
3050     }
3051 
3052     aProfileBuffer.AddThreadIdEntry(threadId);
3053     aProfileBuffer.AddEntry(ProfileBufferEntry::Time(sampleTime));
3054     int frameId = 0;
3055     while (true) {
3056       jni::String::LocalRef frameName =
3057           java::GeckoJavaSampler::GetFrameName(sampleId, frameId++);
3058       if (!frameName) {
3059         break;
3060       }
3061       nsCString frameNameString = frameName->ToCString();
3062 
3063       auto categoryPair = InferJavaCategory(frameNameString);
3064       aProfileBuffer.CollectCodeLocation("", frameNameString.get(), 0, 0,
3065                                          Nothing(), Nothing(),
3066                                          Some(categoryPair));
3067     }
3068     sampleId++;
3069   }
3070 
3071   // Pass the markers now
3072   while (true) {
3073     // Gets the data from the Android UI thread only.
3074     java::GeckoJavaSampler::Marker::LocalRef marker =
3075         java::GeckoJavaSampler::PollNextMarker();
3076     if (!marker) {
3077       // All markers are transferred.
3078       break;
3079     }
3080 
3081     // Get all the marker information from the Java thread using JNI.
3082     nsCString markerName = marker->GetMarkerName()->ToCString();
3083     jni::String::LocalRef text = marker->GetMarkerText();
3084     TimeStamp startTime =
3085         CorePS::ProcessStartTime() +
3086         TimeDuration::FromMilliseconds(marker->GetStartTime());
3087 
3088     double endTimeMs = marker->GetEndTime();
3089     // A marker can be either a duration with start and end, or a point in time
3090     // with only startTime. If endTime is 0, this means it's a point in time.
3091     TimeStamp endTime = endTimeMs == 0
3092                             ? startTime
3093                             : CorePS::ProcessStartTime() +
3094                                   TimeDuration::FromMilliseconds(endTimeMs);
3095     MarkerTiming timing = endTimeMs == 0
3096                               ? MarkerTiming::InstantAt(startTime)
3097                               : MarkerTiming::Interval(startTime, endTime);
3098 
3099     if (!text) {
3100       // This marker doesn't have a text.
3101       AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
3102                         geckoprofiler::category::JAVA_ANDROID,
3103                         {MarkerThreadId(threadId), std::move(timing)});
3104     } else {
3105       // This marker has a text.
3106       AddMarkerToBuffer(aProfileBuffer.UnderlyingChunkedBuffer(), markerName,
3107                         geckoprofiler::category::JAVA_ANDROID,
3108                         {MarkerThreadId(threadId), std::move(timing)},
3109                         geckoprofiler::markers::TextMarker{},
3110                         text->ToCString());
3111     }
3112   }
3113 }
3114 #endif
3115 
3116 UniquePtr<ProfilerCodeAddressService>
3117 profiler_code_address_service_for_presymbolication() {
3118   static const bool preSymbolicate = []() {
3119     const char* symbolicate = getenv("MOZ_PROFILER_SYMBOLICATE");
3120     return symbolicate && symbolicate[0] != '\0';
3121   }();
3122   return preSymbolicate ? MakeUnique<ProfilerCodeAddressService>() : nullptr;
3123 }
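// For example, running with MOZ_PROFILER_SYMBOLICATE=1 in the environment
// makes this return a fresh ProfilerCodeAddressService on each call; with the
// variable unset or empty, it always returns nullptr.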
3124 
3125 static void locked_profiler_stream_json_for_this_process(
3126     PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
3127     const PreRecordedMetaInformation& aPreRecordedMetaInformation,
3128     bool aIsShuttingDown, ProfilerCodeAddressService* aService) {
3129   LOG("locked_profiler_stream_json_for_this_process");
3130 
3131   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
3132 
3133   AUTO_PROFILER_STATS(locked_profiler_stream_json_for_this_process);
3134 
3135   const double collectionStartMs = profiler_time();
3136 
3137   ProfileBuffer& buffer = ActivePS::Buffer(aLock);
3138 
3139   // If there is a set "Window length", discard older data.
3140   Maybe<double> durationS = ActivePS::Duration(aLock);
3141   if (durationS.isSome()) {
3142     const double durationStartMs = collectionStartMs - *durationS * 1000;
3143     buffer.DiscardSamplesBeforeTime(durationStartMs);
3144   }
3145 
3146   // Put shared library info
3147   aWriter.StartArrayProperty("libs");
3148   AppendSharedLibraries(aWriter);
3149   aWriter.EndArray();
3150 
3151   // Put meta data
3152   aWriter.StartObjectProperty("meta");
3153   {
3154     StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown,
3155                              aPreRecordedMetaInformation);
3156   }
3157   aWriter.EndObject();
3158 
3159   // Put page data
3160   aWriter.StartArrayProperty("pages");
3161   { StreamPages(aLock, aWriter); }
3162   aWriter.EndArray();
3163 
3164   buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(),
3165                                       aSinceTime);
3166   buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(), aSinceTime);
3167 
3168   // Lists the samples for each thread profile
3169   aWriter.StartArrayProperty("threads");
3170   {
3171     ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
3172     Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads =
3173         ActivePS::ProfiledThreads(aLock);
3174     for (auto& thread : threads) {
3175       RegisteredThread* registeredThread = thread.first;
3176       JSContext* cx =
3177           registeredThread ? registeredThread->GetJSContext() : nullptr;
3178       ProfiledThreadData* profiledThreadData = thread.second;
3179       profiledThreadData->StreamJSON(
3180           buffer, cx, aWriter, CorePS::ProcessName(aLock),
3181           CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
3182           ActivePS::FeatureJSTracer(aLock), aService);
3183     }
3184 
3185 #if defined(GP_OS_android)
3186     if (ActivePS::FeatureJava(aLock)) {
3187       // We are allocating it chunk by chunk, so this will not allocate 64 MiB
3188       // at once. This size should be more than enough for Java threads.
3189       // This buffer is created for each process, but Android has relatively
3190       // few processes compared to desktop, so it's okay here.
3191       mozilla::ProfileBufferChunkManagerWithLocalLimit chunkManager(
3192           64 * 1024 * 1024, 1024 * 1024);
3193       ProfileChunkedBuffer bufferManager(
3194           ProfileChunkedBuffer::ThreadSafety::WithoutMutex, chunkManager);
3195       ProfileBuffer javaBuffer(bufferManager);
3196       CollectJavaThreadProfileData(javaBuffer);
3197 
3198       // Set the thread id of the Android UI thread to be 0.
3199       // We are profiling the Android UI thread twice: Both from the C++ side
3200       // (as a regular C++ profiled thread with the name "AndroidUI"), and from
3201       // the Java side. The thread's actual ID is mozilla::jni::GetUIThreadId(),
3202       // but since we're using that ID for the C++ side, we need to pick another
3203       // tid that doesn't conflict with it for the Java side. So we just use 0.
3204       // Once we add support for profiling of other java threads, we'll have to
3205       // get their thread id and name via JNI.
3206       RefPtr<ThreadInfo> threadInfo = new ThreadInfo(
3207           "AndroidUI (JVM)", 0, false, CorePS::ProcessStartTime());
3208       ProfiledThreadData profiledThreadData(threadInfo, nullptr);
3209       profiledThreadData.StreamJSON(
3210           javaBuffer, nullptr, aWriter, CorePS::ProcessName(aLock),
3211           CorePS::ETLDplus1(aLock), CorePS::ProcessStartTime(), aSinceTime,
3212           ActivePS::FeatureJSTracer(aLock), nullptr);
3213     }
3214 #endif
3215 
3216     UniquePtr<char[]> baseProfileThreads =
3217         ActivePS::MoveBaseProfileThreads(aLock);
3218     if (baseProfileThreads) {
3219       aWriter.Splice(MakeStringSpan(baseProfileThreads.get()));
3220     }
3221   }
3222   aWriter.EndArray();
3223 
3224   if (ActivePS::FeatureJSTracer(aLock)) {
3225     aWriter.StartArrayProperty("jsTracerDictionary");
3226     {
3227       JS::AutoTraceLoggerLockGuard lockGuard;
3228       // Collect Event Dictionary
3229       JS::TraceLoggerDictionaryBuffer collectionBuffer(lockGuard);
3230       while (collectionBuffer.NextChunk()) {
3231         aWriter.StringElement(
3232             MakeStringSpan(collectionBuffer.internalBuffer()));
3233       }
3234     }
3235     aWriter.EndArray();
3236   }
3237 
3238   aWriter.StartArrayProperty("pausedRanges");
3239   { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); }
3240   aWriter.EndArray();
3241 
3242   const double collectionEndMs = profiler_time();
3243 
3244   // Record timestamps for the collection into the buffer, so that consumers
3245   // know why we didn't collect any samples for its duration.
3246   // We put these entries into the buffer after we've collected the profile,
3247   // so they'll be visible for the *next* profile collection (if they haven't
3248   // been overwritten due to buffer wraparound by then).
3249   buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
3250   buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
3251 }
3252 
3253 bool profiler_stream_json_for_this_process(
3254     SpliceableJSONWriter& aWriter, double aSinceTime, bool aIsShuttingDown,
3255     ProfilerCodeAddressService* aService) {
3256   LOG("profiler_stream_json_for_this_process");
3257 
3258   MOZ_RELEASE_ASSERT(CorePS::Exists());
3259 
3260   const auto preRecordedMetaInformation = PreRecordMetaInformation();
3261 
3262   if (profiler_is_active()) {
3263     invoke_profiler_state_change_callbacks(ProfilingState::GeneratingProfile);
3264   }
3265 
3266   PSAutoLock lock(gPSMutex);
3267 
3268   if (!ActivePS::Exists(lock)) {
3269     return false;
3270   }
3271 
3272   locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
3273                                                preRecordedMetaInformation,
3274                                                aIsShuttingDown, aService);
3275   return true;
3276 }
3277 
3278 // END saving/streaming code
3279 ////////////////////////////////////////////////////////////////////////
3280 
3281 static char FeatureCategory(uint32_t aFeature) {
3282   if (aFeature & DefaultFeatures()) {
3283     if (aFeature & AvailableFeatures()) {
3284       return 'D';
3285     }
3286     return 'd';
3287   }
3288 
3289   if (aFeature & StartupExtraDefaultFeatures()) {
3290     if (aFeature & AvailableFeatures()) {
3291       return 'S';
3292     }
3293     return 's';
3294   }
3295 
3296   if (aFeature & AvailableFeatures()) {
3297     return '-';
3298   }
3299   return 'x';
3300 }
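// For example, a feature present in both DefaultFeatures() and
// AvailableFeatures() is shown as 'D' in the help output below; one that is
// available but in neither default set is shown as '-'; an unavailable
// feature is shown as 'x' (or 'd'/'s' if it would otherwise be a default or
// MOZ_PROFILER_STARTUP extra default).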
3301 
3302 // Doesn't exit if aExitCode is 0.
3303 static void PrintUsageThenExit(int aExitCode) {
3304   MOZ_RELEASE_ASSERT(NS_IsMainThread());
3305 
3306   printf(
3307       "\n"
3308       "Profiler environment variable usage:\n"
3309       "\n"
3310       "  MOZ_PROFILER_HELP\n"
3311       "  If set to any value, prints this message.\n"
3312       "  Use MOZ_BASE_PROFILER_HELP for BaseProfiler help.\n"
3313       "\n"
3314       "  MOZ_LOG\n"
3315       "  Enables logging. The levels of logging available are\n"
3316       "  'prof:3' (least verbose), 'prof:4', 'prof:5' (most verbose).\n"
3317       "\n"
3318       "  MOZ_PROFILER_STARTUP\n"
3319       "  If set to any value other than '' or '0'/'N'/'n', starts the\n"
3320       "  profiler immediately on start-up.\n"
3321       "  Useful if you want to profile code that runs very early.\n"
3322       "\n"
3323       "  MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
3324       "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries per\n"
3325       "  process in the profiler's circular buffer when the profiler is first\n"
3326       "  started.\n"
3327       "  If unset, the platform default is used:\n"
3328       "  %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
3329       "  (%u bytes per entry -> %u or %u total bytes per process)\n"
3330       "\n"
3331       "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
3332       "  If MOZ_PROFILER_STARTUP is set, specifies the maximum life time of\n"
3333       "  entries in the profiler's circular buffer when the profiler is\n"
3334       "  first started, in seconds.\n"
3335       "  If unset, the life time of the entries will only be restricted by\n"
3336       "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
3337       "  additional time duration restriction will be applied.\n"
3338       "\n"
3339       "  MOZ_PROFILER_STARTUP_INTERVAL=<1..%d>\n"
3340       "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
3341       "  measured in milliseconds, when the profiler is first started.\n"
3342       "  If unset, the platform default is used.\n"
3343       "\n"
3344       "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
3345       "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
3346       "  the integer value of the features bitfield.\n"
3347       "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
3348       "\n"
3349       "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
3350       "  If MOZ_PROFILER_STARTUP is set, specifies the profiling features, as\n"
3351       "  a comma-separated list of strings.\n"
3352       "  Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
3353       "  If unset, the platform default is used.\n"
3354       "\n"
3355       "    Features: (x=unavailable, D/d=default/unavailable,\n"
3356       "               S/s=MOZ_PROFILER_STARTUP extra default/unavailable)\n",
3357       unsigned(ActivePS::scMinimumBufferEntries),
3358       unsigned(ActivePS::scMaximumBufferEntries),
3359       unsigned(PROFILER_DEFAULT_ENTRIES.Value()),
3360       unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
3361       unsigned(scBytesPerEntry),
3362       unsigned(PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
3363       unsigned(PROFILER_DEFAULT_STARTUP_ENTRIES.Value() * scBytesPerEntry),
3364       PROFILER_MAX_INTERVAL);
3365 
3366 #define PRINT_FEATURE(n_, str_, Name_, desc_)                                  \
3367   printf("    %c %7u: \"%s\" (%s)\n", FeatureCategory(ProfilerFeature::Name_), \
3368          ProfilerFeature::Name_, str_, desc_);
3369 
3370   PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
3371 
3372 #undef PRINT_FEATURE
3373 
3374   printf(
3375       "    -          \"default\" (All above D+S defaults)\n"
3376       "\n"
3377       "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
3378       "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as a\n"
3379       "  comma-separated list of strings. A given thread will be sampled if\n"
3380       "  any of the filters is a case-insensitive substring of the thread\n"
3381       "  name. If unset, a default is used.\n"
3382       "\n"
3383       "  MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID=<Number>\n"
3384       "  This variable is used to propagate the activeTabID of\n"
3385       "  the profiler init params to subprocesses.\n"
3386       "\n"
3387       "  MOZ_PROFILER_SHUTDOWN\n"
3388       "  If set, the profiler saves a profile to the named file on shutdown.\n"
3389       "\n"
3390       "  MOZ_PROFILER_SYMBOLICATE\n"
3391       "  If set, the profiler will pre-symbolicate profiles.\n"
3392       "  *Note* This will add a significant pause when gathering data, and\n"
3393       "  is intended mainly for local development.\n"
3394       "\n"
3395       "  MOZ_PROFILER_LUL_TEST\n"
3396       "  If set to any value, runs LUL unit tests at startup.\n"
3397       "\n"
3398       "  This platform %s native unwinding.\n"
3399       "\n",
3400 #if defined(HAVE_NATIVE_UNWIND)
3401       "supports"
3402 #else
3403       "does not support"
3404 #endif
3405   );
3406 
3407   if (aExitCode != 0) {
3408     exit(aExitCode);
3409   }
3410 }
3411 
3412 ////////////////////////////////////////////////////////////////////////
3413 // BEGIN Sampler
3414 
3415 #if defined(GP_OS_linux) || defined(GP_OS_android)
3416 struct SigHandlerCoordinator;
3417 #endif
3418 
3419 // Sampler performs setup and teardown of the state required to sample with the
3420 // profiler. Sampler may exist when ActivePS is not present.
3421 //
3422 // SuspendAndSampleAndResumeThread must only be called from a single thread,
3423 // and must not sample the thread it is being called from. A separate Sampler
3424 // instance must be used for each thread which wants to capture samples.
3425 
3426 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
3427 //
3428 // With the exception of SamplerThread, all Sampler objects must be Disable-d
3429 // before releasing the lock which was used to create them. This avoids races
3430 // on linux with the SIGPROF signal handler.
3431 
3432 class Sampler {
3433  public:
3434   // Sets up the profiler such that it can begin sampling.
3435   explicit Sampler(PSLockRef aLock);
3436 
3437   // Disable the sampler, restoring it to its previous state. This must be
3438   // called once, and only once, before the Sampler is destroyed.
3439   void Disable(PSLockRef aLock);
3440 
3441   // This method suspends and resumes the samplee thread. It calls the passed-in
3442   // function-like object aProcessRegs (passing it a populated |const
3443   // Registers&| arg) while the samplee thread is suspended.  Note that
3444   // the aProcessRegs function must be very careful not to do anything that
3445   // requires a lock, since we may have interrupted the thread at any point.
3446   // As an example, you can't call TimeStamp::Now() since on windows it
3447   // takes a lock on the performance counter.
3448   //
3449   // Func must be a function-like object of type `void()`.
3450   template <typename Func>
3451   void SuspendAndSampleAndResumeThread(
3452       PSLockRef aLock, const RegisteredThread& aRegisteredThread,
3453       const TimeStamp& aNow, const Func& aProcessRegs);
3454 
3455  private:
3456 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
3457   // Used to restore the SIGPROF handler when ours is removed.
3458   struct sigaction mOldSigprofHandler;
3459 
3460   // This process' ID. Needed as an argument for tgkill in
3461   // SuspendAndSampleAndResumeThread.
3462   int mMyPid;
3463 
3464   // The sampler thread's ID.  Used to assert that it is not sampling itself,
3465   // which would lead to deadlock.
3466   int mSamplerTid;
3467 
3468  public:
3469   // This is the one-and-only variable used to communicate between the sampler
3470   // thread and the samplee thread's signal handler. It's static because the
3471   // samplee thread's signal handler is static.
3472   static struct SigHandlerCoordinator* sSigHandlerCoordinator;
3473 #endif
3474 };
3475 
3476 // END Sampler
3477 ////////////////////////////////////////////////////////////////////////
3478 
3479 // Platform-specific function that retrieves per-thread CPU measurements.
3480 static RunningTimes GetThreadRunningTimesDiff(
3481     PSLockRef aLock, const RegisteredThread& aRegisteredThread);
3482 static void ClearThreadRunningTimes(PSLockRef aLock,
3483                                     const RegisteredThread& aRegisteredThread);
3484 
3485 // Template function to be used by `GetThreadRunningTimesDiff()` (unless some
3486 // platform has a better way to achieve this).
3487 // It helps perform CPU measurements and tie them to a timestamp, such that the
3488 // measurements and timestamp are very close together.
3489 // This is necessary, because the relative CPU usage is computed by dividing
3490 // consecutive CPU measurements by their timestamp difference; if there was an
3491 // unexpected big gap, it could skew this computation and produce impossible
3492 // spikes that would hide the rest of the data. See bug 1685938 for more info.
3493 // Note that this may call the measurement function more than once; it is
3494 // assumed to normally be fast.
3495 // This was verified experimentally, but there is currently no regression
3496 // testing for it; see follow-up bug 1687402.
3497 template <typename GetCPURunningTimesFunction>
3498 RunningTimes GetRunningTimesWithTightTimestamp(
3499     GetCPURunningTimesFunction&& aGetCPURunningTimesFunction) {
3500   // Once per process, compute a threshold over which running times and their
3501   // timestamp is considered too far apart.
3502   static const TimeDuration scMaxRunningTimesReadDuration = [&]() {
3503     // Run the main CPU measurements + timestamp a number of times and capture
3504     // their durations.
3505     constexpr int loops = 128;
3506     TimeDuration durations[loops];
3507     RunningTimes runningTimes;
3508     TimeStamp before = TimeStamp::NowUnfuzzed();
3509     for (int i = 0; i < loops; ++i) {
3510       AUTO_PROFILER_STATS(GetRunningTimes_MaxRunningTimesReadDuration);
3511       aGetCPURunningTimesFunction(runningTimes);
3512       const TimeStamp after = TimeStamp::NowUnfuzzed();
3513       durations[i] = after - before;
3514       before = after;
3515     }
3516     // Move median duration to the middle.
3517     std::nth_element(&durations[0], &durations[loops / 2], &durations[loops]);
3518     // Use median*8 as cut-off point.
3519     // Typical durations should be around a microsecond; the cut-off should then
3520     // be around 10 microseconds, well below the expected minimum inter-sample
3521     // interval (observed as a few milliseconds), so overall this should keep
3522     // cpu/interval measurement spikes to a minimum.
3523     return durations[loops / 2] * 8;
3524   }();
3525 
3526   // Record CPU measurements between two timestamps.
3527   RunningTimes runningTimes;
3528   TimeStamp before = TimeStamp::NowUnfuzzed();
3529   aGetCPURunningTimesFunction(runningTimes);
3530   TimeStamp after = TimeStamp::NowUnfuzzed();
3531   // In most cases, the above should be quick enough. But if not, repeat:
3532   while (MOZ_UNLIKELY(after - before > scMaxRunningTimesReadDuration)) {
3533     AUTO_PROFILER_STATS(GetRunningTimes_REDO);
3534     before = after;
3535     aGetCPURunningTimesFunction(runningTimes);
3536     after = TimeStamp::NowUnfuzzed();
3537   }
3538   // Finally, record the closest timestamp just after the final measurement was
3539   // done. This must stay *after* the CPU measurements.
3540   runningTimes.SetPostMeasurementTimeStamp(after);
3541 
3542   return runningTimes;
3543 }
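// A minimal usage sketch (hypothetical platform code; `ReadPlatformCpuTimes`
// and `registeredThread` are not real names from this file): the callback
// fills in the RunningTimes it is given, and this wrapper re-reads until the
// measurement and its timestamp end up close together.
//
//   RunningTimes diff = GetRunningTimesWithTightTimestamp(
//       [&](RunningTimes& aRunningTimes) {
//         // (hypothetical helper) read this thread's CPU counters, e.g. from
//         // /proc/self/task/<tid>/stat on Linux, into aRunningTimes.
//         ReadPlatformCpuTimes(registeredThread, aRunningTimes);
//       });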
3544 
3545 ////////////////////////////////////////////////////////////////////////
3546 // BEGIN SamplerThread
3547 
3548 // The sampler thread controls sampling and runs whenever the profiler is
3549 // active. It periodically runs through all registered threads, finds those
3550 // that should be sampled, then pauses and samples them.
3551 
3552 class SamplerThread {
3553  public:
3554   // Creates a sampler thread, but doesn't start it.
3555   SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
3556                 double aIntervalMilliseconds, bool aStackWalkEnabled,
3557                 bool aNoTimerResolutionChange);
3558   ~SamplerThread();
3559 
3560   // This runs on (is!) the sampler thread.
3561   void Run();
3562 
3563   // This runs on the main thread.
3564   void Stop(PSLockRef aLock);
3565 
3566   void AppendPostSamplingCallback(PSLockRef, PostSamplingCallback&& aCallback) {
3567     // We are under lock, so it's safe to just modify the list pointer.
3568     // Also this means the sampler has not started its run yet, so any callback
3569     // added now will be invoked at the end of the next loop; this guarantees
3570     // that the callback will be invoked after at least one full sampling loop.
3571     mPostSamplingCallbackList = MakeUnique<PostSamplingCallbackListItem>(
3572         std::move(mPostSamplingCallbackList), std::move(aCallback));
3573   }
3574 
3575  private:
3576   // Item containing a post-sampling callback, and a tail-list of more items.
3577   // Using a linked list means no need to move items when adding more, and
3578   // "stealing" the whole list is one pointer move.
3579   struct PostSamplingCallbackListItem {
3580     UniquePtr<PostSamplingCallbackListItem> mPrev;
3581     PostSamplingCallback mCallback;
3582 
3583     PostSamplingCallbackListItem(UniquePtr<PostSamplingCallbackListItem> aPrev,
3584                                  PostSamplingCallback&& aCallback)
3585         : mPrev(std::move(aPrev)), mCallback(std::move(aCallback)) {}
3586   };
3587 
3588   [[nodiscard]] UniquePtr<PostSamplingCallbackListItem>
3589   TakePostSamplingCallbacks(PSLockRef) {
3590     return std::move(mPostSamplingCallbackList);
3591   }
3592 
3593   static void InvokePostSamplingCallbacks(
3594       UniquePtr<PostSamplingCallbackListItem> aCallbacks,
3595       SamplingState aSamplingState) {
3596     if (!aCallbacks) {
3597       return;
3598     }
3599     // We want to drill down to the last element in this list, which is the
3600     // oldest one, so that we invoke them in FIFO order.
3601     // We don't expect many callbacks, so it's safe to recurse. Note that we're
3602     // moving-from the UniquePtr, so the tail will implicitly get destroyed.
3603     InvokePostSamplingCallbacks(std::move(aCallbacks->mPrev), aSamplingState);
3604     // We are going to destroy this item, so we can safely move-from the
3605     // callback before calling it (in case it has an rvalue-ref-qualified call
3606     // operator).
3607     std::move(aCallbacks->mCallback)(aSamplingState);
3608     // It may be tempting for a future maintainer to change aCallbacks into an
3609     // rvalue reference; this will remind them not to do that!
3610     static_assert(
3611         std::is_same_v<decltype(aCallbacks),
3612                        UniquePtr<PostSamplingCallbackListItem>>,
3613         "We need to capture the list by-value, to implicitly destroy it");
3614   }
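  // For example, if callbacks A, B and C are appended in that order, the list
  // is stored as C -> B -> A (newest first); the recursion above walks to A
  // first, so the callbacks are still invoked in FIFO order: A, B, then C.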
3615 
3616   // This suspends the calling thread for the given number of microseconds.
3617   // Best effort timing.
3618   void SleepMicro(uint32_t aMicroseconds);
3619 
3620   // The sampler used to suspend and sample threads.
3621   Sampler mSampler;
3622 
3623   // The activity generation, for detecting when the sampler thread must stop.
3624   const uint32_t mActivityGeneration;
3625 
3626   // The interval between samples, measured in microseconds.
3627   const int mIntervalMicroseconds;
3628 
3629   // The OS-specific handle for the sampler thread.
3630 #if defined(GP_OS_windows)
3631   HANDLE mThread;
3632 #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
3633     defined(GP_OS_android) || defined(GP_OS_freebsd)
3634   pthread_t mThread;
3635 #endif
3636 
3637   // Post-sampling callbacks are kept in a simple linked list, which will be
3638   // stolen by the sampler thread at the end of its next run.
3639   UniquePtr<PostSamplingCallbackListItem> mPostSamplingCallbackList;
3640 
3641 #if defined(GP_OS_windows)
3642   bool mNoTimerResolutionChange = true;
3643 #endif
3644 
3645   SamplerThread(const SamplerThread&) = delete;
3646   void operator=(const SamplerThread&) = delete;
3647 };
3648 
3649 // [[nodiscard]] static
3650 bool ActivePS::AppendPostSamplingCallback(PSLockRef aLock,
3651                                           PostSamplingCallback&& aCallback) {
3652   if (!sInstance || !sInstance->mSamplerThread) {
3653     return false;
3654   }
3655   sInstance->mSamplerThread->AppendPostSamplingCallback(aLock,
3656                                                         std::move(aCallback));
3657   return true;
3658 }
3659 
3660 // This function is required because we need to create a SamplerThread within
3661 // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
3662 // could probably be removed by moving some code around.
3663 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
3664                                        double aInterval, bool aStackWalkEnabled,
3665                                        bool aNoTimerResolutionChange) {
3666   return new SamplerThread(aLock, aGeneration, aInterval, aStackWalkEnabled,
3667                            aNoTimerResolutionChange);
3668 }
3669 
3670 // This function is the sampler thread.  This implementation is used for all
3671 // targets.
3672 void SamplerThread::Run() {
3673   PR_SetCurrentThreadName("SamplerThread");
3674 
3675   // Features won't change during this SamplerThread's lifetime, so we can read
3676   // them once and store them locally.
3677   const uint32_t features = []() -> uint32_t {
3678     PSAutoLock lock(gPSMutex);
3679     if (!ActivePS::Exists(lock)) {
3680       // If there is no active profiler, it doesn't matter what we return,
3681       // because this thread will exit before any feature is used.
3682       return 0;
3683     }
3684     return ActivePS::Features(lock);
3685   }();
3686 
3687   // Not *no*-stack-sampling means we do want stack sampling.
3688   const bool stackSampling = !ProfilerFeature::HasNoStackSampling(features);
3689 
3690   const bool cpuUtilization = ProfilerFeature::HasCPUUtilization(features);
3691 
3692   // Use local ProfileBuffer and underlying buffer to capture the stack.
3693   // (This is to avoid touching the CorePS::CoreBuffer lock while a thread is
3694   // suspended, because that thread could be working with the CorePS::CoreBuffer
3695   // as well.)
3696   mozilla::ProfileBufferChunkManagerSingle localChunkManager(
3697       ProfileBufferChunkManager::scExpectedMaximumStackSize);
3698   ProfileChunkedBuffer localBuffer(
3699       ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
3700   ProfileBuffer localProfileBuffer(localBuffer);
3701 
3702   // Will be kept between collections, to know what each collection does.
3703   auto previousState = localBuffer.GetState();
3704 
3705   // This will be set inside the loop, from inside the lock scope, to capture
3706   // all callbacks added before that, but none after the lock is released.
3707   UniquePtr<PostSamplingCallbackListItem> postSamplingCallbacks;
3708   // This will be set inside the loop, before invoking callbacks outside.
3709   SamplingState samplingState{};
3710 
3711   const TimeDuration sampleInterval =
3712       TimeDuration::FromMicroseconds(mIntervalMicroseconds);
3713   const uint32_t minimumIntervalSleepUs =
3714       static_cast<uint32_t>(mIntervalMicroseconds / 4);
3715 
3716   // This is the scheduled time at which each sampling loop should start.
3717   // It will determine the ideal next sampling start by adding the expected
3718   // interval, unless when sampling runs late -- See end of while() loop.
3719   TimeStamp scheduledSampleStart = TimeStamp::NowUnfuzzed();
3720 
3721   while (true) {
3722     const TimeStamp sampleStart = TimeStamp::NowUnfuzzed();
3723 
3724     // This scope is for |lock|. It ends before we sleep below.
3725     {
3726       // There should be no local callbacks left from a previous loop.
3727       MOZ_ASSERT(!postSamplingCallbacks);
3728 
3729       PSAutoLock lock(gPSMutex);
3730       TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();
3731 
3732       // Move all the post-sampling callbacks locally, so that new ones cannot
3733       // sneak in between the end of the lock scope and the invocation after it.
3734       postSamplingCallbacks = TakePostSamplingCallbacks(lock);
3735 
3736       if (!ActivePS::Exists(lock)) {
3737         // Exit the `while` loop, including the lock scope, before invoking
3738         // callbacks and returning.
3739         samplingState = SamplingState::JustStopped;
3740         break;
3741       }
3742 
3743       // At this point profiler_stop() might have been called, and
3744       // profiler_start() might have been called on another thread. If this
3745       // happens the generation won't match.
3746       if (ActivePS::Generation(lock) != mActivityGeneration) {
3747         samplingState = SamplingState::JustStopped;
3748         // Exit the `while` loop, including the lock scope, before invoking
3749         // callbacks and returning.
3750         break;
3751       }
3752 
3753       ActivePS::ClearExpiredExitProfiles(lock);
3754 
3755       TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();
3756 
3757       if (!ActivePS::IsSamplingPaused(lock)) {
3758         double sampleStartDeltaMs =
3759             (sampleStart - CorePS::ProcessStartTime()).ToMilliseconds();
3760         ProfileBuffer& buffer = ActivePS::Buffer(lock);
3761 
3762         // handle per-process generic counters
3763         const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
3764         for (auto& counter : counters) {
3765           // create Buffer entries for each counter
3766           buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
3767           buffer.AddEntry(ProfileBufferEntry::Time(sampleStartDeltaMs));
3768           // XXX support keyed maps of counts
3769           // In the future, we'll support keyed counters - for example, counters
3770           // with a key that is a thread ID. For "simple" counters we'll just
3771           // use a key of 0.
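          // Summary: each pass of this loop writes the entry sequence
          //   CounterId, Time, CounterKey(0), Count, and Number (the latter
          //   only when non-zero) into the buffer.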
3772           int64_t count;
3773           uint64_t number;
3774           counter->Sample(count, number);
3775 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
3776           if (ActivePS::IsMemoryCounter(counter)) {
3777             // For the memory counter, subtract the size of our buffer to avoid
3778             // giving the misleading impression that the memory use keeps on
3779             // growing when it's just the profiler session that's using a larger
3780             // buffer as it gets longer.
3781             count -= static_cast<int64_t>(
3782                 ActivePS::ControlledChunkManager(lock).TotalSize());
3783           }
3784 #endif
3785           buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
3786           buffer.AddEntry(ProfileBufferEntry::Count(count));
3787           if (number) {
3788             buffer.AddEntry(ProfileBufferEntry::Number(number));
3789           }
3790         }
3791         TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
3792 
3793         if (stackSampling || cpuUtilization) {
3794           samplingState = SamplingState::SamplingCompleted;
3795 
3796           const Vector<LiveProfiledThreadData>& liveThreads =
3797               ActivePS::LiveProfiledThreads(lock);
3798 
3799           for (auto& thread : liveThreads) {
3800             RegisteredThread* registeredThread = thread.mRegisteredThread;
3801             ProfiledThreadData* profiledThreadData =
3802                 thread.mProfiledThreadData.get();
3803             RefPtr<ThreadInfo> info = registeredThread->Info();
3804 
3805             const RunningTimes runningTimesDiff = [&]() {
3806               if (!cpuUtilization) {
3807                 // If we don't need CPU measurements, we only need a timestamp.
3808                 return RunningTimes(TimeStamp::NowUnfuzzed());
3809               }
3810               return GetThreadRunningTimesDiff(lock, *registeredThread);
3811             }();
3812 
3813             const TimeStamp& now = runningTimesDiff.PostMeasurementTimeStamp();
3814             double threadSampleDeltaMs =
3815                 (now - CorePS::ProcessStartTime()).ToMilliseconds();
3816 
3817             // If the thread is asleep and has been sampled before in the same
3818             // sleep episode, or otherwise(*) if there was zero CPU activity
3819             // since the previous sampling, find and copy the previous sample,
3820             // as that's cheaper than taking a new sample.
3821             // (*) Tech note: The asleep check is done first and always, because
3822             //     it is more reliable, and knows if it's the first asleep
3823             //     sample, which cannot be duplicated; if the test was the other
3824             //     way around, it could find zero CPU and then short-circuit
3825             //     that state-changing second-asleep-check operation, which
3826             //     could result in an unneeded sample.
3827             // However, we're using the current running times (instead of
3828             // copying the old ones) because some work could have happened.
3829             if (registeredThread->RacyRegisteredThread()
3830                     .CanDuplicateLastSampleDueToSleep() ||
3831                 runningTimesDiff.GetThreadCPUDelta() == Some(uint64_t(0))) {
3832               const bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
3833                   info->ThreadId(), threadSampleDeltaMs,
3834                   profiledThreadData->LastSample(), runningTimesDiff);
3835               if (dup_ok) {
3836                 continue;
3837               }
3838             }
3839 
3840             AUTO_PROFILER_STATS(gecko_SamplerThread_Run_DoPeriodicSample);
3841 
3842             // Record the global profiler buffer's range start now, before
3843             // adding the first entry for this thread's sample.
3844             const uint64_t bufferRangeStart = buffer.BufferRangeStart();
3845 
3846             // Add the thread ID now, so we know its position in the main
3847             // buffer, which is used by some JS data.
3848             // (DoPeriodicSample only knows about the temporary local buffer.)
3849             const uint64_t samplePos =
3850                 buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
3851             profiledThreadData->LastSample() = Some(samplePos);
3852 
3853             // Also add the time, so it's always there after the thread ID, as
3854             // expected by the parser. (Other stack data is optional.)
3855             buffer.AddEntry(ProfileBufferEntry::TimeBeforeCompactStack(
3856                 threadSampleDeltaMs));
3857 
3858             Maybe<double> unresponsiveDuration_ms;
3859 
3860             // If we have RunningTimes data, store it before the CompactStack.
3861             // Note: It is not stored inside the CompactStack so that it doesn't
3862             // get incorrectly duplicated when the thread is sleeping.
3863             if (!runningTimesDiff.IsEmpty()) {
3864               CorePS::CoreBuffer().PutObjects(
3865                   ProfileBufferEntry::Kind::RunningTimes, runningTimesDiff);
3866             }
3867 
3868             if (stackSampling) {
3869               // Suspend the thread and collect its stack data in the local
3870               // buffer.
3871               mSampler.SuspendAndSampleAndResumeThread(
3872                   lock, *registeredThread, now,
3873                   [&](const Registers& aRegs, const TimeStamp& aNow) {
3874                     DoPeriodicSample(lock, *registeredThread, aRegs, samplePos,
3875                                      bufferRangeStart, localProfileBuffer);
3876 
3877                     // For "eventDelay", we want the input delay - but if
3878                     // there are no events in the input queue (or even if there
3879                     // are), we're interested in how long the delay *would* be
3880                     // for an input event now, which would be the time to finish
3881                     // the current event + the delay caused by any events
3882                     // already in the input queue (plus any High priority
3883                     // events).  In a PrioritizedEventQueue, events at lower
3884                     // priorities than Input count toward input delay
3885                     // only for the duration that they're running, since when
3886                     // they finish, any queued input event would run.
3887                     //
3888                     // Unless we record the time state of all events and queue
3889                     // states at all times, this is hard to precisely calculate,
3890                     // but we can approximate it well in post-processing with
3891                     // RunningEventDelay and RunningEventStart.
3892                     //
3893                     // RunningEventDelay is the time duration the event was
3894                     // queued before starting execution.  RunningEventStart is
3895                     // the time the event started. (Note: since we care about
3896                     // Input event delays on the MainThread, for
3897                     // PrioritizedEventQueues we return 0 for RunningEventDelay
3898                     // if the currently running event has a lower priority than
3899                     // Input, because Input events won't queue behind them.)
3900                     //
3901                     // To directly measure this we would need to record the time
3902                     // at which the newest event currently in each queue at time
3903                     // X (the sample time) finishes running.  This of course
3904                     // would require looking into the future, or recording all
3905                     // this state and then post-processing it later. If we were
3906                     // to trace every event start and end we could do this, but
3907                     // it would have significant overhead to do so (and buffer
3908                     // usage).  From a recording of RunningEventDelays and
3909                     // RunningEventStarts we can infer the actual delay:
3910                     //
3911                     // clang-format off
3912                     // Event queue: <tail> D  :  C  :  B  : A <head>
3913                     // Time inserted (ms): 40 :  20 : 10  : 0
3914                     // Run Time (ms):      30 : 100 : 40  : 30
3915                     //
3916                     // 0    10   20   30   40   50   60   70   80   90  100  110  120  130  140  150  160  170
3917                     // [A||||||||||||]
3918                     //      ----------[B|||||||||||||||||]
3919                     //           -------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
3920                     //                     -----------------------------------------------------------------[D|||||||||...]
3921                     //
3922                     // Calculate the delay of a new event added at time t: (run every sample)
3923                     //    TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
3924                     //    effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
3925                     //    delta = (now - last_sample_time);
3926                     //    last_sample_time = now;
3927                     //    for (t=effective_submission to now) {
3928                     //       delay[t] += delta;
3929                     //    }
3930                     //
3931                     // Can be reduced in overhead by:
3932                     //    TimeSinceRunningEventBlockedInputEvents = RunningEventDelay + (now - RunningEventStart);
3933                     //    effective_submission = now - TimeSinceRunningEventBlockedInputEvents;
3934                     //    if (effective_submission != last_submission) {
3935                     //      delta = (now - last_submission);
3936                     //      // this loop should be made to match each sample point in the range
3937                     //      // instead of assuming 1ms sampling as this pseudocode does
3938                     //      for (t=last_submission to effective_submission-1) {
3939                     //         delay[t] += delta;
3940                     //         delta -= 1; // assumes 1ms; adjust as needed to match for()
3941                     //      }
3942                     //      last_submission = effective_submission;
3943                     //    }
3944                     //
3945                     // Time  Head of queue   Running Event  RunningEventDelay  Delay of       Effective     Started    Calc (submission->now add 10ms)  Final
3946                     //                                                         hypothetical   Submission    Running @                                   result
3947                     //                                                         event E
3948                     // 0        Empty            A                0                30              0           0       @0=10                             30
3949                     // 10         B              A                0                60              0           0       @0=20, @10=10                     60
3950                     // 20         B              A                0               150              0           0       @0=30, @10=20, @20=10            150
3951                     // 30         C              B               20               140             10          30       @10=20, @20=10, @30=0            140
3952                     // 40         C              B               20               160                                  @10=30, @20=20...                160
3953                     // 50         C              B               20               150                                                                   150
3954                     // 60         C              B               20               140                                  @10=50, @20=40...                140
3955                     // 70         D              C               50               130             20          70       @20=50, @30=40...                130
3956                     // ...
3957                     // 160        D              C               50                40                                  @20=140, @30=130...               40
3958                     // 170      <empty>          D              140                30             40                   @40=140, @50=130... (rounding)    30
3959                     // 180      <empty>          D              140                20             40                   @40=150                           20
3960                     // 190      <empty>          D              140                10             40                   @40=160                           10
3961                     // 200      <empty>        <empty>            0                 0             NA                                                      0
3962                     //
3963                     // Function Delay(t) = the time between t and the time at which a hypothetical
3964                     // event e would start executing, if e was enqueued at time t.
3965                     //
3966                     // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
3967                     //               // instantly.
3968                     // Delay(0) = 30 // The hypothetical event e got enqueued just after A got
3969                     //               // enqueued. It can start running at 30, when A is done.
3970                     // Delay(5) = 25
3971                     // Delay(10) = 60 // Can start running at 70, after both A and B are done.
3972                     // Delay(19) = 51
3973                     // Delay(20) = 150 // Can start running at 170, after A, B & C.
3974                     // Delay(25) = 145
3975                     // Delay(30) = 170 // Can start running at 200, after A, B, C & D.
3976                     // Delay(120) = 80
3977                     // Delay(200) = 0 // (assuming nothing was enqueued after D)
3978                     //
3979                     // For every event that gets enqueued, the Delay time will go up by the
3980                     // event's running time at the time at which the event is enqueued.
3981                     // The Delay function will be a sawtooth of the following shape:
3982                     //
3983                     //             |\           |...
3984                     //             | \          |
3985                     //        |\   |  \         |
3986                     //        | \  |   \        |
3987                     //     |\ |  \ |    \       |
3988                     //  |\ | \|   \|     \      |
3989                     //  | \|              \     |
3990                     // _|                  \____|
3991                     //
3992                     //
3993                     // A more complex example with a PrioritizedEventQueue:
3994                     //
3995                     // Event queue: <tail> D  :  C  :  B  : A <head>
3996                     // Time inserted (ms): 40 :  20 : 10  : 0
3997                     // Run Time (ms):      30 : 100 : 40  : 30
3998                     // Priority:         Input: Norm: Norm: Norm
3999                     //
4000                     // 0    10   20   30   40   50   60   70   80   90  100  110  120  130  140  150  160  170
4001                     // [A||||||||||||]
4002                     //      ----------[B|||||||||||||||||]
4003                     //           ----------------------------------------[C|||||||||||||||||||||||||||||||||||||||||||||||]
4004                     //                     ---------------[D||||||||||||]
4005                     //
4006                     //
4007                     // Time  Head of queue   Running Event  RunningEventDelay  Delay of       Effective   Started    Calc (submission->now add 10ms)   Final
4008                     //                                                         hypothetical   Submission  Running @                                    result
4009                     //                                                         event
4010                     // 0        Empty            A                0                30              0           0       @0=10                             30
4011                     // 10         B              A                0                20              0           0       @0=20, @10=10                     20
4012                     // 20         B              A                0                10              0           0       @0=30, @10=20, @20=10             10
4013                     // 30         C              B                0                40             30          30       @30=10                            40
4014                     // 40         C              B                0                60             30                   @40=10, @30=20                    60
4015                     // 50         C              B                0                50             30                   @50=10, @40=20, @30=30            50
4016                     // 60         C              B                0                40             30                   @60=10, @50=20, @40=30, @30=40    40
4017                     // 70         C              D               30                30             40          70       @60=20, @50=30, @40=40            30
4018                     // 80         C              D               30                20             40          70       ...@50=40, @40=50                 20
4019                     // 90         C              D               30                10             40          70       ...@60=40, @50=50, @40=60         10
4020                     // 100      <empty>          C                0               100             100        100       @100=10                          100
4021                     // 110      <empty>          C                0                90             100        100       @110=10, @100=20                  90
4022 
4023                     //
4024                     // For PrioritizedEventQueue, the definition of the Delay(t) function is adjusted: the hypothetical event e has Input priority.
4025                     // Delay(-1) = 0 // Before A was enqueued. No wait time, can start running
4026                     //               // instantly.
4027                     // Delay(0) = 30 // The hypothetical input event e got enqueued just after A got
4028                     //               // enqueued. It can start running at 30, when A is done.
4029                     // Delay(5) = 25
4030                     // Delay(10) = 20
4031                     // Delay(25) = 5 // B has been queued, but e does not need to wait for B because e has Input priority and B does not.
4032                     //               // So e can start running at 30, when A is done.
4033                     // Delay(30) = 40 // Can start running at 70, after B is done.
4034                     // Delay(40) = 60 // Can start at 100, after B and D are done (D is Input Priority)
4035                     // Delay(80) = 20
4036                     // Delay(100) = 100 // Wait for C to finish
4037 
4038                     // clang-format on
4039                     //
4040                     // Alternatively we could insert (recycled instead of
4041                     // allocated/freed) input events at every sample period
4042                     // (1ms...), and use them to back-calculate the delay.  This
4043                     // might also be somewhat expensive, and would require
4044                     // guessing at the maximum delay, which would likely be on
4045                     // the order of seconds, so you'd need thousands of
4046                     // pre-allocated events per queue per thread, which would
4047                     // have a memory impact as well.
4048 
4049                     TimeDuration currentEventDelay;
4050                     TimeDuration currentEventRunning;
4051                     registeredThread->GetRunningEventDelay(
4052                         aNow, currentEventDelay, currentEventRunning);
4053 
4054                     // Note: eventDelay is a different definition of
4055                     // responsiveness than the 16ms event injection.
4056 
4057                     // Don't suppress 0's for now; that can be a future
4058                     // optimization.  We probably want one zero to be stored
4059                     // before we start suppressing, which would be more
4060                     // complex.
4061                     unresponsiveDuration_ms =
4062                         Some(currentEventDelay.ToMilliseconds() +
4063                              currentEventRunning.ToMilliseconds());
4064                   });
4065 
4066               // If we got eventDelay data, store it before the CompactStack.
4067               // Note: It is not stored inside the CompactStack so that it
4068               // doesn't get incorrectly duplicated when the thread is sleeping.
4069               if (unresponsiveDuration_ms.isSome()) {
4070                 CorePS::CoreBuffer().PutObjects(
4071                     ProfileBufferEntry::Kind::UnresponsiveDurationMs,
4072                     *unresponsiveDuration_ms);
4073               }
4074             }
4075 
4076             // There *must* be a CompactStack after a TimeBeforeCompactStack;
4077             // but note that other entries may have been concurrently inserted
4078             // between the TimeBeforeCompactStack above and now. If the captured
4079             // sample from `DoPeriodicSample` is complete, copy it into the
4080             // global buffer; otherwise add an empty one to satisfy the parser,
4081             // which expects one.
4082             auto state = localBuffer.GetState();
4083             if (NS_WARN_IF(state.mFailedPutBytes !=
4084                            previousState.mFailedPutBytes)) {
4085               LOG("Stack sample too big for local storage, failed to store %u "
4086                   "bytes",
4087                   unsigned(state.mFailedPutBytes -
4088                            previousState.mFailedPutBytes));
4089               // There *must* be a CompactStack after a TimeBeforeCompactStack,
4090               // even an empty one.
4091               CorePS::CoreBuffer().PutObjects(
4092                   ProfileBufferEntry::Kind::CompactStack,
4093                   UniquePtr<ProfileChunkedBuffer>(nullptr));
4094             } else if (state.mRangeEnd - previousState.mRangeEnd >=
4095                        *CorePS::CoreBuffer().BufferLength()) {
4096               LOG("Stack sample too big for profiler storage, needed %u bytes",
4097                   unsigned(state.mRangeEnd - previousState.mRangeEnd));
4098               // There *must* be a CompactStack after a TimeBeforeCompactStack,
4099               // even an empty one.
4100               CorePS::CoreBuffer().PutObjects(
4101                   ProfileBufferEntry::Kind::CompactStack,
4102                   UniquePtr<ProfileChunkedBuffer>(nullptr));
4103             } else {
4104               CorePS::CoreBuffer().PutObjects(
4105                   ProfileBufferEntry::Kind::CompactStack, localBuffer);
4106             }
4107 
4108             // Clean up for the next run.
4109             localBuffer.Clear();
4110             previousState = localBuffer.GetState();
4111           }
4112         } else {
4113           samplingState = SamplingState::NoStackSamplingCompleted;
4114         }
4115 
4116 #if defined(USE_LUL_STACKWALK)
4117         // The LUL unwind object accumulates frame statistics. Periodically we
4118         // should poke it to give it a chance to print those statistics.  This
4119         // involves doing I/O (fprintf, __android_log_print, etc.) and so
4120         // can't safely be done from the critical section inside
4121         // SuspendAndSampleAndResumeThread, which is why it is done here.
4122         lul::LUL* lul = CorePS::Lul(lock);
4123         if (lul) {
4124           lul->MaybeShowStats();
4125         }
4126 #endif
4127         TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();
4128 
4129         {
4130           AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
4131           ActivePS::FulfillChunkRequests(lock);
4132         }
4133 
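        // Record how long each phase of this sampling pass took (acquiring the
        // lock, clearing expired exit profiles, sampling counters, sampling
        // threads), so the profiler's own overhead can be tracked.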
4134         buffer.CollectOverheadStats(sampleStartDeltaMs,
4135                                     lockAcquired - sampleStart,
4136                                     expiredMarkersCleaned - lockAcquired,
4137                                     countersSampled - expiredMarkersCleaned,
4138                                     threadsSampled - countersSampled);
4139       } else {
4140         samplingState = SamplingState::SamplingPaused;
4141       }
4142     }
4143     // gPSMutex is not held after this point.
4144 
4145     // Invoke end-of-sampling callbacks outside of the locked scope.
4146     InvokePostSamplingCallbacks(std::move(postSamplingCallbacks),
4147                                 samplingState);
4148 
4149     ProfilerChild::ProcessPendingUpdate();
4150 
4151     // We expect the next sampling loop to start `sampleInterval` after this
4152     // loop here was scheduled to start.
4153     scheduledSampleStart += sampleInterval;
4154 
4155     // Try to sleep until we reach that next scheduled time.
4156     const TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
4157     if (scheduledSampleStart >= beforeSleep) {
4158       // There is still time before the next scheduled sample time.
4159       const uint32_t sleepTimeUs = static_cast<uint32_t>(
4160           (scheduledSampleStart - beforeSleep).ToMicroseconds());
4161       if (sleepTimeUs >= minimumIntervalSleepUs) {
4162         SleepMicro(sleepTimeUs);
4163       } else {
4164         // If we're too close to that time, sleep the minimum amount of time.
4165         // Note that the next scheduled start is not shifted, so at the end of
4166         // the next loop, sleep may again be adjusted to get closer to schedule.
4167         SleepMicro(minimumIntervalSleepUs);
4168       }
4169     } else {
4170       // This sampling loop ended after the next sampling should have started!
4171       // There is little point in trying to keep to the schedule now: it
4172       // would require more work, and we're likely late because the system
4173       // is already busy. Try to restart a normal schedule from now.
4174       scheduledSampleStart = beforeSleep + sampleInterval;
4175       SleepMicro(static_cast<uint32_t>(sampleInterval.ToMicroseconds()));
4176     }
4177   }
4178 
4179   // End of `while` loop. We can only be here from a `break` inside the loop.
4180   InvokePostSamplingCallbacks(std::move(postSamplingCallbacks), samplingState);
4181 }
4182 
4183 // We #include these files directly because it means those files can use
4184 // declarations from this file trivially.  These provide target-specific
4185 // implementations of all SamplerThread methods except Run().
4186 #if defined(GP_OS_windows)
4187 #  include "platform-win32.cpp"
4188 #elif defined(GP_OS_darwin)
4189 #  include "platform-macos.cpp"
4190 #elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
4191 #  include "platform-linux-android.cpp"
4192 #else
4193 #  error "bad platform"
4194 #endif
4195 
4196 UniquePlatformData AllocPlatformData(int aThreadId) {
4197   return UniquePlatformData(new PlatformData(aThreadId));
4198 }
4199 
4200 void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; }
4201 
4202 // END SamplerThread
4203 ////////////////////////////////////////////////////////////////////////
4204 
4205 ////////////////////////////////////////////////////////////////////////
4206 // BEGIN externally visible functions
4207 
4208 MOZ_DEFINE_MALLOC_SIZE_OF(GeckoProfilerMallocSizeOf)
4209 
4210 NS_IMETHODIMP
4211 GeckoProfilerReporter::CollectReports(nsIHandleReportCallback* aHandleReport,
4212                                       nsISupports* aData, bool aAnonymize) {
4213   MOZ_RELEASE_ASSERT(NS_IsMainThread());
4214 
4215   size_t profSize = 0;
4216   size_t lulSize = 0;
4217 
4218   {
4219     PSAutoLock lock(gPSMutex);
4220 
4221     if (CorePS::Exists()) {
4222       CorePS::AddSizeOf(lock, GeckoProfilerMallocSizeOf, profSize, lulSize);
4223     }
4224 
4225     if (ActivePS::Exists(lock)) {
4226       profSize += ActivePS::SizeOf(lock, GeckoProfilerMallocSizeOf);
4227     }
4228   }
4229 
4230   MOZ_COLLECT_REPORT(
4231       "explicit/profiler/profiler-state", KIND_HEAP, UNITS_BYTES, profSize,
4232       "Memory used by the Gecko Profiler's global state (excluding memory used "
4233       "by LUL).");
4234 
4235 #if defined(USE_LUL_STACKWALK)
4236   MOZ_COLLECT_REPORT(
4237       "explicit/profiler/lul", KIND_HEAP, UNITS_BYTES, lulSize,
4238       "Memory used by LUL, a stack unwinder used by the Gecko Profiler.");
4239 #endif
4240 
4241   return NS_OK;
4242 }
4243 
4244 NS_IMPL_ISUPPORTS(GeckoProfilerReporter, nsIMemoryReporter)
4245 
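// Maps one feature name (e.g. "js") to its ProfilerFeature bit(s); "default"
// expands to the default feature set (plus startup extras when aIsStartup is
// true), and an unrecognized name prints the usage text and contributes no
// bits.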
4246 static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
4247   if (strcmp(aFeature, "default") == 0) {
4248     return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
4249                        : DefaultFeatures()) &
4250            AvailableFeatures();
4251   }
4252 
4253 #define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
4254   if (strcmp(aFeature, str_) == 0) {              \
4255     return ProfilerFeature::Name_;                \
4256   }
4257 
4258   PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)
4259 
4260 #undef PARSE_FEATURE_BIT
4261 
4262   printf("\nUnrecognized feature \"%s\".\n\n", aFeature);
4263   // Since this may be an old feature we no longer implement, don't exit.
4264   PrintUsageThenExit(0);
4265   return 0;
4266 }
4267 
4268 uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
4269                                       uint32_t aFeatureCount,
4270                                       bool aIsStartup /* = false */) {
4271   uint32_t features = 0;
4272   for (size_t i = 0; i < aFeatureCount; i++) {
4273     features |= ParseFeature(aFeatures[i], aIsStartup);
4274   }
4275   return features;
4276 }
4277 
4278 static bool IsRegisteredThreadInRegisteredThreadsList(
4279     PSLockRef aLock, RegisteredThread* aThread) {
4280   const auto& registeredThreads = CorePS::RegisteredThreads(aLock);
4281   for (const auto& registeredThread : registeredThreads) {
4282     if (registeredThread.get() == aThread) {
4283       return true;
4284     }
4285   }
4286 
4287   return false;
4288 }
4289 
4290 static ProfilingStack* locked_register_thread(PSLockRef aLock,
4291                                               const char* aName,
4292                                               void* aStackTop) {
4293   MOZ_RELEASE_ASSERT(CorePS::Exists());
4294 
4295   VTUNE_REGISTER_THREAD(aName);
4296 
4297   if (!TLSRegisteredThread::IsTLSInited()) {
4298     return nullptr;
4299   }
4300 
4301   RefPtr<ThreadInfo> info =
4302       new ThreadInfo(aName, profiler_current_thread_id(), NS_IsMainThread());
4303   UniquePtr<RegisteredThread> registeredThread = MakeUnique<RegisteredThread>(
4304       info, NS_GetCurrentThreadNoCreate(), aStackTop);
4305 
4306   TLSRegisteredThread::SetRegisteredThreadAndAutoProfilerLabelProfilingStack(
4307       aLock, registeredThread.get());
4308 
4309   if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) {
4310     registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
4311     nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
4312     ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
4313         aLock, registeredThread.get(),
4314         MakeUnique<ProfiledThreadData>(info, eventTarget));
4315 
4316     if (ActivePS::FeatureJS(aLock)) {
4317       // This StartJSSampling() call is on-thread, so we can poll manually to
4318       // start JS sampling immediately.
4319       registeredThread->StartJSSampling(ActivePS::JSFlags(aLock));
4320       registeredThread->PollJSSampling();
4321       if (registeredThread->GetJSContext()) {
4322         profiledThreadData->NotifyReceivedJSContext(
4323             ActivePS::Buffer(aLock).BufferRangeEnd());
4324       }
4325     }
4326   }
4327 
4328   MOZ_RELEASE_ASSERT(TLSRegisteredThread::RegisteredThread(aLock),
4329                      "TLS should be set when registering thread");
4330   MOZ_RELEASE_ASSERT(
4331       registeredThread == TLSRegisteredThread::RegisteredThread(aLock),
4332       "TLS should be set as expected when registering thread");
4333 
4334   ProfilingStack* profilingStack =
4335       &registeredThread->RacyRegisteredThread().ProfilingStack();
4336 
4337   CorePS::AppendRegisteredThread(aLock, std::move(registeredThread));
4338 
4339   return profilingStack;
4340 }
4341 
4342 static void NotifyObservers(const char* aTopic,
4343                             nsISupports* aSubject = nullptr) {
4344   if (!NS_IsMainThread()) {
4345     // Dispatch a task to the main thread that notifies observers.
4346     // If NotifyObservers is called both on and off the main thread within a
4347     // short time, the order of the notifications can be different from the
4348     // order of the calls to NotifyObservers.
4349     // Getting the order 100% right isn't that important at the moment, because
4350     // these notifications are only observed in the parent process, where the
4351     // profiler_* functions are currently only called on the main thread.
4352     nsCOMPtr<nsISupports> subject = aSubject;
4353     NS_DispatchToMainThread(NS_NewRunnableFunction(
4354         "NotifyObservers", [=] { NotifyObservers(aTopic, subject); }));
4355     return;
4356   }
4357 
4358   if (nsCOMPtr<nsIObserverService> os = services::GetObserverService()) {
4359     os->NotifyObservers(aSubject, aTopic, nullptr);
4360   }
4361 }
4362 
4363 static void NotifyProfilerStarted(const PowerOfTwo32& aCapacity,
4364                                   const Maybe<double>& aDuration,
4365                                   double aInterval, uint32_t aFeatures,
4366                                   const char** aFilters, uint32_t aFilterCount,
4367                                   uint64_t aActiveTabID) {
4368   nsTArray<nsCString> filtersArray;
4369   for (size_t i = 0; i < aFilterCount; ++i) {
4370     filtersArray.AppendElement(aFilters[i]);
4371   }
4372 
4373   nsCOMPtr<nsIProfilerStartParams> params = new nsProfilerStartParams(
4374       aCapacity.Value(), aDuration, aInterval, aFeatures,
4375       std::move(filtersArray), aActiveTabID);
4376 
4377   ProfilerParent::ProfilerStarted(params);
4378   NotifyObservers("profiler-started", params);
4379 }
4380 
4381 static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
4382                                   double aInterval, uint32_t aFeatures,
4383                                   const char** aFilters, uint32_t aFilterCount,
4384                                   uint64_t aActiveTabID,
4385                                   const Maybe<double>& aDuration);
4386 
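// MozGlueLabelEnter/Exit are label-frame hooks intended for mozglue-level code
// (as the names suggest): the ProfilingStackOwner pointer returned from
// MozGlueLabelEnter is expected to be passed back into MozGlueLabelExit.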
4387 // This basically duplicates AutoProfilerLabel's constructor.
4388 static void* MozGlueLabelEnter(const char* aLabel, const char* aDynamicString,
4389                                void* aSp) {
4390   ProfilingStackOwner* profilingStackOwner =
4391       AutoProfilerLabel::ProfilingStackOwnerTLS::Get();
4392   if (profilingStackOwner) {
4393     profilingStackOwner->ProfilingStack().pushLabelFrame(
4394         aLabel, aDynamicString, aSp, JS::ProfilingCategoryPair::OTHER);
4395   }
4396   return profilingStackOwner;
4397 }
4398 
4399 // This basically duplicates AutoProfilerLabel's destructor.
4400 static void MozGlueLabelExit(void* aProfilingStackOwner) {
4401   if (aProfilingStackOwner) {
4402     reinterpret_cast<ProfilingStackOwner*>(aProfilingStackOwner)
4403         ->ProfilingStack()
4404         .pop();
4405   }
4406 }
4407 
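// Splits a comma-separated list in place by overwriting each ',' with '\0'.
// Illustrative usage (hypothetical values):
//   UniquePtr<char[]> storage;
//   Vector<const char*> parts = SplitAtCommas("GeckoMain,DOM Worker", storage);
// yields {"GeckoMain", "DOM Worker"}; the returned pointers point into
// `storage`, which must therefore outlive them.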
4408 static Vector<const char*> SplitAtCommas(const char* aString,
4409                                          UniquePtr<char[]>& aStorage) {
4410   size_t len = strlen(aString);
4411   aStorage = MakeUnique<char[]>(len + 1);
4412   PodCopy(aStorage.get(), aString, len + 1);
4413 
4414   // Iterate over all characters in aStorage and split at commas, by
4415   // overwriting commas with the null char.
4416   Vector<const char*> array;
4417   size_t currentElementStart = 0;
4418   for (size_t i = 0; i <= len; i++) {
4419     if (aStorage[i] == ',') {
4420       aStorage[i] = '\0';
4421     }
4422     if (aStorage[i] == '\0') {
4423       MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
4424       currentElementStart = i + 1;
4425     }
4426   }
4427   return array;
4428 }
4429 
4430 void profiler_init_threadmanager() {
4431   LOG("profiler_init_threadmanager");
4432 
4433   PSAutoLock lock(gPSMutex);
4434   RegisteredThread* registeredThread =
4435       TLSRegisteredThread::RegisteredThread(lock);
4436   if (registeredThread && !registeredThread->GetEventTarget()) {
4437     registeredThread->ResetMainThread(NS_GetCurrentThreadNoCreate());
4438   }
4439 }
4440 
4441 void profiler_init(void* aStackTop) {
4442   LOG("profiler_init");
4443 
4444   scProfilerMainThreadId = profiler_current_thread_id();
4445 
4446   VTUNE_INIT();
4447 
4448   MOZ_RELEASE_ASSERT(!CorePS::Exists());
4449 
4450   if (getenv("MOZ_PROFILER_HELP")) {
4451     PrintUsageThenExit(1);  // terminates execution
4452   }
4453 
4454   // This must be before any TLS access (e.g.: Thread registration, labels...).
4455   TLSRegisteredThread::Init();
4456 
4457   SharedLibraryInfo::Initialize();
4458 
4459   uint32_t features = DefaultFeatures() & AvailableFeatures();
4460 
4461   UniquePtr<char[]> filterStorage;
4462 
4463   Vector<const char*> filters;
4464   MOZ_RELEASE_ASSERT(filters.append("GeckoMain"));
4465   MOZ_RELEASE_ASSERT(filters.append("Compositor"));
4466   MOZ_RELEASE_ASSERT(filters.append("Renderer"));
4467   MOZ_RELEASE_ASSERT(filters.append("DOM Worker"));
4468 
4469   PowerOfTwo32 capacity = PROFILER_DEFAULT_ENTRIES;
4470   Maybe<double> duration = Nothing();
4471   double interval = PROFILER_DEFAULT_INTERVAL;
4472   uint64_t activeTabID = PROFILER_DEFAULT_ACTIVE_TAB_ID;
4473 
4474   {
4475     PSAutoLock lock(gPSMutex);
4476 
4477     // We've passed the possible failure point. Instantiate CorePS, which
4478     // indicates that the profiler has initialized successfully.
4479     CorePS::Create(lock);
4480 
4481     // profiler_init implicitly registers this thread as main thread.
4482     Unused << locked_register_thread(lock, kMainThreadName, aStackTop);
4483 
4484     // Platform-specific initialization.
4485     PlatformInit(lock);
4486 
4487 #if defined(GP_OS_android)
4488     if (jni::IsAvailable()) {
4489       GeckoJavaSampler::Init();
4490     }
4491 #endif
4492 
4493     // (Linux-only) We could create CorePS::mLul and read unwind info into it
4494     // at this point. That would match the lifetime implied by destruction of
4495     // it in profiler_shutdown() just below. However, that gives a big delay on
4496     // startup, even if no profiling is actually to be done. So, instead, it is
4497     // created on demand at the first call to PlatformStart().
4498 
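    // From here on, startup profiling only proceeds if MOZ_PROFILER_STARTUP
    // is set to something other than "", "0", "N" or "n"; otherwise the
    // profiler stays initialized but inactive until started explicitly.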
4499     const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
4500     if (!startupEnv || startupEnv[0] == '\0' ||
4501         ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
4502           startupEnv[0] == 'n') &&
4503          startupEnv[1] == '\0')) {
4504       return;
4505     }
4506 
4507     LOG("- MOZ_PROFILER_STARTUP is set");
4508 
4509     // Startup default capacity may be different.
4510     capacity = PROFILER_DEFAULT_STARTUP_ENTRIES;
4511 
4512     const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
4513     if (startupCapacity && startupCapacity[0] != '\0') {
4514       errno = 0;
4515       long capacityLong = strtol(startupCapacity, nullptr, 10);
4516       // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
4517       // the maximum 32-bit signed number (as more than that is clamped down to
4518       // 2^31 anyway).
4519       if (errno == 0 && capacityLong > 0 &&
4520           static_cast<uint64_t>(capacityLong) <=
4521               static_cast<uint64_t>(INT32_MAX)) {
4522         capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
4523             static_cast<uint32_t>(capacityLong)));
4524         LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
4525       } else {
4526         LOG("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
4527             startupCapacity);
4528         PrintUsageThenExit(1);
4529       }
4530     }
4531 
4532     const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
4533     if (startupDuration && startupDuration[0] != '\0') {
4534       errno = 0;
4535       double durationVal = PR_strtod(startupDuration, nullptr);
4536       if (errno == 0 && durationVal >= 0.0) {
4537         if (durationVal > 0.0) {
4538           duration = Some(durationVal);
4539         }
4540         LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", durationVal);
4541       } else {
4542         LOG("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
4543             startupDuration);
4544         PrintUsageThenExit(1);
4545       }
4546     }
4547 
4548     const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
4549     if (startupInterval && startupInterval[0] != '\0') {
4550       errno = 0;
4551       interval = PR_strtod(startupInterval, nullptr);
4552       if (errno == 0 && interval > 0.0 && interval <= PROFILER_MAX_INTERVAL) {
4553         LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
4554       } else {
4555         LOG("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
4556             startupInterval);
4557         PrintUsageThenExit(1);
4558       }
4559     }
4560 
4561     features |= StartupExtraDefaultFeatures() & AvailableFeatures();
4562 
4563     const char* startupFeaturesBitfield =
4564         getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
4565     if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
4566       errno = 0;
4567       features = strtol(startupFeaturesBitfield, nullptr, 10);
4568       if (errno == 0 && features != 0) {
4569         LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
4570       } else {
4571         LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
4572             startupFeaturesBitfield);
4573         PrintUsageThenExit(1);
4574       }
4575     } else {
4576       const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
4577       if (startupFeatures && startupFeatures[0] != '\0') {
4578         // Interpret startupFeatures as a list of feature strings, separated by
4579         // commas.
4580         UniquePtr<char[]> featureStringStorage;
4581         Vector<const char*> featureStringArray =
4582             SplitAtCommas(startupFeatures, featureStringStorage);
4583         features = ParseFeaturesFromStringArray(featureStringArray.begin(),
4584                                                 featureStringArray.length(),
4585                                                 /* aIsStartup */ true);
4586         LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
4587       }
4588     }
4589 
4590     const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
4591     if (startupFilters && startupFilters[0] != '\0') {
4592       filters = SplitAtCommas(startupFilters, filterStorage);
4593       LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
4594     }
4595 
4596     const char* startupActiveTabID =
4597         getenv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID");
4598     if (startupActiveTabID && startupActiveTabID[0] != '\0') {
4599       std::istringstream iss(startupActiveTabID);
4600       iss >> activeTabID;
4601       if (!iss.fail()) {
4602         LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID = %" PRIu64, activeTabID);
4603       } else {
4604         LOG("- MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID not a valid "
4605             "uint64_t: %s",
4606             startupActiveTabID);
4607         PrintUsageThenExit(1);
4608       }
4609     }
4610 
4611     locked_profiler_start(lock, capacity, interval, features, filters.begin(),
4612                           filters.length(), activeTabID, duration);
4613   }
4614 
4615 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
4616   // Start counting memory allocations (outside of the lock because this may
4617   // call profiler_add_sampled_counter, which would attempt to take the lock).
4618   ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
4619 #endif
4620 
4621   invoke_profiler_state_change_callbacks(ProfilingState::Started);
4622 
4623   // We do this with gPSMutex unlocked. The comment in profiler_stop() explains
4624   // why.
4625   NotifyProfilerStarted(capacity, duration, interval, features, filters.begin(),
4626                         filters.length(), 0);
4627 }
4628 
4629 static void locked_profiler_save_profile_to_file(
4630     PSLockRef aLock, const char* aFilename,
4631     const PreRecordedMetaInformation& aPreRecordedMetaInformation,
4632     bool aIsShuttingDown);
4633 
4634 static SamplerThread* locked_profiler_stop(PSLockRef aLock);
4635 
4636 void profiler_shutdown(IsFastShutdown aIsFastShutdown) {
4637   LOG("profiler_shutdown");
4638 
4639   VTUNE_SHUTDOWN();
4640 
4641   MOZ_RELEASE_ASSERT(NS_IsMainThread());
4642   MOZ_RELEASE_ASSERT(CorePS::Exists());
4643 
4644   if (profiler_is_active()) {
4645     invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
4646   }
4647   invoke_profiler_state_change_callbacks(ProfilingState::ShuttingDown);
4648 
4649   const auto preRecordedMetaInformation = PreRecordMetaInformation();
4650 
4651   ProfilerParent::ProfilerWillStopIfStarted();
4652 
4653   // If the profiler is active we must get a handle to the SamplerThread before
4654   // ActivePS is destroyed, in order to delete it.
4655   SamplerThread* samplerThread = nullptr;
4656   {
4657     PSAutoLock lock(gPSMutex);
4658 
4659     // Save the profile on shutdown if requested.
4660     if (ActivePS::Exists(lock)) {
4661       const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
4662       if (filename) {
4663         locked_profiler_save_profile_to_file(lock, filename,
4664                                              preRecordedMetaInformation,
4665                                              /* aIsShuttingDown */ true);
4666       }
4667       if (aIsFastShutdown == IsFastShutdown::Yes) {
4668         return;
4669       }
4670 
4671       samplerThread = locked_profiler_stop(lock);
4672     } else if (aIsFastShutdown == IsFastShutdown::Yes) {
4673       return;
4674     }
4675 
4676     CorePS::Destroy(lock);
4677 
4678     // We just destroyed CorePS and the ThreadInfos it contains, so we can
4679     // clear this thread's TLSRegisteredThread.
4680     TLSRegisteredThread::ResetRegisteredThread(lock);
4681     // We can also clear the AutoProfilerLabel's ProfilingStack because the
4682     // main thread should not use labels after profiler_shutdown.
4683     TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);
4684   }
4685 
4686   // We do these operations with gPSMutex unlocked. The comments in
4687   // profiler_stop() explain why.
4688   if (samplerThread) {
4689     ProfilerParent::ProfilerStopped();
4690     NotifyObservers("profiler-stopped");
4691     delete samplerThread;
4692   }
4693 }
4694 
4695 static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
4696                                      double aSinceTime, bool aIsShuttingDown,
4697                                      ProfilerCodeAddressService* aService) {
4698   LOG("WriteProfileToJSONWriter");
4699 
4700   MOZ_RELEASE_ASSERT(CorePS::Exists());
4701 
4702   aWriter.Start();
4703   {
4704     if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
4705                                                aIsShuttingDown, aService)) {
4706       return false;
4707     }
4708 
4709     // Don't include profiles from other processes because this is a
4710     // synchronous function.
4711     aWriter.StartArrayProperty("processes");
4712     aWriter.EndArray();
4713   }
4714   aWriter.End();
4715   return true;
4716 }
4717 
4718 void profiler_set_process_name(const nsACString& aProcessName,
4719                                const nsACString* aETLDplus1) {
4720   LOG("profiler_set_process_name(\"%s\", \"%s\")", aProcessName.Data(),
4721       aETLDplus1 ? aETLDplus1->Data() : "<none>");
4722   PSAutoLock lock(gPSMutex);
4723   CorePS::SetProcessName(lock, aProcessName);
4724   if (aETLDplus1) {
4725     CorePS::SetETLDplus1(lock, *aETLDplus1);
4726   }
4727 }
4728 
4729 UniquePtr<char[]> profiler_get_profile(double aSinceTime,
4730                                        bool aIsShuttingDown) {
4731   LOG("profiler_get_profile");
4732 
4733   UniquePtr<ProfilerCodeAddressService> service =
4734       profiler_code_address_service_for_presymbolication();
4735 
4736   SpliceableChunkedJSONWriter b;
4737   if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown,
4738                                 service.get())) {
4739     return nullptr;
4740   }
4741   return b.ChunkedWriteFunc().CopyData();
4742 }
4743 
4744 void profiler_get_profile_json_into_lazily_allocated_buffer(
4745     const std::function<char*(size_t)>& aAllocator, double aSinceTime,
4746     bool aIsShuttingDown) {
4747   LOG("profiler_get_profile_json_into_lazily_allocated_buffer");
4748 
4749   UniquePtr<ProfilerCodeAddressService> service =
4750       profiler_code_address_service_for_presymbolication();
4751 
4752   SpliceableChunkedJSONWriter b;
4753   if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown,
4754                                 service.get())) {
4755     return;
4756   }
4757 
4758   b.ChunkedWriteFunc().CopyDataIntoLazilyAllocatedBuffer(aAllocator);
4759 }
4760 
4761 void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
4762                                double* aInterval, uint32_t* aFeatures,
4763                                Vector<const char*>* aFilters,
4764                                uint64_t* aActiveTabID) {
4765   MOZ_RELEASE_ASSERT(CorePS::Exists());
4766 
4767   if (NS_WARN_IF(!aCapacity) || NS_WARN_IF(!aDuration) ||
4768       NS_WARN_IF(!aInterval) || NS_WARN_IF(!aFeatures) ||
4769       NS_WARN_IF(!aFilters)) {
4770     return;
4771   }
4772 
4773   PSAutoLock lock(gPSMutex);
4774 
4775   if (!ActivePS::Exists(lock)) {
4776     *aCapacity = 0;
4777     *aDuration = Nothing();
4778     *aInterval = 0;
4779     *aFeatures = 0;
4780     *aActiveTabID = 0;
4781     aFilters->clear();
4782     return;
4783   }
4784 
4785   *aCapacity = ActivePS::Capacity(lock).Value();
4786   *aDuration = ActivePS::Duration(lock);
4787   *aInterval = ActivePS::Interval(lock);
4788   *aFeatures = ActivePS::Features(lock);
4789   *aActiveTabID = ActivePS::ActiveTabID(lock);
4790 
4791   const Vector<std::string>& filters = ActivePS::Filters(lock);
4792   MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
4793   for (uint32_t i = 0; i < filters.length(); ++i) {
4794     (*aFilters)[i] = filters[i].c_str();
4795   }
4796 }
4797 
4798 ProfileBufferControlledChunkManager* profiler_get_controlled_chunk_manager() {
4799   MOZ_RELEASE_ASSERT(CorePS::Exists());
4800   PSAutoLock lock(gPSMutex);
4801   if (NS_WARN_IF(!ActivePS::Exists(lock))) {
4802     return nullptr;
4803   }
4804   return &ActivePS::ControlledChunkManager(lock);
4805 }
4806 
4807 namespace mozilla {
4808 
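// Forwards the active configuration to a child process through the aSetEnv
// callback, using the same MOZ_PROFILER_STARTUP* environment variables that
// profiler_init() parses above (entries, interval, features, filters, tab ID).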
4809 void GetProfilerEnvVarsForChildProcess(
4810     std::function<void(const char* key, const char* value)>&& aSetEnv) {
4811   MOZ_RELEASE_ASSERT(CorePS::Exists());
4812 
4813   PSAutoLock lock(gPSMutex);
4814 
4815   if (!ActivePS::Exists(lock)) {
4816     aSetEnv("MOZ_PROFILER_STARTUP", "");
4817     return;
4818   }
4819 
4820   aSetEnv("MOZ_PROFILER_STARTUP", "1");
4821 
4822   // Hidden option to stop Base Profiler, mostly due to Talos intermittents,
4823   // see https://bugzilla.mozilla.org/show_bug.cgi?id=1638851#c3
4824   // TODO: Investigate root cause and remove this in bugs 1648324 and 1648325.
4825   if (getenv("MOZ_PROFILER_STARTUP_NO_BASE")) {
4826     aSetEnv("MOZ_PROFILER_STARTUP_NO_BASE", "1");
4827   }
4828 
4829   auto capacityString =
4830       Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
4831   aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());
4832 
4833   // Use AppendFloat instead of Smprintf with %f because the decimal
4834   // separator used by %f is locale-dependent. But the string we produce needs
4835   // to be parseable by strtod, which only accepts the period character as a
4836   // decimal separator. AppendFloat always uses the period character.
4837   nsCString intervalString;
4838   intervalString.AppendFloat(ActivePS::Interval(lock));
4839   aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.get());
4840 
4841   auto featuresString = Smprintf("%d", ActivePS::Features(lock));
4842   aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());
4843 
4844   std::string filtersString;
4845   const Vector<std::string>& filters = ActivePS::Filters(lock);
4846   for (uint32_t i = 0; i < filters.length(); ++i) {
4847     if (i != 0) {
4848       filtersString += ",";
4849     }
4850     filtersString += filters[i];
4851   }
4852   aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
4853 
4854   auto activeTabIDString = Smprintf("%" PRIu64, ActivePS::ActiveTabID(lock));
4855   aSetEnv("MOZ_PROFILER_STARTUP_ACTIVE_TAB_ID", activeTabIDString.get());
4856 }
4857 
4858 }  // namespace mozilla
4859 
4860 void profiler_received_exit_profile(const nsCString& aExitProfile) {
4861   MOZ_RELEASE_ASSERT(CorePS::Exists());
4862   PSAutoLock lock(gPSMutex);
4863   if (!ActivePS::Exists(lock)) {
4864     return;
4865   }
4866   ActivePS::AddExitProfile(lock, aExitProfile);
4867 }
4868 
4869 Vector<nsCString> profiler_move_exit_profiles() {
4870   MOZ_RELEASE_ASSERT(CorePS::Exists());
4871   PSAutoLock lock(gPSMutex);
4872   Vector<nsCString> profiles;
4873   if (ActivePS::Exists(lock)) {
4874     profiles = ActivePS::MoveExitProfiles(lock);
4875   }
4876   return profiles;
4877 }
4878 
4879 static void locked_profiler_save_profile_to_file(
4880     PSLockRef aLock, const char* aFilename,
4881     const PreRecordedMetaInformation& aPreRecordedMetaInformation,
4882     bool aIsShuttingDown = false) {
4883   LOG("locked_profiler_save_profile_to_file(%s)", aFilename);
4884 
4885   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
4886 
4887   std::ofstream stream;
4888   stream.open(aFilename);
4889   if (stream.is_open()) {
4890     SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
4891     w.Start();
4892     {
4893       locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
4894                                                    aPreRecordedMetaInformation,
4895                                                    aIsShuttingDown, nullptr);
4896 
4897       w.StartArrayProperty("processes");
4898       Vector<nsCString> exitProfiles = ActivePS::MoveExitProfiles(aLock);
4899       for (auto& exitProfile : exitProfiles) {
4900         if (!exitProfile.IsEmpty()) {
4901           w.Splice(exitProfile);
4902         }
4903       }
4904       w.EndArray();
4905     }
4906     w.End();
4907 
4908     stream.close();
4909   }
4910 }
4911 
4912 void profiler_save_profile_to_file(const char* aFilename) {
4913   LOG("profiler_save_profile_to_file(%s)", aFilename);
4914 
4915   MOZ_RELEASE_ASSERT(CorePS::Exists());
4916 
4917   const auto preRecordedMetaInformation = PreRecordMetaInformation();
4918 
4919   PSAutoLock lock(gPSMutex);
4920 
4921   if (!ActivePS::Exists(lock)) {
4922     return;
4923   }
4924 
4925   locked_profiler_save_profile_to_file(lock, aFilename,
4926                                        preRecordedMetaInformation);
4927 }
4928 
4929 uint32_t profiler_get_available_features() {
4930   MOZ_RELEASE_ASSERT(CorePS::Exists());
4931   return AvailableFeatures();
4932 }
4933 
4934 Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
4935   MOZ_RELEASE_ASSERT(CorePS::Exists());
4936 
4937   PSAutoLock lock(gPSMutex);
4938 
4939   if (!ActivePS::Exists(lock)) {
4940     return Nothing();
4941   }
4942 
4943   return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
4944 }
4945 
4946 static void PollJSSamplingForCurrentThread() {
4947   MOZ_RELEASE_ASSERT(CorePS::Exists());
4948 
4949   PSAutoLock lock(gPSMutex);
4950 
4951   RegisteredThread* registeredThread =
4952       TLSRegisteredThread::RegisteredThread(lock);
4953   if (!registeredThread) {
4954     return;
4955   }
4956 
4957   registeredThread->PollJSSampling();
4958 }
4959 
4960 // When the profiler is started on a background thread, we can't synchronously
4961 // call PollJSSampling on the main thread's ThreadInfo. And the next regular
4962 // call to PollJSSampling on the main thread would only happen once the main
4963 // thread triggers a JS interrupt callback.
4964 // This means that all the JS execution between profiler_start() and the first
4965 // JS interrupt would happen with JS sampling disabled, and we wouldn't get any
4966 // JS function information for that period of time.
4967 // So in order to start JS sampling as soon as possible, we dispatch a runnable
4968 // to the main thread which manually calls PollJSSamplingForCurrentThread().
4969 // In some cases this runnable will lose the race with the next JS interrupt.
4970 // That's fine; PollJSSamplingForCurrentThread() is immune to redundant calls.
4971 static void TriggerPollJSSamplingOnMainThread() {
4972   nsCOMPtr<nsIThread> mainThread;
4973   nsresult rv = NS_GetMainThread(getter_AddRefs(mainThread));
4974   if (NS_SUCCEEDED(rv) && mainThread) {
4975     nsCOMPtr<nsIRunnable> task =
4976         NS_NewRunnableFunction("TriggerPollJSSamplingOnMainThread",
4977                                []() { PollJSSamplingForCurrentThread(); });
4978     SchedulerGroup::Dispatch(TaskCategory::Other, task.forget());
4979   }
4980 }
4981 
4982 static bool HasMinimumLength(const char* aString, size_t aMinimumLength) {
4983   if (!aString) {
4984     return false;
4985   }
4986   for (size_t i = 0; i < aMinimumLength; ++i) {
4987     if (aString[i] == '\0') {
4988       return false;
4989     }
4990   }
4991   return true;
4992 }
4993 
4994 static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
4995                                   double aInterval, uint32_t aFeatures,
4996                                   const char** aFilters, uint32_t aFilterCount,
4997                                   uint64_t aActiveTabID,
4998                                   const Maybe<double>& aDuration) {
4999   if (LOG_TEST) {
5000     LOG("locked_profiler_start");
5001     LOG("- capacity  = %u", unsigned(aCapacity.Value()));
5002     LOG("- duration  = %.2f", aDuration ? *aDuration : -1);
5003     LOG("- interval = %.2f", aInterval);
5004     LOG("- tab ID = %" PRIu64, aActiveTabID);
5005 
5006 #define LOG_FEATURE(n_, str_, Name_, desc_)     \
5007   if (ProfilerFeature::Has##Name_(aFeatures)) { \
5008     LOG("- feature  = %s", str_);               \
5009   }
5010 
5011     PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)
5012 
5013 #undef LOG_FEATURE
5014 
5015     for (uint32_t i = 0; i < aFilterCount; i++) {
5016       LOG("- threads  = %s", aFilters[i]);
5017     }
5018   }
5019 
5020   MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));
5021 
5022   UniquePtr<char[]> baseprofile;
5023   if (baseprofiler::profiler_is_active()) {
5024     // Note that we still hold the lock, so the sampler cannot run yet and
5025     // interact negatively with the still-active BaseProfiler sampler.
5026     // Assume that Base Profiler is active because of MOZ_PROFILER_STARTUP.
5027     // Capture the Base Profiler startup profile threads (if any).
5028     baseprofile = baseprofiler::profiler_get_profile(
5029         /* aSinceTime */ 0, /* aIsShuttingDown */ false,
5030         /* aOnlyThreads */ true);
5031 
5032     // Now stop Base Profiler (BP), as further recording will be ignored anyway,
5033     // and so that it won't clash with Gecko Profiler (GP) sampling starting
5034     // after the lock is dropped.
5035     // On Linux this is especially important to do before creating the GP
5036     // sampler, because the BP sampler may send a signal (to stop threads to be
5037     // sampled), which the GP would intercept before its own initialization is
5038     // complete and ready to handle such signals.
5039     // Note that even though `profiler_stop()` doesn't immediately destroy and
5040     // join the sampler thread, it safely deactivates it in such a way that the
5041     // thread will soon exit without doing any actual work.
5042     // TODO: Allow non-sampling profiling to continue.
5043     // TODO: Re-start BP after GP shutdown, to capture post-XPCOM shutdown.
5044     baseprofiler::profiler_stop();
5045   }
5046 
5047 #if defined(GP_PLAT_amd64_windows)
5048   InitializeWin64ProfilerHooks();
5049 #endif
5050 
5051   // Fall back to the default values if the passed-in values are unreasonable.
5052   // We want to be able to store at least one full stack.
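  // (Illustrative example with assumed numbers: if scExpectedMaximumStackSize
  // were 64KB and scBytesPerEntry were 8, any requested capacity below 8192
  // entries would fall back to PROFILER_DEFAULT_ENTRIES.)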
5053   PowerOfTwo32 capacity =
5054       (aCapacity.Value() >=
5055        ProfileBufferChunkManager::scExpectedMaximumStackSize / scBytesPerEntry)
5056           ? aCapacity
5057           : PROFILER_DEFAULT_ENTRIES;
5058   Maybe<double> duration = aDuration;
5059 
5060   if (aDuration && *aDuration <= 0) {
5061     duration = Nothing();
5062   }
5063 
5064   double interval = aInterval > 0 ? aInterval : PROFILER_DEFAULT_INTERVAL;
5065 
5066   ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
5067                    aActiveTabID, duration);
5068 
5069   // ActivePS::Create can only succeed or crash.
5070   MOZ_ASSERT(ActivePS::Exists(aLock));
5071 
5072   // An "empty" profile string may in fact contain 1 character (a newline), so
5073   // we want at least 2 characters to register a profile.
5074   if (HasMinimumLength(baseprofile.get(), 2)) {
5075     // The BaseProfiler startup profile will be stored as a separate "process"
5076     // in the Gecko Profiler profile, and shown as a new track under the
5077     // corresponding Gecko Profiler thread.
5078     ActivePS::AddBaseProfileThreads(aLock, std::move(baseprofile));
5079   }
5080 
5081   // Set up profiling for each registered thread, if appropriate.
5082 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5083   bool isMainThreadBeingProfiled = false;
5084 #endif
5085   int tid = profiler_current_thread_id();
5086   const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
5087       CorePS::RegisteredThreads(aLock);
5088   for (auto& registeredThread : registeredThreads) {
5089     RefPtr<ThreadInfo> info = registeredThread->Info();
5090 
5091     if (ActivePS::ShouldProfileThread(aLock, info)) {
5092       registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
5093       nsCOMPtr<nsIEventTarget> eventTarget = registeredThread->GetEventTarget();
5094       ProfiledThreadData* profiledThreadData = ActivePS::AddLiveProfiledThread(
5095           aLock, registeredThread.get(),
5096           MakeUnique<ProfiledThreadData>(info, eventTarget));
5097       ClearThreadRunningTimes(aLock, *registeredThread);
5098       if (ActivePS::FeatureJS(aLock)) {
5099         registeredThread->StartJSSampling(ActivePS::JSFlags(aLock));
5100         if (info->ThreadId() == tid) {
5101           // We can manually poll the current thread so it starts sampling
5102           // immediately.
5103           registeredThread->PollJSSampling();
5104         } else if (info->IsMainThread()) {
5105           // Dispatch a runnable to the main thread to call PollJSSampling(),
5106           // so that we don't have to wait for the next JS interrupt callback in
5107           // order to start profiling JS.
5108           TriggerPollJSSamplingOnMainThread();
5109         }
5110       }
5111 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5112       if (info->IsMainThread()) {
5113         isMainThreadBeingProfiled = true;
5114       }
5115 #endif
5116       registeredThread->RacyRegisteredThread().ReinitializeOnResume();
5117       if (registeredThread->GetJSContext()) {
5118         profiledThreadData->NotifyReceivedJSContext(0);
5119       }
5120     }
5121   }
5122 
5123   // Setup support for pushing/popping labels in mozglue.
5124   RegisterProfilerLabelEnterExit(MozGlueLabelEnter, MozGlueLabelExit);
5125 
5126 #if defined(GP_OS_android)
5127   if (ActivePS::FeatureJava(aLock)) {
5128     int javaInterval = interval;
5129     // Java sampling can't accurately keep up with sampling intervals shorter
5130     // than 1ms, so clamp the interval to at least 1ms.
5131     if (javaInterval < 1) {
5132       javaInterval = 1;
5133     }
5134     // Send the interval-relative entry count. Note that the Java code enforces
5135     // a hard cap of 100000 entries, so the effective count can't exceed that.
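    // (Illustrative example with assumed numbers: capacity = 131072 entries,
    // interval = 1ms and javaInterval = 5ms request round(131072 * 1 / 5) =
    // 26214 entries, which is well under that cap.)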
5136     java::GeckoJavaSampler::Start(
5137         javaInterval, std::round((double)(capacity.Value()) * interval /
5138                                  (double)(javaInterval)));
5139   }
5140 #endif
5141 
5142 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5143   if (ActivePS::FeatureNativeAllocations(aLock)) {
5144     if (isMainThreadBeingProfiled) {
5145       mozilla::profiler::enable_native_allocations();
5146     } else {
5147       NS_WARNING(
5148           "The nativeallocations feature is turned on, but the main thread is "
5149           "not being profiled. The allocations are only stored on the main "
5150           "thread.");
5151     }
5152   }
5153 #endif
5154 
5155   if (ProfilerFeature::HasAudioCallbackTracing(aFeatures)) {
5156     StartAudioCallbackTracing();
5157   }
5158 
5159   // At the very end, set up RacyFeatures.
5160   RacyFeatures::SetActive(ActivePS::Features(aLock));
5161 }
5162 
5163 void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
5164                     uint32_t aFeatures, const char** aFilters,
5165                     uint32_t aFilterCount, uint64_t aActiveTabID,
5166                     const Maybe<double>& aDuration) {
5167   LOG("profiler_start");
5168 
5169   ProfilerParent::ProfilerWillStopIfStarted();
5170 
5171   SamplerThread* samplerThread = nullptr;
5172   {
5173     PSAutoLock lock(gPSMutex);
5174 
5175     // Initialize if necessary.
5176     if (!CorePS::Exists()) {
5177       profiler_init(nullptr);
5178     }
5179 
5180     // Reset the current state if the profiler is running.
5181     if (ActivePS::Exists(lock)) {
5182       // Note: Not invoking callbacks with ProfilingState::Stopping, because
5183       // we're under lock, and also it would not be useful: Any profiling data
5184       // will be discarded, and we're immediately restarting the profiler below
5185       // and then notifying ProfilingState::Started.
5186       samplerThread = locked_profiler_stop(lock);
5187     }
5188 
5189     locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
5190                           aFilterCount, aActiveTabID, aDuration);
5191   }
5192 
5193 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5194   // Start counting memory allocations (outside of lock because this may call
5195   // profiler_add_sampled_counter, which would attempt to take the lock).
5196   ActivePS::SetMemoryCounter(mozilla::profiler::install_memory_hooks());
5197 #endif
5198 
5199   invoke_profiler_state_change_callbacks(ProfilingState::Started);
5200 
5201   // We do these operations with gPSMutex unlocked. The comments in
5202   // profiler_stop() explain why.
5203   if (samplerThread) {
5204     ProfilerParent::ProfilerStopped();
5205     NotifyObservers("profiler-stopped");
5206     delete samplerThread;
5207   }
5208   NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, aFilters,
5209                         aFilterCount, aActiveTabID);
5210 }
5211 
5212 void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
5213                              uint32_t aFeatures, const char** aFilters,
5214                              uint32_t aFilterCount, uint64_t aActiveTabID,
5215                              const Maybe<double>& aDuration) {
5216   LOG("profiler_ensure_started");
5217 
5218   ProfilerParent::ProfilerWillStopIfStarted();
5219 
5220   bool startedProfiler = false;
5221   SamplerThread* samplerThread = nullptr;
5222   {
5223     PSAutoLock lock(gPSMutex);
5224 
5225     // Initialize if necessary.
5226     if (!CorePS::Exists()) {
5227       profiler_init(nullptr);
5228     }
5229 
5230     if (ActivePS::Exists(lock)) {
5231       // The profiler is active.
5232       if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
5233                             aFilters, aFilterCount, aActiveTabID)) {
5234         // Stop and restart with different settings.
5235         // Note: Not invoking callbacks with ProfilingState::Stopping, because
5236         // we're under lock, and also it would not be useful: Any profiling data
5237         // will be discarded, and we're immediately restarting the profiler
5238         // below and then notifying ProfilingState::Started.
5239         samplerThread = locked_profiler_stop(lock);
5240         locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
5241                               aFilterCount, aActiveTabID, aDuration);
5242         startedProfiler = true;
5243       }
5244     } else {
5245       // The profiler is stopped.
5246       locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
5247                             aFilterCount, aActiveTabID, aDuration);
5248       startedProfiler = true;
5249     }
5250   }
5251 
5252   // We do these operations with gPSMutex unlocked. The comments in
5253   // profiler_stop() explain why.
5254   if (samplerThread) {
5255     ProfilerParent::ProfilerStopped();
5256     NotifyObservers("profiler-stopped");
5257     delete samplerThread;
5258   }
5259 
5260   if (startedProfiler) {
5261     invoke_profiler_state_change_callbacks(ProfilingState::Started);
5262 
5263     NotifyProfilerStarted(aCapacity, aDuration, aInterval, aFeatures, aFilters,
5264                           aFilterCount, aActiveTabID);
5265   }
5266 }
5267 
5268 [[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
5269   LOG("locked_profiler_stop");
5270 
5271   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
5272 
5273   // At the very start, clear RacyFeatures.
5274   RacyFeatures::SetInactive();
5275 
5276   if (ActivePS::FeatureAudioCallbackTracing(aLock)) {
5277     StopAudioCallbackTracing();
5278   }
5279 
5280 #if defined(GP_OS_android)
5281   if (ActivePS::FeatureJava(aLock)) {
5282     java::GeckoJavaSampler::Stop();
5283   }
5284 #endif
5285 
5286   // Remove support for pushing/popping labels in mozglue.
5287   RegisterProfilerLabelEnterExit(nullptr, nullptr);
5288 
5289   // Stop sampling live threads.
5290   int tid = profiler_current_thread_id();
5291   const Vector<LiveProfiledThreadData>& liveProfiledThreads =
5292       ActivePS::LiveProfiledThreads(aLock);
5293   for (auto& thread : liveProfiledThreads) {
5294     RegisteredThread* registeredThread = thread.mRegisteredThread;
5295     registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
5296     if (ActivePS::FeatureJS(aLock)) {
5297       registeredThread->StopJSSampling();
5298       RefPtr<ThreadInfo> info = registeredThread->Info();
5299       if (info->ThreadId() == tid) {
5300         // We can manually poll the current thread so it stops profiling
5301         // immediately.
5302         registeredThread->PollJSSampling();
5303       } else if (info->IsMainThread()) {
5304         // Dispatch a runnable to the main thread to call PollJSSampling(),
5305         // so that we don't have to wait for the next JS interrupt callback in
5306         // order to stop profiling JS.
5307         TriggerPollJSSamplingOnMainThread();
5308       }
5309     }
5310   }
5311 
5312 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5313   if (ActivePS::FeatureNativeAllocations(aLock)) {
5314     mozilla::profiler::disable_native_allocations();
5315   }
5316 #endif
5317 
5318   // The Stop() call doesn't actually stop Run(); that happens in this
5319   // function's caller when the sampler thread is destroyed. Stop() just gives
5320   // the SamplerThread a chance to do some cleanup with gPSMutex locked.
5321   SamplerThread* samplerThread = ActivePS::Destroy(aLock);
5322   samplerThread->Stop(aLock);
5323 
5324   return samplerThread;
5325 }
5326 
5327 void profiler_stop() {
5328   LOG("profiler_stop");
5329 
5330   MOZ_RELEASE_ASSERT(CorePS::Exists());
5331 
5332   if (profiler_is_active()) {
5333     invoke_profiler_state_change_callbacks(ProfilingState::Stopping);
5334   }
5335 
5336   ProfilerParent::ProfilerWillStopIfStarted();
5337 
5338 #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
5339   // Remove the hooks early, as native allocations (if they are on) can be
5340   // quite expensive.
5341   mozilla::profiler::remove_memory_hooks();
5342 #endif
5343 
5344   SamplerThread* samplerThread;
5345   {
5346     PSAutoLock lock(gPSMutex);
5347 
5348     if (!ActivePS::Exists(lock)) {
5349       return;
5350     }
5351 
5352     samplerThread = locked_profiler_stop(lock);
5353   }
5354 
5355   // We notify observers with gPSMutex unlocked. Otherwise we might get a
5356   // deadlock, if code run by these functions calls a profiler function that
5357   // locks gPSMutex, for example when it wants to insert a marker.
5358   // (This has been seen in practice in bug 1346356, when we were still firing
5359   // these notifications synchronously.)
5360   ProfilerParent::ProfilerStopped();
5361   NotifyObservers("profiler-stopped");
5362 
5363   // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
5364   // would be waiting here with gPSMutex locked for SamplerThread::Run() to
5365   // return so the join operation within the destructor can complete, but Run()
5366   // needs to lock gPSMutex to return.
5367   //
5368   // Because this call occurs with gPSMutex unlocked, it -- including the final
5369   // iteration of Run()'s loop -- must be able to detect deactivation and return
5370   // in a way that's safe with respect to other gPSMutex-locking operations
5371   // that may have occurred in the meantime.
5372   delete samplerThread;
5373 }
5374 
5375 bool profiler_is_paused() {
5376   MOZ_RELEASE_ASSERT(CorePS::Exists());
5377 
5378   PSAutoLock lock(gPSMutex);
5379 
5380   if (!ActivePS::Exists(lock)) {
5381     return false;
5382   }
5383 
5384   return ActivePS::IsPaused(lock);
5385 }
5386 
5387 /* [[nodiscard]] */ bool profiler_callback_after_sampling(
5388     PostSamplingCallback&& aCallback) {
5389   LOG("profiler_callback_after_sampling");
5390 
5391   MOZ_RELEASE_ASSERT(CorePS::Exists());
5392 
5393   PSAutoLock lock(gPSMutex);
5394 
5395   return ActivePS::AppendPostSamplingCallback(lock, std::move(aCallback));
5396 }
5397 
5398 void profiler_pause() {
5399   LOG("profiler_pause");
5400 
5401   MOZ_RELEASE_ASSERT(CorePS::Exists());
5402 
5403   invoke_profiler_state_change_callbacks(ProfilingState::Pausing);
5404 
5405   {
5406     PSAutoLock lock(gPSMutex);
5407 
5408     if (!ActivePS::Exists(lock)) {
5409       return;
5410     }
5411 
5412 #if defined(GP_OS_android)
5413     if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
5414       // Not paused yet, so this is the first pause, let Java know.
5415       // TODO: Distinguish Pause and PauseSampling in Java.
5416       java::GeckoJavaSampler::PauseSampling();
5417     }
5418 #endif
5419 
5420     RacyFeatures::SetPaused();
5421     ActivePS::SetIsPaused(lock, true);
5422     ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
5423   }
5424 
5425   // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5426   ProfilerParent::ProfilerPaused();
5427   NotifyObservers("profiler-paused");
5428 }
5429 
5430 void profiler_resume() {
5431   LOG("profiler_resume");
5432 
5433   MOZ_RELEASE_ASSERT(CorePS::Exists());
5434 
5435   {
5436     PSAutoLock lock(gPSMutex);
5437 
5438     if (!ActivePS::Exists(lock)) {
5439       return;
5440     }
5441 
5442     ActivePS::Buffer(lock).AddEntry(
5443         ProfileBufferEntry::Resume(profiler_time()));
5444     ActivePS::SetIsPaused(lock, false);
5445     RacyFeatures::SetUnpaused();
5446 
5447 #if defined(GP_OS_android)
5448     if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
5449       // Not paused anymore, so this is the last unpause, let Java know.
5450       // TODO: Distinguish Unpause and UnpauseSampling in Java.
5451       java::GeckoJavaSampler::UnpauseSampling();
5452     }
5453 #endif
5454   }
5455 
5456   // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5457   ProfilerParent::ProfilerResumed();
5458   NotifyObservers("profiler-resumed");
5459 
5460   invoke_profiler_state_change_callbacks(ProfilingState::Resumed);
5461 }
5462 
5463 bool profiler_is_sampling_paused() {
5464   MOZ_RELEASE_ASSERT(CorePS::Exists());
5465 
5466   PSAutoLock lock(gPSMutex);
5467 
5468   if (!ActivePS::Exists(lock)) {
5469     return false;
5470   }
5471 
5472   return ActivePS::IsSamplingPaused(lock);
5473 }
5474 
5475 void profiler_pause_sampling() {
5476   LOG("profiler_pause_sampling");
5477 
5478   MOZ_RELEASE_ASSERT(CorePS::Exists());
5479 
5480   {
5481     PSAutoLock lock(gPSMutex);
5482 
5483     if (!ActivePS::Exists(lock)) {
5484       return;
5485     }
5486 
5487 #if defined(GP_OS_android)
5488     if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
5489       // Not paused yet, so this is the first pause, let Java know.
5490       // TODO: Distinguish Pause and PauseSampling in Java.
5491       java::GeckoJavaSampler::PauseSampling();
5492     }
5493 #endif
5494 
5495     RacyFeatures::SetSamplingPaused();
5496     ActivePS::SetIsSamplingPaused(lock, true);
5497     ActivePS::Buffer(lock).AddEntry(
5498         ProfileBufferEntry::PauseSampling(profiler_time()));
5499   }
5500 
5501   // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5502   ProfilerParent::ProfilerPausedSampling();
5503   NotifyObservers("profiler-paused-sampling");
5504 }
5505 
5506 void profiler_resume_sampling() {
5507   LOG("profiler_resume_sampling");
5508 
5509   MOZ_RELEASE_ASSERT(CorePS::Exists());
5510 
5511   {
5512     PSAutoLock lock(gPSMutex);
5513 
5514     if (!ActivePS::Exists(lock)) {
5515       return;
5516     }
5517 
5518     ActivePS::Buffer(lock).AddEntry(
5519         ProfileBufferEntry::ResumeSampling(profiler_time()));
5520     ActivePS::SetIsSamplingPaused(lock, false);
5521     RacyFeatures::SetSamplingUnpaused();
5522 
5523 #if defined(GP_OS_android)
5524     if (ActivePS::FeatureJava(lock) && !ActivePS::IsSamplingPaused(lock)) {
5525       // Not paused anymore, so this is the last unpause, let Java know.
5526       // TODO: Distinguish Unpause and UnpauseSampling in Java.
5527       java::GeckoJavaSampler::UnpauseSampling();
5528     }
5529 #endif
5530   }
5531 
5532   // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5533   ProfilerParent::ProfilerResumedSampling();
5534   NotifyObservers("profiler-resumed-sampling");
5535 }
5536 
5537 bool profiler_feature_active(uint32_t aFeature) {
5538   // This function runs both on and off the main thread.
5539 
5540   MOZ_RELEASE_ASSERT(CorePS::Exists());
5541 
5542   // This function is hot enough that we use RacyFeatures, not ActivePS.
5543   return RacyFeatures::IsActiveWithFeature(aFeature);
5544 }
5545 
5546 void profiler_write_active_configuration(JSONWriter& aWriter) {
5547   MOZ_RELEASE_ASSERT(CorePS::Exists());
5548   PSAutoLock lock(gPSMutex);
5549   ActivePS::WriteActiveConfiguration(lock, aWriter);
5550 }
5551 
5552 void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
5553   DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
5554   PSAutoLock lock(gPSMutex);
5555   CorePS::AppendCounter(lock, aCounter);
5556 }
5557 
5558 void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
5559   DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
5560   PSAutoLock lock(gPSMutex);
5561   // Note: we don't enforce a final sample, though we could do so if the
5562   // profiler were active.
5563   CorePS::RemoveCounter(lock, aCounter);
5564 }
5565 
5566 ProfilingStack* profiler_register_thread(const char* aName,
5567                                          void* aGuessStackTop) {
5568   DEBUG_LOG("profiler_register_thread(%s)", aName);
5569 
5570   MOZ_RELEASE_ASSERT(CorePS::Exists());
5571 
5572   // Make sure we have an nsThread wrapper for the current thread, and that NSPR
5573   // knows its name.
5574   (void)NS_GetCurrentThread();
5575   NS_SetCurrentThreadName(aName);
5576 
5577   if (!TLSRegisteredThread::IsTLSInited()) {
5578     return nullptr;
5579   }
5580 
5581   PSAutoLock lock(gPSMutex);
5582 
5583   if (RegisteredThread* thread = TLSRegisteredThread::RegisteredThread(lock)) {
5584     MOZ_RELEASE_ASSERT(IsRegisteredThreadInRegisteredThreadsList(lock, thread),
5585                        "Thread being re-registered is not in registered thread "
5586                        "list even though its TLS is non-null");
5587     MOZ_RELEASE_ASSERT(
5588         thread->Info()->ThreadId() == profiler_current_thread_id(),
5589         "Thread being re-registered has changed its TID");
5590     LOG("profiler_register_thread(%s) - thread %d already registered as %s",
5591         aName, profiler_current_thread_id(), thread->Info()->Name());
5592     // TODO: Use new name. This is currently not possible because the
5593     // RegisteredThread's ThreadInfo cannot be changed.
5594     // In the meantime, we record a marker that could be used in the frontend.
5595     nsCString text("Thread ");
5596     text.AppendInt(profiler_current_thread_id());
5597     text.AppendLiteral(" \"");
5598     text.AppendASCII(thread->Info()->Name());
5599     text.AppendLiteral("\" attempted to re-register as \"");
5600     text.AppendASCII(aName);
5601     text.AppendLiteral("\"");
5602     PROFILER_MARKER_TEXT("profiler_register_thread again", OTHER_Profiling,
5603                          MarkerThreadId::MainThread(), text);
5604 
5605     return &thread->RacyRegisteredThread().ProfilingStack();
5606   }
5607 
5608   void* stackTop = GetStackTop(aGuessStackTop);
5609   return locked_register_thread(lock, aName, stackTop);
5610 }
5611 
5612 void profiler_unregister_thread() {
5613   PSAutoLock lock(gPSMutex);
5614 
5615   if (!TLSRegisteredThread::IsTLSInited()) {
5616     return;
5617   }
5618 
5619   if (!CorePS::Exists()) {
5620     // This function can be called after the main thread has already shut down.
5621     // We want to reset the AutoProfilerLabel's ProfilingStack pointer (if
5622     // needed), because a thread could stay registered after the profiler has
5623     // shut down.
5624     TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);
5625     return;
5626   }
5627 
5628   // We don't call RegisteredThread::StopJSSampling() here; there's no point
5629   // doing that for a JS thread that is in the process of disappearing.
5630 
5631   if (RegisteredThread* registeredThread =
5632           TLSRegisteredThread::RegisteredThread(lock)) {
5633     MOZ_RELEASE_ASSERT(
5634         IsRegisteredThreadInRegisteredThreadsList(lock, registeredThread),
5635         "Thread being unregistered is not in registered thread list even "
5636         "though its TLS is non-null");
5637     MOZ_RELEASE_ASSERT(
5638         registeredThread->Info()->ThreadId() == profiler_current_thread_id(),
5639         "Thread being unregistered has changed its TID");
5640     RefPtr<ThreadInfo> info = registeredThread->Info();
5641 
5642     DEBUG_LOG("profiler_unregister_thread: %s", info->Name());
5643 
5644     if (ActivePS::Exists(lock)) {
5645       ActivePS::UnregisterThread(lock, registeredThread);
5646     }
5647 
5648     // Clear the pointer to the RegisteredThread object that we're about to
5649     // destroy, as well as the AutoProfilerLabel's ProfilingStack because the
5650     // thread is unregistering itself and won't need the ProfilingStack anymore.
5651     TLSRegisteredThread::ResetRegisteredThread(lock);
5652     TLSRegisteredThread::ResetAutoProfilerLabelProfilingStack(lock);
5653 
5654     // Remove the thread from the list of registered threads. This deletes the
5655     // registeredThread object.
5656     CorePS::RemoveRegisteredThread(lock, registeredThread);
5657 
5658     MOZ_RELEASE_ASSERT(
5659         !IsRegisteredThreadInRegisteredThreadsList(lock, registeredThread),
5660         "After unregistering, thread should no longer be in the registered "
5661         "thread list");
5662     MOZ_RELEASE_ASSERT(
5663         !TLSRegisteredThread::RegisteredThread(lock),
5664         "TLS should have been reset after un-registering thread");
5665   } else {
5666     // There are two ways TLSRegisteredThread::RegisteredThread() might be
5667     // empty.
5668     //
5669     // - TLSRegisteredThread::Init() failed in locked_register_thread().
5670     //
5671     // - We've already called profiler_unregister_thread() for this thread.
5672     //   (Whether or not it should, this does happen in practice.)
5673     LOG("profiler_unregister_thread() - thread %d already unregistered",
5674         profiler_current_thread_id());
5675     // We cannot record a marker on this thread because it was already
5676     // unregistered. Send it to the main thread (unless this *is* already the
5677     // main thread, which has been unregistered); this may be useful to catch
5678     // mismatched register/unregister pairs in Firefox.
5679     if (int tid = profiler_current_thread_id();
5680         tid != profiler_main_thread_id()) {
5681       nsCString threadIdString;
5682       threadIdString.AppendInt(tid);
5683       PROFILER_MARKER_TEXT("profiler_unregister_thread again", OTHER_Profiling,
5684                            MarkerThreadId::MainThread(), threadIdString);
5685     }
5686   }
5687 }
5688 
5689 void profiler_register_page(uint64_t aTabID, uint64_t aInnerWindowID,
5690                             const nsCString& aUrl,
5691                             uint64_t aEmbedderInnerWindowID) {
5692   DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
5693             aTabID, aInnerWindowID, aUrl.get(), aEmbedderInnerWindowID);
5694 
5695   MOZ_RELEASE_ASSERT(CorePS::Exists());
5696 
5697   PSAutoLock lock(gPSMutex);
5698 
5699   // When a browsing context is first loaded, the first URL loaded in it will be
5700   // about:blank. Because of that, this call keeps the first non-about:blank
5701   // registration of the window and discards the previous one.
5702   RefPtr<PageInformation> pageInfo =
5703       new PageInformation(aTabID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
5704   CorePS::AppendRegisteredPage(lock, std::move(pageInfo));
5705 
5706   // After appending the given page to CorePS, look for the expired
5707   // pages and remove them if there are any.
5708   if (ActivePS::Exists(lock)) {
5709     ActivePS::DiscardExpiredPages(lock);
5710   }
5711 }
5712 
5713 void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
5714   PSAutoLock lock(gPSMutex);
5715 
5716   if (!CorePS::Exists()) {
5717     // This function can be called after the main thread has already shut down.
5718     return;
5719   }
5720 
5721   // During unregistration, if the profiler is active, we have to keep the
5722   // page information since there may be some markers associated with the given
5723   // page. But if the profiler is not active, we have no reason to keep the
5724   // page information here because there can't be any markers associated with it.
5725   if (ActivePS::Exists(lock)) {
5726     ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
5727   } else {
5728     CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
5729   }
5730 }
5731 
5732 void profiler_clear_all_pages() {
5733   {
5734     PSAutoLock lock(gPSMutex);
5735 
5736     if (!CorePS::Exists()) {
5737       // This function can be called after the main thread has already shut
5738       // down.
5739       return;
5740     }
5741 
5742     CorePS::ClearRegisteredPages(lock);
5743     if (ActivePS::Exists(lock)) {
5744       ActivePS::ClearUnregisteredPages(lock);
5745     }
5746   }
5747 
5748   // gPSMutex must be unlocked when we notify, to avoid potential deadlocks.
5749   ProfilerParent::ClearAllPages();
5750 }
5751 
5752 namespace geckoprofiler::markers::detail {
5753 
5754 Maybe<uint64_t> profiler_get_inner_window_id_from_docshell(
5755     nsIDocShell* aDocshell) {
5756   Maybe<uint64_t> innerWindowID = Nothing();
5757   if (aDocshell) {
5758     auto outerWindow = aDocshell->GetWindow();
5759     if (outerWindow) {
5760       auto innerWindow = outerWindow->GetCurrentInnerWindow();
5761       if (innerWindow) {
5762         innerWindowID = Some(innerWindow->WindowID());
5763       }
5764     }
5765   }
5766   return innerWindowID;
5767 }
5768 
5769 }  // namespace geckoprofiler::markers::detail
5770 
5771 void profiler_thread_sleep() {
5772   // This function runs both on and off the main thread.
5773 
5774   MOZ_RELEASE_ASSERT(CorePS::Exists());
5775 
5776   RacyRegisteredThread* racyRegisteredThread =
5777       TLSRegisteredThread::RacyRegisteredThread();
5778   if (!racyRegisteredThread) {
5779     return;
5780   }
5781 
5782   racyRegisteredThread->SetSleeping();
5783 }
5784 
5785 void profiler_thread_wake() {
5786   // This function runs both on and off the main thread.
5787 
5788   MOZ_RELEASE_ASSERT(CorePS::Exists());
5789 
5790   RacyRegisteredThread* racyRegisteredThread =
5791       TLSRegisteredThread::RacyRegisteredThread();
5792   if (!racyRegisteredThread) {
5793     return;
5794   }
5795 
5796   racyRegisteredThread->SetAwake();
5797 }
5798 
5799 bool mozilla::profiler::detail::IsThreadBeingProfiled() {
5800   MOZ_RELEASE_ASSERT(CorePS::Exists());
5801 
5802   const RacyRegisteredThread* racyRegisteredThread =
5803       TLSRegisteredThread::RacyRegisteredThread();
5804   return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled();
5805 }
5806 
5807 bool mozilla::profiler::detail::IsThreadRegistered() {
5808   MOZ_RELEASE_ASSERT(CorePS::Exists());
5809 
5810   const RacyRegisteredThread* racyRegisteredThread =
5811       TLSRegisteredThread::RacyRegisteredThread();
5812   // The simple presence of this TLS pointer is proof that the thread is
5813   // registered.
5814   return !!racyRegisteredThread;
5815 }
5816 
5817 bool profiler_thread_is_sleeping() {
5818   MOZ_RELEASE_ASSERT(NS_IsMainThread());
5819   MOZ_RELEASE_ASSERT(CorePS::Exists());
5820 
5821   RacyRegisteredThread* racyRegisteredThread =
5822       TLSRegisteredThread::RacyRegisteredThread();
5823   if (!racyRegisteredThread) {
5824     return false;
5825   }
5826   return racyRegisteredThread->IsSleeping();
5827 }
5828 
5829 void profiler_js_interrupt_callback() {
5830   // This function runs on JS threads being sampled.
5831   PollJSSamplingForCurrentThread();
5832 }
5833 
5834 double profiler_time() {
5835   MOZ_RELEASE_ASSERT(CorePS::Exists());
5836 
5837   TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
5838   return delta.ToMilliseconds();
5839 }
5840 
5841 bool profiler_capture_backtrace_into(ProfileChunkedBuffer& aChunkedBuffer,
5842                                      StackCaptureOptions aCaptureOptions) {
5843   MOZ_RELEASE_ASSERT(CorePS::Exists());
5844 
5845   PSAutoLock lock(gPSMutex);
5846 
5847   if (!ActivePS::Exists(lock) ||
5848       aCaptureOptions == StackCaptureOptions::NoStack) {
5849     return false;
5850   }
5851 
5852   RegisteredThread* registeredThread =
5853       TLSRegisteredThread::RegisteredThread(lock);
5854   if (!registeredThread) {
5855     // If this was called from a non-registered thread, return false and do no
5856     // more work. This can happen from a memory hook. Before allocation tracking
5857     // was added, there was a MOZ_ASSERT() here checking for the existence of a
5858     // registeredThread.
5859     return false;
5860   }
5861 
5862   ProfileBuffer profileBuffer(aChunkedBuffer);
5863 
5864   Registers regs;
5865 #if defined(HAVE_NATIVE_UNWIND)
5866   regs.SyncPopulate();
5867 #else
5868   regs.Clear();
5869 #endif
5870 
5871   DoSyncSample(lock, *registeredThread, TimeStamp::NowUnfuzzed(), regs,
5872                profileBuffer, aCaptureOptions);
5873 
5874   return true;
5875 }
5876 
5877 UniquePtr<ProfileChunkedBuffer> profiler_capture_backtrace() {
5878   MOZ_RELEASE_ASSERT(CorePS::Exists());
5879   AUTO_PROFILER_LABEL("profiler_capture_backtrace", PROFILER);
5880 
5881   // Quick is-active check before allocating a buffer.
5882   if (!profiler_is_active()) {
5883     return nullptr;
5884   }
5885 
5886   auto buffer = MakeUnique<ProfileChunkedBuffer>(
5887       ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
5888       MakeUnique<ProfileBufferChunkManagerSingle>(
5889           ProfileBufferChunkManager::scExpectedMaximumStackSize));
5890 
5891   if (!profiler_capture_backtrace_into(*buffer, StackCaptureOptions::Full)) {
5892     return nullptr;
5893   }
5894 
5895   return buffer;
5896 }
5897 
5898 UniqueProfilerBacktrace profiler_get_backtrace() {
5899   UniquePtr<ProfileChunkedBuffer> buffer = profiler_capture_backtrace();
5900 
5901   if (!buffer) {
5902     return nullptr;
5903   }
5904 
5905   return UniqueProfilerBacktrace(
5906       new ProfilerBacktrace("SyncProfile", std::move(buffer)));
5907 }
5908 
5909 void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
5910   delete aBacktrace;
5911 }
5912 
5913 // This is a simplified version of profiler_add_marker that can be easily passed
5914 // into the JS engine.
5915 void profiler_add_js_marker(const char* aMarkerName, const char* aMarkerText) {
5916   PROFILER_MARKER_TEXT(
5917       ProfilerString8View::WrapNullTerminatedString(aMarkerName), JS, {},
5918       ProfilerString8View::WrapNullTerminatedString(aMarkerText));
5919 }
5920 
5921 void profiler_add_js_allocation_marker(JS::RecordAllocationInfo&& info) {
5922   if (!profiler_can_accept_markers()) {
5923     return;
5924   }
5925 
5926   struct JsAllocationMarker {
5927     static constexpr mozilla::Span<const char> MarkerTypeName() {
5928       return mozilla::MakeStringSpan("JS allocation");
5929     }
5930     static void StreamJSONMarkerData(
5931         mozilla::baseprofiler::SpliceableJSONWriter& aWriter,
5932         const mozilla::ProfilerString16View& aTypeName,
5933         const mozilla::ProfilerString8View& aClassName,
5934         const mozilla::ProfilerString16View& aDescriptiveTypeName,
5935         const mozilla::ProfilerString8View& aCoarseType, uint64_t aSize,
5936         bool aInNursery) {
5937       if (aClassName.Length() != 0) {
5938         aWriter.StringProperty("className", aClassName);
5939       }
5940       if (aTypeName.Length() != 0) {
5941         aWriter.StringProperty(
5942             "typeName",
5943             NS_ConvertUTF16toUTF8(aTypeName.Data(), aTypeName.Length()));
5944       }
5945       if (aDescriptiveTypeName.Length() != 0) {
5946         aWriter.StringProperty(
5947             "descriptiveTypeName",
5948             NS_ConvertUTF16toUTF8(aDescriptiveTypeName.Data(),
5949                                   aDescriptiveTypeName.Length()));
5950       }
5951       aWriter.StringProperty("coarseType", aCoarseType);
5952       aWriter.IntProperty("size", aSize);
5953       aWriter.BoolProperty("inNursery", aInNursery);
5954     }
5955     static mozilla::MarkerSchema MarkerTypeDisplay() {
5956       return mozilla::MarkerSchema::SpecialFrontendLocation{};
5957     }
5958   };
5959 
5960   profiler_add_marker(
5961       "JS allocation", geckoprofiler::category::JS, MarkerStack::Capture(),
5962       JsAllocationMarker{},
5963       ProfilerString16View::WrapNullTerminatedString(info.typeName),
5964       ProfilerString8View::WrapNullTerminatedString(info.className),
5965       ProfilerString16View::WrapNullTerminatedString(info.descriptiveTypeName),
5966       ProfilerString8View::WrapNullTerminatedString(info.coarseType), info.size,
5967       info.inNursery);
5968 }
5969 
5970 bool profiler_is_locked_on_current_thread() {
5971   // This function is used to help users avoid calling `profiler_...` functions
5972   // when the profiler may already have a lock in place, which would prevent a
5973   // 2nd recursive lock (resulting in a crash or a never-ending wait), or a
5974   // deadlock between any two mutexes. So we must return `true` for any of:
5975   // - The main profiler mutex, used by most functions, and/or
5976   // - The buffer mutex, used directly in some functions without locking the
5977   //   main mutex, e.g., marker-related functions.
5978   // - The ProfilerParent or ProfilerChild mutex, used to store and process
5979   //   buffer chunk updates.
5980   return gPSMutex.IsLockedOnCurrentThread() ||
5981          CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread() ||
5982          ProfilerParent::IsLockedOnCurrentThread() ||
5983          ProfilerChild::IsLockedOnCurrentThread();
5984 }
5985 
5986 static constexpr net::TimingStruct scEmptyNetTimingStruct;
5987 
5988 void profiler_add_network_marker(
5989     nsIURI* aURI, const nsACString& aRequestMethod, int32_t aPriority,
5990     uint64_t aChannelId, NetworkLoadType aType, mozilla::TimeStamp aStart,
5991     mozilla::TimeStamp aEnd, int64_t aCount,
5992     mozilla::net::CacheDisposition aCacheDisposition, uint64_t aInnerWindowID,
5993     const mozilla::net::TimingStruct* aTimings,
5994     UniquePtr<ProfileChunkedBuffer> aSource,
5995     const Maybe<nsDependentCString>& aContentType, nsIURI* aRedirectURI,
5996     uint32_t aRedirectFlags, uint64_t aRedirectChannelId) {
5997   if (!profiler_can_accept_markers()) {
5998     return;
5999   }
6000 
6001   nsAutoCStringN<2048> name;
6002   name.AppendASCII("Load ");
6003   // The top 32 bits are the process ID of the load; keep only the low 32 bits.
6004   name.AppendInt(aChannelId & 0xFFFFFFFFu);
6005 
6006   // These can do allocations/frees/etc; avoid if not active
6007   nsAutoCStringN<2048> spec;
6008   if (aURI) {
6009     aURI->GetAsciiSpec(spec);
6010     name.AppendASCII(": ");
6011     name.Append(spec);
6012   }
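  // At this point the marker name has the form "Load <id>" or
  // "Load <id>: <spec>", e.g. "Load 1234: https://example.com/".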
6013 
6014   nsAutoCString redirect_spec;
6015   if (aRedirectURI) {
6016     aRedirectURI->GetAsciiSpec(redirect_spec);
6017   }
6018 
6019   struct NetworkMarker {
6020     static constexpr Span<const char> MarkerTypeName() {
6021       return MakeStringSpan("Network");
6022     }
6023     static void StreamJSONMarkerData(
6024         baseprofiler::SpliceableJSONWriter& aWriter, mozilla::TimeStamp aStart,
6025         mozilla::TimeStamp aEnd, int64_t aID, const ProfilerString8View& aURI,
6026         const ProfilerString8View& aRequestMethod, NetworkLoadType aType,
6027         int32_t aPri, int64_t aCount, net::CacheDisposition aCacheDisposition,
6028         const net::TimingStruct& aTimings,
6029         const ProfilerString8View& aRedirectURI,
6030         const ProfilerString8View& aContentType, uint32_t aRedirectFlags,
6031         int64_t aRedirectChannelId) {
6032       // This payload still streams a startTime and endTime property because it
6033       // made the migration to MarkerTiming on the front-end easier.
6034       aWriter.TimeProperty("startTime", aStart);
6035       aWriter.TimeProperty("endTime", aEnd);
6036 
6037       aWriter.IntProperty("id", aID);
6038       aWriter.StringProperty("status", GetNetworkState(aType));
6039       if (Span<const char> cacheString = GetCacheState(aCacheDisposition);
6040           !cacheString.IsEmpty()) {
6041         aWriter.StringProperty("cache", cacheString);
6042       }
6043       aWriter.IntProperty("pri", aPri);
6044       if (aCount > 0) {
6045         aWriter.IntProperty("count", aCount);
6046       }
6047       if (aURI.Length() != 0) {
6048         aWriter.StringProperty("URI", aURI);
6049       }
6050       if (aRedirectURI.Length() != 0) {
6051         aWriter.StringProperty("RedirectURI", aRedirectURI);
6052         aWriter.StringProperty("redirectType", getRedirectType(aRedirectFlags));
6053         aWriter.BoolProperty(
6054             "isHttpToHttpsRedirect",
6055             aRedirectFlags & nsIChannelEventSink::REDIRECT_STS_UPGRADE);
6056 
6057         MOZ_ASSERT(
6058             aRedirectChannelId != 0,
6059             "aRedirectChannelId should be non-zero for a redirected request");
6060         aWriter.IntProperty("redirectId", aRedirectChannelId);
6061       }
6062 
6063       aWriter.StringProperty("requestMethod", aRequestMethod);
6064 
6065       if (aContentType.Length() != 0) {
6066         aWriter.StringProperty("contentType", aContentType);
6067       } else {
6068         aWriter.NullProperty("contentType");
6069       }
6070 
6071       if (aType != NetworkLoadType::LOAD_START) {
6072         aWriter.TimeProperty("domainLookupStart", aTimings.domainLookupStart);
6073         aWriter.TimeProperty("domainLookupEnd", aTimings.domainLookupEnd);
6074         aWriter.TimeProperty("connectStart", aTimings.connectStart);
6075         aWriter.TimeProperty("tcpConnectEnd", aTimings.tcpConnectEnd);
6076         aWriter.TimeProperty("secureConnectionStart",
6077                              aTimings.secureConnectionStart);
6078         aWriter.TimeProperty("connectEnd", aTimings.connectEnd);
6079         aWriter.TimeProperty("requestStart", aTimings.requestStart);
6080         aWriter.TimeProperty("responseStart", aTimings.responseStart);
6081         aWriter.TimeProperty("responseEnd", aTimings.responseEnd);
6082       }
6083     }
6084     static MarkerSchema MarkerTypeDisplay() {
6085       return MarkerSchema::SpecialFrontendLocation{};
6086     }
6087 
6088    private:
6089     static Span<const char> GetNetworkState(NetworkLoadType aType) {
6090       switch (aType) {
6091         case NetworkLoadType::LOAD_START:
6092           return MakeStringSpan("STATUS_START");
6093         case NetworkLoadType::LOAD_STOP:
6094           return MakeStringSpan("STATUS_STOP");
6095         case NetworkLoadType::LOAD_REDIRECT:
6096           return MakeStringSpan("STATUS_REDIRECT");
6097         default:
6098           MOZ_ASSERT(false, "Unexpected NetworkLoadType enum value.");
6099           return MakeStringSpan("");
6100       }
6101     }
6102 
6103     static Span<const char> GetCacheState(
6104         net::CacheDisposition aCacheDisposition) {
6105       switch (aCacheDisposition) {
6106         case net::kCacheUnresolved:
6107           return MakeStringSpan("Unresolved");
6108         case net::kCacheHit:
6109           return MakeStringSpan("Hit");
6110         case net::kCacheHitViaReval:
6111           return MakeStringSpan("HitViaReval");
6112         case net::kCacheMissedViaReval:
6113           return MakeStringSpan("MissedViaReval");
6114         case net::kCacheMissed:
6115           return MakeStringSpan("Missed");
6116         case net::kCacheUnknown:
6117           return MakeStringSpan("");
6118         default:
6119           MOZ_ASSERT(false, "Unexpected CacheDisposition enum value.");
6120           return MakeStringSpan("");
6121       }
6122     }
6123 
6124     static Span<const char> getRedirectType(uint32_t aRedirectFlags) {
6125       MOZ_ASSERT(aRedirectFlags != 0, "aRedirectFlags should be non-zero");
6126       if (aRedirectFlags & nsIChannelEventSink::REDIRECT_TEMPORARY) {
6127         return MakeStringSpan("Temporary");
6128       }
6129       if (aRedirectFlags & nsIChannelEventSink::REDIRECT_PERMANENT) {
6130         return MakeStringSpan("Permanent");
6131       }
6132       if (aRedirectFlags & nsIChannelEventSink::REDIRECT_INTERNAL) {
6133         return MakeStringSpan("Internal");
6134       }
6135       MOZ_ASSERT(false, "Couldn't find a redirect type from aRedirectFlags");
6136       return MakeStringSpan("");
6137     }
6138   };
6139 
6140   profiler_add_marker(
6141       name, geckoprofiler::category::NETWORK,
6142       {MarkerTiming::Interval(aStart, aEnd),
6143        MarkerStack::TakeBacktrace(std::move(aSource)),
6144        MarkerInnerWindowId(aInnerWindowID)},
6145       NetworkMarker{}, aStart, aEnd, static_cast<int64_t>(aChannelId), spec,
6146       aRequestMethod, aType, aPriority, aCount, aCacheDisposition,
6147       aTimings ? *aTimings : scEmptyNetTimingStruct, redirect_spec,
6148       aContentType ? ProfilerString8View(*aContentType) : ProfilerString8View(),
6149       aRedirectFlags, aRedirectChannelId);
6150 }
6151 
6152 bool profiler_add_native_allocation_marker(int64_t aSize,
6153                                            uintptr_t aMemoryAddress) {
6154   if (!profiler_can_accept_markers()) {
6155     return false;
6156   }
6157 
6158   // Because native allocations may be intercepted anywhere, blocking while
6159   // locking the profiler mutex here could end up causing a deadlock if another
6160   // mutex is taken, which the profiler may indirectly need elsewhere.
6161   // See bug 1642726 for such a scenario.
6162   // So instead we bail out if the mutex is already locked. Native allocations
6163   // are statistically sampled anyway, so missing a few because of this is
6164   // acceptable.
6165   if (gPSMutex.IsLockedOnCurrentThread()) {
6166     return false;
6167   }
6168 
6169   struct NativeAllocationMarker {
6170     static constexpr mozilla::Span<const char> MarkerTypeName() {
6171       return mozilla::MakeStringSpan("Native allocation");
6172     }
6173     static void StreamJSONMarkerData(
6174         mozilla::baseprofiler::SpliceableJSONWriter& aWriter, int64_t aSize,
6175         uintptr_t aMemoryAddress, int aThreadId) {
6176       aWriter.IntProperty("size", aSize);
6177       aWriter.IntProperty("memoryAddress",
6178                           static_cast<int64_t>(aMemoryAddress));
6179       aWriter.IntProperty("threadId", aThreadId);
6180     }
6181     static mozilla::MarkerSchema MarkerTypeDisplay() {
6182       return mozilla::MarkerSchema::SpecialFrontendLocation{};
6183     }
6184   };
6185 
6186   profiler_add_marker("Native allocation", geckoprofiler::category::OTHER,
6187                       {MarkerThreadId::MainThread(), MarkerStack::Capture()},
6188                       NativeAllocationMarker{}, aSize, aMemoryAddress,
6189                       profiler_current_thread_id());
6190   return true;
6191 }
6192 
6193 void profiler_set_js_context(JSContext* aCx) {
6194   MOZ_ASSERT(aCx);
6195 
6196   PSAutoLock lock(gPSMutex);
6197 
6198   RegisteredThread* registeredThread =
6199       TLSRegisteredThread::RegisteredThread(lock);
6200   if (!registeredThread) {
6201     return;
6202   }
6203 
6204   registeredThread->SetJSContext(aCx);
6205 
6206   // This call is on-thread, so we can call PollJSSampling() to start JS
6207   // sampling immediately.
6208   registeredThread->PollJSSampling();
6209 
6210   if (ActivePS::Exists(lock)) {
6211     ProfiledThreadData* profiledThreadData =
6212         ActivePS::GetProfiledThreadData(lock, registeredThread);
6213     if (profiledThreadData) {
6214       profiledThreadData->NotifyReceivedJSContext(
6215           ActivePS::Buffer(lock).BufferRangeEnd());
6216     }
6217   }
6218 }
6219 
profiler_clear_js_context()6220 void profiler_clear_js_context() {
6221   MOZ_RELEASE_ASSERT(CorePS::Exists());
6222 
6223   PSAutoLock lock(gPSMutex);
6224 
6225   RegisteredThread* registeredThread =
6226       TLSRegisteredThread::RegisteredThread(lock);
6227   if (!registeredThread) {
6228     return;
6229   }
6230 
6231   JSContext* cx = registeredThread->GetJSContext();
6232   if (!cx) {
6233     return;
6234   }
6235 
6236   if (ActivePS::Exists(lock) && ActivePS::FeatureJS(lock)) {
6237     ProfiledThreadData* profiledThreadData =
6238         ActivePS::GetProfiledThreadData(lock, registeredThread);
6239     if (profiledThreadData) {
6240       profiledThreadData->NotifyAboutToLoseJSContext(
6241           cx, CorePS::ProcessStartTime(), ActivePS::Buffer(lock));
6242 
6243       // Notify the JS context that profiling for this context has stopped.
6244       // Do this by calling StopJSSampling and PollJSSampling before
6245       // nulling out the JSContext.
6246       registeredThread->StopJSSampling();
6247       registeredThread->PollJSSampling();
6248 
6249       registeredThread->ClearJSContext();
6250 
6251       // Tell the thread that we'd like to have JS sampling on this
6252       // thread again, once it gets a new JSContext (if ever).
6253       registeredThread->StartJSSampling(ActivePS::JSFlags(lock));
6254       return;
6255     }
6256   }
6257 
6258   registeredThread->ClearJSContext();
6259 }
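
// Taken together with profiler_set_js_context() above, the per-thread JS
// sampling lifecycle is roughly as follows (illustrative summary only):
//
//   profiler_set_js_context(cx);   // SetJSContext + PollJSSampling, so JS
//                                  // sampling of cx can start right away.
//   ...                            // samples may now include JS frames.
//   profiler_clear_js_context();   // when the profiler is active with the JS
//                                  // feature: StopJSSampling + PollJSSampling
//                                  // to flush, ClearJSContext, then
//                                  // StartJSSampling so a future JSContext is
//                                  // picked up again; otherwise just
//                                  // ClearJSContext.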

// NOTE: aCollector's methods will be called while the target thread is paused.
// Doing things in those methods like allocating -- which may try to claim
// locks -- is a surefire way to deadlock.
void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
                                        ProfilerStackCollector& aCollector,
                                        bool aSampleNative /* = true */) {
  const bool isSynchronous = [&aThreadId]() {
    const int currentThreadId = profiler_current_thread_id();
    if (aThreadId == 0) {
      aThreadId = currentThreadId;
      return true;
    }
    return aThreadId == currentThreadId;
  }();

  // Lock the profiler mutex
  PSAutoLock lock(gPSMutex);

  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(lock);
  for (auto& thread : registeredThreads) {
    RefPtr<ThreadInfo> info = thread->Info();
    RegisteredThread& registeredThread = *thread.get();

    if (info->ThreadId() == aThreadId) {
      if (info->IsMainThread()) {
        aCollector.SetIsMainThread();
      }

      // Allocate the space for the native stack
      NativeStack nativeStack;

      auto collectStack = [&](const Registers& aRegs, const TimeStamp& aNow) {
        // The target thread is now suspended (unless this is a synchronous
        // sample of the current thread). Collect a native backtrace and call
        // the callback.
        JsFrameBuffer& jsFrames = CorePS::JsFrames(lock);
        StackWalkControl* stackWalkControlIfSupported = nullptr;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
        StackWalkControl stackWalkControl;
        if constexpr (StackWalkControl::scIsSupported) {
          if (aSampleNative) {
            stackWalkControlIfSupported = &stackWalkControl;
          }
        }
#endif
        const uint32_t jsFramesCount =
            ExtractJsFrames(isSynchronous, registeredThread, aRegs, aCollector,
                            jsFrames, stackWalkControlIfSupported);

#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
        if (aSampleNative) {
          // We can only use FramePointerStackWalk or MozStackWalk from
          // suspend_and_sample_thread as other stackwalking methods may not be
          // initialized.
#  if defined(USE_FRAME_POINTER_STACK_WALK)
          DoFramePointerBacktrace(lock, registeredThread, aRegs, nativeStack,
                                  stackWalkControlIfSupported);
#  elif defined(USE_MOZ_STACK_WALK)
          DoMozStackWalkBacktrace(lock, registeredThread, aRegs, nativeStack,
                                  stackWalkControlIfSupported);
#  else
#    error "Invalid configuration"
#  endif

          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector, jsFrames, jsFramesCount);
        } else
#endif
        {
          MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                      nativeStack, aCollector, jsFrames, jsFramesCount);

          if (ProfilerFeature::HasLeaf(aFeatures)) {
            aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
          }
        }
      };

      if (isSynchronous) {
        // Sampling the current thread, do NOT suspend it!
        Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
        regs.SyncPopulate();
#else
        regs.Clear();
#endif
        collectStack(regs, TimeStamp::Now());
      } else {
        // Suspend, sample, and then resume the target thread.
        Sampler sampler(lock);
        TimeStamp now = TimeStamp::Now();
        sampler.SuspendAndSampleAndResumeThread(lock, registeredThread, now,
                                                collectStack);

        // NOTE: Make sure to disable the sampler before it is destroyed, in
        // case the profiler is running at the same time.
        sampler.Disable(lock);
      }
      break;
    }
  }
}
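
// For illustration only: a hypothetical caller (not part of this file). This
// sketch assumes a MyPreallocatedCollector type derived from
// ProfilerStackCollector that reserves all of its storage up front, so that
// its callbacks perform no allocation (and take no locks) while the target
// thread is suspended, as required by the NOTE above.
//
//   MyPreallocatedCollector collector;
//   profiler_suspend_and_sample_thread(
//       /* aThreadId */ 0,         // 0 == current thread (not suspended)
//       ProfilerFeature::Leaf,     // features used while merging stacks
//       collector,
//       /* aSampleNative */ true); // also walk the native stack if possible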

// END externally visible functions
////////////////////////////////////////////////////////////////////////