/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// There are three kinds of samples done by the profiler.
//
// - A "periodic" sample is the most complex kind. It is done in response to a
//   timer while the profiler is active. It involves writing a stack trace plus
//   a variety of other values (memory measurements, responsiveness
//   measurements, markers, etc.) into the main ProfileBuffer. The sampling is
//   done from off-thread, and so SuspendAndSampleAndResumeThread() is used to
//   get the register values.
//
// - A "synchronous" sample is a simpler kind. It is done in response to an API
//   call (profiler_get_backtrace()). It involves writing a stack trace and
//   little else into a temporary ProfileBuffer, and wrapping that up in a
//   ProfilerBacktrace that can be subsequently used in a marker. The sampling
//   is done on-thread, and so Registers::SyncPopulate() is used to get the
//   register values.
//
// - A "backtrace" sample is the simplest kind. It is done in response to an
//   API call (profiler_suspend_and_sample_thread()). It involves getting a
//   stack trace via a ProfilerStackCollector; it does not write to a
//   ProfileBuffer. The sampling is done from off-thread, and so uses
//   SuspendAndSampleAndResumeThread() to get the register values.

#include "platform.h"

#include <algorithm>
#include <errno.h>
#include <fstream>
#include <ostream>
#include <sstream>

// #include "memory_hooks.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/Atomics.h"
#include "mozilla/AutoProfilerLabel.h"
#include "mozilla/BaseProfilerDetail.h"
#include "mozilla/DoubleConversion.h"
#include "mozilla/Printf.h"
#include "mozilla/ProfileBufferChunkManagerSingle.h"
#include "mozilla/ProfileBufferChunkManagerWithLocalLimit.h"
#include "mozilla/ProfileChunkedBuffer.h"
#include "mozilla/Services.h"
#include "mozilla/Span.h"
#include "mozilla/StackWalk.h"
#include "mozilla/StaticPtr.h"
#include "mozilla/ThreadLocal.h"
#include "mozilla/TimeStamp.h"
#include "mozilla/Tuple.h"
#include "mozilla/UniquePtr.h"
#include "mozilla/Vector.h"
#include "prdtoa.h"
#include "prtime.h"

#include "BaseProfiler.h"
#include "PageInformation.h"
#include "ProfiledThreadData.h"
#include "ProfilerBacktrace.h"
#include "ProfileBuffer.h"
#include "BaseProfilerMarkerPayload.h"
#include "RegisteredThread.h"
#include "BaseProfilerSharedLibraries.h"
#include "ThreadInfo.h"
#include "VTuneProfiler.h"

// Win32 builds always have frame pointers, so FramePointerStackWalk() always
// works.
#if defined(GP_PLAT_x86_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Win64 builds always omit frame pointers, so we use the slower
// MozStackWalk(), which works in that case.
#if defined(GP_PLAT_amd64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// AArch64 Win64 doesn't seem to use frame pointers, so we use the slower
// MozStackWalk().
#if defined(GP_PLAT_arm64_windows)
#  define HAVE_NATIVE_UNWIND
#  define USE_MOZ_STACK_WALK
#endif

// Mac builds only have frame pointers when MOZ_PROFILING is specified, so
// FramePointerStackWalk() only works in that case. We don't use MozStackWalk()
// on Mac.
#if defined(GP_OS_darwin) && defined(MOZ_PROFILING)
#  define HAVE_NATIVE_UNWIND
#  define USE_FRAME_POINTER_STACK_WALK
#endif

// Android builds use the ARM Exception Handling ABI to unwind.
#if defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
#  define HAVE_NATIVE_UNWIND
#  define USE_EHABI_STACKWALK
#  include "EHABIStackWalk.h"
#endif

// Linux/BSD builds use LUL, which uses DWARF info to unwind stacks.
#if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_x86_linux) ||       \
    defined(GP_PLAT_amd64_android) || defined(GP_PLAT_x86_android) ||   \
    defined(GP_PLAT_mips64_linux) || defined(GP_PLAT_arm64_linux) ||    \
    defined(GP_PLAT_arm64_android) || defined(GP_PLAT_amd64_freebsd) || \
    defined(GP_PLAT_arm64_freebsd)
#  define HAVE_NATIVE_UNWIND
#  define USE_LUL_STACKWALK
#  include "lul/LulMain.h"
#  include "lul/platform-linux-lul.h"

// On Linux we use LUL for periodic samples and synchronous samples, but we use
// FramePointerStackWalk for backtrace samples when MOZ_PROFILING is enabled.
// (See the comment at the top of the file for a definition of
// periodic/synchronous/backtrace.)
//
// FramePointerStackWalk can produce incomplete stacks when the current entry
// is in a shared library without frame pointers; however, LUL can take a long
// time to initialize, which is undesirable for consumers of
// profiler_suspend_and_sample_thread like the Background Hang Reporter.
#  if defined(MOZ_PROFILING)
#    define USE_FRAME_POINTER_STACK_WALK
#  endif
#endif

// We can only stackwalk without expensive initialization on platforms which
// support FramePointerStackWalk or MozStackWalk. LUL stackwalking requires
// initializing LUL, and EHABIStackWalk requires initializing EHABI, both of
// which can be expensive.
#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
#  define HAVE_FASTINIT_NATIVE_UNWIND
#endif

#ifdef MOZ_VALGRIND
#  include <valgrind/memcheck.h>
#else
#  define VALGRIND_MAKE_MEM_DEFINED(_addr, _len) ((void)0)
#endif

#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
#  include <ucontext.h>
#endif

namespace mozilla {
namespace baseprofiler {

using detail::RacyFeatures;

bool LogTest(int aLevelToTest) {
  static const int maxLevel =
      getenv("MOZ_BASE_PROFILER_VERBOSE_LOGGING")
          ? 5
          : getenv("MOZ_BASE_PROFILER_DEBUG_LOGGING")
                ? 4
                : getenv("MOZ_BASE_PROFILER_LOGGING") ? 3 : 0;
  return aLevelToTest <= maxLevel;
}
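
// For illustration, the log levels the above works out to (maxLevel is
// computed once, on first use):
//
//   environment variable                 maxLevel  LogTest(3)  LogTest(4)
//   (none set)                           0         false       false
//   MOZ_BASE_PROFILER_LOGGING            3         true        false
//   MOZ_BASE_PROFILER_DEBUG_LOGGING      4         true        true
//   MOZ_BASE_PROFILER_VERBOSE_LOGGING    5         true        true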

void PrintToConsole(const char* aFmt, ...) {
  va_list args;
  va_start(args, aFmt);
#if defined(ANDROID)
  __android_log_vprint(ANDROID_LOG_INFO, "Gecko", aFmt, args);
#else
  vfprintf(stderr, aFmt, args);
#endif
  va_end(args);
}

constexpr static bool ValidateFeatures() {
  int expectedFeatureNumber = 0;

  // Feature numbers should start at 0 and increase by 1 each.
#define CHECK_FEATURE(n_, str_, Name_, desc_) \
  if ((n_) != expectedFeatureNumber) {        \
    return false;                             \
  }                                           \
  ++expectedFeatureNumber;

  BASE_PROFILER_FOR_EACH_FEATURE(CHECK_FEATURE)

#undef CHECK_FEATURE

  return true;
}

static_assert(ValidateFeatures(), "Feature list is invalid");
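
// For illustration (a sketch; the real feature list lives in
// BASE_PROFILER_FOR_EACH_FEATURE): an entry like MACRO(0, "stackwalk",
// StackWalk, "...") expands under CHECK_FEATURE to:
//
//   if ((0) != expectedFeatureNumber) {
//     return false;
//   }
//   ++expectedFeatureNumber;
//
// so the whole list is validated as 0, 1, 2, ... at compile time by the
// static_assert above.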

// Return all features that are available on this platform.
static uint32_t AvailableFeatures() {
  uint32_t features = 0;

#define ADD_FEATURE(n_, str_, Name_, desc_) \
  ProfilerFeature::Set##Name_(features);

  // Add all the possible features.
  BASE_PROFILER_FOR_EACH_FEATURE(ADD_FEATURE)

#undef ADD_FEATURE

  // Now remove features not supported on this platform/configuration.
  ProfilerFeature::ClearJava(features);
  ProfilerFeature::ClearJS(features);
  ProfilerFeature::ClearScreenshots(features);
#if !defined(HAVE_NATIVE_UNWIND)
  ProfilerFeature::ClearStackWalk(features);
#endif
  ProfilerFeature::ClearTaskTracer(features);
  ProfilerFeature::ClearJSTracer(features);

  return features;
}

// Default features common to all contexts (even if not available).
static uint32_t DefaultFeatures() {
  return ProfilerFeature::Java | ProfilerFeature::JS | ProfilerFeature::Leaf |
         ProfilerFeature::StackWalk | ProfilerFeature::Threads;
}

// Extra default features when MOZ_PROFILER_STARTUP is set (even if not
// available).
static uint32_t StartupExtraDefaultFeatures() {
  // Enable mainthreadio by default for startup profiles as startup is heavy on
  // I/O operations, and main thread I/O is really important to see there.
  return ProfilerFeature::MainThreadIO;
}

// The auto-lock/unlock mutex that guards accesses to CorePS and ActivePS.
// Use `PSAutoLock lock;` to take the lock until the end of the enclosing block.
// External profilers may use this same lock for their own data, but as the lock
// is non-recursive, *only* `f(PSLockRef, ...)` functions below should be
// called, to avoid double-locking.
class MOZ_RAII PSAutoLock {
 public:
  PSAutoLock() { gPSMutex.Lock(); }

  ~PSAutoLock() { gPSMutex.Unlock(); }

  PSAutoLock(const PSAutoLock&) = delete;
  void operator=(const PSAutoLock&) = delete;

  [[nodiscard]] static bool IsLockedOnCurrentThread() {
    return gPSMutex.IsLockedOnCurrentThread();
  }

 private:
  static detail::BaseProfilerMutex gPSMutex;
};

detail::BaseProfilerMutex PSAutoLock::gPSMutex;

// Only functions that take a PSLockRef arg can access CorePS's and ActivePS's
// fields.
typedef const PSAutoLock& PSLockRef;
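
// For illustration, a typical locked access (a sketch, not code from this
// file): taking the lock yields an object that doubles as proof-of-lock.
//
//   PSAutoLock lock;               // gPSMutex held until end of scope.
//   if (ActivePS::Exists(lock)) {  // `lock` converts to PSLockRef.
//     ProfileBuffer& buffer = ActivePS::Buffer(lock);
//     // ... use buffer while the lock is held ...
//   }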

#define PS_GET(type_, name_)      \
  static type_ name_(PSLockRef) { \
    MOZ_ASSERT(sInstance);        \
    return sInstance->m##name_;   \
  }

#define PS_GET_LOCKLESS(type_, name_) \
  static type_ name_() {              \
    MOZ_ASSERT(sInstance);            \
    return sInstance->m##name_;       \
  }

#define PS_GET_AND_SET(type_, name_)                  \
  PS_GET(type_, name_)                                \
  static void Set##name_(PSLockRef, type_ a##name_) { \
    MOZ_ASSERT(sInstance);                            \
    sInstance->m##name_ = a##name_;                   \
  }
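
// For illustration: `PS_GET(double, Interval)` (used in ActivePS below)
// expands to a lock-proof getter over the `mInterval` member:
//
//   static double Interval(PSLockRef) {
//     MOZ_ASSERT(sInstance);
//     return sInstance->mInterval;
//   }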

// All functions in this file can run on multiple threads unless they have an
// NS_IsMainThread() assertion.

// This class contains the profiler's core global state, i.e. that which is
// valid even when the profiler is not active. Most profile operations can't do
// anything useful when this class is not instantiated, so we release-assert
// its non-nullness in all such operations.
//
// Accesses to CorePS are guarded by gPSMutex. Getters and setters take a
// PSAutoLock reference as an argument as proof that the gPSMutex is currently
// locked. This makes it clear when gPSMutex is locked and helps avoid
// accidental unlocked accesses to global state. There are ways to circumvent
// this mechanism, but please don't do so without *very* good reason and a
// detailed explanation.
//
// The exceptions to this rule:
//
// - mProcessStartTime, because it's immutable;
//
// - each thread's RacyRegisteredThread object is accessible without locking via
//   TLSRegisteredThread::RacyRegisteredThread().
class CorePS {
 private:
  CorePS()
      : mMainThreadId(profiler_current_thread_id()),
        mProcessStartTime(TimeStamp::ProcessCreation()),
        // This needs its own mutex, because it is used concurrently from
        // functions guarded by gPSMutex as well as others without safety (e.g.,
        // profiler_add_marker). It is *not* used inside the critical section of
        // the sampler, because mutexes cannot be used there.
        mCoreBuffer(ProfileChunkedBuffer::ThreadSafety::WithMutex)
#ifdef USE_LUL_STACKWALK
        ,
        mLul(nullptr)
#endif
  {
  }

  ~CorePS() {}

 public:
  static void Create(PSLockRef aLock) {
    MOZ_ASSERT(!sInstance);
    sInstance = new CorePS();
  }

  static void Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    delete sInstance;
    sInstance = nullptr;
  }

  // Unlike ActivePS::Exists(), CorePS::Exists() can be called without gPSMutex
  // being locked. This is because CorePS is instantiated so early on the main
  // thread that we don't have to worry about it being racy.
  static bool Exists() { return !!sInstance; }

  static bool IsMainThread() {
    MOZ_ASSERT(sInstance);
    return profiler_current_thread_id() == sInstance->mMainThreadId;
  }

  static void AddSizeOf(PSLockRef, MallocSizeOf aMallocSizeOf,
                        size_t& aProfSize, size_t& aLulSize) {
    MOZ_ASSERT(sInstance);

    aProfSize += aMallocSizeOf(sInstance);

    for (auto& registeredThread : sInstance->mRegisteredThreads) {
      aProfSize += registeredThread->SizeOfIncludingThis(aMallocSizeOf);
    }

    for (auto& registeredPage : sInstance->mRegisteredPages) {
      aProfSize += registeredPage->SizeOfIncludingThis(aMallocSizeOf);
    }

    // Measurement of the following things may be added later if DMD finds it
    // is worthwhile:
    // - CorePS::mRegisteredThreads itself (its elements' children are
    //   measured above)
    // - CorePS::mRegisteredPages itself (its elements' children are
    //   measured above)
    // - CorePS::mInterposeObserver

#if defined(USE_LUL_STACKWALK)
    if (sInstance->mLul) {
      aLulSize += sInstance->mLul->SizeOfIncludingThis(aMallocSizeOf);
    }
#endif
  }

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(int, MainThreadId)

  // No PSLockRef is needed for this field because it's immutable.
  PS_GET_LOCKLESS(TimeStamp, ProcessStartTime)

  // No PSLockRef is needed for this field because it's thread-safe.
  PS_GET_LOCKLESS(ProfileChunkedBuffer&, CoreBuffer)

  PS_GET(const Vector<UniquePtr<RegisteredThread>>&, RegisteredThreads)

  static void AppendRegisteredThread(
      PSLockRef, UniquePtr<RegisteredThread>&& aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredThreads.append(std::move(aRegisteredThread)));
  }

  static void RemoveRegisteredThread(PSLockRef,
                                     RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    // Remove aRegisteredThread from mRegisteredThreads.
    for (UniquePtr<RegisteredThread>& rt : sInstance->mRegisteredThreads) {
      if (rt.get() == aRegisteredThread) {
        sInstance->mRegisteredThreads.erase(&rt);
        return;
      }
    }
  }

  PS_GET(Vector<RefPtr<PageInformation>>&, RegisteredPages)

  static void AppendRegisteredPage(PSLockRef,
                                   RefPtr<PageInformation>&& aRegisteredPage) {
    MOZ_ASSERT(sInstance);
    struct RegisteredPageComparator {
      PageInformation* aA;
      bool operator()(PageInformation* aB) const { return aA->Equals(aB); }
    };

    auto foundPageIter = std::find_if(
        sInstance->mRegisteredPages.begin(), sInstance->mRegisteredPages.end(),
        RegisteredPageComparator{aRegisteredPage.get()});

    if (foundPageIter != sInstance->mRegisteredPages.end()) {
      if ((*foundPageIter)->Url() == "about:blank") {
        // When a BrowsingContext is loaded, the first url loaded in it will be
        // about:blank, and if the principal matches, the first document loaded
        // in it will share an inner window. That's why we should delete the
        // intermittent about:blank if they share the inner window.
        sInstance->mRegisteredPages.erase(foundPageIter);
      } else {
        // Do not register the same page again.
        return;
      }
    }
    MOZ_RELEASE_ASSERT(
        sInstance->mRegisteredPages.append(std::move(aRegisteredPage)));
  }

  static void RemoveRegisteredPage(PSLockRef,
                                   uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    // Remove RegisteredPage from mRegisteredPages by given inner window ID.
    sInstance->mRegisteredPages.eraseIf([&](const RefPtr<PageInformation>& rd) {
      return rd->InnerWindowID() == aRegisteredInnerWindowID;
    });
  }

  static void ClearRegisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mRegisteredPages.clear();
  }

  PS_GET(const Vector<BaseProfilerCount*>&, Counters)

  static void AppendCounter(PSLockRef, BaseProfilerCount* aCounter) {
    MOZ_ASSERT(sInstance);
    // We don't own the counters; they may be stored in static objects.
    MOZ_RELEASE_ASSERT(sInstance->mCounters.append(aCounter));
  }

  static void RemoveCounter(PSLockRef, BaseProfilerCount* aCounter) {
    // We may be called to remove a counter after the profiler is stopped or
    // late in shutdown.
    if (sInstance) {
      auto* counter = std::find(sInstance->mCounters.begin(),
                                sInstance->mCounters.end(), aCounter);
      MOZ_RELEASE_ASSERT(counter != sInstance->mCounters.end());
      sInstance->mCounters.erase(counter);
    }
  }

#ifdef USE_LUL_STACKWALK
  static lul::LUL* Lul(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLul.get();
  }
  static void SetLul(PSLockRef, UniquePtr<lul::LUL> aLul) {
    MOZ_ASSERT(sInstance);
    sInstance->mLul = std::move(aLul);
  }
#endif

  PS_GET_AND_SET(const std::string&, ProcessName)

 private:
  // The singleton instance
  static CorePS* sInstance;

  // ID of the main thread (assuming CorePS was started on the main thread).
  const int mMainThreadId;

  // The time that the process started.
  const TimeStamp mProcessStartTime;

  // The thread-safe blocks-oriented buffer into which all profiling data is
  // recorded.
  // ActivePS controls the lifetime of the underlying contents buffer: When
  // ActivePS does not exist, mCoreBuffer is empty and rejects all reads&writes;
  // see ActivePS for further details.
  // Note: This needs to live here outside of ActivePS, because some producers
  // are indirectly controlled (e.g., by atomic flags) and therefore may still
  // attempt to write some data shortly after ActivePS has shutdown and deleted
  // the underlying buffer in memory.
  ProfileChunkedBuffer mCoreBuffer;

  // Info on all the registered threads.
  // ThreadIds in mRegisteredThreads are unique.
  Vector<UniquePtr<RegisteredThread>> mRegisteredThreads;

  // Info on all the registered pages.
  // InnerWindowIDs in mRegisteredPages are unique.
  Vector<RefPtr<PageInformation>> mRegisteredPages;

  // Non-owning pointers to all active counters
  Vector<BaseProfilerCount*> mCounters;

#ifdef USE_LUL_STACKWALK
  // LUL's state. Null prior to the first activation, non-null thereafter.
  UniquePtr<lul::LUL> mLul;
#endif

  // Process name, provided by child process initialization code.
  std::string mProcessName;
};

CorePS* CorePS::sInstance = nullptr;

class SamplerThread;

static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
                                       double aInterval);

struct LiveProfiledThreadData {
  RegisteredThread* mRegisteredThread;
  UniquePtr<ProfiledThreadData> mProfiledThreadData;
};

// The buffer size is provided as a number of "entries"; this is their size in
// bytes.
constexpr static uint32_t scBytesPerEntry = 8;

// Expected maximum size needed to store one stack sample.
constexpr static uint32_t scExpectedMaximumStackSize = 64 * 1024;

// This class contains the profiler's global state that is valid only when the
// profiler is active. When not instantiated, the profiler is inactive.
//
// Accesses to ActivePS are guarded by gPSMutex, in much the same fashion as
// CorePS.
//
class ActivePS {
 private:
  // We need to decide how many chunks of what size we want to fit in the given
  // total maximum capacity for this process, in the (likely) context of
  // multiple processes making the same choice and having an inter-process
  // mechanism to control the overall memory limit.

  // Minimum chunk size allowed, enough for at least one stack.
  constexpr static uint32_t scMinimumChunkSize = 2 * scExpectedMaximumStackSize;

  // Ideally we want at least 2 unreleased chunks to work with (1 current and 1
  // next), and 2 released chunks (so that one can be recycled when old, leaving
  // one with some data).
  constexpr static uint32_t scMinimumNumberOfChunks = 4;

  // And we want to limit chunks to a maximum size, which is a compromise
  // between:
  // - A big size, which helps with reducing the rate of allocations and IPCs.
  // - A small size, which helps with equalizing the duration of recorded data
  //   (as the inter-process controller will discard the oldest chunks in all
  //   Firefox processes).
  constexpr static uint32_t scMaximumChunkSize = 1024 * 1024;

 public:
  // We should be able to store at least the minimum number of the smallest-
  // possible chunks.
  constexpr static uint32_t scMinimumBufferSize =
      scMinimumNumberOfChunks * scMinimumChunkSize;
  constexpr static uint32_t scMinimumBufferEntries =
      scMinimumBufferSize / scBytesPerEntry;

  // Limit to 2GiB.
  constexpr static uint32_t scMaximumBufferSize = 2u * 1024u * 1024u * 1024u;
  constexpr static uint32_t scMaximumBufferEntries =
      scMaximumBufferSize / scBytesPerEntry;
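
  // For illustration, the concrete values these constants work out to:
  //   scExpectedMaximumStackSize = 64 KiB
  //   scMinimumChunkSize         = 2 * 64 KiB  = 128 KiB
  //   scMinimumBufferSize        = 4 * 128 KiB = 512 KiB
  //   scMinimumBufferEntries     = 512 KiB / 8 = 64 Ki entries
  //   scMaximumBufferSize        = 2 GiB
  //   scMaximumBufferEntries     = 2 GiB / 8   = 256 Mi entries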

  constexpr static uint32_t ClampToAllowedEntries(uint32_t aEntries) {
    if (aEntries <= scMinimumBufferEntries) {
      return scMinimumBufferEntries;
    }
    if (aEntries >= scMaximumBufferEntries) {
      return scMaximumBufferEntries;
    }
    return aEntries;
  }

 private:
  constexpr static uint32_t ChunkSizeForEntries(uint32_t aEntries) {
    return uint32_t(std::min(size_t(ClampToAllowedEntries(aEntries)) *
                                 scBytesPerEntry / scMinimumNumberOfChunks,
                             size_t(scMaximumChunkSize)));
  }
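
  // For illustration: a capacity of 1 Mi entries is 8 MiB of data; split over
  // scMinimumNumberOfChunks (4) that would be 2 MiB per chunk, which the
  // std::min() above caps at scMaximumChunkSize, giving 1 MiB chunks.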

  static uint32_t AdjustFeatures(uint32_t aFeatures, uint32_t aFilterCount) {
    // Filter out any features unavailable in this platform/configuration.
    aFeatures &= AvailableFeatures();

    // Always enable ProfilerFeature::Threads if we have a filter, because
    // users sometimes ask to filter by a list of threads but forget to
    // explicitly specify ProfilerFeature::Threads.
    if (aFilterCount > 0) {
      aFeatures |= ProfilerFeature::Threads;
    }

    // Some features imply others.
    if (aFeatures & ProfilerFeature::FileIOAll) {
      aFeatures |= ProfilerFeature::MainThreadIO | ProfilerFeature::FileIO;
    } else if (aFeatures & ProfilerFeature::FileIO) {
      aFeatures |= ProfilerFeature::MainThreadIO;
    }

    return aFeatures;
  }
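
  // For illustration (assuming FileIOAll is available on this platform):
  //   AdjustFeatures(ProfilerFeature::FileIOAll, /* aFilterCount */ 0)
  // returns FileIOAll | FileIO | MainThreadIO, while a non-zero filter count
  // would additionally set ProfilerFeature::Threads.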

  ActivePS(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
           uint32_t aFeatures, const char** aFilters, uint32_t aFilterCount,
           const Maybe<double>& aDuration)
      : mGeneration(sNextGeneration++),
        mCapacity(aCapacity),
        mDuration(aDuration),
        mInterval(aInterval),
        mFeatures(AdjustFeatures(aFeatures, aFilterCount)),
        mProfileBufferChunkManager(
            size_t(ClampToAllowedEntries(aCapacity.Value())) * scBytesPerEntry,
            ChunkSizeForEntries(aCapacity.Value())),
        mProfileBuffer([this]() -> ProfileChunkedBuffer& {
          CorePS::CoreBuffer().SetChunkManager(mProfileBufferChunkManager);
          return CorePS::CoreBuffer();
        }()),
        // The new sampler thread doesn't start sampling immediately because the
        // main loop within Run() is blocked until this function's caller
        // unlocks gPSMutex.
        mSamplerThread(NewSamplerThread(aLock, mGeneration, aInterval))
#undef HAS_FEATURE
        ,
        mIsPaused(false)
#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
        ,
        mWasPaused(false)
#endif
  {
    // Deep copy aFilters.
    MOZ_ALWAYS_TRUE(mFilters.resize(aFilterCount));
    for (uint32_t i = 0; i < aFilterCount; ++i) {
      mFilters[i] = aFilters[i];
    }
  }

  ~ActivePS() { CorePS::CoreBuffer().ResetChunkManager(); }

  bool ThreadSelected(const char* aThreadName) {
    if (mFilters.empty()) {
      return true;
    }

    std::string name = aThreadName;
    std::transform(name.begin(), name.end(), name.begin(), ::tolower);

    for (uint32_t i = 0; i < mFilters.length(); ++i) {
      std::string filter = mFilters[i];

      if (filter == "*") {
        return true;
      }

      std::transform(filter.begin(), filter.end(), filter.begin(), ::tolower);

      // Crude, non-UTF-8-compatible, case-insensitive substring search.
      if (name.find(filter) != std::string::npos) {
        return true;
      }

      // If the filter starts with pid:, check for a pid match.
      if (filter.find("pid:") == 0) {
        std::string mypid = std::to_string(profiler_current_process_id());
        if (filter.compare(4, std::string::npos, mypid) == 0) {
          return true;
        }
      }
    }

    return false;
  }
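
  // For illustration, with mFilters = {"dom work", "pid:1234"}:
  //   - thread "DOM Worker" is selected ("dom work" is a case-insensitive
  //     substring match);
  //   - every thread is selected when the current process id is 1234;
  //   - a single "*" filter would select all threads unconditionally.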

 public:
  static void Create(PSLockRef aLock, PowerOfTwo32 aCapacity, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount, const Maybe<double>& aDuration) {
    MOZ_ASSERT(!sInstance);
    sInstance = new ActivePS(aLock, aCapacity, aInterval, aFeatures, aFilters,
                             aFilterCount, aDuration);
  }

  [[nodiscard]] static SamplerThread* Destroy(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    auto samplerThread = sInstance->mSamplerThread;
    delete sInstance;
    sInstance = nullptr;

    return samplerThread;
  }

  static bool Exists(PSLockRef) { return !!sInstance; }

  static bool Equals(PSLockRef, PowerOfTwo32 aCapacity,
                     const Maybe<double>& aDuration, double aInterval,
                     uint32_t aFeatures, const char** aFilters,
                     uint32_t aFilterCount) {
    MOZ_ASSERT(sInstance);
    if (sInstance->mCapacity != aCapacity ||
        sInstance->mDuration != aDuration ||
        sInstance->mInterval != aInterval ||
        sInstance->mFeatures != aFeatures ||
        sInstance->mFilters.length() != aFilterCount) {
      return false;
    }

    for (uint32_t i = 0; i < sInstance->mFilters.length(); ++i) {
      if (strcmp(sInstance->mFilters[i].c_str(), aFilters[i]) != 0) {
        return false;
      }
    }
    return true;
  }

  static size_t SizeOf(PSLockRef, MallocSizeOf aMallocSizeOf) {
    MOZ_ASSERT(sInstance);

    size_t n = aMallocSizeOf(sInstance);

    n += sInstance->mProfileBuffer.SizeOfExcludingThis(aMallocSizeOf);

    // Measurement of the following members may be added later if DMD finds it
    // is worthwhile:
    // - mLiveProfiledThreads (both the array itself, and the contents)
    // - mDeadProfiledThreads (both the array itself, and the contents)
    //

    return n;
  }

  static bool ShouldProfileThread(PSLockRef aLock, ThreadInfo* aInfo) {
    MOZ_ASSERT(sInstance);
    return ((aInfo->IsMainThread() || FeatureThreads(aLock)) &&
            sInstance->ThreadSelected(aInfo->Name()));
  }

  PS_GET(uint32_t, Generation)

  PS_GET(PowerOfTwo32, Capacity)

  PS_GET(Maybe<double>, Duration)

  PS_GET(double, Interval)

  PS_GET(uint32_t, Features)

#define PS_GET_FEATURE(n_, str_, Name_, desc_)                \
  static bool Feature##Name_(PSLockRef) {                     \
    MOZ_ASSERT(sInstance);                                    \
    return ProfilerFeature::Has##Name_(sInstance->mFeatures); \
  }

  BASE_PROFILER_FOR_EACH_FEATURE(PS_GET_FEATURE)

#undef PS_GET_FEATURE

  PS_GET(const Vector<std::string>&, Filters)

  static void FulfillChunkRequests(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mProfileBufferChunkManager.FulfillChunkRequests();
  }

  static ProfileBuffer& Buffer(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mProfileBuffer;
  }

  static const Vector<LiveProfiledThreadData>& LiveProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    return sInstance->mLiveProfiledThreads;
  }

  // Returns an array containing (RegisteredThread*, ProfiledThreadData*) pairs
  // for all threads that should be included in a profile, both for threads
  // that are still registered, and for threads that have been unregistered but
  // still have data in the buffer.
  // For threads that have already been unregistered, the RegisteredThread
  // pointer will be null.
  // The returned array is sorted by thread register time.
  // Do not hold on to the return value across thread registration or profiler
  // restarts.
  static Vector<std::pair<RegisteredThread*, ProfiledThreadData*>>
  ProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> array;
    MOZ_RELEASE_ASSERT(
        array.initCapacity(sInstance->mLiveProfiledThreads.length() +
                           sInstance->mDeadProfiledThreads.length()));
    for (auto& t : sInstance->mLiveProfiledThreads) {
      MOZ_RELEASE_ASSERT(array.append(
          std::make_pair(t.mRegisteredThread, t.mProfiledThreadData.get())));
    }
    for (auto& t : sInstance->mDeadProfiledThreads) {
      MOZ_RELEASE_ASSERT(
          array.append(std::make_pair((RegisteredThread*)nullptr, t.get())));
    }

    std::sort(array.begin(), array.end(),
              [](const std::pair<RegisteredThread*, ProfiledThreadData*>& a,
                 const std::pair<RegisteredThread*, ProfiledThreadData*>& b) {
                return a.second->Info()->RegisterTime() <
                       b.second->Info()->RegisterTime();
              });
    return array;
  }

  static Vector<RefPtr<PageInformation>> ProfiledPages(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);
    Vector<RefPtr<PageInformation>> array;
    for (auto& d : CorePS::RegisteredPages(aLock)) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    for (auto& d : sInstance->mDeadProfiledPages) {
      MOZ_RELEASE_ASSERT(array.append(d));
    }
    // We don't need to sort the pages like threads since we won't show them
    // as a list.
    return array;
  }

  // Do a linear search through mLiveProfiledThreads to find the
  // ProfiledThreadData object for a RegisteredThread.
  static ProfiledThreadData* GetProfiledThreadData(
      PSLockRef, RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);
    for (const LiveProfiledThreadData& thread :
         sInstance->mLiveProfiledThreads) {
      if (thread.mRegisteredThread == aRegisteredThread) {
        return thread.mProfiledThreadData.get();
      }
    }
    return nullptr;
  }

  static ProfiledThreadData* AddLiveProfiledThread(
      PSLockRef, RegisteredThread* aRegisteredThread,
      UniquePtr<ProfiledThreadData>&& aProfiledThreadData) {
    MOZ_ASSERT(sInstance);
    MOZ_RELEASE_ASSERT(
        sInstance->mLiveProfiledThreads.append(LiveProfiledThreadData{
            aRegisteredThread, std::move(aProfiledThreadData)}));

    // Return a weak pointer to the ProfiledThreadData object.
    return sInstance->mLiveProfiledThreads.back().mProfiledThreadData.get();
  }

  static void UnregisterThread(PSLockRef aLockRef,
                               RegisteredThread* aRegisteredThread) {
    MOZ_ASSERT(sInstance);

    DiscardExpiredDeadProfiledThreads(aLockRef);

    // Find the right entry in the mLiveProfiledThreads array and remove the
    // element, moving the ProfiledThreadData object for the thread into the
    // mDeadProfiledThreads array.
    // The thread's RegisteredThread object gets destroyed here.
    for (size_t i = 0; i < sInstance->mLiveProfiledThreads.length(); i++) {
      LiveProfiledThreadData& thread = sInstance->mLiveProfiledThreads[i];
      if (thread.mRegisteredThread == aRegisteredThread) {
        thread.mProfiledThreadData->NotifyUnregistered(
            sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(sInstance->mDeadProfiledThreads.append(
            std::move(thread.mProfiledThreadData)));
        sInstance->mLiveProfiledThreads.erase(
            &sInstance->mLiveProfiledThreads[i]);
        return;
      }
    }
  }

  PS_GET_AND_SET(bool, IsPaused)

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  PS_GET_AND_SET(bool, WasPaused)
#endif

  static void DiscardExpiredDeadProfiledThreads(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard any dead threads that were unregistered before bufferRangeStart.
    sInstance->mDeadProfiledThreads.eraseIf(
        [bufferRangeStart](
            const UniquePtr<ProfiledThreadData>& aProfiledThreadData) {
          Maybe<uint64_t> bufferPosition =
              aProfiledThreadData->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this thread");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void UnregisterPage(PSLockRef aLock,
                             uint64_t aRegisteredInnerWindowID) {
    MOZ_ASSERT(sInstance);
    auto& registeredPages = CorePS::RegisteredPages(aLock);
    for (size_t i = 0; i < registeredPages.length(); i++) {
      RefPtr<PageInformation>& page = registeredPages[i];
      if (page->InnerWindowID() == aRegisteredInnerWindowID) {
        page->NotifyUnregistered(sInstance->mProfileBuffer.BufferRangeEnd());
        MOZ_RELEASE_ASSERT(
            sInstance->mDeadProfiledPages.append(std::move(page)));
        registeredPages.erase(&registeredPages[i--]);
      }
    }
  }

  static void DiscardExpiredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard any dead pages that were unregistered before
    // bufferRangeStart.
    sInstance->mDeadProfiledPages.eraseIf(
        [bufferRangeStart](const RefPtr<PageInformation>& aProfiledPage) {
          Maybe<uint64_t> bufferPosition =
              aProfiledPage->BufferPositionWhenUnregistered();
          MOZ_RELEASE_ASSERT(bufferPosition,
                             "should have unregistered this page");
          return *bufferPosition < bufferRangeStart;
        });
  }

  static void ClearUnregisteredPages(PSLockRef) {
    MOZ_ASSERT(sInstance);
    sInstance->mDeadProfiledPages.clear();
  }

  static void ClearExpiredExitProfiles(PSLockRef) {
    MOZ_ASSERT(sInstance);
    uint64_t bufferRangeStart = sInstance->mProfileBuffer.BufferRangeStart();
    // Discard exit profiles that were gathered before our buffer RangeStart.
    sInstance->mExitProfiles.eraseIf(
        [bufferRangeStart](const ExitProfile& aExitProfile) {
          return aExitProfile.mBufferPositionAtGatherTime < bufferRangeStart;
        });
  }

  static void AddExitProfile(PSLockRef aLock, const std::string& aExitProfile) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    MOZ_RELEASE_ASSERT(sInstance->mExitProfiles.append(
        ExitProfile{aExitProfile, sInstance->mProfileBuffer.BufferRangeEnd()}));
  }

  static Vector<std::string> MoveExitProfiles(PSLockRef aLock) {
    MOZ_ASSERT(sInstance);

    ClearExpiredExitProfiles(aLock);

    Vector<std::string> profiles;
    MOZ_RELEASE_ASSERT(
        profiles.initCapacity(sInstance->mExitProfiles.length()));
    for (auto& profile : sInstance->mExitProfiles) {
      MOZ_RELEASE_ASSERT(profiles.append(std::move(profile.mJSON)));
    }
    sInstance->mExitProfiles.clear();
    return profiles;
  }

 private:
  // The singleton instance.
  static ActivePS* sInstance;

  // We need to track activity generations. If we didn't we could have the
  // following scenario.
  //
  // - profiler_stop() locks gPSMutex, de-instantiates ActivePS, unlocks
  //   gPSMutex, deletes the SamplerThread (which does a join).
  //
  // - profiler_start() runs on a different thread, locks gPSMutex,
  //   re-instantiates ActivePS, unlocks gPSMutex -- all before the join
  //   completes.
  //
  // - SamplerThread::Run() locks gPSMutex, sees that ActivePS is instantiated,
  //   and continues as if the start/stop pair didn't occur. Also
  //   profiler_stop() is stuck, unable to finish.
  //
  // By checking ActivePS *and* the generation, we can avoid this scenario.
  // sNextGeneration is used to track the next generation number; it is static
  // because it must persist across different ActivePS instantiations.
  const uint32_t mGeneration;
  static uint32_t sNextGeneration;

  // The maximum number of 8-byte entries in mProfileBuffer.
  const PowerOfTwo32 mCapacity;

  // The maximum duration of entries in mProfileBuffer, in seconds.
  const Maybe<double> mDuration;

  // The interval between samples, measured in milliseconds.
  const double mInterval;

  // The profile features that are enabled.
  const uint32_t mFeatures;

  // Substrings of names of threads we want to profile.
  Vector<std::string> mFilters;

  // The chunk manager used by `mProfileBuffer` below.
  ProfileBufferChunkManagerWithLocalLimit mProfileBufferChunkManager;

  // The buffer into which all samples are recorded.
  ProfileBuffer mProfileBuffer;

  // ProfiledThreadData objects for any threads that were profiled at any point
  // during this run of the profiler:
  //  - mLiveProfiledThreads contains all threads that are still registered, and
  //  - mDeadProfiledThreads contains all threads that have already been
  //    unregistered but for which there is still data in the profile buffer.
  Vector<LiveProfiledThreadData> mLiveProfiledThreads;
  Vector<UniquePtr<ProfiledThreadData>> mDeadProfiledThreads;

  // Info on all the dead pages.
  // Registered pages are moved to this array after unregistration.
  // We keep them in case they are needed in the profile data, and remove them
  // once we are sure they won't be needed anymore.
  Vector<RefPtr<PageInformation>> mDeadProfiledPages;

  // The current sampler thread. This class is not responsible for destroying
  // the SamplerThread object; the Destroy() method returns it so the caller
  // can destroy it.
  SamplerThread* const mSamplerThread;

  // Is the profiler paused?
  bool mIsPaused;

#if defined(GP_OS_linux) || defined(GP_OS_freebsd)
  // Used to record whether the profiler was paused just before forking. False
  // at all times except just before/after forking.
  bool mWasPaused;
#endif

  struct ExitProfile {
    std::string mJSON;
    uint64_t mBufferPositionAtGatherTime;
  };
  Vector<ExitProfile> mExitProfiles;
};

ActivePS* ActivePS::sInstance = nullptr;
uint32_t ActivePS::sNextGeneration = 0;

#undef PS_GET
#undef PS_GET_LOCKLESS
#undef PS_GET_AND_SET

Atomic<uint32_t, MemoryOrdering::Relaxed> RacyFeatures::sActiveAndFeatures(0);

/* static */
void RacyFeatures::SetActive(uint32_t aFeatures) {
  sActiveAndFeatures = Active | aFeatures;
}

/* static */
void RacyFeatures::SetInactive() { sActiveAndFeatures = 0; }

/* static */
bool RacyFeatures::IsActive() { return uint32_t(sActiveAndFeatures) & Active; }

/* static */
void RacyFeatures::SetPaused() { sActiveAndFeatures |= Paused; }

/* static */
void RacyFeatures::SetUnpaused() { sActiveAndFeatures &= ~Paused; }

/* static */
bool RacyFeatures::IsActiveWithFeature(uint32_t aFeature) {
  uint32_t af = sActiveAndFeatures;  // copy it first
  return (af & Active) && (af & aFeature);
}

/* static */
bool RacyFeatures::IsActiveAndUnpaused() {
  uint32_t af = sActiveAndFeatures;  // copy it first
  return (af & Active) && !(af & Paused);
}
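
// Note on the "copy it first" pattern above: sActiveAndFeatures packs the
// Active and Paused flags together with the feature bits in a single atomic
// word, so copying it into `af` once yields a consistent snapshot. Two
// separate reads of the atomic could observe a state change (e.g.,
// deactivation) in between, and report a stale combination of flags.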

// Each live thread has a RegisteredThread, and we store a reference to it in
// TLS. This class encapsulates that TLS.
class TLSRegisteredThread {
 public:
  static bool Init(PSLockRef) {
    bool ok1 = sRegisteredThread.init();
    bool ok2 = AutoProfilerLabel::sProfilingStack.init();
    return ok1 && ok2;
  }

  // Get the entire RegisteredThread. Accesses are guarded by gPSMutex.
  static class RegisteredThread* RegisteredThread(PSLockRef) {
    return sRegisteredThread.get();
  }

  // Get only the RacyRegisteredThread. Accesses are not guarded by gPSMutex.
  static class RacyRegisteredThread* RacyRegisteredThread() {
    class RegisteredThread* registeredThread = sRegisteredThread.get();
    return registeredThread ? &registeredThread->RacyRegisteredThread()
                            : nullptr;
  }

  // Get only the ProfilingStack. Accesses are not guarded by gPSMutex.
  // RacyRegisteredThread() can also be used to get the ProfilingStack, but that
  // is marginally slower because it requires an extra pointer indirection.
  static ProfilingStack* Stack() {
    return AutoProfilerLabel::sProfilingStack.get();
  }

  static void SetRegisteredThread(PSLockRef,
                                  class RegisteredThread* aRegisteredThread) {
    sRegisteredThread.set(aRegisteredThread);
    AutoProfilerLabel::sProfilingStack.set(
        aRegisteredThread
            ? &aRegisteredThread->RacyRegisteredThread().ProfilingStack()
            : nullptr);
  }

 private:
  // This is a non-owning reference to the RegisteredThread;
  // CorePS::mRegisteredThreads is the owning reference. On thread
  // deregistration, this reference is cleared and the RegisteredThread is
  // destroyed.
  static MOZ_THREAD_LOCAL(class RegisteredThread*) sRegisteredThread;
};

MOZ_THREAD_LOCAL(RegisteredThread*) TLSRegisteredThread::sRegisteredThread;

/* static */
ProfilingStack* AutoProfilerLabel::GetProfilingStack() {
  return sProfilingStack.get();
}

// Although you can access a thread's ProfilingStack via
// TLSRegisteredThread::sRegisteredThread, we also have a second TLS pointer
// directly to the ProfilingStack. Here's why.
//
// - We need to be able to push to and pop from the ProfilingStack in
//   AutoProfilerLabel.
//
// - The class functions are hot and must be defined in BaseProfiler.h so they
//   can be inlined.
//
// - We don't want to expose TLSRegisteredThread (and RegisteredThread) in
//   BaseProfiler.h.
//
// This second pointer isn't ideal, but does provide a way to satisfy those
// constraints. TLSRegisteredThread is responsible for updating it.
MOZ_THREAD_LOCAL(ProfilingStack*) AutoProfilerLabel::sProfilingStack;

// The name of the main thread.
static const char* const kMainThreadName = "GeckoMain";

////////////////////////////////////////////////////////////////////////
// BEGIN sampling/unwinding code

// The registers used for stack unwinding and a few other sampling purposes.
// The ctor only null-initializes the fields; users are responsible for
// filling in actual register values.
class Registers {
 public:
  Registers() : mPC{nullptr}, mSP{nullptr}, mFP{nullptr}, mLR{nullptr} {}

#if defined(HAVE_NATIVE_UNWIND)
  // Fills in mPC, mSP, mFP, mLR, and mContext for a synchronous sample.
  void SyncPopulate();
#endif

  void Clear() { memset(this, 0, sizeof(*this)); }

  // These fields are filled in by
  // Sampler::SuspendAndSampleAndResumeThread() for periodic and backtrace
  // samples, and by SyncPopulate() for synchronous samples.
  Address mPC;  // Instruction pointer.
  Address mSP;  // Stack pointer.
  Address mFP;  // Frame pointer.
  Address mLR;  // ARM link register.
#if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
  // This contains all the registers, which means it duplicates the four fields
  // above. This is ok.
  ucontext_t* mContext;  // The context from the signal handler.
#endif
};

// Setting MAX_NATIVE_FRAMES too high risks the unwinder wasting a lot of time
// looping on corrupted stacks.
static const size_t MAX_NATIVE_FRAMES = 1024;

struct NativeStack {
  void* mPCs[MAX_NATIVE_FRAMES];
  void* mSPs[MAX_NATIVE_FRAMES];
  size_t mCount;  // Number of frames filled.

  NativeStack() : mPCs(), mSPs(), mCount(0) {}
};
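
// For illustration: on a 64-bit build, each NativeStack is a fixed ~16 KiB
// (two 1024-entry arrays of 8-byte pointers, plus the count), regardless of
// how many frames an individual sample actually fills in.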

// Merges the profiling stack and native stack, outputting the details to
// aCollector.
static void MergeStacks(uint32_t aFeatures, bool aIsSynchronous,
                        const RegisteredThread& aRegisteredThread,
                        const Registers& aRegs, const NativeStack& aNativeStack,
                        ProfilerStackCollector& aCollector) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  const ProfilingStack& profilingStack =
      aRegisteredThread.RacyRegisteredThread().ProfilingStack();
  const ProfilingStackFrame* profilingStackFrames = profilingStack.frames;
  uint32_t profilingStackFrameCount = profilingStack.stackSize();

  Maybe<uint64_t> samplePosInBuffer;
  if (!aIsSynchronous) {
    // aCollector.SamplePositionInBuffer() will return Nothing() when
    // profiler_suspend_and_sample_thread is called from the background hang
    // reporter.
    samplePosInBuffer = aCollector.SamplePositionInBuffer();
  }
  // While the profiling stack array is ordered oldest-to-youngest, the JS and
  // native arrays are ordered youngest-to-oldest. We must add frames to aInfo
  // oldest-to-youngest. Thus, iterate over the profiling stack forwards, and
  // over the JS and native arrays backwards. Note: this means the terminating
  // condition for jsIndex and nativeIndex is their going below 0.
  uint32_t profilingStackIndex = 0;
  int32_t nativeIndex = aNativeStack.mCount - 1;

  uint8_t* lastLabelFrameStackAddr = nullptr;

  // Iterate as long as there is at least one frame remaining.
  while (profilingStackIndex != profilingStackFrameCount || nativeIndex >= 0) {
    // There are 1 to 3 frames available. Find and add the oldest.
    uint8_t* profilingStackAddr = nullptr;
    uint8_t* nativeStackAddr = nullptr;

    if (profilingStackIndex != profilingStackFrameCount) {
      const ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      if (profilingStackFrame.isLabelFrame() ||
          profilingStackFrame.isSpMarkerFrame()) {
        lastLabelFrameStackAddr = (uint8_t*)profilingStackFrame.stackAddress();
      }

      // Skip any JS_OSR frames. Such frames are used when the JS interpreter
      // enters a jit frame on a loop edge (via on-stack-replacement, or OSR).
      // To avoid both the profiling stack frame and jit frame being recorded
      // (and showing up twice), the interpreter marks the interpreter
      // profiling stack frame as JS_OSR to ensure that it doesn't get counted.
      if (profilingStackFrame.isOSRFrame()) {
        profilingStackIndex++;
        continue;
      }

      MOZ_ASSERT(lastLabelFrameStackAddr);
      profilingStackAddr = lastLabelFrameStackAddr;
    }

    if (nativeIndex >= 0) {
      nativeStackAddr = (uint8_t*)aNativeStack.mSPs[nativeIndex];
    }

    // If there's a native stack frame which has the same SP as a profiling
    // stack frame, pretend we didn't see the native stack frame.  Ditto for a
    // native stack frame which has the same SP as a JS stack frame.  In effect
    // this means profiling stack frames or JS frames trump conflicting native
    // frames.
    if (nativeStackAddr && (profilingStackAddr == nativeStackAddr)) {
      nativeStackAddr = nullptr;
      nativeIndex--;
      MOZ_ASSERT(profilingStackAddr);
    }

    // Sanity checks.
    MOZ_ASSERT_IF(profilingStackAddr, profilingStackAddr != nativeStackAddr);
    MOZ_ASSERT_IF(nativeStackAddr, nativeStackAddr != profilingStackAddr);

    // Check to see if profiling stack frame is top-most.
    if (profilingStackAddr > nativeStackAddr) {
      MOZ_ASSERT(profilingStackIndex < profilingStackFrameCount);
      const ProfilingStackFrame& profilingStackFrame =
          profilingStackFrames[profilingStackIndex];

      // Sp marker frames are just annotations and should not be recorded in
      // the profile.
      if (!profilingStackFrame.isSpMarkerFrame()) {
        aCollector.CollectProfilingStackFrame(profilingStackFrame);
      }
      profilingStackIndex++;
      continue;
    }

    // If we reach here, there must be a native stack frame and it must be the
    // greatest frame.
    if (nativeStackAddr) {
      MOZ_ASSERT(nativeIndex >= 0);
      void* addr = (void*)aNativeStack.mPCs[nativeIndex];
      aCollector.CollectNativeLeafAddr(addr);
    }
    if (nativeIndex >= 0) {
      nativeIndex--;
    }
  }
}

#if defined(GP_OS_windows) && defined(USE_MOZ_STACK_WALK)
static HANDLE GetThreadHandle(PlatformData* aData);
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK) || defined(USE_MOZ_STACK_WALK)
static void StackWalkCallback(uint32_t aFrameNumber, void* aPC, void* aSP,
                              void* aClosure) {
  NativeStack* nativeStack = static_cast<NativeStack*>(aClosure);
  MOZ_ASSERT(nativeStack->mCount < MAX_NATIVE_FRAMES);
  nativeStack->mSPs[nativeStack->mCount] = aSP;
  nativeStack->mPCs[nativeStack->mCount] = aPC;
  nativeStack->mCount++;
}
#endif

#if defined(USE_FRAME_POINTER_STACK_WALK)
static void DoFramePointerBacktrace(PSLockRef aLock,
                                    const RegisteredThread& aRegisteredThread,
                                    const Registers& aRegs,
                                    NativeStack& aNativeStack) {
  // WARNING: this function runs within the profiler's "critical section".
  // WARNING: this function might be called while the profiler is inactive, and
  //          cannot rely on ActivePS.

  // Start with the current function. We use 0 as the frame number here because
  // the FramePointerStackWalk() call below will use 1..N. This is a bit weird
  // but it doesn't matter because StackWalkCallback() doesn't use the frame
  // number argument.
  StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);

  uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);

  const void* stackEnd = aRegisteredThread.StackTop();
1349   if (aRegs.mFP >= aRegs.mSP && aRegs.mFP <= stackEnd) {
1350     FramePointerStackWalk(StackWalkCallback, /* skipFrames */ 0, maxFrames,
1351                           &aNativeStack, reinterpret_cast<void**>(aRegs.mFP),
1352                           const_cast<void*>(stackEnd));
1353   }
1354 }
1355 #endif
1356 
1357 #if defined(USE_MOZ_STACK_WALK)
1358 static void DoMozStackWalkBacktrace(PSLockRef aLock,
1359                                     const RegisteredThread& aRegisteredThread,
1360                                     const Registers& aRegs,
1361                                     NativeStack& aNativeStack) {
1362   // WARNING: this function runs within the profiler's "critical section".
1363   // WARNING: this function might be called while the profiler is inactive, and
1364   //          cannot rely on ActivePS.
1365 
1366   // Start with the current function. We use 0 as the frame number here because
1367   // the MozStackWalkThread() call below will use 1..N. This is a bit weird but
1368   // it doesn't matter because StackWalkCallback() doesn't use the frame number
1369   // argument.
1370   StackWalkCallback(/* frameNum */ 0, aRegs.mPC, aRegs.mSP, &aNativeStack);
1371 
1372   uint32_t maxFrames = uint32_t(MAX_NATIVE_FRAMES - aNativeStack.mCount);
1373 
1374   HANDLE thread = GetThreadHandle(aRegisteredThread.GetPlatformData());
1375   MOZ_ASSERT(thread);
1376   MozStackWalkThread(StackWalkCallback, /* skipFrames */ 0, maxFrames,
1377                      &aNativeStack, thread, /* context */ nullptr);
1378 }
1379 #endif
1380 
1381 #ifdef USE_EHABI_STACKWALK
1382 static void DoEHABIBacktrace(PSLockRef aLock,
1383                              const RegisteredThread& aRegisteredThread,
1384                              const Registers& aRegs,
1385                              NativeStack& aNativeStack) {
1386   // WARNING: this function runs within the profiler's "critical section".
1387   // WARNING: this function might be called while the profiler is inactive, and
1388   //          cannot rely on ActivePS.
1389 
1390   aNativeStack.mCount =
1391       EHABIStackWalk(aRegs.mContext->uc_mcontext,
1392                      const_cast<void*>(aRegisteredThread.StackTop()),
1393                      aNativeStack.mSPs, aNativeStack.mPCs, MAX_NATIVE_FRAMES);
1394 }
1395 #endif
1396 
1397 #ifdef USE_LUL_STACKWALK
1398 
1399 // See the comment at the callsite for why this function is necessary.
1400 #  if defined(MOZ_HAVE_ASAN_BLACKLIST)
1401 MOZ_ASAN_BLACKLIST static void ASAN_memcpy(void* aDst, const void* aSrc,
1402                                            size_t aLen) {
1403   // The obvious thing to do here is call memcpy(). However, although
1404   // ASAN_memcpy() is not instrumented by ASAN, memcpy() still is, and the
1405   // false positive still manifests! So we must implement memcpy() ourselves
1406   // within this function.
1407   char* dst = static_cast<char*>(aDst);
1408   const char* src = static_cast<const char*>(aSrc);
1409 
1410   for (size_t i = 0; i < aLen; i++) {
1411     dst[i] = src[i];
1412   }
1413 }
1414 #  endif
1415 
1416 static void DoLULBacktrace(PSLockRef aLock,
1417                            const RegisteredThread& aRegisteredThread,
1418                            const Registers& aRegs, NativeStack& aNativeStack) {
1419   // WARNING: this function runs within the profiler's "critical section".
1420   // WARNING: this function might be called while the profiler is inactive, and
1421   //          cannot rely on ActivePS.
1422 
1423   const mcontext_t* mc = &aRegs.mContext->uc_mcontext;
1424 
1425   lul::UnwindRegs startRegs;
1426   memset(&startRegs, 0, sizeof(startRegs));
1427 
1428 #  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android)
1429   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_RIP]);
1430   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_RSP]);
1431   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_RBP]);
1432 #  elif defined(GP_PLAT_amd64_freebsd)
1433   startRegs.xip = lul::TaggedUWord(mc->mc_rip);
1434   startRegs.xsp = lul::TaggedUWord(mc->mc_rsp);
1435   startRegs.xbp = lul::TaggedUWord(mc->mc_rbp);
1436 #  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
1437   startRegs.r15 = lul::TaggedUWord(mc->arm_pc);
1438   startRegs.r14 = lul::TaggedUWord(mc->arm_lr);
1439   startRegs.r13 = lul::TaggedUWord(mc->arm_sp);
1440   startRegs.r12 = lul::TaggedUWord(mc->arm_ip);
1441   startRegs.r11 = lul::TaggedUWord(mc->arm_fp);
1442   startRegs.r7 = lul::TaggedUWord(mc->arm_r7);
1443 #  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android)
1444   startRegs.pc = lul::TaggedUWord(mc->pc);
1445   startRegs.x29 = lul::TaggedUWord(mc->regs[29]);
1446   startRegs.x30 = lul::TaggedUWord(mc->regs[30]);
1447   startRegs.sp = lul::TaggedUWord(mc->sp);
1448 #  elif defined(GP_PLAT_arm64_freebsd)
1449   startRegs.pc = lul::TaggedUWord(mc->mc_gpregs.gp_elr);
1450   startRegs.x29 = lul::TaggedUWord(mc->mc_gpregs.gp_x[29]);
1451   startRegs.x30 = lul::TaggedUWord(mc->mc_gpregs.gp_lr);
1452   startRegs.sp = lul::TaggedUWord(mc->mc_gpregs.gp_sp);
1453 #  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1454   startRegs.xip = lul::TaggedUWord(mc->gregs[REG_EIP]);
1455   startRegs.xsp = lul::TaggedUWord(mc->gregs[REG_ESP]);
1456   startRegs.xbp = lul::TaggedUWord(mc->gregs[REG_EBP]);
1457 #  elif defined(GP_PLAT_mips64_linux)
1458   startRegs.pc = lul::TaggedUWord(mc->pc);
1459   startRegs.sp = lul::TaggedUWord(mc->gregs[29]);
1460   startRegs.fp = lul::TaggedUWord(mc->gregs[30]);
1461 #  else
1462 #    error "Unknown plat"
1463 #  endif
1464 
1465   // Copy up to N_STACK_BYTES from rsp-REDZONE upwards, but not going past the
1466   // stack's registered top point.  Do some basic sanity checks too.  This
1467   // assumes that the TaggedUWord holding the stack pointer value is valid, but
1468   // it should be, since it was constructed that way in the code just above.
1469 
1470   // We could construct |stackImg| so that LUL reads directly from the stack in
1471   // question, rather than from a copy of it.  That would reduce overhead and
1472   // space use a bit.  However, it gives a problem with dynamic analysis tools
1473   // (ASan, TSan, Valgrind) which is that such tools will report invalid or
1474   // racing memory accesses, and such accesses will be reported deep inside LUL.
1475   // By taking a copy here, we can either sanitise the copy (for Valgrind) or
1476   // copy it using an unchecked memcpy (for ASan, TSan).  That way we don't have
1477   // to try and suppress errors inside LUL.
1478   //
1479   // N_STACK_BYTES is set to 160KB.  This is big enough to hold all stacks
1480   // observed in some minutes of testing, whilst keeping the size of this
1481   // function (DoLULBacktrace)'s frame reasonable.  Most stacks observed in
1482   // practice are small, 4KB or less, and so the copy costs are insignificant
1483   // compared to other profiler overhead.
1484   //
1485   // |stackImg| is allocated on this (the sampling thread's) stack.  That
1486   // implies that the frame for this function is at least N_STACK_BYTES large.
1487   // In general it would be considered unacceptable to have such a large frame
1488   // on a stack, but it only exists for the unwinder thread, and so is not
1489   // expected to be a problem.  Allocating it on the heap is troublesome because
1490   // this function runs whilst the sampled thread is suspended, so any heap
1491   // allocation risks deadlock.  Allocating it as a global variable is not
1492   // thread safe, which would be a problem if we ever allow multiple sampler
1493   // threads.  Hence allocating it on the stack seems to be the least-worst
1494   // option.
1495 
1496   lul::StackImage stackImg;
1497 
1498   {
1499 #  if defined(GP_PLAT_amd64_linux) || defined(GP_PLAT_amd64_android) || \
1500       defined(GP_PLAT_amd64_freebsd)
1501     uintptr_t rEDZONE_SIZE = 128;
1502     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
1503 #  elif defined(GP_PLAT_arm_linux) || defined(GP_PLAT_arm_android)
1504     uintptr_t rEDZONE_SIZE = 0;
1505     uintptr_t start = startRegs.r13.Value() - rEDZONE_SIZE;
1506 #  elif defined(GP_PLAT_arm64_linux) || defined(GP_PLAT_arm64_android) || \
1507       defined(GP_PLAT_arm64_freebsd)
1508     uintptr_t rEDZONE_SIZE = 0;
1509     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
1510 #  elif defined(GP_PLAT_x86_linux) || defined(GP_PLAT_x86_android)
1511     uintptr_t rEDZONE_SIZE = 0;
1512     uintptr_t start = startRegs.xsp.Value() - rEDZONE_SIZE;
1513 #  elif defined(GP_PLAT_mips64_linux)
1514     uintptr_t rEDZONE_SIZE = 0;
1515     uintptr_t start = startRegs.sp.Value() - rEDZONE_SIZE;
1516 #  else
1517 #    error "Unknown plat"
1518 #  endif
1519     uintptr_t end = reinterpret_cast<uintptr_t>(aRegisteredThread.StackTop());
1520     uintptr_t ws = sizeof(void*);
1521     start &= ~(ws - 1);
1522     end &= ~(ws - 1);
1523     uintptr_t nToCopy = 0;
1524     if (start < end) {
1525       nToCopy = end - start;
1526       if (nToCopy > lul::N_STACK_BYTES) nToCopy = lul::N_STACK_BYTES;
1527     }
1528     MOZ_ASSERT(nToCopy <= lul::N_STACK_BYTES);
1529     stackImg.mLen = nToCopy;
1530     stackImg.mStartAvma = start;
1531     if (nToCopy > 0) {
1532       // If this is a vanilla memcpy(), ASAN makes the following complaint:
1533       //
1534       //   ERROR: AddressSanitizer: stack-buffer-underflow ...
1535       //   ...
1536       //   HINT: this may be a false positive if your program uses some custom
1537       //   stack unwind mechanism or swapcontext
1538       //
1539       // This code is very much a custom stack unwind mechanism! So we use an
1540       // alternative memcpy() implementation that is ignored by ASAN.
1541 #  if defined(MOZ_HAVE_ASAN_BLACKLIST)
1542       ASAN_memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
1543 #  else
1544       memcpy(&stackImg.mContents[0], (void*)start, nToCopy);
1545 #  endif
1546       (void)VALGRIND_MAKE_MEM_DEFINED(&stackImg.mContents[0], nToCopy);
1547     }
1548   }
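  // Worked example of the bounds computed in the block above (values
  // hypothetical; 64-bit, so ws == 8): with xsp == 0x7fff1234 and a 128-byte
  // red zone, start is 0x7fff11b4, which `start &= ~(ws - 1)` rounds down to
  // 0x7fff11b0. If StackTop() is 0x7fff9000, nToCopy is 0x7e50 bytes -- well
  // under lul::N_STACK_BYTES, so the whole live stack region is copied.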
1549 
1550   size_t framePointerFramesAcquired = 0;
1551   lul::LUL* lul = CorePS::Lul(aLock);
1552   lul->Unwind(reinterpret_cast<uintptr_t*>(aNativeStack.mPCs),
1553               reinterpret_cast<uintptr_t*>(aNativeStack.mSPs),
1554               &aNativeStack.mCount, &framePointerFramesAcquired,
1555               MAX_NATIVE_FRAMES, &startRegs, &stackImg);
1556 
1557   // Update stats in the LUL stats object.  Unfortunately this requires
1558   // three global memory operations.
1559   lul->mStats.mContext += 1;
1560   lul->mStats.mCFI += aNativeStack.mCount - 1 - framePointerFramesAcquired;
1561   lul->mStats.mFP += framePointerFramesAcquired;
1562 }
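// Example of the stats arithmetic above (numbers hypothetical): if the unwind
// produced mCount == 20 frames, of which framePointerFramesAcquired == 5 came
// from frame-pointer recovery, then 1 frame is the initial context (mContext),
// 5 are counted in mFP, and the remaining 14 (20 - 1 - 5) in mCFI.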
1563 
1564 #endif
1565 
1566 #ifdef HAVE_NATIVE_UNWIND
1567 static void DoNativeBacktrace(PSLockRef aLock,
1568                               const RegisteredThread& aRegisteredThread,
1569                               const Registers& aRegs,
1570                               NativeStack& aNativeStack) {
1571   // This method determines which stackwalker is used for periodic and
1572   // synchronous samples. (Backtrace samples are treated differently, see
1573   // profiler_suspend_and_sample_thread() for details). The only part of the
1574   // ordering that matters is that LUL must precede FRAME_POINTER, because on
1575   // Linux they can both be present.
1576 #  if defined(USE_LUL_STACKWALK)
1577   DoLULBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
1578 #  elif defined(USE_EHABI_STACKWALK)
1579   DoEHABIBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
1580 #  elif defined(USE_FRAME_POINTER_STACK_WALK)
1581   DoFramePointerBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
1582 #  elif defined(USE_MOZ_STACK_WALK)
1583   DoMozStackWalkBacktrace(aLock, aRegisteredThread, aRegs, aNativeStack);
1584 #  else
1585 #    error "Invalid configuration"
1586 #  endif
1587 }
1588 #endif
1589 
1590 // Writes some components shared by periodic and synchronous profiles to
1591 // ActivePS's ProfileBuffer. (This should only be called from DoSyncSample()
1592 // and DoPeriodicSample().)
1593 //
1594 // The grammar for entry sequences is in a comment above
1595 // ProfileBuffer::StreamSamplesToJSON.
1596 static inline void DoSharedSample(PSLockRef aLock, bool aIsSynchronous,
1597                                   RegisteredThread& aRegisteredThread,
1598                                   const Registers& aRegs, uint64_t aSamplePos,
1599                                   ProfileBuffer& aBuffer) {
1600   // WARNING: this function runs within the profiler's "critical section".
1601 
1602   MOZ_ASSERT(!aBuffer.IsThreadSafe(),
1603              "Mutexes cannot be used inside this critical section");
1604 
1605   MOZ_RELEASE_ASSERT(ActivePS::Exists(aLock));
1606 
1607   ProfileBufferCollector collector(aBuffer, aSamplePos);
1608   NativeStack nativeStack;
1609 #if defined(HAVE_NATIVE_UNWIND)
1610   if (ActivePS::FeatureStackWalk(aLock)) {
1611     DoNativeBacktrace(aLock, aRegisteredThread, aRegs, nativeStack);
1612 
1613     MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
1614                 aRegs, nativeStack, collector);
1615   } else
1616 #endif
1617   {
1618     MergeStacks(ActivePS::Features(aLock), aIsSynchronous, aRegisteredThread,
1619                 aRegs, nativeStack, collector);
1620 
1621     // We can't walk the whole native stack, but we can record the top frame.
1622     if (ActivePS::FeatureLeaf(aLock)) {
1623       aBuffer.AddEntry(ProfileBufferEntry::NativeLeafAddr((void*)aRegs.mPC));
1624     }
1625   }
1626 }
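// For orientation, a complete sample produced via the functions above lands in
// the buffer roughly as follows (see the grammar above
// ProfileBuffer::StreamSamplesToJSON for the authoritative form):
//
//   ThreadId, Time, <label and/or native frame entries...> [, NativeLeafAddr]
//
// ThreadId and Time are added by the callers: DoSyncSample() adds both below,
// while for periodic samples the sampler loop adds them to the main buffer
// before calling DoPeriodicSample().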
1627 
1628 // Writes the components of a synchronous sample to the given ProfileBuffer.
1629 static void DoSyncSample(PSLockRef aLock, RegisteredThread& aRegisteredThread,
1630                          const TimeStamp& aNow, const Registers& aRegs,
1631                          ProfileBuffer& aBuffer) {
1632   // WARNING: this function runs within the profiler's "critical section".
1633 
1634   uint64_t samplePos =
1635       aBuffer.AddThreadIdEntry(aRegisteredThread.Info()->ThreadId());
1636 
1637   TimeDuration delta = aNow - CorePS::ProcessStartTime();
1638   aBuffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
1639 
1640   DoSharedSample(aLock, /* aIsSynchronous = */ true, aRegisteredThread, aRegs,
1641                  samplePos, aBuffer);
1642 }
1643 
1644 // Writes the components of a periodic sample to ActivePS's ProfileBuffer.
1645 // The ThreadId entry has already been written in the main ProfileBuffer; its
1646 // location is `aSamplePos`. We write the rest to `aBuffer` (which may differ).
1647 static void DoPeriodicSample(PSLockRef aLock,
1648                              RegisteredThread& aRegisteredThread,
1649                              ProfiledThreadData& aProfiledThreadData,
1650                              const Registers& aRegs, uint64_t aSamplePos,
1651                              ProfileBuffer& aBuffer) {
1652   // WARNING: this function runs within the profiler's "critical section".
1653 
1654   DoSharedSample(aLock, /* aIsSynchronous = */ false, aRegisteredThread, aRegs,
1655                  aSamplePos, aBuffer);
1656 }
1657 
1658 // END sampling/unwinding code
1659 ////////////////////////////////////////////////////////////////////////
1660 
1661 ////////////////////////////////////////////////////////////////////////
1662 // BEGIN saving/streaming code
1663 
1664 const static uint64_t kJS_MAX_SAFE_UINTEGER = +9007199254740991ULL;
1665 
1666 static int64_t SafeJSInteger(uint64_t aValue) {
1667   return aValue <= kJS_MAX_SAFE_UINTEGER ? int64_t(aValue) : -1;
1668 }
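// E.g. SafeJSInteger(9007199254740991) == 9007199254740991 (2^53 - 1, the
// largest integer a JS double represents exactly), whereas
// SafeJSInteger(9007199254740992) == -1, flagging the value as unsafe.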
1669 
1670 static void AddSharedLibraryInfoToStream(JSONWriter& aWriter,
1671                                          const SharedLibrary& aLib) {
1672   aWriter.StartObjectElement();
1673   aWriter.IntProperty("start", SafeJSInteger(aLib.GetStart()));
1674   aWriter.IntProperty("end", SafeJSInteger(aLib.GetEnd()));
1675   aWriter.IntProperty("offset", SafeJSInteger(aLib.GetOffset()));
1676   aWriter.StringProperty("name", aLib.GetModuleName().c_str());
1677   aWriter.StringProperty("path", aLib.GetModulePath().c_str());
1678   aWriter.StringProperty("debugName", aLib.GetDebugName().c_str());
1679   aWriter.StringProperty("debugPath", aLib.GetDebugPath().c_str());
1680   aWriter.StringProperty("breakpadId", aLib.GetBreakpadId().c_str());
1681   aWriter.StringProperty("arch", aLib.GetArch().c_str());
1682   aWriter.EndObject();
1683 }
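// Illustrative output for one library entry (all field values hypothetical):
//
//   {"start": 140111222333440, "end": 140111223333440, "offset": 0,
//    "name": "libxul.so", "path": "/usr/lib/firefox/libxul.so",
//    "debugName": "libxul.so", "debugPath": "/usr/lib/firefox/libxul.so",
//    "breakpadId": "0123456789ABCDEF0123456789ABCDEF0", "arch": "x86_64"}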
1684 
1685 void AppendSharedLibraries(JSONWriter& aWriter) {
1686   SharedLibraryInfo info = SharedLibraryInfo::GetInfoForSelf();
1687   info.SortByAddress();
1688   for (size_t i = 0; i < info.GetSize(); i++) {
1689     AddSharedLibraryInfoToStream(aWriter, info.GetEntry(i));
1690   }
1691 }
1692 
1693 static void StreamCategories(SpliceableJSONWriter& aWriter) {
1694   // Same order as ProfilingCategory. Format:
1695   // [
1696   //   {
1697   //     name: "Idle",
1698   //     color: "transparent",
1699   //     subcategories: ["Other"],
1700   //   },
1701   //   {
1702   //     name: "Other",
1703   //     color: "grey",
1704   //     subcategories: [
1705   //       "JSM loading",
1706   //       "Subprocess launching",
1707   //       "DLL loading"
1708   //     ]
1709   //   },
1710   //   ...
1711   // ]
1712 
1713 #define CATEGORY_JSON_BEGIN_CATEGORY(name, labelAsString, color) \
1714   aWriter.Start();                                               \
1715   aWriter.StringProperty("name", labelAsString);                 \
1716   aWriter.StringProperty("color", color);                        \
1717   aWriter.StartArrayProperty("subcategories");
1718 #define CATEGORY_JSON_SUBCATEGORY(supercategory, name, labelAsString) \
1719   aWriter.StringElement(labelAsString);
1720 #define CATEGORY_JSON_END_CATEGORY \
1721   aWriter.EndArray();              \
1722   aWriter.EndObject();
1723 
1724   BASE_PROFILING_CATEGORY_LIST(CATEGORY_JSON_BEGIN_CATEGORY,
1725                                CATEGORY_JSON_SUBCATEGORY,
1726                                CATEGORY_JSON_END_CATEGORY)
1727 
1728 #undef CATEGORY_JSON_BEGIN_CATEGORY
1729 #undef CATEGORY_JSON_SUBCATEGORY
1730 #undef CATEGORY_JSON_END_CATEGORY
1731 }
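// For a hypothetical category named "Other" with colour "grey" and a single
// subcategory "Other", the macros above expand to the equivalent of:
//
//   aWriter.Start();
//   aWriter.StringProperty("name", "Other");
//   aWriter.StringProperty("color", "grey");
//   aWriter.StartArrayProperty("subcategories");
//   aWriter.StringElement("Other");
//   aWriter.EndArray();
//   aWriter.EndObject();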
1732 
1733 static int64_t MicrosecondsSince1970();
1734 
1735 static void StreamMetaJSCustomObject(PSLockRef aLock,
1736                                      SpliceableJSONWriter& aWriter,
1737                                      bool aIsShuttingDown) {
1738   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
1739 
1740   aWriter.IntProperty("version", 19);
1741 
1742   // The "startTime" field holds the number of milliseconds since midnight
1743   // January 1, 1970 GMT. This grotty code computes (Now - (Now -
1744   // ProcessStartTime)) to convert CorePS::ProcessStartTime() into that form.
1745   TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
1746   aWriter.DoubleProperty(
1747       "startTime", MicrosecondsSince1970() / 1000.0 - delta.ToMilliseconds());
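  // Worked example (numbers hypothetical): if MicrosecondsSince1970() returns
  // 1600000060000000 (i.e. 1600000060000 ms) and the process started 60000 ms
  // ago, then delta.ToMilliseconds() == 60000 and "startTime" is
  // 1600000000000, i.e. the process start time expressed in epoch ms.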
1748 
1749   // Write the shutdownTime field. Unlike startTime, shutdownTime is not an
1750   // absolute time stamp: It's relative to startTime. This is consistent with
1751   // all other (non-"startTime") times anywhere in the profile JSON.
1752   if (aIsShuttingDown) {
1753     aWriter.DoubleProperty("shutdownTime", profiler_time());
1754   } else {
1755     aWriter.NullProperty("shutdownTime");
1756   }
1757 
1758   aWriter.StartArrayProperty("categories");
1759   StreamCategories(aWriter);
1760   aWriter.EndArray();
1761 
1762   if (!CorePS::IsMainThread()) {
1763     // Leave the rest of the properties out if we're not on the main thread.
1764     // At the moment, the only case in which this function is called on a
1765     // background thread is if we're in a content process and are going to
1766     // send this profile to the parent process. In that case, the parent
1767     // process profile's "meta" object already has the rest of the properties,
1768     // and the parent process profile is dumped on that process's main thread.
1769     return;
1770   }
1771 
1772   aWriter.DoubleProperty("interval", ActivePS::Interval(aLock));
1773   aWriter.IntProperty("stackwalk", ActivePS::FeatureStackWalk(aLock));
1774 
1775 #ifdef DEBUG
1776   aWriter.IntProperty("debug", 1);
1777 #else
1778   aWriter.IntProperty("debug", 0);
1779 #endif
1780 
1781   aWriter.IntProperty("gcpoison", 0);
1782 
1783   aWriter.IntProperty("asyncstack", 0);
1784 
1785   aWriter.IntProperty("processType", 0);
1786 }
1787 
1788 static void StreamPages(PSLockRef aLock, SpliceableJSONWriter& aWriter) {
1789   MOZ_RELEASE_ASSERT(CorePS::Exists());
1790   ActivePS::DiscardExpiredPages(aLock);
1791   for (const auto& page : ActivePS::ProfiledPages(aLock)) {
1792     page->StreamJSON(aWriter);
1793   }
1794 }
1795 
1796 static void locked_profiler_stream_json_for_this_process(
1797     PSLockRef aLock, SpliceableJSONWriter& aWriter, double aSinceTime,
1798     bool aIsShuttingDown, bool aOnlyThreads = false) {
1799   LOG("locked_profiler_stream_json_for_this_process");
1800 
1801   MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));
1802 
1803   AUTO_PROFILER_STATS(base_locked_profiler_stream_json_for_this_process);
1804 
1805   const double collectionStartMs = profiler_time();
1806 
1807   ProfileBuffer& buffer = ActivePS::Buffer(aLock);
1808 
1809   // If there is a set "Window length", discard older data.
1810   Maybe<double> durationS = ActivePS::Duration(aLock);
1811   if (durationS.isSome()) {
1812     const double durationStartMs = collectionStartMs - *durationS * 1000;
1813     buffer.DiscardSamplesBeforeTime(durationStartMs);
1814   }
1815 
1816   if (!aOnlyThreads) {
1817     // Put shared library info
1818     aWriter.StartArrayProperty("libs");
1819     AppendSharedLibraries(aWriter);
1820     aWriter.EndArray();
1821 
1822     // Put meta data
1823     aWriter.StartObjectProperty("meta");
1824     { StreamMetaJSCustomObject(aLock, aWriter, aIsShuttingDown); }
1825     aWriter.EndObject();
1826 
1827     // Put page data
1828     aWriter.StartArrayProperty("pages");
1829     { StreamPages(aLock, aWriter); }
1830     aWriter.EndArray();
1831 
1832     buffer.StreamProfilerOverheadToJSON(aWriter, CorePS::ProcessStartTime(),
1833                                         aSinceTime);
1834     buffer.StreamCountersToJSON(aWriter, CorePS::ProcessStartTime(),
1835                                 aSinceTime);
1836 
1837     // Lists the samples for each thread profile
1838     aWriter.StartArrayProperty("threads");
1839   }
1840 
1841   // If aOnlyThreads is true, the only output will be the "threads" array items.
1842   {
1843     ActivePS::DiscardExpiredDeadProfiledThreads(aLock);
1844     Vector<std::pair<RegisteredThread*, ProfiledThreadData*>> threads =
1845         ActivePS::ProfiledThreads(aLock);
1846     for (auto& thread : threads) {
1847       ProfiledThreadData* profiledThreadData = thread.second;
1848       profiledThreadData->StreamJSON(buffer, aWriter,
1849                                      CorePS::ProcessName(aLock),
1850                                      CorePS::ProcessStartTime(), aSinceTime);
1851     }
1852   }
1853 
1854   if (!aOnlyThreads) {
1855     aWriter.EndArray();
1856 
1857     aWriter.StartArrayProperty("pausedRanges");
1858     { buffer.StreamPausedRangesToJSON(aWriter, aSinceTime); }
1859     aWriter.EndArray();
1860   }
1861 
1862   const double collectionEndMs = profiler_time();
1863 
1864   // Record timestamps for the collection into the buffer, so that consumers
1865   // know why we didn't collect any samples for its duration.
1866   // We put these entries into the buffer after we've collected the profile,
1867   // so they'll be visible for the *next* profile collection (if they haven't
1868   // been overwritten due to buffer wraparound by then).
1869   buffer.AddEntry(ProfileBufferEntry::CollectionStart(collectionStartMs));
1870   buffer.AddEntry(ProfileBufferEntry::CollectionEnd(collectionEndMs));
1871 }
1872 
1873 bool profiler_stream_json_for_this_process(SpliceableJSONWriter& aWriter,
1874                                            double aSinceTime,
1875                                            bool aIsShuttingDown,
1876                                            bool aOnlyThreads) {
1877   LOG("profiler_stream_json_for_this_process");
1878 
1879   MOZ_RELEASE_ASSERT(CorePS::Exists());
1880 
1881   PSAutoLock lock;
1882 
1883   if (!ActivePS::Exists(lock)) {
1884     return false;
1885   }
1886 
1887   locked_profiler_stream_json_for_this_process(lock, aWriter, aSinceTime,
1888                                                aIsShuttingDown, aOnlyThreads);
1889   return true;
1890 }
1891 
1892 // END saving/streaming code
1893 ////////////////////////////////////////////////////////////////////////
1894 
1895 static char FeatureCategory(uint32_t aFeature) {
1896   if (aFeature & DefaultFeatures()) {
1897     if (aFeature & AvailableFeatures()) {
1898       return 'D';
1899     }
1900     return 'd';
1901   }
1902 
1903   if (aFeature & StartupExtraDefaultFeatures()) {
1904     if (aFeature & AvailableFeatures()) {
1905       return 'S';
1906     }
1907     return 's';
1908   }
1909 
1910   if (aFeature & AvailableFeatures()) {
1911     return '-';
1912   }
1913   return 'x';
1914 }
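// E.g. a feature in DefaultFeatures() that is compiled into this build maps to
// 'D', while the same feature in a build where it is unavailable maps to 'd'.
// A feature in neither default set maps to '-' if available, 'x' otherwise.
// These letters form the legend printed by PrintUsageThenExit() below.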
1915 
1916 static void PrintUsageThenExit(int aExitCode) {
1917   PrintToConsole(
1918       "\n"
1919       "Profiler environment variable usage:\n"
1920       "\n"
1921       "  MOZ_BASE_PROFILER_HELP\n"
1922       "  If set to any value, prints this message.\n"
1923       "  (Only BaseProfiler features are known here; use MOZ_PROFILER_HELP\n"
1924       "  for Gecko Profiler help, with more features).\n"
1925       "\n"
1926       "  MOZ_BASE_PROFILER_{,DEBUG_,VERBOSE}LOGGING\n"
1927       "  Enables BaseProfiler logging to stdout. The levels of logging\n"
1928       "  available are 'MOZ_BASE_PROFILER_LOGGING' (least verbose),\n"
1929       "  '..._DEBUG_LOGGING', '..._VERBOSE_LOGGING' (most verbose)\n"
1930       "\n"
1931       "  MOZ_PROFILER_STARTUP\n"
1932       "  If set to any value other than '' or '0'/'N'/'n', starts the\n"
1933       "  profiler immediately on start-up.\n"
1934       "  Useful if you want to profile code that runs very early.\n"
1935       "\n"
1936       "  MOZ_PROFILER_STARTUP_ENTRIES=<%u..%u>\n"
1937       "  If MOZ_PROFILER_STARTUP is set, specifies the number of entries\n"
1938       "  per process in the profiler's circular buffer when the profiler is\n"
1939       "  first started.\n"
1940       "  If unset, the platform default is used:\n"
1941       "  %u entries per process, or %u when MOZ_PROFILER_STARTUP is set.\n"
1942       "  (%u bytes per entry -> %u or %u total bytes per process)\n"
1943       "\n"
1944       "  MOZ_PROFILER_STARTUP_DURATION=<1..>\n"
1945       "  If MOZ_PROFILER_STARTUP is set, specifies the maximum lifetime\n"
1946       "  of entries in the profiler's circular buffer when the profiler\n"
1947       "  is first started, in seconds.\n"
1948       "  If unset, the lifetime of the entries will only be restricted by\n"
1949       "  MOZ_PROFILER_STARTUP_ENTRIES (or its default value), and no\n"
1950       "  additional time duration restriction will be applied.\n"
1951       "\n"
1952       "  MOZ_PROFILER_STARTUP_INTERVAL=<1..1000>\n"
1953       "  If MOZ_PROFILER_STARTUP is set, specifies the sample interval,\n"
1954       "  measured in milliseconds, when the profiler is first started.\n"
1955       "  If unset, the platform default is used.\n"
1956       "\n"
1957       "  MOZ_PROFILER_STARTUP_FEATURES_BITFIELD=<Number>\n"
1958       "  If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
1959       "  features, as the integer value of the features bitfield.\n"
1960       "  If unset, the value from MOZ_PROFILER_STARTUP_FEATURES is used.\n"
1961       "\n"
1962       "  MOZ_PROFILER_STARTUP_FEATURES=<Features>\n"
1963       "  If MOZ_PROFILER_STARTUP is set, specifies the profiling\n"
1964       "  features, as a comma-separated list of strings.\n"
1965       "  Ignored if MOZ_PROFILER_STARTUP_FEATURES_BITFIELD is set.\n"
1966       "  If unset, the platform default is used.\n"
1967       "\n"
1968       "    Features: (x=unavailable, D/d=default/unavailable,\n"
1969       "               S/s=MOZ_PROFILER_STARTUP extra "
1970       "default/unavailable)\n",
1971       unsigned(ActivePS::scMinimumBufferEntries),
1972       unsigned(ActivePS::scMaximumBufferEntries),
1973       unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value()),
1974       unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value()),
1975       unsigned(scBytesPerEntry),
1976       unsigned(BASE_PROFILER_DEFAULT_ENTRIES.Value() * scBytesPerEntry),
1977       unsigned(BASE_PROFILER_DEFAULT_STARTUP_ENTRIES.Value() *
1978                scBytesPerEntry));
1979 
1980 #define PRINT_FEATURE(n_, str_, Name_, desc_)             \
1981   PrintToConsole("    %c %7u: \"%s\" (%s)\n",             \
1982                  FeatureCategory(ProfilerFeature::Name_), \
1983                  ProfilerFeature::Name_, str_, desc_);
1984 
1985   BASE_PROFILER_FOR_EACH_FEATURE(PRINT_FEATURE)
1986 
1987 #undef PRINT_FEATURE
1988 
1989   PrintToConsole(
1990       "    -        \"default\" (All above D+S defaults)\n"
1991       "\n"
1992       "  MOZ_PROFILER_STARTUP_FILTERS=<Filters>\n"
1993       "  If MOZ_PROFILER_STARTUP is set, specifies the thread filters, as "
1994       "a\n"
1995       "  comma-separated list of strings. A given thread will be sampled if\n"
1996       "  any of the filters is a case-insensitive substring of the thread\n"
1997       "  name. If unset, a default is used.\n"
1998       "\n"
1999       "  MOZ_PROFILER_SHUTDOWN\n"
2000       "  If set, the profiler saves a profile to the named file on shutdown.\n"
2001       "\n"
2002       "  MOZ_PROFILER_SYMBOLICATE\n"
2003       "  If set, the profiler will pre-symbolicate profiles.\n"
2004       "  *Note* This will add a significant pause when gathering data, and\n"
2005       "  is intended mainly for local development.\n"
2006       "\n"
2007       "  MOZ_PROFILER_LUL_TEST\n"
2008       "  If set to any value, runs LUL unit tests at startup.\n"
2009       "\n"
2010       "  This platform %s native unwinding.\n"
2011       "\n",
2012 #if defined(HAVE_NATIVE_UNWIND)
2013       "supports"
2014 #else
2015       "does not support"
2016 #endif
2017   );
2018 
2019   exit(aExitCode);
2020 }
2021 
2022 ////////////////////////////////////////////////////////////////////////
2023 // BEGIN Sampler
2024 
2025 #if defined(GP_OS_linux) || defined(GP_OS_android)
2026 struct SigHandlerCoordinator;
2027 #endif
2028 
2029 // Sampler performs setup and teardown of the state required to sample with the
2030 // profiler. Sampler may exist when ActivePS is not present.
2031 //
2032 // SuspendAndSampleAndResumeThread must only be called from a single thread,
2033 // and must not sample the thread it is being called from. A separate Sampler
2034 // instance must be used for each thread which wants to capture samples.
2035 
2036 // WARNING WARNING WARNING WARNING WARNING WARNING WARNING WARNING
2037 //
2038 // With the exception of SamplerThread, all Sampler objects must be Disable-d
2039 // before releasing the lock which was used to create them. This avoids races
2040 // on linux with the SIGPROF signal handler.
2041 
2042 class Sampler {
2043  public:
2044   // Sets up the profiler such that it can begin sampling.
2045   explicit Sampler(PSLockRef aLock);
2046 
2047   // Disable the sampler, restoring it to its previous state. This must be
2048   // called once, and only once, before the Sampler is destroyed.
2049   void Disable(PSLockRef aLock);
2050 
2051   // This method suspends and resumes the samplee thread. While the samplee
2052   // thread is suspended, it calls the passed-in function-like object
2053   // aProcessRegs, passing it a populated |const Registers&| and the sample
2054   // |TimeStamp|.
2055   // Func must be of type `void(const Registers&, const TimeStamp&)`.
2056   template <typename Func>
2057   void SuspendAndSampleAndResumeThread(
2058       PSLockRef aLock, const RegisteredThread& aRegisteredThread,
2059       const TimeStamp& aNow, const Func& aProcessRegs);
2060 
2061  private:
2062 #if defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
2063   // Used to restore the SIGPROF handler when ours is removed.
2064   struct sigaction mOldSigprofHandler;
2065 
2066   // This process' ID. Needed as an argument for tgkill in
2067   // SuspendAndSampleAndResumeThread.
2068   int mMyPid;
2069 
2070   // The sampler thread's ID.  Used to assert that it is not sampling itself,
2071   // which would lead to deadlock.
2072   int mSamplerTid;
2073 
2074  public:
2075   // This is the one-and-only variable used to communicate between the sampler
2076   // thread and the samplee thread's signal handler. It's static because the
2077   // samplee thread's signal handler is static.
2078   static struct SigHandlerCoordinator* sSigHandlerCoordinator;
2079 #endif
2080 };
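// A minimal usage sketch (mirroring SamplerThread::Run() below): the sampler
// suspends the samplee, hands its registers to the lambda, then resumes it.
//
//   mSampler.SuspendAndSampleAndResumeThread(
//       lock, *registeredThread, now,
//       [&](const Registers& aRegs, const TimeStamp& aNow) {
//         // Runs while the samplee is suspended; must not allocate or take
//         // locks, as that could deadlock against the suspended thread.
//         DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
//                          aRegs, samplePos, localProfileBuffer);
//       });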
2081 
2082 // END Sampler
2083 ////////////////////////////////////////////////////////////////////////
2084 
2085 ////////////////////////////////////////////////////////////////////////
2086 // BEGIN SamplerThread
2087 
2088 // The sampler thread controls sampling and runs whenever the profiler is
2089 // active. It periodically runs through all registered threads, finds those
2090 // that should be sampled, then pauses and samples them.
2091 
2092 class SamplerThread {
2093  public:
2094   // Creates a sampler thread, but doesn't start it.
2095   SamplerThread(PSLockRef aLock, uint32_t aActivityGeneration,
2096                 double aIntervalMilliseconds);
2097   ~SamplerThread();
2098 
2099   // This runs on (is!) the sampler thread.
2100   void Run();
2101 
2102   // This runs on the main thread.
2103   void Stop(PSLockRef aLock);
2104 
2105  private:
2106   // This suspends the calling thread for the given number of microseconds.
2107   // Best effort timing.
2108   void SleepMicro(uint32_t aMicroseconds);
2109 
2110   // The sampler used to suspend and sample threads.
2111   Sampler mSampler;
2112 
2113   // The activity generation, for detecting when the sampler thread must stop.
2114   const uint32_t mActivityGeneration;
2115 
2116   // The interval between samples, measured in microseconds.
2117   const int mIntervalMicroseconds;
2118 
2119   // The OS-specific handle for the sampler thread.
2120 #if defined(GP_OS_windows)
2121   HANDLE mThread;
2122 #elif defined(GP_OS_darwin) || defined(GP_OS_linux) || \
2123     defined(GP_OS_android) || defined(GP_OS_freebsd)
2124   pthread_t mThread;
2125 #endif
2126 
2127   SamplerThread(const SamplerThread&) = delete;
2128   void operator=(const SamplerThread&) = delete;
2129 };
2130 
2131 // This function is required because we need to create a SamplerThread within
2132 // ActivePS's constructor, but SamplerThread is defined after ActivePS. It
2133 // could probably be removed by moving some code around.
2134 static SamplerThread* NewSamplerThread(PSLockRef aLock, uint32_t aGeneration,
2135                                        double aInterval) {
2136   return new SamplerThread(aLock, aGeneration, aInterval);
2137 }
2138 
2139 // This function is the sampler thread.  This implementation is used for all
2140 // targets.
2141 void SamplerThread::Run() {
2142   // TODO: If possible, name this thread later on, after NSPR becomes available.
2143   // PR_SetCurrentThreadName("SamplerThread");
2144 
2145   // Features won't change during this SamplerThread's lifetime, so we can
2146   // determine now whether stack sampling is required.
2147   const bool noStackSampling = []() {
2148     PSAutoLock lock;
2149     if (!ActivePS::Exists(lock)) {
2150       // If there is no active profiler, it doesn't matter what we return,
2151       // because this thread will exit before any stack sampling is attempted.
2152       return false;
2153     }
2154     return ActivePS::FeatureNoStackSampling(lock);
2155   }();
2156 
2157   // Use a local ProfileChunkedBuffer & ProfileBuffer to capture the stack.
2158   // (This is to avoid touching the CorePS::CoreBuffer lock while
2159   // a thread is suspended, because that thread could be working with
2160   // the CorePS::CoreBuffer as well.)
2161   ProfileBufferChunkManagerSingle localChunkManager(scExpectedMaximumStackSize);
2162   ProfileChunkedBuffer localBuffer(
2163       ProfileChunkedBuffer::ThreadSafety::WithoutMutex, localChunkManager);
2164   ProfileBuffer localProfileBuffer(localBuffer);
2165 
2166   // Will be kept between collections, to know what each collection does.
2167   auto previousState = localBuffer.GetState();
2168 
2169   // This will be positive if we are running behind schedule (sampling less
2170   // frequently than desired) and negative if we are ahead of schedule.
2171   TimeDuration lastSleepOvershoot = 0;
2172   TimeStamp sampleStart = TimeStamp::NowUnfuzzed();
2173 
2174   while (true) {
2175     // This scope is for |lock|. It ends before we sleep below.
2176     {
2177       PSAutoLock lock;
2178       TimeStamp lockAcquired = TimeStamp::NowUnfuzzed();
2179 
2180       if (!ActivePS::Exists(lock)) {
2181         return;
2182       }
2183 
2184       // At this point profiler_stop() might have been called, and
2185       // profiler_start() might have been called on another thread. If this
2186       // happens the generation won't match.
2187       if (ActivePS::Generation(lock) != mActivityGeneration) {
2188         return;
2189       }
2190 
2191       ActivePS::ClearExpiredExitProfiles(lock);
2192 
2193       TimeStamp expiredMarkersCleaned = TimeStamp::NowUnfuzzed();
2194 
2195       if (!ActivePS::IsPaused(lock)) {
2196         TimeDuration delta = sampleStart - CorePS::ProcessStartTime();
2197         ProfileBuffer& buffer = ActivePS::Buffer(lock);
2198 
2199         // handle per-process generic counters
2200         const Vector<BaseProfilerCount*>& counters = CorePS::Counters(lock);
2201         for (auto& counter : counters) {
2202           // create Buffer entries for each counter
2203           buffer.AddEntry(ProfileBufferEntry::CounterId(counter));
2204           buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
2205           // XXX support keyed maps of counts
2206           // In the future, we'll support keyed counters - for example, counters
2207           // with a key which is a thread ID. For "simple" counters we'll just
2208           // use a key of 0.
2209           int64_t count;
2210           uint64_t number;
2211           counter->Sample(count, number);
2212           buffer.AddEntry(ProfileBufferEntry::CounterKey(0));
2213           buffer.AddEntry(ProfileBufferEntry::Count(count));
2214           if (number) {
2215             buffer.AddEntry(ProfileBufferEntry::Number(number));
2216           }
2217         }
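        // Net effect: each counter contributes an entry sequence of the form
        // (Number omitted when zero):
        //   CounterId(counter), Time(t), CounterKey(0), Count(c) [, Number(n)]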
2218         TimeStamp countersSampled = TimeStamp::NowUnfuzzed();
2219 
2220         if (!noStackSampling) {
2221           const Vector<LiveProfiledThreadData>& liveThreads =
2222               ActivePS::LiveProfiledThreads(lock);
2223 
2224           for (auto& thread : liveThreads) {
2225             RegisteredThread* registeredThread = thread.mRegisteredThread;
2226             ProfiledThreadData* profiledThreadData =
2227                 thread.mProfiledThreadData.get();
2228             RefPtr<ThreadInfo> info = registeredThread->Info();
2229 
2230             // If the thread is asleep and has been sampled before in the same
2231             // sleep episode, find and copy the previous sample, as that's
2232             // cheaper than taking a new sample.
2233             if (registeredThread->RacyRegisteredThread()
2234                     .CanDuplicateLastSampleDueToSleep()) {
2235               bool dup_ok = ActivePS::Buffer(lock).DuplicateLastSample(
2236                   info->ThreadId(), CorePS::ProcessStartTime(),
2237                   profiledThreadData->LastSample());
2238               if (dup_ok) {
2239                 continue;
2240               }
2241             }
2242 
2243             AUTO_PROFILER_STATS(base_SamplerThread_Run_DoPeriodicSample);
2244 
2245             TimeStamp now = TimeStamp::NowUnfuzzed();
2246 
2247             // Add the thread ID now, so we know its position in the main
2248             // buffer, which is used by some JS data. (DoPeriodicSample only
2249             // knows about the temporary local buffer.)
2250             uint64_t samplePos =
2251                 buffer.AddThreadIdEntry(registeredThread->Info()->ThreadId());
2252             profiledThreadData->LastSample() = Some(samplePos);
2253 
2254             // Also add the time, so it's always there after the thread ID, as
2255             // expected by the parser. (Other stack data is optional.)
2256             TimeDuration delta = now - CorePS::ProcessStartTime();
2257             buffer.AddEntry(ProfileBufferEntry::Time(delta.ToMilliseconds()));
2258 
2259             mSampler.SuspendAndSampleAndResumeThread(
2260                 lock, *registeredThread, now,
2261                 [&](const Registers& aRegs, const TimeStamp& aNow) {
2262                   DoPeriodicSample(lock, *registeredThread, *profiledThreadData,
2263                                    aRegs, samplePos, localProfileBuffer);
2264                 });
2265 
2266             // If data is complete, copy it into the global buffer.
2267             auto state = localBuffer.GetState();
2268             if (state.mClearedBlockCount != previousState.mClearedBlockCount) {
2269               LOG("Stack sample too big for local storage, needed %u bytes",
2270                   unsigned(state.mRangeEnd - previousState.mRangeEnd));
2271             } else if (state.mRangeEnd - previousState.mRangeEnd >=
2272                        *CorePS::CoreBuffer().BufferLength()) {
2273               LOG("Stack sample too big for profiler storage, needed %u bytes",
2274                   unsigned(state.mRangeEnd - previousState.mRangeEnd));
2275             } else {
2276               CorePS::CoreBuffer().AppendContents(localBuffer);
2277             }
2278 
2279             // Clean up for the next run.
2280             localBuffer.Clear();
2281             previousState = localBuffer.GetState();
2282           }
2283         }
2284 
2285 #if defined(USE_LUL_STACKWALK)
2286         // The LUL unwind object accumulates frame statistics. Periodically we
2287         // should poke it to give it a chance to print those statistics.  This
2288         // involves doing I/O (fprintf, __android_log_print, etc.) and so
2289         // can't safely be done from the critical section inside
2290         // SuspendAndSampleAndResumeThread, which is why it is done here.
2291         CorePS::Lul(lock)->MaybeShowStats();
2292 #endif
2293         TimeStamp threadsSampled = TimeStamp::NowUnfuzzed();
2294 
2295         {
2296           AUTO_PROFILER_STATS(Sampler_FulfillChunkRequests);
2297           ActivePS::FulfillChunkRequests(lock);
2298         }
2299 
2300         buffer.CollectOverheadStats(delta, lockAcquired - sampleStart,
2301                                     expiredMarkersCleaned - lockAcquired,
2302                                     countersSampled - expiredMarkersCleaned,
2303                                     threadsSampled - countersSampled);
2304       }
2305     }
2306     // gPSMutex is not held after this point.
2307 
2308     // Calculate how long a sleep to request.  After the sleep, measure how
2309     // long we actually slept and take the difference into account when
2310     // calculating the sleep interval for the next iteration.  This is an
2311     // attempt to stay on schedule despite the inaccuracy of the actual
2312     // sleep intervals.
2313     TimeStamp targetSleepEndTime =
2314         sampleStart + TimeDuration::FromMicroseconds(mIntervalMicroseconds);
2315     TimeStamp beforeSleep = TimeStamp::NowUnfuzzed();
2316     TimeDuration targetSleepDuration = targetSleepEndTime - beforeSleep;
2317     double sleepTime = std::max(
2318         0.0, (targetSleepDuration - lastSleepOvershoot).ToMicroseconds());
2319     SleepMicro(static_cast<uint32_t>(sleepTime));
2320     sampleStart = TimeStamp::NowUnfuzzed();
2321     lastSleepOvershoot =
2322         sampleStart - (beforeSleep + TimeDuration::FromMicroseconds(sleepTime));
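    // Worked example (numbers hypothetical): with a 1000us interval, if the
    // previous iteration overslept by 100us (lastSleepOvershoot == 100us) and
    // this iteration spent 300us sampling, we request a sleep of
    // 1000 - 300 - 100 = 600us; whatever we oversleep this time is carried
    // into the next iteration in the same way.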
2323   }
2324 }
2325 
2326 // Temporarily close the namespaces from the enclosing platform.cpp.
2327 }  // namespace baseprofiler
2328 }  // namespace mozilla
2329 
2330 // We #include these files directly because it means those files can use
2331 // declarations from this file trivially.  These provide target-specific
2332 // implementations of all SamplerThread methods except Run().
2333 #if defined(GP_OS_windows)
2334 #  include "platform-win32.cpp"
2335 #elif defined(GP_OS_darwin)
2336 #  include "platform-macos.cpp"
2337 #elif defined(GP_OS_linux) || defined(GP_OS_android) || defined(GP_OS_freebsd)
2338 #  include "platform-linux-android.cpp"
2339 #else
2340 #  error "bad platform"
2341 #endif
2342 
2343 namespace mozilla {
2344 namespace baseprofiler {
2345 
2346 UniquePlatformData AllocPlatformData(int aThreadId) {
2347   return UniquePlatformData(new PlatformData(aThreadId));
2348 }
2349 
2350 void PlatformDataDestructor::operator()(PlatformData* aData) { delete aData; }
2351 
2352 // END SamplerThread
2353 ////////////////////////////////////////////////////////////////////////
2354 
2355 ////////////////////////////////////////////////////////////////////////
2356 // BEGIN externally visible functions
2357 
2358 static uint32_t ParseFeature(const char* aFeature, bool aIsStartup) {
2359   if (strcmp(aFeature, "default") == 0) {
2360     return (aIsStartup ? (DefaultFeatures() | StartupExtraDefaultFeatures())
2361                        : DefaultFeatures()) &
2362            AvailableFeatures();
2363   }
2364 
2365 #define PARSE_FEATURE_BIT(n_, str_, Name_, desc_) \
2366   if (strcmp(aFeature, str_) == 0) {              \
2367     return ProfilerFeature::Name_;                \
2368   }
2369 
2370   BASE_PROFILER_FOR_EACH_FEATURE(PARSE_FEATURE_BIT)
2371 
2372 #undef PARSE_FEATURE_BIT
2373 
2374   PrintToConsole("\nUnrecognized feature \"%s\".\n\n", aFeature);
2375   // Since we may have been given an old feature we no longer implement, don't exit.
2376   PrintUsageThenExit(0);
2377   return 0;
2378 }
2379 
2380 uint32_t ParseFeaturesFromStringArray(const char** aFeatures,
2381                                       uint32_t aFeatureCount,
2382                                       bool aIsStartup /* = false */) {
2383   uint32_t features = 0;
2384   for (size_t i = 0; i < aFeatureCount; i++) {
2385     features |= ParseFeature(aFeatures[i], aIsStartup);
2386   }
2387   return features;
2388 }
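// E.g. (illustrative call) parsing the array {"stackwalk", "leaf"} returns
// ProfilerFeature::StackWalk | ProfilerFeature::Leaf, and "default" expands to
// the default feature set, so MOZ_PROFILER_STARTUP_FEATURES=default,leaf mixes
// the two forms freely.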
2389 
2390 // Find the RegisteredThread for the current thread. This should only be called
2391 // in places where TLSRegisteredThread can't be used.
2392 static RegisteredThread* FindCurrentThreadRegisteredThread(PSLockRef aLock) {
2393   int id = profiler_current_thread_id();
2394   const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
2395       CorePS::RegisteredThreads(aLock);
2396   for (auto& registeredThread : registeredThreads) {
2397     if (registeredThread->Info()->ThreadId() == id) {
2398       return registeredThread.get();
2399     }
2400   }
2401 
2402   return nullptr;
2403 }
2404 
2405 static ProfilingStack* locked_register_thread(PSLockRef aLock,
2406                                               const char* aName,
2407                                               void* aStackTop) {
2408   MOZ_RELEASE_ASSERT(CorePS::Exists());
2409 
2410   MOZ_RELEASE_ASSERT(!FindCurrentThreadRegisteredThread(aLock));
2411 
2412   VTUNE_REGISTER_THREAD(aName);
2413 
2414   if (!TLSRegisteredThread::Init(aLock)) {
2415     return nullptr;
2416   }
2417 
2418   RefPtr<ThreadInfo> info = new ThreadInfo(aName, profiler_current_thread_id(),
2419                                            CorePS::IsMainThread());
2420   UniquePtr<RegisteredThread> registeredThread =
2421       MakeUnique<RegisteredThread>(info, aStackTop);
2422 
2423   TLSRegisteredThread::SetRegisteredThread(aLock, registeredThread.get());
2424 
2425   if (ActivePS::Exists(aLock) && ActivePS::ShouldProfileThread(aLock, info)) {
2426     registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
2427     ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
2428                                     MakeUnique<ProfiledThreadData>(info));
2429   }
2430 
2431   ProfilingStack* profilingStack =
2432       &registeredThread->RacyRegisteredThread().ProfilingStack();
2433 
2434   CorePS::AppendRegisteredThread(aLock, std::move(registeredThread));
2435 
2436   return profilingStack;
2437 }
2438 
2439 static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
2440                                   double aInterval, uint32_t aFeatures,
2441                                   const char** aFilters, uint32_t aFilterCount,
2442                                   const Maybe<double>& aDuration);
2443 
2444 static Vector<const char*> SplitAtCommas(const char* aString,
2445                                          UniquePtr<char[]>& aStorage) {
2446   size_t len = strlen(aString);
2447   aStorage = MakeUnique<char[]>(len + 1);
2448   PodCopy(aStorage.get(), aString, len + 1);
2449 
2450   // Iterate over all characters in aStorage and split at commas, by
2451   // overwriting commas with the null char.
2452   Vector<const char*> array;
2453   size_t currentElementStart = 0;
2454   for (size_t i = 0; i <= len; i++) {
2455     if (aStorage[i] == ',') {
2456       aStorage[i] = '\0';
2457     }
2458     if (aStorage[i] == '\0') {
2459       MOZ_RELEASE_ASSERT(array.append(&aStorage[currentElementStart]));
2460       currentElementStart = i + 1;
2461     }
2462   }
2463   return array;
2464 }
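// E.g. (illustrative) SplitAtCommas("a,bc,,d", storage) yields
// {"a", "bc", "", "d"}: each comma is overwritten with '\0' in |storage| and
// the returned pointers alias it, so |storage| must outlive the Vector.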
2465 
2466 void profiler_init(void* aStackTop) {
2467   LOG("profiler_init");
2468 
2469   VTUNE_INIT();
2470 
2471   MOZ_RELEASE_ASSERT(!CorePS::Exists());
2472 
2473   if (getenv("MOZ_BASE_PROFILER_HELP")) {
2474     PrintUsageThenExit(0);  // terminates execution
2475   }
2476 
2477   SharedLibraryInfo::Initialize();
2478 
2479   uint32_t features = DefaultFeatures() & AvailableFeatures();
2480 
2481   UniquePtr<char[]> filterStorage;
2482 
2483   Vector<const char*> filters;
2484   MOZ_RELEASE_ASSERT(filters.append(kMainThreadName));
2485 
2486   PowerOfTwo32 capacity = BASE_PROFILER_DEFAULT_ENTRIES;
2487   Maybe<double> duration = Nothing();
2488   double interval = BASE_PROFILER_DEFAULT_INTERVAL;
2489 
2490   {
2491     PSAutoLock lock;
2492 
2493     // We've passed the possible failure point. Instantiate CorePS, which
2494     // indicates that the profiler has initialized successfully.
2495     CorePS::Create(lock);
2496 
2497     locked_register_thread(lock, kMainThreadName, aStackTop);
2498 
2499     // Platform-specific initialization.
2500     PlatformInit(lock);
2501 
2502     // (Linux-only) We could create CorePS::mLul and read unwind info into it
2503     // at this point. That would match the lifetime implied by destruction of
2504     // it in profiler_shutdown() just below. However, that gives a big delay on
2505     // startup, even if no profiling is actually to be done. So, instead, it is
2506     // created on demand at the first call to PlatformStart().
2507 
2508     const char* startupEnv = getenv("MOZ_PROFILER_STARTUP");
2509     if (!startupEnv || startupEnv[0] == '\0' ||
2510         ((startupEnv[0] == '0' || startupEnv[0] == 'N' ||
2511           startupEnv[0] == 'n') &&
2512          startupEnv[1] == '\0')) {
2513       return;
2514     }
2515 
2516     LOG("- MOZ_PROFILER_STARTUP is set");
2517 
2518     // Startup default capacity may be different.
2519     capacity = BASE_PROFILER_DEFAULT_STARTUP_ENTRIES;
2520 
2521     const char* startupCapacity = getenv("MOZ_PROFILER_STARTUP_ENTRIES");
2522     if (startupCapacity && startupCapacity[0] != '\0') {
2523       errno = 0;
2524       long capacityLong = strtol(startupCapacity, nullptr, 10);
2525       // `long` could be 32 or 64 bits, so we force a 64-bit comparison with
2526       // the maximum 32-bit signed number (as more than that is clamped down to
2527       // 2^31 anyway).
2528       if (errno == 0 && capacityLong > 0 &&
2529           static_cast<uint64_t>(capacityLong) <=
2530               static_cast<uint64_t>(INT32_MAX)) {
2531         capacity = PowerOfTwo32(ActivePS::ClampToAllowedEntries(
2532             static_cast<uint32_t>(capacityLong)));
2533         LOG("- MOZ_PROFILER_STARTUP_ENTRIES = %u", unsigned(capacity.Value()));
2534       } else {
2535         PrintToConsole("- MOZ_PROFILER_STARTUP_ENTRIES not a valid integer: %s",
2536                        startupCapacity);
2537         PrintUsageThenExit(1);
2538       }
2539     }
2540 
2541     const char* startupDuration = getenv("MOZ_PROFILER_STARTUP_DURATION");
2542     if (startupDuration && startupDuration[0] != '\0') {
2543       // The duration is a floating point number. Use StringToDouble rather than
2544       // strtod, so that "." is used as the decimal separator regardless of OS
2545       // locale.
2546       auto durationVal = StringToDouble(std::string(startupDuration));
2547       if (durationVal && *durationVal >= 0.0) {
2548         if (*durationVal > 0.0) {
2549           duration = Some(*durationVal);
2550         }
2551         LOG("- MOZ_PROFILER_STARTUP_DURATION = %f", *durationVal);
2552       } else {
2553         PrintToConsole("- MOZ_PROFILER_STARTUP_DURATION not a valid float: %s",
2554                        startupDuration);
2555         PrintUsageThenExit(1);
2556       }
2557     }

    const char* startupInterval = getenv("MOZ_PROFILER_STARTUP_INTERVAL");
    if (startupInterval && startupInterval[0] != '\0') {
      // The interval is a floating point number. Use StringToDouble rather than
      // strtod, so that "." is used as the decimal separator regardless of OS
      // locale.
      auto intervalValue = StringToDouble(MakeStringSpan(startupInterval));
      if (intervalValue && *intervalValue > 0.0 && *intervalValue <= 1000.0) {
        interval = *intervalValue;
        LOG("- MOZ_PROFILER_STARTUP_INTERVAL = %f", interval);
      } else {
        PrintToConsole("- MOZ_PROFILER_STARTUP_INTERVAL not a valid float: %s",
                       startupInterval);
        PrintUsageThenExit(1);
      }
    }
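
    // e.g. MOZ_PROFILER_STARTUP_INTERVAL=0.5 samples roughly every 0.5ms;
    // values outside the (0, 1000] millisecond range are rejected above.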

    features |= StartupExtraDefaultFeatures() & AvailableFeatures();

    const char* startupFeaturesBitfield =
        getenv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD");
    if (startupFeaturesBitfield && startupFeaturesBitfield[0] != '\0') {
      errno = 0;
      features = strtol(startupFeaturesBitfield, nullptr, 10);
      if (errno == 0 && features != 0) {
        LOG("- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD = %d", features);
      } else {
        PrintToConsole(
            "- MOZ_PROFILER_STARTUP_FEATURES_BITFIELD not a valid integer: %s",
            startupFeaturesBitfield);
        PrintUsageThenExit(1);
      }
    } else {
      const char* startupFeatures = getenv("MOZ_PROFILER_STARTUP_FEATURES");
      if (startupFeatures && startupFeatures[0] != '\0') {
        // Interpret startupFeatures as a list of feature strings, separated by
        // commas.
        UniquePtr<char[]> featureStringStorage;
        Vector<const char*> featureStringArray =
            SplitAtCommas(startupFeatures, featureStringStorage);
        features = ParseFeaturesFromStringArray(featureStringArray.begin(),
                                                featureStringArray.length(),
                                                /* aIsStartup */ true);
        LOG("- MOZ_PROFILER_STARTUP_FEATURES = %d", features);
      }
    }
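
    // e.g. MOZ_PROFILER_STARTUP_FEATURES="stackwalk,leaf" (a comma-separated
    // list of feature names, as defined by BASE_PROFILER_FOR_EACH_FEATURE),
    // or the equivalent numeric MOZ_PROFILER_STARTUP_FEATURES_BITFIELD form.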

    const char* startupFilters = getenv("MOZ_PROFILER_STARTUP_FILTERS");
    if (startupFilters && startupFilters[0] != '\0') {
      filters = SplitAtCommas(startupFilters, filterStorage);
      LOG("- MOZ_PROFILER_STARTUP_FILTERS = %s", startupFilters);
    }
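
    // e.g. MOZ_PROFILER_STARTUP_FILTERS="GeckoMain,Compositor" (illustrative
    // names) profiles only the threads whose names match one of the
    // comma-separated filter strings.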

    locked_profiler_start(lock, capacity, interval, features, filters.begin(),
                          filters.length(), duration);
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // Start counting memory allocations (outside of the lock because this
  //   // may call profiler_add_sampled_counter, which would attempt to take
  //   // the lock).
  //   mozilla::profiler::install_memory_counter(true);
  // #endif
}

static void locked_profiler_save_profile_to_file(PSLockRef aLock,
                                                 const char* aFilename,
                                                 bool aIsShuttingDown);

static SamplerThread* locked_profiler_stop(PSLockRef aLock);

void profiler_shutdown() {
  LOG("profiler_shutdown");

  VTUNE_SHUTDOWN();

  MOZ_RELEASE_ASSERT(CorePS::IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // If the profiler is active we must get a handle to the SamplerThread before
  // ActivePS is destroyed, in order to delete it.
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Save the profile on shutdown if requested.
    if (ActivePS::Exists(lock)) {
      const char* filename = getenv("MOZ_PROFILER_SHUTDOWN");
      if (filename) {
        locked_profiler_save_profile_to_file(lock, filename,
                                             /* aIsShuttingDown */ true);
      }

      samplerThread = locked_profiler_stop(lock);
    }

    CorePS::Destroy(lock);

    // We just destroyed CorePS and the ThreadInfos it contains, so we can
    // clear this thread's TLSRegisteredThread.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);
  }

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}

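// Writes the profile for this process into aWriter. With aOnlyThreads=false,
// a complete profile object is produced, containing an empty "processes"
// array (since this is a synchronous call); with aOnlyThreads=true, only a
// bare list of thread objects is emitted, for splicing into an enclosing
// profile.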
static bool WriteProfileToJSONWriter(SpliceableChunkedJSONWriter& aWriter,
                                     double aSinceTime, bool aIsShuttingDown,
                                     bool aOnlyThreads = false) {
  LOG("WriteProfileToJSONWriter");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (!aOnlyThreads) {
    aWriter.Start();
    {
      if (!profiler_stream_json_for_this_process(
              aWriter, aSinceTime, aIsShuttingDown, aOnlyThreads)) {
        return false;
      }

      // Don't include profiles from other processes because this is a
      // synchronous function.
      aWriter.StartArrayProperty("processes");
      aWriter.EndArray();
    }
    aWriter.End();
  } else {
    aWriter.StartBareList();
    if (!profiler_stream_json_for_this_process(aWriter, aSinceTime,
                                               aIsShuttingDown, aOnlyThreads)) {
      return false;
    }
    aWriter.EndBareList();
  }
  return true;
}

void profiler_set_process_name(const std::string& aProcessName) {
  LOG("profiler_set_process_name(\"%s\")", aProcessName.c_str());
  PSAutoLock lock;
  CorePS::SetProcessName(lock, aProcessName);
}

UniquePtr<char[]> profiler_get_profile(double aSinceTime, bool aIsShuttingDown,
                                       bool aOnlyThreads) {
  LOG("profiler_get_profile");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown, aOnlyThreads)) {
    return nullptr;
  }
  return b.WriteFunc()->CopyData();
}

void profiler_get_profile_json_into_lazily_allocated_buffer(
    const std::function<char*(size_t)>& aAllocator, double aSinceTime,
    bool aIsShuttingDown) {
  LOG("profiler_get_profile_json_into_lazily_allocated_buffer");

  SpliceableChunkedJSONWriter b;
  if (!WriteProfileToJSONWriter(b, aSinceTime, aIsShuttingDown)) {
    return;
  }

  b.WriteFunc()->CopyDataIntoLazilyAllocatedBuffer(aAllocator);
}

void profiler_get_start_params(int* aCapacity, Maybe<double>* aDuration,
                               double* aInterval, uint32_t* aFeatures,
                               Vector<const char*>* aFilters) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (!aCapacity || !aDuration || !aInterval || !aFeatures || !aFilters) {
    return;
  }

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    *aCapacity = 0;
    *aDuration = Nothing();
    *aInterval = 0;
    *aFeatures = 0;
    aFilters->clear();
    return;
  }

  *aCapacity = ActivePS::Capacity(lock).Value();
  *aDuration = ActivePS::Duration(lock);
  *aInterval = ActivePS::Interval(lock);
  *aFeatures = ActivePS::Features(lock);

  const Vector<std::string>& filters = ActivePS::Filters(lock);
  MOZ_ALWAYS_TRUE(aFilters->resize(filters.length()));
  for (uint32_t i = 0; i < filters.length(); ++i) {
    (*aFilters)[i] = filters[i].c_str();
  }
}

void GetProfilerEnvVarsForChildProcess(
    std::function<void(const char* key, const char* value)>&& aSetEnv) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    aSetEnv("MOZ_PROFILER_STARTUP", "");
    return;
  }

  aSetEnv("MOZ_PROFILER_STARTUP", "1");
  auto capacityString =
      Smprintf("%u", unsigned(ActivePS::Capacity(lock).Value()));
  aSetEnv("MOZ_PROFILER_STARTUP_ENTRIES", capacityString.get());

  // The interval needs to be formatted with "." as the decimal separator, and
  // parsed back the same way (see the StringToDouble call in profiler_init()),
  // regardless of OS locale; this is why Smprintf with %f is not used here.
  // Note: std::to_string is also locale-sensitive, so this assumes a locale
  // whose decimal separator is ".".
  std::string intervalString = std::to_string(ActivePS::Interval(lock));
  aSetEnv("MOZ_PROFILER_STARTUP_INTERVAL", intervalString.c_str());

  auto featuresString = Smprintf("%d", ActivePS::Features(lock));
  aSetEnv("MOZ_PROFILER_STARTUP_FEATURES_BITFIELD", featuresString.get());

  std::string filtersString;
  const Vector<std::string>& filters = ActivePS::Filters(lock);
  for (uint32_t i = 0; i < filters.length(); ++i) {
    filtersString += filters[i];
    if (i != filters.length() - 1) {
      filtersString += ",";
    }
  }
  aSetEnv("MOZ_PROFILER_STARTUP_FILTERS", filtersString.c_str());
}
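
// Hypothetical usage sketch: forward the current session's settings to a
// child process being launched (`childEnv` is illustrative only):
//
//   GetProfilerEnvVarsForChildProcess(
//       [&](const char* aKey, const char* aValue) {
//         childEnv.Set(aKey, aValue);
//       });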

void profiler_received_exit_profile(const std::string& aExitProfile) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock;
  if (!ActivePS::Exists(lock)) {
    return;
  }
  ActivePS::AddExitProfile(lock, aExitProfile);
}

Vector<std::string> profiler_move_exit_profiles() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  PSAutoLock lock;
  Vector<std::string> profiles;
  if (ActivePS::Exists(lock)) {
    profiles = ActivePS::MoveExitProfiles(lock);
  }
  return profiles;
}

static void locked_profiler_save_profile_to_file(PSLockRef aLock,
                                                 const char* aFilename,
                                                 bool aIsShuttingDown = false) {
  LOG("locked_profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  std::ofstream stream;
  stream.open(aFilename);
  if (stream.is_open()) {
    SpliceableJSONWriter w(MakeUnique<OStreamJSONWriteFunc>(stream));
    w.Start();
    {
      locked_profiler_stream_json_for_this_process(aLock, w, /* sinceTime */ 0,
                                                   aIsShuttingDown);

      w.StartArrayProperty("processes");
      Vector<std::string> exitProfiles = ActivePS::MoveExitProfiles(aLock);
      for (auto& exitProfile : exitProfiles) {
        if (!exitProfile.empty()) {
          w.Splice(exitProfile.c_str());
        }
      }
      w.EndArray();
    }
    w.End();

    stream.close();
  }
}

void profiler_save_profile_to_file(const char* aFilename) {
  LOG("profiler_save_profile_to_file(%s)", aFilename);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return;
  }

  locked_profiler_save_profile_to_file(lock, aFilename);
}

uint32_t profiler_get_available_features() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());
  return AvailableFeatures();
}

Maybe<ProfilerBufferInfo> profiler_get_buffer_info() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return Nothing();
  }

  return Some(ActivePS::Buffer(lock).GetProfilerBufferInfo());
}

// This basically duplicates AutoProfilerLabel's constructor.
static void* MozGlueBaseLabelEnter(const char* aLabel,
                                   const char* aDynamicString, void* aSp) {
  ProfilingStack* profilingStack = AutoProfilerLabel::sProfilingStack.get();
  if (profilingStack) {
    profilingStack->pushLabelFrame(aLabel, aDynamicString, aSp,
                                   ProfilingCategoryPair::OTHER);
  }
  return profilingStack;
}

// This basically duplicates AutoProfilerLabel's destructor.
static void MozGlueBaseLabelExit(void* sProfilingStack) {
  if (sProfilingStack) {
    reinterpret_cast<ProfilingStack*>(sProfilingStack)->pop();
  }
}

static void locked_profiler_start(PSLockRef aLock, PowerOfTwo32 aCapacity,
                                  double aInterval, uint32_t aFeatures,
                                  const char** aFilters, uint32_t aFilterCount,
                                  const Maybe<double>& aDuration) {
  if (LOG_TEST) {
    LOG("locked_profiler_start");
    LOG("- capacity  = %d", int(aCapacity.Value()));
    LOG("- duration  = %.2f", aDuration ? *aDuration : -1);
    LOG("- interval = %.2f", aInterval);

#define LOG_FEATURE(n_, str_, Name_, desc_)     \
  if (ProfilerFeature::Has##Name_(aFeatures)) { \
    LOG("- feature  = %s", str_);               \
  }

    BASE_PROFILER_FOR_EACH_FEATURE(LOG_FEATURE)

#undef LOG_FEATURE

    for (uint32_t i = 0; i < aFilterCount; i++) {
      LOG("- threads  = %s", aFilters[i]);
    }
  }

  MOZ_RELEASE_ASSERT(CorePS::Exists() && !ActivePS::Exists(aLock));

#if defined(GP_PLAT_amd64_windows)
  InitializeWin64ProfilerHooks();
#endif

  // Fall back to the default values if the passed-in values are unreasonable.
  // Fewer than 8192 entries (65536 bytes) may not be enough for the most
  // complex stacks, and we want to be able to store at least one full stack.
  // TODO: Review these magic numbers.
  PowerOfTwo32 capacity =
      (aCapacity.Value() >= 8192u) ? aCapacity : BASE_PROFILER_DEFAULT_ENTRIES;
  Maybe<double> duration = aDuration;

  if (aDuration && *aDuration <= 0) {
    duration = Nothing();
  }
  double interval = aInterval > 0 ? aInterval : BASE_PROFILER_DEFAULT_INTERVAL;

  ActivePS::Create(aLock, capacity, interval, aFeatures, aFilters, aFilterCount,
                   duration);

  // Set up profiling for each registered thread, if appropriate.
  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(aLock);
  for (auto& registeredThread : registeredThreads) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    if (ActivePS::ShouldProfileThread(aLock, info)) {
      registeredThread->RacyRegisteredThread().SetIsBeingProfiled(true);
      ActivePS::AddLiveProfiledThread(aLock, registeredThread.get(),
                                      MakeUnique<ProfiledThreadData>(info));
      registeredThread->RacyRegisteredThread().ReinitializeOnResume();
    }
  }

  // Set up support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(MozGlueBaseLabelEnter, MozGlueBaseLabelExit);

  // At the very end, set up RacyFeatures.
  RacyFeatures::SetActive(ActivePS::Features(aLock));
}

void profiler_start(PowerOfTwo32 aCapacity, double aInterval,
                    uint32_t aFeatures, const char** aFilters,
                    uint32_t aFilterCount, const Maybe<double>& aDuration) {
  LOG("profiler_start");

  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    // Reset the current state if the profiler is running.
    if (ActivePS::Exists(lock)) {
      samplerThread = locked_profiler_stop(lock);
    }

    locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                          aFilterCount, aDuration);
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // Start counting memory allocations (outside of the lock because this
  //   // may call profiler_add_sampled_counter, which would attempt to take
  //   // the lock).
  //   mozilla::profiler::install_memory_counter(true);
  // #endif

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}

void profiler_ensure_started(PowerOfTwo32 aCapacity, double aInterval,
                             uint32_t aFeatures, const char** aFilters,
                             uint32_t aFilterCount,
                             const Maybe<double>& aDuration) {
  LOG("profiler_ensure_started");

  // bool startedProfiler = false; (See TODO below)
  SamplerThread* samplerThread = nullptr;
  {
    PSAutoLock lock;

    // Initialize if necessary.
    if (!CorePS::Exists()) {
      profiler_init(nullptr);
    }

    if (ActivePS::Exists(lock)) {
      // The profiler is active.
      if (!ActivePS::Equals(lock, aCapacity, aDuration, aInterval, aFeatures,
                            aFilters, aFilterCount)) {
        // Stop and restart with different settings.
        samplerThread = locked_profiler_stop(lock);
        locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                              aFilterCount, aDuration);
        // startedProfiler = true; (See TODO below)
      }
    } else {
      // The profiler is stopped.
      locked_profiler_start(lock, aCapacity, aInterval, aFeatures, aFilters,
                            aFilterCount, aDuration);
      // startedProfiler = true; (See TODO below)
    }
  }

  // TODO: Install memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   // Start counting memory allocations (outside of the lock because this
  //   // may call profiler_add_sampled_counter, which would attempt to take
  //   // the lock).
  //   mozilla::profiler::install_memory_counter(true);
  // #endif

  // We do these operations with gPSMutex unlocked. The comments in
  // profiler_stop() explain why.
  if (samplerThread) {
    delete samplerThread;
  }
}

[[nodiscard]] static SamplerThread* locked_profiler_stop(PSLockRef aLock) {
  LOG("locked_profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists() && ActivePS::Exists(aLock));

  // At the very start, clear RacyFeatures.
  RacyFeatures::SetInactive();

  // TODO: Uninstall memory counter if it is possible from mozglue.
  // #if defined(MOZ_REPLACE_MALLOC) && defined(MOZ_PROFILER_MEMORY)
  //   mozilla::profiler::install_memory_counter(false);
  // #endif

  // Remove support for pushing/popping labels in mozglue.
  RegisterProfilerLabelEnterExit(nullptr, nullptr);

  // Stop sampling live threads.
  const Vector<LiveProfiledThreadData>& liveProfiledThreads =
      ActivePS::LiveProfiledThreads(aLock);
  for (auto& thread : liveProfiledThreads) {
    RegisteredThread* registeredThread = thread.mRegisteredThread;
    registeredThread->RacyRegisteredThread().SetIsBeingProfiled(false);
  }

  // The Stop() call doesn't actually stop Run(); that happens in this
  // function's caller when the sampler thread is destroyed. Stop() just gives
  // the SamplerThread a chance to do some cleanup with gPSMutex locked.
  SamplerThread* samplerThread = ActivePS::Destroy(aLock);
  samplerThread->Stop(aLock);

  return samplerThread;
}

void profiler_stop() {
  LOG("profiler_stop");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  SamplerThread* samplerThread;
  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    samplerThread = locked_profiler_stop(lock);
  }

  // We delete with gPSMutex unlocked. Otherwise we would get a deadlock: we
  // would be waiting here with gPSMutex locked for SamplerThread::Run() to
  // return so the join operation within the destructor can complete, but Run()
  // needs to lock gPSMutex to return.
  //
  // Because this call occurs with gPSMutex unlocked, it -- including the final
  // iteration of Run()'s loop -- must be able to detect deactivation and
  // return in a way that's safe with respect to other gPSMutex-locking
  // operations that may have occurred in the meantime.
  delete samplerThread;
}

bool profiler_is_paused() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return false;
  }

  return ActivePS::IsPaused(lock);
}

void profiler_pause() {
  LOG("profiler_pause");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    RacyFeatures::SetPaused();
    ActivePS::SetIsPaused(lock, true);
    ActivePS::Buffer(lock).AddEntry(ProfileBufferEntry::Pause(profiler_time()));
  }
}

void profiler_resume() {
  LOG("profiler_resume");

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  {
    PSAutoLock lock;

    if (!ActivePS::Exists(lock)) {
      return;
    }

    ActivePS::Buffer(lock).AddEntry(
        ProfileBufferEntry::Resume(profiler_time()));
    ActivePS::SetIsPaused(lock, false);
    RacyFeatures::SetUnpaused();
  }
}

bool profiler_feature_active(uint32_t aFeature) {
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  return RacyFeatures::IsActiveWithFeature(aFeature);
}

void profiler_add_sampled_counter(BaseProfilerCount* aCounter) {
  DEBUG_LOG("profiler_add_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock;
  CorePS::AppendCounter(lock, aCounter);
}

void profiler_remove_sampled_counter(BaseProfilerCount* aCounter) {
  DEBUG_LOG("profiler_remove_sampled_counter(%s)", aCounter->mLabel);
  PSAutoLock lock;
  // Note: we don't enforce a final sample, though we could do so if the
  // profiler was active.
  CorePS::RemoveCounter(lock, aCounter);
}

ProfilingStack* profiler_register_thread(const char* aName,
                                         void* aGuessStackTop) {
  DEBUG_LOG("profiler_register_thread(%s)", aName);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  void* stackTop = GetStackTop(aGuessStackTop);
  return locked_register_thread(lock, aName, stackTop);
}
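
// Hypothetical usage sketch: register a worker thread so that it can be
// profiled, using a local variable's address to approximate the stack top:
//
//   static void WorkerThreadMain() {
//     char stackTopGuess;
//     profiler_register_thread("Worker", &stackTopGuess);
//     // ... do the thread's work ...
//     profiler_unregister_thread();
//   }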

void profiler_unregister_thread() {
  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  PSAutoLock lock;

  RegisteredThread* registeredThread = FindCurrentThreadRegisteredThread(lock);
  MOZ_RELEASE_ASSERT(registeredThread ==
                     TLSRegisteredThread::RegisteredThread(lock));
  if (registeredThread) {
    RefPtr<ThreadInfo> info = registeredThread->Info();

    DEBUG_LOG("profiler_unregister_thread: %s", info->Name());

    if (ActivePS::Exists(lock)) {
      ActivePS::UnregisterThread(lock, registeredThread);
    }

    // Clear the pointer to the RegisteredThread object that we're about to
    // destroy.
    TLSRegisteredThread::SetRegisteredThread(lock, nullptr);

    // Remove the thread from the list of registered threads. This deletes the
    // registeredThread object.
    CorePS::RemoveRegisteredThread(lock, registeredThread);
  } else {
    // There are two ways FindCurrentThreadRegisteredThread() might have failed.
    //
    // - TLSRegisteredThread::Init() failed in locked_register_thread().
    //
    // - We've already called profiler_unregister_thread() for this thread.
    //   (Whether or not it should, this does happen in practice.)
    //
    // Either way, TLSRegisteredThread should be empty.
    MOZ_RELEASE_ASSERT(!TLSRegisteredThread::RegisteredThread(lock));
  }
}

void profiler_register_page(uint64_t aBrowsingContextID,
                            uint64_t aInnerWindowID, const std::string& aUrl,
                            uint64_t aEmbedderInnerWindowID) {
  DEBUG_LOG("profiler_register_page(%" PRIu64 ", %" PRIu64 ", %s, %" PRIu64 ")",
            aBrowsingContextID, aInnerWindowID, aUrl.c_str(),
            aEmbedderInnerWindowID);

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  // When a BrowsingContext is first loaded, the first URL loaded in it will be
  // about:blank. Because of that, this call keeps the first non-about:blank
  // registration of the window and discards the previous one.
  RefPtr<PageInformation> pageInfo = new PageInformation(
      aBrowsingContextID, aInnerWindowID, aUrl, aEmbedderInnerWindowID);
  CorePS::AppendRegisteredPage(lock, std::move(pageInfo));

  // After appending the given page to CorePS, look for expired pages and
  // remove them if there are any.
  if (ActivePS::Exists(lock)) {
    ActivePS::DiscardExpiredPages(lock);
  }
}

void profiler_unregister_page(uint64_t aRegisteredInnerWindowID) {
  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  PSAutoLock lock;

  // During unregistration, if the profiler is active, we have to keep the
  // page information, since there may be some markers associated with the
  // given page. But if the profiler is not active, we have no reason to keep
  // the page information, because there can't be any marker associated with
  // it.
  if (ActivePS::Exists(lock)) {
    ActivePS::UnregisterPage(lock, aRegisteredInnerWindowID);
  } else {
    CorePS::RemoveRegisteredPage(lock, aRegisteredInnerWindowID);
  }
}

void profiler_clear_all_pages() {
  if (!CorePS::Exists()) {
    // This function can be called after the main thread has already shut down.
    return;
  }

  {
    PSAutoLock lock;
    CorePS::ClearRegisteredPages(lock);
    if (ActivePS::Exists(lock)) {
      ActivePS::ClearUnregisteredPages(lock);
    }
  }
}

void profiler_thread_sleep() {
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
      TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return;
  }

  racyRegisteredThread->SetSleeping();
}

void profiler_thread_wake() {
  // This function runs both on and off the main thread.

  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
      TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return;
  }

  racyRegisteredThread->SetAwake();
}

bool detail::IsThreadBeingProfiled() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  const RacyRegisteredThread* racyRegisteredThread =
      TLSRegisteredThread::RacyRegisteredThread();
  return racyRegisteredThread && racyRegisteredThread->IsBeingProfiled();
}

bool profiler_thread_is_sleeping() {
  MOZ_RELEASE_ASSERT(CorePS::IsMainThread());
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  RacyRegisteredThread* racyRegisteredThread =
      TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread) {
    return false;
  }
  return racyRegisteredThread->IsSleeping();
}

double profiler_time() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  TimeDuration delta = TimeStamp::NowUnfuzzed() - CorePS::ProcessStartTime();
  return delta.ToMilliseconds();
}

UniqueProfilerBacktrace profiler_get_backtrace() {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  PSAutoLock lock;

  if (!ActivePS::Exists(lock)) {
    return nullptr;
  }

  RegisteredThread* registeredThread =
      TLSRegisteredThread::RegisteredThread(lock);
  if (!registeredThread) {
    MOZ_ASSERT(registeredThread);
    return nullptr;
  }

  int tid = profiler_current_thread_id();

  TimeStamp now = TimeStamp::NowUnfuzzed();

  Registers regs;
#if defined(HAVE_NATIVE_UNWIND)
  regs.SyncPopulate();
#else
  regs.Clear();
#endif

  auto bufferManager = MakeUnique<ProfileChunkedBuffer>(
      ProfileChunkedBuffer::ThreadSafety::WithoutMutex,
      MakeUnique<ProfileBufferChunkManagerSingle>(scExpectedMaximumStackSize));
  auto buffer = MakeUnique<ProfileBuffer>(*bufferManager);

  DoSyncSample(lock, *registeredThread, now, regs, *buffer.get());

  return UniqueProfilerBacktrace(new ProfilerBacktrace(
      "SyncProfile", tid, std::move(bufferManager), std::move(buffer)));
}
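
// The returned backtrace is typically attached to a marker payload, e.g.
// passed as the aCause argument of profiler_tracing_marker() below.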

void ProfilerBacktraceDestructor::operator()(ProfilerBacktrace* aBacktrace) {
  delete aBacktrace;
}

bool profiler_is_locked_on_current_thread() {
  // This function is used to help users avoid calling `profiler_...` functions
  // when the profiler may already have a lock in place, which would prevent a
  // 2nd recursive lock (resulting in a crash or a never-ending wait).
  // So we must return `true` for any of:
  // - The main profiler mutex, used by most functions, and/or
  // - The buffer mutex, used directly in some functions without locking the
  //   main mutex, e.g., marker-related functions.
  return PSAutoLock::IsLockedOnCurrentThread() ||
         CorePS::CoreBuffer().IsThreadSafeAndLockedOnCurrentThread();
}
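
// Hypothetical usage sketch: bail out of a reentrant hook when the profiler
// already holds a lock on this thread, since calling back into the profiler
// could deadlock:
//
//   if (profiler_is_locked_on_current_thread()) {
//     return;
//   }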

static void racy_profiler_add_marker(const char* aMarkerName,
                                     ProfilingCategoryPair aCategoryPair,
                                     const ProfilerMarkerPayload* aPayload) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!profiler_can_accept_markers()) {
    return;
  }

  // Note that the result of the above test could change before we actually
  // record the marker. Because of this imprecision it's possible to miss a
  // marker or record one we shouldn't. Either way is not a big deal.

  RacyRegisteredThread* racyRegisteredThread =
      TLSRegisteredThread::RacyRegisteredThread();
  if (!racyRegisteredThread || !racyRegisteredThread->IsBeingProfiled()) {
    return;
  }

  TimeStamp origin = (aPayload && !aPayload->GetStartTime().IsNull())
                         ? aPayload->GetStartTime()
                         : TimeStamp::NowUnfuzzed();
  TimeDuration delta = origin - CorePS::ProcessStartTime();
  CorePS::CoreBuffer().PutObjects(
      ProfileBufferEntry::Kind::MarkerData, racyRegisteredThread->ThreadId(),
      WrapProfileBufferUnownedCString(aMarkerName),
      static_cast<uint32_t>(aCategoryPair), aPayload, delta.ToMilliseconds());
}

void profiler_add_marker(const char* aMarkerName,
                         ProfilingCategoryPair aCategoryPair,
                         const ProfilerMarkerPayload& aPayload) {
  racy_profiler_add_marker(aMarkerName, aCategoryPair, &aPayload);
}

void profiler_add_marker(const char* aMarkerName,
                         ProfilingCategoryPair aCategoryPair) {
  racy_profiler_add_marker(aMarkerName, aCategoryPair, nullptr);
}

// This is a simplified version of profiler_add_marker that can be easily passed
// into the JS engine.
void profiler_add_js_marker(const char* aMarkerName) {
  AUTO_PROFILER_STATS(base_add_marker);
  profiler_add_marker(aMarkerName, ProfilingCategoryPair::JS);
}

// This logic needs to add a marker for a different thread, so we actually need
// to lock here.
void profiler_add_marker_for_thread(int aThreadId,
                                    ProfilingCategoryPair aCategoryPair,
                                    const char* aMarkerName,
                                    const ProfilerMarkerPayload& aPayload) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  if (!profiler_can_accept_markers()) {
    return;
  }

#ifdef DEBUG
  {
    PSAutoLock lock;
    if (!ActivePS::Exists(lock)) {
      return;
    }

    // Assert that our thread ID makes sense
    bool realThread = false;
    const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
        CorePS::RegisteredThreads(lock);
    for (auto& thread : registeredThreads) {
      RefPtr<ThreadInfo> info = thread->Info();
      if (info->ThreadId() == aThreadId) {
        realThread = true;
        break;
      }
    }
    MOZ_ASSERT(realThread, "Invalid thread id");
  }
#endif

  // Insert the marker into the buffer
  TimeStamp origin = (!aPayload.GetStartTime().IsNull())
                         ? aPayload.GetStartTime()
                         : TimeStamp::NowUnfuzzed();
  TimeDuration delta = origin - CorePS::ProcessStartTime();
  CorePS::CoreBuffer().PutObjects(
      ProfileBufferEntry::Kind::MarkerData, aThreadId,
      WrapProfileBufferUnownedCString(aMarkerName),
      static_cast<uint32_t>(aCategoryPair), &aPayload, delta.ToMilliseconds());
}

void profiler_add_marker_for_mainthread(ProfilingCategoryPair aCategoryPair,
                                        const char* aMarkerName,
                                        const ProfilerMarkerPayload& aPayload) {
  profiler_add_marker_for_thread(CorePS::MainThreadId(), aCategoryPair,
                                 aMarkerName, aPayload);
}

void profiler_tracing_marker(const char* aCategoryString,
                             const char* aMarkerName,
                             ProfilingCategoryPair aCategoryPair,
                             TracingKind aKind,
                             const Maybe<uint64_t>& aInnerWindowID) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  VTUNE_TRACING(aMarkerName, aKind);

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!profiler_can_accept_markers()) {
    return;
  }

  AUTO_PROFILER_STATS(base_add_marker_with_TracingMarkerPayload);
  profiler_add_marker(
      aMarkerName, aCategoryPair,
      TracingMarkerPayload(aCategoryString, aKind, aInnerWindowID));
}

void profiler_tracing_marker(const char* aCategoryString,
                             const char* aMarkerName,
                             ProfilingCategoryPair aCategoryPair,
                             TracingKind aKind, UniqueProfilerBacktrace aCause,
                             const Maybe<uint64_t>& aInnerWindowID) {
  MOZ_RELEASE_ASSERT(CorePS::Exists());

  VTUNE_TRACING(aMarkerName, aKind);

  // This function is hot enough that we use RacyFeatures, not ActivePS.
  if (!profiler_can_accept_markers()) {
    return;
  }

  AUTO_PROFILER_STATS(base_add_marker_with_TracingMarkerPayload);
  profiler_add_marker(aMarkerName, aCategoryPair,
                      TracingMarkerPayload(aCategoryString, aKind,
                                           aInnerWindowID, std::move(aCause)));
}

void profiler_add_text_marker(const char* aMarkerName, const std::string& aText,
                              ProfilingCategoryPair aCategoryPair,
                              const TimeStamp& aStartTime,
                              const TimeStamp& aEndTime,
                              const Maybe<uint64_t>& aInnerWindowID,
                              UniqueProfilerBacktrace aCause) {
  AUTO_PROFILER_STATS(base_add_marker_with_TextMarkerPayload);
  profiler_add_marker(aMarkerName, aCategoryPair,
                      TextMarkerPayload(aText, aStartTime, aEndTime,
                                        aInnerWindowID, std::move(aCause)));
}
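
// Hypothetical usage sketch: record a text marker covering a timed operation
// ("MyOperation" and the payload text are illustrative only):
//
//   TimeStamp start = TimeStamp::NowUnfuzzed();
//   // ... do the work to be measured ...
//   profiler_add_text_marker("MyOperation", "details",
//                            ProfilingCategoryPair::OTHER, start,
//                            TimeStamp::NowUnfuzzed(), Nothing(), nullptr);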

// NOTE: aCollector's methods will be called while the target thread is paused.
// Doing things in those methods like allocating -- which may try to claim
// locks -- is a surefire way to deadlock.
void profiler_suspend_and_sample_thread(int aThreadId, uint32_t aFeatures,
                                        ProfilerStackCollector& aCollector,
                                        bool aSampleNative /* = true */) {
  // Lock the profiler mutex
  PSAutoLock lock;

  const Vector<UniquePtr<RegisteredThread>>& registeredThreads =
      CorePS::RegisteredThreads(lock);
  for (auto& thread : registeredThreads) {
    RefPtr<ThreadInfo> info = thread->Info();
    RegisteredThread& registeredThread = *thread.get();

    if (info->ThreadId() == aThreadId) {
      if (info->IsMainThread()) {
        aCollector.SetIsMainThread();
      }

      // Allocate the space for the native stack
      NativeStack nativeStack;

      // Suspend, sample, and then resume the target thread.
      Sampler sampler(lock);
      TimeStamp now = TimeStamp::NowUnfuzzed();
      sampler.SuspendAndSampleAndResumeThread(
          lock, registeredThread, now,
          [&](const Registers& aRegs, const TimeStamp& aNow) {
            // The target thread is now suspended. Collect a native
            // backtrace, and call the callback.
            bool isSynchronous = false;
#if defined(HAVE_FASTINIT_NATIVE_UNWIND)
            if (aSampleNative) {
              // We can only use FramePointerStackWalk or MozStackWalk from
              // suspend_and_sample_thread as other stackwalking methods may
              // not be initialized.
#  if defined(USE_FRAME_POINTER_STACK_WALK)
              DoFramePointerBacktrace(lock, registeredThread, aRegs,
                                      nativeStack);
#  elif defined(USE_MOZ_STACK_WALK)
              DoMozStackWalkBacktrace(lock, registeredThread, aRegs,
                                      nativeStack);
#  else
#    error "Invalid configuration"
#  endif

              MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                          nativeStack, aCollector);
            } else
#endif
            {
              MergeStacks(aFeatures, isSynchronous, registeredThread, aRegs,
                          nativeStack, aCollector);

              if (ProfilerFeature::HasLeaf(aFeatures)) {
                aCollector.CollectNativeLeafAddr((void*)aRegs.mPC);
              }
            }
          });

      // NOTE: Make sure to disable the sampler before it is destroyed, in case
      // the profiler is running at the same time.
      sampler.Disable(lock);
      break;
    }
  }
}

// END externally visible functions
////////////////////////////////////////////////////////////////////////

}  // namespace baseprofiler
}  // namespace mozilla