1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "mozilla/BackgroundHangMonitor.h"
8 
9 #include <utility>
10 
11 #include "GeckoProfiler.h"
12 #include "HangDetails.h"
13 #include "ThreadStackHelper.h"
14 #include "mozilla/ArrayUtils.h"
15 #include "mozilla/CPUUsageWatcher.h"
16 #include "mozilla/LinkedList.h"
17 #include "mozilla/Monitor.h"
18 #include "mozilla/Preferences.h"
19 #include "mozilla/StaticPrefs_toolkit.h"
20 #include "mozilla/Services.h"
21 #include "mozilla/StaticPtr.h"
22 #include "mozilla/Telemetry.h"
23 #include "mozilla/ThreadLocal.h"
24 #include "mozilla/Unused.h"
25 #include "mozilla/dom/RemoteType.h"
26 #include "nsAppDirectoryServiceDefs.h"
27 #include "nsIObserver.h"
28 #include "nsIObserverService.h"
29 #include "nsIThread.h"
30 #include "nsThreadUtils.h"
31 #include "nsXULAppAPI.h"
32 #include "prinrval.h"
33 #include "prthread.h"
34 
35 #include <algorithm>
36 
// Activate BHR only for one in every BHR_BETA_MOD users.
// We're doing experimentation with collecting a lot more data from BHR, and
// don't want to enable it for beta users at the moment. We can scale this up in
// the future.
// NOTE: the definition deliberately has no trailing semicolon, so that the
// macro expands to a plain expression and can be used inside larger
// expressions (comparisons, parenthesised sub-expressions, ...).
#define BHR_BETA_MOD INT32_MAX
42 
// Upper bound on the depth of the call stack kept for a reported thread hang.
// The value corresponds to the 99.9th percentile of the thread hang stack
// depths reported by Telemetry.
static constexpr size_t kMaxThreadHangStackDepth = 30;

// Period, in milliseconds, at which the global and per-process CPU usage is
// sampled in order to determine whether there is high external CPU usage.
static constexpr int32_t kCheckCPUIntervalMilliseconds = 2000;
51 
// Binary predicate intended for std::unique: reports whether two adjacent
// call stack entries can be collapsed into one. That is the case only when
// both entries are the same "(chrome script)" or "(content script)" label.
bool StackScriptEntriesCollapser(const char* aStackEntry,
                                 const char* aAnotherStackEntry) {
  // Different entries are never collapsed.
  if (strcmp(aStackEntry, aAnotherStackEntry) != 0) {
    return false;
  }

  const bool isChromeScript = strcmp(aStackEntry, "(chrome script)") == 0;
  const bool isContentScript = strcmp(aStackEntry, "(content script)") == 0;
  return isChromeScript || isContentScript;
}
60 
61 namespace mozilla {
62 
63 /**
64  * BackgroundHangManager is the global object that
65  * manages all instances of BackgroundHangThread.
66  */
67 class BackgroundHangManager : public nsIObserver {
68  private:
69   // Background hang monitor thread function
MonitorThread(void * aData)70   static void MonitorThread(void* aData) {
71     AUTO_PROFILER_REGISTER_THREAD("BgHangMonitor");
72     NS_SetCurrentThreadName("BHMgr Monitor");
73 
74     /* We do not hold a reference to BackgroundHangManager here
75        because the monitor thread only exists as long as the
76        BackgroundHangManager instance exists. We stop the monitor
77        thread in the BackgroundHangManager destructor, and we can
78        only get to the destructor if we don't hold a reference here. */
79     static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
80   }
81 
82   // Hang monitor thread
83   PRThread* mHangMonitorThread;
84   // Stop hang monitoring
85   bool mShutdown;
86 
87   BackgroundHangManager(const BackgroundHangManager&);
88   BackgroundHangManager& operator=(const BackgroundHangManager&);
89   void RunMonitorThread();
90 
91  public:
92   NS_DECL_THREADSAFE_ISUPPORTS
93   NS_DECL_NSIOBSERVER
94   static StaticRefPtr<BackgroundHangManager> sInstance;
95   static bool sDisabled;
96 
97   // Lock for access to members of this class
98   Monitor mLock;
99   // Current time as seen by hang monitors
100   TimeStamp mNow;
101   // List of BackgroundHangThread instances associated with each thread
102   LinkedList<BackgroundHangThread> mHangThreads;
103 
104   // Unwinding and reporting of hangs is despatched to this thread.
105   nsCOMPtr<nsIThread> mHangProcessingThread;
106 
107   // Used for recording a permahang in case we don't ever make it back to
108   // the main thread to record/send it.
109   nsCOMPtr<nsIFile> mPermahangFile;
110 
111   // Allows us to watch CPU usage and annotate hangs when the system is
112   // under high external load.
113   CPUUsageWatcher mCPUUsageWatcher;
114 
Shutdown()115   void Shutdown() {
116     MonitorAutoLock autoLock(mLock);
117     mShutdown = true;
118     autoLock.Notify();
119   }
120 
121   // Attempt to wakeup the hang monitor thread.
Wakeup()122   void Wakeup() {
123     mLock.AssertCurrentThreadOwns();
124     mLock.NotifyAll();
125   }
126 
127   BackgroundHangManager();
128 
129  private:
130   virtual ~BackgroundHangManager();
131 };
132 
NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver)

/**
 * nsIObserver callback.
 *
 * - "browser-delayed-startup-finished": resolves the location of the
 *   permahang file inside the user profile directory and, if a hang
 *   processing thread exists, dispatches a runnable that submits a permahang
 *   persisted by a previous session.
 * - "profile-after-change": runs BackgroundHangMonitor::DisableOnBeta().
 *
 * In both cases the manager unregisters itself for the handled topic.
 *
 * @return NS_OK for the two topics above, NS_ERROR_UNEXPECTED otherwise.
 */
NS_IMETHODIMP
BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic,
                               const char16_t* aData) {
  if (!strcmp(aTopic, "browser-delayed-startup-finished")) {
    {
      // The lock only needs to cover the members touched here; it is released
      // before calling out to the observer service.
      MonitorAutoLock autoLock(mLock);
      nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR,
                                           getter_AddRefs(mPermahangFile));
      if (NS_SUCCEEDED(rv)) {
        mPermahangFile->AppendNative("last_permahang.bin"_ns);
      } else {
        mPermahangFile = nullptr;
      }

      if (mHangProcessingThread && mPermahangFile) {
        nsCOMPtr<nsIRunnable> submitRunnable =
            new SubmitPersistedPermahangRunnable(mPermahangFile);
        mHangProcessingThread->Dispatch(submitRunnable.forget());
      }
    }

    nsCOMPtr<nsIObserverService> observerService =
        mozilla::services::GetObserverService();
    MOZ_ASSERT(observerService);
    if (observerService) {
      // Unregister |this| rather than sInstance: sInstance is reset to null
      // by BackgroundHangMonitor::Shutdown(), which may already have run.
      observerService->RemoveObserver(this,
                                      "browser-delayed-startup-finished");
    }
  } else if (!strcmp(aTopic, "profile-after-change")) {
    // In the parent process this may call BackgroundHangMonitor::Shutdown(),
    // which sets sInstance to null.
    BackgroundHangMonitor::DisableOnBeta();
    nsCOMPtr<nsIObserverService> observerService =
        mozilla::services::GetObserverService();
    MOZ_ASSERT(observerService);
    if (observerService) {
      // Must use |this| here: sInstance may have just been cleared above, and
      // passing a null observer would leave us registered.
      observerService->RemoveObserver(this, "profile-after-change");
    }
  } else {
    return NS_ERROR_UNEXPECTED;
  }

  return NS_OK;
}
171 
172 /**
173  * BackgroundHangThread is a per-thread object that is used
174  * by all instances of BackgroundHangMonitor to monitor hangs.
175  */
176 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread> {
177  private:
178   static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey;
179   static bool sTlsKeyInitialized;
180 
181   BackgroundHangThread(const BackgroundHangThread&);
182   BackgroundHangThread& operator=(const BackgroundHangThread&);
183   ~BackgroundHangThread();
184 
185   /* Keep a reference to the manager, so we can keep going even
186      after BackgroundHangManager::Shutdown is called. */
187   const RefPtr<BackgroundHangManager> mManager;
188   // Unique thread ID for identification
189   const PRThread* mThreadID;
190 
191   void Update();
192 
193  public:
194   NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
195   /**
196    * Returns the BackgroundHangThread associated with the
197    * running thread. Note that this will not find private
198    * BackgroundHangThread threads.
199    *
200    * @return BackgroundHangThread*, or nullptr if no thread
201    *         is found.
202    */
203   static BackgroundHangThread* FindThread();
204 
Startup()205   static void Startup() {
206     /* We can tolerate init() failing. */
207     sTlsKeyInitialized = sTlsKey.init();
208   }
209 
210   // Hang timeout
211   const TimeDuration mTimeout;
212   // PermaHang timeout
213   const TimeDuration mMaxTimeout;
214   // Time at last activity
215   TimeStamp mLastActivity;
216   // Time when a hang started
217   TimeStamp mHangStart;
218   // Is the thread in a hang
219   bool mHanging;
220   // Is the thread in a waiting state
221   bool mWaiting;
222   // Is the thread dedicated to a single BackgroundHangMonitor
223   BackgroundHangMonitor::ThreadType mThreadType;
224 #ifdef MOZ_GECKO_PROFILER
225   // Platform-specific helper to get hang stacks
226   ThreadStackHelper mStackHelper;
227 #endif
228   // Stack of current hang
229   HangStack mHangStack;
230   // Annotations for the current hang
231   BackgroundHangAnnotations mAnnotations;
232   // Annotators registered for this thread
233   BackgroundHangAnnotators mAnnotators;
234   // The name of the runnable which is hanging the current process
235   nsCString mRunnableName;
236   // The name of the thread which is being monitored
237   nsCString mThreadName;
238 
239   BackgroundHangThread(const char* aName, uint32_t aTimeoutMs,
240                        uint32_t aMaxTimeoutMs,
241                        BackgroundHangMonitor::ThreadType aThreadType =
242                            BackgroundHangMonitor::THREAD_SHARED);
243 
244   // Report a hang; aManager->mLock IS locked. The hang will be processed
245   // off-main-thread, and will then be submitted back.
246   void ReportHang(TimeDuration aHangTime,
247                   PersistedToDisk aPersistedToDisk = PersistedToDisk::No);
248   // Report a permanent hang; aManager->mLock IS locked
249   void ReportPermaHang();
250   // Called by BackgroundHangMonitor::NotifyActivity
NotifyActivity()251   void NotifyActivity() {
252     MonitorAutoLock autoLock(mManager->mLock);
253     Update();
254   }
255   // Called by BackgroundHangMonitor::NotifyWait
NotifyWait()256   void NotifyWait() {
257     MonitorAutoLock autoLock(mManager->mLock);
258 
259     if (mWaiting) {
260       return;
261     }
262 
263     Update();
264     if (mHanging) {
265       // We were hanging! We're done with that now, so let's report it.
266       // ReportHang() doesn't do much work on the current thread, and is
267       // safe to call from any thread as long as we're holding the lock.
268       ReportHang(mLastActivity - mHangStart);
269       mHanging = false;
270     }
271     mWaiting = true;
272   }
273 
274   // Returns true if this thread is (or might be) shared between other
275   // BackgroundHangMonitors for the monitored thread.
IsShared()276   bool IsShared() {
277     return mThreadType == BackgroundHangMonitor::THREAD_SHARED;
278   }
279 };
280 
281 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
282 bool BackgroundHangManager::sDisabled = false;
283 
284 MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey;
285 bool BackgroundHangThread::sTlsKeyInitialized;
286 
BackgroundHangManager()287 BackgroundHangManager::BackgroundHangManager()
288     : mShutdown(false), mLock("BackgroundHangManager") {
289   // Lock so we don't race against the new monitor thread
290   MonitorAutoLock autoLock(mLock);
291 
292   mHangMonitorThread = PR_CreateThread(
293       PR_USER_THREAD, MonitorThread, this, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
294       PR_JOINABLE_THREAD, nsIThreadManager::DEFAULT_STACK_SIZE);
295 
296   MOZ_ASSERT(mHangMonitorThread, "Failed to create BHR monitor thread");
297 
298   DebugOnly<nsresult> rv = NS_NewNamedThread(
299       "BHMgr Processor", getter_AddRefs(mHangProcessingThread));
300   MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangProcessingThread,
301              "Failed to create BHR processing thread");
302 }
303 
~BackgroundHangManager()304 BackgroundHangManager::~BackgroundHangManager() {
305   MOZ_ASSERT(mShutdown, "Destruction without Shutdown call");
306   MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors");
307   MOZ_ASSERT(mHangMonitorThread, "No monitor thread");
308   MOZ_ASSERT(mHangProcessingThread, "No processing thread");
309 
310   // PR_CreateThread could have failed above due to resource limitation
311   if (mHangMonitorThread) {
312     // The monitor thread can only live as long as the instance lives
313     PR_JoinThread(mHangMonitorThread);
314   }
315 
316   // Similarly, NS_NewNamedThread above could have failed.
317   if (mHangProcessingThread) {
318     mHangProcessingThread->Shutdown();
319   }
320 }
321 
RunMonitorThread()322 void BackgroundHangManager::RunMonitorThread() {
323   // Keep us locked except when waiting
324   MonitorAutoLock autoLock(mLock);
325 
326   /* mNow is updated at various intervals determined by waitTime.
327      However, if an update latency is too long (due to CPU scheduling, system
328      sleep, etc.), we don't update mNow at all. This is done so that
329      long latencies in our timing are not detected as hangs. systemTime is
330      used to track TimeStamp::Now() and determine our latency. */
331 
332   TimeStamp systemTime = TimeStamp::Now();
333   // Default values for the first iteration of thread loop
334   TimeDuration waitTime;
335   TimeDuration recheckTimeout;
336   TimeStamp lastCheckedCPUUsage = systemTime;
337   TimeDuration checkCPUUsageInterval =
338       TimeDuration::FromMilliseconds(kCheckCPUIntervalMilliseconds);
339 
340   while (!mShutdown) {
341     autoLock.Wait(waitTime);
342 
343     TimeStamp newTime = TimeStamp::Now();
344     TimeDuration systemInterval = newTime - systemTime;
345     systemTime = newTime;
346 
347     if (systemTime - lastCheckedCPUUsage > checkCPUUsageInterval) {
348       Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
349       lastCheckedCPUUsage = systemTime;
350     }
351 
352     /* waitTime is a quarter of the shortest timeout value; If our timing
353        latency is low enough (less than half the shortest timeout value),
354        we can update mNow. */
355     if (MOZ_LIKELY(waitTime != TimeDuration::Forever() &&
356                    systemInterval < waitTime * 2)) {
357       mNow += systemInterval;
358     }
359 
360     /* If it's before the next recheck timeout, and our wait did not get
361        interrupted, we can keep the current waitTime and skip iterating
362        through hang monitors. */
363     if (MOZ_LIKELY(systemInterval < recheckTimeout &&
364                    systemInterval >= waitTime)) {
365       recheckTimeout -= systemInterval;
366       continue;
367     }
368 
369     /* We are in one of the following scenarios,
370      - Hang or permahang recheck timeout
371      - Thread added/removed
372      - Thread wait or hang ended
373        In all cases, we want to go through our list of hang
374        monitors and update waitTime and recheckTimeout. */
375     waitTime = TimeDuration::Forever();
376     recheckTimeout = TimeDuration::Forever();
377 
378     // Locally hold mNow
379     TimeStamp now = mNow;
380 
381     // iterate through hang monitors
382     for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
383          currentThread; currentThread = currentThread->getNext()) {
384       if (currentThread->mWaiting) {
385         // Thread is waiting, not hanging
386         continue;
387       }
388       TimeStamp lastActivity = currentThread->mLastActivity;
389       TimeDuration hangTime = now - lastActivity;
390       if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
391         // A permahang started
392         // Skip subsequent iterations and tolerate a race on mWaiting here
393         currentThread->mWaiting = true;
394         currentThread->mHanging = false;
395         currentThread->ReportPermaHang();
396         continue;
397       }
398 
399       if (MOZ_LIKELY(!currentThread->mHanging)) {
400         if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
401 #ifdef MOZ_GECKO_PROFILER
402           // A hang started, collect a stack
403           currentThread->mStackHelper.GetStack(
404               currentThread->mHangStack, currentThread->mRunnableName, true);
405 #endif
406 
407           // If we hang immediately on waking, then the most recently collected
408           // CPU usage is going to be an average across the whole time we were
409           // sleeping. Accordingly, we want to make sure that when we hang, we
410           // collect a fresh value.
411           if (systemTime != lastCheckedCPUUsage) {
412             Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
413             lastCheckedCPUUsage = systemTime;
414           }
415 
416           currentThread->mHangStart = lastActivity;
417           currentThread->mHanging = true;
418           currentThread->mAnnotations =
419               currentThread->mAnnotators.GatherAnnotations();
420         }
421       } else {
422         if (MOZ_LIKELY(lastActivity != currentThread->mHangStart)) {
423           // A hang ended
424           currentThread->ReportHang(now - currentThread->mHangStart);
425           currentThread->mHanging = false;
426         }
427       }
428 
429       /* If we are hanging, the next time we check for hang status is when
430          the hang turns into a permahang. If we're not hanging, the next
431          recheck timeout is when we may be entering a hang. */
432       TimeDuration nextRecheck;
433       if (currentThread->mHanging) {
434         nextRecheck = currentThread->mMaxTimeout;
435       } else {
436         nextRecheck = currentThread->mTimeout;
437       }
438       recheckTimeout =
439           TimeDuration::Min(recheckTimeout, nextRecheck - hangTime);
440 
441       if (currentThread->mTimeout != TimeDuration::Forever()) {
442         /* We wait for a quarter of the shortest timeout
443            value to give mNow enough granularity. */
444         waitTime =
445             TimeDuration::Min(waitTime, currentThread->mTimeout / (int64_t)4);
446       }
447     }
448   }
449 
450   /* We are shutting down now.
451      Wait for all outstanding monitors to unregister. */
452   while (!mHangThreads.isEmpty()) {
453     autoLock.Wait();
454   }
455 }
456 
BackgroundHangThread(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,BackgroundHangMonitor::ThreadType aThreadType)457 BackgroundHangThread::BackgroundHangThread(
458     const char* aName, uint32_t aTimeoutMs, uint32_t aMaxTimeoutMs,
459     BackgroundHangMonitor::ThreadType aThreadType)
460     : mManager(BackgroundHangManager::sInstance),
461       mThreadID(PR_GetCurrentThread()),
462       mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
463                    ? TimeDuration::Forever()
464                    : TimeDuration::FromMilliseconds(aTimeoutMs)),
465       mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
466                       ? TimeDuration::Forever()
467                       : TimeDuration::FromMilliseconds(aMaxTimeoutMs)),
468       mLastActivity(mManager->mNow),
469       mHangStart(mLastActivity),
470       mHanging(false),
471       mWaiting(true),
472       mThreadType(aThreadType),
473       mThreadName(aName) {
474   if (sTlsKeyInitialized && IsShared()) {
475     sTlsKey.set(this);
476   }
477   // Lock here because LinkedList is not thread-safe
478   MonitorAutoLock autoLock(mManager->mLock);
479   // Add to thread list
480   mManager->mHangThreads.insertBack(this);
481   // Wake up monitor thread to process new thread
482   autoLock.Notify();
483 }
484 
~BackgroundHangThread()485 BackgroundHangThread::~BackgroundHangThread() {
486   // Lock here because LinkedList is not thread-safe
487   MonitorAutoLock autoLock(mManager->mLock);
488   // Remove from thread list
489   remove();
490   // Wake up monitor thread to process removed thread
491   autoLock.Notify();
492 
493   // We no longer have a thread
494   if (sTlsKeyInitialized && IsShared()) {
495     sTlsKey.set(nullptr);
496   }
497 }
498 
ReportHang(TimeDuration aHangTime,PersistedToDisk aPersistedToDisk)499 void BackgroundHangThread::ReportHang(TimeDuration aHangTime,
500                                       PersistedToDisk aPersistedToDisk) {
501   // Recovered from a hang; called on the monitor thread
502   // mManager->mLock IS locked
503 
504   HangDetails hangDetails(aHangTime,
505                           nsDependentCString(XRE_GetProcessTypeString()),
506                           NOT_REMOTE_TYPE, mThreadName, mRunnableName,
507                           std::move(mHangStack), std::move(mAnnotations));
508 
509   PersistedToDisk persistedToDisk = aPersistedToDisk;
510   if (aPersistedToDisk == PersistedToDisk::Yes && XRE_IsParentProcess() &&
511       mManager->mPermahangFile) {
512     auto res = WriteHangDetailsToFile(hangDetails, mManager->mPermahangFile);
513     persistedToDisk = res.isOk() ? PersistedToDisk::Yes : PersistedToDisk::No;
514   }
515 
516   // If the hang processing thread exists, we can process the native stack
517   // on it. Otherwise, we are unable to report a native stack, so we just
518   // report without one.
519   if (mManager->mHangProcessingThread) {
520     nsCOMPtr<nsIRunnable> processHangStackRunnable =
521         new ProcessHangStackRunnable(std::move(hangDetails), persistedToDisk);
522     mManager->mHangProcessingThread->Dispatch(
523         processHangStackRunnable.forget());
524   } else {
525     NS_WARNING("Unable to report native stack without a BHR processing thread");
526     RefPtr<nsHangDetails> hd =
527         new nsHangDetails(std::move(hangDetails), persistedToDisk);
528     hd->Submit();
529   }
530 
531   // If the profiler is enabled, add a marker.
532 #ifdef MOZ_GECKO_PROFILER
533   if (profiler_can_accept_markers()) {
534     struct HangMarker {
535       static constexpr Span<const char> MarkerTypeName() {
536         return MakeStringSpan("BHR-detected hang");
537       }
538       static void StreamJSONMarkerData(
539           baseprofiler::SpliceableJSONWriter& aWriter) {}
540       static MarkerSchema MarkerTypeDisplay() {
541         using MS = MarkerSchema;
542         MS schema{MS::Location::markerChart, MS::Location::markerTable};
543         return schema;
544       }
545     };
546 
547     const TimeStamp endTime = TimeStamp::NowUnfuzzed();
548     const TimeStamp startTime = endTime - aHangTime;
549     profiler_add_marker("BHR-detected hang", geckoprofiler::category::OTHER,
550                         {MarkerThreadId(mStackHelper.GetThreadId()),
551                          MarkerTiming::Interval(startTime, endTime)},
552                         HangMarker{});
553   }
554 #endif
555 }
556 
ReportPermaHang()557 void BackgroundHangThread::ReportPermaHang() {
558   // Permanently hanged; called on the monitor thread
559   // mManager->mLock IS locked
560 
561   // The significance of a permahang is that it's likely that we won't ever
562   // recover and be allowed to submit this hang. On the parent thread, we
563   // compensate for this by writing the hang details to disk on this thread,
564   // and in our next session we'll try to read those details
565   ReportHang(mMaxTimeout, PersistedToDisk::Yes);
566 }
567 
Update()568 MOZ_ALWAYS_INLINE void BackgroundHangThread::Update() {
569   TimeStamp now = mManager->mNow;
570   if (mWaiting) {
571     mLastActivity = now;
572     mWaiting = false;
573     /* We have to wake up the manager thread because when all threads
574        are waiting, the manager thread waits indefinitely as well. */
575     mManager->Wakeup();
576   } else {
577     TimeDuration duration = now - mLastActivity;
578     if (MOZ_UNLIKELY(duration >= mTimeout)) {
579       /* Wake up the manager thread to tell it that a hang ended */
580       mManager->Wakeup();
581     }
582     mLastActivity = now;
583   }
584 }
585 
FindThread()586 BackgroundHangThread* BackgroundHangThread::FindThread() {
587 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
588   if (BackgroundHangManager::sInstance == nullptr) {
589     MOZ_ASSERT(BackgroundHangManager::sDisabled,
590                "BackgroundHandleManager is not initialized");
591     return nullptr;
592   }
593 
594   if (sTlsKeyInitialized) {
595     // Use TLS if available
596     return sTlsKey.get();
597   }
598   // If TLS is unavailable, we can search through the thread list
599   RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
600   MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
601 
602   PRThread* threadID = PR_GetCurrentThread();
603   // Lock thread list for traversal
604   MonitorAutoLock autoLock(manager->mLock);
605   for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); thread;
606        thread = thread->getNext()) {
607     if (thread->mThreadID == threadID && thread->IsShared()) {
608       return thread;
609     }
610   }
611 #endif
612   // Current thread is not initialized
613   return nullptr;
614 }
615 
ShouldDisableOnBeta(const nsCString & clientID)616 bool BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString& clientID) {
617   MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid");
618   const char* suffix = clientID.get() + clientID.Length() - 4;
619   return strtol(suffix, NULL, 16) % BHR_BETA_MOD;
620 }
621 
DisableOnBeta()622 bool BackgroundHangMonitor::DisableOnBeta() {
623   nsAutoCString clientID;
624   nsresult rv =
625       Preferences::GetCString("toolkit.telemetry.cachedClientID", clientID);
626   bool telemetryEnabled = Telemetry::CanRecordPrereleaseData();
627 
628   if (!telemetryEnabled || NS_FAILED(rv) ||
629       BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) {
630     if (XRE_IsParentProcess()) {
631       BackgroundHangMonitor::Shutdown();
632     } else {
633       BackgroundHangManager::sDisabled = true;
634     }
635     return true;
636   }
637 
638   return false;
639 }
640 
Startup()641 void BackgroundHangMonitor::Startup() {
642   MOZ_RELEASE_ASSERT(NS_IsMainThread());
643 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
644   MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
645 
646   if (XRE_IsContentProcess() &&
647       StaticPrefs::toolkit_content_background_hang_monitor_disabled()) {
648     BackgroundHangManager::sDisabled = true;
649     return;
650   }
651 
652   nsCOMPtr<nsIObserverService> observerService =
653       mozilla::services::GetObserverService();
654   MOZ_ASSERT(observerService);
655 
656   if (!strcmp(MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
657     if (XRE_IsParentProcess()) {  // cached ClientID hasn't been read yet
658       BackgroundHangThread::Startup();
659       BackgroundHangManager::sInstance = new BackgroundHangManager();
660       Unused << NS_WARN_IF(
661           BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
662       observerService->AddObserver(BackgroundHangManager::sInstance,
663                                    "profile-after-change", false);
664       return;
665     } else if (DisableOnBeta()) {
666       return;
667     }
668   }
669 
670   BackgroundHangThread::Startup();
671   BackgroundHangManager::sInstance = new BackgroundHangManager();
672   Unused << NS_WARN_IF(
673       BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
674   if (XRE_IsParentProcess()) {
675     observerService->AddObserver(BackgroundHangManager::sInstance,
676                                  "browser-delayed-startup-finished", false);
677   }
678 #endif
679 }
680 
Shutdown()681 void BackgroundHangMonitor::Shutdown() {
682 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
683   if (BackgroundHangManager::sDisabled) {
684     MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized");
685     return;
686   }
687 
688   MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
689   BackgroundHangManager::sInstance->mCPUUsageWatcher.Uninit();
690   /* Scope our lock inside Shutdown() because the sInstance object can
691      be destroyed as soon as we set sInstance to nullptr below, and
692      we don't want to hold the lock when it's being destroyed. */
693   BackgroundHangManager::sInstance->Shutdown();
694   BackgroundHangManager::sInstance = nullptr;
695   BackgroundHangManager::sDisabled = true;
696 #endif
697 }
698 
BackgroundHangMonitor(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,ThreadType aThreadType)699 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
700                                              uint32_t aTimeoutMs,
701                                              uint32_t aMaxTimeoutMs,
702                                              ThreadType aThreadType)
703     : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread()
704                                            : nullptr) {
705 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
706 #  ifdef MOZ_VALGRIND
707   // If we're running on Valgrind, we'll be making forward progress at a
708   // rate of somewhere between 1/25th and 1/50th of normal.  This causes the
709   // BHR to capture a lot of stacks, which slows us down even more.  As an
710   // attempt to avoid the worst of this, scale up all presented timeouts by
711   // a factor of thirty, and add six seconds so as to impose a six second
712   // floor on all timeouts.  For a non-Valgrind-enabled build, or for an
713   // enabled build which isn't running on Valgrind, the timeouts are
714   // unchanged.
715   if (RUNNING_ON_VALGRIND) {
716     const uint32_t scaleUp = 30;
717     const uint32_t extraMs = 6000;
718     if (aTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
719       aTimeoutMs *= scaleUp;
720       aTimeoutMs += extraMs;
721     }
722     if (aMaxTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
723       aMaxTimeoutMs *= scaleUp;
724       aMaxTimeoutMs += extraMs;
725     }
726   }
727 #  endif
728 
729   if (!BackgroundHangManager::sDisabled && !mThread) {
730     mThread =
731         new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs, aThreadType);
732   }
733 #endif
734 }
735 
BackgroundHangMonitor()736 BackgroundHangMonitor::BackgroundHangMonitor()
737     : mThread(BackgroundHangThread::FindThread()) {
738 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
739   if (BackgroundHangManager::sDisabled) {
740     return;
741   }
742 #endif
743 }
744 
745 BackgroundHangMonitor::~BackgroundHangMonitor() = default;
746 
NotifyActivity()747 void BackgroundHangMonitor::NotifyActivity() {
748 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
749   if (mThread == nullptr) {
750     MOZ_ASSERT(BackgroundHangManager::sDisabled,
751                "This thread is not initialized for hang monitoring");
752     return;
753   }
754 
755   if (Telemetry::CanRecordExtended()) {
756     mThread->NotifyActivity();
757   }
758 #endif
759 }
760 
NotifyWait()761 void BackgroundHangMonitor::NotifyWait() {
762 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
763   if (mThread == nullptr) {
764     MOZ_ASSERT(BackgroundHangManager::sDisabled,
765                "This thread is not initialized for hang monitoring");
766     return;
767   }
768 
769   if (Telemetry::CanRecordExtended()) {
770     mThread->NotifyWait();
771   }
772 #endif
773 }
774 
RegisterAnnotator(BackgroundHangAnnotator & aAnnotator)775 bool BackgroundHangMonitor::RegisterAnnotator(
776     BackgroundHangAnnotator& aAnnotator) {
777 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
778   BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
779   if (!thisThread) {
780     return false;
781   }
782   return thisThread->mAnnotators.Register(aAnnotator);
783 #else
784   return false;
785 #endif
786 }
787 
UnregisterAnnotator(BackgroundHangAnnotator & aAnnotator)788 bool BackgroundHangMonitor::UnregisterAnnotator(
789     BackgroundHangAnnotator& aAnnotator) {
790 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
791   BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
792   if (!thisThread) {
793     return false;
794   }
795   return thisThread->mAnnotators.Unregister(aAnnotator);
796 #else
797   return false;
798 #endif
799 }
800 
801 }  // namespace mozilla
802