1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 #include "mozilla/BackgroundHangMonitor.h"
8 
9 #include <utility>
10 
11 #include "GeckoProfiler.h"
12 #include "HangDetails.h"
13 #include "ThreadStackHelper.h"
14 #include "mozilla/ArrayUtils.h"
15 #include "mozilla/CPUUsageWatcher.h"
16 #include "mozilla/LinkedList.h"
17 #include "mozilla/Monitor.h"
18 #include "mozilla/Preferences.h"
19 #include "mozilla/Services.h"
20 #include "mozilla/StaticPtr.h"
21 #include "mozilla/Telemetry.h"
22 #include "mozilla/ThreadLocal.h"
23 #include "mozilla/Unused.h"
24 #include "nsAppDirectoryServiceDefs.h"
25 #include "nsIObserver.h"
26 #include "nsIObserverService.h"
27 #include "nsThreadUtils.h"
28 #include "nsXULAppAPI.h"
29 #include "prinrval.h"
30 #include "prthread.h"
31 
32 #ifdef MOZ_GECKO_PROFILER
33 #  include "ProfilerMarkerPayload.h"
34 #endif
35 
36 #include <algorithm>
37 
// Activate BHR for only one in every BHR_BETA_MOD users.
// We're doing experimentation with collecting a lot more data from BHR, and
// don't want to enable it for beta users at the moment. We can scale this up in
// the future.
// The expansion intentionally has no trailing semicolon, so the macro can be
// used as an operand inside a larger expression.
#define BHR_BETA_MOD INT32_MAX
43 
// Upper bound on the call stack depth of a reported thread hang. The value
// represents the 99.9th percentile of the thread hang stack depths reported by
// Telemetry.
static constexpr size_t kMaxThreadHangStackDepth = 30;

// How often, in milliseconds, the global and per-process CPU usage is sampled
// in order to determine whether there is high external CPU usage.
static constexpr int32_t kCheckCPUIntervalMilliseconds = 2000;
52 
// A utility comparator intended for std::unique, used to collapse runs of
// identical "(chrome script)" / "(content script)" entries in a vector
// representing a call stack. Returns true only when both entries are the same
// string AND that string is one of those two script placeholders.
bool StackScriptEntriesCollapser(const char* aStackEntry,
                                 const char* aAnotherStackEntry) {
  // Different entries can never be collapsed.
  if (strcmp(aStackEntry, aAnotherStackEntry) != 0) {
    return false;
  }

  // Equal entries are collapsed only when they are script placeholders.
  return strcmp(aStackEntry, "(chrome script)") == 0 ||
         strcmp(aStackEntry, "(content script)") == 0;
}
61 
62 namespace mozilla {
63 
64 /**
65  * BackgroundHangManager is the global object that
66  * manages all instances of BackgroundHangThread.
67  */
68 class BackgroundHangManager : public nsIObserver {
69  private:
70   // Background hang monitor thread function
MonitorThread(void * aData)71   static void MonitorThread(void* aData) {
72     AUTO_PROFILER_REGISTER_THREAD("BgHangMonitor");
73     NS_SetCurrentThreadName("BHMgr Monitor");
74 
75     /* We do not hold a reference to BackgroundHangManager here
76        because the monitor thread only exists as long as the
77        BackgroundHangManager instance exists. We stop the monitor
78        thread in the BackgroundHangManager destructor, and we can
79        only get to the destructor if we don't hold a reference here. */
80     static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
81   }
82 
83   // Hang monitor thread
84   PRThread* mHangMonitorThread;
85   // Stop hang monitoring
86   bool mShutdown;
87 
88   BackgroundHangManager(const BackgroundHangManager&);
89   BackgroundHangManager& operator=(const BackgroundHangManager&);
90   void RunMonitorThread();
91 
92  public:
93   NS_DECL_THREADSAFE_ISUPPORTS
94   NS_DECL_NSIOBSERVER
95   static StaticRefPtr<BackgroundHangManager> sInstance;
96   static bool sDisabled;
97 
98   // Lock for access to members of this class
99   Monitor mLock;
100   // Current time as seen by hang monitors
101   TimeStamp mNow;
102   // List of BackgroundHangThread instances associated with each thread
103   LinkedList<BackgroundHangThread> mHangThreads;
104 
105   // Unwinding and reporting of hangs is despatched to this thread.
106   nsCOMPtr<nsIThread> mHangProcessingThread;
107 
108   // Used for recording a permahang in case we don't ever make it back to
109   // the main thread to record/send it.
110   nsCOMPtr<nsIFile> mPermahangFile;
111 
112   // Allows us to watch CPU usage and annotate hangs when the system is
113   // under high external load.
114   CPUUsageWatcher mCPUUsageWatcher;
115 
Shutdown()116   void Shutdown() {
117     MonitorAutoLock autoLock(mLock);
118     mShutdown = true;
119     autoLock.Notify();
120   }
121 
122   // Attempt to wakeup the hang monitor thread.
Wakeup()123   void Wakeup() {
124     mLock.AssertCurrentThreadOwns();
125     mLock.NotifyAll();
126   }
127 
128   BackgroundHangManager();
129 
130  private:
131   virtual ~BackgroundHangManager();
132 };
133 
NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver)

/**
 * Handles the two one-shot startup notifications this manager registers for:
 *
 *  - "browser-delayed-startup-finished": resolves the permahang file in the
 *    profile directory and, if possible, dispatches a runnable that submits a
 *    permahang persisted by a previous session.
 *  - "profile-after-change": decides whether BHR must be disabled on beta.
 *
 * Any other topic yields NS_ERROR_UNEXPECTED. After handling a topic the
 * manager unregisters itself from that topic.
 */
NS_IMETHODIMP
BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic,
                               const char16_t* aData) {
  if (!strcmp(aTopic, "browser-delayed-startup-finished")) {
    MonitorAutoLock autoLock(mLock);
    nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR,
                                         getter_AddRefs(mPermahangFile));
    if (NS_SUCCEEDED(rv)) {
      mPermahangFile->AppendNative(NS_LITERAL_CSTRING("last_permahang.bin"));
    } else {
      mPermahangFile = nullptr;
    }

    if (mHangProcessingThread && mPermahangFile) {
      nsCOMPtr<nsIRunnable> submitRunnable =
          new SubmitPersistedPermahangRunnable(mPermahangFile);
      mHangProcessingThread->Dispatch(submitRunnable.forget());
    }
  } else if (!strcmp(aTopic, "profile-after-change")) {
    // May call BackgroundHangMonitor::Shutdown(), which clears sInstance.
    BackgroundHangMonitor::DisableOnBeta();
  } else {
    return NS_ERROR_UNEXPECTED;
  }

  // Unregister `this` rather than sInstance: sInstance may have been reset to
  // nullptr by a Shutdown() triggered above, in which case passing it would
  // hand the observer service a null observer and leave us registered.
  nsCOMPtr<nsIObserverService> observerService =
      mozilla::services::GetObserverService();
  MOZ_ASSERT(observerService);
  observerService->RemoveObserver(this, aTopic);

  return NS_OK;
}
172 
173 /**
174  * BackgroundHangThread is a per-thread object that is used
175  * by all instances of BackgroundHangMonitor to monitor hangs.
176  */
177 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread> {
178  private:
179   static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey;
180   static bool sTlsKeyInitialized;
181 
182   BackgroundHangThread(const BackgroundHangThread&);
183   BackgroundHangThread& operator=(const BackgroundHangThread&);
184   ~BackgroundHangThread();
185 
186   /* Keep a reference to the manager, so we can keep going even
187      after BackgroundHangManager::Shutdown is called. */
188   const RefPtr<BackgroundHangManager> mManager;
189   // Unique thread ID for identification
190   const PRThread* mThreadID;
191 
192   void Update();
193 
194  public:
195   NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
196   /**
197    * Returns the BackgroundHangThread associated with the
198    * running thread. Note that this will not find private
199    * BackgroundHangThread threads.
200    *
201    * @return BackgroundHangThread*, or nullptr if no thread
202    *         is found.
203    */
204   static BackgroundHangThread* FindThread();
205 
Startup()206   static void Startup() {
207     /* We can tolerate init() failing. */
208     sTlsKeyInitialized = sTlsKey.init();
209   }
210 
211   // Hang timeout
212   const TimeDuration mTimeout;
213   // PermaHang timeout
214   const TimeDuration mMaxTimeout;
215   // Time at last activity
216   TimeStamp mLastActivity;
217   // Time when a hang started
218   TimeStamp mHangStart;
219   // Is the thread in a hang
220   bool mHanging;
221   // Is the thread in a waiting state
222   bool mWaiting;
223   // Is the thread dedicated to a single BackgroundHangMonitor
224   BackgroundHangMonitor::ThreadType mThreadType;
225 #ifdef MOZ_GECKO_PROFILER
226   // Platform-specific helper to get hang stacks
227   ThreadStackHelper mStackHelper;
228 #endif
229   // Stack of current hang
230   HangStack mHangStack;
231   // Annotations for the current hang
232   BackgroundHangAnnotations mAnnotations;
233   // Annotators registered for this thread
234   BackgroundHangAnnotators mAnnotators;
235   // The name of the runnable which is hanging the current process
236   nsCString mRunnableName;
237   // The name of the thread which is being monitored
238   nsCString mThreadName;
239 
240   BackgroundHangThread(const char* aName, uint32_t aTimeoutMs,
241                        uint32_t aMaxTimeoutMs,
242                        BackgroundHangMonitor::ThreadType aThreadType =
243                            BackgroundHangMonitor::THREAD_SHARED);
244 
245   // Report a hang; aManager->mLock IS locked. The hang will be processed
246   // off-main-thread, and will then be submitted back.
247   void ReportHang(TimeDuration aHangTime,
248                   PersistedToDisk aPersistedToDisk = PersistedToDisk::No);
249   // Report a permanent hang; aManager->mLock IS locked
250   void ReportPermaHang();
251   // Called by BackgroundHangMonitor::NotifyActivity
NotifyActivity()252   void NotifyActivity() {
253     MonitorAutoLock autoLock(mManager->mLock);
254     Update();
255   }
256   // Called by BackgroundHangMonitor::NotifyWait
NotifyWait()257   void NotifyWait() {
258     MonitorAutoLock autoLock(mManager->mLock);
259 
260     if (mWaiting) {
261       return;
262     }
263 
264     Update();
265     if (mHanging) {
266       // We were hanging! We're done with that now, so let's report it.
267       // ReportHang() doesn't do much work on the current thread, and is
268       // safe to call from any thread as long as we're holding the lock.
269       ReportHang(mLastActivity - mHangStart);
270       mHanging = false;
271     }
272     mWaiting = true;
273   }
274 
275   // Returns true if this thread is (or might be) shared between other
276   // BackgroundHangMonitors for the monitored thread.
IsShared()277   bool IsShared() {
278     return mThreadType == BackgroundHangMonitor::THREAD_SHARED;
279   }
280 };
281 
282 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
283 bool BackgroundHangManager::sDisabled = false;
284 
285 MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey;
286 bool BackgroundHangThread::sTlsKeyInitialized;
287 
BackgroundHangManager()288 BackgroundHangManager::BackgroundHangManager()
289     : mShutdown(false), mLock("BackgroundHangManager") {
290   // Lock so we don't race against the new monitor thread
291   MonitorAutoLock autoLock(mLock);
292 
293   mHangMonitorThread = PR_CreateThread(
294       PR_USER_THREAD, MonitorThread, this, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
295       PR_JOINABLE_THREAD, nsIThreadManager::DEFAULT_STACK_SIZE);
296 
297   MOZ_ASSERT(mHangMonitorThread, "Failed to create BHR monitor thread");
298 
299   DebugOnly<nsresult> rv = NS_NewNamedThread(
300       "BHMgr Processor", getter_AddRefs(mHangProcessingThread));
301   MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangProcessingThread,
302              "Failed to create BHR processing thread");
303 }
304 
~BackgroundHangManager()305 BackgroundHangManager::~BackgroundHangManager() {
306   MOZ_ASSERT(mShutdown, "Destruction without Shutdown call");
307   MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors");
308   MOZ_ASSERT(mHangMonitorThread, "No monitor thread");
309   MOZ_ASSERT(mHangProcessingThread, "No processing thread");
310 
311   // PR_CreateThread could have failed above due to resource limitation
312   if (mHangMonitorThread) {
313     // The monitor thread can only live as long as the instance lives
314     PR_JoinThread(mHangMonitorThread);
315   }
316 
317   // Similarly, NS_NewNamedThread above could have failed.
318   if (mHangProcessingThread) {
319     mHangProcessingThread->Shutdown();
320   }
321 }
322 
RunMonitorThread()323 void BackgroundHangManager::RunMonitorThread() {
324   // Keep us locked except when waiting
325   MonitorAutoLock autoLock(mLock);
326 
327   /* mNow is updated at various intervals determined by waitTime.
328      However, if an update latency is too long (due to CPU scheduling, system
329      sleep, etc.), we don't update mNow at all. This is done so that
330      long latencies in our timing are not detected as hangs. systemTime is
331      used to track TimeStamp::Now() and determine our latency. */
332 
333   TimeStamp systemTime = TimeStamp::Now();
334   // Default values for the first iteration of thread loop
335   TimeDuration waitTime;
336   TimeDuration recheckTimeout;
337   TimeStamp lastCheckedCPUUsage = systemTime;
338   TimeDuration checkCPUUsageInterval =
339       TimeDuration::FromMilliseconds(kCheckCPUIntervalMilliseconds);
340 
341   while (!mShutdown) {
342     autoLock.Wait(waitTime);
343 
344     TimeStamp newTime = TimeStamp::Now();
345     TimeDuration systemInterval = newTime - systemTime;
346     systemTime = newTime;
347 
348     if (systemTime - lastCheckedCPUUsage > checkCPUUsageInterval) {
349       Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
350       lastCheckedCPUUsage = systemTime;
351     }
352 
353     /* waitTime is a quarter of the shortest timeout value; If our timing
354        latency is low enough (less than half the shortest timeout value),
355        we can update mNow. */
356     if (MOZ_LIKELY(waitTime != TimeDuration::Forever() &&
357                    systemInterval < waitTime * 2)) {
358       mNow += systemInterval;
359     }
360 
361     /* If it's before the next recheck timeout, and our wait did not get
362        interrupted, we can keep the current waitTime and skip iterating
363        through hang monitors. */
364     if (MOZ_LIKELY(systemInterval < recheckTimeout &&
365                    systemInterval >= waitTime)) {
366       recheckTimeout -= systemInterval;
367       continue;
368     }
369 
370     /* We are in one of the following scenarios,
371      - Hang or permahang recheck timeout
372      - Thread added/removed
373      - Thread wait or hang ended
374        In all cases, we want to go through our list of hang
375        monitors and update waitTime and recheckTimeout. */
376     waitTime = TimeDuration::Forever();
377     recheckTimeout = TimeDuration::Forever();
378 
379     // Locally hold mNow
380     TimeStamp now = mNow;
381 
382     // iterate through hang monitors
383     for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
384          currentThread; currentThread = currentThread->getNext()) {
385       if (currentThread->mWaiting) {
386         // Thread is waiting, not hanging
387         continue;
388       }
389       TimeStamp lastActivity = currentThread->mLastActivity;
390       TimeDuration hangTime = now - lastActivity;
391       if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
392         // A permahang started
393         // Skip subsequent iterations and tolerate a race on mWaiting here
394         currentThread->mWaiting = true;
395         currentThread->mHanging = false;
396         currentThread->ReportPermaHang();
397         continue;
398       }
399 
400       if (MOZ_LIKELY(!currentThread->mHanging)) {
401         if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
402 #ifdef MOZ_GECKO_PROFILER
403           // A hang started, collect a stack
404           currentThread->mStackHelper.GetStack(
405               currentThread->mHangStack, currentThread->mRunnableName, true);
406 #endif
407 
408           // If we hang immediately on waking, then the most recently collected
409           // CPU usage is going to be an average across the whole time we were
410           // sleeping. Accordingly, we want to make sure that when we hang, we
411           // collect a fresh value.
412           if (systemTime != lastCheckedCPUUsage) {
413             Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
414             lastCheckedCPUUsage = systemTime;
415           }
416 
417           currentThread->mHangStart = lastActivity;
418           currentThread->mHanging = true;
419           currentThread->mAnnotations =
420               currentThread->mAnnotators.GatherAnnotations();
421         }
422       } else {
423         if (MOZ_LIKELY(lastActivity != currentThread->mHangStart)) {
424           // A hang ended
425           currentThread->ReportHang(now - currentThread->mHangStart);
426           currentThread->mHanging = false;
427         }
428       }
429 
430       /* If we are hanging, the next time we check for hang status is when
431          the hang turns into a permahang. If we're not hanging, the next
432          recheck timeout is when we may be entering a hang. */
433       TimeDuration nextRecheck;
434       if (currentThread->mHanging) {
435         nextRecheck = currentThread->mMaxTimeout;
436       } else {
437         nextRecheck = currentThread->mTimeout;
438       }
439       recheckTimeout =
440           TimeDuration::Min(recheckTimeout, nextRecheck - hangTime);
441 
442       if (currentThread->mTimeout != TimeDuration::Forever()) {
443         /* We wait for a quarter of the shortest timeout
444            value to give mNow enough granularity. */
445         waitTime =
446             TimeDuration::Min(waitTime, currentThread->mTimeout / (int64_t)4);
447       }
448     }
449   }
450 
451   /* We are shutting down now.
452      Wait for all outstanding monitors to unregister. */
453   while (!mHangThreads.isEmpty()) {
454     autoLock.Wait();
455   }
456 }
457 
BackgroundHangThread(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,BackgroundHangMonitor::ThreadType aThreadType)458 BackgroundHangThread::BackgroundHangThread(
459     const char* aName, uint32_t aTimeoutMs, uint32_t aMaxTimeoutMs,
460     BackgroundHangMonitor::ThreadType aThreadType)
461     : mManager(BackgroundHangManager::sInstance),
462       mThreadID(PR_GetCurrentThread()),
463       mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
464                    ? TimeDuration::Forever()
465                    : TimeDuration::FromMilliseconds(aTimeoutMs)),
466       mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
467                       ? TimeDuration::Forever()
468                       : TimeDuration::FromMilliseconds(aMaxTimeoutMs)),
469       mLastActivity(mManager->mNow),
470       mHangStart(mLastActivity),
471       mHanging(false),
472       mWaiting(true),
473       mThreadType(aThreadType),
474       mThreadName(aName) {
475   if (sTlsKeyInitialized && IsShared()) {
476     sTlsKey.set(this);
477   }
478   // Lock here because LinkedList is not thread-safe
479   MonitorAutoLock autoLock(mManager->mLock);
480   // Add to thread list
481   mManager->mHangThreads.insertBack(this);
482   // Wake up monitor thread to process new thread
483   autoLock.Notify();
484 }
485 
~BackgroundHangThread()486 BackgroundHangThread::~BackgroundHangThread() {
487   // Lock here because LinkedList is not thread-safe
488   MonitorAutoLock autoLock(mManager->mLock);
489   // Remove from thread list
490   remove();
491   // Wake up monitor thread to process removed thread
492   autoLock.Notify();
493 
494   // We no longer have a thread
495   if (sTlsKeyInitialized && IsShared()) {
496     sTlsKey.set(nullptr);
497   }
498 }
499 
ReportHang(TimeDuration aHangTime,PersistedToDisk aPersistedToDisk)500 void BackgroundHangThread::ReportHang(TimeDuration aHangTime,
501                                       PersistedToDisk aPersistedToDisk) {
502   // Recovered from a hang; called on the monitor thread
503   // mManager->mLock IS locked
504 
505   HangDetails hangDetails(aHangTime,
506                           nsDependentCString(XRE_GetProcessTypeString()),
507                           VoidString(), mThreadName, mRunnableName,
508                           std::move(mHangStack), std::move(mAnnotations));
509 
510   PersistedToDisk persistedToDisk = aPersistedToDisk;
511   if (aPersistedToDisk == PersistedToDisk::Yes && XRE_IsParentProcess() &&
512       mManager->mPermahangFile) {
513     auto res = WriteHangDetailsToFile(hangDetails, mManager->mPermahangFile);
514     persistedToDisk = res.isOk() ? PersistedToDisk::Yes : PersistedToDisk::No;
515   }
516 
517   // If the hang processing thread exists, we can process the native stack
518   // on it. Otherwise, we are unable to report a native stack, so we just
519   // report without one.
520   if (mManager->mHangProcessingThread) {
521     nsCOMPtr<nsIRunnable> processHangStackRunnable =
522         new ProcessHangStackRunnable(std::move(hangDetails), persistedToDisk);
523     mManager->mHangProcessingThread->Dispatch(
524         processHangStackRunnable.forget());
525   } else {
526     NS_WARNING("Unable to report native stack without a BHR processing thread");
527     RefPtr<nsHangDetails> hd =
528         new nsHangDetails(std::move(hangDetails), persistedToDisk);
529     hd->Submit();
530   }
531 
532   // If the profiler is enabled, add a marker.
533 #ifdef MOZ_GECKO_PROFILER
534   if (profiler_can_accept_markers()) {
535     TimeStamp endTime = TimeStamp::Now();
536     TimeStamp startTime = endTime - aHangTime;
537     AUTO_PROFILER_STATS(add_marker_with_HangMarkerPayload);
538     profiler_add_marker_for_thread(
539         mStackHelper.GetThreadId(), JS::ProfilingCategoryPair::OTHER,
540         "BHR-detected hang", HangMarkerPayload(startTime, endTime));
541   }
542 #endif
543 }
544 
ReportPermaHang()545 void BackgroundHangThread::ReportPermaHang() {
546   // Permanently hanged; called on the monitor thread
547   // mManager->mLock IS locked
548 
549   // The significance of a permahang is that it's likely that we won't ever
550   // recover and be allowed to submit this hang. On the parent thread, we
551   // compensate for this by writing the hang details to disk on this thread,
552   // and in our next session we'll try to read those details
553   ReportHang(mMaxTimeout, PersistedToDisk::Yes);
554 }
555 
Update()556 MOZ_ALWAYS_INLINE void BackgroundHangThread::Update() {
557   TimeStamp now = mManager->mNow;
558   if (mWaiting) {
559     mLastActivity = now;
560     mWaiting = false;
561     /* We have to wake up the manager thread because when all threads
562        are waiting, the manager thread waits indefinitely as well. */
563     mManager->Wakeup();
564   } else {
565     TimeDuration duration = now - mLastActivity;
566     if (MOZ_UNLIKELY(duration >= mTimeout)) {
567       /* Wake up the manager thread to tell it that a hang ended */
568       mManager->Wakeup();
569     }
570     mLastActivity = now;
571   }
572 }
573 
FindThread()574 BackgroundHangThread* BackgroundHangThread::FindThread() {
575 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
576   if (BackgroundHangManager::sInstance == nullptr) {
577     MOZ_ASSERT(BackgroundHangManager::sDisabled,
578                "BackgroundHandleManager is not initialized");
579     return nullptr;
580   }
581 
582   if (sTlsKeyInitialized) {
583     // Use TLS if available
584     return sTlsKey.get();
585   }
586   // If TLS is unavailable, we can search through the thread list
587   RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
588   MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
589 
590   PRThread* threadID = PR_GetCurrentThread();
591   // Lock thread list for traversal
592   MonitorAutoLock autoLock(manager->mLock);
593   for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); thread;
594        thread = thread->getNext()) {
595     if (thread->mThreadID == threadID && thread->IsShared()) {
596       return thread;
597     }
598   }
599 #endif
600   // Current thread is not initialized
601   return nullptr;
602 }
603 
ShouldDisableOnBeta(const nsCString & clientID)604 bool BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString& clientID) {
605   MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid");
606   const char* suffix = clientID.get() + clientID.Length() - 4;
607   return strtol(suffix, NULL, 16) % BHR_BETA_MOD;
608 }
609 
IsDisabled()610 bool BackgroundHangMonitor::IsDisabled() {
611   static bool sPrefCached = false;
612   static bool sPrefCacheValue = false;
613   if (!sPrefCached) {
614     sPrefCached = true;
615     Preferences::AddBoolVarCache(
616         &sPrefCacheValue, "toolkit.content-background-hang-monitor.disabled");
617   }
618 
619   return sPrefCacheValue;
620 }
621 
DisableOnBeta()622 bool BackgroundHangMonitor::DisableOnBeta() {
623   nsAutoCString clientID;
624   nsresult rv =
625       Preferences::GetCString("toolkit.telemetry.cachedClientID", clientID);
626   bool telemetryEnabled = Telemetry::CanRecordPrereleaseData();
627 
628   if (!telemetryEnabled || NS_FAILED(rv) ||
629       BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) {
630     if (XRE_IsParentProcess()) {
631       BackgroundHangMonitor::Shutdown();
632     } else {
633       BackgroundHangManager::sDisabled = true;
634     }
635     return true;
636   }
637 
638   return false;
639 }
640 
Startup()641 void BackgroundHangMonitor::Startup() {
642   MOZ_RELEASE_ASSERT(NS_IsMainThread());
643 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
644   MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
645 
646   if (XRE_IsContentProcess() && IsDisabled()) {
647     BackgroundHangManager::sDisabled = true;
648     return;
649   }
650 
651   nsCOMPtr<nsIObserverService> observerService =
652       mozilla::services::GetObserverService();
653   MOZ_ASSERT(observerService);
654 
655   if (!strcmp(MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
656     if (XRE_IsParentProcess()) {  // cached ClientID hasn't been read yet
657       BackgroundHangThread::Startup();
658       BackgroundHangManager::sInstance = new BackgroundHangManager();
659       Unused << NS_WARN_IF(
660           BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
661       observerService->AddObserver(BackgroundHangManager::sInstance,
662                                    "profile-after-change", false);
663       return;
664     } else if (DisableOnBeta()) {
665       return;
666     }
667   }
668 
669   BackgroundHangThread::Startup();
670   BackgroundHangManager::sInstance = new BackgroundHangManager();
671   Unused << NS_WARN_IF(
672       BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
673   if (XRE_IsParentProcess()) {
674     observerService->AddObserver(BackgroundHangManager::sInstance,
675                                  "browser-delayed-startup-finished", false);
676   }
677 #endif
678 }
679 
Shutdown()680 void BackgroundHangMonitor::Shutdown() {
681 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
682   if (BackgroundHangManager::sDisabled) {
683     MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized");
684     return;
685   }
686 
687   MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
688   BackgroundHangManager::sInstance->mCPUUsageWatcher.Uninit();
689   /* Scope our lock inside Shutdown() because the sInstance object can
690      be destroyed as soon as we set sInstance to nullptr below, and
691      we don't want to hold the lock when it's being destroyed. */
692   BackgroundHangManager::sInstance->Shutdown();
693   BackgroundHangManager::sInstance = nullptr;
694   BackgroundHangManager::sDisabled = true;
695 #endif
696 }
697 
BackgroundHangMonitor(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,ThreadType aThreadType)698 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
699                                              uint32_t aTimeoutMs,
700                                              uint32_t aMaxTimeoutMs,
701                                              ThreadType aThreadType)
702     : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread()
703                                            : nullptr) {
704 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
705 #  ifdef MOZ_VALGRIND
706   // If we're running on Valgrind, we'll be making forward progress at a
707   // rate of somewhere between 1/25th and 1/50th of normal.  This causes the
708   // BHR to capture a lot of stacks, which slows us down even more.  As an
709   // attempt to avoid the worst of this, scale up all presented timeouts by
710   // a factor of thirty, and add six seconds so as to impose a six second
711   // floor on all timeouts.  For a non-Valgrind-enabled build, or for an
712   // enabled build which isn't running on Valgrind, the timeouts are
713   // unchanged.
714   if (RUNNING_ON_VALGRIND) {
715     const uint32_t scaleUp = 30;
716     const uint32_t extraMs = 6000;
717     if (aTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
718       aTimeoutMs *= scaleUp;
719       aTimeoutMs += extraMs;
720     }
721     if (aMaxTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
722       aMaxTimeoutMs *= scaleUp;
723       aMaxTimeoutMs += extraMs;
724     }
725   }
726 #  endif
727 
728   if (!BackgroundHangManager::sDisabled && !mThread) {
729     mThread =
730         new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs, aThreadType);
731   }
732 #endif
733 }
734 
BackgroundHangMonitor()735 BackgroundHangMonitor::BackgroundHangMonitor()
736     : mThread(BackgroundHangThread::FindThread()) {
737 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
738   if (BackgroundHangManager::sDisabled) {
739     return;
740   }
741 #endif
742 }
743 
744 BackgroundHangMonitor::~BackgroundHangMonitor() = default;
745 
NotifyActivity()746 void BackgroundHangMonitor::NotifyActivity() {
747 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
748   if (mThread == nullptr) {
749     MOZ_ASSERT(BackgroundHangManager::sDisabled,
750                "This thread is not initialized for hang monitoring");
751     return;
752   }
753 
754   if (Telemetry::CanRecordExtended()) {
755     mThread->NotifyActivity();
756   }
757 #endif
758 }
759 
NotifyWait()760 void BackgroundHangMonitor::NotifyWait() {
761 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
762   if (mThread == nullptr) {
763     MOZ_ASSERT(BackgroundHangManager::sDisabled,
764                "This thread is not initialized for hang monitoring");
765     return;
766   }
767 
768   if (Telemetry::CanRecordExtended()) {
769     mThread->NotifyWait();
770   }
771 #endif
772 }
773 
RegisterAnnotator(BackgroundHangAnnotator & aAnnotator)774 bool BackgroundHangMonitor::RegisterAnnotator(
775     BackgroundHangAnnotator& aAnnotator) {
776 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
777   BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
778   if (!thisThread) {
779     return false;
780   }
781   return thisThread->mAnnotators.Register(aAnnotator);
782 #else
783   return false;
784 #endif
785 }
786 
UnregisterAnnotator(BackgroundHangAnnotator & aAnnotator)787 bool BackgroundHangMonitor::UnregisterAnnotator(
788     BackgroundHangAnnotator& aAnnotator) {
789 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
790   BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
791   if (!thisThread) {
792     return false;
793   }
794   return thisThread->mAnnotators.Unregister(aAnnotator);
795 #else
796   return false;
797 #endif
798 }
799 
800 }  // namespace mozilla
801