1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 #include "mozilla/BackgroundHangMonitor.h"
8
9 #include <utility>
10
11 #include "GeckoProfiler.h"
12 #include "HangDetails.h"
13 #include "ThreadStackHelper.h"
14 #include "mozilla/ArrayUtils.h"
15 #include "mozilla/CPUUsageWatcher.h"
16 #include "mozilla/LinkedList.h"
17 #include "mozilla/Monitor.h"
18 #include "mozilla/Preferences.h"
19 #include "mozilla/StaticPrefs_toolkit.h"
20 #include "mozilla/Services.h"
21 #include "mozilla/StaticPtr.h"
22 #include "mozilla/Telemetry.h"
23 #include "mozilla/ThreadLocal.h"
24 #include "mozilla/Unused.h"
25 #include "mozilla/dom/RemoteType.h"
26 #include "nsAppDirectoryServiceDefs.h"
27 #include "nsIObserver.h"
28 #include "nsIObserverService.h"
29 #include "nsIThread.h"
30 #include "nsThreadUtils.h"
31 #include "nsXULAppAPI.h"
32 #include "prinrval.h"
33 #include "prthread.h"
34
35 #include <algorithm>
36
// Activate BHR for only one in every BHR_BETA_MOD users.
// We're doing experimentation with collecting a lot more data from BHR, and
// don't want to enable it for beta users at the moment. We can scale this up in
// the future.
// NOTE: deliberately no trailing semicolon, so that the macro expands to a
// plain expression and can be used inside larger expressions.
#define BHR_BETA_MOD INT32_MAX
42
// Upper bound on the depth of the call stack in the reported thread hangs.
// This value represents the 99.9th percentile of the thread hangs stack depths
// reported by Telemetry.
static constexpr size_t kMaxThreadHangStackDepth = 30;

// How often (in milliseconds) the global and per-process CPU usage is sampled
// in order to determine if there is high external CPU usage.
static constexpr int32_t kCheckCPUIntervalMilliseconds = 2000;
51
// A utility comparator function used by std::unique to collapse "(* script)"
// entries in a vector representing a call stack. Returns true only when both
// entries are the same string and that string is one of the two script
// placeholder labels.
bool StackScriptEntriesCollapser(const char* aStackEntry,
                                 const char* aAnotherStackEntry) {
  // Entries that differ are never collapsed.
  if (strcmp(aStackEntry, aAnotherStackEntry) != 0) {
    return false;
  }

  // Identical entries are collapsed only when they are a script placeholder.
  const bool isChromeScript = strcmp(aStackEntry, "(chrome script)") == 0;
  const bool isContentScript = strcmp(aStackEntry, "(content script)") == 0;
  return isChromeScript || isContentScript;
}
60
61 namespace mozilla {
62
63 /**
64 * BackgroundHangManager is the global object that
65 * manages all instances of BackgroundHangThread.
66 */
67 class BackgroundHangManager : public nsIObserver {
68 private:
69 // Background hang monitor thread function
MonitorThread(void * aData)70 static void MonitorThread(void* aData) {
71 AUTO_PROFILER_REGISTER_THREAD("BgHangMonitor");
72 NS_SetCurrentThreadName("BHMgr Monitor");
73
74 /* We do not hold a reference to BackgroundHangManager here
75 because the monitor thread only exists as long as the
76 BackgroundHangManager instance exists. We stop the monitor
77 thread in the BackgroundHangManager destructor, and we can
78 only get to the destructor if we don't hold a reference here. */
79 static_cast<BackgroundHangManager*>(aData)->RunMonitorThread();
80 }
81
82 // Hang monitor thread
83 PRThread* mHangMonitorThread;
84 // Stop hang monitoring
85 bool mShutdown;
86
87 BackgroundHangManager(const BackgroundHangManager&);
88 BackgroundHangManager& operator=(const BackgroundHangManager&);
89 void RunMonitorThread();
90
91 public:
92 NS_DECL_THREADSAFE_ISUPPORTS
93 NS_DECL_NSIOBSERVER
94 static StaticRefPtr<BackgroundHangManager> sInstance;
95 static bool sDisabled;
96
97 // Lock for access to members of this class
98 Monitor mLock;
99 // Current time as seen by hang monitors
100 TimeStamp mNow;
101 // List of BackgroundHangThread instances associated with each thread
102 LinkedList<BackgroundHangThread> mHangThreads;
103
104 // Unwinding and reporting of hangs is despatched to this thread.
105 nsCOMPtr<nsIThread> mHangProcessingThread;
106
107 // Used for recording a permahang in case we don't ever make it back to
108 // the main thread to record/send it.
109 nsCOMPtr<nsIFile> mPermahangFile;
110
111 // Allows us to watch CPU usage and annotate hangs when the system is
112 // under high external load.
113 CPUUsageWatcher mCPUUsageWatcher;
114
Shutdown()115 void Shutdown() {
116 MonitorAutoLock autoLock(mLock);
117 mShutdown = true;
118 autoLock.Notify();
119 }
120
121 // Attempt to wakeup the hang monitor thread.
Wakeup()122 void Wakeup() {
123 mLock.AssertCurrentThreadOwns();
124 mLock.NotifyAll();
125 }
126
127 BackgroundHangManager();
128
129 private:
130 virtual ~BackgroundHangManager();
131 };
132
NS_IMPL_ISUPPORTS(BackgroundHangManager, nsIObserver)

/**
 * nsIObserver callback handling the two one-shot topics this manager
 * registers for:
 *  - "browser-delayed-startup-finished": resolve the permahang file inside
 *    the user profile directory and, when possible, dispatch a runnable that
 *    submits a previously persisted permahang.
 *  - "profile-after-change": run the beta sampling check.
 * In both cases the manager unregisters itself from the topic afterwards.
 *
 * @return NS_OK for the topics above, NS_ERROR_UNEXPECTED for anything else.
 */
NS_IMETHODIMP
BackgroundHangManager::Observe(nsISupports* aSubject, const char* aTopic,
                               const char16_t* aData) {
  if (!strcmp(aTopic, "browser-delayed-startup-finished")) {
    MonitorAutoLock autoLock(mLock);
    nsresult rv = NS_GetSpecialDirectory(NS_APP_USER_PROFILE_50_DIR,
                                         getter_AddRefs(mPermahangFile));
    if (NS_SUCCEEDED(rv)) {
      mPermahangFile->AppendNative("last_permahang.bin"_ns);
    } else {
      mPermahangFile = nullptr;
    }

    if (mHangProcessingThread && mPermahangFile) {
      nsCOMPtr<nsIRunnable> submitRunnable =
          new SubmitPersistedPermahangRunnable(mPermahangFile);
      mHangProcessingThread->Dispatch(submitRunnable.forget());
    }
    nsCOMPtr<nsIObserverService> observerService =
        mozilla::services::GetObserverService();
    MOZ_ASSERT(observerService);
    if (observerService) {
      // Unregister |this| rather than sInstance: sInstance is reset to
      // nullptr by BackgroundHangMonitor::Shutdown(), which may already have
      // run by the time this notification arrives.
      observerService->RemoveObserver(this,
                                      "browser-delayed-startup-finished");
    }
  } else if (!strcmp(aTopic, "profile-after-change")) {
    BackgroundHangMonitor::DisableOnBeta();
    nsCOMPtr<nsIObserverService> observerService =
        mozilla::services::GetObserverService();
    MOZ_ASSERT(observerService);
    if (observerService) {
      // DisableOnBeta() may have called BackgroundHangMonitor::Shutdown(),
      // which clears sInstance; passing |this| keeps the removal valid.
      observerService->RemoveObserver(this, "profile-after-change");
    }
  } else {
    return NS_ERROR_UNEXPECTED;
  }

  return NS_OK;
}
171
172 /**
173 * BackgroundHangThread is a per-thread object that is used
174 * by all instances of BackgroundHangMonitor to monitor hangs.
175 */
176 class BackgroundHangThread : public LinkedListElement<BackgroundHangThread> {
177 private:
178 static MOZ_THREAD_LOCAL(BackgroundHangThread*) sTlsKey;
179 static bool sTlsKeyInitialized;
180
181 BackgroundHangThread(const BackgroundHangThread&);
182 BackgroundHangThread& operator=(const BackgroundHangThread&);
183 ~BackgroundHangThread();
184
185 /* Keep a reference to the manager, so we can keep going even
186 after BackgroundHangManager::Shutdown is called. */
187 const RefPtr<BackgroundHangManager> mManager;
188 // Unique thread ID for identification
189 const PRThread* mThreadID;
190
191 void Update();
192
193 public:
194 NS_INLINE_DECL_REFCOUNTING(BackgroundHangThread)
195 /**
196 * Returns the BackgroundHangThread associated with the
197 * running thread. Note that this will not find private
198 * BackgroundHangThread threads.
199 *
200 * @return BackgroundHangThread*, or nullptr if no thread
201 * is found.
202 */
203 static BackgroundHangThread* FindThread();
204
Startup()205 static void Startup() {
206 /* We can tolerate init() failing. */
207 sTlsKeyInitialized = sTlsKey.init();
208 }
209
210 // Hang timeout
211 const TimeDuration mTimeout;
212 // PermaHang timeout
213 const TimeDuration mMaxTimeout;
214 // Time at last activity
215 TimeStamp mLastActivity;
216 // Time when a hang started
217 TimeStamp mHangStart;
218 // Is the thread in a hang
219 bool mHanging;
220 // Is the thread in a waiting state
221 bool mWaiting;
222 // Is the thread dedicated to a single BackgroundHangMonitor
223 BackgroundHangMonitor::ThreadType mThreadType;
224 #ifdef MOZ_GECKO_PROFILER
225 // Platform-specific helper to get hang stacks
226 ThreadStackHelper mStackHelper;
227 #endif
228 // Stack of current hang
229 HangStack mHangStack;
230 // Annotations for the current hang
231 BackgroundHangAnnotations mAnnotations;
232 // Annotators registered for this thread
233 BackgroundHangAnnotators mAnnotators;
234 // The name of the runnable which is hanging the current process
235 nsCString mRunnableName;
236 // The name of the thread which is being monitored
237 nsCString mThreadName;
238
239 BackgroundHangThread(const char* aName, uint32_t aTimeoutMs,
240 uint32_t aMaxTimeoutMs,
241 BackgroundHangMonitor::ThreadType aThreadType =
242 BackgroundHangMonitor::THREAD_SHARED);
243
244 // Report a hang; aManager->mLock IS locked. The hang will be processed
245 // off-main-thread, and will then be submitted back.
246 void ReportHang(TimeDuration aHangTime,
247 PersistedToDisk aPersistedToDisk = PersistedToDisk::No);
248 // Report a permanent hang; aManager->mLock IS locked
249 void ReportPermaHang();
250 // Called by BackgroundHangMonitor::NotifyActivity
NotifyActivity()251 void NotifyActivity() {
252 MonitorAutoLock autoLock(mManager->mLock);
253 Update();
254 }
255 // Called by BackgroundHangMonitor::NotifyWait
NotifyWait()256 void NotifyWait() {
257 MonitorAutoLock autoLock(mManager->mLock);
258
259 if (mWaiting) {
260 return;
261 }
262
263 Update();
264 if (mHanging) {
265 // We were hanging! We're done with that now, so let's report it.
266 // ReportHang() doesn't do much work on the current thread, and is
267 // safe to call from any thread as long as we're holding the lock.
268 ReportHang(mLastActivity - mHangStart);
269 mHanging = false;
270 }
271 mWaiting = true;
272 }
273
274 // Returns true if this thread is (or might be) shared between other
275 // BackgroundHangMonitors for the monitored thread.
IsShared()276 bool IsShared() {
277 return mThreadType == BackgroundHangMonitor::THREAD_SHARED;
278 }
279 };
280
281 StaticRefPtr<BackgroundHangManager> BackgroundHangManager::sInstance;
282 bool BackgroundHangManager::sDisabled = false;
283
284 MOZ_THREAD_LOCAL(BackgroundHangThread*) BackgroundHangThread::sTlsKey;
285 bool BackgroundHangThread::sTlsKeyInitialized;
286
BackgroundHangManager()287 BackgroundHangManager::BackgroundHangManager()
288 : mShutdown(false), mLock("BackgroundHangManager") {
289 // Lock so we don't race against the new monitor thread
290 MonitorAutoLock autoLock(mLock);
291
292 mHangMonitorThread = PR_CreateThread(
293 PR_USER_THREAD, MonitorThread, this, PR_PRIORITY_LOW, PR_GLOBAL_THREAD,
294 PR_JOINABLE_THREAD, nsIThreadManager::DEFAULT_STACK_SIZE);
295
296 MOZ_ASSERT(mHangMonitorThread, "Failed to create BHR monitor thread");
297
298 DebugOnly<nsresult> rv = NS_NewNamedThread(
299 "BHMgr Processor", getter_AddRefs(mHangProcessingThread));
300 MOZ_ASSERT(NS_SUCCEEDED(rv) && mHangProcessingThread,
301 "Failed to create BHR processing thread");
302 }
303
~BackgroundHangManager()304 BackgroundHangManager::~BackgroundHangManager() {
305 MOZ_ASSERT(mShutdown, "Destruction without Shutdown call");
306 MOZ_ASSERT(mHangThreads.isEmpty(), "Destruction with outstanding monitors");
307 MOZ_ASSERT(mHangMonitorThread, "No monitor thread");
308 MOZ_ASSERT(mHangProcessingThread, "No processing thread");
309
310 // PR_CreateThread could have failed above due to resource limitation
311 if (mHangMonitorThread) {
312 // The monitor thread can only live as long as the instance lives
313 PR_JoinThread(mHangMonitorThread);
314 }
315
316 // Similarly, NS_NewNamedThread above could have failed.
317 if (mHangProcessingThread) {
318 mHangProcessingThread->Shutdown();
319 }
320 }
321
RunMonitorThread()322 void BackgroundHangManager::RunMonitorThread() {
323 // Keep us locked except when waiting
324 MonitorAutoLock autoLock(mLock);
325
326 /* mNow is updated at various intervals determined by waitTime.
327 However, if an update latency is too long (due to CPU scheduling, system
328 sleep, etc.), we don't update mNow at all. This is done so that
329 long latencies in our timing are not detected as hangs. systemTime is
330 used to track TimeStamp::Now() and determine our latency. */
331
332 TimeStamp systemTime = TimeStamp::Now();
333 // Default values for the first iteration of thread loop
334 TimeDuration waitTime;
335 TimeDuration recheckTimeout;
336 TimeStamp lastCheckedCPUUsage = systemTime;
337 TimeDuration checkCPUUsageInterval =
338 TimeDuration::FromMilliseconds(kCheckCPUIntervalMilliseconds);
339
340 while (!mShutdown) {
341 autoLock.Wait(waitTime);
342
343 TimeStamp newTime = TimeStamp::Now();
344 TimeDuration systemInterval = newTime - systemTime;
345 systemTime = newTime;
346
347 if (systemTime - lastCheckedCPUUsage > checkCPUUsageInterval) {
348 Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
349 lastCheckedCPUUsage = systemTime;
350 }
351
352 /* waitTime is a quarter of the shortest timeout value; If our timing
353 latency is low enough (less than half the shortest timeout value),
354 we can update mNow. */
355 if (MOZ_LIKELY(waitTime != TimeDuration::Forever() &&
356 systemInterval < waitTime * 2)) {
357 mNow += systemInterval;
358 }
359
360 /* If it's before the next recheck timeout, and our wait did not get
361 interrupted, we can keep the current waitTime and skip iterating
362 through hang monitors. */
363 if (MOZ_LIKELY(systemInterval < recheckTimeout &&
364 systemInterval >= waitTime)) {
365 recheckTimeout -= systemInterval;
366 continue;
367 }
368
369 /* We are in one of the following scenarios,
370 - Hang or permahang recheck timeout
371 - Thread added/removed
372 - Thread wait or hang ended
373 In all cases, we want to go through our list of hang
374 monitors and update waitTime and recheckTimeout. */
375 waitTime = TimeDuration::Forever();
376 recheckTimeout = TimeDuration::Forever();
377
378 // Locally hold mNow
379 TimeStamp now = mNow;
380
381 // iterate through hang monitors
382 for (BackgroundHangThread* currentThread = mHangThreads.getFirst();
383 currentThread; currentThread = currentThread->getNext()) {
384 if (currentThread->mWaiting) {
385 // Thread is waiting, not hanging
386 continue;
387 }
388 TimeStamp lastActivity = currentThread->mLastActivity;
389 TimeDuration hangTime = now - lastActivity;
390 if (MOZ_UNLIKELY(hangTime >= currentThread->mMaxTimeout)) {
391 // A permahang started
392 // Skip subsequent iterations and tolerate a race on mWaiting here
393 currentThread->mWaiting = true;
394 currentThread->mHanging = false;
395 currentThread->ReportPermaHang();
396 continue;
397 }
398
399 if (MOZ_LIKELY(!currentThread->mHanging)) {
400 if (MOZ_UNLIKELY(hangTime >= currentThread->mTimeout)) {
401 #ifdef MOZ_GECKO_PROFILER
402 // A hang started, collect a stack
403 currentThread->mStackHelper.GetStack(
404 currentThread->mHangStack, currentThread->mRunnableName, true);
405 #endif
406
407 // If we hang immediately on waking, then the most recently collected
408 // CPU usage is going to be an average across the whole time we were
409 // sleeping. Accordingly, we want to make sure that when we hang, we
410 // collect a fresh value.
411 if (systemTime != lastCheckedCPUUsage) {
412 Unused << NS_WARN_IF(mCPUUsageWatcher.CollectCPUUsage().isErr());
413 lastCheckedCPUUsage = systemTime;
414 }
415
416 currentThread->mHangStart = lastActivity;
417 currentThread->mHanging = true;
418 currentThread->mAnnotations =
419 currentThread->mAnnotators.GatherAnnotations();
420 }
421 } else {
422 if (MOZ_LIKELY(lastActivity != currentThread->mHangStart)) {
423 // A hang ended
424 currentThread->ReportHang(now - currentThread->mHangStart);
425 currentThread->mHanging = false;
426 }
427 }
428
429 /* If we are hanging, the next time we check for hang status is when
430 the hang turns into a permahang. If we're not hanging, the next
431 recheck timeout is when we may be entering a hang. */
432 TimeDuration nextRecheck;
433 if (currentThread->mHanging) {
434 nextRecheck = currentThread->mMaxTimeout;
435 } else {
436 nextRecheck = currentThread->mTimeout;
437 }
438 recheckTimeout =
439 TimeDuration::Min(recheckTimeout, nextRecheck - hangTime);
440
441 if (currentThread->mTimeout != TimeDuration::Forever()) {
442 /* We wait for a quarter of the shortest timeout
443 value to give mNow enough granularity. */
444 waitTime =
445 TimeDuration::Min(waitTime, currentThread->mTimeout / (int64_t)4);
446 }
447 }
448 }
449
450 /* We are shutting down now.
451 Wait for all outstanding monitors to unregister. */
452 while (!mHangThreads.isEmpty()) {
453 autoLock.Wait();
454 }
455 }
456
BackgroundHangThread(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,BackgroundHangMonitor::ThreadType aThreadType)457 BackgroundHangThread::BackgroundHangThread(
458 const char* aName, uint32_t aTimeoutMs, uint32_t aMaxTimeoutMs,
459 BackgroundHangMonitor::ThreadType aThreadType)
460 : mManager(BackgroundHangManager::sInstance),
461 mThreadID(PR_GetCurrentThread()),
462 mTimeout(aTimeoutMs == BackgroundHangMonitor::kNoTimeout
463 ? TimeDuration::Forever()
464 : TimeDuration::FromMilliseconds(aTimeoutMs)),
465 mMaxTimeout(aMaxTimeoutMs == BackgroundHangMonitor::kNoTimeout
466 ? TimeDuration::Forever()
467 : TimeDuration::FromMilliseconds(aMaxTimeoutMs)),
468 mLastActivity(mManager->mNow),
469 mHangStart(mLastActivity),
470 mHanging(false),
471 mWaiting(true),
472 mThreadType(aThreadType),
473 mThreadName(aName) {
474 if (sTlsKeyInitialized && IsShared()) {
475 sTlsKey.set(this);
476 }
477 // Lock here because LinkedList is not thread-safe
478 MonitorAutoLock autoLock(mManager->mLock);
479 // Add to thread list
480 mManager->mHangThreads.insertBack(this);
481 // Wake up monitor thread to process new thread
482 autoLock.Notify();
483 }
484
~BackgroundHangThread()485 BackgroundHangThread::~BackgroundHangThread() {
486 // Lock here because LinkedList is not thread-safe
487 MonitorAutoLock autoLock(mManager->mLock);
488 // Remove from thread list
489 remove();
490 // Wake up monitor thread to process removed thread
491 autoLock.Notify();
492
493 // We no longer have a thread
494 if (sTlsKeyInitialized && IsShared()) {
495 sTlsKey.set(nullptr);
496 }
497 }
498
ReportHang(TimeDuration aHangTime,PersistedToDisk aPersistedToDisk)499 void BackgroundHangThread::ReportHang(TimeDuration aHangTime,
500 PersistedToDisk aPersistedToDisk) {
501 // Recovered from a hang; called on the monitor thread
502 // mManager->mLock IS locked
503
504 HangDetails hangDetails(aHangTime,
505 nsDependentCString(XRE_GetProcessTypeString()),
506 NOT_REMOTE_TYPE, mThreadName, mRunnableName,
507 std::move(mHangStack), std::move(mAnnotations));
508
509 PersistedToDisk persistedToDisk = aPersistedToDisk;
510 if (aPersistedToDisk == PersistedToDisk::Yes && XRE_IsParentProcess() &&
511 mManager->mPermahangFile) {
512 auto res = WriteHangDetailsToFile(hangDetails, mManager->mPermahangFile);
513 persistedToDisk = res.isOk() ? PersistedToDisk::Yes : PersistedToDisk::No;
514 }
515
516 // If the hang processing thread exists, we can process the native stack
517 // on it. Otherwise, we are unable to report a native stack, so we just
518 // report without one.
519 if (mManager->mHangProcessingThread) {
520 nsCOMPtr<nsIRunnable> processHangStackRunnable =
521 new ProcessHangStackRunnable(std::move(hangDetails), persistedToDisk);
522 mManager->mHangProcessingThread->Dispatch(
523 processHangStackRunnable.forget());
524 } else {
525 NS_WARNING("Unable to report native stack without a BHR processing thread");
526 RefPtr<nsHangDetails> hd =
527 new nsHangDetails(std::move(hangDetails), persistedToDisk);
528 hd->Submit();
529 }
530
531 // If the profiler is enabled, add a marker.
532 #ifdef MOZ_GECKO_PROFILER
533 if (profiler_can_accept_markers()) {
534 struct HangMarker {
535 static constexpr Span<const char> MarkerTypeName() {
536 return MakeStringSpan("BHR-detected hang");
537 }
538 static void StreamJSONMarkerData(
539 baseprofiler::SpliceableJSONWriter& aWriter) {}
540 static MarkerSchema MarkerTypeDisplay() {
541 using MS = MarkerSchema;
542 MS schema{MS::Location::markerChart, MS::Location::markerTable};
543 return schema;
544 }
545 };
546
547 const TimeStamp endTime = TimeStamp::NowUnfuzzed();
548 const TimeStamp startTime = endTime - aHangTime;
549 profiler_add_marker("BHR-detected hang", geckoprofiler::category::OTHER,
550 {MarkerThreadId(mStackHelper.GetThreadId()),
551 MarkerTiming::Interval(startTime, endTime)},
552 HangMarker{});
553 }
554 #endif
555 }
556
ReportPermaHang()557 void BackgroundHangThread::ReportPermaHang() {
558 // Permanently hanged; called on the monitor thread
559 // mManager->mLock IS locked
560
561 // The significance of a permahang is that it's likely that we won't ever
562 // recover and be allowed to submit this hang. On the parent thread, we
563 // compensate for this by writing the hang details to disk on this thread,
564 // and in our next session we'll try to read those details
565 ReportHang(mMaxTimeout, PersistedToDisk::Yes);
566 }
567
Update()568 MOZ_ALWAYS_INLINE void BackgroundHangThread::Update() {
569 TimeStamp now = mManager->mNow;
570 if (mWaiting) {
571 mLastActivity = now;
572 mWaiting = false;
573 /* We have to wake up the manager thread because when all threads
574 are waiting, the manager thread waits indefinitely as well. */
575 mManager->Wakeup();
576 } else {
577 TimeDuration duration = now - mLastActivity;
578 if (MOZ_UNLIKELY(duration >= mTimeout)) {
579 /* Wake up the manager thread to tell it that a hang ended */
580 mManager->Wakeup();
581 }
582 mLastActivity = now;
583 }
584 }
585
FindThread()586 BackgroundHangThread* BackgroundHangThread::FindThread() {
587 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
588 if (BackgroundHangManager::sInstance == nullptr) {
589 MOZ_ASSERT(BackgroundHangManager::sDisabled,
590 "BackgroundHandleManager is not initialized");
591 return nullptr;
592 }
593
594 if (sTlsKeyInitialized) {
595 // Use TLS if available
596 return sTlsKey.get();
597 }
598 // If TLS is unavailable, we can search through the thread list
599 RefPtr<BackgroundHangManager> manager(BackgroundHangManager::sInstance);
600 MOZ_ASSERT(manager, "Creating BackgroundHangMonitor after shutdown");
601
602 PRThread* threadID = PR_GetCurrentThread();
603 // Lock thread list for traversal
604 MonitorAutoLock autoLock(manager->mLock);
605 for (BackgroundHangThread* thread = manager->mHangThreads.getFirst(); thread;
606 thread = thread->getNext()) {
607 if (thread->mThreadID == threadID && thread->IsShared()) {
608 return thread;
609 }
610 }
611 #endif
612 // Current thread is not initialized
613 return nullptr;
614 }
615
ShouldDisableOnBeta(const nsCString & clientID)616 bool BackgroundHangMonitor::ShouldDisableOnBeta(const nsCString& clientID) {
617 MOZ_ASSERT(clientID.Length() == 36, "clientID is invalid");
618 const char* suffix = clientID.get() + clientID.Length() - 4;
619 return strtol(suffix, NULL, 16) % BHR_BETA_MOD;
620 }
621
DisableOnBeta()622 bool BackgroundHangMonitor::DisableOnBeta() {
623 nsAutoCString clientID;
624 nsresult rv =
625 Preferences::GetCString("toolkit.telemetry.cachedClientID", clientID);
626 bool telemetryEnabled = Telemetry::CanRecordPrereleaseData();
627
628 if (!telemetryEnabled || NS_FAILED(rv) ||
629 BackgroundHangMonitor::ShouldDisableOnBeta(clientID)) {
630 if (XRE_IsParentProcess()) {
631 BackgroundHangMonitor::Shutdown();
632 } else {
633 BackgroundHangManager::sDisabled = true;
634 }
635 return true;
636 }
637
638 return false;
639 }
640
Startup()641 void BackgroundHangMonitor::Startup() {
642 MOZ_RELEASE_ASSERT(NS_IsMainThread());
643 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
644 MOZ_ASSERT(!BackgroundHangManager::sInstance, "Already initialized");
645
646 if (XRE_IsContentProcess() &&
647 StaticPrefs::toolkit_content_background_hang_monitor_disabled()) {
648 BackgroundHangManager::sDisabled = true;
649 return;
650 }
651
652 nsCOMPtr<nsIObserverService> observerService =
653 mozilla::services::GetObserverService();
654 MOZ_ASSERT(observerService);
655
656 if (!strcmp(MOZ_STRINGIFY(MOZ_UPDATE_CHANNEL), "beta")) {
657 if (XRE_IsParentProcess()) { // cached ClientID hasn't been read yet
658 BackgroundHangThread::Startup();
659 BackgroundHangManager::sInstance = new BackgroundHangManager();
660 Unused << NS_WARN_IF(
661 BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
662 observerService->AddObserver(BackgroundHangManager::sInstance,
663 "profile-after-change", false);
664 return;
665 } else if (DisableOnBeta()) {
666 return;
667 }
668 }
669
670 BackgroundHangThread::Startup();
671 BackgroundHangManager::sInstance = new BackgroundHangManager();
672 Unused << NS_WARN_IF(
673 BackgroundHangManager::sInstance->mCPUUsageWatcher.Init().isErr());
674 if (XRE_IsParentProcess()) {
675 observerService->AddObserver(BackgroundHangManager::sInstance,
676 "browser-delayed-startup-finished", false);
677 }
678 #endif
679 }
680
Shutdown()681 void BackgroundHangMonitor::Shutdown() {
682 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
683 if (BackgroundHangManager::sDisabled) {
684 MOZ_ASSERT(!BackgroundHangManager::sInstance, "Initialized");
685 return;
686 }
687
688 MOZ_ASSERT(BackgroundHangManager::sInstance, "Not initialized");
689 BackgroundHangManager::sInstance->mCPUUsageWatcher.Uninit();
690 /* Scope our lock inside Shutdown() because the sInstance object can
691 be destroyed as soon as we set sInstance to nullptr below, and
692 we don't want to hold the lock when it's being destroyed. */
693 BackgroundHangManager::sInstance->Shutdown();
694 BackgroundHangManager::sInstance = nullptr;
695 BackgroundHangManager::sDisabled = true;
696 #endif
697 }
698
BackgroundHangMonitor(const char * aName,uint32_t aTimeoutMs,uint32_t aMaxTimeoutMs,ThreadType aThreadType)699 BackgroundHangMonitor::BackgroundHangMonitor(const char* aName,
700 uint32_t aTimeoutMs,
701 uint32_t aMaxTimeoutMs,
702 ThreadType aThreadType)
703 : mThread(aThreadType == THREAD_SHARED ? BackgroundHangThread::FindThread()
704 : nullptr) {
705 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
706 # ifdef MOZ_VALGRIND
707 // If we're running on Valgrind, we'll be making forward progress at a
708 // rate of somewhere between 1/25th and 1/50th of normal. This causes the
709 // BHR to capture a lot of stacks, which slows us down even more. As an
710 // attempt to avoid the worst of this, scale up all presented timeouts by
711 // a factor of thirty, and add six seconds so as to impose a six second
712 // floor on all timeouts. For a non-Valgrind-enabled build, or for an
713 // enabled build which isn't running on Valgrind, the timeouts are
714 // unchanged.
715 if (RUNNING_ON_VALGRIND) {
716 const uint32_t scaleUp = 30;
717 const uint32_t extraMs = 6000;
718 if (aTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
719 aTimeoutMs *= scaleUp;
720 aTimeoutMs += extraMs;
721 }
722 if (aMaxTimeoutMs != BackgroundHangMonitor::kNoTimeout) {
723 aMaxTimeoutMs *= scaleUp;
724 aMaxTimeoutMs += extraMs;
725 }
726 }
727 # endif
728
729 if (!BackgroundHangManager::sDisabled && !mThread) {
730 mThread =
731 new BackgroundHangThread(aName, aTimeoutMs, aMaxTimeoutMs, aThreadType);
732 }
733 #endif
734 }
735
BackgroundHangMonitor()736 BackgroundHangMonitor::BackgroundHangMonitor()
737 : mThread(BackgroundHangThread::FindThread()) {
738 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
739 if (BackgroundHangManager::sDisabled) {
740 return;
741 }
742 #endif
743 }
744
745 BackgroundHangMonitor::~BackgroundHangMonitor() = default;
746
NotifyActivity()747 void BackgroundHangMonitor::NotifyActivity() {
748 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
749 if (mThread == nullptr) {
750 MOZ_ASSERT(BackgroundHangManager::sDisabled,
751 "This thread is not initialized for hang monitoring");
752 return;
753 }
754
755 if (Telemetry::CanRecordExtended()) {
756 mThread->NotifyActivity();
757 }
758 #endif
759 }
760
NotifyWait()761 void BackgroundHangMonitor::NotifyWait() {
762 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
763 if (mThread == nullptr) {
764 MOZ_ASSERT(BackgroundHangManager::sDisabled,
765 "This thread is not initialized for hang monitoring");
766 return;
767 }
768
769 if (Telemetry::CanRecordExtended()) {
770 mThread->NotifyWait();
771 }
772 #endif
773 }
774
RegisterAnnotator(BackgroundHangAnnotator & aAnnotator)775 bool BackgroundHangMonitor::RegisterAnnotator(
776 BackgroundHangAnnotator& aAnnotator) {
777 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
778 BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
779 if (!thisThread) {
780 return false;
781 }
782 return thisThread->mAnnotators.Register(aAnnotator);
783 #else
784 return false;
785 #endif
786 }
787
UnregisterAnnotator(BackgroundHangAnnotator & aAnnotator)788 bool BackgroundHangMonitor::UnregisterAnnotator(
789 BackgroundHangAnnotator& aAnnotator) {
790 #ifdef MOZ_ENABLE_BACKGROUND_HANG_MONITOR
791 BackgroundHangThread* thisThread = BackgroundHangThread::FindThread();
792 if (!thisThread) {
793 return false;
794 }
795 return thisThread->mAnnotators.Unregister(aAnnotator);
796 #else
797 return false;
798 #endif
799 }
800
801 } // namespace mozilla
802