1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4  * License, v. 2.0. If a copy of the MPL was not distributed with this
5  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6 
7 // PHC is a probabilistic heap checker. A tiny fraction of randomly chosen heap
8 // allocations are subject to some expensive checking via the use of OS page
9 // access protection. A failed check triggers a crash, whereupon useful
10 // information about the failure is put into the crash report. The cost and
11 // coverage for each user is minimal, but spread over the entire user base the
12 // coverage becomes significant.
13 //
14 // The idea comes from Chromium, where it is called GWP-ASAN. (Firefox uses PHC
15 // as the name because GWP-ASAN is long, awkward, and doesn't have any
16 // particular meaning.)
17 //
18 // In the current implementation up to 64 allocations per process can become
19 // PHC allocations. These allocations must be page-sized or smaller. Each PHC
20 // allocation gets its own page, and when the allocation is freed its page is
21 // marked inaccessible until the page is reused for another allocation. This
22 // means that a use-after-free defect (which includes double-frees) will be
23 // caught if the use occurs before the page is reused for another allocation.
24 // The crash report will contain stack traces for the allocation site, the free
25 // site, and the use-after-free site, which is often enough to diagnose the
26 // defect.
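//
// For example (a hypothetical sketch; whether any given allocation becomes a
// PHC allocation is random):
//
//   char* p = (char*)malloc(128);   // if chosen by PHC, p gets its own page
//   free(p);                        // the page is marked inaccessible
//   char c = p[0];                  // use-after-free; crashes immediately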
27 //
28 // Also, each PHC allocation is followed by a guard page. The PHC allocation is
29 // positioned so that its end abuts the guard page (or as close as possible,
30 // given alignment constraints). This means that a bounds violation at the end
31 // of the allocation (overflow) will be caught. The crash report will contain
32 // stack traces for the allocation site and the bounds violation use site,
33 // which is often enough to diagnose the defect.
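//
// For example (again hypothetical, and assuming the allocation was chosen by
// PHC):
//
//   char* q = (char*)malloc(4096);  // fills its page; a guard page follows
//   q[4096] = 0;                    // overflow into the guard page; crash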
34 //
35 // (A bounds violation at the start of the allocation (underflow) will not be
36 // caught, unless it is sufficiently large to hit the preceding allocation's
37 // guard page, which is not that likely. It would be possible to look more
38 // assiduously for underflow by randomly placing some allocations at the end of
39 // the page and some at the start of the page, and GWP-ASAN does this. PHC does
40 // not, however, because overflow is likely to be much more common than
41 // underflow in practice.)
42 //
43 // We use a simple heuristic to categorize a guard page access as overflow or
44 // underflow: if the address falls in the lower half of the guard page, we
45 // assume it is overflow, otherwise we assume it is underflow. More
46 // sophisticated heuristics are possible, but this one is very simple, and it is
47 // likely that most overflows/underflows in practice are very close to the page
48 // boundary.
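//
// A sketch of that heuristic (illustrative only; not a quotation of the actual
// classification code):
//
//   bool isOverflow = (uintptr_t(addr) % kPageSize) < kPageSize / 2;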
49 //
50 // The design space for the randomization strategy is large. The current
51 // implementation has a large random delay before it starts operating, and a
52 // small random delay between each PHC allocation attempt. Each freed PHC
53 // allocation is quarantined for a medium random delay before being reused, in
54 // order to increase the chance of catching UAFs.
55 //
56 // The basic cost of PHC's operation is as follows.
57 //
58 // - The physical memory cost is 64 * 4 KiB = 256 KiB per process (assuming 4
59 //   KiB pages) plus some metadata (including stack traces) for each page.
60 //
61 // - The virtual memory cost is the physical memory cost plus the guard pages:
62 //   another 64 * 4 KiB = 256 KiB per process. PHC is currently only enabled on
63 //   64-bit platforms so the impact of the virtual memory usage is negligible.
64 //
65 // - Every allocation requires a size check and a decrement-and-check of an
66 //   atomic counter. When the counter reaches zero a PHC allocation can occur,
67 //   which involves marking a page as accessible and getting a stack trace for
68 //   the allocation site. Otherwise, mozjemalloc performs the allocation.
69 //
70 // - Every deallocation requires a range check on the pointer to see if it
71 //   involves a PHC allocation. (The choice to only do PHC allocations that are
72 //   a page or smaller enables this range check, because the 64 pages are
73 //   contiguous. Allowing larger allocations would make this more complicated,
74 //   and we definitely don't want something as slow as a hash table lookup on
75 //   every deallocation.) PHC deallocations involve marking a page as
76 //   inaccessible and getting a stack trace for the deallocation site.
77 //
78 // Note that calls to realloc(), free(), and malloc_usable_size() will
79 // immediately crash if the given pointer falls within a page allocation's
80 // page, but does not point to the start of the allocation itself.
81 //
82 //   void* p = malloc(64);
83 //   free(p + 1);     // p+1 doesn't point to the allocation start; crash
84 //
85 // Such crashes will not have the PHC fields in the crash report.
86 //
87 // PHC-specific tests can be run with the following commands:
88 // - gtests: `./mach gtest '*PHC*'`
89 // - xpcshell-tests: `./mach test toolkit/crashreporter/test/unit`
90 //   - This runs some non-PHC tests as well.
91 
92 #include "PHC.h"
93 
94 #include <stdlib.h>
95 #include <time.h>
96 
97 #include <algorithm>
98 
99 #ifdef XP_WIN
100 #  include <process.h>
101 #else
102 #  include <sys/mman.h>
103 #  include <sys/types.h>
104 #  include <pthread.h>
105 #  include <unistd.h>
106 #endif
107 
108 #include "replace_malloc.h"
109 #include "FdPrintf.h"
110 #include "Mutex.h"
111 #include "mozilla/Assertions.h"
112 #include "mozilla/Atomics.h"
113 #include "mozilla/Attributes.h"
114 #include "mozilla/CheckedInt.h"
115 #include "mozilla/Maybe.h"
116 #include "mozilla/StackWalk.h"
117 #include "mozilla/ThreadLocal.h"
118 #include "mozilla/XorShift128PlusRNG.h"
119 
120 using namespace mozilla;
121 
122 //---------------------------------------------------------------------------
123 // Utilities
124 //---------------------------------------------------------------------------
125 
126 #ifdef ANDROID
127 // Android doesn't have pthread_atfork defined in pthread.h.
128 extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void),
129                                          void (*)(void));
130 #endif
131 
132 #ifndef DISALLOW_COPY_AND_ASSIGN
133 #  define DISALLOW_COPY_AND_ASSIGN(T) \
134     T(const T&);                      \
135     void operator=(const T&)
136 #endif
137 
138 static malloc_table_t sMallocTable;
139 
140 // This class provides infallible operations for the small number of heap
141 // allocations that PHC does for itself. It would be nice if we could use the
142 // InfallibleAllocPolicy from mozalloc, but PHC cannot use mozalloc.
143 class InfallibleAllocPolicy {
144  public:
145   static void AbortOnFailure(const void* aP) {
146     if (!aP) {
147       MOZ_CRASH("PHC failed to allocate");
148     }
149   }
150 
151   template <class T>
152   static T* new_() {
153     void* p = sMallocTable.malloc(sizeof(T));
154     AbortOnFailure(p);
155     return new (p) T;
156   }
157 };
158 
159 //---------------------------------------------------------------------------
160 // Stack traces
161 //---------------------------------------------------------------------------
162 
163 // This code is similar to the equivalent code within DMD.
164 
165 class StackTrace : public phc::StackTrace {
166  public:
167   StackTrace() : phc::StackTrace() {}
168 
169   void Clear() { mLength = 0; }
170 
171   void Fill();
172 
173  private:
174   static void StackWalkCallback(uint32_t aFrameNumber, void* aPc, void* aSp,
175                                 void* aClosure) {
176     StackTrace* st = (StackTrace*)aClosure;
177     MOZ_ASSERT(st->mLength < kMaxFrames);
178     st->mPcs[st->mLength] = aPc;
179     st->mLength++;
180     MOZ_ASSERT(st->mLength == aFrameNumber);
181   }
182 };
183 
184 // WARNING WARNING WARNING: this function must only be called when GMut::sMutex
185 // is *not* locked, otherwise we might get deadlocks.
186 //
187 // How? On Windows, MozStackWalk() can lock a mutex, M, from the shared library
188 // loader. Another thread might call malloc() while holding M locked (when
189 // loading a shared library) and try to lock GMut::sMutex, causing a deadlock.
190 // So GMut::sMutex can't be locked during the call to MozStackWalk(). (For
191 // details, see https://bugzilla.mozilla.org/show_bug.cgi?id=374829#c8. On
192 // Linux, something similar can happen; see bug 824340. So we just disallow it
193 // on all platforms.)
194 //
195 // In DMD, to avoid this problem we temporarily unlock the equivalent mutex for
196 // the MozStackWalk() call. But that's grotty, and things are a bit different
197 // here, so we just require that stack traces be obtained before locking
198 // GMut::sMutex.
199 //
200 // Unfortunately, there is no reliable way at compile-time or run-time to ensure
201 // this pre-condition. Hence this large comment.
202 //
203 void StackTrace::Fill() {
204   mLength = 0;
205 
206 #if defined(XP_WIN) && defined(_M_IX86)
207   // This avoids MozStackWalk(), which causes unusably slow startup on Win32
208   // when it is called during static initialization (see bug 1241684).
209   //
210   // This code is cribbed from the Gecko Profiler, which also uses
211   // FramePointerStackWalk() on Win32: Registers::SyncPopulate() for the
212   // frame pointer, and GetStackTop() for the stack end.
213   CONTEXT context;
214   RtlCaptureContext(&context);
215   void** fp = reinterpret_cast<void**>(context.Ebp);
216 
217   PNT_TIB pTib = reinterpret_cast<PNT_TIB>(NtCurrentTeb());
218   void* stackEnd = static_cast<void*>(pTib->StackBase);
219   FramePointerStackWalk(StackWalkCallback, /* aSkipFrames = */ 0, kMaxFrames,
220                         this, fp, stackEnd);
221 #elif defined(XP_MACOSX)
222   // This avoids MozStackWalk(), which has become unusably slow on Mac due to
223   // changes in libunwind.
224   //
225   // This code is cribbed from the Gecko Profiler, which also uses
226   // FramePointerStackWalk() on Mac: Registers::SyncPopulate() for the frame
227   // pointer, and GetStackTop() for the stack end.
228   void** fp;
229   asm(
230       // Dereference %rbp to get previous %rbp
231       "movq (%%rbp), %0\n\t"
232       : "=r"(fp));
233   void* stackEnd = pthread_get_stackaddr_np(pthread_self());
234   FramePointerStackWalk(StackWalkCallback, /* skipFrames = */ 0, kMaxFrames,
235                         this, fp, stackEnd);
236 #else
237   MozStackWalk(StackWalkCallback, /* aSkipFrames = */ 0, kMaxFrames, this);
238 #endif
239 }
240 
241 //---------------------------------------------------------------------------
242 // Logging
243 //---------------------------------------------------------------------------
244 
245 // Change this to 1 to enable some PHC logging. Useful for debugging.
246 #define PHC_LOGGING 0
247 
248 #if PHC_LOGGING
249 
250 static size_t GetPid() { return size_t(getpid()); }
251 
252 static size_t GetTid() {
253 #  if defined(XP_WIN)
254   return size_t(GetCurrentThreadId());
255 #  else
256   return size_t(pthread_self());
257 #  endif
258 }
259 
260 #  if defined(XP_WIN)
261 #    define LOG_STDERR \
262       reinterpret_cast<intptr_t>(GetStdHandle(STD_ERROR_HANDLE))
263 #  else
264 #    define LOG_STDERR 2
265 #  endif
266 #  define LOG(fmt, ...)                                                \
267     FdPrintf(LOG_STDERR, "PHC[%zu,%zu,~%zu] " fmt, GetPid(), GetTid(), \
268              size_t(GAtomic::Now()), __VA_ARGS__)
269 
270 #else
271 
272 #  define LOG(fmt, ...)
273 
274 #endif  // PHC_LOGGING
275 
276 //---------------------------------------------------------------------------
277 // Global state
278 //---------------------------------------------------------------------------
279 
280 // Throughout this entire file time is measured as the number of sub-page
281 // allocations performed (by PHC and mozjemalloc combined). `Time` is 64-bit
282 // because we could have more than 2**32 allocations in a long-running session.
283 // `Delay` is 32-bit because the delays used within PHC are always much smaller
284 // than 2**32.
285 using Time = uint64_t;   // A moment in time.
286 using Delay = uint32_t;  // A time duration.
287 
288 // PHC only runs if the page size is 4 KiB; anything more is uncommon and would
289 // use too much memory. So we hardwire this size.
290 static const size_t kPageSize = 4096;
291 
292 // There are two kinds of page.
293 // - Allocation pages, from which allocations are made.
294 // - Guard pages, which are never touched by PHC.
295 //
296 // These page kinds are interleaved; each allocation page has a guard page on
297 // either side.
298 static const size_t kNumAllocPages = 64;
299 static const size_t kNumAllPages = kNumAllocPages * 2 + 1;
300 
301 // The total size of the allocation pages and guard pages.
302 static const size_t kAllPagesSize = kNumAllPages * kPageSize;
303 
304 // The junk value used to fill new allocations in debug builds. It's the same
305 // value as the one used by mozjemalloc. PHC applies it unconditionally in
306 // debug builds. Unlike mozjemalloc, PHC doesn't consult the MALLOC_OPTIONS
307 // environment variable to possibly change that behaviour.
308 //
309 // Also note that, unlike mozjemalloc, PHC doesn't have a poison value for freed
310 // allocations because freed allocations are protected by OS page protection.
311 const uint8_t kAllocJunk = 0xe4;
312 
313 // The maximum time.
314 static const Time kMaxTime = ~(Time(0));
315 
316 // The average delay before doing any page allocations at the start of a
317 // process. Note that roughly 1 million allocations occur in the main process
318 // while starting the browser. The delay range is 1..kAvgFirstAllocDelay*2.
319 static const Delay kAvgFirstAllocDelay = 512 * 1024;
320 
321 // The average delay until the next attempted page allocation, once we get past
322 // the first delay. The delay range is 1..kAvgAllocDelay*2.
323 static const Delay kAvgAllocDelay = 16 * 1024;
324 
325 // The average delay before reusing a freed page. Should be significantly larger
326 // than kAvgAllocDelay, otherwise there's not much point in having it. The delay
327 // range is (kAvgPageReuseDelay / 2)..(kAvgPageReuseDelay / 2 * 3). This
328 // differs from the other delay ranges in not having a minimum of 1, because
329 // such a short delay would make bad stacks in any crash report highly likely.
331 static const Delay kAvgPageReuseDelay = 256 * 1024;
332 
333 // Truncate aRnd to the range (1 .. AvgDelay*2). If aRnd is random, this
334 // results in an average value of AvgDelay + 0.5, which is close enough to
335 // AvgDelay. AvgDelay must be a power-of-two for speed; this is enforced at
336 // compile time.
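//
// For example, Rnd64ToDelay<kAvgAllocDelay>(aRnd), with kAvgAllocDelay == 16 *
// 1024, yields a value in the range 1..32768 whose mean is 16384.5 for
// uniformly random aRnd.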
337 template <Delay AvgDelay>
338 constexpr Delay Rnd64ToDelay(uint64_t aRnd) {
339   static_assert(IsPowerOfTwo(AvgDelay), "must be a power of two");
340 
341   return aRnd % (AvgDelay * 2) + 1;
342 }
343 
344 // Maps a pointer to a PHC-specific structure:
345 // - Nothing
346 // - A guard page (it is unspecified which one)
347 // - An allocation page (with an index < kNumAllocPages)
348 //
349 // The standard way of handling a PtrKind is to check IsNothing(), and if that
350 // fails, to check IsGuardPage(), and if that fails, to call AllocPageIndex().
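//
// A minimal sketch of that pattern (the replace_* functions below all follow
// it; `aPtr` here is whatever pointer the caller passed in):
//
//   PtrKind pk = gConst->PtrKind(aPtr);
//   if (pk.IsNothing()) {
//     // Not a PHC pointer; defer to mozjemalloc.
//   } else if (pk.IsGuardPage()) {
//     // A bounds violation; crash.
//   } else {
//     uintptr_t index = pk.AllocPageIndex();
//     // Operate on allocation page `index`.
//   }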
351 class PtrKind {
352  private:
353   enum class Tag : uint8_t {
354     Nothing,
355     GuardPage,
356     AllocPage,
357   };
358 
359   Tag mTag;
360   uintptr_t mIndex;  // Only used if mTag == Tag::AllocPage.
361 
362  public:
363   // Detect what a pointer points to. This constructor must be fast because it
364   // is called for every call to free(), realloc(), malloc_usable_size(), and
365   // jemalloc_ptr_info().
366   PtrKind(const void* aPtr, const uint8_t* aPagesStart,
367           const uint8_t* aPagesLimit) {
368     if (!(aPagesStart <= aPtr && aPtr < aPagesLimit)) {
369       mTag = Tag::Nothing;
370     } else {
371       uintptr_t offset = static_cast<const uint8_t*>(aPtr) - aPagesStart;
372       uintptr_t allPageIndex = offset / kPageSize;
373       MOZ_ASSERT(allPageIndex < kNumAllPages);
374       if (allPageIndex & 1) {
375         // Odd-indexed pages are allocation pages.
376         uintptr_t allocPageIndex = allPageIndex / 2;
377         MOZ_ASSERT(allocPageIndex < kNumAllocPages);
378         mTag = Tag::AllocPage;
379         mIndex = allocPageIndex;
380       } else {
381         // Even-indexed pages are guard pages.
382         mTag = Tag::GuardPage;
383       }
384     }
385   }
386 
387   bool IsNothing() const { return mTag == Tag::Nothing; }
388   bool IsGuardPage() const { return mTag == Tag::GuardPage; }
389 
390   // This should only be called after IsNothing() and IsGuardPage() have been
391   // checked and failed.
392   uintptr_t AllocPageIndex() const {
393     MOZ_RELEASE_ASSERT(mTag == Tag::AllocPage);
394     return mIndex;
395   }
396 };
397 
398 // Shared, atomic, mutable global state.
399 class GAtomic {
400  public:
401   static void Init(Delay aFirstDelay) {
402     sAllocDelay = aFirstDelay;
403 
404     LOG("Initial sAllocDelay <- %zu\n", size_t(aFirstDelay));
405   }
406 
407   static Time Now() { return sNow; }
408 
409   static void IncrementNow() { sNow++; }
410 
411   // Decrements the delay and returns the decremented value.
412   static int32_t DecrementDelay() { return --sAllocDelay; }
413 
414   static void SetAllocDelay(Delay aAllocDelay) { sAllocDelay = aAllocDelay; }
415 
416  private:
417   // The current time. Relaxed semantics because it's primarily used for
418   // determining if an allocation can be recycled yet and therefore it doesn't
419   // need to be exact.
420   static Atomic<Time, Relaxed> sNow;
421 
422   // Delay until the next attempt at a page allocation. See the comment in
423   // MaybePageAlloc() for an explanation of why it is a signed integer, and why
424   // it uses ReleaseAcquire semantics.
425   static Atomic<Delay, ReleaseAcquire> sAllocDelay;
426 };
427 
428 Atomic<Time, Relaxed> GAtomic::sNow;
429 Atomic<Delay, ReleaseAcquire> GAtomic::sAllocDelay;
430 
431 // Shared, immutable global state. Initialized by replace_init() and never
432 // changed after that. replace_init() runs early enough that no synchronization
433 // is needed.
434 class GConst {
435  private:
436   // The bounds of the allocated pages.
437   uint8_t* const mPagesStart;
438   uint8_t* const mPagesLimit;
439 
440   // Allocates the allocation pages and the guard pages, contiguously.
441   uint8_t* AllocAllPages() {
442     // Allocate the pages so that they are inaccessible. They are never freed,
443     // because freeing them would only happen at process termination, when it
444     // would be of little use.
445     void* pages =
446 #ifdef XP_WIN
447         VirtualAlloc(nullptr, kAllPagesSize, MEM_RESERVE, PAGE_NOACCESS);
448 #else
449         mmap(nullptr, kAllPagesSize, PROT_NONE, MAP_ANONYMOUS | MAP_PRIVATE, -1,
450              0);
451 #endif
452     if (!pages) {
453       MOZ_CRASH();
454     }
455 
456     return static_cast<uint8_t*>(pages);
457   }
458 
459  public:
460   GConst()
461       : mPagesStart(AllocAllPages()), mPagesLimit(mPagesStart + kAllPagesSize) {
462     LOG("AllocAllPages at %p..%p\n", mPagesStart, mPagesLimit);
463   }
464 
465   class PtrKind PtrKind(const void* aPtr) {
466     class PtrKind pk(aPtr, mPagesStart, mPagesLimit);
467     return pk;
468   }
469 
470   bool IsInFirstGuardPage(const void* aPtr) {
471     return mPagesStart <= aPtr && aPtr < mPagesStart + kPageSize;
472   }
473 
474   // Get the address of the allocation page referred to via an index. Used when
475   // marking the page as accessible/inaccessible.
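  //
  // For example, aIndex 0 maps to the second page in the region (the first
  // page is a guard page), i.e. mPagesStart + 1 * kPageSize.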
476   uint8_t* AllocPagePtr(uintptr_t aIndex) {
477     MOZ_ASSERT(aIndex < kNumAllocPages);
478     // Multiply by two and add one to account for allocation pages *and* guard
479     // pages.
480     return mPagesStart + (2 * aIndex + 1) * kPageSize;
481   }
482 };
483 
484 static GConst* gConst;
485 
486 // On MacOS, the first __thread/thread_local access calls malloc, which leads
487 // to an infinite loop. So we use pthread-based TLS instead, which somehow
488 // doesn't have this problem.
489 #if !defined(XP_DARWIN)
490 #  define PHC_THREAD_LOCAL(T) MOZ_THREAD_LOCAL(T)
491 #else
492 #  define PHC_THREAD_LOCAL(T) \
493     detail::ThreadLocal<T, detail::ThreadLocalKeyStorage>
494 #endif
495 
496 // Thread-local state.
497 class GTls {
498   DISALLOW_COPY_AND_ASSIGN(GTls);
499 
500   // When true, PHC does as little as possible.
501   //
502   // (a) It does not allocate any new page allocations.
503   //
504   // (b) It avoids doing any operations that might call malloc/free/etc., which
505   //     would cause re-entry into PHC. (In practice, MozStackWalk() is the
506   //     only such operation.) Note that calls to the functions in sMallocTable
507   //     are ok.
508   //
509   // For example, replace_malloc() will just fall back to mozjemalloc. However,
510   // operations involving existing allocations are more complex, because those
511   // existing allocations may be page allocations. For example, if
512   // replace_free() is passed a page allocation on a PHC-disabled thread, it
513   // will free the page allocation in the usual way, but it will get a dummy
514   // freeStack in order to avoid calling MozStackWalk(), as per (b) above.
515   //
516   // This single disabling mechanism has two distinct uses.
517   //
518   // - It's used to prevent re-entry into PHC, which can cause correctness
519   //   problems. For example, consider this sequence.
520   //
521   //   1. enter replace_free()
522   //   2. which calls PageFree()
523   //   3. which calls MozStackWalk()
524   //   4. which locks a mutex M, and then calls malloc
525   //   5. enter replace_malloc()
526   //   6. which calls MaybePageAlloc()
527   //   7. which calls MozStackWalk()
528   //   8. which (re)locks a mutex M --> deadlock
529   //
530   //   We avoid this sequence by "disabling" the thread in PageFree() (at step
531   //   2), which causes MaybePageAlloc() to fail, avoiding the call to
532   //   MozStackWalk() (at step 7).
533   //
534   //   In practice, realloc or free of a PHC allocation is unlikely on a thread
535   //   that is disabled because of this use: MozStackWalk() will probably only
536   //   realloc/free allocations that it allocated itself, but those won't be
537   //   page allocations because PHC is disabled before calling MozStackWalk().
538   //
539   //   (Note that MaybePageAlloc() could safely do a page allocation so long as
540   //   it avoided calling MozStackWalk() by getting a dummy allocStack. But it
541   //   wouldn't be useful, and it would prevent the second use below.)
542   //
543   // - It's used to prevent PHC allocations in some tests that rely on
544   //   mozjemalloc's exact allocation behaviour, which PHC does not replicate
545   //   exactly. (Note that (b) isn't necessary for this use -- MozStackWalk()
546   //   could be safely called -- but it is necessary for the first use above.)
547   //
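  // A sketch of how the functions below use this mechanism (see PageFree() and
  // PageRealloc() for the real code):
  //
  //   Maybe<AutoDisableOnCurrentThread> disable;
  //   StackTrace stack;
  //   if (!GTls::IsDisabledOnCurrentThread()) {
  //     disable.emplace();  // disable PHC *before* walking the stack
  //     stack.Fill();
  //   }
  //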
548   static PHC_THREAD_LOCAL(bool) tlsIsDisabled;
549 
550  public:
551   static void Init() {
552     if (!tlsIsDisabled.init()) {
553       MOZ_CRASH();
554     }
555   }
556 
557   static void DisableOnCurrentThread() {
558     MOZ_ASSERT(!GTls::tlsIsDisabled.get());
559     tlsIsDisabled.set(true);
560   }
561 
562   static void EnableOnCurrentThread() {
563     MOZ_ASSERT(GTls::tlsIsDisabled.get());
564     tlsIsDisabled.set(false);
565   }
566 
567   static bool IsDisabledOnCurrentThread() { return tlsIsDisabled.get(); }
568 };
569 
570 PHC_THREAD_LOCAL(bool) GTls::tlsIsDisabled;
571 
572 class AutoDisableOnCurrentThread {
573   DISALLOW_COPY_AND_ASSIGN(AutoDisableOnCurrentThread);
574 
575  public:
576   explicit AutoDisableOnCurrentThread() { GTls::DisableOnCurrentThread(); }
577   ~AutoDisableOnCurrentThread() { GTls::EnableOnCurrentThread(); }
578 };
579 
580 // This type is used as a proof-of-lock token, to make it clear which functions
581 // require sMutex to be locked.
582 using GMutLock = const MutexAutoLock&;
583 
584 // Shared, mutable global state. Protected by sMutex; all accessing functions
585 // take a GMutLock as proof that sMutex is held.
586 class GMut {
587   enum class AllocPageState {
588     NeverAllocated = 0,
589     InUse = 1,
590     Freed = 2,
591   };
592 
593   // Metadata for each allocation page.
594   class AllocPageInfo {
595    public:
596     AllocPageInfo()
597         : mState(AllocPageState::NeverAllocated),
598           mArenaId(),
599           mBaseAddr(nullptr),
600           mAllocStack(),
601           mFreeStack(),
602           mReuseTime(0) {}
603 
604     // The current allocation page state.
605     AllocPageState mState;
606 
607     // The arena that the allocation is nominally from. This isn't meaningful
608     // within PHC, which has no arenas. But it is necessary for reallocation of
609     // page allocations as normal allocations, such as in this code:
610     //
611     //   p = moz_arena_malloc(arenaId, 4096);
612     //   realloc(p, 8192);
613     //
614     // The realloc is more than one page, and thus too large for PHC to handle.
615     // Therefore, if PHC handles the first allocation, it must ask mozjemalloc
616     // to allocate the 8192 bytes in the correct arena, and to do that, it must
617     // call sMallocTable.moz_arena_malloc with the correct arenaId under the
618     // covers. Therefore it must record that arenaId.
619     //
620     // This field is also needed for jemalloc_ptr_info() to work, because it
621     // also returns the arena ID (but only in debug builds).
622     //
623     // - NeverAllocated: must be 0.
624     // - InUse | Freed: can be any valid arena ID value.
625     Maybe<arena_id_t> mArenaId;
626 
627     // The starting address of the allocation. Will not be the same as the page
628     // address unless the allocation is a full page.
629     // - NeverAllocated: must be 0.
630     // - InUse | Freed: must be within the allocation page.
631     uint8_t* mBaseAddr;
632 
633     // Usable size is computed as the number of bytes between the pointer and
634     // the end of the allocation page. This might be bigger than the requested
635     // size, especially if an outsized alignment is requested.
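    //
    // For example (hypothetical numbers): for an allocation whose usable size
    // is 128, mBaseAddr is pagePtr + kPageSize - 128, so this returns
    // kPageSize - (kPageSize - 128) = 128.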
636     size_t UsableSize() const {
637       return mState == AllocPageState::NeverAllocated
638                  ? 0
639                  : kPageSize - (reinterpret_cast<uintptr_t>(mBaseAddr) &
640                                 (kPageSize - 1));
641     }
642 
643     // The allocation stack.
644     // - NeverAllocated: Nothing.
645     // - InUse | Freed: Some.
646     Maybe<StackTrace> mAllocStack;
647 
648     // The free stack.
649     // - NeverAllocated | InUse: Nothing.
650     // - Freed: Some.
651     Maybe<StackTrace> mFreeStack;
652 
653     // The time at which the page is available for reuse, as measured against
654     // GAtomic::sNow. When the page is in use this value will be kMaxTime.
655     // - NeverAllocated: must be 0.
656     // - InUse: must be kMaxTime.
657     // - Freed: must be > 0 and < kMaxTime.
658     Time mReuseTime;
659   };
660 
661  public:
662   // The mutex that protects the other members.
663   static Mutex sMutex;
664 
665   GMut()
666       : mRNG(RandomSeed<0>(), RandomSeed<1>()),
667         mAllocPages(),
668         mNumPageAllocs(0),
669         mPageAllocHits(0),
670         mPageAllocMisses(0) {
671     sMutex.Init();
672   }
673 
674   uint64_t Random64(GMutLock) { return mRNG.next(); }
675 
676   bool IsPageInUse(GMutLock, uintptr_t aIndex) {
677     return mAllocPages[aIndex].mState == AllocPageState::InUse;
678   }
679 
680   // Is the page free? And if so, has enough time passed that we can use it?
681   bool IsPageAllocatable(GMutLock, uintptr_t aIndex, Time aNow) {
682     const AllocPageInfo& page = mAllocPages[aIndex];
683     return page.mState != AllocPageState::InUse && aNow >= page.mReuseTime;
684   }
685 
686   Maybe<arena_id_t> PageArena(GMutLock aLock, uintptr_t aIndex) {
687     const AllocPageInfo& page = mAllocPages[aIndex];
688     AssertAllocPageInUse(aLock, page);
689 
690     return page.mArenaId;
691   }
692 
693   size_t PageUsableSize(GMutLock aLock, uintptr_t aIndex) {
694     const AllocPageInfo& page = mAllocPages[aIndex];
695     AssertAllocPageInUse(aLock, page);
696 
697     return page.UsableSize();
698   }
699 
700   void SetPageInUse(GMutLock aLock, uintptr_t aIndex,
701                     const Maybe<arena_id_t>& aArenaId, uint8_t* aBaseAddr,
702                     const StackTrace& aAllocStack) {
703     AllocPageInfo& page = mAllocPages[aIndex];
704     AssertAllocPageNotInUse(aLock, page);
705 
706     page.mState = AllocPageState::InUse;
707     page.mArenaId = aArenaId;
708     page.mBaseAddr = aBaseAddr;
709     page.mAllocStack = Some(aAllocStack);
710     page.mFreeStack = Nothing();
711     page.mReuseTime = kMaxTime;
712 
713     mNumPageAllocs++;
714     MOZ_RELEASE_ASSERT(mNumPageAllocs <= kNumAllocPages);
715   }
716 
717   void ResizePageInUse(GMutLock aLock, uintptr_t aIndex,
718                        const Maybe<arena_id_t>& aArenaId, uint8_t* aNewBaseAddr,
719                        const StackTrace& aAllocStack) {
720     AllocPageInfo& page = mAllocPages[aIndex];
721     AssertAllocPageInUse(aLock, page);
722 
723     // page.mState is not changed.
724     if (aArenaId.isSome()) {
725       // Crash if the arenas don't match.
726       MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId);
727     }
728     page.mBaseAddr = aNewBaseAddr;
729     // We could just keep the original alloc stack, but the realloc stack is
730     // more recent and therefore seems more useful.
731     page.mAllocStack = Some(aAllocStack);
732     // page.mFreeStack is not changed.
733     // page.mReuseTime is not changed.
734   }
735 
736   void SetPageFreed(GMutLock aLock, uintptr_t aIndex,
737                     const Maybe<arena_id_t>& aArenaId,
738                     const StackTrace& aFreeStack, Delay aReuseDelay) {
739     AllocPageInfo& page = mAllocPages[aIndex];
740     AssertAllocPageInUse(aLock, page);
741 
742     page.mState = AllocPageState::Freed;
743 
744     // page.mArenaId is left unchanged, for jemalloc_ptr_info() calls that
745     // occur after freeing (e.g. in the PtrInfo test in TestJemalloc.cpp).
746     if (aArenaId.isSome()) {
747       // Crash if the arenas don't match.
748       MOZ_RELEASE_ASSERT(page.mArenaId == aArenaId);
749     }
750 
751     // page.mBaseAddr (and hence UsableSize()) is left unchanged, for reporting
752     // on UAF, and for jemalloc_ptr_info() calls that occur after freeing (e.g.
753     // in the PtrInfo test in TestJemalloc.cpp).
754 
755     // page.mAllocStack is left unchanged, for reporting on UAF.
756 
757     page.mFreeStack = Some(aFreeStack);
758     page.mReuseTime = GAtomic::Now() + aReuseDelay;
759 
760     MOZ_RELEASE_ASSERT(mNumPageAllocs > 0);
761     mNumPageAllocs--;
762   }
763 
764   static void CrashOnGuardPage(void* aPtr) {
765     // An operation on a guard page? This is a bounds violation. Deliberately
766     // touch the page in question, to cause a crash that triggers the usual PHC
767     // machinery.
768     LOG("CrashOnGuardPage(%p), bounds violation\n", aPtr);
769     *static_cast<uint8_t*>(aPtr) = 0;
770     MOZ_CRASH("unreachable");
771   }
772 
773   void EnsureValidAndInUse(GMutLock, void* aPtr, uintptr_t aIndex) {
774     const AllocPageInfo& page = mAllocPages[aIndex];
775 
776     // The pointer must point to the start of the allocation.
777     MOZ_RELEASE_ASSERT(page.mBaseAddr == aPtr);
778 
779     if (page.mState == AllocPageState::Freed) {
780       // An operation on a freed page? This is a particular kind of
781       // use-after-free. Deliberately touch the page in question, in order to
782       // cause a crash that triggers the usual PHC machinery. But unlock sMutex
783       // first, because that self-same PHC machinery needs to re-lock it, and
784       // the crash causes non-local control flow so sMutex won't be unlocked
785       // the normal way in the caller.
786       LOG("EnsureValidAndInUse(%p), use-after-free\n", aPtr);
787       sMutex.Unlock();
788       *static_cast<uint8_t*>(aPtr) = 0;
789       MOZ_CRASH("unreachable");
790     }
791   }
792 
793   void FillAddrInfo(GMutLock, uintptr_t aIndex, const void* aBaseAddr,
794                     bool isGuardPage, phc::AddrInfo& aOut) {
795     const AllocPageInfo& page = mAllocPages[aIndex];
796     if (isGuardPage) {
797       aOut.mKind = phc::AddrInfo::Kind::GuardPage;
798     } else {
799       switch (page.mState) {
800         case AllocPageState::NeverAllocated:
801           aOut.mKind = phc::AddrInfo::Kind::NeverAllocatedPage;
802           break;
803 
804         case AllocPageState::InUse:
805           aOut.mKind = phc::AddrInfo::Kind::InUsePage;
806           break;
807 
808         case AllocPageState::Freed:
809           aOut.mKind = phc::AddrInfo::Kind::FreedPage;
810           break;
811 
812         default:
813           MOZ_CRASH();
814       }
815     }
816     aOut.mBaseAddr = page.mBaseAddr;
817     aOut.mUsableSize = page.UsableSize();
818     aOut.mAllocStack = page.mAllocStack;
819     aOut.mFreeStack = page.mFreeStack;
820   }
821 
822   void FillJemallocPtrInfo(GMutLock, const void* aPtr, uintptr_t aIndex,
823                            jemalloc_ptr_info_t* aInfo) {
824     const AllocPageInfo& page = mAllocPages[aIndex];
825     switch (page.mState) {
826       case AllocPageState::NeverAllocated:
827         break;
828 
829       case AllocPageState::InUse: {
830         // Only return TagLiveAlloc if the pointer is within the bounds of the
831         // allocation's usable size.
832         uint8_t* base = page.mBaseAddr;
833         uint8_t* limit = base + page.UsableSize();
834         if (base <= aPtr && aPtr < limit) {
835           *aInfo = {TagLiveAlloc, page.mBaseAddr, page.UsableSize(),
836                     page.mArenaId.valueOr(0)};
837           return;
838         }
839         break;
840       }
841 
842       case AllocPageState::Freed: {
843         // Only return TagFreedAlloc if the pointer is within the bounds of the
844         // former allocation's usable size.
845         uint8_t* base = page.mBaseAddr;
846         uint8_t* limit = base + page.UsableSize();
847         if (base <= aPtr && aPtr < limit) {
848           *aInfo = {TagFreedAlloc, page.mBaseAddr, page.UsableSize(),
849                     page.mArenaId.valueOr(0)};
850           return;
851         }
852         break;
853       }
854 
855       default:
856         MOZ_CRASH();
857     }
858 
859     // Pointers into guard pages will end up here, as will pointers into
860     // allocation pages that aren't within the allocation's bounds.
861     *aInfo = {TagUnknown, nullptr, 0, 0};
862   }
863 
864   static void prefork() { sMutex.Lock(); }
865   static void postfork() { sMutex.Unlock(); }
866 
867   void IncPageAllocHits(GMutLock) { mPageAllocHits++; }
868   void IncPageAllocMisses(GMutLock) { mPageAllocMisses++; }
869 
870   size_t NumPageAllocs(GMutLock) { return mNumPageAllocs; }
871 
872   size_t PageAllocHits(GMutLock) { return mPageAllocHits; }
873   size_t PageAllocAttempts(GMutLock) {
874     return mPageAllocHits + mPageAllocMisses;
875   }
876 
877   // This is an integer because FdPrintf only supports integer printing.
878   size_t PageAllocHitRate(GMutLock) {
879     return mPageAllocHits * 100 / (mPageAllocHits + mPageAllocMisses);
880   }
881 
882  private:
883   template <int N>
884   uint64_t RandomSeed() {
885     // An older version of this code used RandomUint64() here, but on Mac that
886     // function uses arc4random(), which can allocate, which would cause
887     // re-entry, which would be bad. So we just use time() and a local variable
888     // address. These are mediocre sources of entropy, but good enough for PHC.
889     static_assert(N == 0 || N == 1, "must be 0 or 1");
890     uint64_t seed;
891     if (N == 0) {
892       time_t t = time(nullptr);
893       seed = t ^ (t << 32);
894     } else {
895       seed = uintptr_t(&seed) ^ (uintptr_t(&seed) << 32);
896     }
897     return seed;
898   }
899 
900   void AssertAllocPageInUse(GMutLock, const AllocPageInfo& aPage) {
901     MOZ_ASSERT(aPage.mState == AllocPageState::InUse);
902     // There is nothing to assert about aPage.mArenaId.
903     MOZ_ASSERT(aPage.mBaseAddr);
904     MOZ_ASSERT(aPage.UsableSize() > 0);
905     MOZ_ASSERT(aPage.mAllocStack.isSome());
906     MOZ_ASSERT(aPage.mFreeStack.isNothing());
907     MOZ_ASSERT(aPage.mReuseTime == kMaxTime);
908   }
909 
910   void AssertAllocPageNotInUse(GMutLock, const AllocPageInfo& aPage) {
911     // We can assert a lot about `NeverAllocated` pages, but not much about
912     // `Freed` pages.
913 #ifdef DEBUG
914     bool isFresh = aPage.mState == AllocPageState::NeverAllocated;
915     MOZ_ASSERT(isFresh || aPage.mState == AllocPageState::Freed);
916     MOZ_ASSERT_IF(isFresh, aPage.mArenaId == Nothing());
917     MOZ_ASSERT(isFresh == (aPage.mBaseAddr == nullptr));
918     MOZ_ASSERT(isFresh == (aPage.mAllocStack.isNothing()));
919     MOZ_ASSERT(isFresh == (aPage.mFreeStack.isNothing()));
920     MOZ_ASSERT(aPage.mReuseTime != kMaxTime);
921 #endif
922   }
923 
924   // RNG for deciding which allocations to treat specially. It doesn't need to
925   // be high quality.
926   //
927   // This is a raw pointer for the reason explained in the comment above
928   // GMut's constructor. Don't change it to UniquePtr or anything like that.
929   non_crypto::XorShift128PlusRNG mRNG;
930 
931   AllocPageInfo mAllocPages[kNumAllocPages];
932 
933   // How many page allocs are currently in use (the max is kNumAllocPages).
934   size_t mNumPageAllocs;
935 
936   // How many allocations that could have been page allocs actually were? As
937   // constrained by kNumAllocPages. If the hit ratio isn't close to 100% it's
938   // likely that the global constants are poorly chosen.
939   size_t mPageAllocHits;
940   size_t mPageAllocMisses;
941 };
942 
943 Mutex GMut::sMutex;
944 
945 static GMut* gMut;
946 
947 //---------------------------------------------------------------------------
948 // Page allocation operations
949 //---------------------------------------------------------------------------
950 
951 // Attempt a page allocation if the time and the size are right. Allocated
952 // memory is zeroed if aZero is true. On failure, the caller should attempt a
953 // normal allocation via sMallocTable. Can be called in a context where
954 // GMut::sMutex is locked.
955 static void* MaybePageAlloc(const Maybe<arena_id_t>& aArenaId, size_t aReqSize,
956                             size_t aAlignment, bool aZero) {
957   MOZ_ASSERT(IsPowerOfTwo(aAlignment));
958 
959   if (aReqSize > kPageSize) {
960     return nullptr;
961   }
962 
963   GAtomic::IncrementNow();
964 
965   // Decrement the delay. If it's zero, we do a page allocation and reset the
966   // delay to a random number. Because the assignment to the random number isn't
967   // atomic w.r.t. the decrement, we might have a sequence like this:
968   //
969   //     Thread 1                      Thread 2           Thread 3
970   //     --------                      --------           --------
971   // (a) newDelay = --sAllocDelay (-> 0)
972   // (b)                               --sAllocDelay (-> -1)
973   // (c) (newDelay != 0) fails
974   // (d)                                                  --sAllocDelay (-> -2)
975   // (e) sAllocDelay = new_random_number()
976   //
977   // It's critical that sAllocDelay has ReleaseAcquire semantics, because that
978   // guarantees that exactly one thread will see sAllocDelay have the value 0.
979   // (Relaxed semantics wouldn't guarantee that.)
980   //
981   // It's also nice that sAllocDelay is signed, given that we can decrement to
982   // below zero. (Strictly speaking, an unsigned integer would also work due
983   // to wrapping, but a signed integer is conceptually cleaner.)
984   //
985   // Finally, note that the decrements that occur between (a) and (e) above are
986   // effectively ignored, because (e) clobbers them. This shouldn't be a
987   // problem; it effectively just adds a little more randomness to
988   // new_random_number(). An early version of this code tried to account for
989   // these decrements by doing `sAllocDelay += new_random_number()`. However, if
990   // new_random_value() is small, the number of decrements between (a) and (e)
991   // can easily exceed it, whereupon sAllocDelay ends up negative after
992   // `sAllocDelay += new_random_number()`, and the zero-check never succeeds
993   // again. (At least, not until sAllocDelay wraps around on overflow, which
994   // would take a very long time indeed.)
995   //
996   int32_t newDelay = GAtomic::DecrementDelay();
997   if (newDelay != 0) {
998     return nullptr;
999   }
1000 
1001   if (GTls::IsDisabledOnCurrentThread()) {
1002     return nullptr;
1003   }
1004 
1005   // Disable on this thread *before* getting the stack trace.
1006   AutoDisableOnCurrentThread disable;
1007 
1008   // Get the stack trace *before* locking the mutex. If we return nullptr then
1009   // it was a waste, but it's not so frequent, and doing a stack walk while
1010   // the mutex is locked is problematic (see the big comment on
1011   // StackTrace::Fill() for details).
1012   StackTrace allocStack;
1013   allocStack.Fill();
1014 
1015   MutexAutoLock lock(GMut::sMutex);
1016 
1017   Time now = GAtomic::Now();
1018   Delay newAllocDelay = Rnd64ToDelay<kAvgAllocDelay>(gMut->Random64(lock));
1019 
1020   // We start at a random page alloc and wrap around, to ensure pages get even
1021   // amounts of use.
1022   uint8_t* ptr = nullptr;
1023   uint8_t* pagePtr = nullptr;
1024   for (uintptr_t n = 0, i = size_t(gMut->Random64(lock)) % kNumAllocPages;
1025        n < kNumAllocPages; n++, i = (i + 1) % kNumAllocPages) {
1026     if (!gMut->IsPageAllocatable(lock, i, now)) {
1027       continue;
1028     }
1029 
1030     pagePtr = gConst->AllocPagePtr(i);
1031     MOZ_ASSERT(pagePtr);
1032     bool ok =
1033 #ifdef XP_WIN
1034         !!VirtualAlloc(pagePtr, kPageSize, MEM_COMMIT, PAGE_READWRITE);
1035 #else
1036         mprotect(pagePtr, kPageSize, PROT_READ | PROT_WRITE) == 0;
1037 #endif
1038     size_t usableSize = sMallocTable.malloc_good_size(aReqSize);
1039     if (ok) {
1040       MOZ_ASSERT(usableSize > 0);
1041 
1042       // Put the allocation as close to the end of the page as possible,
1043       // allowing for alignment requirements.
1044       ptr = pagePtr + kPageSize - usableSize;
1045       if (aAlignment != 1) {
1046         ptr = reinterpret_cast<uint8_t*>(
1047             (reinterpret_cast<uintptr_t>(ptr) & ~(aAlignment - 1)));
1048       }
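      // For example (hypothetical numbers): with aReqSize == 100, usableSize ==
      // 112 and aAlignment == 64, ptr starts at pagePtr + kPageSize - 112 and
      // is then rounded down to the nearest 64-byte boundary.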
1049 
1050       gMut->SetPageInUse(lock, i, aArenaId, ptr, allocStack);
1051 
1052       if (aZero) {
1053         memset(ptr, 0, usableSize);
1054       } else {
1055 #ifdef DEBUG
1056         memset(ptr, kAllocJunk, usableSize);
1057 #endif
1058       }
1059     }
1060 
1061     gMut->IncPageAllocHits(lock);
1062     LOG("PageAlloc(%zu, %zu) -> %p[%zu]/%p (%zu) (z%zu), sAllocDelay <- %zu, "
1063         "fullness %zu/%zu, hits %zu/%zu (%zu%%)\n",
1064         aReqSize, aAlignment, pagePtr, i, ptr, usableSize, size_t(aZero),
1065         size_t(newAllocDelay), gMut->NumPageAllocs(lock), kNumAllocPages,
1066         gMut->PageAllocHits(lock), gMut->PageAllocAttempts(lock),
1067         gMut->PageAllocHitRate(lock));
1068     break;
1069   }
1070 
1071   if (!pagePtr) {
1072     // No pages are available, or VirtualAlloc/mprotect failed.
1073     gMut->IncPageAllocMisses(lock);
1074     LOG("No PageAlloc(%zu, %zu), sAllocDelay <- %zu, fullness %zu/%zu, hits "
1075         "%zu/%zu "
1076         "(%zu%%)\n",
1077         aReqSize, aAlignment, size_t(newAllocDelay), gMut->NumPageAllocs(lock),
1078         kNumAllocPages, gMut->PageAllocHits(lock),
1079         gMut->PageAllocAttempts(lock), gMut->PageAllocHitRate(lock));
1080   }
1081 
1082   // Set the new alloc delay.
1083   GAtomic::SetAllocDelay(newAllocDelay);
1084 
1085   return ptr;
1086 }
1087 
1088 static void FreePage(GMutLock aLock, uintptr_t aIndex,
1089                      const Maybe<arena_id_t>& aArenaId,
1090                      const StackTrace& aFreeStack, Delay aReuseDelay) {
1091   void* pagePtr = gConst->AllocPagePtr(aIndex);
1092 #ifdef XP_WIN
1093   if (!VirtualFree(pagePtr, kPageSize, MEM_DECOMMIT)) {
1094     return;
1095   }
1096 #else
1097   if (!mmap(pagePtr, kPageSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON,
1098             -1, 0)) {
1099     return;
1100   }
1101 #endif
1102 
1103   gMut->SetPageFreed(aLock, aIndex, aArenaId, aFreeStack, aReuseDelay);
1104 }
1105 
1106 //---------------------------------------------------------------------------
1107 // replace-malloc machinery
1108 //---------------------------------------------------------------------------
1109 
1110 // This handles malloc, moz_arena_malloc, and realloc-with-a-nullptr.
1111 MOZ_ALWAYS_INLINE static void* PageMalloc(const Maybe<arena_id_t>& aArenaId,
1112                                           size_t aReqSize) {
1113   void* ptr = MaybePageAlloc(aArenaId, aReqSize, /* aAlignment */ 1,
1114                              /* aZero */ false);
1115   return ptr ? ptr
1116              : (aArenaId.isSome()
1117                     ? sMallocTable.moz_arena_malloc(*aArenaId, aReqSize)
1118                     : sMallocTable.malloc(aReqSize));
1119 }
1120 
1121 static void* replace_malloc(size_t aReqSize) {
1122   return PageMalloc(Nothing(), aReqSize);
1123 }
1124 
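// A worked example: with kAvgPageReuseDelay == 256 * 1024, ReuseDelay() below
// returns a value in the range (kAvgPageReuseDelay / 2)..(kAvgPageReuseDelay /
// 2 * 3), i.e. roughly 128K..384K allocations, averaging about
// kAvgPageReuseDelay.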
1125 static Delay ReuseDelay(GMutLock aLock) {
1126   return (kAvgPageReuseDelay / 2) +
1127          Rnd64ToDelay<kAvgPageReuseDelay / 2>(gMut->Random64(aLock));
1128 }
1129 
1130 // This handles both calloc and moz_arena_calloc.
1131 MOZ_ALWAYS_INLINE static void* PageCalloc(const Maybe<arena_id_t>& aArenaId,
1132                                           size_t aNum, size_t aReqSize) {
1133   CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aReqSize;
1134   if (!checkedSize.isValid()) {
1135     return nullptr;
1136   }
1137 
1138   void* ptr = MaybePageAlloc(aArenaId, checkedSize.value(), /* aAlignment */ 1,
1139                              /* aZero */ true);
1140   return ptr ? ptr
1141              : (aArenaId.isSome()
1142                     ? sMallocTable.moz_arena_calloc(*aArenaId, aNum, aReqSize)
1143                     : sMallocTable.calloc(aNum, aReqSize));
1144 }
1145 
1146 static void* replace_calloc(size_t aNum, size_t aReqSize) {
1147   return PageCalloc(Nothing(), aNum, aReqSize);
1148 }
1149 
1150 // This function handles both realloc and moz_arena_realloc.
1151 //
1152 // As always, realloc is complicated, and doubly so when there are two
1153 // different kinds of allocations in play. Here are the possible transitions,
1154 // and what we do in practice.
1155 //
1156 // - normal-to-normal: This is straightforward and obviously necessary.
1157 //
1158 // - normal-to-page: This is disallowed because it would require getting the
1159 //   arenaId of the normal allocation, which isn't possible in non-DEBUG builds
1160 //   for security reasons.
1161 //
1162 // - page-to-page: This is done whenever possible, i.e. whenever the new size
1163 //   is less than or equal to 4 KiB. This choice counterbalances the
1164 //   disallowing of normal-to-page allocations, in order to avoid biasing
1165 //   towards or away from page allocations. It always occurs in-place.
1166 //
1167 // - page-to-normal: this is done only when necessary, i.e. only when the new
1168 //   size is greater than 4 KiB. This choice naturally flows from the
1169 //   prior choice on page-to-page transitions.
1170 //
1171 // In summary: realloc doesn't change the allocation kind unless it must.
1172 //
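// For example (hypothetical, and only applicable if PHC happened to choose the
// original allocation):
//
//   void* p = malloc(100);    // may become a page allocation
//   p = realloc(p, 3000);     // page-to-page: stays within the same page
//   p = realloc(p, 5000);     // page-to-normal: handed over to mozjemalloc
//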
1173 MOZ_ALWAYS_INLINE static void* PageRealloc(const Maybe<arena_id_t>& aArenaId,
1174                                            void* aOldPtr, size_t aNewSize) {
1175   if (!aOldPtr) {
1176     // Null pointer. Treat like malloc(aNewSize).
1177     return PageMalloc(aArenaId, aNewSize);
1178   }
1179 
1180   PtrKind pk = gConst->PtrKind(aOldPtr);
1181   if (pk.IsNothing()) {
1182     // A normal-to-normal transition.
1183     return aArenaId.isSome()
1184                ? sMallocTable.moz_arena_realloc(*aArenaId, aOldPtr, aNewSize)
1185                : sMallocTable.realloc(aOldPtr, aNewSize);
1186   }
1187 
1188   if (pk.IsGuardPage()) {
1189     GMut::CrashOnGuardPage(aOldPtr);
1190   }
1191 
1192   // At this point we know we have an allocation page.
1193   uintptr_t index = pk.AllocPageIndex();
1194 
1195   // A page-to-something transition.
1196 
1197   // Note that `disable` has no effect unless it is emplaced below.
1198   Maybe<AutoDisableOnCurrentThread> disable;
1199   // Get the stack trace *before* locking the mutex.
1200   StackTrace stack;
1201   if (GTls::IsDisabledOnCurrentThread()) {
1202     // PHC is disabled on this thread. Leave the stack empty.
1203   } else {
1204     // Disable on this thread *before* getting the stack trace.
1205     disable.emplace();
1206     stack.Fill();
1207   }
1208 
1209   MutexAutoLock lock(GMut::sMutex);
1210 
1211   // Check for realloc() of a freed block.
1212   gMut->EnsureValidAndInUse(lock, aOldPtr, index);
1213 
1214   if (aNewSize <= kPageSize) {
1215     // A page-to-page transition. Just keep using the page allocation. We do
1216     // this even if the thread is disabled, because it doesn't create a new
1217     // page allocation. Note that ResizePageInUse() checks aArenaId.
1218     //
1219     // Move the bytes with memmove(), because the old allocation and the new
1220     // allocation overlap. Move the usable size rather than the requested size,
1221     // because the user might have used malloc_usable_size() and filled up the
1222     // usable size.
1223     size_t oldUsableSize = gMut->PageUsableSize(lock, index);
1224     size_t newUsableSize = sMallocTable.malloc_good_size(aNewSize);
1225     uint8_t* pagePtr = gConst->AllocPagePtr(index);
1226     uint8_t* newPtr = pagePtr + kPageSize - newUsableSize;
1227     memmove(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize));
1228     gMut->ResizePageInUse(lock, index, aArenaId, newPtr, stack);
1229     LOG("PageRealloc-Reuse(%p, %zu) -> %p\n", aOldPtr, aNewSize, newPtr);
1230     return newPtr;
1231   }
1232 
1233   // A page-to-normal transition (with the new size greater than page-sized).
1234   // (Note that aArenaId is checked below.)
1235   void* newPtr;
1236   if (aArenaId.isSome()) {
1237     newPtr = sMallocTable.moz_arena_malloc(*aArenaId, aNewSize);
1238   } else {
1239     Maybe<arena_id_t> oldArenaId = gMut->PageArena(lock, index);
1240     newPtr = (oldArenaId.isSome()
1241                   ? sMallocTable.moz_arena_malloc(*oldArenaId, aNewSize)
1242                   : sMallocTable.malloc(aNewSize));
1243   }
1244   if (!newPtr) {
1245     return nullptr;
1246   }
1247 
1248   MOZ_ASSERT(aNewSize > kPageSize);
1249 
1250   Delay reuseDelay = ReuseDelay(lock);
1251 
1252   // Copy the usable size rather than the requested size, because the user
1253   // might have used malloc_usable_size() and filled up the usable size. Note
1254   // that FreePage() checks aArenaId (via SetPageFreed()).
1255   size_t oldUsableSize = gMut->PageUsableSize(lock, index);
1256   memcpy(newPtr, aOldPtr, std::min(oldUsableSize, aNewSize));
1257   FreePage(lock, index, aArenaId, stack, reuseDelay);
1258   LOG("PageRealloc-Free(%p[%zu], %zu) -> %p, %zu delay, reuse at ~%zu\n",
1259       aOldPtr, index, aNewSize, newPtr, size_t(reuseDelay),
1260       size_t(GAtomic::Now()) + reuseDelay);
1261 
1262   return newPtr;
1263 }
1264 
1265 static void* replace_realloc(void* aOldPtr, size_t aNewSize) {
1266   return PageRealloc(Nothing(), aOldPtr, aNewSize);
1267 }
1268 
1269 // This handles both free and moz_arena_free.
1270 MOZ_ALWAYS_INLINE static void PageFree(const Maybe<arena_id_t>& aArenaId,
1271                                        void* aPtr) {
1272   PtrKind pk = gConst->PtrKind(aPtr);
1273   if (pk.IsNothing()) {
1274     // Not a page allocation.
1275     return aArenaId.isSome() ? sMallocTable.moz_arena_free(*aArenaId, aPtr)
1276                              : sMallocTable.free(aPtr);
1277   }
1278 
1279   if (pk.IsGuardPage()) {
1280     GMut::CrashOnGuardPage(aPtr);
1281   }
1282 
1283   // At this point we know we have an allocation page.
1284   uintptr_t index = pk.AllocPageIndex();
1285 
1286   // Note that `disable` has no effect unless it is emplaced below.
1287   Maybe<AutoDisableOnCurrentThread> disable;
1288   // Get the stack trace *before* locking the mutex.
1289   StackTrace freeStack;
1290   if (GTls::IsDisabledOnCurrentThread()) {
1291     // PHC is disabled on this thread. Leave the stack empty.
1292   } else {
1293     // Disable on this thread *before* getting the stack trace.
1294     disable.emplace();
1295     freeStack.Fill();
1296   }
1297 
1298   MutexAutoLock lock(GMut::sMutex);
1299 
1300   // Check for a double-free.
1301   gMut->EnsureValidAndInUse(lock, aPtr, index);
1302 
1303   // Note that FreePage() checks aArenaId (via SetPageFreed()).
1304   Delay reuseDelay = ReuseDelay(lock);
1305   FreePage(lock, index, aArenaId, freeStack, reuseDelay);
1306 
1307   LOG("PageFree(%p[%zu]), %zu delay, reuse at ~%zu, fullness %zu/%zu\n", aPtr,
1308       index, size_t(reuseDelay), size_t(GAtomic::Now()) + reuseDelay,
1309       gMut->NumPageAllocs(lock), kNumAllocPages);
1310 }
1311 
1312 static void replace_free(void* aPtr) { return PageFree(Nothing(), aPtr); }
1313 
1314 // This handles memalign and moz_arena_memalign.
1315 MOZ_ALWAYS_INLINE static void* PageMemalign(const Maybe<arena_id_t>& aArenaId,
1316                                             size_t aAlignment,
1317                                             size_t aReqSize) {
1318   MOZ_RELEASE_ASSERT(IsPowerOfTwo(aAlignment));
1319 
1320   // PHC can't satisfy an alignment greater than a page size, so fall back to
1321   // mozjemalloc in that case.
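  // (For example, with 4 KiB pages a 64-byte-aligned request may be serviced
  // by PHC, whereas an 8192-byte-aligned request always goes straight to
  // mozjemalloc.)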
  void* ptr = nullptr;
  if (aAlignment <= kPageSize) {
    ptr = MaybePageAlloc(aArenaId, aReqSize, aAlignment, /* aZero */ false);
  }
  return ptr ? ptr
             : (aArenaId.isSome()
                    ? sMallocTable.moz_arena_memalign(*aArenaId, aAlignment,
                                                      aReqSize)
                    : sMallocTable.memalign(aAlignment, aReqSize));
}

static void* replace_memalign(size_t aAlignment, size_t aReqSize) {
  return PageMemalign(Nothing(), aAlignment, aReqSize);
}

static size_t replace_malloc_usable_size(usable_ptr_t aPtr) {
  PtrKind pk = gConst->PtrKind(aPtr);
  if (pk.IsNothing()) {
    // Not a page allocation. Measure it normally.
    return sMallocTable.malloc_usable_size(aPtr);
  }

  if (pk.IsGuardPage()) {
    GMut::CrashOnGuardPage(const_cast<void*>(aPtr));
  }

  // At this point we know we have an allocation page.
  uintptr_t index = pk.AllocPageIndex();

  MutexAutoLock lock(GMut::sMutex);

  // Check for malloc_usable_size() of a freed block.
  gMut->EnsureValidAndInUse(lock, const_cast<void*>(aPtr), index);

  return gMut->PageUsableSize(lock, index);
}

void replace_jemalloc_stats(jemalloc_stats_t* aStats) {
  sMallocTable.jemalloc_stats(aStats);

  // Add all the pages to `mapped`.
  size_t mapped = kAllPagesSize;
  aStats->mapped += mapped;

  size_t allocated = 0;
  {
    MutexAutoLock lock(GMut::sMutex);

    // Add usable space of in-use allocations to `allocated`.
    for (size_t i = 0; i < kNumAllocPages; i++) {
      if (gMut->IsPageInUse(lock, i)) {
        allocated += gMut->PageUsableSize(lock, i);
      }
    }
  }
  aStats->allocated += allocated;

  // Waste is the gap between `allocated` and `mapped`.
  size_t waste = mapped - allocated;
  aStats->waste += waste;
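  // (Worked example of the arithmetic above, with assumed numbers: if 10 PHC
  // pages are in use and their usable sizes sum to 30 KiB, then `mapped` grows
  // by kAllPagesSize, `allocated` by 30 KiB, and `waste` by
  // kAllPagesSize - 30 KiB.)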

  // aStats.page_cache and aStats.bin_unused are left unchanged because PHC
  // doesn't have anything corresponding to those.

  // gConst and gMut are normal heap allocations, so they're measured by
  // mozjemalloc as `allocated`. Move them into `bookkeeping`.
  size_t bookkeeping = sMallocTable.malloc_usable_size(gConst) +
                       sMallocTable.malloc_usable_size(gMut);
  aStats->allocated -= bookkeeping;
  aStats->bookkeeping += bookkeeping;
}

void replace_jemalloc_ptr_info(const void* aPtr, jemalloc_ptr_info_t* aInfo) {
  // We need to implement this properly, because various code locations do
  // things like checking that allocations are in the expected arena.
  PtrKind pk = gConst->PtrKind(aPtr);
  if (pk.IsNothing()) {
    // Not a page allocation.
    return sMallocTable.jemalloc_ptr_info(aPtr, aInfo);
  }

  if (pk.IsGuardPage()) {
    // Treat a guard page as unknown because there's no better alternative.
    *aInfo = {TagUnknown, nullptr, 0, 0};
    return;
  }

  // At this point we know we have an allocation page.
  uintptr_t index = pk.AllocPageIndex();

  MutexAutoLock lock(GMut::sMutex);

  gMut->FillJemallocPtrInfo(lock, aPtr, index, aInfo);
#if DEBUG
  LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu, %zu}\n", aPtr, index,
      size_t(aInfo->tag), aInfo->addr, aInfo->size, aInfo->arenaId);
#else
  LOG("JemallocPtrInfo(%p[%zu]) -> {%zu, %p, %zu}\n", aPtr, index,
      size_t(aInfo->tag), aInfo->addr, aInfo->size);
#endif
}

arena_id_t replace_moz_create_arena_with_params(arena_params_t* aParams) {
  // No need to do anything special here.
  return sMallocTable.moz_create_arena_with_params(aParams);
}

void replace_moz_dispose_arena(arena_id_t aArenaId) {
  // No need to do anything special here.
  return sMallocTable.moz_dispose_arena(aArenaId);
}

void* replace_moz_arena_malloc(arena_id_t aArenaId, size_t aReqSize) {
  return PageMalloc(Some(aArenaId), aReqSize);
}

void* replace_moz_arena_calloc(arena_id_t aArenaId, size_t aNum,
                               size_t aReqSize) {
  return PageCalloc(Some(aArenaId), aNum, aReqSize);
}

void* replace_moz_arena_realloc(arena_id_t aArenaId, void* aOldPtr,
                                size_t aNewSize) {
  return PageRealloc(Some(aArenaId), aOldPtr, aNewSize);
}

void replace_moz_arena_free(arena_id_t aArenaId, void* aPtr) {
  return PageFree(Some(aArenaId), aPtr);
}

void* replace_moz_arena_memalign(arena_id_t aArenaId, size_t aAlignment,
                                 size_t aReqSize) {
  return PageMemalign(Some(aArenaId), aAlignment, aReqSize);
}

class PHCBridge : public ReplaceMallocBridge {
  virtual bool IsPHCAllocation(const void* aPtr, phc::AddrInfo* aOut) override {
    PtrKind pk = gConst->PtrKind(aPtr);
    if (pk.IsNothing()) {
      return false;
    }

    bool isGuardPage = false;
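    // (Layout sketch, assuming 4 KiB pages; each guard page sits between two
    // allocation pages, except at the region's edges:
    //
    //   ... | alloc page N | guard page | alloc page N+1 | ...
    //
    // A hit in the lower half of the guard page is treated as an overflow off
    // the end of alloc page N; a hit in the upper half is treated as an
    // underflow off the start of alloc page N+1.)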
    if (pk.IsGuardPage()) {
      if ((uintptr_t(aPtr) % kPageSize) < (kPageSize / 2)) {
        // The address is in the lower half of a guard page, so it's probably
        // an overflow. But first check whether it is in the very first guard
        // page; nothing precedes that page, so it cannot be an overflow, and
        // we ignore it.
        if (gConst->IsInFirstGuardPage(aPtr)) {
          return false;
        }

        // Get the allocation page preceding this guard page.
        pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) - kPageSize);

      } else {
        // The address is in the upper half of a guard page, so it's probably an
        // underflow. Get the allocation page following this guard page.
        pk = gConst->PtrKind(static_cast<const uint8_t*>(aPtr) + kPageSize);
      }

      // Make a note of the fact that we hit a guard page.
      isGuardPage = true;
    }

    // At this point we know we have an allocation page.
    uintptr_t index = pk.AllocPageIndex();

    if (aOut) {
      MutexAutoLock lock(GMut::sMutex);
      gMut->FillAddrInfo(lock, index, aPtr, isGuardPage, *aOut);
      LOG("IsPHCAllocation: %zu, %p, %zu, %zu, %zu\n", size_t(aOut->mKind),
          aOut->mBaseAddr, aOut->mUsableSize,
          aOut->mAllocStack.isSome() ? aOut->mAllocStack->mLength : 0,
          aOut->mFreeStack.isSome() ? aOut->mFreeStack->mLength : 0);
    }
    return true;
  }

  virtual void DisablePHCOnCurrentThread() override {
    GTls::DisableOnCurrentThread();
    LOG("DisablePHCOnCurrentThread: %zu\n", 0ul);
  }

  virtual void ReenablePHCOnCurrentThread() override {
    GTls::EnableOnCurrentThread();
    LOG("ReenablePHCOnCurrentThread: %zu\n", 0ul);
  }

  virtual bool IsPHCEnabledOnCurrentThread() override {
    bool enabled = !GTls::IsDisabledOnCurrentThread();
    LOG("IsPHCEnabledOnCurrentThread: %zu\n", size_t(enabled));
    return enabled;
  }
};
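
// A minimal usage sketch (hypothetical caller code; `bridge` and
// `faultingAddress` are assumptions -- only the bridge methods above are real):
//
//   phc::AddrInfo info;
//   if (bridge->IsPHCAllocation(faultingAddress, &info)) {
//     // e.g. attach info.mBaseAddr, info.mUsableSize, info.mAllocStack and
//     // info.mFreeStack to a crash report.
//   }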

// WARNING: this function runs *very* early -- before all static initializers
// have run. For this reason, non-scalar globals (gConst, gMut) are allocated
// dynamically (so we can guarantee their construction in this function) rather
// than statically. GAtomic and GTls contain simple static data that doesn't
// involve static initializers so they don't need to be allocated dynamically.
void replace_init(malloc_table_t* aMallocTable, ReplaceMallocBridge** aBridge) {
  // Don't run PHC if the page size isn't 4 KiB.
  jemalloc_stats_t stats;
  aMallocTable->jemalloc_stats(&stats);
  if (stats.page_size != kPageSize) {
    return;
  }
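  // (So, for example, PHC is effectively inert on systems configured with
  // 16 KiB or 64 KiB pages: we return before installing any of the replacement
  // functions below.)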

  sMallocTable = *aMallocTable;

  // The choices of which functions to replace are complex enough that we set
  // them individually instead of using MALLOC_FUNCS/malloc_decls.h.

  aMallocTable->malloc = replace_malloc;
  aMallocTable->calloc = replace_calloc;
  aMallocTable->realloc = replace_realloc;
  aMallocTable->free = replace_free;
  aMallocTable->memalign = replace_memalign;

  // posix_memalign, aligned_alloc & valloc: unset, which means they fall back
  // to replace_memalign.
  aMallocTable->malloc_usable_size = replace_malloc_usable_size;
  // malloc_good_size: the default suffices.

  aMallocTable->jemalloc_stats = replace_jemalloc_stats;
  // jemalloc_purge_freed_pages: the default suffices.
  // jemalloc_free_dirty_pages: the default suffices.
  // jemalloc_thread_local_arena: the default suffices.
  aMallocTable->jemalloc_ptr_info = replace_jemalloc_ptr_info;

  aMallocTable->moz_create_arena_with_params =
      replace_moz_create_arena_with_params;
  aMallocTable->moz_dispose_arena = replace_moz_dispose_arena;
  aMallocTable->moz_arena_malloc = replace_moz_arena_malloc;
  aMallocTable->moz_arena_calloc = replace_moz_arena_calloc;
  aMallocTable->moz_arena_realloc = replace_moz_arena_realloc;
  aMallocTable->moz_arena_free = replace_moz_arena_free;
  aMallocTable->moz_arena_memalign = replace_moz_arena_memalign;

  static PHCBridge bridge;
  *aBridge = &bridge;

#ifndef XP_WIN
  // Avoid deadlocks when forking by acquiring our state lock prior to forking
  // and releasing it after forking. See |LogAlloc|'s |replace_init| for
  // in-depth details.
  //
  // Note: This must run after attempting an allocation so as to give the
  // system malloc a chance to insert its own atfork handler.
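  // (The call below is expected to fail -- size_t(-1) is an impossibly large
  // request -- and its result is deliberately ignored; it exists only to give
  // the underlying allocator a chance to initialize, and with it any atfork
  // handler it registers, before ours is added.)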
  sMallocTable.malloc(-1);
  pthread_atfork(GMut::prefork, GMut::postfork, GMut::postfork);
#endif

  // gConst and gMut are never freed. They live for the life of the process.
  gConst = InfallibleAllocPolicy::new_<GConst>();
  GTls::Init();
  gMut = InfallibleAllocPolicy::new_<GMut>();
  {
    MutexAutoLock lock(GMut::sMutex);
    Delay firstAllocDelay =
        Rnd64ToDelay<kAvgFirstAllocDelay>(gMut->Random64(lock));
    GAtomic::Init(firstAllocDelay);
  }
}