1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* vim: set ts=8 sts=2 et sw=2 tw=80: */
3 /* This Source Code Form is subject to the terms of the Mozilla Public
4 * License, v. 2.0. If a copy of the MPL was not distributed with this
5 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
6
7 // Portions of this file were originally under the following license:
8 //
9 // Copyright (C) 2006-2008 Jason Evans <jasone@FreeBSD.org>.
10 // All rights reserved.
11 // Copyright (C) 2007-2017 Mozilla Foundation.
12 //
13 // Redistribution and use in source and binary forms, with or without
14 // modification, are permitted provided that the following conditions
15 // are met:
16 // 1. Redistributions of source code must retain the above copyright
17 // notice(s), this list of conditions and the following disclaimer as
18 // the first lines of this file unmodified other than the possible
19 // addition of one or more copyright notices.
20 // 2. Redistributions in binary form must reproduce the above copyright
21 // notice(s), this list of conditions and the following disclaimer in
22 // the documentation and/or other materials provided with the
23 // distribution.
24 //
25 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) ``AS IS'' AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER(S) BE
29 // LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
30 // CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
31 // SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
32 // BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
33 // WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
34 // OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE,
35 // EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // *****************************************************************************
38 //
39 // This allocator implementation is designed to provide scalable performance
40 // for multi-threaded programs on multi-processor systems. The following
41 // features are included for this purpose:
42 //
43 // + Multiple arenas are used if there are multiple CPUs, which reduces lock
44 // contention and cache sloshing.
45 //
46 // + Cache line sharing between arenas is avoided for internal data
47 // structures.
48 //
49 // + Memory is managed in chunks and runs (chunks can be split into runs),
50 // rather than as individual pages. This provides a constant-time
51 // mechanism for associating allocations with particular arenas.
52 //
53 // Allocation requests are rounded up to the nearest size class, and no record
54 // of the original request size is maintained. Allocations are broken into
55 // categories according to size class. Assuming runtime defaults, the size
56 // classes in each category are as follows (for x86, x86_64 and Apple Silicon):
57 //
58 // |======================================================================|
59 // | Category | Subcategory | x86 | x86_64 | Mac x86_64 | Mac ARM |
60 // |---------------------------+---------+---------+------------+---------|
61 // | Word size | 32 bit | 64 bit | 64 bit | 64 bit |
62 // | Page size | 4 Kb | 4 Kb | 4 Kb | 16 Kb |
63 // |======================================================================|
64 // | Small | Tiny | 4/-w | -w | - | - |
65 // | | | 8 | 8/-w | 8 | 8 |
66 // | |----------------+---------|---------|------------|---------|
67 // | | Quantum-spaced | 16 | 16 | 16 | 16 |
68 // | | | 32 | 32 | 32 | 32 |
69 // | | | 48 | 48 | 48 | 48 |
70 // | | | ... | ... | ... | ... |
71 // | | | 480 | 480 | 480 | 480 |
72 // | | | 496 | 496 | 496 | 496 |
73 // | |----------------+---------|---------|------------|---------|
74 // | | Quantum-wide- | 512 | 512 | - | - |
75 // | | spaced | 768 | 768 | - | - |
76 // | | | ... | ... | - | - |
77 // | | | 3584 | 3584 | - | - |
78 // | | | 3840 | 3840 | - | - |
79 // | |----------------+---------|---------|------------|---------|
80 // | | Sub-page | - | - | 512 | 512 |
81 // | | | - | - | 1024 | 1024 |
82 // | | | - | - | 2048 | 2048 |
83 // | | | - | - | | 4096 |
84 // | | | - | - | | 8 kB |
85 // |============================================================|=========|
86 // | Large | 4 kB | 4 kB | 4 kB | - |
87 // | | 8 kB | 8 kB | 8 kB | - |
88 // | | 12 kB | 12 kB | 12 kB | - |
89 // | | 16 kB | 16 kB | 16 kB | 16 kB |
90 // | | ... | ... | ... | - |
91 // | | 32 kB | 32 kB | 32 kB | 32 kB |
92 // | | ... | ... | ... | ... |
93 // | | 1008 kB | 1008 kB | 1008 kB | 1008 kB |
94 // | | 1012 kB | 1012 kB | 1012 kB | - |
95 // | | 1016 kB | 1016 kB | 1016 kB | - |
96 // | | 1020 kB | 1020 kB | 1020 kB | - |
97 // |======================================================================|
98 // | Huge | 1 MB | 1 MB | 1 MB | 1 MB |
99 // | | 2 MB | 2 MB | 2 MB | 2 MB |
100 // | | 3 MB | 3 MB | 3 MB | 3 MB |
101 // | | ... | ... | ... | ... |
102 // |======================================================================|
103 //
104 // Legend:
105 // n: Size class exists for this platform.
106 // n/-w: This size class doesn't exist on Windows (see kMinTinyClass).
107 // -: This size class doesn't exist for this platform.
108 // ...: Size classes follow a pattern here.
109 //
110 // NOTE: Due to Mozilla bug 691003, we cannot reserve less than one word for an
111 // allocation on Linux or Mac. So on 32-bit *nix, the smallest bucket size is
112 // 4 bytes, and on 64-bit, the smallest bucket size is 8 bytes.
113 //
114 // A different mechanism is used for each category:
115 //
116 // Small : Each size class is segregated into its own set of runs. Each run
117 // maintains a bitmap of which regions are free/allocated.
118 //
119 // Large : Each allocation is backed by a dedicated run. Metadata are stored
120 // in the associated arena chunk header maps.
121 //
122 // Huge : Each allocation is backed by a dedicated contiguous set of chunks.
123 // Metadata are stored in a separate red-black tree.
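//
// For example, assuming runtime defaults on a 64-bit platform with 4 kB
// pages (an illustrative sketch of the rules above, not additional
// behaviour):
//
//   malloc(100)  -> rounded up to the 112 byte quantum-spaced class and
//                   served from a small run of that class.
//   malloc(5000) -> rounded up to 8 kB and served by a dedicated large run.
//   malloc(2 MB) -> served by a dedicated huge chunk of 2 MB.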
124 //
125 // *****************************************************************************
126
127 #include "mozmemory_wrap.h"
128 #include "mozjemalloc.h"
129 #include "mozjemalloc_types.h"
130
131 #include <cstring>
132 #include <cerrno>
133 #ifdef XP_WIN
134 # include <io.h>
135 # include <windows.h>
136 #else
137 # include <sys/mman.h>
138 # include <unistd.h>
139 #endif
140 #ifdef XP_DARWIN
141 # include <libkern/OSAtomic.h>
142 # include <mach/mach_init.h>
143 # include <mach/vm_map.h>
144 #endif
145
146 #include "mozilla/Atomics.h"
147 #include "mozilla/Alignment.h"
148 #include "mozilla/ArrayUtils.h"
149 #include "mozilla/Assertions.h"
150 #include "mozilla/CheckedInt.h"
151 #include "mozilla/DoublyLinkedList.h"
152 #include "mozilla/HelperMacros.h"
153 #include "mozilla/Likely.h"
154 #include "mozilla/MathAlgorithms.h"
155 #include "mozilla/RandomNum.h"
156 #include "mozilla/Sprintf.h"
157 // Note: MozTaggedAnonymousMmap() could call an LD_PRELOADed mmap
158 // instead of the one defined here; use only MozTagAnonymousMemory().
159 #include "mozilla/TaggedAnonymousMemory.h"
160 #include "mozilla/ThreadLocal.h"
161 #include "mozilla/UniquePtr.h"
162 #include "mozilla/Unused.h"
163 #include "mozilla/XorShift128PlusRNG.h"
164 #include "mozilla/fallible.h"
165 #include "rb.h"
166 #include "Mutex.h"
167 #include "Utils.h"
168
169 using namespace mozilla;
170
171 // On Linux, we use madvise(MADV_DONTNEED) to release memory back to the
172 // operating system. If we release 1MB of live pages with MADV_DONTNEED, our
173 // RSS will decrease by 1MB (almost) immediately.
174 //
175 // On Mac, we use madvise(MADV_FREE). Unlike MADV_DONTNEED on Linux, MADV_FREE
176 // on Mac doesn't cause the OS to release the specified pages immediately; the
177 // OS keeps them in our process until the machine comes under memory pressure.
178 //
179 // It's therefore difficult to measure the process's RSS on Mac, since, in the
180 // absence of memory pressure, the contribution from the heap to RSS will not
181 // decrease due to our madvise calls.
182 //
183 // We therefore define MALLOC_DOUBLE_PURGE on Mac. This causes jemalloc to
184 // track which pages have been MADV_FREE'd. You can then call
185 // jemalloc_purge_freed_pages(), which will force the OS to release those
186 // MADV_FREE'd pages, making the process's RSS reflect its true memory usage.
187 //
188 // The jemalloc_purge_freed_pages definition in memory/build/mozmemory.h needs
189 // to be adjusted if MALLOC_DOUBLE_PURGE is ever enabled on Linux.
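//
// An illustrative usage sketch (e.g. from a memory reporter), restating the
// above rather than adding behaviour:
//
//   jemalloc_purge_freed_pages();  // force MADV_FREE'd pages to be released
//   // ... RSS sampled now reflects the process's true memory usage.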
190
191 #ifdef XP_DARWIN
192 # define MALLOC_DOUBLE_PURGE
193 #endif
194
195 #ifdef XP_WIN
196 # define MALLOC_DECOMMIT
197 #endif
198
199 // When MALLOC_STATIC_PAGESIZE is defined, the page size is fixed at
200 // compile-time for better performance, as opposed to determined at
201 // runtime. Some platforms can have different page sizes at runtime
202 // depending on kernel configuration, so they are opted out by default.
203 // Debug builds are opted out too, for test coverage.
204 #ifndef MOZ_DEBUG
205 # if !defined(__ia64__) && !defined(__sparc__) && !defined(__mips__) && \
206 !defined(__aarch64__) && !defined(__powerpc__) && !defined(XP_MACOSX) && \
207 !defined(__loongarch__)
208 # define MALLOC_STATIC_PAGESIZE 1
209 # endif
210 #endif
211
212 #ifdef XP_WIN
213 # define STDERR_FILENO 2
214
215 // Implement getenv without using malloc.
216 static char mozillaMallocOptionsBuf[64];
217
218 # define getenv xgetenv
static char* getenv(const char* name) {
220 if (GetEnvironmentVariableA(name, mozillaMallocOptionsBuf,
221 sizeof(mozillaMallocOptionsBuf)) > 0) {
222 return mozillaMallocOptionsBuf;
223 }
224
225 return nullptr;
226 }
227 #endif
228
229 #ifndef XP_WIN
// Newer Linux systems support MADV_FREE, but we're not supporting
// that properly; see bug 1406304.
232 # if defined(XP_LINUX) && defined(MADV_FREE)
233 # undef MADV_FREE
234 # endif
235 # ifndef MADV_FREE
236 # define MADV_FREE MADV_DONTNEED
237 # endif
238 #endif
239
// Some tools, such as /dev/dsp wrappers, are LD_PRELOADed libraries that
// happen to override mmap() and call dlsym() from their overridden
// mmap(). The problem is that dlsym() calls malloc(), and this ends
// up in a deadlock in jemalloc.
// On these systems, we prefer to use the system call directly.
// We do that for Linux systems and kFreeBSD with a GNU userland.
// Note that sanity checks (alignment of offset, ...) are not done, because
// the uses of mmap in jemalloc are pretty limited.
//
// On Alpha, glibc has a bug that prevents syscall() from working for system
// calls with 6 arguments.
251 #if (defined(XP_LINUX) && !defined(__alpha__)) || \
252 (defined(__FreeBSD_kernel__) && defined(__GLIBC__))
253 # include <sys/syscall.h>
254 # if defined(SYS_mmap) || defined(SYS_mmap2)
static inline void* _mmap(void* addr, size_t length, int prot, int flags,
                          int fd, off_t offset) {
257 // S390 only passes one argument to the mmap system call, which is a
258 // pointer to a structure containing the arguments.
259 # ifdef __s390__
260 struct {
261 void* addr;
262 size_t length;
263 long prot;
264 long flags;
265 long fd;
266 off_t offset;
267 } args = {addr, length, prot, flags, fd, offset};
268 return (void*)syscall(SYS_mmap, &args);
269 # else
270 # if defined(ANDROID) && defined(__aarch64__) && defined(SYS_mmap2)
271 // Android NDK defines SYS_mmap2 for AArch64 despite it not supporting mmap2.
272 # undef SYS_mmap2
273 # endif
274 # ifdef SYS_mmap2
275 return (void*)syscall(SYS_mmap2, addr, length, prot, flags, fd, offset >> 12);
276 # else
277 return (void*)syscall(SYS_mmap, addr, length, prot, flags, fd, offset);
278 # endif
279 # endif
280 }
281 # define mmap _mmap
282 # define munmap(a, l) syscall(SYS_munmap, a, l)
283 # endif
284 #endif
285
286 // ***************************************************************************
287 // Structures for chunk headers for chunks used for non-huge allocations.
288
289 struct arena_t;
290
291 // Each element of the chunk map corresponds to one page within the chunk.
292 struct arena_chunk_map_t {
293 // Linkage for run trees. There are two disjoint uses:
294 //
// 1) arena_t's tree of available runs.
296 // 2) arena_run_t conceptually uses this linkage for in-use non-full
297 // runs, rather than directly embedding linkage.
298 RedBlackTreeNode<arena_chunk_map_t> link;
299
300 // Run address (or size) and various flags are stored together. The bit
301 // layout looks like (assuming 32-bit system):
302 //
303 // ???????? ???????? ????---- -mckdzla
304 //
305 // ? : Unallocated: Run address for first/last pages, unset for internal
306 // pages.
307 // Small: Run address.
308 // Large: Run size for first page, unset for trailing pages.
309 // - : Unused.
310 // m : MADV_FREE/MADV_DONTNEED'ed?
311 // c : decommitted?
312 // k : key?
313 // d : dirty?
314 // z : zeroed?
315 // l : large?
316 // a : allocated?
317 //
318 // Following are example bit patterns for the three types of runs.
319 //
320 // r : run address
321 // s : run size
322 // x : don't care
323 // - : 0
324 // [cdzla] : bit set
325 //
326 // Unallocated:
327 // ssssssss ssssssss ssss---- --c-----
328 // xxxxxxxx xxxxxxxx xxxx---- ----d---
329 // ssssssss ssssssss ssss---- -----z--
330 //
331 // Small:
332 // rrrrrrrr rrrrrrrr rrrr---- -------a
333 // rrrrrrrr rrrrrrrr rrrr---- -------a
334 // rrrrrrrr rrrrrrrr rrrr---- -------a
335 //
336 // Large:
337 // ssssssss ssssssss ssss---- ------la
338 // -------- -------- -------- ------la
339 // -------- -------- -------- ------la
340 size_t bits;
341
342 // Note that CHUNK_MAP_DECOMMITTED's meaning varies depending on whether
343 // MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are defined.
344 //
345 // If MALLOC_DECOMMIT is defined, a page which is CHUNK_MAP_DECOMMITTED must be
346 // re-committed with pages_commit() before it may be touched. If
347 // MALLOC_DECOMMIT is defined, MALLOC_DOUBLE_PURGE may not be defined.
348 //
349 // If neither MALLOC_DECOMMIT nor MALLOC_DOUBLE_PURGE is defined, pages which
350 // are madvised (with either MADV_DONTNEED or MADV_FREE) are marked with
351 // CHUNK_MAP_MADVISED.
352 //
353 // Otherwise, if MALLOC_DECOMMIT is not defined and MALLOC_DOUBLE_PURGE is
354 // defined, then a page which is madvised is marked as CHUNK_MAP_MADVISED.
355 // When it's finally freed with jemalloc_purge_freed_pages, the page is marked
356 // as CHUNK_MAP_DECOMMITTED.
357 #define CHUNK_MAP_MADVISED ((size_t)0x40U)
358 #define CHUNK_MAP_DECOMMITTED ((size_t)0x20U)
359 #define CHUNK_MAP_MADVISED_OR_DECOMMITTED \
360 (CHUNK_MAP_MADVISED | CHUNK_MAP_DECOMMITTED)
361 #define CHUNK_MAP_KEY ((size_t)0x10U)
362 #define CHUNK_MAP_DIRTY ((size_t)0x08U)
363 #define CHUNK_MAP_ZEROED ((size_t)0x04U)
364 #define CHUNK_MAP_LARGE ((size_t)0x02U)
365 #define CHUNK_MAP_ALLOCATED ((size_t)0x01U)
366 };
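
// As an illustrative sketch (not a helper defined by the allocator), the
// state of a map element `m` is typically read as follows, with gPageSizeMask
// defined further down:
//
//   size_t size_or_addr = m->bits & ~gPageSizeMask;
//   bool allocated = m->bits & CHUNK_MAP_ALLOCATED;
//   bool large = m->bits & CHUNK_MAP_LARGE;
//   bool dirty = m->bits & CHUNK_MAP_DIRTY;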
367
368 // Arena chunk header.
369 struct arena_chunk_t {
370 // Arena that owns the chunk.
371 arena_t* arena;
372
373 // Linkage for the arena's tree of dirty chunks.
374 RedBlackTreeNode<arena_chunk_t> link_dirty;
375
376 #ifdef MALLOC_DOUBLE_PURGE
377 // If we're double-purging, we maintain a linked list of chunks which
378 // have pages which have been madvise(MADV_FREE)'d but not explicitly
379 // purged.
380 //
381 // We're currently lazy and don't remove a chunk from this list when
382 // all its madvised pages are recommitted.
383 DoublyLinkedListElement<arena_chunk_t> chunks_madvised_elem;
384 #endif
385
386 // Number of dirty pages.
387 size_t ndirty;
388
389 // Map of pages within chunk that keeps track of free/large/small.
390 arena_chunk_map_t map[1]; // Dynamically sized.
391 };
392
393 // ***************************************************************************
394 // Constants defining allocator size classes and behavior.
395
396 // Maximum size of L1 cache line. This is used to avoid cache line aliasing,
397 // so over-estimates are okay (up to a point), but under-estimates will
398 // negatively affect performance.
399 static const size_t kCacheLineSize = 64;
400
401 // Our size classes are inclusive ranges of memory sizes. By describing the
402 // minimums and how memory is allocated in each range the maximums can be
403 // calculated.
404
405 // Smallest size class to support. On Windows the smallest allocation size
406 // must be 8 bytes on 32-bit, 16 bytes on 64-bit. On Linux and Mac, even
407 // malloc(1) must reserve a word's worth of memory (see Mozilla bug 691003).
408 #ifdef XP_WIN
409 static const size_t kMinTinyClass = sizeof(void*) * 2;
410 #else
411 static const size_t kMinTinyClass = sizeof(void*);
412 #endif
413
414 // Maximum tiny size class.
415 static const size_t kMaxTinyClass = 8;
416
// Smallest quantum-spaced size class. It could actually also be labelled a
// tiny allocation, and is spaced as such from the largest tiny size class.
// Tiny classes being powers of 2, this is twice as large as the largest of
// them.
421 static const size_t kMinQuantumClass = kMaxTinyClass * 2;
422 static const size_t kMinQuantumWideClass = 512;
423 #ifdef XP_MACOSX
424 static const size_t kMinSubPageClass = 512;
425 #else
426 static const size_t kMinSubPageClass = 4_KiB;
427 #endif
428
429 // Amount (quantum) separating quantum-spaced size classes.
430 static const size_t kQuantum = 16;
431 static const size_t kQuantumMask = kQuantum - 1;
432 static const size_t kQuantumWide = 256;
433 static const size_t kQuantumWideMask = kQuantumWide - 1;
434
435 static const size_t kMaxQuantumClass = kMinQuantumWideClass - kQuantum;
436 static const size_t kMaxQuantumWideClass = kMinSubPageClass - kQuantumWide;
437
438 // We can optimise some divisions to shifts if these are powers of two.
439 static_assert(mozilla::IsPowerOfTwo(kQuantum),
440 "kQuantum is not a power of two");
441 static_assert(mozilla::IsPowerOfTwo(kQuantumWide),
442 "kQuantumWide is not a power of two");
443
444 static_assert(kMaxQuantumClass % kQuantum == 0,
445 "kMaxQuantumClass is not a multiple of kQuantum");
446 static_assert(kMaxQuantumWideClass % kQuantumWide == 0,
447 "kMaxQuantumWideClass is not a multiple of kQuantumWide");
448 static_assert(kQuantum < kQuantumWide,
449 "kQuantum must be smaller than kQuantumWide");
450 static_assert(mozilla::IsPowerOfTwo(kMinSubPageClass),
451 "kMinSubPageClass is not a power of two");
452
453 // Number of (2^n)-spaced tiny classes.
454 static const size_t kNumTinyClasses =
455 LOG2(kMaxTinyClass) - LOG2(kMinTinyClass) + 1;
456
// Number of quantum-spaced classes. We add kQuantum (resp. kQuantumWide)
// before subtracting to avoid underflow when a class is empty (Max < Min).
459 static const size_t kNumQuantumClasses =
460 (kMaxQuantumClass + kQuantum - kMinQuantumClass) / kQuantum;
461 static const size_t kNumQuantumWideClasses =
462 (kMaxQuantumWideClass + kQuantumWide - kMinQuantumWideClass) / kQuantumWide;
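
// As a worked example with the default constants above: the quantum-spaced
// classes are 16, 32, ..., 496, i.e. (496 + 16 - 16) / 16 = 31 classes, and
// on platforms where kMinSubPageClass is 4 KiB the quantum-wide-spaced
// classes are 512, 768, ..., 3840, i.e. (3840 + 256 - 512) / 256 = 14.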
463
464 // Size and alignment of memory chunks that are allocated by the OS's virtual
465 // memory system.
466 static const size_t kChunkSize = 1_MiB;
467 static const size_t kChunkSizeMask = kChunkSize - 1;
468
469 #ifdef MALLOC_STATIC_PAGESIZE
470 // VM page size. It must divide the runtime CPU page size or the code
471 // will abort.
472 // Platform specific page size conditions copied from js/public/HeapAPI.h
473 # if defined(__powerpc64__)
474 static const size_t gPageSize = 64_KiB;
475 # elif defined(__loongarch64)
476 static const size_t gPageSize = 16_KiB;
477 # else
478 static const size_t gPageSize = 4_KiB;
479 # endif
480 static const size_t gRealPageSize = gPageSize;
481
482 #else
483 // When MALLOC_OPTIONS contains one or several `P`s, the page size used
484 // across the allocator is multiplied by 2 for each `P`, but we also keep
485 // the real page size for code paths that need it. gPageSize is thus a
486 // power of two greater or equal to gRealPageSize.
487 static size_t gRealPageSize;
488 static size_t gPageSize;
489 #endif
490
491 #ifdef MALLOC_STATIC_PAGESIZE
492 # define DECLARE_GLOBAL(type, name)
493 # define DEFINE_GLOBALS
494 # define END_GLOBALS
495 # define DEFINE_GLOBAL(type) static const type
496 # define GLOBAL_LOG2 LOG2
497 # define GLOBAL_ASSERT_HELPER1(x) static_assert(x, # x)
498 # define GLOBAL_ASSERT_HELPER2(x, y) static_assert(x, y)
499 # define GLOBAL_ASSERT(...) \
500 MACRO_CALL( \
501 MOZ_PASTE_PREFIX_AND_ARG_COUNT(GLOBAL_ASSERT_HELPER, __VA_ARGS__), \
502 (__VA_ARGS__))
503 # define GLOBAL_CONSTEXPR constexpr
504 #else
505 # define DECLARE_GLOBAL(type, name) static type name;
506 # define DEFINE_GLOBALS static void DefineGlobals() {
507 # define END_GLOBALS }
508 # define DEFINE_GLOBAL(type)
509 # define GLOBAL_LOG2 FloorLog2
510 # define GLOBAL_ASSERT MOZ_RELEASE_ASSERT
511 # define GLOBAL_CONSTEXPR
512 #endif
513
514 DECLARE_GLOBAL(size_t, gMaxSubPageClass)
515 DECLARE_GLOBAL(uint8_t, gNumSubPageClasses)
516 DECLARE_GLOBAL(uint8_t, gPageSize2Pow)
517 DECLARE_GLOBAL(size_t, gPageSizeMask)
518 DECLARE_GLOBAL(size_t, gChunkNumPages)
519 DECLARE_GLOBAL(size_t, gChunkHeaderNumPages)
520 DECLARE_GLOBAL(size_t, gMaxLargeClass)
521
522 DEFINE_GLOBALS
523
524 // Largest sub-page size class, or zero if there are none
525 DEFINE_GLOBAL(size_t)
526 gMaxSubPageClass = gPageSize / 2 >= kMinSubPageClass ? gPageSize / 2 : 0;
527
528 // Max size class for bins.
529 #define gMaxBinClass \
530 (gMaxSubPageClass ? gMaxSubPageClass : kMaxQuantumWideClass)
531
532 // Number of sub-page bins.
533 DEFINE_GLOBAL(uint8_t)
gNumSubPageClasses = []() GLOBAL_CONSTEXPR -> uint8_t {
535 if GLOBAL_CONSTEXPR (gMaxSubPageClass != 0) {
536 return FloorLog2(gMaxSubPageClass) - LOG2(kMinSubPageClass) + 1;
537 }
538 return 0;
539 }();
540
541 DEFINE_GLOBAL(uint8_t) gPageSize2Pow = GLOBAL_LOG2(gPageSize);
542 DEFINE_GLOBAL(size_t) gPageSizeMask = gPageSize - 1;
543
544 // Number of pages in a chunk.
545 DEFINE_GLOBAL(size_t) gChunkNumPages = kChunkSize >> gPageSize2Pow;
546
547 // Number of pages necessary for a chunk header plus a guard page.
548 DEFINE_GLOBAL(size_t)
549 gChunkHeaderNumPages =
550 1 + (((sizeof(arena_chunk_t) +
551 sizeof(arena_chunk_map_t) * (gChunkNumPages - 1) + gPageSizeMask) &
552 ~gPageSizeMask) >>
553 gPageSize2Pow);
554
555 // One chunk, minus the header, minus a guard page
556 DEFINE_GLOBAL(size_t)
557 gMaxLargeClass =
558 kChunkSize - gPageSize - (gChunkHeaderNumPages << gPageSize2Pow);
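
// As a worked example with 1 MiB chunks and 4 KiB pages: gChunkNumPages is
// 256; the chunk header (arena_chunk_t plus its 255 trailing map entries)
// rounds up to a few pages plus one guard page (gChunkHeaderNumPages); and
// gMaxLargeClass is what remains of the 1 MiB once that overhead is taken
// out, which is why large classes top out just under 1 MB in the table at
// the top of this file.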
559
560 // Various sanity checks that regard configuration.
561 GLOBAL_ASSERT(1ULL << gPageSize2Pow == gPageSize,
562 "Page size is not a power of two");
563 GLOBAL_ASSERT(kQuantum >= sizeof(void*));
564 GLOBAL_ASSERT(kQuantum <= kQuantumWide);
565 GLOBAL_ASSERT(!kNumQuantumWideClasses ||
566 kQuantumWide <= (kMinSubPageClass - kMaxQuantumClass));
567
568 GLOBAL_ASSERT(kQuantumWide <= kMaxQuantumClass);
569
570 GLOBAL_ASSERT(gMaxSubPageClass >= kMinSubPageClass || gMaxSubPageClass == 0);
571 GLOBAL_ASSERT(gMaxLargeClass >= gMaxSubPageClass);
572 GLOBAL_ASSERT(kChunkSize >= gPageSize);
573 GLOBAL_ASSERT(kQuantum * 4 <= kChunkSize);
574
575 END_GLOBALS
576
577 // Recycle at most 128 MiB of chunks. This means we retain at most
578 // 6.25% of the process address space on a 32-bit OS for later use.
579 static const size_t gRecycleLimit = 128_MiB;
580
581 // The current amount of recycled bytes, updated atomically.
582 static Atomic<size_t, ReleaseAcquire> gRecycledSize;
583
584 // Maximum number of dirty pages per arena.
585 #define DIRTY_MAX_DEFAULT (1U << 8)
586
587 static size_t opt_dirty_max = DIRTY_MAX_DEFAULT;
588
589 // Return the smallest chunk multiple that is >= s.
590 #define CHUNK_CEILING(s) (((s) + kChunkSizeMask) & ~kChunkSizeMask)
591
592 // Return the smallest cacheline multiple that is >= s.
593 #define CACHELINE_CEILING(s) \
594 (((s) + (kCacheLineSize - 1)) & ~(kCacheLineSize - 1))
595
596 // Return the smallest quantum multiple that is >= a.
597 #define QUANTUM_CEILING(a) (((a) + (kQuantumMask)) & ~(kQuantumMask))
598 #define QUANTUM_WIDE_CEILING(a) \
599 (((a) + (kQuantumWideMask)) & ~(kQuantumWideMask))
600
// Return the smallest sub-page size class that is >= a.
602 #define SUBPAGE_CEILING(a) (RoundUpPow2(a))
603
604 // Return the smallest pagesize multiple that is >= s.
605 #define PAGE_CEILING(s) (((s) + gPageSizeMask) & ~gPageSizeMask)
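
// A couple of illustrative checks of the rounding macros above (a sketch;
// these assertions merely restate the arithmetic):
static_assert(QUANTUM_CEILING(100) == 112,
              "100 bytes rounds up to the 112 byte quantum class");
static_assert(CHUNK_CEILING(kChunkSize + 1) == 2 * kChunkSize,
              "one byte past a chunk rounds up to two chunks");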
606
607 // Number of all the small-allocated classes
608 #define NUM_SMALL_CLASSES \
609 (kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses + \
610 gNumSubPageClasses)
611
612 // ***************************************************************************
613 // MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
614 #if defined(MALLOC_DECOMMIT) && defined(MALLOC_DOUBLE_PURGE)
615 # error MALLOC_DECOMMIT and MALLOC_DOUBLE_PURGE are mutually exclusive.
616 #endif
617
618 static void* base_alloc(size_t aSize);
619
620 // Set to true once the allocator has been initialized.
621 #if defined(_MSC_VER) && !defined(__clang__)
622 // MSVC may create a static initializer for an Atomic<bool>, which may actually
623 // run after `malloc_init` has been called once, which triggers multiple
624 // initializations.
625 // We work around the problem by not using an Atomic<bool> at all. There is a
626 // theoretical problem with using `malloc_initialized` non-atomically, but
627 // practically, this is only true if `malloc_init` is never called before
628 // threads are created.
629 static bool malloc_initialized;
630 #else
631 static Atomic<bool, SequentiallyConsistent> malloc_initialized;
632 #endif
633
634 static StaticMutex gInitLock = {STATIC_MUTEX_INIT};
635
636 // ***************************************************************************
637 // Statistics data structures.
638
639 struct arena_stats_t {
640 // Number of bytes currently mapped.
641 size_t mapped;
642
643 // Current number of committed pages.
644 size_t committed;
645
646 // Per-size-category statistics.
647 size_t allocated_small;
648
649 size_t allocated_large;
650 };
651
652 // ***************************************************************************
653 // Extent data structures.
654
655 enum ChunkType {
656 UNKNOWN_CHUNK,
657 ZEROED_CHUNK, // chunk only contains zeroes.
658 ARENA_CHUNK, // used to back arena runs created by arena_t::AllocRun.
659 HUGE_CHUNK, // used to back huge allocations (e.g. arena_t::MallocHuge).
660 RECYCLED_CHUNK, // chunk has been stored for future use by chunk_recycle.
661 };
662
663 // Tree of extents.
664 struct extent_node_t {
665 union {
666 // Linkage for the size/address-ordered tree for chunk recycling.
667 RedBlackTreeNode<extent_node_t> mLinkBySize;
668 // Arena id for huge allocations. It's meant to match mArena->mId,
669 // which only holds true when the arena hasn't been disposed of.
670 arena_id_t mArenaId;
671 };
672
673 // Linkage for the address-ordered tree.
674 RedBlackTreeNode<extent_node_t> mLinkByAddr;
675
676 // Pointer to the extent that this tree node is responsible for.
677 void* mAddr;
678
679 // Total region size.
680 size_t mSize;
681
682 union {
683 // What type of chunk is there; used for chunk recycling.
684 ChunkType mChunkType;
685
686 // A pointer to the associated arena, for huge allocations.
687 arena_t* mArena;
688 };
689 };
690
691 struct ExtentTreeSzTrait {
  static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
693 return aThis->mLinkBySize;
694 }
695
  static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
697 Order ret = CompareInt(aNode->mSize, aOther->mSize);
698 return (ret != Order::eEqual) ? ret
699 : CompareAddr(aNode->mAddr, aOther->mAddr);
700 }
701 };
702
703 struct ExtentTreeTrait {
  static RedBlackTreeNode<extent_node_t>& GetTreeNode(extent_node_t* aThis) {
705 return aThis->mLinkByAddr;
706 }
707
  static inline Order Compare(extent_node_t* aNode, extent_node_t* aOther) {
709 return CompareAddr(aNode->mAddr, aOther->mAddr);
710 }
711 };
712
713 struct ExtentTreeBoundsTrait : public ExtentTreeTrait {
  static inline Order Compare(extent_node_t* aKey, extent_node_t* aNode) {
715 uintptr_t key_addr = reinterpret_cast<uintptr_t>(aKey->mAddr);
716 uintptr_t node_addr = reinterpret_cast<uintptr_t>(aNode->mAddr);
717 size_t node_size = aNode->mSize;
718
719 // Is aKey within aNode?
720 if (node_addr <= key_addr && key_addr < node_addr + node_size) {
721 return Order::eEqual;
722 }
723
724 return CompareAddr(aKey->mAddr, aNode->mAddr);
725 }
726 };
727
// Describes the size classes to which allocations are rounded up.
729 // TODO: add large and huge types when the arena allocation code
730 // changes in a way that allows it to be beneficial.
731 class SizeClass {
732 public:
733 enum ClassType {
734 Tiny,
735 Quantum,
736 QuantumWide,
737 SubPage,
738 Large,
739 };
740
  explicit inline SizeClass(size_t aSize) {
742 if (aSize <= kMaxTinyClass) {
743 mType = Tiny;
744 mSize = std::max(RoundUpPow2(aSize), kMinTinyClass);
745 } else if (aSize <= kMaxQuantumClass) {
746 mType = Quantum;
747 mSize = QUANTUM_CEILING(aSize);
748 } else if (aSize <= kMaxQuantumWideClass) {
749 mType = QuantumWide;
750 mSize = QUANTUM_WIDE_CEILING(aSize);
751 } else if (aSize <= gMaxSubPageClass) {
752 mType = SubPage;
753 mSize = SUBPAGE_CEILING(aSize);
754 } else if (aSize <= gMaxLargeClass) {
755 mType = Large;
756 mSize = PAGE_CEILING(aSize);
757 } else {
758 MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Invalid size");
759 }
760 }
761
762 SizeClass& operator=(const SizeClass& aOther) = default;
763
  bool operator==(const SizeClass& aOther) { return aOther.mSize == mSize; }
765
  size_t Size() { return mSize; }
767
  ClassType Type() { return mType; }
769
  SizeClass Next() { return SizeClass(mSize + 1); }
771
772 private:
773 ClassType mType;
774 size_t mSize;
775 };
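
// Illustrative mappings (a sketch; exact results depend on the platform
// constants defined above):
//
//   SizeClass(1).Size()      -> kMinTinyClass (Tiny)
//   SizeClass(100).Size()    -> 112 (Quantum)
//   SizeClass(600).Size()    -> 768 (QuantumWide) where quantum-wide classes
//                               exist, 1024 (SubPage) on Mac
//   SizeClass(100000).Size() -> PAGE_CEILING(100000) (Large)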
776
777 // ***************************************************************************
778 // Radix tree data structures.
779 //
780 // The number of bits passed to the template is the number of significant bits
781 // in an address to do a radix lookup with.
782 //
// An address is looked up by splitting it into kBitsPerLevel-bit chunks,
// except for the most significant bits, where the chunk is kBitsAtLevel1 bits
// wide, which can differ when Bits is not a multiple of kBitsPerLevel.
786 //
787 // With e.g. sizeof(void*)=4, Bits=16 and kBitsPerLevel=8, an address is split
788 // like the following:
789 // 0x12345678 -> mRoot[0x12][0x34]
790 template <size_t Bits>
791 class AddressRadixTree {
792 // Size of each radix tree node (as a power of 2).
793 // This impacts tree depth.
794 #ifdef HAVE_64BIT_BUILD
795 static const size_t kNodeSize = kCacheLineSize;
796 #else
797 static const size_t kNodeSize = 16_KiB;
798 #endif
799 static const size_t kBitsPerLevel = LOG2(kNodeSize) - LOG2(sizeof(void*));
800 static const size_t kBitsAtLevel1 =
801 (Bits % kBitsPerLevel) ? Bits % kBitsPerLevel : kBitsPerLevel;
802 static const size_t kHeight = (Bits + kBitsPerLevel - 1) / kBitsPerLevel;
803 static_assert(kBitsAtLevel1 + (kHeight - 1) * kBitsPerLevel == Bits,
804 "AddressRadixTree parameters don't work out");
805
806 Mutex mLock;
807 void** mRoot;
808
809 public:
810 bool Init();
811
812 inline void* Get(void* aAddr);
813
814 // Returns whether the value was properly set.
815 inline bool Set(void* aAddr, void* aValue);
816
  inline bool Unset(void* aAddr) { return Set(aAddr, nullptr); }
818
819 private:
820 inline void** GetSlot(void* aAddr, bool aCreate = false);
821 };
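
// As a worked example for the chunk tree instantiated further down on a
// 64-bit build: Bits = 64 - LOG2(kChunkSize) = 44 and kNodeSize = 64 bytes,
// so kBitsPerLevel = LOG2(64) - LOG2(8) = 3, kHeight = (44 + 2) / 3 = 15 and
// kBitsAtLevel1 = 44 % 3 = 2; indeed 2 + 14 * 3 = 44 bits are consumed.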
822
823 // ***************************************************************************
824 // Arena data structures.
825
826 struct arena_bin_t;
827
828 struct ArenaChunkMapLink {
  static RedBlackTreeNode<arena_chunk_map_t>& GetTreeNode(
      arena_chunk_map_t* aThis) {
831 return aThis->link;
832 }
833 };
834
835 struct ArenaRunTreeTrait : public ArenaChunkMapLink {
  static inline Order Compare(arena_chunk_map_t* aNode,
                              arena_chunk_map_t* aOther) {
838 MOZ_ASSERT(aNode);
839 MOZ_ASSERT(aOther);
840 return CompareAddr(aNode, aOther);
841 }
842 };
843
844 struct ArenaAvailTreeTrait : public ArenaChunkMapLink {
  static inline Order Compare(arena_chunk_map_t* aNode,
                              arena_chunk_map_t* aOther) {
847 size_t size1 = aNode->bits & ~gPageSizeMask;
848 size_t size2 = aOther->bits & ~gPageSizeMask;
849 Order ret = CompareInt(size1, size2);
850 return (ret != Order::eEqual)
851 ? ret
852 : CompareAddr((aNode->bits & CHUNK_MAP_KEY) ? nullptr : aNode,
853 aOther);
854 }
855 };
856
857 struct ArenaDirtyChunkTrait {
  static RedBlackTreeNode<arena_chunk_t>& GetTreeNode(arena_chunk_t* aThis) {
859 return aThis->link_dirty;
860 }
861
  static inline Order Compare(arena_chunk_t* aNode, arena_chunk_t* aOther) {
863 MOZ_ASSERT(aNode);
864 MOZ_ASSERT(aOther);
865 return CompareAddr(aNode, aOther);
866 }
867 };
868
869 #ifdef MALLOC_DOUBLE_PURGE
870 namespace mozilla {
871
872 template <>
873 struct GetDoublyLinkedListElement<arena_chunk_t> {
  static DoublyLinkedListElement<arena_chunk_t>& Get(arena_chunk_t* aThis) {
875 return aThis->chunks_madvised_elem;
876 }
877 };
878 } // namespace mozilla
879 #endif
880
881 struct arena_run_t {
882 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
883 uint32_t mMagic;
884 # define ARENA_RUN_MAGIC 0x384adf93
885
886 // On 64-bit platforms, having the arena_bin_t pointer following
887 // the mMagic field means there's padding between both fields, making
888 // the run header larger than necessary.
889 // But when MOZ_DIAGNOSTIC_ASSERT_ENABLED is not set, starting the
890 // header with this field followed by the arena_bin_t pointer yields
891 // the same padding. We do want the mMagic field to appear first, so
// depending on whether MOZ_DIAGNOSTIC_ASSERT_ENABLED is set or not, we
// move the mNumFree field to avoid padding.
894
895 // Number of free regions in run.
896 unsigned mNumFree;
897 #endif
898
899 // Bin this run is associated with.
900 arena_bin_t* mBin;
901
902 // Index of first element that might have a free region.
903 unsigned mRegionsMinElement;
904
905 #if !defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
906 // Number of free regions in run.
907 unsigned mNumFree;
908 #endif
909
910 // Bitmask of in-use regions (0: in use, 1: free).
911 unsigned mRegionsMask[1]; // Dynamically sized.
912 };
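
// Illustrative sketch of how a free region is found (the actual logic lives
// in arena_t::ArenaRunRegAlloc further down): starting at mRegionsMinElement,
// scan mRegionsMask for a non-zero word, pick a set bit (bit i of word w;
// set bits mean "free"), clear it, and the region lives at
//   (uintptr_t)run + bin->mRunFirstRegionOffset +
//       (w * (sizeof(unsigned) * 8) + i) * bin->mSizeClass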
913
914 struct arena_bin_t {
915 // Current run being used to service allocations of this bin's size
916 // class.
917 arena_run_t* mCurrentRun;
918
919 // Tree of non-full runs. This tree is used when looking for an
920 // existing run when mCurrentRun is no longer usable. We choose the
921 // non-full run that is lowest in memory; this policy tends to keep
922 // objects packed well, and it can also help reduce the number of
923 // almost-empty chunks.
924 RedBlackTree<arena_chunk_map_t, ArenaRunTreeTrait> mNonFullRuns;
925
926 // Bin's size class.
927 size_t mSizeClass;
928
929 // Total size of a run for this bin's size class.
930 size_t mRunSize;
931
932 // Total number of regions in a run for this bin's size class.
933 uint32_t mRunNumRegions;
934
935 // Number of elements in a run's mRegionsMask for this bin's size class.
936 uint32_t mRunNumRegionsMask;
937
938 // Offset of first region in a run for this bin's size class.
939 uint32_t mRunFirstRegionOffset;
940
941 // Current number of runs in this bin, full or otherwise.
942 unsigned long mNumRuns;
943
944 // Amount of overhead runs are allowed to have.
945 static constexpr double kRunOverhead = 1.6_percent;
946 static constexpr double kRunRelaxedOverhead = 2.4_percent;
947
948 // Initialize a bin for the given size class.
949 // The generated run sizes, for a page size of 4 KiB, are:
//   size class | run size, repeated in four columns per row:
952 // 4 4 KiB 8 4 KiB 16 4 KiB 32 4 KiB
953 // 48 4 KiB 64 4 KiB 80 4 KiB 96 4 KiB
954 // 112 4 KiB 128 8 KiB 144 4 KiB 160 8 KiB
955 // 176 4 KiB 192 4 KiB 208 8 KiB 224 4 KiB
956 // 240 8 KiB 256 16 KiB 272 8 KiB 288 4 KiB
957 // 304 12 KiB 320 12 KiB 336 4 KiB 352 8 KiB
958 // 368 4 KiB 384 8 KiB 400 20 KiB 416 16 KiB
959 // 432 12 KiB 448 4 KiB 464 16 KiB 480 8 KiB
960 // 496 20 KiB 512 32 KiB 768 16 KiB 1024 64 KiB
961 // 1280 24 KiB 1536 32 KiB 1792 16 KiB 2048 128 KiB
962 // 2304 16 KiB 2560 48 KiB 2816 36 KiB 3072 64 KiB
963 // 3328 36 KiB 3584 32 KiB 3840 64 KiB
964 inline void Init(SizeClass aSizeClass);
965 };
966
967 struct arena_t {
968 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
969 uint32_t mMagic;
970 # define ARENA_MAGIC 0x947d3d24
971 #endif
972
973 // Linkage for the tree of arenas by id.
974 RedBlackTreeNode<arena_t> mLink;
975
976 // Arena id, that we keep away from the beginning of the struct so that
977 // free list pointers in TypedBaseAlloc<arena_t> don't overflow in it,
978 // and it keeps the value it had after the destructor.
979 arena_id_t mId;
980
981 // All operations on this arena require that lock be locked.
982 Mutex mLock;
983
984 arena_stats_t mStats;
985
986 private:
987 // Tree of dirty-page-containing chunks this arena manages.
988 RedBlackTree<arena_chunk_t, ArenaDirtyChunkTrait> mChunksDirty;
989
990 #ifdef MALLOC_DOUBLE_PURGE
991 // Head of a linked list of MADV_FREE'd-page-containing chunks this
992 // arena manages.
993 DoublyLinkedList<arena_chunk_t> mChunksMAdvised;
994 #endif
995
996 // In order to avoid rapid chunk allocation/deallocation when an arena
997 // oscillates right on the cusp of needing a new chunk, cache the most
998 // recently freed chunk. The spare is left in the arena's chunk trees
999 // until it is deleted.
1000 //
1001 // There is one spare chunk per arena, rather than one spare total, in
1002 // order to avoid interactions between multiple threads that could make
1003 // a single spare inadequate.
1004 arena_chunk_t* mSpare;
1005
1006 // A per-arena opt-in to randomize the offset of small allocations
1007 bool mRandomizeSmallAllocations;
1008
1009 // Whether this is a private arena. Multiple public arenas are just a
1010 // performance optimization and not a safety feature.
1011 //
1012 // Since, for example, we don't want thread-local arenas to grow too much, we
1013 // use the default arena for bigger allocations. We use this member to allow
1014 // realloc() to switch out of our arena if needed (which is not allowed for
1015 // private arenas for security).
1016 bool mIsPrivate;
1017
1018 // A pseudorandom number generator. Initially null, it gets initialized
1019 // on first use to avoid recursive malloc initialization (e.g. on OSX
1020 // arc4random allocates memory).
1021 mozilla::non_crypto::XorShift128PlusRNG* mPRNG;
1022
1023 public:
1024 // Current count of pages within unused runs that are potentially
1025 // dirty, and for which madvise(... MADV_FREE) has not been called. By
1026 // tracking this, we can institute a limit on how much dirty unused
1027 // memory is mapped for each arena.
1028 size_t mNumDirty;
1029
1030 // Maximum value allowed for mNumDirty.
1031 size_t mMaxDirty;
1032
1033 private:
1034 // Size/address-ordered tree of this arena's available runs. This tree
1035 // is used for first-best-fit run allocation.
1036 RedBlackTree<arena_chunk_map_t, ArenaAvailTreeTrait> mRunsAvail;
1037
1038 public:
1039 // mBins is used to store rings of free regions of the following sizes,
1040 // assuming a 16-byte quantum, 4kB pagesize, and default MALLOC_OPTIONS.
1041 //
1042 // mBins[i] | size |
1043 // --------+------+
1044 // 0 | 2 |
1045 // 1 | 4 |
1046 // 2 | 8 |
1047 // --------+------+
1048 // 3 | 16 |
1049 // 4 | 32 |
1050 // 5 | 48 |
1051 // 6 | 64 |
1052 // : :
1053 // : :
1054 // 33 | 496 |
1055 // 34 | 512 |
1056 // --------+------+
1057 // 35 | 768 |
1058 // 36 | 1024 |
1059 // : :
1060 // : :
1061 // 46 | 3584 |
1062 // 47 | 3840 |
1063 // --------+------+
1064 arena_bin_t mBins[1]; // Dynamically sized.
1065
1066 explicit arena_t(arena_params_t* aParams, bool aIsPrivate);
1067 ~arena_t();
1068
1069 private:
1070 void InitChunk(arena_chunk_t* aChunk, bool aZeroed);
1071
1072 void DeallocChunk(arena_chunk_t* aChunk);
1073
1074 arena_run_t* AllocRun(size_t aSize, bool aLarge, bool aZero);
1075
1076 void DallocRun(arena_run_t* aRun, bool aDirty);
1077
1078 [[nodiscard]] bool SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
1079 bool aZero);
1080
1081 void TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
1082 size_t aNewSize);
1083
1084 void TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun, size_t aOldSize,
1085 size_t aNewSize, bool dirty);
1086
1087 arena_run_t* GetNonFullBinRun(arena_bin_t* aBin);
1088
1089 inline uint8_t FindFreeBitInMask(uint32_t aMask, uint32_t& aRng);
1090
1091 inline void* ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin);
1092
1093 inline void* MallocSmall(size_t aSize, bool aZero);
1094
1095 void* MallocLarge(size_t aSize, bool aZero);
1096
1097 void* MallocHuge(size_t aSize, bool aZero);
1098
1099 void* PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize);
1100
1101 void* PallocHuge(size_t aSize, size_t aAlignment, bool aZero);
1102
1103 void RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
1104 size_t aOldSize);
1105
1106 bool RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
1107 size_t aOldSize);
1108
1109 void* RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize);
1110
1111 void* RallocHuge(void* aPtr, size_t aSize, size_t aOldSize);
1112
1113 public:
1114 inline void* Malloc(size_t aSize, bool aZero);
1115
1116 void* Palloc(size_t aAlignment, size_t aSize);
1117
1118 inline void DallocSmall(arena_chunk_t* aChunk, void* aPtr,
1119 arena_chunk_map_t* aMapElm);
1120
1121 void DallocLarge(arena_chunk_t* aChunk, void* aPtr);
1122
1123 void* Ralloc(void* aPtr, size_t aSize, size_t aOldSize);
1124
1125 void Purge(bool aAll);
1126
1127 void HardPurge();
1128
1129 void* operator new(size_t aCount) = delete;
1130
1131 void* operator new(size_t aCount, const fallible_t&) noexcept;
1132
1133 void operator delete(void*);
1134 };
1135
1136 struct ArenaTreeTrait {
  static RedBlackTreeNode<arena_t>& GetTreeNode(arena_t* aThis) {
1138 return aThis->mLink;
1139 }
1140
  static inline Order Compare(arena_t* aNode, arena_t* aOther) {
1142 MOZ_ASSERT(aNode);
1143 MOZ_ASSERT(aOther);
1144 return CompareInt(aNode->mId, aOther->mId);
1145 }
1146 };
1147
1148 // Bookkeeping for all the arenas used by the allocator.
// Arenas are separated into two categories:
1150 // - "private" arenas, used through the moz_arena_* API
1151 // - all the other arenas: the default arena, and thread-local arenas,
1152 // used by the standard API.
1153 class ArenaCollection {
1154 public:
  bool Init() {
1156 mArenas.Init();
1157 mPrivateArenas.Init();
1158 arena_params_t params;
1159 // The main arena allows more dirty pages than the default for other arenas.
1160 params.mMaxDirty = opt_dirty_max;
1161 mDefaultArena =
1162 mLock.Init() ? CreateArena(/* IsPrivate = */ false, ¶ms) : nullptr;
1163 return bool(mDefaultArena);
1164 }
1165
1166 inline arena_t* GetById(arena_id_t aArenaId, bool aIsPrivate);
1167
1168 arena_t* CreateArena(bool aIsPrivate, arena_params_t* aParams);
1169
  void DisposeArena(arena_t* aArena) {
1171 MutexAutoLock lock(mLock);
1172 MOZ_RELEASE_ASSERT(mPrivateArenas.Search(aArena),
1173 "Can only dispose of private arenas");
1174 mPrivateArenas.Remove(aArena);
1175 delete aArena;
1176 }
1177
1178 using Tree = RedBlackTree<arena_t, ArenaTreeTrait>;
1179
1180 struct Iterator : Tree::Iterator {
    explicit Iterator(Tree* aTree, Tree* aSecondTree)
        : Tree::Iterator(aTree), mNextTree(aSecondTree) {}
1183
    Item<Iterator> begin() {
1185 return Item<Iterator>(this, *Tree::Iterator::begin());
1186 }
1187
    Item<Iterator> end() { return Item<Iterator>(this, nullptr); }
1189
    arena_t* Next() {
1191 arena_t* result = Tree::Iterator::Next();
1192 if (!result && mNextTree) {
1193 new (this) Iterator(mNextTree, nullptr);
1194 result = *Tree::Iterator::begin();
1195 }
1196 return result;
1197 }
1198
1199 private:
1200 Tree* mNextTree;
1201 };
1202
  Iterator iter() { return Iterator(&mArenas, &mPrivateArenas); }
1204
  inline arena_t* GetDefault() { return mDefaultArena; }
1206
1207 Mutex mLock;
1208
1209 private:
1210 inline arena_t* GetByIdInternal(arena_id_t aArenaId, bool aIsPrivate);
1211
1212 arena_t* mDefaultArena;
1213 arena_id_t mLastPublicArenaId;
1214 Tree mArenas;
1215 Tree mPrivateArenas;
1216 };
1217
1218 static ArenaCollection gArenas;
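
// Illustrative usage sketch (not a definition from this file): code that
// needs to visit every arena, public then private, takes the collection lock
// and relies on the chained iterator above:
//
//   MutexAutoLock lock(gArenas.mLock);
//   for (auto arena : gArenas.iter()) {
//     // inspect arena->mStats, arena->mNumDirty, ...
//   }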
1219
1220 // ******
1221 // Chunks.
1222 static AddressRadixTree<(sizeof(void*) << 3) - LOG2(kChunkSize)> gChunkRTree;
1223
1224 // Protects chunk-related data structures.
1225 static Mutex chunks_mtx;
1226
1227 // Trees of chunks that were previously allocated (trees differ only in node
1228 // ordering). These are used when allocating chunks, in an attempt to re-use
1229 // address space. Depending on function, different tree orderings are needed,
1230 // which is why there are two trees with the same contents.
1231 static RedBlackTree<extent_node_t, ExtentTreeSzTrait> gChunksBySize;
1232 static RedBlackTree<extent_node_t, ExtentTreeTrait> gChunksByAddress;
1233
1234 // Protects huge allocation-related data structures.
1235 static Mutex huge_mtx;
1236
1237 // Tree of chunks that are stand-alone huge allocations.
1238 static RedBlackTree<extent_node_t, ExtentTreeTrait> huge;
1239
1240 // Huge allocation statistics.
1241 static size_t huge_allocated;
1242 static size_t huge_mapped;
1243
1244 // **************************
1245 // base (internal allocation).
1246
1247 // Current pages that are being used for internal memory allocations. These
1248 // pages are carved up in cacheline-size quanta, so that there is no chance of
1249 // false cache line sharing.
1250
1251 static void* base_pages;
1252 static void* base_next_addr;
1253 static void* base_next_decommitted;
1254 static void* base_past_addr; // Addr immediately past base_pages.
1255 static Mutex base_mtx;
1256 static size_t base_mapped;
1257 static size_t base_committed;
1258
1259 // ******
1260 // Arenas.
1261
// The arena associated with the current thread (per
// jemalloc_thread_local_arena). On OSX, __thread/thread_local circles back
1264 // calling malloc to allocate storage on first access on each thread, which
1265 // leads to an infinite loop, but pthread-based TLS somehow doesn't have this
1266 // problem.
1267 #if !defined(XP_DARWIN)
1268 static MOZ_THREAD_LOCAL(arena_t*) thread_arena;
1269 #else
1270 static detail::ThreadLocal<arena_t*, detail::ThreadLocalKeyStorage>
1271 thread_arena;
1272 #endif
1273
1274 // *****************************
1275 // Runtime configuration options.
1276
1277 const uint8_t kAllocJunk = 0xe4;
1278 const uint8_t kAllocPoison = 0xe5;
1279
1280 #ifdef MOZ_DEBUG
1281 static bool opt_junk = true;
1282 static bool opt_zero = false;
1283 #else
1284 static const bool opt_junk = false;
1285 static const bool opt_zero = false;
1286 #endif
1287 static bool opt_randomize_small = true;
1288
1289 // ***************************************************************************
1290 // Begin forward declarations.
1291
1292 static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase,
1293 bool* aZeroed = nullptr);
1294 static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType);
1295 static void chunk_ensure_zero(void* aPtr, size_t aSize, bool aZeroed);
1296 static void huge_dalloc(void* aPtr, arena_t* aArena);
1297 static bool malloc_init_hard();
1298
1299 #ifdef XP_DARWIN
1300 # define FORK_HOOK extern "C"
1301 #else
1302 # define FORK_HOOK static
1303 #endif
1304 FORK_HOOK void _malloc_prefork(void);
1305 FORK_HOOK void _malloc_postfork_parent(void);
1306 FORK_HOOK void _malloc_postfork_child(void);
1307
1308 // End forward declarations.
1309 // ***************************************************************************
1310
1311 // FreeBSD's pthreads implementation calls malloc(3), so the malloc
1312 // implementation has to take pains to avoid infinite recursion during
1313 // initialization.
1314 // Returns whether the allocator was successfully initialized.
static inline bool malloc_init() {
1316 if (malloc_initialized == false) {
1317 return malloc_init_hard();
1318 }
1319
1320 return true;
1321 }
1322
static void _malloc_message(const char* p) {
1324 #if !defined(XP_WIN)
1325 # define _write write
1326 #endif
1327 // Pretend to check _write() errors to suppress gcc warnings about
1328 // warn_unused_result annotations in some versions of glibc headers.
1329 if (_write(STDERR_FILENO, p, (unsigned int)strlen(p)) < 0) {
1330 return;
1331 }
1332 }
1333
1334 template <typename... Args>
static void _malloc_message(const char* p, Args... args) {
1336 _malloc_message(p);
1337 _malloc_message(args...);
1338 }
1339
1340 #ifdef ANDROID
1341 // Android's pthread.h does not declare pthread_atfork() until SDK 21.
1342 extern "C" MOZ_EXPORT int pthread_atfork(void (*)(void), void (*)(void),
1343 void (*)(void));
1344 #endif
1345
1346 // ***************************************************************************
1347 // Begin Utility functions/macros.
1348
1349 // Return the chunk address for allocation address a.
static inline arena_chunk_t* GetChunkForPtr(const void* aPtr) {
1351 return (arena_chunk_t*)(uintptr_t(aPtr) & ~kChunkSizeMask);
1352 }
1353
1354 // Return the chunk offset of address a.
static inline size_t GetChunkOffsetForPtr(const void* aPtr) {
1356 return (size_t)(uintptr_t(aPtr) & kChunkSizeMask);
1357 }
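
// For example, with 1 MiB chunks, a pointer p located 0x1234 bytes into a
// chunk satisfies GetChunkForPtr(p) == the chunk base and
// GetChunkOffsetForPtr(p) == 0x1234, because chunks are always aligned to
// kChunkSize.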
1358
static inline const char* _getprogname(void) { return "<jemalloc>"; }
1360
// Fill the given range of memory with zeroes or junk depending on opt_junk
// and opt_zero.
static inline void ApplyZeroOrJunk(void* aPtr, size_t aSize) {
1365 if (opt_junk) {
1366 memset(aPtr, kAllocJunk, aSize);
1367 } else if (opt_zero) {
1368 memset(aPtr, 0, aSize);
1369 }
1370 }
1371
1372 // ***************************************************************************
1373
static inline void pages_decommit(void* aAddr, size_t aSize) {
1375 #ifdef XP_WIN
1376 // The region starting at addr may have been allocated in multiple calls
1377 // to VirtualAlloc and recycled, so decommitting the entire region in one
1378 // go may not be valid. However, since we allocate at least a chunk at a
1379 // time, we may touch any region in chunksized increments.
1380 size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1381 while (aSize > 0) {
1382 // This will cause Access Violation on read and write and thus act as a
1383 // guard page or region as well.
1384 if (!VirtualFree(aAddr, pages_size, MEM_DECOMMIT)) {
1385 MOZ_CRASH();
1386 }
1387 aAddr = (void*)((uintptr_t)aAddr + pages_size);
1388 aSize -= pages_size;
1389 pages_size = std::min(aSize, kChunkSize);
1390 }
1391 #else
1392 if (mmap(aAddr, aSize, PROT_NONE, MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1,
1393 0) == MAP_FAILED) {
1394 // We'd like to report the OOM for our tooling, but we can't allocate
1395 // memory at this point, so avoid the use of printf.
1396 const char out_of_mappings[] =
1397 "[unhandlable oom] Failed to mmap, likely no more mappings "
1398 "available " __FILE__ " : " MOZ_STRINGIFY(__LINE__);
1399 if (errno == ENOMEM) {
1400 # ifndef ANDROID
1401 fputs(out_of_mappings, stderr);
1402 fflush(stderr);
1403 # endif
1404 MOZ_CRASH_ANNOTATE(out_of_mappings);
1405 }
1406 MOZ_REALLY_CRASH(__LINE__);
1407 }
1408 MozTagAnonymousMemory(aAddr, aSize, "jemalloc-decommitted");
1409 #endif
1410 }
1411
1412 // Commit pages. Returns whether pages were committed.
[[nodiscard]] static inline bool pages_commit(void* aAddr, size_t aSize) {
1414 #ifdef XP_WIN
1415 // The region starting at addr may have been allocated in multiple calls
1416 // to VirtualAlloc and recycled, so committing the entire region in one
1417 // go may not be valid. However, since we allocate at least a chunk at a
1418 // time, we may touch any region in chunksized increments.
1419 size_t pages_size = std::min(aSize, kChunkSize - GetChunkOffsetForPtr(aAddr));
1420 while (aSize > 0) {
1421 if (!VirtualAlloc(aAddr, pages_size, MEM_COMMIT, PAGE_READWRITE)) {
1422 return false;
1423 }
1424 aAddr = (void*)((uintptr_t)aAddr + pages_size);
1425 aSize -= pages_size;
1426 pages_size = std::min(aSize, kChunkSize);
1427 }
1428 #else
1429 if (mmap(aAddr, aSize, PROT_READ | PROT_WRITE,
1430 MAP_FIXED | MAP_PRIVATE | MAP_ANON, -1, 0) == MAP_FAILED) {
1431 return false;
1432 }
1433 MozTagAnonymousMemory(aAddr, aSize, "jemalloc");
1434 #endif
1435 return true;
1436 }
1437
static bool base_pages_alloc(size_t minsize) {
1439 size_t csize;
1440 size_t pminsize;
1441
1442 MOZ_ASSERT(minsize != 0);
1443 csize = CHUNK_CEILING(minsize);
1444 base_pages = chunk_alloc(csize, kChunkSize, true);
1445 if (!base_pages) {
1446 return true;
1447 }
1448 base_next_addr = base_pages;
1449 base_past_addr = (void*)((uintptr_t)base_pages + csize);
1450 // Leave enough pages for minsize committed, since otherwise they would
1451 // have to be immediately recommitted.
1452 pminsize = PAGE_CEILING(minsize);
1453 base_next_decommitted = (void*)((uintptr_t)base_pages + pminsize);
1454 if (pminsize < csize) {
1455 pages_decommit(base_next_decommitted, csize - pminsize);
1456 }
1457 base_mapped += csize;
1458 base_committed += pminsize;
1459
1460 return false;
1461 }
1462
static void* base_alloc(size_t aSize) {
1464 void* ret;
1465 size_t csize;
1466
1467 // Round size up to nearest multiple of the cacheline size.
1468 csize = CACHELINE_CEILING(aSize);
1469
1470 MutexAutoLock lock(base_mtx);
1471 // Make sure there's enough space for the allocation.
1472 if ((uintptr_t)base_next_addr + csize > (uintptr_t)base_past_addr) {
1473 if (base_pages_alloc(csize)) {
1474 return nullptr;
1475 }
1476 }
1477 // Allocate.
1478 ret = base_next_addr;
1479 base_next_addr = (void*)((uintptr_t)base_next_addr + csize);
1480 // Make sure enough pages are committed for the new allocation.
1481 if ((uintptr_t)base_next_addr > (uintptr_t)base_next_decommitted) {
1482 void* pbase_next_addr = (void*)(PAGE_CEILING((uintptr_t)base_next_addr));
1483
1484 if (!pages_commit(
1485 base_next_decommitted,
1486 (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted)) {
1487 return nullptr;
1488 }
1489
1490 base_committed +=
1491 (uintptr_t)pbase_next_addr - (uintptr_t)base_next_decommitted;
1492 base_next_decommitted = pbase_next_addr;
1493 }
1494
1495 return ret;
1496 }
1497
static void* base_calloc(size_t aNumber, size_t aSize) {
1499 void* ret = base_alloc(aNumber * aSize);
1500 if (ret) {
1501 memset(ret, 0, aNumber * aSize);
1502 }
1503 return ret;
1504 }
1505
1506 // A specialization of the base allocator with a free list.
1507 template <typename T>
1508 struct TypedBaseAlloc {
1509 static T* sFirstFree;
1510
  static size_t size_of() { return sizeof(T); }
1512
  static T* alloc() {
1514 T* ret;
1515
1516 base_mtx.Lock();
1517 if (sFirstFree) {
1518 ret = sFirstFree;
1519 sFirstFree = *(T**)ret;
1520 base_mtx.Unlock();
1521 } else {
1522 base_mtx.Unlock();
1523 ret = (T*)base_alloc(size_of());
1524 }
1525
1526 return ret;
1527 }
1528
  static void dealloc(T* aNode) {
1530 MutexAutoLock lock(base_mtx);
1531 *(T**)aNode = sFirstFree;
1532 sFirstFree = aNode;
1533 }
1534 };
1535
1536 using ExtentAlloc = TypedBaseAlloc<extent_node_t>;
1537
1538 template <>
1539 extent_node_t* ExtentAlloc::sFirstFree = nullptr;
1540
1541 template <>
1542 arena_t* TypedBaseAlloc<arena_t>::sFirstFree = nullptr;
1543
1544 template <>
size_t TypedBaseAlloc<arena_t>::size_of() {
1546 // Allocate enough space for trailing bins.
1547 return sizeof(arena_t) + (sizeof(arena_bin_t) * (NUM_SMALL_CLASSES - 1));
1548 }
1549
1550 template <typename T>
1551 struct BaseAllocFreePolicy {
  void operator()(T* aPtr) { TypedBaseAlloc<T>::dealloc(aPtr); }
1553 };
1554
1555 using UniqueBaseNode =
1556 UniquePtr<extent_node_t, BaseAllocFreePolicy<extent_node_t>>;
1557
1558 // End Utility functions/macros.
1559 // ***************************************************************************
1560 // Begin chunk management functions.
1561
1562 #ifdef XP_WIN
1563
static void* pages_map(void* aAddr, size_t aSize) {
1565 void* ret = nullptr;
1566 ret = VirtualAlloc(aAddr, aSize, MEM_COMMIT | MEM_RESERVE, PAGE_READWRITE);
1567 return ret;
1568 }
1569
static void pages_unmap(void* aAddr, size_t aSize) {
1571 if (VirtualFree(aAddr, 0, MEM_RELEASE) == 0) {
1572 _malloc_message(_getprogname(), ": (malloc) Error in VirtualFree()\n");
1573 }
1574 }
1575 #else
1576
static void pages_unmap(void* aAddr, size_t aSize) {
1578 if (munmap(aAddr, aSize) == -1) {
1579 char buf[64];
1580
1581 if (strerror_r(errno, buf, sizeof(buf)) == 0) {
1582 _malloc_message(_getprogname(), ": (malloc) Error in munmap(): ", buf,
1583 "\n");
1584 }
1585 }
1586 }
1587
static void* pages_map(void* aAddr, size_t aSize) {
1589 void* ret;
1590 # if defined(__ia64__) || \
1591 (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1592 // The JS engine assumes that all allocated pointers have their high 17 bits
1593 // clear, which ia64's mmap doesn't support directly. However, we can emulate
1594 // it by passing mmap an "addr" parameter with those bits clear. The mmap will
1595 // return that address, or the nearest available memory above that address,
1596 // providing a near-guarantee that those bits are clear. If they are not, we
1597 // return nullptr below to indicate out-of-memory.
1598 //
1599 // The addr is chosen as 0x0000070000000000, which still allows about 120TB of
1600 // virtual address space.
1601 //
1602 // See Bug 589735 for more information.
1603 bool check_placement = true;
1604 if (!aAddr) {
1605 aAddr = (void*)0x0000070000000000;
1606 check_placement = false;
1607 }
1608 # endif
1609
1610 # if defined(__sparc__) && defined(__arch64__) && defined(__linux__)
1611 const uintptr_t start = 0x0000070000000000ULL;
1612 const uintptr_t end = 0x0000800000000000ULL;
1613
1614 // Copied from js/src/gc/Memory.cpp and adapted for this source
1615 uintptr_t hint;
1616 void* region = MAP_FAILED;
1617 for (hint = start; region == MAP_FAILED && hint + aSize <= end;
1618 hint += kChunkSize) {
1619 region = mmap((void*)hint, aSize, PROT_READ | PROT_WRITE,
1620 MAP_PRIVATE | MAP_ANON, -1, 0);
1621 if (region != MAP_FAILED) {
1622 if (((size_t)region + (aSize - 1)) & 0xffff800000000000) {
1623 if (munmap(region, aSize)) {
1624 MOZ_ASSERT(errno == ENOMEM);
1625 }
1626 region = MAP_FAILED;
1627 }
1628 }
1629 }
1630 ret = region;
1631 # else
1632 // We don't use MAP_FIXED here, because it can cause the *replacement*
1633 // of existing mappings, and we only want to create new mappings.
1634 ret =
1635 mmap(aAddr, aSize, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
1636 MOZ_ASSERT(ret);
1637 # endif
1638 if (ret == MAP_FAILED) {
1639 ret = nullptr;
1640 }
1641 # if defined(__ia64__) || \
1642 (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1643 // If the allocated memory doesn't have its upper 17 bits clear, consider it
1644 // as out of memory.
1645 else if ((long long)ret & 0xffff800000000000) {
1646 munmap(ret, aSize);
1647 ret = nullptr;
1648 }
1649 // If the caller requested a specific memory location, verify that's what mmap
1650 // returned.
1651 else if (check_placement && ret != aAddr) {
1652 # else
1653 else if (aAddr && ret != aAddr) {
1654 # endif
1655 // We succeeded in mapping memory, but not in the right place.
1656 pages_unmap(ret, aSize);
1657 ret = nullptr;
1658 }
1659 if (ret) {
1660 MozTagAnonymousMemory(ret, aSize, "jemalloc");
1661 }
1662
1663 # if defined(__ia64__) || \
1664 (defined(__sparc__) && defined(__arch64__) && defined(__linux__))
1665 MOZ_ASSERT(!ret || (!check_placement && ret) ||
1666 (check_placement && ret == aAddr));
1667 # else
1668 MOZ_ASSERT(!ret || (!aAddr && ret != aAddr) || (aAddr && ret == aAddr));
1669 # endif
1670 return ret;
1671 }
1672 #endif
1673
1674 #ifdef XP_DARWIN
1675 # define VM_COPY_MIN kChunkSize
1676 static inline void pages_copy(void* dest, const void* src, size_t n) {
1677 MOZ_ASSERT((void*)((uintptr_t)dest & ~gPageSizeMask) == dest);
1678 MOZ_ASSERT(n >= VM_COPY_MIN);
1679 MOZ_ASSERT((void*)((uintptr_t)src & ~gPageSizeMask) == src);
1680
1681 kern_return_t r = vm_copy(mach_task_self(), (vm_address_t)src, (vm_size_t)n,
1682 (vm_address_t)dest);
1683 if (r != KERN_SUCCESS) {
1684 MOZ_CRASH("vm_copy() failed");
1685 }
1686 }
1687
1688 #endif
1689
1690 template <size_t Bits>
1691 bool AddressRadixTree<Bits>::Init() {
1692 mLock.Init();
1693 mRoot = (void**)base_calloc(1 << kBitsAtLevel1, sizeof(void*));
1694 return mRoot;
1695 }
1696
1697 template <size_t Bits>
1698 void** AddressRadixTree<Bits>::GetSlot(void* aKey, bool aCreate) {
1699 uintptr_t key = reinterpret_cast<uintptr_t>(aKey);
1700 uintptr_t subkey;
1701 unsigned i, lshift, height, bits;
1702 void** node;
1703 void** child;
1704
1705 for (i = lshift = 0, height = kHeight, node = mRoot; i < height - 1;
1706 i++, lshift += bits, node = child) {
1707 bits = i ? kBitsPerLevel : kBitsAtLevel1;
1708 subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
1709 child = (void**)node[subkey];
1710 if (!child && aCreate) {
1711 child = (void**)base_calloc(1 << kBitsPerLevel, sizeof(void*));
1712 if (child) {
1713 node[subkey] = child;
1714 }
1715 }
1716 if (!child) {
1717 return nullptr;
1718 }
1719 }
1720
1721 // node is a leaf, so it contains values rather than node
1722 // pointers.
1723 bits = i ? kBitsPerLevel : kBitsAtLevel1;
1724 subkey = (key << lshift) >> ((sizeof(void*) << 3) - bits);
1725 return &node[subkey];
1726 }
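// For illustration (the constants here are hypothetical; the real values
// derive from the Bits template parameter): on a 64-bit build with
// kBitsAtLevel1 == kBitsPerLevel == 8 and kHeight == 3, level 0 is indexed by
// key >> 56, level 1 by (key << 8) >> 56, and the leaf by (key << 16) >> 56,
// i.e. each level consumes the next group of high-order bits of the key.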
1727
1728 template <size_t Bits>
1729 void* AddressRadixTree<Bits>::Get(void* aKey) {
1730 void* ret = nullptr;
1731
1732 void** slot = GetSlot(aKey);
1733
1734 if (slot) {
1735 ret = *slot;
1736 }
1737 #ifdef MOZ_DEBUG
1738 MutexAutoLock lock(mLock);
1739
1740 // Suppose that it were possible for a jemalloc-allocated chunk to be
1741 // munmap()ped, followed by a different allocator in another thread re-using
1742 // overlapping virtual memory, all without invalidating the cached rtree
1743 // value. The result would be a false positive (the rtree would claim that
1744 // jemalloc owns memory that it had actually discarded). I don't think this
1745 // scenario is possible, but the following assertion is a prudent sanity
1746 // check.
1747 if (!slot) {
1748 // In case a slot has been created in the meantime.
1749 slot = GetSlot(aKey);
1750 }
1751 if (slot) {
1752 // The MutexAutoLock above should act as a memory barrier, forcing
1753 // the compiler to emit a new read instruction for *slot.
1754 MOZ_ASSERT(ret == *slot);
1755 } else {
1756 MOZ_ASSERT(ret == nullptr);
1757 }
1758 #endif
1759 return ret;
1760 }
1761
1762 template <size_t Bits>
1763 bool AddressRadixTree<Bits>::Set(void* aKey, void* aValue) {
1764 MutexAutoLock lock(mLock);
1765 void** slot = GetSlot(aKey, /* create = */ true);
1766 if (slot) {
1767 *slot = aValue;
1768 }
1769 return slot;
1770 }
1771
1772 // pages_trim, chunk_alloc_mmap_slow and chunk_alloc_mmap were cherry-picked
1773 // from upstream jemalloc 3.4.1 to fix Mozilla bug 956501.
1774
1775 // Return the offset between a and the nearest aligned address at or below a.
1776 #define ALIGNMENT_ADDR2OFFSET(a, alignment) \
1777 ((size_t)((uintptr_t)(a) & (alignment - 1)))
1778
1779 // Return the smallest alignment multiple that is >= s.
1780 #define ALIGNMENT_CEILING(s, alignment) \
1781 (((s) + (alignment - 1)) & (~(alignment - 1)))
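// For illustration, with a 4 KiB (0x1000) alignment:
//   ALIGNMENT_ADDR2OFFSET(0x2f001, 0x1000) == 0x001
//   ALIGNMENT_CEILING(0x2f001, 0x1000) == 0x30000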
1782
1783 static void* pages_trim(void* addr, size_t alloc_size, size_t leadsize,
1784 size_t size) {
1785 void* ret = (void*)((uintptr_t)addr + leadsize);
1786
1787 MOZ_ASSERT(alloc_size >= leadsize + size);
1788 #ifdef XP_WIN
1789 {
1790 void* new_addr;
1791
1792 pages_unmap(addr, alloc_size);
1793 new_addr = pages_map(ret, size);
1794 if (new_addr == ret) {
1795 return ret;
1796 }
1797 if (new_addr) {
1798 pages_unmap(new_addr, size);
1799 }
1800 return nullptr;
1801 }
1802 #else
1803 {
1804 size_t trailsize = alloc_size - leadsize - size;
1805
1806 if (leadsize != 0) {
1807 pages_unmap(addr, leadsize);
1808 }
1809 if (trailsize != 0) {
1810 pages_unmap((void*)((uintptr_t)ret + size), trailsize);
1811 }
1812 return ret;
1813 }
1814 #endif
1815 }
1816
1817 static void* chunk_alloc_mmap_slow(size_t size, size_t alignment) {
1818 void *ret, *pages;
1819 size_t alloc_size, leadsize;
1820
1821 alloc_size = size + alignment - gRealPageSize;
1822 // Beware size_t wrap-around.
1823 if (alloc_size < size) {
1824 return nullptr;
1825 }
1826 do {
1827 pages = pages_map(nullptr, alloc_size);
1828 if (!pages) {
1829 return nullptr;
1830 }
1831 leadsize =
1832 ALIGNMENT_CEILING((uintptr_t)pages, alignment) - (uintptr_t)pages;
1833 ret = pages_trim(pages, alloc_size, leadsize, size);
1834 } while (!ret);
1835
1836 MOZ_ASSERT(ret);
1837 return ret;
1838 }
1839
1840 static void* chunk_alloc_mmap(size_t size, size_t alignment) {
1841 void* ret;
1842 size_t offset;
1843
1844 // Ideally, there would be a way to specify alignment to mmap() (like
1845 // NetBSD has), but in the absence of such a feature, we have to work
1846 // hard to efficiently create aligned mappings. The reliable, but
1847 // slow method is to create a mapping that is over-sized, then trim the
1848 // excess. However, that always results in one or two calls to
1849 // pages_unmap().
1850 //
1851 // Optimistically try mapping precisely the right amount before falling
1852 // back to the slow method, with the expectation that the optimistic
1853 // approach works most of the time.
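  //
  // For illustration, assuming 4 KiB pages and a 1 MiB chunk size: the fast
  // path maps exactly 1 MiB and succeeds whenever the kernel happens to
  // return a 1 MiB-aligned address. Otherwise the slow path maps
  // 1 MiB + 1 MiB - 4 KiB, so that some 1 MiB-aligned 1 MiB block is
  // guaranteed to lie inside the mapping, then trims the leading and trailing
  // excess with pages_trim().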
1854 ret = pages_map(nullptr, size);
1855 if (!ret) {
1856 return nullptr;
1857 }
1858 offset = ALIGNMENT_ADDR2OFFSET(ret, alignment);
1859 if (offset != 0) {
1860 pages_unmap(ret, size);
1861 return chunk_alloc_mmap_slow(size, alignment);
1862 }
1863
1864 MOZ_ASSERT(ret);
1865 return ret;
1866 }
1867
1868 // Purge and release the pages in the chunk of length `length` at `addr` to
1869 // the OS.
1870 // Returns whether the pages are guaranteed to be full of zeroes when the
1871 // function returns.
1872 // The force_zero argument explicitly requests that the memory is guaranteed
1873 // to be full of zeroes when the function returns.
1874 static bool pages_purge(void* addr, size_t length, bool force_zero) {
1875 pages_decommit(addr, length);
1876 return true;
1877 }
1878
1879 static void* chunk_recycle(size_t aSize, size_t aAlignment, bool* aZeroed) {
1880 extent_node_t key;
1881
1882 size_t alloc_size = aSize + aAlignment - kChunkSize;
1883 // Beware size_t wrap-around.
1884 if (alloc_size < aSize) {
1885 return nullptr;
1886 }
1887 key.mAddr = nullptr;
1888 key.mSize = alloc_size;
1889 chunks_mtx.Lock();
1890 extent_node_t* node = gChunksBySize.SearchOrNext(&key);
1891 if (!node) {
1892 chunks_mtx.Unlock();
1893 return nullptr;
1894 }
1895 size_t leadsize = ALIGNMENT_CEILING((uintptr_t)node->mAddr, aAlignment) -
1896 (uintptr_t)node->mAddr;
1897 MOZ_ASSERT(node->mSize >= leadsize + aSize);
1898 size_t trailsize = node->mSize - leadsize - aSize;
1899 void* ret = (void*)((uintptr_t)node->mAddr + leadsize);
1900 ChunkType chunk_type = node->mChunkType;
1901 if (aZeroed) {
1902 *aZeroed = (chunk_type == ZEROED_CHUNK);
1903 }
1904 // Remove node from the tree.
1905 gChunksBySize.Remove(node);
1906 gChunksByAddress.Remove(node);
1907 if (leadsize != 0) {
1908 // Insert the leading space as a smaller chunk.
1909 node->mSize = leadsize;
1910 gChunksBySize.Insert(node);
1911 gChunksByAddress.Insert(node);
1912 node = nullptr;
1913 }
1914 if (trailsize != 0) {
1915 // Insert the trailing space as a smaller chunk.
1916 if (!node) {
1917 // An additional node is required, but
1918 // TypedBaseAlloc::alloc() can cause a new base chunk to be
1919 // allocated. Drop chunks_mtx in order to avoid
1920 // deadlock, and if node allocation fails, deallocate
1921 // the result before returning an error.
1922 chunks_mtx.Unlock();
1923 node = ExtentAlloc::alloc();
1924 if (!node) {
1925 chunk_dealloc(ret, aSize, chunk_type);
1926 return nullptr;
1927 }
1928 chunks_mtx.Lock();
1929 }
1930 node->mAddr = (void*)((uintptr_t)(ret) + aSize);
1931 node->mSize = trailsize;
1932 node->mChunkType = chunk_type;
1933 gChunksBySize.Insert(node);
1934 gChunksByAddress.Insert(node);
1935 node = nullptr;
1936 }
1937
1938 gRecycledSize -= aSize;
1939
1940 chunks_mtx.Unlock();
1941
1942 if (node) {
1943 ExtentAlloc::dealloc(node);
1944 }
1945 if (!pages_commit(ret, aSize)) {
1946 return nullptr;
1947 }
1948 // pages_commit is guaranteed to zero the chunk.
1949 if (aZeroed) {
1950 *aZeroed = true;
1951 }
1952
1953 return ret;
1954 }
1955
1956 #ifdef XP_WIN
1957 // On Windows, calls to VirtualAlloc and VirtualFree must be matched, making it
1958 // awkward to recycle allocations of varying sizes. Therefore we only allow
1959 // recycling when the size equals the chunksize, unless deallocation is entirely
1960 // disabled.
1961 # define CAN_RECYCLE(size) (size == kChunkSize)
1962 #else
1963 # define CAN_RECYCLE(size) true
1964 #endif
1965
1966 // Allocates `size` bytes of system memory aligned for `alignment`.
1967 // `base` indicates whether the memory will be used for the base allocator
1968 // (e.g. base_alloc).
1969 // `zeroed` is an outvalue that returns whether the allocated memory is
1970 // guaranteed to be full of zeroes. It can be omitted when the caller doesn't
1971 // care about the result.
1972 static void* chunk_alloc(size_t aSize, size_t aAlignment, bool aBase,
1973 bool* aZeroed) {
1974 void* ret = nullptr;
1975
1976 MOZ_ASSERT(aSize != 0);
1977 MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
1978 MOZ_ASSERT(aAlignment != 0);
1979 MOZ_ASSERT((aAlignment & kChunkSizeMask) == 0);
1980
1981 // Base allocations can't be fulfilled by recycling because of
1982 // possible deadlock or infinite recursion.
1983 if (CAN_RECYCLE(aSize) && !aBase) {
1984 ret = chunk_recycle(aSize, aAlignment, aZeroed);
1985 }
1986 if (!ret) {
1987 ret = chunk_alloc_mmap(aSize, aAlignment);
1988 if (aZeroed) {
1989 *aZeroed = true;
1990 }
1991 }
1992 if (ret && !aBase) {
1993 if (!gChunkRTree.Set(ret, ret)) {
1994 chunk_dealloc(ret, aSize, UNKNOWN_CHUNK);
1995 return nullptr;
1996 }
1997 }
1998
1999 MOZ_ASSERT(GetChunkOffsetForPtr(ret) == 0);
2000 return ret;
2001 }
2002
2003 static void chunk_ensure_zero(void* aPtr, size_t aSize, bool aZeroed) {
2004 if (aZeroed == false) {
2005 memset(aPtr, 0, aSize);
2006 }
2007 #ifdef MOZ_DEBUG
2008 else {
2009 size_t i;
2010 size_t* p = (size_t*)(uintptr_t)aPtr;
2011
2012 for (i = 0; i < aSize / sizeof(size_t); i++) {
2013 MOZ_ASSERT(p[i] == 0);
2014 }
2015 }
2016 #endif
2017 }
2018
2019 static void chunk_record(void* aChunk, size_t aSize, ChunkType aType) {
2020 extent_node_t key;
2021
2022 if (aType != ZEROED_CHUNK) {
2023 if (pages_purge(aChunk, aSize, aType == HUGE_CHUNK)) {
2024 aType = ZEROED_CHUNK;
2025 }
2026 }
2027
2028 // Allocate a node before acquiring chunks_mtx even though it might not
2029 // be needed, because TypedBaseAlloc::alloc() may cause a new base chunk to
2030 // be allocated, which could cause deadlock if chunks_mtx were already
2031 // held.
2032 UniqueBaseNode xnode(ExtentAlloc::alloc());
2033 // Use xprev to implement conditional deferred deallocation of prev.
2034 UniqueBaseNode xprev;
2035
  // RAII deallocates xnode and xprev defined above after unlocking
  // in order to avoid potential deadlocks.
2038 MutexAutoLock lock(chunks_mtx);
2039 key.mAddr = (void*)((uintptr_t)aChunk + aSize);
2040 extent_node_t* node = gChunksByAddress.SearchOrNext(&key);
2041 // Try to coalesce forward.
2042 if (node && node->mAddr == key.mAddr) {
2043 // Coalesce chunk with the following address range. This does
2044 // not change the position within gChunksByAddress, so only
2045 // remove/insert from/into gChunksBySize.
2046 gChunksBySize.Remove(node);
2047 node->mAddr = aChunk;
2048 node->mSize += aSize;
2049 if (node->mChunkType != aType) {
2050 node->mChunkType = RECYCLED_CHUNK;
2051 }
2052 gChunksBySize.Insert(node);
2053 } else {
2054 // Coalescing forward failed, so insert a new node.
2055 if (!xnode) {
2056 // TypedBaseAlloc::alloc() failed, which is an exceedingly
2057 // unlikely failure. Leak chunk; its pages have
2058 // already been purged, so this is only a virtual
2059 // memory leak.
2060 return;
2061 }
2062 node = xnode.release();
2063 node->mAddr = aChunk;
2064 node->mSize = aSize;
2065 node->mChunkType = aType;
2066 gChunksByAddress.Insert(node);
2067 gChunksBySize.Insert(node);
2068 }
2069
2070 // Try to coalesce backward.
2071 extent_node_t* prev = gChunksByAddress.Prev(node);
2072 if (prev && (void*)((uintptr_t)prev->mAddr + prev->mSize) == aChunk) {
2073 // Coalesce chunk with the previous address range. This does
2074 // not change the position within gChunksByAddress, so only
2075 // remove/insert node from/into gChunksBySize.
2076 gChunksBySize.Remove(prev);
2077 gChunksByAddress.Remove(prev);
2078
2079 gChunksBySize.Remove(node);
2080 node->mAddr = prev->mAddr;
2081 node->mSize += prev->mSize;
2082 if (node->mChunkType != prev->mChunkType) {
2083 node->mChunkType = RECYCLED_CHUNK;
2084 }
2085 gChunksBySize.Insert(node);
2086
2087 xprev.reset(prev);
2088 }
2089
2090 gRecycledSize += aSize;
2091 }
2092
2093 static void chunk_dealloc(void* aChunk, size_t aSize, ChunkType aType) {
2094 MOZ_ASSERT(aChunk);
2095 MOZ_ASSERT(GetChunkOffsetForPtr(aChunk) == 0);
2096 MOZ_ASSERT(aSize != 0);
2097 MOZ_ASSERT((aSize & kChunkSizeMask) == 0);
2098
2099 gChunkRTree.Unset(aChunk);
2100
2101 if (CAN_RECYCLE(aSize)) {
2102 size_t recycled_so_far = gRecycledSize;
2103 // In case some race condition put us above the limit.
2104 if (recycled_so_far < gRecycleLimit) {
2105 size_t recycle_remaining = gRecycleLimit - recycled_so_far;
2106 size_t to_recycle;
2107 if (aSize > recycle_remaining) {
2108 to_recycle = recycle_remaining;
2109 // Drop pages that would overflow the recycle limit
2110 pages_trim(aChunk, aSize, 0, to_recycle);
2111 } else {
2112 to_recycle = aSize;
2113 }
2114 chunk_record(aChunk, to_recycle, aType);
2115 return;
2116 }
2117 }
2118
2119 pages_unmap(aChunk, aSize);
2120 }
2121
2122 #undef CAN_RECYCLE
2123
2124 // End chunk management functions.
2125 // ***************************************************************************
2126 // Begin arena.
2127
2128 static inline arena_t* thread_local_arena(bool enabled) {
2129 arena_t* arena;
2130
2131 if (enabled) {
    // The arena will essentially be leaked if this function is
    // called with `false`, but it doesn't matter at the moment
    // because in practice nothing actually calls this function
    // with `false`, except maybe at shutdown.
2136 arena =
2137 gArenas.CreateArena(/* IsPrivate = */ false, /* Params = */ nullptr);
2138 } else {
2139 arena = gArenas.GetDefault();
2140 }
2141 thread_arena.set(arena);
2142 return arena;
2143 }
2144
2145 template <>
2146 inline void MozJemalloc::jemalloc_thread_local_arena(bool aEnabled) {
2147 if (malloc_init()) {
2148 thread_local_arena(aEnabled);
2149 }
2150 }
2151
2152 // Choose an arena based on a per-thread value.
2153 static inline arena_t* choose_arena(size_t size) {
2154 arena_t* ret = nullptr;
2155
2156 // We can only use TLS if this is a PIC library, since for the static
2157 // library version, libc's malloc is used by TLS allocation, which
2158 // introduces a bootstrapping issue.
2159
2160 if (size > kMaxQuantumClass) {
2161 // Force the default arena for larger allocations.
2162 ret = gArenas.GetDefault();
2163 } else {
2164 // Check TLS to see if our thread has requested a pinned arena.
2165 ret = thread_arena.get();
2166 if (!ret) {
2167 // Nothing in TLS. Pin this thread to the default arena.
2168 ret = thread_local_arena(false);
2169 }
2170 }
2171
2172 MOZ_DIAGNOSTIC_ASSERT(ret);
2173 return ret;
2174 }
2175
2176 inline uint8_t arena_t::FindFreeBitInMask(uint32_t aMask, uint32_t& aRng) {
2177 if (mPRNG != nullptr) {
2178 if (aRng == UINT_MAX) {
2179 aRng = mPRNG->next() % 32;
2180 }
2181 uint8_t bitIndex;
2182 // RotateRight asserts when provided bad input.
2183 aMask = aRng ? RotateRight(aMask, aRng)
2184 : aMask; // Rotate the mask a random number of slots
2185 bitIndex = CountTrailingZeroes32(aMask);
2186 return (bitIndex + aRng) % 32;
2187 }
2188 return CountTrailingZeroes32(aMask);
2189 }
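// For illustration: with aMask == 0b1010 (regions 1 and 3 free) and a random
// rotation aRng == 3, RotateRight(0b1010, 3) leaves bits 0 and 30 set, so
// CountTrailingZeroes32 yields 0 and the function returns (0 + 3) % 32 == 3,
// i.e. region 3 rather than the lowest free region 1.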
2190
2191 inline void* arena_t::ArenaRunRegAlloc(arena_run_t* aRun, arena_bin_t* aBin) {
2192 void* ret;
2193 unsigned i, mask, bit, regind;
2194 uint32_t rndPos = UINT_MAX;
2195
2196 MOZ_DIAGNOSTIC_ASSERT(aRun->mMagic == ARENA_RUN_MAGIC);
2197 MOZ_ASSERT(aRun->mRegionsMinElement < aBin->mRunNumRegionsMask);
2198
2199 // Move the first check outside the loop, so that aRun->mRegionsMinElement can
2200 // be updated unconditionally, without the possibility of updating it
2201 // multiple times.
2202 i = aRun->mRegionsMinElement;
2203 mask = aRun->mRegionsMask[i];
2204 if (mask != 0) {
2205 bit = FindFreeBitInMask(mask, rndPos);
2206
2207 regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2208 MOZ_ASSERT(regind < aBin->mRunNumRegions);
2209 ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2210 (aBin->mSizeClass * regind));
2211
2212 // Clear bit.
2213 mask ^= (1U << bit);
2214 aRun->mRegionsMask[i] = mask;
2215
2216 return ret;
2217 }
2218
2219 for (i++; i < aBin->mRunNumRegionsMask; i++) {
2220 mask = aRun->mRegionsMask[i];
2221 if (mask != 0) {
2222 bit = FindFreeBitInMask(mask, rndPos);
2223
2224 regind = ((i << (LOG2(sizeof(int)) + 3)) + bit);
2225 MOZ_ASSERT(regind < aBin->mRunNumRegions);
2226 ret = (void*)(((uintptr_t)aRun) + aBin->mRunFirstRegionOffset +
2227 (aBin->mSizeClass * regind));
2228
2229 // Clear bit.
2230 mask ^= (1U << bit);
2231 aRun->mRegionsMask[i] = mask;
2232
2233 // Make a note that nothing before this element
2234 // contains a free region.
2235 aRun->mRegionsMinElement = i; // Low payoff: + (mask == 0);
2236
2237 return ret;
2238 }
2239 }
2240 // Not reached.
2241 MOZ_DIAGNOSTIC_ASSERT(0);
2242 return nullptr;
2243 }
2244
// To divide by a number D that is not a power of two we multiply by
// (2^21 / D) and then right shift by 21 positions.
//
//   X / D
//
// becomes
//
//   (X * size_invs[D / Q - 3]) >> inv_shift
//
// where Q is a constant factor, so the size_invs table only needs entries
// for divisors that are multiples of Q.
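//
// For illustration, assuming Q == kQuantum == 16: dividing by D == 48 uses
// size_invs[48 / 16 - 3] == inv(3) == (2^21 / 48) + 1 == 43691, so
// divide(144, 48) computes (144 * 43691) >> 21 == 6291504 >> 21 == 3, which
// matches 144 / 48.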
2255 template <unsigned Q, unsigned Max>
2256 struct FastDivide {
2257 static_assert(IsPowerOfTwo(Q), "q must be a power-of-two");
2258
2259 // We don't need FastDivide when dividing by a power-of-two. So when we set
2260 // the range (min_divisor - max_divisor inclusive) we can avoid powers-of-two.
2261
2262 // Because Q is a power of two Q*3 is the first not-power-of-two.
2263 static const unsigned min_divisor = Q * 3;
2264 static const unsigned max_divisor =
2265 mozilla::IsPowerOfTwo(Max) ? Max - Q : Max;
2266 // +1 because this range is inclusive.
2267 static const unsigned num_divisors = (max_divisor - min_divisor) / Q + 1;
2268
2269 static const unsigned inv_shift = 21;
2270
2271 static constexpr unsigned inv(unsigned s) {
2272 return ((1U << inv_shift) / (s * Q)) + 1;
2273 }
2274
2275 static unsigned divide(size_t num, unsigned div) {
2276 // clang-format off
2277 static const unsigned size_invs[] = {
2278 inv(3),
2279 inv(4), inv(5), inv(6), inv(7),
2280 inv(8), inv(9), inv(10), inv(11),
2281 inv(12), inv(13), inv(14), inv(15),
2282 inv(16), inv(17), inv(18), inv(19),
2283 inv(20), inv(21), inv(22), inv(23),
2284 inv(24), inv(25), inv(26), inv(27),
2285 inv(28), inv(29), inv(30), inv(31)
2286 };
2287 // clang-format on
2288
2289 // If the divisor is valid (min is below max) then the size_invs array must
2290 // be large enough.
2291 static_assert(!(min_divisor < max_divisor) ||
2292 num_divisors <= sizeof(size_invs) / sizeof(unsigned),
2293 "num_divisors does not match array size");
2294
2295 MOZ_ASSERT(div >= min_divisor);
2296 MOZ_ASSERT(div <= max_divisor);
2297 MOZ_ASSERT(div % Q == 0);
2298
    // If Q weren't a power of two this optimization would be pointless; we
    // expect the division by Q to be reduced to a shift, which the
    // static_assert above guarantees.
2301 const unsigned idx = div / Q - 3;
2302 MOZ_ASSERT(idx < sizeof(size_invs) / sizeof(unsigned));
2303 return (num * size_invs[idx]) >> inv_shift;
2304 }
2305 };
2306
2307 static inline void arena_run_reg_dalloc(arena_run_t* run, arena_bin_t* bin,
2308 void* ptr, size_t size) {
2309 unsigned diff, regind, elm, bit;
2310
2311 MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
2312
2313 // Avoid doing division with a variable divisor if possible. Using
2314 // actual division here can reduce allocator throughput by over 20%!
2315 diff =
2316 (unsigned)((uintptr_t)ptr - (uintptr_t)run - bin->mRunFirstRegionOffset);
2317 if (mozilla::IsPowerOfTwo(size)) {
2318 regind = diff >> FloorLog2(size);
2319 } else {
2320 SizeClass sc(size);
2321 switch (sc.Type()) {
2322 case SizeClass::Quantum:
2323 regind = FastDivide<kQuantum, kMaxQuantumClass>::divide(diff, size);
2324 break;
2325 case SizeClass::QuantumWide:
2326 regind =
2327 FastDivide<kQuantumWide, kMaxQuantumWideClass>::divide(diff, size);
2328 break;
2329 default:
2330 regind = diff / size;
2331 }
2332 }
2333 MOZ_DIAGNOSTIC_ASSERT(diff == regind * size);
2334 MOZ_DIAGNOSTIC_ASSERT(regind < bin->mRunNumRegions);
2335
2336 elm = regind >> (LOG2(sizeof(int)) + 3);
2337 if (elm < run->mRegionsMinElement) {
2338 run->mRegionsMinElement = elm;
2339 }
2340 bit = regind - (elm << (LOG2(sizeof(int)) + 3));
2341 MOZ_RELEASE_ASSERT((run->mRegionsMask[elm] & (1U << bit)) == 0,
2342 "Double-free?");
2343 run->mRegionsMask[elm] |= (1U << bit);
2344 }
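// For illustration, assuming 4-byte ints (so each mask element covers 32
// regions): freeing region index 70 touches mask element 70 >> 5 == 2,
// bit 70 - (2 << 5) == 6; setting that bit marks the region free again.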
2345
2346 bool arena_t::SplitRun(arena_run_t* aRun, size_t aSize, bool aLarge,
2347 bool aZero) {
2348 arena_chunk_t* chunk;
2349 size_t old_ndirty, run_ind, total_pages, need_pages, rem_pages, i;
2350
2351 chunk = GetChunkForPtr(aRun);
2352 old_ndirty = chunk->ndirty;
2353 run_ind = (unsigned)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
2354 total_pages = (chunk->map[run_ind].bits & ~gPageSizeMask) >> gPageSize2Pow;
2355 need_pages = (aSize >> gPageSize2Pow);
2356 MOZ_ASSERT(need_pages > 0);
2357 MOZ_ASSERT(need_pages <= total_pages);
2358 rem_pages = total_pages - need_pages;
2359
2360 for (i = 0; i < need_pages; i++) {
2361 // Commit decommitted pages if necessary. If a decommitted
2362 // page is encountered, commit all needed adjacent decommitted
2363 // pages in one operation, in order to reduce system call
2364 // overhead.
2365 if (chunk->map[run_ind + i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) {
2366 size_t j;
2367
2368 // Advance i+j to just past the index of the last page
2369 // to commit. Clear CHUNK_MAP_DECOMMITTED and
2370 // CHUNK_MAP_MADVISED along the way.
2371 for (j = 0; i + j < need_pages && (chunk->map[run_ind + i + j].bits &
2372 CHUNK_MAP_MADVISED_OR_DECOMMITTED);
2373 j++) {
2374 // DECOMMITTED and MADVISED are mutually exclusive.
2375 MOZ_ASSERT(!(chunk->map[run_ind + i + j].bits & CHUNK_MAP_DECOMMITTED &&
2376 chunk->map[run_ind + i + j].bits & CHUNK_MAP_MADVISED));
2377
2378 chunk->map[run_ind + i + j].bits &= ~CHUNK_MAP_MADVISED_OR_DECOMMITTED;
2379 }
2380
2381 #ifdef MALLOC_DECOMMIT
2382 bool committed = pages_commit(
2383 (void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)),
2384 j << gPageSize2Pow);
2385 // pages_commit zeroes pages, so mark them as such if it succeeded.
2386 // That's checked further below to avoid manually zeroing the pages.
2387 for (size_t k = 0; k < j; k++) {
2388 chunk->map[run_ind + i + k].bits |=
2389 committed ? CHUNK_MAP_ZEROED : CHUNK_MAP_DECOMMITTED;
2390 }
2391 if (!committed) {
2392 return false;
2393 }
2394 #endif
2395
2396 mStats.committed += j;
2397 }
2398 }
2399
2400 mRunsAvail.Remove(&chunk->map[run_ind]);
2401
2402 // Keep track of trailing unused pages for later use.
2403 if (rem_pages > 0) {
2404 chunk->map[run_ind + need_pages].bits =
2405 (rem_pages << gPageSize2Pow) |
2406 (chunk->map[run_ind + need_pages].bits & gPageSizeMask);
2407 chunk->map[run_ind + total_pages - 1].bits =
2408 (rem_pages << gPageSize2Pow) |
2409 (chunk->map[run_ind + total_pages - 1].bits & gPageSizeMask);
2410 mRunsAvail.Insert(&chunk->map[run_ind + need_pages]);
2411 }
2412
2413 for (i = 0; i < need_pages; i++) {
2414 // Zero if necessary.
2415 if (aZero) {
2416 if ((chunk->map[run_ind + i].bits & CHUNK_MAP_ZEROED) == 0) {
2417 memset((void*)(uintptr_t(chunk) + ((run_ind + i) << gPageSize2Pow)), 0,
2418 gPageSize);
2419 // CHUNK_MAP_ZEROED is cleared below.
2420 }
2421 }
2422
2423 // Update dirty page accounting.
2424 if (chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) {
2425 chunk->ndirty--;
2426 mNumDirty--;
2427 // CHUNK_MAP_DIRTY is cleared below.
2428 }
2429
2430 // Initialize the chunk map.
2431 if (aLarge) {
2432 chunk->map[run_ind + i].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2433 } else {
2434 chunk->map[run_ind + i].bits = size_t(aRun) | CHUNK_MAP_ALLOCATED;
2435 }
2436 }
2437
2438 // Set the run size only in the first element for large runs. This is
2439 // primarily a debugging aid, since the lack of size info for trailing
2440 // pages only matters if the application tries to operate on an
2441 // interior pointer.
2442 if (aLarge) {
2443 chunk->map[run_ind].bits |= aSize;
2444 }
2445
2446 if (chunk->ndirty == 0 && old_ndirty > 0) {
2447 mChunksDirty.Remove(chunk);
2448 }
2449 return true;
2450 }
2451
2452 void arena_t::InitChunk(arena_chunk_t* aChunk, bool aZeroed) {
2453 size_t i;
2454 // WARNING: The following relies on !aZeroed meaning "used to be an arena
2455 // chunk".
  // When the chunk we're initializing as an arena chunk is zeroed, we
  // mark all runs as decommitted and zeroed.
2458 // When it is not, which we can assume means it's a recycled arena chunk,
2459 // all it can contain is an arena chunk header (which we're overwriting),
2460 // and zeroed or poisoned memory (because a recycled arena chunk will
2461 // have been emptied before being recycled). In that case, we can get
2462 // away with reusing the chunk as-is, marking all runs as madvised.
2463
2464 size_t flags =
2465 aZeroed ? CHUNK_MAP_DECOMMITTED | CHUNK_MAP_ZEROED : CHUNK_MAP_MADVISED;
2466
2467 mStats.mapped += kChunkSize;
2468
2469 aChunk->arena = this;
2470
2471 // Claim that no pages are in use, since the header is merely overhead.
2472 aChunk->ndirty = 0;
2473
2474 // Initialize the map to contain one maximal free untouched run.
2475 arena_run_t* run = (arena_run_t*)(uintptr_t(aChunk) +
2476 (gChunkHeaderNumPages << gPageSize2Pow));
2477
2478 // Clear the bits for the real header pages.
2479 for (i = 0; i < gChunkHeaderNumPages - 1; i++) {
2480 aChunk->map[i].bits = 0;
2481 }
2482 // Mark the leading guard page (last header page) as decommitted.
2483 aChunk->map[i++].bits = CHUNK_MAP_DECOMMITTED;
2484
  // Mark the area usable for runs as available; note the size at the start
  // and end.
2486 aChunk->map[i++].bits = gMaxLargeClass | flags;
2487 for (; i < gChunkNumPages - 2; i++) {
2488 aChunk->map[i].bits = flags;
2489 }
2490 aChunk->map[gChunkNumPages - 2].bits = gMaxLargeClass | flags;
2491
2492 // Mark the trailing guard page as decommitted.
2493 aChunk->map[gChunkNumPages - 1].bits = CHUNK_MAP_DECOMMITTED;
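
  // The resulting map layout is:
  //   [0 .. gChunkHeaderNumPages - 2]   header pages (bits == 0)
  //   [gChunkHeaderNumPages - 1]        leading guard (DECOMMITTED)
  //   [gChunkHeaderNumPages]            run start (gMaxLargeClass | flags)
  //   [gChunkHeaderNumPages + 1 ..
  //    gChunkNumPages - 3]              run interior (flags)
  //   [gChunkNumPages - 2]              run end (gMaxLargeClass | flags)
  //   [gChunkNumPages - 1]              trailing guard (DECOMMITTED)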
2494
2495 #ifdef MALLOC_DECOMMIT
2496 // Start out decommitted, in order to force a closer correspondence
2497 // between dirty pages and committed untouched pages. This includes
2498 // leading and trailing guard pages.
2499 pages_decommit((void*)(uintptr_t(run) - gPageSize),
2500 gMaxLargeClass + 2 * gPageSize);
2501 #else
2502 // Decommit the last header page (=leading page) as a guard.
2503 pages_decommit((void*)(uintptr_t(run) - gPageSize), gPageSize);
2504 // Decommit the last page as a guard.
2505 pages_decommit((void*)(uintptr_t(aChunk) + kChunkSize - gPageSize),
2506 gPageSize);
2507 #endif
2508
2509 mStats.committed += gChunkHeaderNumPages;
2510
2511 // Insert the run into the tree of available runs.
2512 mRunsAvail.Insert(&aChunk->map[gChunkHeaderNumPages]);
2513
2514 #ifdef MALLOC_DOUBLE_PURGE
2515 new (&aChunk->chunks_madvised_elem) DoublyLinkedListElement<arena_chunk_t>();
2516 #endif
2517 }
2518
2519 void arena_t::DeallocChunk(arena_chunk_t* aChunk) {
2520 if (mSpare) {
2521 if (mSpare->ndirty > 0) {
2522 aChunk->arena->mChunksDirty.Remove(mSpare);
2523 mNumDirty -= mSpare->ndirty;
2524 mStats.committed -= mSpare->ndirty;
2525 }
2526
2527 #ifdef MALLOC_DOUBLE_PURGE
2528 if (mChunksMAdvised.ElementProbablyInList(mSpare)) {
2529 mChunksMAdvised.remove(mSpare);
2530 }
2531 #endif
2532
2533 chunk_dealloc((void*)mSpare, kChunkSize, ARENA_CHUNK);
2534 mStats.mapped -= kChunkSize;
2535 mStats.committed -= gChunkHeaderNumPages;
2536 }
2537
2538 // Remove run from the tree of available runs, so that the arena does not use
2539 // it. Dirty page flushing only uses the tree of dirty chunks, so leaving this
2540 // chunk in the chunks_* trees is sufficient for that purpose.
2541 mRunsAvail.Remove(&aChunk->map[gChunkHeaderNumPages]);
2542
2543 mSpare = aChunk;
2544 }
2545
2546 arena_run_t* arena_t::AllocRun(size_t aSize, bool aLarge, bool aZero) {
2547 arena_run_t* run;
2548 arena_chunk_map_t* mapelm;
2549 arena_chunk_map_t key;
2550
2551 MOZ_ASSERT(aSize <= gMaxLargeClass);
2552 MOZ_ASSERT((aSize & gPageSizeMask) == 0);
2553
2554 // Search the arena's chunks for the lowest best fit.
2555 key.bits = aSize | CHUNK_MAP_KEY;
2556 mapelm = mRunsAvail.SearchOrNext(&key);
2557 if (mapelm) {
2558 arena_chunk_t* chunk = GetChunkForPtr(mapelm);
2559 size_t pageind =
2560 (uintptr_t(mapelm) - uintptr_t(chunk->map)) / sizeof(arena_chunk_map_t);
2561
2562 run = (arena_run_t*)(uintptr_t(chunk) + (pageind << gPageSize2Pow));
2563 } else if (mSpare) {
2564 // Use the spare.
2565 arena_chunk_t* chunk = mSpare;
2566 mSpare = nullptr;
2567 run = (arena_run_t*)(uintptr_t(chunk) +
2568 (gChunkHeaderNumPages << gPageSize2Pow));
2569 // Insert the run into the tree of available runs.
2570 mRunsAvail.Insert(&chunk->map[gChunkHeaderNumPages]);
2571 } else {
2572 // No usable runs. Create a new chunk from which to allocate
2573 // the run.
2574 bool zeroed;
2575 arena_chunk_t* chunk =
2576 (arena_chunk_t*)chunk_alloc(kChunkSize, kChunkSize, false, &zeroed);
2577 if (!chunk) {
2578 return nullptr;
2579 }
2580
2581 InitChunk(chunk, zeroed);
2582 run = (arena_run_t*)(uintptr_t(chunk) +
2583 (gChunkHeaderNumPages << gPageSize2Pow));
2584 }
2585 // Update page map.
2586 return SplitRun(run, aSize, aLarge, aZero) ? run : nullptr;
2587 }
2588
2589 void arena_t::Purge(bool aAll) {
2590 arena_chunk_t* chunk;
2591 size_t i, npages;
  // If aAll is set, purge all dirty pages.
2593 size_t dirty_max = aAll ? 1 : mMaxDirty;
2594 #ifdef MOZ_DEBUG
2595 size_t ndirty = 0;
2596 for (auto chunk : mChunksDirty.iter()) {
2597 ndirty += chunk->ndirty;
2598 }
2599 MOZ_ASSERT(ndirty == mNumDirty);
2600 #endif
2601 MOZ_DIAGNOSTIC_ASSERT(aAll || (mNumDirty > mMaxDirty));
2602
2603 // Iterate downward through chunks until enough dirty memory has been
2604 // purged. Terminate as soon as possible in order to minimize the
2605 // number of system calls, even if a chunk has only been partially
2606 // purged.
2607 while (mNumDirty > (dirty_max >> 1)) {
2608 #ifdef MALLOC_DOUBLE_PURGE
2609 bool madvised = false;
2610 #endif
2611 chunk = mChunksDirty.Last();
2612 MOZ_DIAGNOSTIC_ASSERT(chunk);
2613 // Last page is DECOMMITTED as a guard page.
2614 MOZ_ASSERT((chunk->map[gChunkNumPages - 1].bits & CHUNK_MAP_DECOMMITTED) !=
2615 0);
2616 for (i = gChunkNumPages - 2; chunk->ndirty > 0; i--) {
2617 MOZ_DIAGNOSTIC_ASSERT(i >= gChunkHeaderNumPages);
2618
2619 if (chunk->map[i].bits & CHUNK_MAP_DIRTY) {
2620 #ifdef MALLOC_DECOMMIT
2621 const size_t free_operation = CHUNK_MAP_DECOMMITTED;
2622 #else
2623 const size_t free_operation = CHUNK_MAP_MADVISED;
2624 #endif
2625 MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) ==
2626 0);
2627 chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
2628 // Find adjacent dirty run(s).
2629 for (npages = 1; i > gChunkHeaderNumPages &&
2630 (chunk->map[i - 1].bits & CHUNK_MAP_DIRTY);
2631 npages++) {
2632 i--;
2633 MOZ_ASSERT((chunk->map[i].bits & CHUNK_MAP_MADVISED_OR_DECOMMITTED) ==
2634 0);
2635 chunk->map[i].bits ^= free_operation | CHUNK_MAP_DIRTY;
2636 }
2637 chunk->ndirty -= npages;
2638 mNumDirty -= npages;
2639
2640 #ifdef MALLOC_DECOMMIT
2641 pages_decommit((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
2642 (npages << gPageSize2Pow));
2643 #endif
2644 mStats.committed -= npages;
2645
2646 #ifndef MALLOC_DECOMMIT
2647 # ifdef XP_SOLARIS
2648 posix_madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
2649 (npages << gPageSize2Pow), MADV_FREE);
2650 # else
2651 madvise((void*)(uintptr_t(chunk) + (i << gPageSize2Pow)),
2652 (npages << gPageSize2Pow), MADV_FREE);
2653 # endif
2654 # ifdef MALLOC_DOUBLE_PURGE
2655 madvised = true;
2656 # endif
2657 #endif
2658 if (mNumDirty <= (dirty_max >> 1)) {
2659 break;
2660 }
2661 }
2662 }
2663
2664 if (chunk->ndirty == 0) {
2665 mChunksDirty.Remove(chunk);
2666 }
2667 #ifdef MALLOC_DOUBLE_PURGE
2668 if (madvised) {
2669 // The chunk might already be in the list, but this
2670 // makes sure it's at the front.
2671 if (mChunksMAdvised.ElementProbablyInList(chunk)) {
2672 mChunksMAdvised.remove(chunk);
2673 }
2674 mChunksMAdvised.pushFront(chunk);
2675 }
2676 #endif
2677 }
2678 }
2679
2680 void arena_t::DallocRun(arena_run_t* aRun, bool aDirty) {
2681 arena_chunk_t* chunk;
2682 size_t size, run_ind, run_pages;
2683
2684 chunk = GetChunkForPtr(aRun);
2685 run_ind = (size_t)((uintptr_t(aRun) - uintptr_t(chunk)) >> gPageSize2Pow);
2686 MOZ_DIAGNOSTIC_ASSERT(run_ind >= gChunkHeaderNumPages);
2687 MOZ_RELEASE_ASSERT(run_ind < gChunkNumPages - 1);
2688 if ((chunk->map[run_ind].bits & CHUNK_MAP_LARGE) != 0) {
2689 size = chunk->map[run_ind].bits & ~gPageSizeMask;
2690 } else {
2691 size = aRun->mBin->mRunSize;
2692 }
2693 run_pages = (size >> gPageSize2Pow);
2694
2695 // Mark pages as unallocated in the chunk map.
2696 if (aDirty) {
2697 size_t i;
2698
2699 for (i = 0; i < run_pages; i++) {
2700 MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + i].bits & CHUNK_MAP_DIRTY) ==
2701 0);
2702 chunk->map[run_ind + i].bits = CHUNK_MAP_DIRTY;
2703 }
2704
2705 if (chunk->ndirty == 0) {
2706 mChunksDirty.Insert(chunk);
2707 }
2708 chunk->ndirty += run_pages;
2709 mNumDirty += run_pages;
2710 } else {
2711 size_t i;
2712
2713 for (i = 0; i < run_pages; i++) {
2714 chunk->map[run_ind + i].bits &= ~(CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED);
2715 }
2716 }
2717 chunk->map[run_ind].bits = size | (chunk->map[run_ind].bits & gPageSizeMask);
2718 chunk->map[run_ind + run_pages - 1].bits =
2719 size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
2720
2721 // Try to coalesce forward.
2722 if (run_ind + run_pages < gChunkNumPages - 1 &&
2723 (chunk->map[run_ind + run_pages].bits & CHUNK_MAP_ALLOCATED) == 0) {
2724 size_t nrun_size = chunk->map[run_ind + run_pages].bits & ~gPageSizeMask;
2725
2726 // Remove successor from tree of available runs; the coalesced run is
2727 // inserted later.
2728 mRunsAvail.Remove(&chunk->map[run_ind + run_pages]);
2729
2730 size += nrun_size;
2731 run_pages = size >> gPageSize2Pow;
2732
2733 MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind + run_pages - 1].bits &
2734 ~gPageSizeMask) == nrun_size);
2735 chunk->map[run_ind].bits =
2736 size | (chunk->map[run_ind].bits & gPageSizeMask);
2737 chunk->map[run_ind + run_pages - 1].bits =
2738 size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
2739 }
2740
2741 // Try to coalesce backward.
2742 if (run_ind > gChunkHeaderNumPages &&
2743 (chunk->map[run_ind - 1].bits & CHUNK_MAP_ALLOCATED) == 0) {
2744 size_t prun_size = chunk->map[run_ind - 1].bits & ~gPageSizeMask;
2745
2746 run_ind -= prun_size >> gPageSize2Pow;
2747
2748 // Remove predecessor from tree of available runs; the coalesced run is
2749 // inserted later.
2750 mRunsAvail.Remove(&chunk->map[run_ind]);
2751
2752 size += prun_size;
2753 run_pages = size >> gPageSize2Pow;
2754
2755 MOZ_DIAGNOSTIC_ASSERT((chunk->map[run_ind].bits & ~gPageSizeMask) ==
2756 prun_size);
2757 chunk->map[run_ind].bits =
2758 size | (chunk->map[run_ind].bits & gPageSizeMask);
2759 chunk->map[run_ind + run_pages - 1].bits =
2760 size | (chunk->map[run_ind + run_pages - 1].bits & gPageSizeMask);
2761 }
2762
2763 // Insert into tree of available runs, now that coalescing is complete.
2764 mRunsAvail.Insert(&chunk->map[run_ind]);
2765
2766 // Deallocate chunk if it is now completely unused.
2767 if ((chunk->map[gChunkHeaderNumPages].bits &
2768 (~gPageSizeMask | CHUNK_MAP_ALLOCATED)) == gMaxLargeClass) {
2769 DeallocChunk(chunk);
2770 }
2771
2772 // Enforce mMaxDirty.
2773 if (mNumDirty > mMaxDirty) {
2774 Purge(false);
2775 }
2776 }
2777
2778 void arena_t::TrimRunHead(arena_chunk_t* aChunk, arena_run_t* aRun,
2779 size_t aOldSize, size_t aNewSize) {
2780 size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
2781 size_t head_npages = (aOldSize - aNewSize) >> gPageSize2Pow;
2782
2783 MOZ_ASSERT(aOldSize > aNewSize);
2784
2785 // Update the chunk map so that arena_t::RunDalloc() can treat the
2786 // leading run as separately allocated.
2787 aChunk->map[pageind].bits =
2788 (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2789 aChunk->map[pageind + head_npages].bits =
2790 aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2791
2792 DallocRun(aRun, false);
2793 }
2794
2795 void arena_t::TrimRunTail(arena_chunk_t* aChunk, arena_run_t* aRun,
2796 size_t aOldSize, size_t aNewSize, bool aDirty) {
2797 size_t pageind = (uintptr_t(aRun) - uintptr_t(aChunk)) >> gPageSize2Pow;
2798 size_t npages = aNewSize >> gPageSize2Pow;
2799
2800 MOZ_ASSERT(aOldSize > aNewSize);
2801
2802 // Update the chunk map so that arena_t::RunDalloc() can treat the
2803 // trailing run as separately allocated.
2804 aChunk->map[pageind].bits = aNewSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2805 aChunk->map[pageind + npages].bits =
2806 (aOldSize - aNewSize) | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
2807
2808 DallocRun((arena_run_t*)(uintptr_t(aRun) + aNewSize), aDirty);
2809 }
2810
2811 arena_run_t* arena_t::GetNonFullBinRun(arena_bin_t* aBin) {
2812 arena_chunk_map_t* mapelm;
2813 arena_run_t* run;
2814 unsigned i, remainder;
2815
2816 // Look for a usable run.
2817 mapelm = aBin->mNonFullRuns.First();
2818 if (mapelm) {
2819 // run is guaranteed to have available space.
2820 aBin->mNonFullRuns.Remove(mapelm);
2821 run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
2822 return run;
2823 }
2824 // No existing runs have any space available.
2825
2826 // Allocate a new run.
2827 run = AllocRun(aBin->mRunSize, false, false);
2828 if (!run) {
2829 return nullptr;
2830 }
2831 // Don't initialize if a race in arena_t::RunAlloc() allowed an existing
2832 // run to become usable.
2833 if (run == aBin->mCurrentRun) {
2834 return run;
2835 }
2836
2837 // Initialize run internals.
2838 run->mBin = aBin;
2839
2840 for (i = 0; i < aBin->mRunNumRegionsMask - 1; i++) {
2841 run->mRegionsMask[i] = UINT_MAX;
2842 }
2843 remainder = aBin->mRunNumRegions & ((1U << (LOG2(sizeof(int)) + 3)) - 1);
2844 if (remainder == 0) {
2845 run->mRegionsMask[i] = UINT_MAX;
2846 } else {
2847 // The last element has spare bits that need to be unset.
2848 run->mRegionsMask[i] =
2849 (UINT_MAX >> ((1U << (LOG2(sizeof(int)) + 3)) - remainder));
2850 }
2851
2852 run->mRegionsMinElement = 0;
2853
2854 run->mNumFree = aBin->mRunNumRegions;
2855 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
2856 run->mMagic = ARENA_RUN_MAGIC;
2857 #endif
2858
2859 aBin->mNumRuns++;
2860 return run;
2861 }
2862
2863 void arena_bin_t::Init(SizeClass aSizeClass) {
2864 size_t try_run_size;
2865 unsigned try_nregs, try_mask_nelms, try_reg0_offset;
2866 // Size of the run header, excluding mRegionsMask.
2867 static const size_t kFixedHeaderSize = offsetof(arena_run_t, mRegionsMask);
2868
2869 MOZ_ASSERT(aSizeClass.Size() <= gMaxBinClass);
2870
2871 try_run_size = gPageSize;
2872
2873 mCurrentRun = nullptr;
2874 mNonFullRuns.Init();
2875 mSizeClass = aSizeClass.Size();
2876 mNumRuns = 0;
2877
2878 // mRunSize expansion loop.
2879 while (true) {
2880 try_nregs = ((try_run_size - kFixedHeaderSize) / mSizeClass) +
2881 1; // Counter-act try_nregs-- in loop.
2882
2883 // The do..while loop iteratively reduces the number of regions until
2884 // the run header and the regions no longer overlap. A closed formula
2885 // would be quite messy, since there is an interdependency between the
2886 // header's mask length and the number of regions.
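    //
    // For illustration (the numbers are hypothetical; kFixedHeaderSize
    // depends on the arena_run_t layout): with a 4 KiB run, a 256-byte size
    // class and a 32-byte fixed header, the loop settles on try_nregs == 15,
    // try_mask_nelms == 1 and try_reg0_offset == 256, i.e. about 6.3%
    // overhead, which is then checked against kRunOverhead below.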
2887 do {
2888 try_nregs--;
2889 try_mask_nelms =
2890 (try_nregs >> (LOG2(sizeof(int)) + 3)) +
2891 ((try_nregs & ((1U << (LOG2(sizeof(int)) + 3)) - 1)) ? 1 : 0);
2892 try_reg0_offset = try_run_size - (try_nregs * mSizeClass);
2893 } while (kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) >
2894 try_reg0_offset);
2895
2896 // Try to keep the run overhead below kRunOverhead.
2897 if (Fraction(try_reg0_offset, try_run_size) <= kRunOverhead) {
2898 break;
2899 }
2900
2901 // If the overhead is larger than the size class, it means the size class
2902 // is small and doesn't align very well with the header. It's desirable to
2903 // have smaller run sizes for them, so relax the overhead requirement.
2904 if (try_reg0_offset > mSizeClass) {
2905 if (Fraction(try_reg0_offset, try_run_size) <= kRunRelaxedOverhead) {
2906 break;
2907 }
2908 }
2909
2910 // The run header includes one bit per region of the given size. For sizes
2911 // small enough, the number of regions is large enough that growing the run
2912 // size barely moves the needle for the overhead because of all those bits.
2913 // For example, for a size of 8 bytes, adding 4KiB to the run size adds
2914 // close to 512 bits to the header, which is 64 bytes.
2915 // With such overhead, there is no way to get to the wanted overhead above,
2916 // so we give up if the required size for mRegionsMask more than doubles the
2917 // size of the run header.
2918 if (try_mask_nelms * sizeof(unsigned) >= kFixedHeaderSize) {
2919 break;
2920 }
2921
2922 // If next iteration is going to be larger than the largest possible large
2923 // size class, then we didn't find a setup where the overhead is small
2924 // enough, and we can't do better than the current settings, so just use
2925 // that.
2926 if (try_run_size + gPageSize > gMaxLargeClass) {
2927 break;
2928 }
2929
2930 // Try more aggressive settings.
2931 try_run_size += gPageSize;
2932 }
2933
2934 MOZ_ASSERT(kFixedHeaderSize + (sizeof(unsigned) * try_mask_nelms) <=
2935 try_reg0_offset);
2936 MOZ_ASSERT((try_mask_nelms << (LOG2(sizeof(int)) + 3)) >= try_nregs);
2937
2938 // Copy final settings.
2939 mRunSize = try_run_size;
2940 mRunNumRegions = try_nregs;
2941 mRunNumRegionsMask = try_mask_nelms;
2942 mRunFirstRegionOffset = try_reg0_offset;
2943 }
2944
2945 void* arena_t::MallocSmall(size_t aSize, bool aZero) {
2946 void* ret;
2947 arena_bin_t* bin;
2948 arena_run_t* run;
2949 SizeClass sizeClass(aSize);
2950 aSize = sizeClass.Size();
2951
2952 switch (sizeClass.Type()) {
2953 case SizeClass::Tiny:
2954 bin = &mBins[FloorLog2(aSize / kMinTinyClass)];
2955 break;
2956 case SizeClass::Quantum:
2957 // Although we divide 2 things by kQuantum, the compiler will
2958 // reduce `kMinQuantumClass / kQuantum` and `kNumTinyClasses` to a
2959 // single constant.
2960 bin = &mBins[kNumTinyClasses + (aSize / kQuantum) -
2961 (kMinQuantumClass / kQuantum)];
2962 break;
2963 case SizeClass::QuantumWide:
2964 bin =
2965 &mBins[kNumTinyClasses + kNumQuantumClasses + (aSize / kQuantumWide) -
2966 (kMinQuantumWideClass / kQuantumWide)];
2967 break;
2968 case SizeClass::SubPage:
2969 bin =
2970 &mBins[kNumTinyClasses + kNumQuantumClasses + kNumQuantumWideClasses +
2971 (FloorLog2(aSize) - LOG2(kMinSubPageClass))];
2972 break;
2973 default:
2974 MOZ_MAKE_COMPILER_ASSUME_IS_UNREACHABLE("Unexpected size class type");
2975 }
2976 MOZ_DIAGNOSTIC_ASSERT(aSize == bin->mSizeClass);
2977
2978 {
    // Before we lock, we determine whether we need to randomize the
    // allocation, because if we do, we need to create the PRNG, which might
    // require allocating memory (arc4random on OSX for example), and we need
    // to avoid the resulting deadlock (see below).
2983 if (MOZ_UNLIKELY(mRandomizeSmallAllocations && mPRNG == nullptr)) {
      // This is frustrating. Because the code backing RandomUint64 (arc4random
      // for example) may allocate memory, and because
      // mRandomizeSmallAllocations is true and we haven't yet initialized
      // mPRNG, we would re-enter this same case and cause a deadlock inside
      // e.g. arc4random. So we temporarily disable mRandomizeSmallAllocations
      // to skip this case and then re-enable it.
2990 mRandomizeSmallAllocations = false;
2991 mozilla::Maybe<uint64_t> prngState1 = mozilla::RandomUint64();
2992 mozilla::Maybe<uint64_t> prngState2 = mozilla::RandomUint64();
2993 void* backing =
2994 base_alloc(sizeof(mozilla::non_crypto::XorShift128PlusRNG));
2995 mPRNG = new (backing) mozilla::non_crypto::XorShift128PlusRNG(
2996 prngState1.valueOr(0), prngState2.valueOr(0));
2997 mRandomizeSmallAllocations = true;
2998 }
2999 MOZ_ASSERT(!mRandomizeSmallAllocations || mPRNG);
3000
3001 MutexAutoLock lock(mLock);
3002 run = bin->mCurrentRun;
3003 if (MOZ_UNLIKELY(!run || run->mNumFree == 0)) {
3004 run = bin->mCurrentRun = GetNonFullBinRun(bin);
3005 }
3006 if (MOZ_UNLIKELY(!run)) {
3007 return nullptr;
3008 }
3009 MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3010 MOZ_DIAGNOSTIC_ASSERT(run->mNumFree > 0);
3011 ret = ArenaRunRegAlloc(run, bin);
3012 MOZ_DIAGNOSTIC_ASSERT(ret);
3013 run->mNumFree--;
3014 if (!ret) {
3015 return nullptr;
3016 }
3017
3018 mStats.allocated_small += aSize;
3019 }
3020
3021 if (!aZero) {
3022 ApplyZeroOrJunk(ret, aSize);
3023 } else {
3024 memset(ret, 0, aSize);
3025 }
3026
3027 return ret;
3028 }
3029
3030 void* arena_t::MallocLarge(size_t aSize, bool aZero) {
3031 void* ret;
3032
3033 // Large allocation.
3034 aSize = PAGE_CEILING(aSize);
3035
3036 {
3037 MutexAutoLock lock(mLock);
3038 ret = AllocRun(aSize, true, aZero);
3039 if (!ret) {
3040 return nullptr;
3041 }
3042 mStats.allocated_large += aSize;
3043 }
3044
3045 if (!aZero) {
3046 ApplyZeroOrJunk(ret, aSize);
3047 }
3048
3049 return ret;
3050 }
3051
3052 void* arena_t::Malloc(size_t aSize, bool aZero) {
3053 MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
3054 MOZ_ASSERT(aSize != 0);
3055
3056 if (aSize <= gMaxBinClass) {
3057 return MallocSmall(aSize, aZero);
3058 }
3059 if (aSize <= gMaxLargeClass) {
3060 return MallocLarge(aSize, aZero);
3061 }
3062 return MallocHuge(aSize, aZero);
3063 }
3064
3065 // Only handles large allocations that require more than page alignment.
3066 void* arena_t::PallocLarge(size_t aAlignment, size_t aSize, size_t aAllocSize) {
3067 void* ret;
3068 size_t offset;
3069 arena_chunk_t* chunk;
3070
3071 MOZ_ASSERT((aSize & gPageSizeMask) == 0);
3072 MOZ_ASSERT((aAlignment & gPageSizeMask) == 0);
3073
3074 {
3075 MutexAutoLock lock(mLock);
3076 ret = AllocRun(aAllocSize, true, false);
3077 if (!ret) {
3078 return nullptr;
3079 }
3080
3081 chunk = GetChunkForPtr(ret);
3082
3083 offset = uintptr_t(ret) & (aAlignment - 1);
3084 MOZ_ASSERT((offset & gPageSizeMask) == 0);
3085 MOZ_ASSERT(offset < aAllocSize);
3086 if (offset == 0) {
3087 TrimRunTail(chunk, (arena_run_t*)ret, aAllocSize, aSize, false);
3088 } else {
3089 size_t leadsize, trailsize;
3090
3091 leadsize = aAlignment - offset;
3092 if (leadsize > 0) {
3093 TrimRunHead(chunk, (arena_run_t*)ret, aAllocSize,
3094 aAllocSize - leadsize);
3095 ret = (void*)(uintptr_t(ret) + leadsize);
3096 }
3097
3098 trailsize = aAllocSize - leadsize - aSize;
3099 if (trailsize != 0) {
3100 // Trim trailing space.
3101 MOZ_ASSERT(trailsize < aAllocSize);
3102 TrimRunTail(chunk, (arena_run_t*)ret, aSize + trailsize, aSize, false);
3103 }
3104 }
3105
3106 mStats.allocated_large += aSize;
3107 }
3108
3109 ApplyZeroOrJunk(ret, aSize);
3110 return ret;
3111 }
3112
3113 void* arena_t::Palloc(size_t aAlignment, size_t aSize) {
3114 void* ret;
3115 size_t ceil_size;
3116
3117 // Round size up to the nearest multiple of alignment.
3118 //
3119 // This done, we can take advantage of the fact that for each small
3120 // size class, every object is aligned at the smallest power of two
3121 // that is non-zero in the base two representation of the size. For
3122 // example:
3123 //
3124 // Size | Base 2 | Minimum alignment
3125 // -----+----------+------------------
3126 // 96 | 1100000 | 32
3127 // 144 | 10100000 | 32
3128 // 192 | 11000000 | 64
3129 //
3130 // Depending on runtime settings, it is possible that arena_malloc()
3131 // will further round up to a power of two, but that never causes
3132 // correctness issues.
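  //
  // For illustration: a request for 90 bytes with 32-byte alignment is
  // rounded up to ceil_size == 96, and since the lowest non-zero bit of 96
  // is 32, every object in the 96-byte class is already 32-byte aligned, so
  // the plain Malloc() path below suffices.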
3133 ceil_size = ALIGNMENT_CEILING(aSize, aAlignment);
3134
3135 // (ceil_size < aSize) protects against the combination of maximal
3136 // alignment and size greater than maximal alignment.
3137 if (ceil_size < aSize) {
3138 // size_t overflow.
3139 return nullptr;
3140 }
3141
3142 if (ceil_size <= gPageSize ||
3143 (aAlignment <= gPageSize && ceil_size <= gMaxLargeClass)) {
3144 ret = Malloc(ceil_size, false);
3145 } else {
3146 size_t run_size;
3147
3148 // We can't achieve sub-page alignment, so round up alignment
3149 // permanently; it makes later calculations simpler.
3150 aAlignment = PAGE_CEILING(aAlignment);
3151 ceil_size = PAGE_CEILING(aSize);
3152
3153 // (ceil_size < aSize) protects against very large sizes within
3154 // pagesize of SIZE_T_MAX.
3155 //
3156 // (ceil_size + aAlignment < ceil_size) protects against the
3157 // combination of maximal alignment and ceil_size large enough
3158 // to cause overflow. This is similar to the first overflow
3159 // check above, but it needs to be repeated due to the new
3160 // ceil_size value, which may now be *equal* to maximal
3161 // alignment, whereas before we only detected overflow if the
3162 // original size was *greater* than maximal alignment.
3163 if (ceil_size < aSize || ceil_size + aAlignment < ceil_size) {
3164 // size_t overflow.
3165 return nullptr;
3166 }
3167
3168 // Calculate the size of the over-size run that arena_palloc()
3169 // would need to allocate in order to guarantee the alignment.
3170 if (ceil_size >= aAlignment) {
3171 run_size = ceil_size + aAlignment - gPageSize;
3172 } else {
3173 // It is possible that (aAlignment << 1) will cause
3174 // overflow, but it doesn't matter because we also
3175 // subtract pagesize, which in the case of overflow
3176 // leaves us with a very large run_size. That causes
3177 // the first conditional below to fail, which means
3178 // that the bogus run_size value never gets used for
3179 // anything important.
3180 run_size = (aAlignment << 1) - gPageSize;
3181 }
3182
3183 if (run_size <= gMaxLargeClass) {
3184 ret = PallocLarge(aAlignment, ceil_size, run_size);
3185 } else if (aAlignment <= kChunkSize) {
3186 ret = MallocHuge(ceil_size, false);
3187 } else {
3188 ret = PallocHuge(ceil_size, aAlignment, false);
3189 }
3190 }
3191
3192 MOZ_ASSERT((uintptr_t(ret) & (aAlignment - 1)) == 0);
3193 return ret;
3194 }
3195
3196 class AllocInfo {
3197 public:
3198 template <bool Validate = false>
3199 static inline AllocInfo Get(const void* aPtr) {
3200 // If the allocator is not initialized, the pointer can't belong to it.
3201 if (Validate && malloc_initialized == false) {
3202 return AllocInfo();
3203 }
3204
3205 auto chunk = GetChunkForPtr(aPtr);
3206 if (Validate) {
3207 if (!chunk || !gChunkRTree.Get(chunk)) {
3208 return AllocInfo();
3209 }
3210 }
3211
3212 if (chunk != aPtr) {
3213 MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
3214
3215 size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
3216 size_t mapbits = chunk->map[pageind].bits;
3217 MOZ_DIAGNOSTIC_ASSERT((mapbits & CHUNK_MAP_ALLOCATED) != 0);
3218
3219 size_t size;
3220 if ((mapbits & CHUNK_MAP_LARGE) == 0) {
3221 arena_run_t* run = (arena_run_t*)(mapbits & ~gPageSizeMask);
3222 MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3223 size = run->mBin->mSizeClass;
3224 } else {
3225 size = mapbits & ~gPageSizeMask;
3226 MOZ_DIAGNOSTIC_ASSERT(size != 0);
3227 }
3228
3229 return AllocInfo(size, chunk);
3230 }
3231
3232 extent_node_t key;
3233
3234 // Huge allocation
3235 key.mAddr = chunk;
3236 MutexAutoLock lock(huge_mtx);
3237 extent_node_t* node = huge.Search(&key);
3238 if (Validate && !node) {
3239 return AllocInfo();
3240 }
3241 return AllocInfo(node->mSize, node);
3242 }
3243
3244 // Validate ptr before assuming that it points to an allocation. Currently,
3245 // the following validation is performed:
3246 //
3247 // + Check that ptr is not nullptr.
3248 //
3249 // + Check that ptr lies within a mapped chunk.
3250 static inline AllocInfo GetValidated(const void* aPtr) {
3251 return Get<true>(aPtr);
3252 }
3253
3254 AllocInfo() : mSize(0), mChunk(nullptr) {}
3255
3256 explicit AllocInfo(size_t aSize, arena_chunk_t* aChunk)
3257 : mSize(aSize), mChunk(aChunk) {
3258 MOZ_ASSERT(mSize <= gMaxLargeClass);
3259 }
3260
3261 explicit AllocInfo(size_t aSize, extent_node_t* aNode)
3262 : mSize(aSize), mNode(aNode) {
3263 MOZ_ASSERT(mSize > gMaxLargeClass);
3264 }
3265
3266 size_t Size() { return mSize; }
3267
3268 arena_t* Arena() {
3269 if (mSize <= gMaxLargeClass) {
3270 return mChunk->arena;
3271 }
    // Best-effort detection that we're not trying to access an already
    // disposed arena. In the case of a disposed arena, the memory location
    // pointed to by mNode->mArena is either free (but still a valid memory
    // region, per TypedBaseAlloc<arena_t>), in which case its id was reset,
    // or has been reallocated for a new arena, whose randomly generated id
    // is very likely different. In both cases, the id is unlikely to match
    // what it was for the disposed arena.
3279 MOZ_RELEASE_ASSERT(mNode->mArenaId == mNode->mArena->mId);
3280 return mNode->mArena;
3281 }
3282
3283 private:
3284 size_t mSize;
3285 union {
3286 // Pointer to the chunk associated with the allocation for small
3287 // and large allocations.
3288 arena_chunk_t* mChunk;
3289
3290 // Pointer to the extent node for huge allocations.
3291 extent_node_t* mNode;
3292 };
3293 };
3294
3295 template <>
3296 inline void MozJemalloc::jemalloc_ptr_info(const void* aPtr,
3297 jemalloc_ptr_info_t* aInfo) {
3298 arena_chunk_t* chunk = GetChunkForPtr(aPtr);
3299
3300 // Is the pointer null, or within one chunk's size of null?
3301 // Alternatively, if the allocator is not initialized yet, the pointer
3302 // can't be known.
3303 if (!chunk || !malloc_initialized) {
3304 *aInfo = {TagUnknown, nullptr, 0, 0};
3305 return;
3306 }
3307
3308 // Look for huge allocations before looking for |chunk| in gChunkRTree.
3309 // This is necessary because |chunk| won't be in gChunkRTree if it's
3310 // the second or subsequent chunk in a huge allocation.
3311 extent_node_t* node;
3312 extent_node_t key;
3313 {
3314 MutexAutoLock lock(huge_mtx);
3315 key.mAddr = const_cast<void*>(aPtr);
3316 node =
3317 reinterpret_cast<RedBlackTree<extent_node_t, ExtentTreeBoundsTrait>*>(
3318 &huge)
3319 ->Search(&key);
3320 if (node) {
3321 *aInfo = {TagLiveAlloc, node->mAddr, node->mSize, node->mArena->mId};
3322 return;
3323 }
3324 }
3325
3326 // It's not a huge allocation. Check if we have a known chunk.
3327 if (!gChunkRTree.Get(chunk)) {
3328 *aInfo = {TagUnknown, nullptr, 0, 0};
3329 return;
3330 }
3331
3332 MOZ_DIAGNOSTIC_ASSERT(chunk->arena->mMagic == ARENA_MAGIC);
3333
3334 // Get the page number within the chunk.
3335 size_t pageind = (((uintptr_t)aPtr - (uintptr_t)chunk) >> gPageSize2Pow);
3336 if (pageind < gChunkHeaderNumPages) {
3337 // Within the chunk header.
3338 *aInfo = {TagUnknown, nullptr, 0, 0};
3339 return;
3340 }
3341
3342 size_t mapbits = chunk->map[pageind].bits;
3343
3344 if (!(mapbits & CHUNK_MAP_ALLOCATED)) {
3345 void* pageaddr = (void*)(uintptr_t(aPtr) & ~gPageSizeMask);
3346 *aInfo = {TagFreedPage, pageaddr, gPageSize, chunk->arena->mId};
3347 return;
3348 }
3349
3350 if (mapbits & CHUNK_MAP_LARGE) {
3351 // It's a large allocation. Only the first page of a large
3352 // allocation contains its size, so if the address is not in
3353 // the first page, scan back to find the allocation size.
3354 size_t size;
3355 while (true) {
3356 size = mapbits & ~gPageSizeMask;
3357 if (size != 0) {
3358 break;
3359 }
3360
3361 // The following two return paths shouldn't occur in
3362 // practice unless there is heap corruption.
3363 pageind--;
3364 MOZ_DIAGNOSTIC_ASSERT(pageind >= gChunkHeaderNumPages);
3365 if (pageind < gChunkHeaderNumPages) {
3366 *aInfo = {TagUnknown, nullptr, 0, 0};
3367 return;
3368 }
3369
3370 mapbits = chunk->map[pageind].bits;
3371 MOZ_DIAGNOSTIC_ASSERT(mapbits & CHUNK_MAP_LARGE);
3372 if (!(mapbits & CHUNK_MAP_LARGE)) {
3373 *aInfo = {TagUnknown, nullptr, 0, 0};
3374 return;
3375 }
3376 }
3377
3378 void* addr = ((char*)chunk) + (pageind << gPageSize2Pow);
3379 *aInfo = {TagLiveAlloc, addr, size, chunk->arena->mId};
3380 return;
3381 }
3382
3383 // It must be a small allocation.
3384 auto run = (arena_run_t*)(mapbits & ~gPageSizeMask);
3385 MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3386
3387 // The allocation size is stored in the run metadata.
3388 size_t size = run->mBin->mSizeClass;
3389
3390 // Address of the first possible pointer in the run after its headers.
3391 uintptr_t reg0_addr = (uintptr_t)run + run->mBin->mRunFirstRegionOffset;
3392 if (aPtr < (void*)reg0_addr) {
3393 // In the run header.
3394 *aInfo = {TagUnknown, nullptr, 0, 0};
3395 return;
3396 }
3397
3398 // Position in the run.
3399 unsigned regind = ((uintptr_t)aPtr - reg0_addr) / size;
3400
3401 // Pointer to the allocation's base address.
3402 void* addr = (void*)(reg0_addr + regind * size);
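  // For example, with a 16-byte size class, a pointer 40 bytes past reg0_addr
  // falls in region index 2, whose base address is reg0_addr + 32.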
3403
3404 // Check if the allocation has been freed.
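  // LOG2(sizeof(int)) + 3 is the log2 of the number of bits in an int, so
  // elm selects the word of mRegionsMask holding this region's flag and bit
  // selects the bit within it; a set bit means the region has been freed.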
3405 unsigned elm = regind >> (LOG2(sizeof(int)) + 3);
3406 unsigned bit = regind - (elm << (LOG2(sizeof(int)) + 3));
3407 PtrInfoTag tag =
3408 ((run->mRegionsMask[elm] & (1U << bit))) ? TagFreedAlloc : TagLiveAlloc;
3409
3410 *aInfo = {tag, addr, size, chunk->arena->mId};
3411 }
3412
3413 namespace Debug {
3414 // Helper for debuggers. We don't want it to be inlined and optimized out.
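// For example (exact syntax depends on the debugger), from gdb one might
// evaluate something like `call Debug::jemalloc_ptr_info($ptr)` and then
// inspect the returned jemalloc_ptr_info_t to see whether the pointer is
// live, freed, or unknown to this allocator.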
3415 MOZ_NEVER_INLINE jemalloc_ptr_info_t* jemalloc_ptr_info(const void* aPtr) {
3416 static jemalloc_ptr_info_t info;
3417 MozJemalloc::jemalloc_ptr_info(aPtr, &info);
3418 return &info;
3419 }
3420 } // namespace Debug
3421
3422 void arena_t::DallocSmall(arena_chunk_t* aChunk, void* aPtr,
3423 arena_chunk_map_t* aMapElm) {
3424 arena_run_t* run;
3425 arena_bin_t* bin;
3426 size_t size;
3427
3428 run = (arena_run_t*)(aMapElm->bits & ~gPageSizeMask);
3429 MOZ_DIAGNOSTIC_ASSERT(run->mMagic == ARENA_RUN_MAGIC);
3430 bin = run->mBin;
3431 size = bin->mSizeClass;
3432 MOZ_DIAGNOSTIC_ASSERT(uintptr_t(aPtr) >=
3433 uintptr_t(run) + bin->mRunFirstRegionOffset);
3434
3435 memset(aPtr, kAllocPoison, size);
3436
3437 arena_run_reg_dalloc(run, bin, aPtr, size);
3438 run->mNumFree++;
3439
3440 if (run->mNumFree == bin->mRunNumRegions) {
3441 // Deallocate run.
3442 if (run == bin->mCurrentRun) {
3443 bin->mCurrentRun = nullptr;
3444 } else if (bin->mRunNumRegions != 1) {
3445 size_t run_pageind =
3446 (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
3447 arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];
3448
3449 // This block's conditional is necessary because if the
3450 // run only contains one region, then it never gets
3451 // inserted into the non-full runs tree.
3452 MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == run_mapelm);
3453 bin->mNonFullRuns.Remove(run_mapelm);
3454 }
3455 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
3456 run->mMagic = 0;
3457 #endif
3458 DallocRun(run, true);
3459 bin->mNumRuns--;
3460 } else if (run->mNumFree == 1 && run != bin->mCurrentRun) {
3461 // Make sure that bin->mCurrentRun always refers to the lowest
3462 // non-full run, if one exists.
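    // Three cases follow: if there is no current run, adopt this one; if
    // this run is at a lower address than the current run, move the current
    // run (if it still has free regions) into the non-full runs tree and
    // adopt this one; otherwise insert this run into the non-full runs tree.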
3463 if (!bin->mCurrentRun) {
3464 bin->mCurrentRun = run;
3465 } else if (uintptr_t(run) < uintptr_t(bin->mCurrentRun)) {
3466 // Switch mCurrentRun.
3467 if (bin->mCurrentRun->mNumFree > 0) {
3468 arena_chunk_t* runcur_chunk = GetChunkForPtr(bin->mCurrentRun);
3469 size_t runcur_pageind =
3470 (uintptr_t(bin->mCurrentRun) - uintptr_t(runcur_chunk)) >>
3471 gPageSize2Pow;
3472 arena_chunk_map_t* runcur_mapelm = &runcur_chunk->map[runcur_pageind];
3473
3474 // Insert runcur.
3475 MOZ_DIAGNOSTIC_ASSERT(!bin->mNonFullRuns.Search(runcur_mapelm));
3476 bin->mNonFullRuns.Insert(runcur_mapelm);
3477 }
3478 bin->mCurrentRun = run;
3479 } else {
3480 size_t run_pageind =
3481 (uintptr_t(run) - uintptr_t(aChunk)) >> gPageSize2Pow;
3482 arena_chunk_map_t* run_mapelm = &aChunk->map[run_pageind];
3483
3484 MOZ_DIAGNOSTIC_ASSERT(bin->mNonFullRuns.Search(run_mapelm) == nullptr);
3485 bin->mNonFullRuns.Insert(run_mapelm);
3486 }
3487 }
3488 mStats.allocated_small -= size;
3489 }
3490
3491 void arena_t::DallocLarge(arena_chunk_t* aChunk, void* aPtr) {
3492 MOZ_DIAGNOSTIC_ASSERT((uintptr_t(aPtr) & gPageSizeMask) == 0);
3493 size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
3494 size_t size = aChunk->map[pageind].bits & ~gPageSizeMask;
3495
3496 memset(aPtr, kAllocPoison, size);
3497 mStats.allocated_large -= size;
3498
3499 DallocRun((arena_run_t*)aPtr, true);
3500 }
3501
3502 static inline void arena_dalloc(void* aPtr, size_t aOffset, arena_t* aArena) {
3503 MOZ_ASSERT(aPtr);
3504 MOZ_ASSERT(aOffset != 0);
3505 MOZ_ASSERT(GetChunkOffsetForPtr(aPtr) == aOffset);
3506
3507 auto chunk = (arena_chunk_t*)((uintptr_t)aPtr - aOffset);
3508 auto arena = chunk->arena;
3509 MOZ_ASSERT(arena);
3510 MOZ_DIAGNOSTIC_ASSERT(arena->mMagic == ARENA_MAGIC);
3511 MOZ_RELEASE_ASSERT(!aArena || arena == aArena);
3512
3513 MutexAutoLock lock(arena->mLock);
3514 size_t pageind = aOffset >> gPageSize2Pow;
3515 arena_chunk_map_t* mapelm = &chunk->map[pageind];
3516 MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_DECOMMITTED) == 0,
3517 "Freeing in decommitted page.");
3518 MOZ_RELEASE_ASSERT((mapelm->bits & CHUNK_MAP_ALLOCATED) != 0, "Double-free?");
3519 if ((mapelm->bits & CHUNK_MAP_LARGE) == 0) {
3520 // Small allocation.
3521 arena->DallocSmall(chunk, aPtr, mapelm);
3522 } else {
3523 // Large allocation.
3524 arena->DallocLarge(chunk, aPtr);
3525 }
3526 }
3527
3528 static inline void idalloc(void* ptr, arena_t* aArena) {
3529 size_t offset;
3530
3531 MOZ_ASSERT(ptr);
3532
3533 offset = GetChunkOffsetForPtr(ptr);
3534 if (offset != 0) {
3535 arena_dalloc(ptr, offset, aArena);
3536 } else {
3537 huge_dalloc(ptr, aArena);
3538 }
3539 }
3540
3541 void arena_t::RallocShrinkLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
3542 size_t aOldSize) {
3543 MOZ_ASSERT(aSize < aOldSize);
3544
3545 // Shrink the run, and make trailing pages available for other
3546 // allocations.
3547 MutexAutoLock lock(mLock);
3548 TrimRunTail(aChunk, (arena_run_t*)aPtr, aOldSize, aSize, true);
3549 mStats.allocated_large -= aOldSize - aSize;
3550 }
3551
3552 // Returns whether reallocation was successful.
3553 bool arena_t::RallocGrowLarge(arena_chunk_t* aChunk, void* aPtr, size_t aSize,
3554 size_t aOldSize) {
3555 size_t pageind = (uintptr_t(aPtr) - uintptr_t(aChunk)) >> gPageSize2Pow;
3556 size_t npages = aOldSize >> gPageSize2Pow;
3557
3558 MutexAutoLock lock(mLock);
3559 MOZ_DIAGNOSTIC_ASSERT(aOldSize ==
3560 (aChunk->map[pageind].bits & ~gPageSizeMask));
3561
3562 // Try to extend the run.
3563 MOZ_ASSERT(aSize > aOldSize);
3564 if (pageind + npages < gChunkNumPages - 1 &&
3565 (aChunk->map[pageind + npages].bits & CHUNK_MAP_ALLOCATED) == 0 &&
3566 (aChunk->map[pageind + npages].bits & ~gPageSizeMask) >=
3567 aSize - aOldSize) {
3568 // The next run is available and sufficiently large. Split the
3569 // following run, then merge the first part with the existing
3570 // allocation.
3571 if (!SplitRun((arena_run_t*)(uintptr_t(aChunk) +
3572 ((pageind + npages) << gPageSize2Pow)),
3573 aSize - aOldSize, true, false)) {
3574 return false;
3575 }
3576
3577 aChunk->map[pageind].bits = aSize | CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3578 aChunk->map[pageind + npages].bits = CHUNK_MAP_LARGE | CHUNK_MAP_ALLOCATED;
3579
3580 mStats.allocated_large += aSize - aOldSize;
3581 return true;
3582 }
3583
3584 return false;
3585 }
3586
3587 void* arena_t::RallocSmallOrLarge(void* aPtr, size_t aSize, size_t aOldSize) {
3588 void* ret;
3589 size_t copysize;
3590 SizeClass sizeClass(aSize);
3591
3592 // Try to avoid moving the allocation.
3593 if (aOldSize <= gMaxLargeClass && sizeClass.Size() == aOldSize) {
3594 if (aSize < aOldSize) {
3595 memset((void*)(uintptr_t(aPtr) + aSize), kAllocPoison, aOldSize - aSize);
3596 }
3597 return aPtr;
3598 }
3599 if (sizeClass.Type() == SizeClass::Large && aOldSize > gMaxBinClass &&
3600 aOldSize <= gMaxLargeClass) {
3601 arena_chunk_t* chunk = GetChunkForPtr(aPtr);
3602 if (sizeClass.Size() < aOldSize) {
3603 // Fill before shrinking in order to avoid a race.
3604 memset((void*)((uintptr_t)aPtr + aSize), kAllocPoison, aOldSize - aSize);
3605 RallocShrinkLarge(chunk, aPtr, sizeClass.Size(), aOldSize);
3606 return aPtr;
3607 }
3608 if (RallocGrowLarge(chunk, aPtr, sizeClass.Size(), aOldSize)) {
3609 ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
3610 return aPtr;
3611 }
3612 }
3613
3614 // If we get here, then aSize and aOldSize are different enough that we
3615 // need to move the object. In that case, fall back to allocating new
3616 // space and copying. Allow non-private arenas to switch arenas.
3617 ret = (mIsPrivate ? this : choose_arena(aSize))->Malloc(aSize, false);
3618 if (!ret) {
3619 return nullptr;
3620 }
3621
3622 // Junk/zero-filling were already done by arena_t::Malloc().
3623 copysize = (aSize < aOldSize) ? aSize : aOldSize;
3624 #ifdef VM_COPY_MIN
3625 if (copysize >= VM_COPY_MIN) {
3626 pages_copy(ret, aPtr, copysize);
3627 } else
3628 #endif
3629 {
3630 memcpy(ret, aPtr, copysize);
3631 }
3632 idalloc(aPtr, this);
3633 return ret;
3634 }
3635
3636 void* arena_t::Ralloc(void* aPtr, size_t aSize, size_t aOldSize) {
3637 MOZ_DIAGNOSTIC_ASSERT(mMagic == ARENA_MAGIC);
3638 MOZ_ASSERT(aPtr);
3639 MOZ_ASSERT(aSize != 0);
3640
3641 return (aSize <= gMaxLargeClass) ? RallocSmallOrLarge(aPtr, aSize, aOldSize)
3642 : RallocHuge(aPtr, aSize, aOldSize);
3643 }
3644
3645 void* arena_t::operator new(size_t aCount, const fallible_t&) noexcept {
3646 MOZ_ASSERT(aCount == sizeof(arena_t));
3647 return TypedBaseAlloc<arena_t>::alloc();
3648 }
3649
3650 void arena_t::operator delete(void* aPtr) {
3651 TypedBaseAlloc<arena_t>::dealloc((arena_t*)aPtr);
3652 }
3653
3654 arena_t::arena_t(arena_params_t* aParams, bool aIsPrivate) {
3655 unsigned i;
3656
3657 MOZ_RELEASE_ASSERT(mLock.Init());
3658
3659 memset(&mLink, 0, sizeof(mLink));
3660 memset(&mStats, 0, sizeof(arena_stats_t));
3661 mId = 0;
3662
3663 // Initialize chunks.
3664 mChunksDirty.Init();
3665 #ifdef MALLOC_DOUBLE_PURGE
3666 new (&mChunksMAdvised) DoublyLinkedList<arena_chunk_t>();
3667 #endif
3668 mSpare = nullptr;
3669
3670 mRandomizeSmallAllocations = opt_randomize_small;
3671 if (aParams) {
3672 uint32_t flags = aParams->mFlags & ARENA_FLAG_RANDOMIZE_SMALL_MASK;
3673 switch (flags) {
3674 case ARENA_FLAG_RANDOMIZE_SMALL_ENABLED:
3675 mRandomizeSmallAllocations = true;
3676 break;
3677 case ARENA_FLAG_RANDOMIZE_SMALL_DISABLED:
3678 mRandomizeSmallAllocations = false;
3679 break;
3680 case ARENA_FLAG_RANDOMIZE_SMALL_DEFAULT:
3681 default:
3682 break;
3683 }
3684 }
3685 mPRNG = nullptr;
3686
3687 mIsPrivate = aIsPrivate;
3688
3689 mNumDirty = 0;
  // The default maximum number of dirty pages allowed on arenas is a fraction
  // (currently one eighth) of opt_dirty_max.
3692 mMaxDirty = (aParams && aParams->mMaxDirty) ? aParams->mMaxDirty
3693 : (opt_dirty_max / 8);
3694
3695 mRunsAvail.Init();
3696
3697 // Initialize bins.
3698 SizeClass sizeClass(1);
3699
3700 for (i = 0;; i++) {
3701 arena_bin_t& bin = mBins[i];
3702 bin.Init(sizeClass);
3703
3704 // SizeClass doesn't want sizes larger than gMaxBinClass for now.
3705 if (sizeClass.Size() == gMaxBinClass) {
3706 break;
3707 }
3708 sizeClass = sizeClass.Next();
3709 }
3710 MOZ_ASSERT(i == NUM_SMALL_CLASSES - 1);
3711
3712 #if defined(MOZ_DIAGNOSTIC_ASSERT_ENABLED)
3713 mMagic = ARENA_MAGIC;
3714 #endif
3715 }
3716
3717 arena_t::~arena_t() {
3718 size_t i;
3719 MutexAutoLock lock(mLock);
3720 MOZ_RELEASE_ASSERT(!mLink.Left() && !mLink.Right(),
3721 "Arena is still registered");
3722 MOZ_RELEASE_ASSERT(!mStats.allocated_small && !mStats.allocated_large,
3723 "Arena is not empty");
3724 if (mSpare) {
3725 chunk_dealloc(mSpare, kChunkSize, ARENA_CHUNK);
3726 }
3727 for (i = 0; i < NUM_SMALL_CLASSES; i++) {
3728 MOZ_RELEASE_ASSERT(!mBins[i].mNonFullRuns.First(), "Bin is not empty");
3729 }
3730 #ifdef MOZ_DEBUG
3731 {
3732 MutexAutoLock lock(huge_mtx);
3733 // This is an expensive check, so we only do it on debug builds.
3734 for (auto node : huge.iter()) {
3735 MOZ_RELEASE_ASSERT(node->mArenaId != mId, "Arena has huge allocations");
3736 }
3737 }
3738 #endif
3739 mId = 0;
3740 }
3741
3742 arena_t* ArenaCollection::CreateArena(bool aIsPrivate,
3743 arena_params_t* aParams) {
3744 arena_t* ret = new (fallible) arena_t(aParams, aIsPrivate);
3745 if (!ret) {
3746 // Only reached if there is an OOM error.
3747
3748 // OOM here is quite inconvenient to propagate, since dealing with it
3749 // would require a check for failure in the fast path. Instead, punt
3750 // by using the first arena.
3751 // In practice, this is an extremely unlikely failure.
3752 _malloc_message(_getprogname(), ": (malloc) Error initializing arena\n");
3753
3754 return mDefaultArena;
3755 }
3756
3757 MutexAutoLock lock(mLock);
3758
  // For public arenas, it's fine to just use an incrementing arena id.
3760 if (!aIsPrivate) {
3761 ret->mId = mLastPublicArenaId++;
3762 mArenas.Insert(ret);
3763 return ret;
3764 }
3765
  // For private arenas, generate a cryptographically secure random id for the
  // new arena. If an attacker manages to get control of the process, this
  // should make it more difficult for them to "guess" the ID of a memory
  // arena, stopping them from getting data they may want.
3770
3771 while (true) {
3772 mozilla::Maybe<uint64_t> maybeRandomId = mozilla::RandomUint64();
3773 MOZ_RELEASE_ASSERT(maybeRandomId.isSome());
3774
3775 // Avoid 0 as an arena Id. We use 0 for disposed arenas.
3776 if (!maybeRandomId.value()) {
3777 continue;
3778 }
3779
    // Keep looping until we ensure that the random number we just generated
    // isn't already in use by another active arena.
3782 arena_t* existingArena =
3783 GetByIdInternal(maybeRandomId.value(), true /*aIsPrivate*/);
3784
3785 if (!existingArena) {
3786 ret->mId = static_cast<arena_id_t>(maybeRandomId.value());
3787 mPrivateArenas.Insert(ret);
3788 return ret;
3789 }
3790 }
3791 }
3792
3793 // End arena.
3794 // ***************************************************************************
3795 // Begin general internal functions.
3796
3797 void* arena_t::MallocHuge(size_t aSize, bool aZero) {
3798 return PallocHuge(aSize, kChunkSize, aZero);
3799 }
3800
3801 void* arena_t::PallocHuge(size_t aSize, size_t aAlignment, bool aZero) {
3802 void* ret;
3803 size_t csize;
3804 size_t psize;
3805 extent_node_t* node;
3806 bool zeroed;
3807
3808 // We're going to configure guard pages in the region between the
3809 // page-aligned size and the chunk-aligned size, so if those are the same
3810 // then we need to force that region into existence.
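  // For example (a sketch, assuming 4 KiB pages and 1 MiB chunks), a request
  // for exactly 1 MiB gets psize = 1 MiB and csize = 2 MiB, so a full chunk's
  // worth of address space past psize ends up decommitted and acts as a guard
  // region.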
3811 csize = CHUNK_CEILING(aSize + gPageSize);
3812 if (csize < aSize) {
3813 // size is large enough to cause size_t wrap-around.
3814 return nullptr;
3815 }
3816
3817 // Allocate an extent node with which to track the chunk.
3818 node = ExtentAlloc::alloc();
3819 if (!node) {
3820 return nullptr;
3821 }
3822
3823 // Allocate one or more contiguous chunks for this request.
3824 ret = chunk_alloc(csize, aAlignment, false, &zeroed);
3825 if (!ret) {
3826 ExtentAlloc::dealloc(node);
3827 return nullptr;
3828 }
3829 psize = PAGE_CEILING(aSize);
3830 if (aZero) {
3831 // We will decommit anything past psize so there is no need to zero
3832 // further.
3833 chunk_ensure_zero(ret, psize, zeroed);
3834 }
3835
3836 // Insert node into huge.
3837 node->mAddr = ret;
3838 node->mSize = psize;
3839 node->mArena = this;
3840 node->mArenaId = mId;
3841
3842 {
3843 MutexAutoLock lock(huge_mtx);
3844 huge.Insert(node);
3845
3846 // Although we allocated space for csize bytes, we indicate that we've
3847 // allocated only psize bytes.
3848 //
3849 // If DECOMMIT is defined, this is a reasonable thing to do, since
3850 // we'll explicitly decommit the bytes in excess of psize.
3851 //
3852 // If DECOMMIT is not defined, then we're relying on the OS to be lazy
3853 // about how it allocates physical pages to mappings. If we never
3854 // touch the pages in excess of psize, the OS won't allocate a physical
3855 // page, and we won't use more than psize bytes of physical memory.
3856 //
3857 // A correct program will only touch memory in excess of how much it
3858 // requested if it first calls malloc_usable_size and finds out how
3859 // much space it has to play with. But because we set node->mSize =
3860 // psize above, malloc_usable_size will return psize, not csize, and
3861 // the program will (hopefully) never touch bytes in excess of psize.
3862 // Thus those bytes won't take up space in physical memory, and we can
3863 // reasonably claim we never "allocated" them in the first place.
3864 huge_allocated += psize;
3865 huge_mapped += csize;
3866 }
3867
3868 pages_decommit((void*)((uintptr_t)ret + psize), csize - psize);
3869
3870 if (!aZero) {
3871 ApplyZeroOrJunk(ret, psize);
3872 }
3873
3874 return ret;
3875 }
3876
3877 void* arena_t::RallocHuge(void* aPtr, size_t aSize, size_t aOldSize) {
3878 void* ret;
3879 size_t copysize;
3880
3881 // Avoid moving the allocation if the size class would not change.
3882 if (aOldSize > gMaxLargeClass &&
3883 CHUNK_CEILING(aSize + gPageSize) == CHUNK_CEILING(aOldSize + gPageSize)) {
3884 size_t psize = PAGE_CEILING(aSize);
3885 if (aSize < aOldSize) {
3886 memset((void*)((uintptr_t)aPtr + aSize), kAllocPoison, aOldSize - aSize);
3887 }
3888 if (psize < aOldSize) {
3889 extent_node_t key;
3890
3891 pages_decommit((void*)((uintptr_t)aPtr + psize), aOldSize - psize);
3892
3893 // Update recorded size.
3894 MutexAutoLock lock(huge_mtx);
3895 key.mAddr = const_cast<void*>(aPtr);
3896 extent_node_t* node = huge.Search(&key);
3897 MOZ_ASSERT(node);
3898 MOZ_ASSERT(node->mSize == aOldSize);
3899 MOZ_RELEASE_ASSERT(node->mArena == this);
3900 huge_allocated -= aOldSize - psize;
3901 // No need to change huge_mapped, because we didn't (un)map anything.
3902 node->mSize = psize;
3903 } else if (psize > aOldSize) {
3904 if (!pages_commit((void*)((uintptr_t)aPtr + aOldSize),
3905 psize - aOldSize)) {
3906 return nullptr;
3907 }
3908
3909 // We need to update the recorded size if the size increased,
3910 // so malloc_usable_size doesn't return a value smaller than
3911 // what was requested via realloc().
3912 extent_node_t key;
3913 MutexAutoLock lock(huge_mtx);
3914 key.mAddr = const_cast<void*>(aPtr);
3915 extent_node_t* node = huge.Search(&key);
3916 MOZ_ASSERT(node);
3917 MOZ_ASSERT(node->mSize == aOldSize);
3918 MOZ_RELEASE_ASSERT(node->mArena == this);
3919 huge_allocated += psize - aOldSize;
3920 // No need to change huge_mapped, because we didn't
3921 // (un)map anything.
3922 node->mSize = psize;
3923 }
3924
3925 if (aSize > aOldSize) {
3926 ApplyZeroOrJunk((void*)((uintptr_t)aPtr + aOldSize), aSize - aOldSize);
3927 }
3928 return aPtr;
3929 }
3930
3931 // If we get here, then aSize and aOldSize are different enough that we
3932 // need to use a different size class. In that case, fall back to allocating
3933 // new space and copying. Allow non-private arenas to switch arenas.
3934 ret = (mIsPrivate ? this : choose_arena(aSize))->MallocHuge(aSize, false);
3935 if (!ret) {
3936 return nullptr;
3937 }
3938
3939 copysize = (aSize < aOldSize) ? aSize : aOldSize;
3940 #ifdef VM_COPY_MIN
3941 if (copysize >= VM_COPY_MIN) {
3942 pages_copy(ret, aPtr, copysize);
3943 } else
3944 #endif
3945 {
3946 memcpy(ret, aPtr, copysize);
3947 }
3948 idalloc(aPtr, this);
3949 return ret;
3950 }
3951
3952 static void huge_dalloc(void* aPtr, arena_t* aArena) {
3953 extent_node_t* node;
3954 size_t mapped = 0;
3955 {
3956 extent_node_t key;
3957 MutexAutoLock lock(huge_mtx);
3958
3959 // Extract from tree of huge allocations.
3960 key.mAddr = aPtr;
3961 node = huge.Search(&key);
3962 MOZ_RELEASE_ASSERT(node, "Double-free?");
3963 MOZ_ASSERT(node->mAddr == aPtr);
3964 MOZ_RELEASE_ASSERT(!aArena || node->mArena == aArena);
3965 // See AllocInfo::Arena.
3966 MOZ_RELEASE_ASSERT(node->mArenaId == node->mArena->mId);
3967 huge.Remove(node);
3968
3969 mapped = CHUNK_CEILING(node->mSize + gPageSize);
3970 huge_allocated -= node->mSize;
3971 huge_mapped -= mapped;
3972 }
3973
3974 // Unmap chunk.
3975 chunk_dealloc(node->mAddr, mapped, HUGE_CHUNK);
3976
3977 ExtentAlloc::dealloc(node);
3978 }
3979
3980 static size_t GetKernelPageSize() {
3981 static size_t kernel_page_size = ([]() {
3982 #ifdef XP_WIN
3983 SYSTEM_INFO info;
3984 GetSystemInfo(&info);
3985 return info.dwPageSize;
3986 #else
3987 long result = sysconf(_SC_PAGESIZE);
3988 MOZ_ASSERT(result != -1);
3989 return result;
3990 #endif
3991 })();
3992 return kernel_page_size;
3993 }
3994
3995 // Returns whether the allocator was successfully initialized.
3996 static bool malloc_init_hard() {
3997 unsigned i;
3998 const char* opts;
3999 long result;
4000
4001 AutoLock<StaticMutex> lock(gInitLock);
4002
4003 if (malloc_initialized) {
4004 // Another thread initialized the allocator before this one
4005 // acquired gInitLock.
4006 return true;
4007 }
4008
4009 if (!thread_arena.init()) {
    return false;
4011 }
4012
  // Get the kernel page size.
4014 result = GetKernelPageSize();
4015 // We assume that the page size is a power of 2.
4016 MOZ_ASSERT(((result - 1) & result) == 0);
4017 #ifdef MALLOC_STATIC_PAGESIZE
4018 if (gPageSize % (size_t)result) {
    _malloc_message(
        _getprogname(),
        "Compile-time page size is not a multiple of the runtime one.\n");
4022 MOZ_CRASH();
4023 }
4024 #else
4025 gRealPageSize = gPageSize = (size_t)result;
4026 #endif
4027
4028 // Get runtime configuration.
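  // Each option is a single character, optionally preceded by a decimal
  // repetition count. For example (not an exhaustive list), with
  // MALLOC_OPTIONS="3F" the 'F' option is applied three times, doubling
  // opt_dirty_max three times, while "f" halves it once.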
4029 if ((opts = getenv("MALLOC_OPTIONS"))) {
4030 for (i = 0; opts[i] != '\0'; i++) {
4031 unsigned j, nreps;
4032 bool nseen;
4033
4034 // Parse repetition count, if any.
4035 for (nreps = 0, nseen = false;; i++, nseen = true) {
4036 switch (opts[i]) {
4037 case '0':
4038 case '1':
4039 case '2':
4040 case '3':
4041 case '4':
4042 case '5':
4043 case '6':
4044 case '7':
4045 case '8':
4046 case '9':
4047 nreps *= 10;
4048 nreps += opts[i] - '0';
4049 break;
4050 default:
4051 goto MALLOC_OUT;
4052 }
4053 }
4054 MALLOC_OUT:
4055 if (nseen == false) {
4056 nreps = 1;
4057 }
4058
4059 for (j = 0; j < nreps; j++) {
4060 switch (opts[i]) {
4061 case 'f':
4062 opt_dirty_max >>= 1;
4063 break;
4064 case 'F':
4065 if (opt_dirty_max == 0) {
4066 opt_dirty_max = 1;
4067 } else if ((opt_dirty_max << 1) != 0) {
4068 opt_dirty_max <<= 1;
4069 }
4070 break;
4071 #ifdef MOZ_DEBUG
4072 case 'j':
4073 opt_junk = false;
4074 break;
4075 case 'J':
4076 opt_junk = true;
4077 break;
4078 case 'z':
4079 opt_zero = false;
4080 break;
4081 case 'Z':
4082 opt_zero = true;
4083 break;
4084 # ifndef MALLOC_STATIC_PAGESIZE
4085 case 'P':
4086 if (gPageSize < 64_KiB) {
4087 gPageSize <<= 1;
4088 }
4089 break;
4090 # endif
4091 #endif
4092 case 'r':
4093 opt_randomize_small = false;
4094 break;
4095 case 'R':
4096 opt_randomize_small = true;
4097 break;
4098 default: {
4099 char cbuf[2];
4100
4101 cbuf[0] = opts[i];
4102 cbuf[1] = '\0';
4103 _malloc_message(_getprogname(),
4104 ": (malloc) Unsupported character "
4105 "in malloc options: '",
4106 cbuf, "'\n");
4107 }
4108 }
4109 }
4110 }
4111 }
4112
4113 #ifndef MALLOC_STATIC_PAGESIZE
4114 DefineGlobals();
4115 #endif
4116 gRecycledSize = 0;
4117
4118 // Initialize chunks data.
4119 chunks_mtx.Init();
4120 gChunksBySize.Init();
4121 gChunksByAddress.Init();
4122
4123 // Initialize huge allocation data.
4124 huge_mtx.Init();
4125 huge.Init();
4126 huge_allocated = 0;
4127 huge_mapped = 0;
4128
4129 // Initialize base allocation data structures.
4130 base_mapped = 0;
4131 base_committed = 0;
4132 base_mtx.Init();
4133
4134 // Initialize arenas collection here.
4135 if (!gArenas.Init()) {
4136 return false;
4137 }
4138
4139 // Assign the default arena to the initial thread.
4140 thread_arena.set(gArenas.GetDefault());
4141
4142 if (!gChunkRTree.Init()) {
4143 return false;
4144 }
4145
4146 malloc_initialized = true;
4147
4148 // Dummy call so that the function is not removed by dead-code elimination
4149 Debug::jemalloc_ptr_info(nullptr);
4150
4151 #if !defined(XP_WIN) && !defined(XP_DARWIN)
4152 // Prevent potential deadlock on malloc locks after fork.
4153 pthread_atfork(_malloc_prefork, _malloc_postfork_parent,
4154 _malloc_postfork_child);
4155 #endif
4156
4157 return true;
4158 }
4159
4160 // End general internal functions.
4161 // ***************************************************************************
4162 // Begin malloc(3)-compatible functions.
4163
4164 // The BaseAllocator class is a helper class that implements the base allocator
4165 // functions (malloc, calloc, realloc, free, memalign) for a given arena,
4166 // or an appropriately chosen arena (per choose_arena()) when none is given.
4167 struct BaseAllocator {
4168 #define MALLOC_DECL(name, return_type, ...) \
4169 inline return_type name(__VA_ARGS__);
4170
4171 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
4172 #include "malloc_decls.h"
4173
4174 explicit BaseAllocator(arena_t* aArena) : mArena(aArena) {}
4175
4176 private:
4177 arena_t* mArena;
4178 };
4179
4180 #define MALLOC_DECL(name, return_type, ...) \
4181 template <> \
4182 inline return_type MozJemalloc::name( \
4183 ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
4184 BaseAllocator allocator(nullptr); \
4185 return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
4186 }
4187 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
4188 #include "malloc_decls.h"
4189
4190 inline void* BaseAllocator::malloc(size_t aSize) {
4191 void* ret;
4192 arena_t* arena;
4193
4194 if (!malloc_init()) {
4195 ret = nullptr;
4196 goto RETURN;
4197 }
4198
4199 if (aSize == 0) {
4200 aSize = 1;
4201 }
4202 arena = mArena ? mArena : choose_arena(aSize);
4203 ret = arena->Malloc(aSize, /* zero = */ false);
4204
4205 RETURN:
4206 if (!ret) {
4207 errno = ENOMEM;
4208 }
4209
4210 return ret;
4211 }
4212
4213 inline void* BaseAllocator::memalign(size_t aAlignment, size_t aSize) {
4214 MOZ_ASSERT(((aAlignment - 1) & aAlignment) == 0);
4215
4216 if (!malloc_init()) {
4217 return nullptr;
4218 }
4219
4220 if (aSize == 0) {
4221 aSize = 1;
4222 }
4223
4224 aAlignment = aAlignment < sizeof(void*) ? sizeof(void*) : aAlignment;
4225 arena_t* arena = mArena ? mArena : choose_arena(aSize);
4226 return arena->Palloc(aAlignment, aSize);
4227 }
4228
4229 inline void* BaseAllocator::calloc(size_t aNum, size_t aSize) {
4230 void* ret;
4231
4232 if (malloc_init()) {
4233 CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aNum) * aSize;
4234 if (checkedSize.isValid()) {
4235 size_t allocSize = checkedSize.value();
4236 if (allocSize == 0) {
4237 allocSize = 1;
4238 }
4239 arena_t* arena = mArena ? mArena : choose_arena(allocSize);
4240 ret = arena->Malloc(allocSize, /* zero = */ true);
4241 } else {
4242 ret = nullptr;
4243 }
4244 } else {
4245 ret = nullptr;
4246 }
4247
4248 if (!ret) {
4249 errno = ENOMEM;
4250 }
4251
4252 return ret;
4253 }
4254
4255 inline void* BaseAllocator::realloc(void* aPtr, size_t aSize) {
4256 void* ret;
4257
4258 if (aSize == 0) {
4259 aSize = 1;
4260 }
4261
4262 if (aPtr) {
4263 MOZ_RELEASE_ASSERT(malloc_initialized);
4264
4265 auto info = AllocInfo::Get(aPtr);
4266 auto arena = info.Arena();
4267 MOZ_RELEASE_ASSERT(!mArena || arena == mArena);
4268 ret = arena->Ralloc(aPtr, aSize, info.Size());
4269 } else {
4270 if (!malloc_init()) {
4271 ret = nullptr;
4272 } else {
4273 arena_t* arena = mArena ? mArena : choose_arena(aSize);
4274 ret = arena->Malloc(aSize, /* zero = */ false);
4275 }
4276 }
4277
4278 if (!ret) {
4279 errno = ENOMEM;
4280 }
4281 return ret;
4282 }
4283
4284 inline void BaseAllocator::free(void* aPtr) {
4285 size_t offset;
4286
4287 // A version of idalloc that checks for nullptr pointer.
4288 offset = GetChunkOffsetForPtr(aPtr);
4289 if (offset != 0) {
4290 MOZ_RELEASE_ASSERT(malloc_initialized);
4291 arena_dalloc(aPtr, offset, mArena);
4292 } else if (aPtr) {
4293 MOZ_RELEASE_ASSERT(malloc_initialized);
4294 huge_dalloc(aPtr, mArena);
4295 }
4296 }
4297
4298 template <void* (*memalign)(size_t, size_t)>
4299 struct AlignedAllocator {
4300 static inline int posix_memalign(void** aMemPtr, size_t aAlignment,
4301 size_t aSize) {
4302 void* result;
4303
4304 // alignment must be a power of two and a multiple of sizeof(void*)
4305 if (((aAlignment - 1) & aAlignment) != 0 || aAlignment < sizeof(void*)) {
4306 return EINVAL;
4307 }
4308
4309 // The 0-->1 size promotion is done in the memalign() call below
4310 result = memalign(aAlignment, aSize);
4311
4312 if (!result) {
4313 return ENOMEM;
4314 }
4315
4316 *aMemPtr = result;
4317 return 0;
4318 }
4319
4320 static inline void* aligned_alloc(size_t aAlignment, size_t aSize) {
4321 if (aSize % aAlignment) {
4322 return nullptr;
4323 }
4324 return memalign(aAlignment, aSize);
4325 }
4326
4327 static inline void* valloc(size_t aSize) {
4328 return memalign(GetKernelPageSize(), aSize);
4329 }
4330 };
4331
4332 template <>
4333 inline int MozJemalloc::posix_memalign(void** aMemPtr, size_t aAlignment,
4334 size_t aSize) {
4335 return AlignedAllocator<memalign>::posix_memalign(aMemPtr, aAlignment, aSize);
4336 }
4337
4338 template <>
4339 inline void* MozJemalloc::aligned_alloc(size_t aAlignment, size_t aSize) {
4340 return AlignedAllocator<memalign>::aligned_alloc(aAlignment, aSize);
4341 }
4342
4343 template <>
4344 inline void* MozJemalloc::valloc(size_t aSize) {
4345 return AlignedAllocator<memalign>::valloc(aSize);
4346 }
4347
4348 // End malloc(3)-compatible functions.
4349 // ***************************************************************************
4350 // Begin non-standard functions.
4351
4352 // This was added by Mozilla for use by SQLite.
4353 template <>
4354 inline size_t MozJemalloc::malloc_good_size(size_t aSize) {
4355 if (aSize <= gMaxLargeClass) {
4356 // Small or large
4357 aSize = SizeClass(aSize).Size();
4358 } else {
    // Huge. We use PAGE_CEILING to get psize, instead of using
    // CHUNK_CEILING to get csize. This ensures that
    // malloc_usable_size(malloc(n)) always matches malloc_good_size(n).
4363 aSize = PAGE_CEILING(aSize);
4364 }
4365 return aSize;
4366 }
4367
4368 template <>
4369 inline size_t MozJemalloc::malloc_usable_size(usable_ptr_t aPtr) {
4370 return AllocInfo::GetValidated(aPtr).Size();
4371 }
4372
4373 template <>
4374 inline void MozJemalloc::jemalloc_stats_internal(
4375 jemalloc_stats_t* aStats, jemalloc_bin_stats_t* aBinStats) {
4376 size_t non_arena_mapped, chunk_header_size;
4377
4378 if (!aStats) {
4379 return;
4380 }
4381 if (!malloc_init()) {
4382 memset(aStats, 0, sizeof(*aStats));
4383 return;
4384 }
4385 if (aBinStats) {
4386 memset(aBinStats, 0, sizeof(jemalloc_bin_stats_t) * NUM_SMALL_CLASSES);
4387 }
4388
4389 // Gather runtime settings.
4390 aStats->opt_junk = opt_junk;
4391 aStats->opt_zero = opt_zero;
4392 aStats->quantum = kQuantum;
4393 aStats->quantum_max = kMaxQuantumClass;
4394 aStats->quantum_wide = kQuantumWide;
4395 aStats->quantum_wide_max = kMaxQuantumWideClass;
4396 aStats->subpage_max = gMaxSubPageClass;
4397 aStats->large_max = gMaxLargeClass;
4398 aStats->chunksize = kChunkSize;
4399 aStats->page_size = gPageSize;
4400 aStats->dirty_max = opt_dirty_max;
4401
4402 // Gather current memory usage statistics.
4403 aStats->narenas = 0;
4404 aStats->mapped = 0;
4405 aStats->allocated = 0;
4406 aStats->waste = 0;
4407 aStats->page_cache = 0;
4408 aStats->bookkeeping = 0;
4409 aStats->bin_unused = 0;
4410
4411 non_arena_mapped = 0;
4412
4413 // Get huge mapped/allocated.
4414 {
4415 MutexAutoLock lock(huge_mtx);
4416 non_arena_mapped += huge_mapped;
4417 aStats->allocated += huge_allocated;
4418 MOZ_ASSERT(huge_mapped >= huge_allocated);
4419 }
4420
4421 // Get base mapped/allocated.
4422 {
4423 MutexAutoLock lock(base_mtx);
4424 non_arena_mapped += base_mapped;
4425 aStats->bookkeeping += base_committed;
4426 MOZ_ASSERT(base_mapped >= base_committed);
4427 }
4428
4429 gArenas.mLock.Lock();
4430 // Iterate over arenas.
4431 for (auto arena : gArenas.iter()) {
4432 size_t arena_mapped, arena_allocated, arena_committed, arena_dirty, j,
4433 arena_unused, arena_headers;
4434
4435 arena_headers = 0;
4436 arena_unused = 0;
4437
4438 {
4439 MutexAutoLock lock(arena->mLock);
4440
4441 arena_mapped = arena->mStats.mapped;
4442
4443 // "committed" counts dirty and allocated memory.
4444 arena_committed = arena->mStats.committed << gPageSize2Pow;
4445
4446 arena_allocated =
4447 arena->mStats.allocated_small + arena->mStats.allocated_large;
4448
4449 arena_dirty = arena->mNumDirty << gPageSize2Pow;
4450
4451 for (j = 0; j < NUM_SMALL_CLASSES; j++) {
4452 arena_bin_t* bin = &arena->mBins[j];
4453 size_t bin_unused = 0;
4454 size_t num_non_full_runs = 0;
4455
4456 for (auto mapelm : bin->mNonFullRuns.iter()) {
4457 arena_run_t* run = (arena_run_t*)(mapelm->bits & ~gPageSizeMask);
4458 bin_unused += run->mNumFree * bin->mSizeClass;
4459 num_non_full_runs++;
4460 }
4461
4462 if (bin->mCurrentRun) {
4463 bin_unused += bin->mCurrentRun->mNumFree * bin->mSizeClass;
4464 num_non_full_runs++;
4465 }
4466
4467 arena_unused += bin_unused;
4468 arena_headers += bin->mNumRuns * bin->mRunFirstRegionOffset;
4469 if (aBinStats) {
4470 aBinStats[j].size = bin->mSizeClass;
4471 aBinStats[j].num_non_full_runs += num_non_full_runs;
4472 aBinStats[j].num_runs += bin->mNumRuns;
4473 aBinStats[j].bytes_unused += bin_unused;
4474 aBinStats[j].bytes_total +=
4475 bin->mNumRuns * (bin->mRunSize - bin->mRunFirstRegionOffset);
4476 aBinStats[j].bytes_per_run = bin->mRunSize;
4477 }
4478 }
4479 }
4480
4481 MOZ_ASSERT(arena_mapped >= arena_committed);
4482 MOZ_ASSERT(arena_committed >= arena_allocated + arena_dirty);
4483
4484 aStats->mapped += arena_mapped;
4485 aStats->allocated += arena_allocated;
4486 aStats->page_cache += arena_dirty;
4487 // "waste" is committed memory that is neither dirty nor
4488 // allocated. If you change this definition please update
4489 // memory/replace/logalloc/replay/Replay.cpp's jemalloc_stats calculation of
4490 // committed.
4491 aStats->waste += arena_committed - arena_allocated - arena_dirty -
4492 arena_unused - arena_headers;
4493 aStats->bin_unused += arena_unused;
4494 aStats->bookkeeping += arena_headers;
4495 aStats->narenas++;
4496 }
4497 gArenas.mLock.Unlock();
4498
4499 // Account for arena chunk headers in bookkeeping rather than waste.
4500 chunk_header_size =
4501 ((aStats->mapped / aStats->chunksize) * gChunkHeaderNumPages)
4502 << gPageSize2Pow;
4503
4504 aStats->mapped += non_arena_mapped;
4505 aStats->bookkeeping += chunk_header_size;
4506 aStats->waste -= chunk_header_size;
4507
4508 MOZ_ASSERT(aStats->mapped >= aStats->allocated + aStats->waste +
4509 aStats->page_cache + aStats->bookkeeping);
4510 }
4511
4512 template <>
4513 inline size_t MozJemalloc::jemalloc_stats_num_bins() {
4514 return NUM_SMALL_CLASSES;
4515 }
4516
4517 #ifdef MALLOC_DOUBLE_PURGE
4518
4519 // Explicitly remove all of this chunk's MADV_FREE'd pages from memory.
4520 static void hard_purge_chunk(arena_chunk_t* aChunk) {
4521 // See similar logic in arena_t::Purge().
4522 for (size_t i = gChunkHeaderNumPages; i < gChunkNumPages; i++) {
4523 // Find all adjacent pages with CHUNK_MAP_MADVISED set.
4524 size_t npages;
    for (npages = 0; i + npages < gChunkNumPages &&
                     aChunk->map[i + npages].bits & CHUNK_MAP_MADVISED;
         npages++) {
4528 // Turn off the chunk's MADV_FREED bit and turn on its
4529 // DECOMMITTED bit.
4530 MOZ_DIAGNOSTIC_ASSERT(
4531 !(aChunk->map[i + npages].bits & CHUNK_MAP_DECOMMITTED));
4532 aChunk->map[i + npages].bits ^= CHUNK_MAP_MADVISED_OR_DECOMMITTED;
4533 }
4534
4535 // We could use mincore to find out which pages are actually
4536 // present, but it's not clear that's better.
4537 if (npages > 0) {
4538 pages_decommit(((char*)aChunk) + (i << gPageSize2Pow),
4539 npages << gPageSize2Pow);
4540 Unused << pages_commit(((char*)aChunk) + (i << gPageSize2Pow),
4541 npages << gPageSize2Pow);
4542 }
4543 i += npages;
4544 }
4545 }
4546
4547 // Explicitly remove all of this arena's MADV_FREE'd pages from memory.
4548 void arena_t::HardPurge() {
4549 MutexAutoLock lock(mLock);
4550
4551 while (!mChunksMAdvised.isEmpty()) {
4552 arena_chunk_t* chunk = mChunksMAdvised.popFront();
4553 hard_purge_chunk(chunk);
4554 }
4555 }
4556
4557 template <>
4558 inline void MozJemalloc::jemalloc_purge_freed_pages() {
4559 if (malloc_initialized) {
4560 MutexAutoLock lock(gArenas.mLock);
4561 for (auto arena : gArenas.iter()) {
4562 arena->HardPurge();
4563 }
4564 }
4565 }
4566
4567 #else // !defined MALLOC_DOUBLE_PURGE
4568
4569 template <>
4570 inline void MozJemalloc::jemalloc_purge_freed_pages() {
4571 // Do nothing.
4572 }
4573
4574 #endif // defined MALLOC_DOUBLE_PURGE
4575
4576 template <>
4577 inline void MozJemalloc::jemalloc_free_dirty_pages(void) {
4578 if (malloc_initialized) {
4579 MutexAutoLock lock(gArenas.mLock);
4580 for (auto arena : gArenas.iter()) {
4581 MutexAutoLock arena_lock(arena->mLock);
4582 arena->Purge(true);
4583 }
4584 }
4585 }
4586
4587 inline arena_t* ArenaCollection::GetByIdInternal(arena_id_t aArenaId,
4588 bool aIsPrivate) {
  // Use AlignedStorage2 to avoid running the arena_t constructor, since
  // we only need it as a placeholder for mId.
4591 mozilla::AlignedStorage2<arena_t> key;
4592 key.addr()->mId = aArenaId;
4593 return (aIsPrivate ? mPrivateArenas : mArenas).Search(key.addr());
4594 }
4595
4596 inline arena_t* ArenaCollection::GetById(arena_id_t aArenaId, bool aIsPrivate) {
4597 if (!malloc_initialized) {
4598 return nullptr;
4599 }
4600
4601 MutexAutoLock lock(mLock);
4602 arena_t* result = GetByIdInternal(aArenaId, aIsPrivate);
4603 MOZ_RELEASE_ASSERT(result);
4604 return result;
4605 }
4606
4607 template <>
4608 inline arena_id_t MozJemalloc::moz_create_arena_with_params(
4609 arena_params_t* aParams) {
4610 if (malloc_init()) {
4611 arena_t* arena = gArenas.CreateArena(/* IsPrivate = */ true, aParams);
4612 return arena->mId;
4613 }
4614 return 0;
4615 }
4616
4617 template <>
4618 inline void MozJemalloc::moz_dispose_arena(arena_id_t aArenaId) {
4619 arena_t* arena = gArenas.GetById(aArenaId, /* IsPrivate = */ true);
4620 MOZ_RELEASE_ASSERT(arena);
4621 gArenas.DisposeArena(arena);
4622 }
4623
4624 #define MALLOC_DECL(name, return_type, ...) \
4625 template <> \
4626 inline return_type MozJemalloc::moz_arena_##name( \
4627 arena_id_t aArenaId, ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
4628 BaseAllocator allocator( \
4629 gArenas.GetById(aArenaId, /* IsPrivate = */ true)); \
4630 return allocator.name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
4631 }
4632 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC_BASE
4633 #include "malloc_decls.h"
4634
4635 // End non-standard functions.
4636 // ***************************************************************************
4637 #ifndef XP_WIN
4638 // Begin library-private functions, used by threading libraries for protection
4639 // of malloc during fork(). These functions are only called if the program is
4640 // running in threaded mode, so there is no need to check whether the program
4641 // is threaded here.
4642 # ifndef XP_DARWIN
4643 static
4644 # endif
4645 void
4646 _malloc_prefork(void) {
4647 // Acquire all mutexes in a safe order.
4648 gArenas.mLock.Lock();
4649
4650 for (auto arena : gArenas.iter()) {
4651 arena->mLock.Lock();
4652 }
4653
4654 base_mtx.Lock();
4655
4656 huge_mtx.Lock();
4657 }
4658
4659 # ifndef XP_DARWIN
4660 static
4661 # endif
4662 void
4663 _malloc_postfork_parent(void) {
4664 // Release all mutexes, now that fork() has completed.
4665 huge_mtx.Unlock();
4666
4667 base_mtx.Unlock();
4668
4669 for (auto arena : gArenas.iter()) {
4670 arena->mLock.Unlock();
4671 }
4672
4673 gArenas.mLock.Unlock();
4674 }
4675
4676 # ifndef XP_DARWIN
4677 static
4678 # endif
4679 void
4680 _malloc_postfork_child(void) {
4681 // Reinitialize all mutexes, now that fork() has completed.
4682 huge_mtx.Init();
4683
4684 base_mtx.Init();
4685
4686 for (auto arena : gArenas.iter()) {
4687 arena->mLock.Init();
4688 }
4689
4690 gArenas.mLock.Init();
4691 }
4692 #endif // XP_WIN
4693
4694 // End library-private functions.
4695 // ***************************************************************************
4696 #ifdef MOZ_REPLACE_MALLOC
// Windows doesn't support weak imports the way LD_PRELOAD on Linux or
// DYLD_INSERT_LIBRARIES on OSX make possible. On that platform, the
// replacement functions are defined as variable pointers to functions
// resolved with GetProcAddress() instead of weak function definitions.
// The same is needed on Android, because the Android linker doesn't handle
// weak linking with libraries that are not LD_PRELOADed, and LD_PRELOADing
// is not very convenient on Android because of the zygote.
4705 # ifdef XP_DARWIN
4706 # define MOZ_REPLACE_WEAK __attribute__((weak_import))
4707 # elif defined(XP_WIN) || defined(ANDROID)
4708 # define MOZ_DYNAMIC_REPLACE_INIT
4709 # define replace_init replace_init_decl
4710 # elif defined(__GNUC__)
4711 # define MOZ_REPLACE_WEAK __attribute__((weak))
4712 # endif
4713
4714 # include "replace_malloc.h"
4715
4716 # define MALLOC_DECL(name, return_type, ...) MozJemalloc::name,
4717
4718 // The default malloc table, i.e. plain allocations. It never changes. It's
4719 // used by init(), and not used after that.
4720 static const malloc_table_t gDefaultMallocTable = {
4721 # include "malloc_decls.h"
4722 };
4723
4724 // The malloc table installed by init(). It never changes from that point
4725 // onward. It will be the same as gDefaultMallocTable if no replace-malloc tool
4726 // is enabled at startup.
4727 static malloc_table_t gOriginalMallocTable = {
4728 # include "malloc_decls.h"
4729 };
4730
4731 // The malloc table installed by jemalloc_replace_dynamic(). (Read the
4732 // comments above that function for more details.)
4733 static malloc_table_t gDynamicMallocTable = {
4734 # include "malloc_decls.h"
4735 };
4736
4737 // This briefly points to gDefaultMallocTable at startup. After that, it points
4738 // to either gOriginalMallocTable or gDynamicMallocTable. It's atomic to avoid
4739 // races when switching between tables.
4740 static Atomic<malloc_table_t const*, mozilla::MemoryOrdering::Relaxed>
4741 gMallocTablePtr;
4742
4743 # ifdef MOZ_DYNAMIC_REPLACE_INIT
4744 # undef replace_init
4745 typedef decltype(replace_init_decl) replace_init_impl_t;
4746 static replace_init_impl_t* replace_init = nullptr;
4747 # endif
4748
4749 # ifdef XP_WIN
4750 typedef HMODULE replace_malloc_handle_t;
4751
4752 static replace_malloc_handle_t replace_malloc_handle() {
4753 wchar_t replace_malloc_lib[1024];
4754 if (GetEnvironmentVariableW(L"MOZ_REPLACE_MALLOC_LIB", replace_malloc_lib,
4755 ArrayLength(replace_malloc_lib)) > 0) {
4756 return LoadLibraryW(replace_malloc_lib);
4757 }
4758 return nullptr;
4759 }
4760
4761 # define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
4762 (replace_init_impl_t*)GetProcAddress(handle, "replace_init")
4763
4764 # elif defined(ANDROID)
4765 # include <dlfcn.h>
4766
4767 typedef void* replace_malloc_handle_t;
4768
4769 static replace_malloc_handle_t replace_malloc_handle() {
4770 const char* replace_malloc_lib = getenv("MOZ_REPLACE_MALLOC_LIB");
4771 if (replace_malloc_lib && *replace_malloc_lib) {
4772 return dlopen(replace_malloc_lib, RTLD_LAZY);
4773 }
4774 return nullptr;
4775 }
4776
4777 # define REPLACE_MALLOC_GET_INIT_FUNC(handle) \
4778 (replace_init_impl_t*)dlsym(handle, "replace_init")
4779
4780 # endif
4781
4782 static void replace_malloc_init_funcs(malloc_table_t*);
4783
4784 # ifdef MOZ_REPLACE_MALLOC_STATIC
4785 extern "C" void logalloc_init(malloc_table_t*, ReplaceMallocBridge**);
4786
4787 extern "C" void dmd_init(malloc_table_t*, ReplaceMallocBridge**);
4788
4789 extern "C" void phc_init(malloc_table_t*, ReplaceMallocBridge**);
4790 # endif
4791
4792 bool Equals(const malloc_table_t& aTable1, const malloc_table_t& aTable2) {
4793 return memcmp(&aTable1, &aTable2, sizeof(malloc_table_t)) == 0;
4794 }
4795
4796 // Below is the malloc implementation overriding jemalloc and calling the
4797 // replacement functions if they exist.
4798 static ReplaceMallocBridge* gReplaceMallocBridge = nullptr;
4799 static void init() {
4800 malloc_table_t tempTable = gDefaultMallocTable;
4801
4802 # ifdef MOZ_DYNAMIC_REPLACE_INIT
4803 replace_malloc_handle_t handle = replace_malloc_handle();
4804 if (handle) {
4805 replace_init = REPLACE_MALLOC_GET_INIT_FUNC(handle);
4806 }
4807 # endif
4808
4809 // Set this *before* calling replace_init, otherwise if replace_init calls
4810 // malloc() we'll get an infinite loop.
4811 gMallocTablePtr = &gDefaultMallocTable;
4812
4813 // Pass in the default allocator table so replace functions can copy and use
4814 // it for their allocations. The replace_init() function should modify the
4815 // table if it wants to be active, otherwise leave it unmodified.
4816 if (replace_init) {
4817 replace_init(&tempTable, &gReplaceMallocBridge);
4818 }
4819 # ifdef MOZ_REPLACE_MALLOC_STATIC
4820 if (Equals(tempTable, gDefaultMallocTable)) {
4821 logalloc_init(&tempTable, &gReplaceMallocBridge);
4822 }
4823 # ifdef MOZ_DMD
4824 if (Equals(tempTable, gDefaultMallocTable)) {
4825 dmd_init(&tempTable, &gReplaceMallocBridge);
4826 }
4827 # endif
4828 # ifdef MOZ_PHC
4829 if (Equals(tempTable, gDefaultMallocTable)) {
4830 phc_init(&tempTable, &gReplaceMallocBridge);
4831 }
4832 # endif
4833 # endif
4834 if (!Equals(tempTable, gDefaultMallocTable)) {
4835 replace_malloc_init_funcs(&tempTable);
4836 }
4837 gOriginalMallocTable = tempTable;
4838 gMallocTablePtr = &gOriginalMallocTable;
4839 }
4840
4841 // WARNING WARNING WARNING: this function should be used with extreme care. It
4842 // is not as general-purpose as it looks. It is currently used by
4843 // tools/profiler/core/memory_hooks.cpp for counting allocations and probably
4844 // should not be used for any other purpose.
4845 //
4846 // This function allows the original malloc table to be temporarily replaced by
4847 // a different malloc table. Or, if the argument is nullptr, it switches back to
4848 // the original malloc table.
4849 //
4850 // Limitations:
4851 //
4852 // - It is not threadsafe. If multiple threads pass it the same
4853 // `replace_init_func` at the same time, there will be data races writing to
4854 // the malloc_table_t within that function.
4855 //
4856 // - Only one replacement can be installed. No nesting is allowed.
4857 //
4858 // - The new malloc table must be able to free allocations made by the original
4859 // malloc table, and upon removal the original malloc table must be able to
4860 // free allocations made by the new malloc table. This means the new malloc
4861 // table can only do simple things like recording extra information, while
4862 // delegating actual allocation/free operations to the original malloc table.
4863 //
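// A minimal sketch (hypothetical, for illustration only) of a counting
// replacement in the spirit of the profiler's memory hooks:
//
//   static malloc_table_t sOriginalTable;
//   static Atomic<uint64_t> sMallocCount;
//
//   static void* CountingMalloc(size_t aSize) {
//     sMallocCount++;
//     return sOriginalTable.malloc(aSize);  // delegate the real work
//   }
//
//   static void CountingInit(malloc_table_t* aTable, ReplaceMallocBridge**) {
//     sOriginalTable = *aTable;         // keep the original table around
//     aTable->malloc = CountingMalloc;  // record, then forward
//   }
//
//   // Install:   jemalloc_replace_dynamic(CountingInit);
//   // Uninstall: jemalloc_replace_dynamic(nullptr);
//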
4864 MOZ_JEMALLOC_API void jemalloc_replace_dynamic(
4865 jemalloc_init_func replace_init_func) {
4866 if (replace_init_func) {
4867 malloc_table_t tempTable = gOriginalMallocTable;
4868 (*replace_init_func)(&tempTable, &gReplaceMallocBridge);
4869 if (!Equals(tempTable, gOriginalMallocTable)) {
4870 replace_malloc_init_funcs(&tempTable);
4871
4872 // Temporarily switch back to the original malloc table. In the
4873 // (supported) non-nested case, this is a no-op. But just in case this is
4874 // a (unsupported) nested call, it makes the overwriting of
4875 // gDynamicMallocTable less racy, because ongoing calls to malloc() and
4876 // friends won't go through gDynamicMallocTable.
4877 gMallocTablePtr = &gOriginalMallocTable;
4878
4879 gDynamicMallocTable = tempTable;
4880 gMallocTablePtr = &gDynamicMallocTable;
4881 // We assume that dynamic replaces don't occur close enough for a
4882 // thread to still have old copies of the table pointer when the 2nd
4883 // replace occurs.
4884 }
4885 } else {
4886 // Switch back to the original malloc table.
4887 gMallocTablePtr = &gOriginalMallocTable;
4888 }
4889 }
4890
4891 # define MALLOC_DECL(name, return_type, ...) \
4892 template <> \
4893 inline return_type ReplaceMalloc::name( \
4894 ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) { \
4895 if (MOZ_UNLIKELY(!gMallocTablePtr)) { \
4896 init(); \
4897 } \
4898 return (*gMallocTablePtr).name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
4899 }
4900 # include "malloc_decls.h"
4901
4902 MOZ_JEMALLOC_API struct ReplaceMallocBridge* get_bridge(void) {
4903 if (MOZ_UNLIKELY(!gMallocTablePtr)) {
4904 init();
4905 }
4906 return gReplaceMallocBridge;
4907 }
4908
4909 // posix_memalign, aligned_alloc, memalign and valloc all implement some kind
4910 // of aligned memory allocation. For convenience, a replace-malloc library can
4911 // skip defining replace_posix_memalign, replace_aligned_alloc and
4912 // replace_valloc, and default implementations will be automatically derived
4913 // from replace_memalign.
4914 static void replace_malloc_init_funcs(malloc_table_t* table) {
4915 if (table->posix_memalign == MozJemalloc::posix_memalign &&
4916 table->memalign != MozJemalloc::memalign) {
4917 table->posix_memalign =
4918 AlignedAllocator<ReplaceMalloc::memalign>::posix_memalign;
4919 }
4920 if (table->aligned_alloc == MozJemalloc::aligned_alloc &&
4921 table->memalign != MozJemalloc::memalign) {
4922 table->aligned_alloc =
4923 AlignedAllocator<ReplaceMalloc::memalign>::aligned_alloc;
4924 }
4925 if (table->valloc == MozJemalloc::valloc &&
4926 table->memalign != MozJemalloc::memalign) {
4927 table->valloc = AlignedAllocator<ReplaceMalloc::memalign>::valloc;
4928 }
4929 if (table->moz_create_arena_with_params ==
4930 MozJemalloc::moz_create_arena_with_params &&
4931 table->malloc != MozJemalloc::malloc) {
4932 # define MALLOC_DECL(name, ...) \
4933 table->name = DummyArenaAllocator<ReplaceMalloc>::name;
4934 # define MALLOC_FUNCS MALLOC_FUNCS_ARENA_BASE
4935 # include "malloc_decls.h"
4936 }
4937 if (table->moz_arena_malloc == MozJemalloc::moz_arena_malloc &&
4938 table->malloc != MozJemalloc::malloc) {
4939 # define MALLOC_DECL(name, ...) \
4940 table->name = DummyArenaAllocator<ReplaceMalloc>::name;
4941 # define MALLOC_FUNCS MALLOC_FUNCS_ARENA_ALLOC
4942 # include "malloc_decls.h"
4943 }
4944 }
4945
4946 #endif // MOZ_REPLACE_MALLOC
4947 // ***************************************************************************
4948 // Definition of all the _impl functions
4949 // GENERIC_MALLOC_DECL2_MINGW is only used for the MinGW build, and aliases
4950 // the malloc funcs (e.g. malloc) to the je_ versions. It does not generate
4951 // aliases for the other functions (jemalloc and arena functions).
4952 //
// We do need aliases for the other mozglue.def-redirected functions though;
// these are done at the bottom of mozmemory_wrap.cpp.
4955 #define GENERIC_MALLOC_DECL2_MINGW(name, name_impl, return_type, ...) \
4956 return_type name(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) \
4957 __attribute__((alias(MOZ_STRINGIFY(name_impl))));
4958
4959 #define GENERIC_MALLOC_DECL2(attributes, name, name_impl, return_type, ...) \
4960 return_type name_impl(ARGS_HELPER(TYPED_ARGS, ##__VA_ARGS__)) attributes { \
4961 return DefaultMalloc::name(ARGS_HELPER(ARGS, ##__VA_ARGS__)); \
4962 }
4963
4964 #ifndef __MINGW32__
4965 # define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \
4966 GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
4967 ##__VA_ARGS__)
4968 #else
4969 # define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \
4970 GENERIC_MALLOC_DECL2(attributes, name, name##_impl, return_type, \
4971 ##__VA_ARGS__) \
4972 GENERIC_MALLOC_DECL2_MINGW(name, name##_impl, return_type, ##__VA_ARGS__)
4973 #endif
4974
4975 #define NOTHROW_MALLOC_DECL(...) \
4976 MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (noexcept(true), __VA_ARGS__))
4977 #define MALLOC_DECL(...) \
4978 MOZ_MEMORY_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
4979 #define MALLOC_FUNCS MALLOC_FUNCS_MALLOC
4980 #include "malloc_decls.h"
4981
4982 #undef GENERIC_MALLOC_DECL
4983 #define GENERIC_MALLOC_DECL(attributes, name, return_type, ...) \
4984 GENERIC_MALLOC_DECL2(attributes, name, name, return_type, ##__VA_ARGS__)
4985
4986 #define MALLOC_DECL(...) \
4987 MOZ_JEMALLOC_API MACRO_CALL(GENERIC_MALLOC_DECL, (, __VA_ARGS__))
4988 #define MALLOC_FUNCS (MALLOC_FUNCS_JEMALLOC | MALLOC_FUNCS_ARENA)
4989 #include "malloc_decls.h"
4990 // ***************************************************************************
4991
4992 #ifdef HAVE_DLOPEN
4993 # include <dlfcn.h>
4994 #endif
4995
4996 #if defined(__GLIBC__) && !defined(__UCLIBC__)
4997 // glibc provides the RTLD_DEEPBIND flag for dlopen which can make it possible
4998 // to inconsistently reference libc's malloc(3)-compatible functions
4999 // (bug 493541).
5000 //
5001 // These definitions interpose hooks in glibc. The functions are actually
5002 // passed an extra argument for the caller return address, which will be
5003 // ignored.
5004
5005 extern "C" {
5006 MOZ_EXPORT void (*__free_hook)(void*) = free_impl;
5007 MOZ_EXPORT void* (*__malloc_hook)(size_t) = malloc_impl;
5008 MOZ_EXPORT void* (*__realloc_hook)(void*, size_t) = realloc_impl;
5009 MOZ_EXPORT void* (*__memalign_hook)(size_t, size_t) = memalign_impl;
5010 }
5011
5012 #elif defined(RTLD_DEEPBIND)
5013 // XXX On systems that support RTLD_GROUP or DF_1_GROUP, do their
5014 // implementations permit similar inconsistencies? Should STV_SINGLETON
5015 // visibility be used for interposition where available?
5016 # error \
5017 "Interposing malloc is unsafe on this system without libc malloc hooks."
5018 #endif
5019
5020 #ifdef XP_WIN
5021 MOZ_EXPORT void* _recalloc(void* aPtr, size_t aCount, size_t aSize) {
5022 size_t oldsize = aPtr ? AllocInfo::Get(aPtr).Size() : 0;
5023 CheckedInt<size_t> checkedSize = CheckedInt<size_t>(aCount) * aSize;
5024
5025 if (!checkedSize.isValid()) {
5026 return nullptr;
5027 }
5028
5029 size_t newsize = checkedSize.value();
5030
5031 // In order for all trailing bytes to be zeroed, the caller needs to
5032 // use calloc(), followed by recalloc(). However, the current calloc()
5033 // implementation only zeros the bytes requested, so if recalloc() is
5034 // to work 100% correctly, calloc() will need to change to zero
5035 // trailing bytes.
5036 aPtr = DefaultMalloc::realloc(aPtr, newsize);
5037 if (aPtr && oldsize < newsize) {
5038 memset((void*)((uintptr_t)aPtr + oldsize), 0, newsize - oldsize);
5039 }
5040
5041 return aPtr;
5042 }
5043
5044 // This impl of _expand doesn't ever actually expand or shrink blocks: it
5045 // simply replies that you may continue using a shrunk block.
5046 MOZ_EXPORT void* _expand(void* aPtr, size_t newsize) {
5047 if (AllocInfo::Get(aPtr).Size() >= newsize) {
5048 return aPtr;
5049 }
5050
5051 return nullptr;
5052 }
5053
5054 MOZ_EXPORT size_t _msize(void* aPtr) {
5055 return DefaultMalloc::malloc_usable_size(aPtr);
5056 }
5057 #endif
5058