1 // Copyright (c) 2005, 2007, Google Inc.
2 // All rights reserved.
3 // Copyright (C) 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 //
5 // Redistribution and use in source and binary forms, with or without
6 // modification, are permitted provided that the following conditions are
7 // met:
8 //
9 // * Redistributions of source code must retain the above copyright
10 // notice, this list of conditions and the following disclaimer.
11 // * Redistributions in binary form must reproduce the above
12 // copyright notice, this list of conditions and the following disclaimer
13 // in the documentation and/or other materials provided with the
14 // distribution.
15 // * Neither the name of Google Inc. nor the names of its
16 // contributors may be used to endorse or promote products derived from
17 // this software without specific prior written permission.
18 //
19 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31 // ---
32 // Author: Sanjay Ghemawat <opensource@google.com>
33 //
34 // A malloc that uses a per-thread cache to satisfy small malloc requests.
35 // (The time for malloc/free of a small object drops from 300 ns to 50 ns.)
36 //
37 // See doc/tcmalloc.html for a high-level
38 // description of how this malloc works.
39 //
40 // SYNCHRONIZATION
41 // 1. The thread-specific lists are accessed without acquiring any locks.
42 // This is safe because each such list is only accessed by one thread.
43 // 2. We have a lock per central free-list, and hold it while manipulating
44 // the central free list for a particular size.
45 // 3. The central page allocator is protected by "pageheap_lock".
46 // 4. The pagemap (which maps from page-number to descriptor),
47 // can be read without holding any locks, and written while holding
48 // the "pageheap_lock".
49 // 5. To improve performance, a subset of the information one can get
50 // from the pagemap is cached in a data structure, pagemap_cache_,
51 // that atomically reads and writes its entries. This cache can be
52 // read and written without locking.
53 //
54 // This multi-threaded access to the pagemap is safe for fairly
55 // subtle reasons. We basically assume that when an object X is
56 // allocated by thread A and deallocated by thread B, there must
57 // have been appropriate synchronization in the handoff of object
58 // X from thread A to thread B. The same logic applies to pagemap_cache_.
59 //
60 // THE PAGEID-TO-SIZECLASS CACHE
61 // Hot PageID-to-sizeclass mappings are held by pagemap_cache_. If this cache
62 // returns 0 for a particular PageID then that means "no information," not that
63 // the sizeclass is 0. The cache may have stale information for pages that do
64 // not hold the beginning of any free()'able object. Staleness is eliminated
65 // in Populate() for pages with sizeclass > 0 objects, and in do_malloc() and
66 // do_memalign() for all other relevant pages.
67 //
68 // TODO: Bias reclamation to larger addresses
69 // TODO: implement mallinfo/mallopt
70 // TODO: Better testing
71 //
72 // 9/28/2003 (new page-level allocator replaces ptmalloc2):
73 // * malloc/free of small objects goes from ~300 ns to ~50 ns.
74 // * allocation of a reasonably complicated struct
75 // goes from about 1100 ns to about 300 ns.
76
77 #include "config.h"
78 #include "FastMalloc.h"
79
80 #include "Assertions.h"
81 #include <limits>
82 #if ENABLE(JSC_MULTIPLE_THREADS)
83 #include <pthread.h>
84 #endif
85
86 #ifndef NO_TCMALLOC_SAMPLES
87 #ifdef WTF_CHANGES
88 #define NO_TCMALLOC_SAMPLES
89 #endif
90 #endif
91
92 #if !(defined(USE_SYSTEM_MALLOC) && USE_SYSTEM_MALLOC) && defined(NDEBUG)
93 #define FORCE_SYSTEM_MALLOC 0
94 #else
95 #define FORCE_SYSTEM_MALLOC 1
96 #endif
97
98 // Use a background thread to periodically scavenge memory to release back to the system
99 // https://bugs.webkit.org/show_bug.cgi?id=27900: don't turn this on for Tiger until we have figured out why it caused a crash.
100 #if defined(BUILDING_ON_TIGER)
101 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 0
102 #else
103 #define USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY 1
104 #endif
105
106 #if defined(__HP_aCC)
// HP's aCC compiler has broken 'for' loop scoping
108 # define for if(0){}else for
109 #endif
110
111 #ifndef NDEBUG
112 namespace WTF {
113
114 #if ENABLE(JSC_MULTIPLE_THREADS)
115 static pthread_key_t isForbiddenKey;
116 static pthread_once_t isForbiddenKeyOnce = PTHREAD_ONCE_INIT;
static void initializeIsForbiddenKey()
118 {
119 pthread_key_create(&isForbiddenKey, 0);
120 }
121
122 #if !ASSERT_DISABLED
static bool isForbidden()
124 {
125 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
126 return !!pthread_getspecific(isForbiddenKey);
127 }
128 #endif
129
void fastMallocForbid()
131 {
132 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
133 pthread_setspecific(isForbiddenKey, &isForbiddenKey);
134 }
135
void fastMallocAllow()
137 {
138 pthread_once(&isForbiddenKeyOnce, initializeIsForbiddenKey);
139 pthread_setspecific(isForbiddenKey, 0);
140 }
141
142 #else
143
144 static bool staticIsForbidden;
145 static bool isForbidden()
146 {
147 return staticIsForbidden;
148 }
149
150 void fastMallocForbid()
151 {
152 staticIsForbidden = true;
153 }
154
155 void fastMallocAllow()
156 {
157 staticIsForbidden = false;
158 }
159 #endif // ENABLE(JSC_MULTIPLE_THREADS)
160
161 } // namespace WTF
162 #endif // NDEBUG
163
164 #include <string.h>
165
166 namespace WTF {
167
168 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
169
170 namespace Internal {
171
void fastMallocMatchFailed(void*)
173 {
174 CRASH();
175 }
176
177 } // namespace Internal
178
179 #endif
180
void* fastZeroedMalloc(size_t n)
182 {
183 void* result = fastMalloc(n);
184 memset(result, 0, n);
185 return result;
186 }
187
char* fastStrDup(const char* src)
189 {
    size_t len = strlen(src) + 1;
191 char* dup = static_cast<char*>(fastMalloc(len));
192
193 if (dup)
194 memcpy(dup, src, len);
195
196 return dup;
197 }
198
TryMallocReturnValue tryFastZeroedMalloc(size_t n)
200 {
201 void* result;
202 if (!tryFastMalloc(n).getValue(result))
203 return 0;
204 memset(result, 0, n);
205 return result;
206 }
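
// Illustrative sketch (not part of the build) of how callers are expected to
// use the try* variants above: TryMallocReturnValue forces the caller to test
// for failure via getValue() before the pointer can be used, whereas the
// non-try variants CRASH() on failure. The 4096-byte size is an arbitrary
// example value.
//
//     void* buffer;
//     if (!tryFastZeroedMalloc(4096).getValue(buffer))
//         return; // allocation failed; nothing to free
//     ...
//     fastFree(buffer);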
207
208 } // namespace WTF
209
210 #if FORCE_SYSTEM_MALLOC
211
212 namespace WTF {
213
TryMallocReturnValue tryFastMalloc(size_t n)
215 {
216 ASSERT(!isForbidden());
217
218 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
219 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
220 return 0;
221
222 void* result = malloc(n + sizeof(AllocAlignmentInteger));
223 if (!result)
224 return 0;
225
226 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
227 result = static_cast<AllocAlignmentInteger*>(result) + 1;
228
229 return result;
230 #else
231 return malloc(n);
232 #endif
233 }
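
// When FAST_MALLOC_MATCH_VALIDATION is enabled, each allocation above is
// prefixed with a single AllocAlignmentInteger tag so that fastFree() can
// verify the block really came from fastMalloc(). A rough sketch of the
// resulting layout (derived from the code above, for illustration only):
//
//     |<- sizeof(AllocAlignmentInteger) ->|<---------- n bytes ---------->|
//     |     Internal::AllocTypeMalloc     |   bytes returned to caller    |
//     ^ address passed to free()/realloc()
//                                         ^ pointer returned to the caller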
234
void* fastMalloc(size_t n)
236 {
237 ASSERT(!isForbidden());
238
239 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
240 TryMallocReturnValue returnValue = tryFastMalloc(n);
241 void* result;
242 returnValue.getValue(result);
243 #else
244 void* result = malloc(n);
245 #endif
246
247 if (!result)
248 CRASH();
249 return result;
250 }
251
TryMallocReturnValue tryFastCalloc(size_t n_elements, size_t element_size)
253 {
254 ASSERT(!isForbidden());
255
256 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
257 size_t totalBytes = n_elements * element_size;
    if ((n_elements > 1 && element_size && (totalBytes / element_size) != n_elements) || (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes))
259 return 0;
260
261 totalBytes += sizeof(AllocAlignmentInteger);
262 void* result = malloc(totalBytes);
263 if (!result)
264 return 0;
265
266 memset(result, 0, totalBytes);
267 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
268 result = static_cast<AllocAlignmentInteger*>(result) + 1;
269 return result;
270 #else
271 return calloc(n_elements, element_size);
272 #endif
273 }
274
void* fastCalloc(size_t n_elements, size_t element_size)
276 {
277 ASSERT(!isForbidden());
278
279 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
280 TryMallocReturnValue returnValue = tryFastCalloc(n_elements, element_size);
281 void* result;
282 returnValue.getValue(result);
283 #else
284 void* result = calloc(n_elements, element_size);
285 #endif
286
287 if (!result)
288 CRASH();
289 return result;
290 }
291
void fastFree(void* p)
293 {
294 ASSERT(!isForbidden());
295
296 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
297 if (!p)
298 return;
299
300 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
301 if (*header != Internal::AllocTypeMalloc)
302 Internal::fastMallocMatchFailed(p);
303 free(header);
304 #else
305 free(p);
306 #endif
307 }
308
TryMallocReturnValue tryFastRealloc(void* p, size_t n)
310 {
311 ASSERT(!isForbidden());
312
313 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
314 if (p) {
315 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= n) // If overflow would occur...
316 return 0;
317 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(p);
318 if (*header != Internal::AllocTypeMalloc)
319 Internal::fastMallocMatchFailed(p);
320 void* result = realloc(header, n + sizeof(AllocAlignmentInteger));
321 if (!result)
322 return 0;
323
324 // This should not be needed because the value is already there:
325 // *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
326 result = static_cast<AllocAlignmentInteger*>(result) + 1;
327 return result;
328 } else {
329 return fastMalloc(n);
330 }
331 #else
332 return realloc(p, n);
333 #endif
334 }
335
void* fastRealloc(void* p, size_t n)
337 {
338 ASSERT(!isForbidden());
339
340 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
341 TryMallocReturnValue returnValue = tryFastRealloc(p, n);
342 void* result;
343 returnValue.getValue(result);
344 #else
345 void* result = realloc(p, n);
346 #endif
347
348 if (!result)
349 CRASH();
350 return result;
351 }
352
void releaseFastMallocFreeMemory() { }
354
FastMallocStatistics fastMallocStatistics()
356 {
357 FastMallocStatistics statistics = { 0, 0, 0, 0 };
358 return statistics;
359 }
360
361 } // namespace WTF
362
363 #if OS(DARWIN)
364 // This symbol is present in the JavaScriptCore exports file even when FastMalloc is disabled.
// It will never be used in this case, so its type and value are less interesting than its presence.
366 extern "C" const int jscore_fastmalloc_introspection = 0;
367 #endif
368
369 #else // FORCE_SYSTEM_MALLOC
370
371 #if HAVE(STDINT_H)
372 #include <stdint.h>
373 #elif HAVE(INTTYPES_H)
374 #include <inttypes.h>
375 #else
376 #include <sys/types.h>
377 #endif
378
379 #include "AlwaysInline.h"
380 #include "Assertions.h"
381 #include "TCPackedCache.h"
382 #include "TCPageMap.h"
383 #include "TCSpinLock.h"
384 #include "TCSystemAlloc.h"
385 #include <algorithm>
386 #include <errno.h>
387 #include <limits>
388 #include <new>
389 #include <pthread.h>
390 #include <stdarg.h>
391 #include <stddef.h>
392 #include <stdio.h>
393 #if OS(UNIX)
394 #include <unistd.h>
395 #endif
396 #if COMPILER(MSVC)
397 #ifndef WIN32_LEAN_AND_MEAN
398 #define WIN32_LEAN_AND_MEAN
399 #endif
400 #include <windows.h>
401 #endif
402
403 #if WTF_CHANGES
404
405 #if OS(DARWIN)
406 #include "MallocZoneSupport.h"
407 #include <wtf/HashSet.h>
408 #include <wtf/Vector.h>
409 #endif
410 #if HAVE(DISPATCH_H)
411 #include <dispatch/dispatch.h>
412 #endif
413
414
415 #ifndef PRIuS
416 #define PRIuS "zu"
417 #endif
418
419 // Calling pthread_getspecific through a global function pointer is faster than a normal
420 // call to the function on Mac OS X, and it's used in performance-critical code. So we
421 // use a function pointer. But that's not necessarily faster on other platforms, and we had
422 // problems with this technique on Windows, so we'll do this only on Mac OS X.
423 #if OS(DARWIN)
424 static void* (*pthread_getspecific_function_pointer)(pthread_key_t) = pthread_getspecific;
425 #define pthread_getspecific(key) pthread_getspecific_function_pointer(key)
426 #endif
427
428 #define DEFINE_VARIABLE(type, name, value, meaning) \
429 namespace FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead { \
430 type FLAGS_##name(value); \
431 char FLAGS_no##name; \
432 } \
433 using FLAG__namespace_do_not_use_directly_use_DECLARE_##type##_instead::FLAGS_##name
434
435 #define DEFINE_int64(name, value, meaning) \
436 DEFINE_VARIABLE(int64_t, name, value, meaning)
437
438 #define DEFINE_double(name, value, meaning) \
439 DEFINE_VARIABLE(double, name, value, meaning)
440
441 namespace WTF {
442
443 #define malloc fastMalloc
444 #define calloc fastCalloc
445 #define free fastFree
446 #define realloc fastRealloc
447
448 #define MESSAGE LOG_ERROR
449 #define CHECK_CONDITION ASSERT
450
451 #if OS(DARWIN)
452 class Span;
453 class TCMalloc_Central_FreeListPadded;
454 class TCMalloc_PageHeap;
455 class TCMalloc_ThreadCache;
456 template <typename T> class PageHeapAllocator;
457
458 class FastMallocZone {
459 public:
460 static void init();
461
    static kern_return_t enumerate(task_t, void*, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t, vm_range_recorder_t);
    static size_t goodSize(malloc_zone_t*, size_t size) { return size; }
    static boolean_t check(malloc_zone_t*) { return true; }
    static void print(malloc_zone_t*, boolean_t) { }
    static void log(malloc_zone_t*, void*) { }
    static void forceLock(malloc_zone_t*) { }
    static void forceUnlock(malloc_zone_t*) { }
    static void statistics(malloc_zone_t*, malloc_statistics_t* stats) { memset(stats, 0, sizeof(malloc_statistics_t)); }
470
471 private:
472 FastMallocZone(TCMalloc_PageHeap*, TCMalloc_ThreadCache**, TCMalloc_Central_FreeListPadded*, PageHeapAllocator<Span>*, PageHeapAllocator<TCMalloc_ThreadCache>*);
473 static size_t size(malloc_zone_t*, const void*);
474 static void* zoneMalloc(malloc_zone_t*, size_t);
475 static void* zoneCalloc(malloc_zone_t*, size_t numItems, size_t size);
476 static void zoneFree(malloc_zone_t*, void*);
477 static void* zoneRealloc(malloc_zone_t*, void*, size_t);
    static void* zoneValloc(malloc_zone_t*, size_t) { LOG_ERROR("valloc is not supported"); return 0; }
    static void zoneDestroy(malloc_zone_t*) { }
480
481 malloc_zone_t m_zone;
482 TCMalloc_PageHeap* m_pageHeap;
483 TCMalloc_ThreadCache** m_threadHeaps;
484 TCMalloc_Central_FreeListPadded* m_centralCaches;
485 PageHeapAllocator<Span>* m_spanAllocator;
486 PageHeapAllocator<TCMalloc_ThreadCache>* m_pageHeapAllocator;
487 };
488
489 #endif
490
491 #endif
492
493 #ifndef WTF_CHANGES
494 // This #ifdef should almost never be set. Set NO_TCMALLOC_SAMPLES if
495 // you're porting to a system where you really can't get a stacktrace.
496 #ifdef NO_TCMALLOC_SAMPLES
497 // We use #define so code compiles even if you #include stacktrace.h somehow.
498 # define GetStackTrace(stack, depth, skip) (0)
499 #else
500 # include <google/stacktrace.h>
501 #endif
502 #endif
503
504 // Even if we have support for thread-local storage in the compiler
505 // and linker, the OS may not support it. We need to check that at
506 // runtime. Right now, we have to keep a manual set of "bad" OSes.
507 #if defined(HAVE_TLS)
508 static bool kernel_supports_tls = false; // be conservative
static inline bool KernelSupportsTLS() {
510 return kernel_supports_tls;
511 }
512 # if !HAVE_DECL_UNAME // if too old for uname, probably too old for TLS
static void CheckIfKernelSupportsTLS() {
514 kernel_supports_tls = false;
515 }
516 # else
517 # include <sys/utsname.h> // DECL_UNAME checked for <sys/utsname.h> too
static void CheckIfKernelSupportsTLS() {
519 struct utsname buf;
520 if (uname(&buf) != 0) { // should be impossible
521 MESSAGE("uname failed assuming no TLS support (errno=%d)\n", errno);
522 kernel_supports_tls = false;
523 } else if (strcasecmp(buf.sysname, "linux") == 0) {
524 // The linux case: the first kernel to support TLS was 2.6.0
525 if (buf.release[0] < '2' && buf.release[1] == '.') // 0.x or 1.x
526 kernel_supports_tls = false;
527 else if (buf.release[0] == '2' && buf.release[1] == '.' &&
528 buf.release[2] >= '0' && buf.release[2] < '6' &&
529 buf.release[3] == '.') // 2.0 - 2.5
530 kernel_supports_tls = false;
531 else
532 kernel_supports_tls = true;
  } else { // some other kernel, we'll be optimistic
534 kernel_supports_tls = true;
535 }
536 // TODO(csilvers): VLOG(1) the tls status once we support RAW_VLOG
537 }
538 # endif // HAVE_DECL_UNAME
539 #endif // HAVE_TLS
540
541 // __THROW is defined in glibc systems. It means, counter-intuitively,
542 // "This function will never throw an exception." It's an optional
543 // optimization tool, but we may need to use it to match glibc prototypes.
544 #ifndef __THROW // I guess we're not on a glibc system
545 # define __THROW // __THROW is just an optimization, so ok to make it ""
546 #endif
547
548 //-------------------------------------------------------------------
549 // Configuration
550 //-------------------------------------------------------------------
551
552 // Not all possible combinations of the following parameters make
553 // sense. In particular, if kMaxSize increases, you may have to
554 // increase kNumClasses as well.
555 static const size_t kPageShift = 12;
556 static const size_t kPageSize = 1 << kPageShift;
557 static const size_t kMaxSize = 8u * kPageSize;
558 static const size_t kAlignShift = 3;
559 static const size_t kAlignment = 1 << kAlignShift;
560 static const size_t kNumClasses = 68;
561
562 // Allocates a big block of memory for the pagemap once we reach more than
563 // 128MB
564 static const size_t kPageMapBigAllocationThreshold = 128 << 20;
565
566 // Minimum number of pages to fetch from system at a time. Must be
567 // significantly bigger than kPageSize to amortize system-call
// overhead, and also to reduce external fragmentation. Also, we
569 // should keep this value big because various incarnations of Linux
570 // have small limits on the number of mmap() regions per
571 // address-space.
572 static const size_t kMinSystemAlloc = 1 << (20 - kPageShift);
573
574 // Number of objects to move between a per-thread list and a central
575 // list in one shot. We want this to be not too small so we can
576 // amortize the lock overhead for accessing the central list. Making
577 // it too big may temporarily cause unnecessary memory wastage in the
578 // per-thread free list until the scavenger cleans up the list.
579 static int num_objects_to_move[kNumClasses];
580
581 // Maximum length we allow a per-thread free-list to have before we
582 // move objects from it into the corresponding central free-list. We
583 // want this big to avoid locking the central free-list too often. It
584 // should not hurt to make this list somewhat big because the
585 // scavenging code will shrink it down when its contents are not in use.
586 static const int kMaxFreeListLength = 256;
587
588 // Lower and upper bounds on the per-thread cache sizes
589 static const size_t kMinThreadCacheSize = kMaxSize * 2;
590 static const size_t kMaxThreadCacheSize = 2 << 20;
591
592 // Default bound on the total amount of thread caches
593 static const size_t kDefaultOverallThreadCacheSize = 16 << 20;
594
595 // For all span-lengths < kMaxPages we keep an exact-size list.
596 // REQUIRED: kMaxPages >= kMinSystemAlloc;
597 static const size_t kMaxPages = kMinSystemAlloc;
598
599 /* The smallest prime > 2^n */
600 static int primes_list[] = {
601 // Small values might cause high rates of sampling
    // and hence are commented out.
603 // 2, 5, 11, 17, 37, 67, 131, 257,
604 // 521, 1031, 2053, 4099, 8209, 16411,
605 32771, 65537, 131101, 262147, 524309, 1048583,
606 2097169, 4194319, 8388617, 16777259, 33554467 };
607
608 // Twice the approximate gap between sampling actions.
609 // I.e., we take one sample approximately once every
610 // tcmalloc_sample_parameter/2
611 // bytes of allocation, i.e., ~ once every 128KB.
612 // Must be a prime number.
613 #ifdef NO_TCMALLOC_SAMPLES
614 DEFINE_int64(tcmalloc_sample_parameter, 0,
615 "Unused: code is compiled with NO_TCMALLOC_SAMPLES");
616 static size_t sample_period = 0;
617 #else
618 DEFINE_int64(tcmalloc_sample_parameter, 262147,
619 "Twice the approximate gap between sampling actions."
620 " Must be a prime number. Otherwise will be rounded up to a "
621 " larger prime number");
622 static size_t sample_period = 262147;
623 #endif
624
625 // Protects sample_period above
626 static SpinLock sample_period_lock = SPINLOCK_INITIALIZER;
627
628 // Parameters for controlling how fast memory is returned to the OS.
629
630 DEFINE_double(tcmalloc_release_rate, 1,
631 "Rate at which we release unused memory to the system. "
632 "Zero means we never release memory back to the system. "
633 "Increase this flag to return memory faster; decrease it "
634 "to return memory slower. Reasonable rates are in the "
635 "range [0,10]");
636
637 //-------------------------------------------------------------------
638 // Mapping from size to size_class and vice versa
639 //-------------------------------------------------------------------
640
641 // Sizes <= 1024 have an alignment >= 8. So for such sizes we have an
642 // array indexed by ceil(size/8). Sizes > 1024 have an alignment >= 128.
643 // So for these larger sizes we have an array indexed by ceil(size/128).
644 //
645 // We flatten both logical arrays into one physical array and use
646 // arithmetic to compute an appropriate index. The constants used by
647 // ClassIndex() were selected to make the flattening work.
648 //
649 // Examples:
650 // Size Expression Index
651 // -------------------------------------------------------
652 // 0 (0 + 7) / 8 0
653 // 1 (1 + 7) / 8 1
654 // ...
655 // 1024 (1024 + 7) / 8 128
656 // 1025 (1025 + 127 + (120<<7)) / 128 129
657 // ...
658 // 32768 (32768 + 127 + (120<<7)) / 128 376
659 static const size_t kMaxSmallSize = 1024;
660 static const int shift_amount[2] = { 3, 7 }; // For divides by 8 or 128
661 static const int add_amount[2] = { 7, 127 + (120 << 7) };
662 static unsigned char class_array[377];
663
664 // Compute index of the class_array[] entry for a given size
static inline int ClassIndex(size_t s) {
666 const int i = (s > kMaxSmallSize);
667 return static_cast<int>((s + add_amount[i]) >> shift_amount[i]);
668 }
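
// A worked check of the arithmetic above, mirroring the table before
// ClassIndex() (illustration only):
//     ClassIndex(1024)  == (1024 + 7) >> 3                 == 128
//     ClassIndex(1025)  == (1025 + 127 + (120 << 7)) >> 7  == 129
//     ClassIndex(32768) == (32768 + 127 + (120 << 7)) >> 7 == 376
// 376 is the largest index produced for kMaxSize (8 * 4096), which is why
// class_array[] above has 377 entries.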
669
670 // Mapping from size class to max size storable in that class
671 static size_t class_to_size[kNumClasses];
672
673 // Mapping from size class to number of pages to allocate at a time
674 static size_t class_to_pages[kNumClasses];
675
676 // TransferCache is used to cache transfers of num_objects_to_move[size_class]
677 // back and forth between thread caches and the central cache for a given size
678 // class.
679 struct TCEntry {
680 void *head; // Head of chain of objects.
681 void *tail; // Tail of chain of objects.
682 };
683 // A central cache freelist can have anywhere from 0 to kNumTransferEntries
// slots to put linked list chains into. To keep memory usage bounded the total
685 // number of TCEntries across size classes is fixed. Currently each size
686 // class is initially given one TCEntry which also means that the maximum any
687 // one class can have is kNumClasses.
688 static const int kNumTransferEntries = kNumClasses;
689
690 // Note: the following only works for "n"s that fit in 32-bits, but
691 // that is fine since we only use it for small sizes.
static inline int LgFloor(size_t n) {
693 int log = 0;
694 for (int i = 4; i >= 0; --i) {
695 int shift = (1 << i);
696 size_t x = n >> shift;
697 if (x != 0) {
698 n = x;
699 log += shift;
700 }
701 }
702 ASSERT(n == 1);
703 return log;
704 }
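
// Example trace (illustration only): LgFloor(37)
//     i=4: 37 >> 16 == 0, skip    i=3: 37 >> 8 == 0, skip
//     i=2: 37 >> 4  == 2, so n=2, log=4
//     i=1:  2 >> 2  == 0, skip    i=0:  2 >> 1 == 1, so n=1, log=5
// giving LgFloor(37) == 5 == floor(log2(37)).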
705
706 // Some very basic linked list functions for dealing with using void * as
707 // storage.
708
static inline void *SLL_Next(void *t) {
710 return *(reinterpret_cast<void**>(t));
711 }
712
static inline void SLL_SetNext(void *t, void *n) {
714 *(reinterpret_cast<void**>(t)) = n;
715 }
716
static inline void SLL_Push(void **list, void *element) {
718 SLL_SetNext(element, *list);
719 *list = element;
720 }
721
static inline void *SLL_Pop(void **list) {
723 void *result = *list;
724 *list = SLL_Next(*list);
725 return result;
726 }
727
728
729 // Remove N elements from a linked list to which head points. head will be
730 // modified to point to the new head. start and end will point to the first
731 // and last nodes of the range. Note that end will point to NULL after this
732 // function is called.
static inline void SLL_PopRange(void **head, int N, void **start, void **end) {
734 if (N == 0) {
735 *start = NULL;
736 *end = NULL;
737 return;
738 }
739
740 void *tmp = *head;
741 for (int i = 1; i < N; ++i) {
742 tmp = SLL_Next(tmp);
743 }
744
745 *start = *head;
746 *end = tmp;
747 *head = SLL_Next(tmp);
748 // Unlink range from list.
749 SLL_SetNext(tmp, NULL);
750 }
751
static inline void SLL_PushRange(void **head, void *start, void *end) {
753 if (!start) return;
754 SLL_SetNext(end, *head);
755 *head = start;
756 }
757
static inline size_t SLL_Size(void *head) {
759 int count = 0;
760 while (head) {
761 count++;
762 head = SLL_Next(head);
763 }
764 return count;
765 }
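
// Illustrative sketch (not part of the build) of how the SLL_* helpers are
// used: every free object is at least pointer-sized, so its first word is
// reused to hold the "next" link and no separate node allocation is needed.
// 'a' and 'b' below stand for two hypothetical free objects.
//
//     void* freeList = NULL;
//     SLL_Push(&freeList, a);           // freeList -> a -> NULL
//     SLL_Push(&freeList, b);           // freeList -> b -> a -> NULL
//     ASSERT(SLL_Size(freeList) == 2);
//     void* p = SLL_Pop(&freeList);     // p == b, freeList -> a -> NULL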
766
767 // Setup helper functions.
768
static ALWAYS_INLINE size_t SizeClass(size_t size) {
770 return class_array[ClassIndex(size)];
771 }
772
773 // Get the byte-size for a specified class
static ALWAYS_INLINE size_t ByteSizeForClass(size_t cl) {
775 return class_to_size[cl];
776 }
static int NumMoveSize(size_t size) {
778 if (size == 0) return 0;
779 // Use approx 64k transfers between thread and central caches.
780 int num = static_cast<int>(64.0 * 1024.0 / size);
781 if (num < 2) num = 2;
782 // Clamp well below kMaxFreeListLength to avoid ping pong between central
783 // and thread caches.
784 if (num > static_cast<int>(0.8 * kMaxFreeListLength))
785 num = static_cast<int>(0.8 * kMaxFreeListLength);
786
787 // Also, avoid bringing in too many objects into small object free
788 // lists. There are lots of such lists, and if we allow each one to
789 // fetch too many at a time, we end up having to scavenge too often
790 // (especially when there are lots of threads and each thread gets a
791 // small allowance for its thread cache).
792 //
793 // TODO: Make thread cache free list sizes dynamic so that we do not
794 // have to equally divide a fixed resource amongst lots of threads.
795 if (num > 32) num = 32;
796
797 return num;
798 }
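
// Worked examples (illustration only), with kMaxFreeListLength == 256 so the
// clamp is 0.8 * 256 == 204:
//     NumMoveSize(32)    -> 64K / 32    == 2048, clamped to 204, then to 32
//     NumMoveSize(4096)  -> 64K / 4096  == 16
//     NumMoveSize(32768) -> 64K / 32768 == 2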
799
800 // Initialize the mapping arrays
static void InitSizeClasses() {
802 // Do some sanity checking on add_amount[]/shift_amount[]/class_array[]
803 if (ClassIndex(0) < 0) {
804 MESSAGE("Invalid class index %d for size 0\n", ClassIndex(0));
805 CRASH();
806 }
807 if (static_cast<size_t>(ClassIndex(kMaxSize)) >= sizeof(class_array)) {
808 MESSAGE("Invalid class index %d for kMaxSize\n", ClassIndex(kMaxSize));
809 CRASH();
810 }
811
812 // Compute the size classes we want to use
813 size_t sc = 1; // Next size class to assign
814 unsigned char alignshift = kAlignShift;
815 int last_lg = -1;
816 for (size_t size = kAlignment; size <= kMaxSize; size += (1 << alignshift)) {
817 int lg = LgFloor(size);
818 if (lg > last_lg) {
819 // Increase alignment every so often.
820 //
821 // Since we double the alignment every time size doubles and
822 // size >= 128, this means that space wasted due to alignment is
823 // at most 16/128 i.e., 12.5%. Plus we cap the alignment at 256
824 // bytes, so the space wasted as a percentage starts falling for
825 // sizes > 2K.
826 if ((lg >= 7) && (alignshift < 8)) {
827 alignshift++;
828 }
829 last_lg = lg;
830 }
831
832 // Allocate enough pages so leftover is less than 1/8 of total.
833 // This bounds wasted space to at most 12.5%.
834 size_t psize = kPageSize;
835 while ((psize % size) > (psize >> 3)) {
836 psize += kPageSize;
837 }
838 const size_t my_pages = psize >> kPageShift;
839
840 if (sc > 1 && my_pages == class_to_pages[sc-1]) {
841 // See if we can merge this into the previous class without
842 // increasing the fragmentation of the previous class.
843 const size_t my_objects = (my_pages << kPageShift) / size;
844 const size_t prev_objects = (class_to_pages[sc-1] << kPageShift)
845 / class_to_size[sc-1];
846 if (my_objects == prev_objects) {
847 // Adjust last class to include this size
848 class_to_size[sc-1] = size;
849 continue;
850 }
851 }
852
853 // Add new class
854 class_to_pages[sc] = my_pages;
855 class_to_size[sc] = size;
856 sc++;
857 }
858 if (sc != kNumClasses) {
859 MESSAGE("wrong number of size classes: found %" PRIuS " instead of %d\n",
860 sc, int(kNumClasses));
861 CRASH();
862 }
863
864 // Initialize the mapping arrays
865 int next_size = 0;
866 for (unsigned char c = 1; c < kNumClasses; c++) {
867 const size_t max_size_in_class = class_to_size[c];
868 for (size_t s = next_size; s <= max_size_in_class; s += kAlignment) {
869 class_array[ClassIndex(s)] = c;
870 }
871 next_size = static_cast<int>(max_size_in_class + kAlignment);
872 }
873
874 // Double-check sizes just to be safe
875 for (size_t size = 0; size <= kMaxSize; size++) {
876 const size_t sc = SizeClass(size);
877 if (sc == 0) {
878 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
879 CRASH();
880 }
881 if (sc > 1 && size <= class_to_size[sc-1]) {
882 MESSAGE("Allocating unnecessarily large class %" PRIuS " for %" PRIuS
883 "\n", sc, size);
884 CRASH();
885 }
886 if (sc >= kNumClasses) {
887 MESSAGE("Bad size class %" PRIuS " for %" PRIuS "\n", sc, size);
888 CRASH();
889 }
890 const size_t s = class_to_size[sc];
891 if (size > s) {
892 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
893 CRASH();
894 }
895 if (s == 0) {
896 MESSAGE("Bad size %" PRIuS " for %" PRIuS " (sc = %" PRIuS ")\n", s, size, sc);
897 CRASH();
898 }
899 }
900
901 // Initialize the num_objects_to_move array.
902 for (size_t cl = 1; cl < kNumClasses; ++cl) {
903 num_objects_to_move[cl] = NumMoveSize(ByteSizeForClass(cl));
904 }
905
906 #ifndef WTF_CHANGES
907 if (false) {
908 // Dump class sizes and maximum external wastage per size class
909 for (size_t cl = 1; cl < kNumClasses; ++cl) {
910 const int alloc_size = class_to_pages[cl] << kPageShift;
911 const int alloc_objs = alloc_size / class_to_size[cl];
912 const int min_used = (class_to_size[cl-1] + 1) * alloc_objs;
913 const int max_waste = alloc_size - min_used;
914 MESSAGE("SC %3d [ %8d .. %8d ] from %8d ; %2.0f%% maxwaste\n",
915 int(cl),
916 int(class_to_size[cl-1] + 1),
917 int(class_to_size[cl]),
918 int(class_to_pages[cl] << kPageShift),
919 max_waste * 100.0 / alloc_size
920 );
921 }
922 }
923 #endif
924 }
925
926 // -------------------------------------------------------------------------
927 // Simple allocator for objects of a specified type. External locking
928 // is required before accessing one of these objects.
929 // -------------------------------------------------------------------------
930
931 // Metadata allocator -- keeps stats about how many bytes allocated
932 static uint64_t metadata_system_bytes = 0;
static void* MetaDataAlloc(size_t bytes) {
934 void* result = TCMalloc_SystemAlloc(bytes, 0);
935 if (result != NULL) {
936 metadata_system_bytes += bytes;
937 }
938 return result;
939 }
940
941 template <class T>
942 class PageHeapAllocator {
943 private:
944 // How much to allocate from system at a time
945 static const size_t kAllocIncrement = 32 << 10;
946
947 // Aligned size of T
948 static const size_t kAlignedSize
949 = (((sizeof(T) + kAlignment - 1) / kAlignment) * kAlignment);
950
951 // Free area from which to carve new objects
952 char* free_area_;
953 size_t free_avail_;
954
955 // Linked list of all regions allocated by this allocator
956 void* allocated_regions_;
957
958 // Free list of already carved objects
959 void* free_list_;
960
961 // Number of allocated but unfreed objects
962 int inuse_;
963
964 public:
  void Init() {
966 ASSERT(kAlignedSize <= kAllocIncrement);
967 inuse_ = 0;
968 allocated_regions_ = 0;
969 free_area_ = NULL;
970 free_avail_ = 0;
971 free_list_ = NULL;
972 }
973
  T* New() {
975 // Consult free list
976 void* result;
977 if (free_list_ != NULL) {
978 result = free_list_;
979 free_list_ = *(reinterpret_cast<void**>(result));
980 } else {
981 if (free_avail_ < kAlignedSize) {
982 // Need more room
983 char* new_allocation = reinterpret_cast<char*>(MetaDataAlloc(kAllocIncrement));
984 if (!new_allocation)
985 CRASH();
986
987 *(void**)new_allocation = allocated_regions_;
988 allocated_regions_ = new_allocation;
989 free_area_ = new_allocation + kAlignedSize;
990 free_avail_ = kAllocIncrement - kAlignedSize;
991 }
992 result = free_area_;
993 free_area_ += kAlignedSize;
994 free_avail_ -= kAlignedSize;
995 }
996 inuse_++;
997 return reinterpret_cast<T*>(result);
998 }
999
  void Delete(T* p) {
1001 *(reinterpret_cast<void**>(p)) = free_list_;
1002 free_list_ = p;
1003 inuse_--;
1004 }
1005
  int inuse() const { return inuse_; }
1007
1008 #if defined(WTF_CHANGES) && OS(DARWIN)
1009 template <class Recorder>
  void recordAdministrativeRegions(Recorder& recorder, const RemoteMemoryReader& reader)
1011 {
1012 vm_address_t adminAllocation = reinterpret_cast<vm_address_t>(allocated_regions_);
1013 while (adminAllocation) {
1014 recorder.recordRegion(adminAllocation, kAllocIncrement);
1015 adminAllocation = *reader(reinterpret_cast<vm_address_t*>(adminAllocation));
1016 }
1017 }
1018 #endif
1019 };
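
// Illustrative sketch (not part of the build) of the PageHeapAllocator
// contract: Init() is called once during initialization, and every New()/
// Delete() must be externally synchronized, since the allocator itself takes
// no lock. span_allocator below refers to the instance defined later in this
// file; the locking comment is only an example.
//
//     span_allocator.Init();              // once, at startup
//     // ... with pageheap_lock held ...
//     Span* s = span_allocator.New();     // reuse a freed Span or carve a new one
//     span_allocator.Delete(s);           // push it back onto the free list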
1020
1021 // -------------------------------------------------------------------------
1022 // Span - a contiguous run of pages
1023 // -------------------------------------------------------------------------
1024
1025 // Type that can hold a page number
1026 typedef uintptr_t PageID;
1027
1028 // Type that can hold the length of a run of pages
1029 typedef uintptr_t Length;
1030
1031 static const Length kMaxValidPages = (~static_cast<Length>(0)) >> kPageShift;
1032
1033 // Convert byte size into pages. This won't overflow, but may return
1034 // an unreasonably large value if bytes is huge enough.
static inline Length pages(size_t bytes) {
1036 return (bytes >> kPageShift) +
1037 ((bytes & (kPageSize - 1)) > 0 ? 1 : 0);
1038 }
1039
1040 // Convert a user size into the number of bytes that will actually be
1041 // allocated
static size_t AllocationSize(size_t bytes) {
1043 if (bytes > kMaxSize) {
1044 // Large object: we allocate an integral number of pages
1045 ASSERT(bytes <= (kMaxValidPages << kPageShift));
1046 return pages(bytes) << kPageShift;
1047 } else {
1048 // Small object: find the size class to which it belongs
1049 return ByteSizeForClass(SizeClass(bytes));
1050 }
1051 }
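
// Worked examples (illustration only), with kPageShift == 12, i.e. 4096-byte
// pages:
//     pages(4096)           == 1, pages(4097) == 2
//     AllocationSize(40000) == pages(40000) << kPageShift == 10 * 4096 == 40960
//     AllocationSize(24)    == ByteSizeForClass(SizeClass(24)), i.e. a small
//                              request is rounded up to its size class.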
1052
1053 // Information kept for a span (a contiguous run of pages).
1054 struct Span {
1055 PageID start; // Starting page number
1056 Length length; // Number of pages in span
1057 Span* next; // Used when in link list
1058 Span* prev; // Used when in link list
1059 void* objects; // Linked list of free objects
1060 unsigned int free : 1; // Is the span free
1061 #ifndef NO_TCMALLOC_SAMPLES
1062 unsigned int sample : 1; // Sampled object?
1063 #endif
1064 unsigned int sizeclass : 8; // Size-class for small objects (or 0)
1065 unsigned int refcount : 11; // Number of non-free objects
1066 bool decommitted : 1;
1067
1068 #undef SPAN_HISTORY
1069 #ifdef SPAN_HISTORY
  // For debugging, we can keep a log of events per span
1071 int nexthistory;
1072 char history[64];
1073 int value[64];
1074 #endif
1075 };
1076
1077 #define ASSERT_SPAN_COMMITTED(span) ASSERT(!span->decommitted)
1078
1079 #ifdef SPAN_HISTORY
void Event(Span* span, char op, int v = 0) {
1081 span->history[span->nexthistory] = op;
1082 span->value[span->nexthistory] = v;
1083 span->nexthistory++;
1084 if (span->nexthistory == sizeof(span->history)) span->nexthistory = 0;
1085 }
1086 #else
1087 #define Event(s,o,v) ((void) 0)
1088 #endif
1089
1090 // Allocator/deallocator for spans
1091 static PageHeapAllocator<Span> span_allocator;
static Span* NewSpan(PageID p, Length len) {
1093 Span* result = span_allocator.New();
1094 memset(result, 0, sizeof(*result));
1095 result->start = p;
1096 result->length = len;
1097 #ifdef SPAN_HISTORY
1098 result->nexthistory = 0;
1099 #endif
1100 return result;
1101 }
1102
static inline void DeleteSpan(Span* span) {
1104 #ifndef NDEBUG
1105 // In debug mode, trash the contents of deleted Spans
1106 memset(span, 0x3f, sizeof(*span));
1107 #endif
1108 span_allocator.Delete(span);
1109 }
1110
1111 // -------------------------------------------------------------------------
1112 // Doubly linked list of spans.
1113 // -------------------------------------------------------------------------
1114
static inline void DLL_Init(Span* list) {
1116 list->next = list;
1117 list->prev = list;
1118 }
1119
static inline void DLL_Remove(Span* span) {
1121 span->prev->next = span->next;
1122 span->next->prev = span->prev;
1123 span->prev = NULL;
1124 span->next = NULL;
1125 }
1126
static ALWAYS_INLINE bool DLL_IsEmpty(const Span* list) {
1128 return list->next == list;
1129 }
1130
static int DLL_Length(const Span* list) {
1132 int result = 0;
1133 for (Span* s = list->next; s != list; s = s->next) {
1134 result++;
1135 }
1136 return result;
1137 }
1138
1139 #if 0 /* Not needed at the moment -- causes compiler warnings if not used */
1140 static void DLL_Print(const char* label, const Span* list) {
1141 MESSAGE("%-10s %p:", label, list);
1142 for (const Span* s = list->next; s != list; s = s->next) {
1143 MESSAGE(" <%p,%u,%u>", s, s->start, s->length);
1144 }
1145 MESSAGE("\n");
1146 }
1147 #endif
1148
static inline void DLL_Prepend(Span* list, Span* span) {
1150 ASSERT(span->next == NULL);
1151 ASSERT(span->prev == NULL);
1152 span->next = list->next;
1153 span->prev = list;
1154 list->next->prev = span;
1155 list->next = span;
1156 }
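
// Illustrative sketch (not part of the build): the DLL_* helpers treat the
// 'list' Span as a circular sentinel, so an empty list points at itself.
// 'span' below stands for a hypothetical freshly created Span (next/prev NULL).
//
//     Span sentinel;
//     DLL_Init(&sentinel);           // sentinel.next == sentinel.prev == &sentinel
//     ASSERT(DLL_IsEmpty(&sentinel));
//     DLL_Prepend(&sentinel, span);  // span becomes sentinel.next
//     DLL_Remove(span);              // span->next and span->prev reset to NULL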
1157
1158 // -------------------------------------------------------------------------
1159 // Stack traces kept for sampled allocations
1160 // The following state is protected by pageheap_lock_.
1161 // -------------------------------------------------------------------------
1162
1163 // size/depth are made the same size as a pointer so that some generic
1164 // code below can conveniently cast them back and forth to void*.
1165 static const int kMaxStackDepth = 31;
1166 struct StackTrace {
1167 uintptr_t size; // Size of object
1168 uintptr_t depth; // Number of PC values stored in array below
1169 void* stack[kMaxStackDepth];
1170 };
1171 static PageHeapAllocator<StackTrace> stacktrace_allocator;
1172 static Span sampled_objects;
1173
1174 // -------------------------------------------------------------------------
1175 // Map from page-id to per-page data
1176 // -------------------------------------------------------------------------
1177
1178 // We use PageMap2<> for 32-bit and PageMap3<> for 64-bit machines.
1179 // We also use a simple one-level cache for hot PageID-to-sizeclass mappings,
1180 // because sometimes the sizeclass is all the information we need.
1181
1182 // Selector class -- general selector uses 3-level map
1183 template <int BITS> class MapSelector {
1184 public:
1185 typedef TCMalloc_PageMap3<BITS-kPageShift> Type;
1186 typedef PackedCache<BITS, uint64_t> CacheType;
1187 };
1188
1189 #if defined(WTF_CHANGES)
1190 #if CPU(X86_64)
1191 // On all known X86-64 platforms, the upper 16 bits are always unused and therefore
1192 // can be excluded from the PageMap key.
1193 // See http://en.wikipedia.org/wiki/X86-64#Virtual_address_space_details
1194
1195 static const size_t kBitsUnusedOn64Bit = 16;
1196 #else
1197 static const size_t kBitsUnusedOn64Bit = 0;
1198 #endif
1199
1200 // A three-level map for 64-bit machines
1201 template <> class MapSelector<64> {
1202 public:
1203 typedef TCMalloc_PageMap3<64 - kPageShift - kBitsUnusedOn64Bit> Type;
1204 typedef PackedCache<64, uint64_t> CacheType;
1205 };
1206 #endif
1207
1208 // A two-level map for 32-bit machines
1209 template <> class MapSelector<32> {
1210 public:
1211 typedef TCMalloc_PageMap2<32 - kPageShift> Type;
1212 typedef PackedCache<32 - kPageShift, uint16_t> CacheType;
1213 };
1214
1215 // -------------------------------------------------------------------------
1216 // Page-level allocator
1217 // * Eager coalescing
1218 //
// Heap for page-level allocation. We allow allocating and freeing
// contiguous runs of pages (each such run is called a "span").
1221 // -------------------------------------------------------------------------
1222
1223 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1224 // The central page heap collects spans of memory that have been deleted but are still committed until they are released
1225 // back to the system. We use a background thread to periodically scan the list of free spans and release some back to the
1226 // system. Every 5 seconds, the background thread wakes up and does the following:
1227 // - Check if we needed to commit memory in the last 5 seconds. If so, skip this scavenge because it's a sign that we are short
1228 // of free committed pages and so we should not release them back to the system yet.
1229 // - Otherwise, go through the list of free spans (from largest to smallest) and release up to a fraction of the free committed pages
1230 // back to the system.
1231 // - If the number of free committed pages reaches kMinimumFreeCommittedPageCount, we can stop the scavenging and block the
1232 // scavenging thread until the number of free committed pages goes above kMinimumFreeCommittedPageCount.
1233
1234 // Background thread wakes up every 5 seconds to scavenge as long as there is memory available to return to the system.
1235 static const int kScavengeTimerDelayInSeconds = 5;
1236
1237 // Number of free committed pages that we want to keep around.
1238 static const size_t kMinimumFreeCommittedPageCount = 512;
1239
1240 // During a scavenge, we'll release up to a fraction of the free committed pages.
1241 #if OS(WINDOWS)
1242 // We are slightly less aggressive in releasing memory on Windows due to performance reasons.
1243 static const int kMaxScavengeAmountFactor = 3;
1244 #else
1245 static const int kMaxScavengeAmountFactor = 2;
1246 #endif
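
// A rough reading of the constants above (illustration, not a guarantee): with
// 4K pages, kMinimumFreeCommittedPageCount == 512 keeps about 2MB committed,
// and with kMaxScavengeAmountFactor == 2 (non-Windows) a single scavenge pass
// decommits at most free_committed_pages_ / 2 pages before stopping.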
1247 #endif
1248
1249 class TCMalloc_PageHeap {
1250 public:
1251 void init();
1252
1253 // Allocate a run of "n" pages. Returns zero if out of memory.
1254 Span* New(Length n);
1255
1256 // Delete the span "[p, p+n-1]".
1257 // REQUIRES: span was returned by earlier call to New() and
1258 // has not yet been deleted.
1259 void Delete(Span* span);
1260
1261 // Mark an allocated span as being used for small objects of the
1262 // specified size-class.
1263 // REQUIRES: span was returned by an earlier call to New()
1264 // and has not yet been deleted.
1265 void RegisterSizeClass(Span* span, size_t sc);
1266
1267 // Split an allocated span into two spans: one of length "n" pages
1268 // followed by another span of length "span->length - n" pages.
1269 // Modifies "*span" to point to the first span of length "n" pages.
1270 // Returns a pointer to the second span.
1271 //
1272 // REQUIRES: "0 < n < span->length"
1273 // REQUIRES: !span->free
1274 // REQUIRES: span->sizeclass == 0
1275 Span* Split(Span* span, Length n);
1276
1277 // Return the descriptor for the specified page.
  inline Span* GetDescriptor(PageID p) const {
1279 return reinterpret_cast<Span*>(pagemap_.get(p));
1280 }
1281
1282 #ifdef WTF_CHANGES
  inline Span* GetDescriptorEnsureSafe(PageID p)
1284 {
1285 pagemap_.Ensure(p, 1);
1286 return GetDescriptor(p);
1287 }
1288
1289 size_t ReturnedBytes() const;
1290 #endif
1291
1292 // Dump state to stderr
1293 #ifndef WTF_CHANGES
1294 void Dump(TCMalloc_Printer* out);
1295 #endif
1296
1297 // Return number of bytes allocated from system
  inline uint64_t SystemBytes() const { return system_bytes_; }
1299
1300 // Return number of free bytes in heap
  uint64_t FreeBytes() const {
1302 return (static_cast<uint64_t>(free_pages_) << kPageShift);
1303 }
1304
1305 bool Check();
1306 bool CheckList(Span* list, Length min_pages, Length max_pages);
1307
1308 // Release all pages on the free list for reuse by the OS:
1309 void ReleaseFreePages();
1310
1311 // Return 0 if we have no information, or else the correct sizeclass for p.
1312 // Reads and writes to pagemap_cache_ do not require locking.
1313 // The entries are 64 bits on 64-bit hardware and 16 bits on
1314 // 32-bit hardware, and we don't mind raciness as long as each read of
1315 // an entry yields a valid entry, not a partially updated entry.
  size_t GetSizeClassIfCached(PageID p) const {
1317 return pagemap_cache_.GetOrDefault(p, 0);
1318 }
  void CacheSizeClass(PageID p, size_t cl) const { pagemap_cache_.Put(p, cl); }
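
  // Example of the "0 means no information" contract described in the file
  // header (illustration only; 'heap' is a hypothetical TCMalloc_PageHeap*):
  //
  //     size_t cl = heap->GetSizeClassIfCached(p);
  //     if (cl == 0) {
  //         // No (or stale) cache entry: fall back to GetDescriptor(p) and
  //         // read span->sizeclass instead.
  //     }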
1320
1321 private:
1322 // Pick the appropriate map and cache types based on pointer size
1323 typedef MapSelector<8*sizeof(uintptr_t)>::Type PageMap;
1324 typedef MapSelector<8*sizeof(uintptr_t)>::CacheType PageMapCache;
1325 PageMap pagemap_;
1326 mutable PageMapCache pagemap_cache_;
1327
1328 // We segregate spans of a given size into two circular linked
1329 // lists: one for normal spans, and one for spans whose memory
1330 // has been returned to the system.
1331 struct SpanList {
1332 Span normal;
1333 Span returned;
1334 };
1335
1336 // List of free spans of length >= kMaxPages
1337 SpanList large_;
1338
1339 // Array mapping from span length to a doubly linked list of free spans
1340 SpanList free_[kMaxPages];
1341
1342 // Number of pages kept in free lists
1343 uintptr_t free_pages_;
1344
1345 // Bytes allocated from system
1346 uint64_t system_bytes_;
1347
1348 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1349 // Number of pages kept in free lists that are still committed.
1350 Length free_committed_pages_;
1351
1352 // Number of pages that we committed in the last scavenge wait interval.
1353 Length pages_committed_since_last_scavenge_;
1354 #endif
1355
1356 bool GrowHeap(Length n);
1357
1358 // REQUIRES span->length >= n
1359 // Remove span from its free list, and move any leftover part of
1360 // span into appropriate free lists. Also update "span" to have
1361 // length exactly "n" and mark it as non-free so it can be returned
1362 // to the client.
1363 //
1364 // "released" is true iff "span" was found on a "returned" list.
1365 void Carve(Span* span, Length n, bool released);
1366
  void RecordSpan(Span* span) {
1368 pagemap_.set(span->start, span);
1369 if (span->length > 1) {
1370 pagemap_.set(span->start + span->length - 1, span);
1371 }
1372 }
1373
1374 // Allocate a large span of length == n. If successful, returns a
1375 // span of exactly the specified length. Else, returns NULL.
1376 Span* AllocLarge(Length n);
1377
1378 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1379 // Incrementally release some memory to the system.
1380 // IncrementalScavenge(n) is called whenever n pages are freed.
1381 void IncrementalScavenge(Length n);
1382 #endif
1383
1384 // Number of pages to deallocate before doing more scavenging
1385 int64_t scavenge_counter_;
1386
1387 // Index of last free list we scavenged
1388 size_t scavenge_index_;
1389
1390 #if defined(WTF_CHANGES) && OS(DARWIN)
1391 friend class FastMallocZone;
1392 #endif
1393
1394 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1395 void initializeScavenger();
1396 ALWAYS_INLINE void signalScavenger();
1397 void scavenge();
1398 ALWAYS_INLINE bool shouldContinueScavenging() const;
1399
1400 #if !HAVE(DISPATCH_H)
1401 static NO_RETURN void* runScavengerThread(void*);
1402 NO_RETURN void scavengerThread();
1403
    // Keeps track of whether the background thread is actively scavenging memory every kScavengeTimerDelayInSeconds,
    // or whether it is blocked waiting for more pages to be deleted.
1406 bool m_scavengeThreadActive;
1407
1408 pthread_mutex_t m_scavengeMutex;
1409 pthread_cond_t m_scavengeCondition;
1410 #else // !HAVE(DISPATCH_H)
1411 void periodicScavenge();
1412
1413 dispatch_queue_t m_scavengeQueue;
1414 dispatch_source_t m_scavengeTimer;
1415 bool m_scavengingScheduled;
1416 #endif
1417
1418 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1419 };
1420
void TCMalloc_PageHeap::init()
1422 {
1423 pagemap_.init(MetaDataAlloc);
1424 pagemap_cache_ = PageMapCache(0);
1425 free_pages_ = 0;
1426 system_bytes_ = 0;
1427
1428 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1429 free_committed_pages_ = 0;
1430 pages_committed_since_last_scavenge_ = 0;
1431 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1432
1433 scavenge_counter_ = 0;
1434 // Start scavenging at kMaxPages list
1435 scavenge_index_ = kMaxPages-1;
1436 COMPILE_ASSERT(kNumClasses <= (1 << PageMapCache::kValuebits), valuebits);
1437 DLL_Init(&large_.normal);
1438 DLL_Init(&large_.returned);
1439 for (size_t i = 0; i < kMaxPages; i++) {
1440 DLL_Init(&free_[i].normal);
1441 DLL_Init(&free_[i].returned);
1442 }
1443
1444 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1445 initializeScavenger();
1446 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1447 }
1448
1449 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1450
1451 #if !HAVE(DISPATCH_H)
1452
void TCMalloc_PageHeap::initializeScavenger()
1454 {
1455 pthread_mutex_init(&m_scavengeMutex, 0);
1456 pthread_cond_init(&m_scavengeCondition, 0);
1457 m_scavengeThreadActive = true;
1458 pthread_t thread;
1459 pthread_create(&thread, 0, runScavengerThread, this);
1460 }
1461
void* TCMalloc_PageHeap::runScavengerThread(void* context)
1463 {
1464 static_cast<TCMalloc_PageHeap*>(context)->scavengerThread();
1465 #if COMPILER(MSVC) || OS(SOLARIS)
1466 // Without this, Visual Studio will complain that this method does not return a value.
1467 return 0;
1468 #endif
1469 }
1470
ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1472 {
1473 if (!m_scavengeThreadActive && shouldContinueScavenging())
1474 pthread_cond_signal(&m_scavengeCondition);
1475 }
1476
1477 #else // !HAVE(DISPATCH_H)
1478
void TCMalloc_PageHeap::initializeScavenger()
1480 {
    m_scavengeQueue = dispatch_queue_create("com.apple.JavaScriptCore.FastMallocScavenger", NULL);
1482 m_scavengeTimer = dispatch_source_create(DISPATCH_SOURCE_TYPE_TIMER, 0, 0, m_scavengeQueue);
1483 dispatch_time_t startTime = dispatch_time(DISPATCH_TIME_NOW, kScavengeTimerDelayInSeconds * NSEC_PER_SEC);
1484 dispatch_source_set_timer(m_scavengeTimer, startTime, kScavengeTimerDelayInSeconds * NSEC_PER_SEC, 1000 * NSEC_PER_USEC);
1485 dispatch_source_set_event_handler(m_scavengeTimer, ^{ periodicScavenge(); });
1486 m_scavengingScheduled = false;
1487 }
1488
ALWAYS_INLINE void TCMalloc_PageHeap::signalScavenger()
1490 {
1491 if (!m_scavengingScheduled && shouldContinueScavenging()) {
1492 m_scavengingScheduled = true;
1493 dispatch_resume(m_scavengeTimer);
1494 }
1495 }
1496
1497 #endif
1498
void TCMalloc_PageHeap::scavenge()
1500 {
    // If we had to commit memory in the last 5 seconds, it means we don't have enough free committed pages
    // for the amount of allocation we are doing, so hold off on releasing memory back to the system.
1503 if (pages_committed_since_last_scavenge_ > 0) {
1504 pages_committed_since_last_scavenge_ = 0;
1505 return;
1506 }
1507 Length pagesDecommitted = 0;
1508 for (int i = kMaxPages; i >= 0; i--) {
1509 SpanList* slist = (static_cast<size_t>(i) == kMaxPages) ? &large_ : &free_[i];
1510 if (!DLL_IsEmpty(&slist->normal)) {
1511 // Release the last span on the normal portion of this list
1512 Span* s = slist->normal.prev;
            // Only decommit up to 1/kMaxScavengeAmountFactor of the free committed pages in a single scavenge pass.
1514 if ((pagesDecommitted + s->length) * kMaxScavengeAmountFactor > free_committed_pages_)
1515 continue;
1516 DLL_Remove(s);
1517 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1518 static_cast<size_t>(s->length << kPageShift));
1519 if (!s->decommitted) {
1520 pagesDecommitted += s->length;
1521 s->decommitted = true;
1522 }
1523 DLL_Prepend(&slist->returned, s);
1524 // We can stop scavenging if the number of free committed pages left is less than or equal to the minimum number we want to keep around.
1525 if (free_committed_pages_ <= kMinimumFreeCommittedPageCount + pagesDecommitted)
1526 break;
1527 }
1528 }
1529 pages_committed_since_last_scavenge_ = 0;
1530 ASSERT(free_committed_pages_ >= pagesDecommitted);
1531 free_committed_pages_ -= pagesDecommitted;
1532 }
1533
ALWAYS_INLINE bool TCMalloc_PageHeap::shouldContinueScavenging() const
1535 {
1536 return free_committed_pages_ > kMinimumFreeCommittedPageCount;
1537 }
1538
1539 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1540
inline Span* TCMalloc_PageHeap::New(Length n) {
1542 ASSERT(Check());
1543 ASSERT(n > 0);
1544
1545 // Find first size >= n that has a non-empty list
1546 for (Length s = n; s < kMaxPages; s++) {
1547 Span* ll = NULL;
1548 bool released = false;
1549 if (!DLL_IsEmpty(&free_[s].normal)) {
1550 // Found normal span
1551 ll = &free_[s].normal;
1552 } else if (!DLL_IsEmpty(&free_[s].returned)) {
1553 // Found returned span; reallocate it
1554 ll = &free_[s].returned;
1555 released = true;
1556 } else {
1557 // Keep looking in larger classes
1558 continue;
1559 }
1560
1561 Span* result = ll->next;
1562 Carve(result, n, released);
1563 if (result->decommitted) {
1564 TCMalloc_SystemCommit(reinterpret_cast<void*>(result->start << kPageShift), static_cast<size_t>(n << kPageShift));
1565 result->decommitted = false;
1566 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1567 pages_committed_since_last_scavenge_ += n;
1568 #endif
1569 }
1570 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1571 else {
1572 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1573 // free committed pages count.
1574 ASSERT(free_committed_pages_ >= n);
1575 free_committed_pages_ -= n;
1576 }
1577 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1578 ASSERT(Check());
1579 free_pages_ -= n;
1580 return result;
1581 }
1582
1583 Span* result = AllocLarge(n);
1584 if (result != NULL) {
1585 ASSERT_SPAN_COMMITTED(result);
1586 return result;
1587 }
1588
1589 // Grow the heap and try again
1590 if (!GrowHeap(n)) {
1591 ASSERT(Check());
1592 return NULL;
1593 }
1594
1595 return AllocLarge(n);
1596 }
1597
1598 Span* TCMalloc_PageHeap::AllocLarge(Length n) {
1599 // find the best span (closest to n in size).
1600 // The following loops implement address-ordered best-fit.
1601 bool from_released = false;
1602 Span *best = NULL;
1603
1604 // Search through normal list
1605 for (Span* span = large_.normal.next;
1606 span != &large_.normal;
1607 span = span->next) {
1608 if (span->length >= n) {
1609 if ((best == NULL)
1610 || (span->length < best->length)
1611 || ((span->length == best->length) && (span->start < best->start))) {
1612 best = span;
1613 from_released = false;
1614 }
1615 }
1616 }
1617
1618 // Search through released list in case it has a better fit
1619 for (Span* span = large_.returned.next;
1620 span != &large_.returned;
1621 span = span->next) {
1622 if (span->length >= n) {
1623 if ((best == NULL)
1624 || (span->length < best->length)
1625 || ((span->length == best->length) && (span->start < best->start))) {
1626 best = span;
1627 from_released = true;
1628 }
1629 }
1630 }
1631
1632 if (best != NULL) {
1633 Carve(best, n, from_released);
1634 if (best->decommitted) {
1635 TCMalloc_SystemCommit(reinterpret_cast<void*>(best->start << kPageShift), static_cast<size_t>(n << kPageShift));
1636 best->decommitted = false;
1637 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1638 pages_committed_since_last_scavenge_ += n;
1639 #endif
1640 }
1641 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1642 else {
1643 // The newly allocated memory is from a span that's in the normal span list (already committed). Update the
1644 // free committed pages count.
1645 ASSERT(free_committed_pages_ >= n);
1646 free_committed_pages_ -= n;
1647 }
1648 #endif // USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1649 ASSERT(Check());
1650 free_pages_ -= n;
1651 return best;
1652 }
1653 return NULL;
1654 }
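// Illustrative sketch (not part of FastMalloc): a standalone model of the address-ordered
// best-fit selection used by AllocLarge() above. For a request of 260 pages among spans
// {start 100, len 300}, {start 900, len 280}, {start 400, len 280}, the 280-page span at
// start 400 wins: smallest length that fits, ties broken by lowest start address.
#if 0
#include <cstddef>
#include <vector>

struct FakeSpan { size_t start; size_t length; };

static const FakeSpan* bestFit(const std::vector<FakeSpan>& spans, size_t n)
{
    const FakeSpan* best = 0;
    for (size_t i = 0; i < spans.size(); ++i) {
        const FakeSpan& span = spans[i];
        if (span.length < n)
            continue;
        if (!best || span.length < best->length
            || (span.length == best->length && span.start < best->start))
            best = &span;
    }
    return best;
}
#endif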
1655
1656 Span* TCMalloc_PageHeap::Split(Span* span, Length n) {
1657 ASSERT(0 < n);
1658 ASSERT(n < span->length);
1659 ASSERT(!span->free);
1660 ASSERT(span->sizeclass == 0);
1661 Event(span, 'T', n);
1662
1663 const Length extra = span->length - n;
1664 Span* leftover = NewSpan(span->start + n, extra);
1665 Event(leftover, 'U', extra);
1666 RecordSpan(leftover);
1667 pagemap_.set(span->start + n - 1, span); // Update map from pageid to span
1668 span->length = n;
1669
1670 return leftover;
1671 }
1672
1673 static ALWAYS_INLINE void propagateDecommittedState(Span* destination, Span* source)
1674 {
1675 destination->decommitted = source->decommitted;
1676 }
1677
1678 inline void TCMalloc_PageHeap::Carve(Span* span, Length n, bool released) {
1679 ASSERT(n > 0);
1680 DLL_Remove(span);
1681 span->free = 0;
1682 Event(span, 'A', n);
1683
1684 const int extra = static_cast<int>(span->length - n);
1685 ASSERT(extra >= 0);
1686 if (extra > 0) {
1687 Span* leftover = NewSpan(span->start + n, extra);
1688 leftover->free = 1;
1689 propagateDecommittedState(leftover, span);
1690 Event(leftover, 'S', extra);
1691 RecordSpan(leftover);
1692
1693 // Place leftover span on appropriate free list
1694 SpanList* listpair = (static_cast<size_t>(extra) < kMaxPages) ? &free_[extra] : &large_;
1695 Span* dst = released ? &listpair->returned : &listpair->normal;
1696 DLL_Prepend(dst, leftover);
1697
1698 span->length = n;
1699 pagemap_.set(span->start + n - 1, span);
1700 }
1701 }
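// Illustrative sketch (not part of FastMalloc): the splitting step performed by Carve().
// Carving n = 3 pages from an 8-page free span keeps a 3-page head for the request and
// produces a 5-page leftover that goes back onto free_[5] (or large_ if it were >= kMaxPages);
// only the boundary pages of each piece need fresh pagemap entries.
#if 0
#include <cstddef>

struct FakeSpan { size_t start; size_t length; };

static FakeSpan carve(FakeSpan& span, size_t n)
{
    FakeSpan leftover = { span.start + n, span.length - n }; // tail piece goes back to a free list
    span.length = n;                                         // head piece satisfies the request
    return leftover;
}
#endif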
1702
1703 static ALWAYS_INLINE void mergeDecommittedStates(Span* destination, Span* other)
1704 {
1705 if (destination->decommitted && !other->decommitted) {
1706 TCMalloc_SystemRelease(reinterpret_cast<void*>(other->start << kPageShift),
1707 static_cast<size_t>(other->length << kPageShift));
1708 } else if (other->decommitted && !destination->decommitted) {
1709 TCMalloc_SystemRelease(reinterpret_cast<void*>(destination->start << kPageShift),
1710 static_cast<size_t>(destination->length << kPageShift));
1711 destination->decommitted = true;
1712 }
1713 }
1714
1715 inline void TCMalloc_PageHeap::Delete(Span* span) {
1716 ASSERT(Check());
1717 ASSERT(!span->free);
1718 ASSERT(span->length > 0);
1719 ASSERT(GetDescriptor(span->start) == span);
1720 ASSERT(GetDescriptor(span->start + span->length - 1) == span);
1721 span->sizeclass = 0;
1722 #ifndef NO_TCMALLOC_SAMPLES
1723 span->sample = 0;
1724 #endif
1725
1726 // Coalesce -- we guarantee that "p" != 0, so no bounds checking
1727 // necessary. We do not bother resetting the stale pagemap
1728 // entries for the pieces we are merging together because we only
1729 // care about the pagemap entries for the boundaries.
1730 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1731 // Track the total size of the neighboring free spans that are committed.
1732 Length neighboringCommittedSpansLength = 0;
1733 #endif
1734 const PageID p = span->start;
1735 const Length n = span->length;
1736 Span* prev = GetDescriptor(p-1);
1737 if (prev != NULL && prev->free) {
1738 // Merge preceding span into this span
1739 ASSERT(prev->start + prev->length == p);
1740 const Length len = prev->length;
1741 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1742 if (!prev->decommitted)
1743 neighboringCommittedSpansLength += len;
1744 #endif
1745 mergeDecommittedStates(span, prev);
1746 DLL_Remove(prev);
1747 DeleteSpan(prev);
1748 span->start -= len;
1749 span->length += len;
1750 pagemap_.set(span->start, span);
1751 Event(span, 'L', len);
1752 }
1753 Span* next = GetDescriptor(p+n);
1754 if (next != NULL && next->free) {
1755 // Merge next span into this span
1756 ASSERT(next->start == p+n);
1757 const Length len = next->length;
1758 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1759 if (!next->decommitted)
1760 neighboringCommittedSpansLength += len;
1761 #endif
1762 mergeDecommittedStates(span, next);
1763 DLL_Remove(next);
1764 DeleteSpan(next);
1765 span->length += len;
1766 pagemap_.set(span->start + span->length - 1, span);
1767 Event(span, 'R', len);
1768 }
1769
1770 Event(span, 'D', span->length);
1771 span->free = 1;
1772 if (span->decommitted) {
1773 if (span->length < kMaxPages)
1774 DLL_Prepend(&free_[span->length].returned, span);
1775 else
1776 DLL_Prepend(&large_.returned, span);
1777 } else {
1778 if (span->length < kMaxPages)
1779 DLL_Prepend(&free_[span->length].normal, span);
1780 else
1781 DLL_Prepend(&large_.normal, span);
1782 }
1783 free_pages_ += n;
1784
1785 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1786 if (span->decommitted) {
1787 // If the merged span is decommitted, that means we decommitted any neighboring spans that were
1788 // committed. Update the free committed pages count.
1789 free_committed_pages_ -= neighboringCommittedSpansLength;
1790 } else {
1791 // If the merged span remains committed, add the deleted span's size to the free committed pages count.
1792 free_committed_pages_ += n;
1793 }
1794
1795 // Make sure the scavenge thread becomes active if we have enough freed pages to release some back to the system.
1796 signalScavenger();
1797 #else
1798 IncrementalScavenge(n);
1799 #endif
1800
1801 ASSERT(Check());
1802 }
1803
1804 #if !USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1805 void TCMalloc_PageHeap::IncrementalScavenge(Length n) {
1806 // Fast path; not yet time to release memory
1807 scavenge_counter_ -= n;
1808 if (scavenge_counter_ >= 0) return; // Not yet time to scavenge
1809
1810 // If there is nothing to release, wait for this many pages before
1811 // scavenging again. With 4K pages, this comes to 1MB of memory.
1812 static const size_t kDefaultReleaseDelay = 1 << 8;
1813
1814 // Find index of free list to scavenge
1815 size_t index = scavenge_index_ + 1;
1816 for (size_t i = 0; i < kMaxPages+1; i++) {
1817 if (index > kMaxPages) index = 0;
1818 SpanList* slist = (index == kMaxPages) ? &large_ : &free_[index];
1819 if (!DLL_IsEmpty(&slist->normal)) {
1820 // Release the last span on the normal portion of this list
1821 Span* s = slist->normal.prev;
1822 DLL_Remove(s);
1823 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
1824 static_cast<size_t>(s->length << kPageShift));
1825 s->decommitted = true;
1826 DLL_Prepend(&slist->returned, s);
1827
1828 scavenge_counter_ = std::max<size_t>(64UL, std::min<size_t>(kDefaultReleaseDelay, kDefaultReleaseDelay - (free_pages_ / kDefaultReleaseDelay)));
1829
1830 if (index == kMaxPages && !DLL_IsEmpty(&slist->normal))
1831 scavenge_index_ = index - 1;
1832 else
1833 scavenge_index_ = index;
1834 return;
1835 }
1836 index++;
1837 }
1838
1839 // Nothing to scavenge, delay for a while
1840 scavenge_counter_ = kDefaultReleaseDelay;
1841 }
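// Illustrative sketch (not part of FastMalloc): the release-delay arithmetic used above.
// After releasing one span, the next release is delayed by roughly kDefaultReleaseDelay pages
// of deallocation traffic, trimmed slightly as free_pages_ grows, but never below 64 pages.
#if 0
#include <algorithm>
#include <cstddef>

static size_t nextScavengeDelay(size_t freePages)
{
    const size_t defaultReleaseDelay = 1 << 8; // mirrors kDefaultReleaseDelay above
    // Same expression as the scavenge_counter_ assignment in IncrementalScavenge().
    return std::max<size_t>(64UL,
        std::min<size_t>(defaultReleaseDelay,
                         defaultReleaseDelay - freePages / defaultReleaseDelay));
}
#endif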
1842 #endif
1843
1844 void TCMalloc_PageHeap::RegisterSizeClass(Span* span, size_t sc) {
1845 // Associate span object with all interior pages as well
1846 ASSERT(!span->free);
1847 ASSERT(GetDescriptor(span->start) == span);
1848 ASSERT(GetDescriptor(span->start+span->length-1) == span);
1849 Event(span, 'C', sc);
1850 span->sizeclass = static_cast<unsigned int>(sc);
1851 for (Length i = 1; i < span->length-1; i++) {
1852 pagemap_.set(span->start+i, span);
1853 }
1854 }
1855
1856 #ifdef WTF_CHANGES
1857 size_t TCMalloc_PageHeap::ReturnedBytes() const {
1858 size_t result = 0;
1859 for (unsigned s = 0; s < kMaxPages; s++) {
1860 const int r_length = DLL_Length(&free_[s].returned);
1861 unsigned r_pages = s * r_length;
1862 result += r_pages << kPageShift;
1863 }
1864
1865 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next)
1866 result += s->length << kPageShift;
1867 return result;
1868 }
1869 #endif
1870
1871 #ifndef WTF_CHANGES
1872 static double PagesToMB(uint64_t pages) {
1873 return (pages << kPageShift) / 1048576.0;
1874 }
1875
1876 void TCMalloc_PageHeap::Dump(TCMalloc_Printer* out) {
1877 int nonempty_sizes = 0;
1878 for (int s = 0; s < kMaxPages; s++) {
1879 if (!DLL_IsEmpty(&free_[s].normal) || !DLL_IsEmpty(&free_[s].returned)) {
1880 nonempty_sizes++;
1881 }
1882 }
1883 out->printf("------------------------------------------------\n");
1884 out->printf("PageHeap: %d sizes; %6.1f MB free\n",
1885 nonempty_sizes, PagesToMB(free_pages_));
1886 out->printf("------------------------------------------------\n");
1887 uint64_t total_normal = 0;
1888 uint64_t total_returned = 0;
1889 for (int s = 0; s < kMaxPages; s++) {
1890 const int n_length = DLL_Length(&free_[s].normal);
1891 const int r_length = DLL_Length(&free_[s].returned);
1892 if (n_length + r_length > 0) {
1893 uint64_t n_pages = s * n_length;
1894 uint64_t r_pages = s * r_length;
1895 total_normal += n_pages;
1896 total_returned += r_pages;
1897 out->printf("%6u pages * %6u spans ~ %6.1f MB; %6.1f MB cum"
1898 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1899 s,
1900 (n_length + r_length),
1901 PagesToMB(n_pages + r_pages),
1902 PagesToMB(total_normal + total_returned),
1903 PagesToMB(r_pages),
1904 PagesToMB(total_returned));
1905 }
1906 }
1907
1908 uint64_t n_pages = 0;
1909 uint64_t r_pages = 0;
1910 int n_spans = 0;
1911 int r_spans = 0;
1912 out->printf("Normal large spans:\n");
1913 for (Span* s = large_.normal.next; s != &large_.normal; s = s->next) {
1914 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1915 s->length, PagesToMB(s->length));
1916 n_pages += s->length;
1917 n_spans++;
1918 }
1919 out->printf("Unmapped large spans:\n");
1920 for (Span* s = large_.returned.next; s != &large_.returned; s = s->next) {
1921 out->printf(" [ %6" PRIuS " pages ] %6.1f MB\n",
1922 s->length, PagesToMB(s->length));
1923 r_pages += s->length;
1924 r_spans++;
1925 }
1926 total_normal += n_pages;
1927 total_returned += r_pages;
1928 out->printf(">255 large * %6u spans ~ %6.1f MB; %6.1f MB cum"
1929 "; unmapped: %6.1f MB; %6.1f MB cum\n",
1930 (n_spans + r_spans),
1931 PagesToMB(n_pages + r_pages),
1932 PagesToMB(total_normal + total_returned),
1933 PagesToMB(r_pages),
1934 PagesToMB(total_returned));
1935 }
1936 #endif
1937
1938 bool TCMalloc_PageHeap::GrowHeap(Length n) {
1939 ASSERT(kMaxPages >= kMinSystemAlloc);
1940 if (n > kMaxValidPages) return false;
1941 Length ask = (n>kMinSystemAlloc) ? n : static_cast<Length>(kMinSystemAlloc);
1942 size_t actual_size;
1943 void* ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1944 if (ptr == NULL) {
1945 if (n < ask) {
1946 // Try growing just "n" pages
1947 ask = n;
1948 ptr = TCMalloc_SystemAlloc(ask << kPageShift, &actual_size, kPageSize);
1949 }
1950 if (ptr == NULL) return false;
1951 }
1952 ask = actual_size >> kPageShift;
1953
1954 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
1955 pages_committed_since_last_scavenge_ += ask;
1956 #endif
1957
1958 uint64_t old_system_bytes = system_bytes_;
1959 system_bytes_ += (ask << kPageShift);
1960 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
1961 ASSERT(p > 0);
1962
1963 // If we already have a lot of pages allocated, just preallocate a bunch of
1964 // memory for the page map. This prevents fragmentation by pagemap metadata
1965 // when a program keeps allocating and freeing large blocks.
1966
1967 if (old_system_bytes < kPageMapBigAllocationThreshold
1968 && system_bytes_ >= kPageMapBigAllocationThreshold) {
1969 pagemap_.PreallocateMoreMemory();
1970 }
1971
1972 // Make sure pagemap_ has entries for all of the new pages.
1973 // Plus ensure one before and one after so coalescing code
1974 // does not need bounds-checking.
1975 if (pagemap_.Ensure(p-1, ask+2)) {
1976 // Pretend the new area is allocated and then Delete() it to
1977 // cause any necessary coalescing to occur.
1978 //
1979 // We do not adjust free_pages_ here since Delete() will do it for us.
1980 Span* span = NewSpan(p, ask);
1981 RecordSpan(span);
1982 Delete(span);
1983 ASSERT(Check());
1984 return true;
1985 } else {
1986 // We could not allocate memory within "pagemap_"
1987 // TODO: Once we can return memory to the system, return the new span
1988 return false;
1989 }
1990 }
1991
1992 bool TCMalloc_PageHeap::Check() {
1993 ASSERT(free_[0].normal.next == &free_[0].normal);
1994 ASSERT(free_[0].returned.next == &free_[0].returned);
1995 CheckList(&large_.normal, kMaxPages, 1000000000);
1996 CheckList(&large_.returned, kMaxPages, 1000000000);
1997 for (Length s = 1; s < kMaxPages; s++) {
1998 CheckList(&free_[s].normal, s, s);
1999 CheckList(&free_[s].returned, s, s);
2000 }
2001 return true;
2002 }
2003
2004 #if ASSERT_DISABLED
2005 bool TCMalloc_PageHeap::CheckList(Span*, Length, Length) {
2006 return true;
2007 }
2008 #else
2009 bool TCMalloc_PageHeap::CheckList(Span* list, Length min_pages, Length max_pages) {
2010 for (Span* s = list->next; s != list; s = s->next) {
2011 CHECK_CONDITION(s->free);
2012 CHECK_CONDITION(s->length >= min_pages);
2013 CHECK_CONDITION(s->length <= max_pages);
2014 CHECK_CONDITION(GetDescriptor(s->start) == s);
2015 CHECK_CONDITION(GetDescriptor(s->start+s->length-1) == s);
2016 }
2017 return true;
2018 }
2019 #endif
2020
2021 static void ReleaseFreeList(Span* list, Span* returned) {
2022 // Walk backwards through list so that when we push these
2023 // spans on the "returned" list, we preserve the order.
2024 while (!DLL_IsEmpty(list)) {
2025 Span* s = list->prev;
2026 DLL_Remove(s);
2027 DLL_Prepend(returned, s);
2028 TCMalloc_SystemRelease(reinterpret_cast<void*>(s->start << kPageShift),
2029 static_cast<size_t>(s->length << kPageShift));
2030 }
2031 }
2032
2033 void TCMalloc_PageHeap::ReleaseFreePages() {
2034 for (Length s = 0; s < kMaxPages; s++) {
2035 ReleaseFreeList(&free_[s].normal, &free_[s].returned);
2036 }
2037 ReleaseFreeList(&large_.normal, &large_.returned);
2038 ASSERT(Check());
2039 }
2040
2041 //-------------------------------------------------------------------
2042 // Free list
2043 //-------------------------------------------------------------------
2044
2045 class TCMalloc_ThreadCache_FreeList {
2046 private:
2047 void* list_; // Linked list of nodes
2048 uint16_t length_; // Current length
2049 uint16_t lowater_; // Low water mark for list length
2050
2051 public:
2052 void Init() {
2053 list_ = NULL;
2054 length_ = 0;
2055 lowater_ = 0;
2056 }
2057
2058 // Return current length of list
2059 int length() const {
2060 return length_;
2061 }
2062
2063 // Is list empty?
2064 bool empty() const {
2065 return list_ == NULL;
2066 }
2067
2068 // Low-water mark management
2069 int lowwatermark() const { return lowater_; }
2070 void clear_lowwatermark() { lowater_ = length_; }
2071
2072 ALWAYS_INLINE void Push(void* ptr) {
2073 SLL_Push(&list_, ptr);
2074 length_++;
2075 }
2076
2077 void PushRange(int N, void *start, void *end) {
2078 SLL_PushRange(&list_, start, end);
2079 length_ = length_ + static_cast<uint16_t>(N);
2080 }
2081
2082 void PopRange(int N, void **start, void **end) {
2083 SLL_PopRange(&list_, N, start, end);
2084 ASSERT(length_ >= N);
2085 length_ = length_ - static_cast<uint16_t>(N);
2086 if (length_ < lowater_) lowater_ = length_;
2087 }
2088
2089 ALWAYS_INLINE void* Pop() {
2090 ASSERT(list_ != NULL);
2091 length_--;
2092 if (length_ < lowater_) lowater_ = length_;
2093 return SLL_Pop(&list_);
2094 }
2095
2096 #ifdef WTF_CHANGES
2097 template <class Finder, class Reader>
2098 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2099 {
2100 for (void* nextObject = list_; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2101 finder.visit(nextObject);
2102 }
2103 #endif
2104 };
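// Illustrative sketch (not part of FastMalloc): the free list above is intrusive -- the "next"
// pointer is stored in the first word of each free object itself, so an empty list costs one
// pointer and push/pop are a couple of loads and stores. A minimal standalone demo:
#if 0
#include <cassert>
#include <cstdlib>

static void intrusiveFreeListDemo()
{
    void* head = 0;
    void* a = malloc(32);
    void* b = malloc(32);

    // Push: write the old head into the free object, then point head at it.
    *static_cast<void**>(a) = head; head = a;
    *static_cast<void**>(b) = head; head = b;

    // Pop: read the next pointer back out of the object.
    void* popped = head; head = *static_cast<void**>(head);
    assert(popped == b && head == a);

    free(a);
    free(b);
}
#endif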
2105
2106 //-------------------------------------------------------------------
2107 // Data kept per thread
2108 //-------------------------------------------------------------------
2109
2110 class TCMalloc_ThreadCache {
2111 private:
2112 typedef TCMalloc_ThreadCache_FreeList FreeList;
2113 #if COMPILER(MSVC)
2114 typedef DWORD ThreadIdentifier;
2115 #else
2116 typedef pthread_t ThreadIdentifier;
2117 #endif
2118
2119 size_t size_; // Combined size of data
2120 ThreadIdentifier tid_; // Which thread owns it
2121 bool in_setspecific_; // Called pthread_setspecific?
2122 FreeList list_[kNumClasses]; // Array indexed by size-class
2123
2124 // We sample allocations, biased by the size of the allocation
2125 uint32_t rnd_; // Cheap random number generator
2126 size_t bytes_until_sample_; // Bytes until we sample next
2127
2128 // Allocate a new heap. REQUIRES: pageheap_lock is held.
2129 static inline TCMalloc_ThreadCache* NewHeap(ThreadIdentifier tid);
2130
2131 // Use only as pthread thread-specific destructor function.
2132 static void DestroyThreadCache(void* ptr);
2133 public:
2134 // All ThreadCache objects are kept in a linked list (for stats collection)
2135 TCMalloc_ThreadCache* next_;
2136 TCMalloc_ThreadCache* prev_;
2137
2138 void Init(ThreadIdentifier tid);
2139 void Cleanup();
2140
2141 // Accessors (mostly just for printing stats)
2142 int freelist_length(size_t cl) const { return list_[cl].length(); }
2143
2144 // Total byte size in cache
2145 size_t Size() const { return size_; }
2146
2147 void* Allocate(size_t size);
2148 void Deallocate(void* ptr, size_t size_class);
2149
2150 void FetchFromCentralCache(size_t cl, size_t allocationSize);
2151 void ReleaseToCentralCache(size_t cl, int N);
2152 void Scavenge();
2153 void Print() const;
2154
2155 // Record allocation of "k" bytes. Return true iff allocation
2156 // should be sampled
2157 bool SampleAllocation(size_t k);
2158
2159 // Pick next sampling point
2160 void PickNextSample(size_t k);
2161
2162 static void InitModule();
2163 static void InitTSD();
2164 static TCMalloc_ThreadCache* GetThreadHeap();
2165 static TCMalloc_ThreadCache* GetCache();
2166 static TCMalloc_ThreadCache* GetCacheIfPresent();
2167 static TCMalloc_ThreadCache* CreateCacheIfNecessary();
2168 static void DeleteCache(TCMalloc_ThreadCache* heap);
2169 static void BecomeIdle();
2170 static void RecomputeThreadCacheSize();
2171
2172 #ifdef WTF_CHANGES
2173 template <class Finder, class Reader>
2174 void enumerateFreeObjects(Finder& finder, const Reader& reader)
2175 {
2176 for (unsigned sizeClass = 0; sizeClass < kNumClasses; sizeClass++)
2177 list_[sizeClass].enumerateFreeObjects(finder, reader);
2178 }
2179 #endif
2180 };
2181
2182 //-------------------------------------------------------------------
2183 // Data kept per size-class in central cache
2184 //-------------------------------------------------------------------
2185
2186 class TCMalloc_Central_FreeList {
2187 public:
2188 void Init(size_t cl);
2189
2190 // These methods all do internal locking.
2191
2192 // Insert the specified range into the central freelist. N is the number of
2193 // elements in the range.
2194 void InsertRange(void *start, void *end, int N);
2195
2196 // Removes a range of objects from the central freelist; on return, *N holds the actual number of fetched elements.
2197 void RemoveRange(void **start, void **end, int *N);
2198
2199 // Returns the number of free objects in cache.
2200 size_t length() {
2201 SpinLockHolder h(&lock_);
2202 return counter_;
2203 }
2204
2205 // Returns the number of free objects in the transfer cache.
2206 int tc_length() {
2207 SpinLockHolder h(&lock_);
2208 return used_slots_ * num_objects_to_move[size_class_];
2209 }
2210
2211 #ifdef WTF_CHANGES
2212 template <class Finder, class Reader>
2213 void enumerateFreeObjects(Finder& finder, const Reader& reader, TCMalloc_Central_FreeList* remoteCentralFreeList)
2214 {
2215 for (Span* span = &empty_; span && span != &empty_; span = (span->next ? reader(span->next) : 0))
2216 ASSERT(!span->objects);
2217
2218 ASSERT(!nonempty_.objects);
2219 static const ptrdiff_t nonemptyOffset = reinterpret_cast<const char*>(&nonempty_) - reinterpret_cast<const char*>(this);
2220
2221 Span* remoteNonempty = reinterpret_cast<Span*>(reinterpret_cast<char*>(remoteCentralFreeList) + nonemptyOffset);
2222 Span* remoteSpan = nonempty_.next;
2223
2224 for (Span* span = reader(remoteSpan); span && remoteSpan != remoteNonempty; remoteSpan = span->next, span = (span->next ? reader(span->next) : 0)) {
2225 for (void* nextObject = span->objects; nextObject; nextObject = *reader(reinterpret_cast<void**>(nextObject)))
2226 finder.visit(nextObject);
2227 }
2228 }
2229 #endif
2230
2231 private:
2232 // REQUIRES: lock_ is held
2233 // Remove object from cache and return.
2234 // Return NULL if no free entries in cache.
2235 void* FetchFromSpans();
2236
2237 // REQUIRES: lock_ is held
2238 // Remove object from cache and return. Fetches
2239 // from pageheap if cache is empty. Only returns
2240 // NULL on allocation failure.
2241 void* FetchFromSpansSafe();
2242
2243 // REQUIRES: lock_ is held
2244 // Release a linked list of objects to spans.
2245 // May temporarily release lock_.
2246 void ReleaseListToSpans(void *start);
2247
2248 // REQUIRES: lock_ is held
2249 // Release an object to spans.
2250 // May temporarily release lock_.
2251 void ReleaseToSpans(void* object);
2252
2253 // REQUIRES: lock_ is held
2254 // Populate cache by fetching from the page heap.
2255 // May temporarily release lock_.
2256 void Populate();
2257
2258 // REQUIRES: lock is held.
2259 // Tries to make room for a TCEntry. If the cache is full it will try to
2260 // expand it at the cost of some other cache size. Return false if there is
2261 // no space.
2262 bool MakeCacheSpace();
2263
2264 // REQUIRES: lock_ for locked_size_class is held.
2265 // Picks a "random" size class to steal TCEntry slot from. In reality it
2266 // just iterates over the sizeclasses but does so without taking a lock.
2267 // Returns true on success.
2268 // May temporarily lock a "random" size class.
2269 static bool EvictRandomSizeClass(size_t locked_size_class, bool force);
2270
2271 // REQUIRES: lock_ is *not* held.
2272 // Tries to shrink the cache. If force is true it will release objects to
2273 // spans if that allows it to shrink the cache. Returns false if it failed to
2274 // shrink the cache. Decrements cache_size_ on success.
2275 // May temporarily take lock_. If it takes lock_, the locked_size_class
2276 // lock is released to keep the thread from holding two size class locks
2277 // concurrently, which could lead to a deadlock.
2278 bool ShrinkCache(int locked_size_class, bool force);
2279
2280 // This lock protects all the data members. cached_entries and cache_size_
2281 // may be looked at without holding the lock.
2282 SpinLock lock_;
2283
2284 // We keep linked lists of empty and non-empty spans.
2285 size_t size_class_; // My size class
2286 Span empty_; // Dummy header for list of empty spans
2287 Span nonempty_; // Dummy header for list of non-empty spans
2288 size_t counter_; // Number of free objects in cache entry
2289
2290 // Here we reserve space for TCEntry cache slots. Since one size class can
2291 // end up getting all the TCEntries quota in the system we just preallocate
2292 // sufficient number of entries here.
2293 TCEntry tc_slots_[kNumTransferEntries];
2294
2295 // Number of currently used cached entries in tc_slots_. This variable is
2296 // updated under a lock but can be read without one.
2297 int32_t used_slots_;
2298 // The current number of slots for this size class. This is an
2299 // adaptive value that is increased if there is lots of traffic
2300 // on a given size class.
2301 int32_t cache_size_;
2302 };
2303
2304 // Pad each CentralCache object to multiple of 64 bytes
2305 class TCMalloc_Central_FreeListPadded : public TCMalloc_Central_FreeList {
2306 private:
2307 char pad_[(64 - (sizeof(TCMalloc_Central_FreeList) % 64)) % 64];
2308 };
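// Illustrative sketch (not part of FastMalloc): the pad expression above rounds the object size
// up to the next multiple of 64 bytes so each per-class central free list (and its spin lock)
// sits on its own cache line(s), avoiding false sharing between size classes. A standalone
// check of the arithmetic, using a stand-in 200-byte payload:
#if 0
#include <cstddef>

struct Unpadded { char payload[200]; };                                  // stand-in size
struct Padded : Unpadded { char pad[(64 - (sizeof(Unpadded) % 64)) % 64]; };

// Compile-time check (pre-C++11 style): array size is negative if the padding is wrong.
static char sizeIsMultipleOf64[(sizeof(Padded) % 64) == 0 ? 1 : -1];
#endif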
2309
2310 //-------------------------------------------------------------------
2311 // Global variables
2312 //-------------------------------------------------------------------
2313
2314 // Central cache -- a collection of free-lists, one per size-class.
2315 // We have a separate lock per free-list to reduce contention.
2316 static TCMalloc_Central_FreeListPadded central_cache[kNumClasses];
2317
2318 // Page-level allocator
2319 static SpinLock pageheap_lock = SPINLOCK_INITIALIZER;
2320 static void* pageheap_memory[(sizeof(TCMalloc_PageHeap) + sizeof(void*) - 1) / sizeof(void*)];
2321 static bool phinited = false;
2322
2323 // Avoid extra level of indirection by making "pageheap" be just an alias
2324 // of pageheap_memory.
2325 typedef union {
2326 void* m_memory;
2327 TCMalloc_PageHeap* m_pageHeap;
2328 } PageHeapUnion;
2329
2330 static inline TCMalloc_PageHeap* getPageHeap()
2331 {
2332 PageHeapUnion u = { &pageheap_memory[0] };
2333 return u.m_pageHeap;
2334 }
2335
2336 #define pageheap getPageHeap()
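// Illustrative sketch (not part of FastMalloc): the pageheap_memory/PageHeapUnion trick above
// reserves raw static storage big enough for the object and reads it back through a union of
// "pointer to storage" and "pointer to object", so there is no separate heap allocation and no
// extra pointer indirection. Simplified standalone model (alignment comes from backing the
// storage with an array of void*):
#if 0
#include <cstddef>

struct BigObject { void* slots[16]; };

static void* storage[(sizeof(BigObject) + sizeof(void*) - 1) / sizeof(void*)];

static inline BigObject* getBigObject()
{
    union { void* memory; BigObject* object; } u = { &storage[0] };
    return u.object;
}
#endif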
2337
2338 #if USE_BACKGROUND_THREAD_TO_SCAVENGE_MEMORY
2339
2340 #if !HAVE(DISPATCH_H)
2341 #if OS(WINDOWS)
2342 static void sleep(unsigned seconds)
2343 {
2344 ::Sleep(seconds * 1000);
2345 }
2346 #endif
2347
2348 void TCMalloc_PageHeap::scavengerThread()
2349 {
2350 #if HAVE(PTHREAD_SETNAME_NP)
2351 pthread_setname_np("JavaScriptCore: FastMalloc scavenger");
2352 #endif
2353
2354 while (1) {
2355 if (!shouldContinueScavenging()) {
2356 pthread_mutex_lock(&m_scavengeMutex);
2357 m_scavengeThreadActive = false;
2358 // Block until there are enough freed pages to release back to the system.
2359 pthread_cond_wait(&m_scavengeCondition, &m_scavengeMutex);
2360 m_scavengeThreadActive = true;
2361 pthread_mutex_unlock(&m_scavengeMutex);
2362 }
2363 sleep(kScavengeTimerDelayInSeconds);
2364 {
2365 SpinLockHolder h(&pageheap_lock);
2366 pageheap->scavenge();
2367 }
2368 }
2369 }
2370
2371 #else
2372
2373 void TCMalloc_PageHeap::periodicScavenge()
2374 {
2375 {
2376 SpinLockHolder h(&pageheap_lock);
2377 pageheap->scavenge();
2378 }
2379
2380 if (!shouldContinueScavenging()) {
2381 m_scavengingScheduled = false;
2382 dispatch_suspend(m_scavengeTimer);
2383 }
2384 }
2385 #endif // HAVE(DISPATCH_H)
2386
2387 #endif
2388
2389 // If TLS is available, we also store a copy
2390 // of the per-thread object in a __thread variable
2391 // since __thread variables are faster to read
2392 // than pthread_getspecific(). We still need
2393 // pthread_setspecific() because __thread
2394 // variables provide no way to run cleanup
2395 // code when a thread is destroyed.
2396 #ifdef HAVE_TLS
2397 static __thread TCMalloc_ThreadCache *threadlocal_heap;
2398 #endif
2399 // Thread-specific key. Initialization here is somewhat tricky
2400 // because some Linux startup code invokes malloc() before it
2401 // is in a good enough state to handle pthread_keycreate().
2402 // Therefore, we use TSD keys only after tsd_inited is set to true.
2403 // Until then, we use a slow path to get the heap object.
2404 static bool tsd_inited = false;
2405 static pthread_key_t heap_key;
2406 #if COMPILER(MSVC)
2407 DWORD tlsIndex = TLS_OUT_OF_INDEXES;
2408 #endif
2409
2410 static ALWAYS_INLINE void setThreadHeap(TCMalloc_ThreadCache* heap)
2411 {
2412 // Still do pthread_setspecific() when using MSVC fast TLS to
2413 // benefit from the delete callback.
2414 pthread_setspecific(heap_key, heap);
2415 #if COMPILER(MSVC)
2416 TlsSetValue(tlsIndex, heap);
2417 #endif
2418 }
2419
2420 // Allocator for thread heaps
2421 static PageHeapAllocator<TCMalloc_ThreadCache> threadheap_allocator;
2422
2423 // Linked list of heap objects. Protected by pageheap_lock.
2424 static TCMalloc_ThreadCache* thread_heaps = NULL;
2425 static int thread_heap_count = 0;
2426
2427 // Overall thread cache size. Protected by pageheap_lock.
2428 static size_t overall_thread_cache_size = kDefaultOverallThreadCacheSize;
2429
2430 // Global per-thread cache size. Writes are protected by
2431 // pageheap_lock. Reads are done without any locking, which should be
2432 // fine as long as size_t can be written atomically and we don't place
2433 // invariants between this variable and other pieces of state.
2434 static volatile size_t per_thread_cache_size = kMaxThreadCacheSize;
2435
2436 //-------------------------------------------------------------------
2437 // Central cache implementation
2438 //-------------------------------------------------------------------
2439
2440 void TCMalloc_Central_FreeList::Init(size_t cl) {
2441 lock_.Init();
2442 size_class_ = cl;
2443 DLL_Init(&empty_);
2444 DLL_Init(&nonempty_);
2445 counter_ = 0;
2446
2447 cache_size_ = 1;
2448 used_slots_ = 0;
2449 ASSERT(cache_size_ <= kNumTransferEntries);
2450 }
2451
2452 void TCMalloc_Central_FreeList::ReleaseListToSpans(void* start) {
2453 while (start) {
2454 void *next = SLL_Next(start);
2455 ReleaseToSpans(start);
2456 start = next;
2457 }
2458 }
2459
2460 ALWAYS_INLINE void TCMalloc_Central_FreeList::ReleaseToSpans(void* object) {
2461 const PageID p = reinterpret_cast<uintptr_t>(object) >> kPageShift;
2462 Span* span = pageheap->GetDescriptor(p);
2463 ASSERT(span != NULL);
2464 ASSERT(span->refcount > 0);
2465
2466 // If span is empty, move it to non-empty list
2467 if (span->objects == NULL) {
2468 DLL_Remove(span);
2469 DLL_Prepend(&nonempty_, span);
2470 Event(span, 'N', 0);
2471 }
2472
2473 // The following check is expensive, so it is disabled by default
2474 if (false) {
2475 // Check that object does not occur in list
2476 unsigned got = 0;
2477 for (void* p = span->objects; p != NULL; p = *((void**) p)) {
2478 ASSERT(p != object);
2479 got++;
2480 }
2481 ASSERT(got + span->refcount ==
2482 (span->length<<kPageShift)/ByteSizeForClass(span->sizeclass));
2483 }
2484
2485 counter_++;
2486 span->refcount--;
2487 if (span->refcount == 0) {
2488 Event(span, '#', 0);
2489 counter_ -= (span->length<<kPageShift) / ByteSizeForClass(span->sizeclass);
2490 DLL_Remove(span);
2491
2492 // Release central list lock while operating on pageheap
2493 lock_.Unlock();
2494 {
2495 SpinLockHolder h(&pageheap_lock);
2496 pageheap->Delete(span);
2497 }
2498 lock_.Lock();
2499 } else {
2500 *(reinterpret_cast<void**>(object)) = span->objects;
2501 span->objects = object;
2502 }
2503 }
2504
2505 ALWAYS_INLINE bool TCMalloc_Central_FreeList::EvictRandomSizeClass(
2506 size_t locked_size_class, bool force) {
2507 static int race_counter = 0;
2508 int t = race_counter++; // Updated without a lock, but who cares.
2509 if (t >= static_cast<int>(kNumClasses)) {
2510 while (t >= static_cast<int>(kNumClasses)) {
2511 t -= kNumClasses;
2512 }
2513 race_counter = t;
2514 }
2515 ASSERT(t >= 0);
2516 ASSERT(t < static_cast<int>(kNumClasses));
2517 if (t == static_cast<int>(locked_size_class)) return false;
2518 return central_cache[t].ShrinkCache(static_cast<int>(locked_size_class), force);
2519 }
2520
2521 bool TCMalloc_Central_FreeList::MakeCacheSpace() {
2522 // Is there room in the cache?
2523 if (used_slots_ < cache_size_) return true;
2524 // Check whether we can expand this cache.
2525 if (cache_size_ == kNumTransferEntries) return false;
2526 // Ok, we'll try to grab an entry from some other size class.
2527 if (EvictRandomSizeClass(size_class_, false) ||
2528 EvictRandomSizeClass(size_class_, true)) {
2529 // Succeeded in evicting, we're going to make our cache larger.
2530 cache_size_++;
2531 return true;
2532 }
2533 return false;
2534 }
2535
2536
2537 namespace {
2538 class LockInverter {
2539 private:
2540 SpinLock *held_, *temp_;
2541 public:
2542 inline explicit LockInverter(SpinLock* held, SpinLock *temp)
2543 : held_(held), temp_(temp) { held_->Unlock(); temp_->Lock(); }
2544 inline ~LockInverter() { temp_->Unlock(); held_->Lock(); }
2545 };
2546 }
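// Illustrative sketch (not part of FastMalloc): the LockInverter idea in isolation. A thread
// that already holds lock A and needs lock B releases A first, takes B, and re-acquires A on
// the way out, so no thread ever holds two size-class locks at once and there is no
// lock-ordering deadlock. Sketched here with pthread mutexes rather than SpinLock:
#if 0
#include <pthread.h>

class MutexInverter {
public:
    MutexInverter(pthread_mutex_t* held, pthread_mutex_t* temp)
        : m_held(held), m_temp(temp)
    {
        pthread_mutex_unlock(m_held); // drop the lock we came in with...
        pthread_mutex_lock(m_temp);   // ...before taking the other one
    }
    ~MutexInverter()
    {
        pthread_mutex_unlock(m_temp); // restore the original locking state
        pthread_mutex_lock(m_held);
    }
private:
    pthread_mutex_t* m_held;
    pthread_mutex_t* m_temp;
};
#endif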
2547
2548 bool TCMalloc_Central_FreeList::ShrinkCache(int locked_size_class, bool force) {
2549 // Start with a quick check without taking a lock.
2550 if (cache_size_ == 0) return false;
2551 // We don't evict from a full cache unless we are 'forcing'.
2552 if (force == false && used_slots_ == cache_size_) return false;
2553
2554 // Grab lock, but first release the other lock held by this thread. We use
2555 // the lock inverter to ensure that we never hold two size class locks
2556 // concurrently. That can create a deadlock because there is no well
2557 // defined nesting order.
2558 LockInverter li(&central_cache[locked_size_class].lock_, &lock_);
2559 ASSERT(used_slots_ <= cache_size_);
2560 ASSERT(0 <= cache_size_);
2561 if (cache_size_ == 0) return false;
2562 if (used_slots_ == cache_size_) {
2563 if (force == false) return false;
2564 // ReleaseListToSpans releases the lock, so we have to make all the
2565 // updates to the central list before calling it.
2566 cache_size_--;
2567 used_slots_--;
2568 ReleaseListToSpans(tc_slots_[used_slots_].head);
2569 return true;
2570 }
2571 cache_size_--;
2572 return true;
2573 }
2574
2575 void TCMalloc_Central_FreeList::InsertRange(void *start, void *end, int N) {
2576 SpinLockHolder h(&lock_);
2577 if (N == num_objects_to_move[size_class_] &&
2578 MakeCacheSpace()) {
2579 int slot = used_slots_++;
2580 ASSERT(slot >=0);
2581 ASSERT(slot < kNumTransferEntries);
2582 TCEntry *entry = &tc_slots_[slot];
2583 entry->head = start;
2584 entry->tail = end;
2585 return;
2586 }
2587 ReleaseListToSpans(start);
2588 }
2589
2590 void TCMalloc_Central_FreeList::RemoveRange(void **start, void **end, int *N) {
2591 int num = *N;
2592 ASSERT(num > 0);
2593
2594 SpinLockHolder h(&lock_);
2595 if (num == num_objects_to_move[size_class_] && used_slots_ > 0) {
2596 int slot = --used_slots_;
2597 ASSERT(slot >= 0);
2598 TCEntry *entry = &tc_slots_[slot];
2599 *start = entry->head;
2600 *end = entry->tail;
2601 return;
2602 }
2603
2604 // TODO: Prefetch multiple TCEntries?
2605 void *tail = FetchFromSpansSafe();
2606 if (!tail) {
2607 // We are completely out of memory.
2608 *start = *end = NULL;
2609 *N = 0;
2610 return;
2611 }
2612
2613 SLL_SetNext(tail, NULL);
2614 void *head = tail;
2615 int count = 1;
2616 while (count < num) {
2617 void *t = FetchFromSpans();
2618 if (!t) break;
2619 SLL_Push(&head, t);
2620 count++;
2621 }
2622 *start = head;
2623 *end = tail;
2624 *N = count;
2625 }
2626
2627
2628 void* TCMalloc_Central_FreeList::FetchFromSpansSafe() {
2629 void *t = FetchFromSpans();
2630 if (!t) {
2631 Populate();
2632 t = FetchFromSpans();
2633 }
2634 return t;
2635 }
2636
2637 void* TCMalloc_Central_FreeList::FetchFromSpans() {
2638 if (DLL_IsEmpty(&nonempty_)) return NULL;
2639 Span* span = nonempty_.next;
2640
2641 ASSERT(span->objects != NULL);
2642 ASSERT_SPAN_COMMITTED(span);
2643 span->refcount++;
2644 void* result = span->objects;
2645 span->objects = *(reinterpret_cast<void**>(result));
2646 if (span->objects == NULL) {
2647 // Move to empty list
2648 DLL_Remove(span);
2649 DLL_Prepend(&empty_, span);
2650 Event(span, 'E', 0);
2651 }
2652 counter_--;
2653 return result;
2654 }
2655
2656 // Fetch memory from the system and add to the central cache freelist.
2657 ALWAYS_INLINE void TCMalloc_Central_FreeList::Populate() {
2658 // Release central list lock while operating on pageheap
2659 lock_.Unlock();
2660 const size_t npages = class_to_pages[size_class_];
2661
2662 Span* span;
2663 {
2664 SpinLockHolder h(&pageheap_lock);
2665 span = pageheap->New(npages);
2666 if (span) pageheap->RegisterSizeClass(span, size_class_);
2667 }
2668 if (span == NULL) {
2669 MESSAGE("allocation failed: %d\n", errno);
2670 lock_.Lock();
2671 return;
2672 }
2673 ASSERT_SPAN_COMMITTED(span);
2674 ASSERT(span->length == npages);
2675 // Cache sizeclass info eagerly. Locking is not necessary.
2676 // (Instead of being eager, we could just replace any stale info
2677 // about this span, but that seems to be no better in practice.)
2678 for (size_t i = 0; i < npages; i++) {
2679 pageheap->CacheSizeClass(span->start + i, size_class_);
2680 }
2681
2682 // Split the block into pieces and add to the free-list
2683 // TODO: coloring of objects to avoid cache conflicts?
2684 void** tail = &span->objects;
2685 char* ptr = reinterpret_cast<char*>(span->start << kPageShift);
2686 char* limit = ptr + (npages << kPageShift);
2687 const size_t size = ByteSizeForClass(size_class_);
2688 int num = 0;
2689 char* nptr;
2690 while ((nptr = ptr + size) <= limit) {
2691 *tail = ptr;
2692 tail = reinterpret_cast<void**>(ptr);
2693 ptr = nptr;
2694 num++;
2695 }
2696 ASSERT(ptr <= limit);
2697 *tail = NULL;
2698 span->refcount = 0; // No sub-object in use yet
2699
2700 // Add span to list of non-empty spans
2701 lock_.Lock();
2702 DLL_Prepend(&nonempty_, span);
2703 counter_ += num;
2704 }
2705
2706 //-------------------------------------------------------------------
2707 // TCMalloc_ThreadCache implementation
2708 //-------------------------------------------------------------------
2709
2710 inline bool TCMalloc_ThreadCache::SampleAllocation(size_t k) {
2711 if (bytes_until_sample_ < k) {
2712 PickNextSample(k);
2713 return true;
2714 } else {
2715 bytes_until_sample_ -= k;
2716 return false;
2717 }
2718 }
2719
2720 void TCMalloc_ThreadCache::Init(ThreadIdentifier tid) {
2721 size_ = 0;
2722 next_ = NULL;
2723 prev_ = NULL;
2724 tid_ = tid;
2725 in_setspecific_ = false;
2726 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2727 list_[cl].Init();
2728 }
2729
2730 // Initialize RNG -- run it for a bit to get to good values
2731 bytes_until_sample_ = 0;
2732 rnd_ = static_cast<uint32_t>(reinterpret_cast<uintptr_t>(this));
2733 for (int i = 0; i < 100; i++) {
2734 PickNextSample(static_cast<size_t>(FLAGS_tcmalloc_sample_parameter * 2));
2735 }
2736 }
2737
2738 void TCMalloc_ThreadCache::Cleanup() {
2739 // Put unused memory back into central cache
2740 for (size_t cl = 0; cl < kNumClasses; ++cl) {
2741 if (list_[cl].length() > 0) {
2742 ReleaseToCentralCache(cl, list_[cl].length());
2743 }
2744 }
2745 }
2746
2747 ALWAYS_INLINE void* TCMalloc_ThreadCache::Allocate(size_t size) {
2748 ASSERT(size <= kMaxSize);
2749 const size_t cl = SizeClass(size);
2750 FreeList* list = &list_[cl];
2751 size_t allocationSize = ByteSizeForClass(cl);
2752 if (list->empty()) {
2753 FetchFromCentralCache(cl, allocationSize);
2754 if (list->empty()) return NULL;
2755 }
2756 size_ -= allocationSize;
2757 return list->Pop();
2758 }
2759
2760 inline void TCMalloc_ThreadCache::Deallocate(void* ptr, size_t cl) {
2761 size_ += ByteSizeForClass(cl);
2762 FreeList* list = &list_[cl];
2763 list->Push(ptr);
2764 // If enough data is free, put back into central cache
2765 if (list->length() > kMaxFreeListLength) {
2766 ReleaseToCentralCache(cl, num_objects_to_move[cl]);
2767 }
2768 if (size_ >= per_thread_cache_size) Scavenge();
2769 }
2770
2771 // Remove some objects of class "cl" from central cache and add to thread heap
2772 ALWAYS_INLINE void TCMalloc_ThreadCache::FetchFromCentralCache(size_t cl, size_t allocationSize) {
2773 int fetch_count = num_objects_to_move[cl];
2774 void *start, *end;
2775 central_cache[cl].RemoveRange(&start, &end, &fetch_count);
2776 list_[cl].PushRange(fetch_count, start, end);
2777 size_ += allocationSize * fetch_count;
2778 }
2779
2780 // Remove some objects of class "cl" from thread heap and add to central cache
2781 inline void TCMalloc_ThreadCache::ReleaseToCentralCache(size_t cl, int N) {
2782 ASSERT(N > 0);
2783 FreeList* src = &list_[cl];
2784 if (N > src->length()) N = src->length();
2785 size_ -= N*ByteSizeForClass(cl);
2786
2787 // We return prepackaged chains of the correct size to the central cache.
2788 // TODO: Use the same format internally in the thread caches?
2789 int batch_size = num_objects_to_move[cl];
2790 while (N > batch_size) {
2791 void *tail, *head;
2792 src->PopRange(batch_size, &head, &tail);
2793 central_cache[cl].InsertRange(head, tail, batch_size);
2794 N -= batch_size;
2795 }
2796 void *tail, *head;
2797 src->PopRange(N, &head, &tail);
2798 central_cache[cl].InsertRange(head, tail, N);
2799 }
2800
2801 // Release idle memory to the central cache
2802 inline void TCMalloc_ThreadCache::Scavenge() {
2803 // If the low-water mark for the free list is L, it means we would
2804 // not have had to allocate anything from the central cache even if
2805 // we had reduced the free list size by L. We aim to get closer to
2806 // that situation by dropping L/2 nodes from the free list. This
2807 // may not release much memory, but if so we will call scavenge again
2808 // pretty soon and the low-water marks will be high on that call.
2809 //int64 start = CycleClock::Now();
2810
2811 for (size_t cl = 0; cl < kNumClasses; cl++) {
2812 FreeList* list = &list_[cl];
2813 const int lowmark = list->lowwatermark();
2814 if (lowmark > 0) {
2815 const int drop = (lowmark > 1) ? lowmark/2 : 1;
2816 ReleaseToCentralCache(cl, drop);
2817 }
2818 list->clear_lowwatermark();
2819 }
2820
2821 //int64 finish = CycleClock::Now();
2822 //CycleTimer ct;
2823 //MESSAGE("GC: %.0f ns\n", ct.CyclesToUsec(finish-start)*1000.0);
2824 }
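// Illustrative sketch (not part of FastMalloc): the low-water-mark heuristic above in isolation.
// If a free list never dropped below L objects since the last scavenge, the thread could have
// run with L fewer cached objects, so roughly L/2 of them are returned to the central cache now.
#if 0
#include <cstddef>

static size_t objectsToDrop(size_t lowWaterMark)
{
    if (lowWaterMark == 0)
        return 0;                                        // list was fully drained; keep what it has
    return lowWaterMark > 1 ? lowWaterMark / 2 : 1;      // e.g. lowmark 10 -> drop 5 objects
}
#endif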
2825
2826 void TCMalloc_ThreadCache::PickNextSample(size_t k) {
2827 // Make next "random" number
2828 // x^32+x^22+x^2+x^1+1 is a primitive polynomial for random numbers
2829 static const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
2830 uint32_t r = rnd_;
2831 rnd_ = (r << 1) ^ ((static_cast<int32_t>(r) >> 31) & kPoly);
2832
2833 // Next point is "rnd_ % (sample_period)". I.e., average
2834 // increment is "sample_period/2".
2835 const int flag_value = static_cast<int>(FLAGS_tcmalloc_sample_parameter);
2836 static int last_flag_value = -1;
2837
2838 if (flag_value != last_flag_value) {
2839 SpinLockHolder h(&sample_period_lock);
2840 int i;
2841 for (i = 0; i < (static_cast<int>(sizeof(primes_list)/sizeof(primes_list[0])) - 1); i++) {
2842 if (primes_list[i] >= flag_value) {
2843 break;
2844 }
2845 }
2846 sample_period = primes_list[i];
2847 last_flag_value = flag_value;
2848 }
2849
2850 bytes_until_sample_ += rnd_ % sample_period;
2851
2852 if (k > (static_cast<size_t>(-1) >> 2)) {
2853 // If the user has asked for a huge allocation then it is possible
2854 // for the code below to loop infinitely. Just return (note that
2855 // this throws off the sampling accuracy somewhat, but a user who
2856 // is allocating more than 1G of memory at a time can live with a
2857 // minor inaccuracy in profiling of small allocations, and also
2858 // would rather not wait for the loop below to terminate).
2859 return;
2860 }
2861
2862 while (bytes_until_sample_ < k) {
2863 // Increase bytes_until_sample_ by enough average sampling periods
2864 // (sample_period >> 1) to allow us to sample past the current
2865 // allocation.
2866 bytes_until_sample_ += (sample_period >> 1);
2867 }
2868
2869 bytes_until_sample_ -= k;
2870 }
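// Illustrative sketch (not part of FastMalloc): the sampling RNG above is a 32-bit Galois LFSR.
// Each step shifts left by one and, when the bit that falls off the top is set, XORs in the
// low-order taps of the primitive polynomial x^32 + x^22 + x^2 + x + 1 (kPoly); a primitive
// polynomial gives the generator its maximal period over nonzero states.
#if 0
#include <stdint.h>

static inline uint32_t lfsrStep(uint32_t r)
{
    const uint32_t kPoly = (1 << 22) | (1 << 2) | (1 << 1) | (1 << 0);
    // (int32_t(r) >> 31) is all ones exactly when the top bit of r is set.
    return (r << 1) ^ (static_cast<uint32_t>(static_cast<int32_t>(r) >> 31) & kPoly);
}
#endif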
2871
2872 void TCMalloc_ThreadCache::InitModule() {
2873 // There is a slight potential race here because of the double-checked
2874 // locking idiom. However, as long as the program does a small
2875 // allocation before switching to multi-threaded mode, we will be
2876 // fine. We increase the chances of doing such a small allocation
2877 // by doing one in the constructor of the module_enter_exit_hook
2878 // object declared below.
2879 SpinLockHolder h(&pageheap_lock);
2880 if (!phinited) {
2881 #ifdef WTF_CHANGES
2882 InitTSD();
2883 #endif
2884 InitSizeClasses();
2885 threadheap_allocator.Init();
2886 span_allocator.Init();
2887 span_allocator.New(); // Reduce cache conflicts
2888 span_allocator.New(); // Reduce cache conflicts
2889 stacktrace_allocator.Init();
2890 DLL_Init(&sampled_objects);
2891 for (size_t i = 0; i < kNumClasses; ++i) {
2892 central_cache[i].Init(i);
2893 }
2894 pageheap->init();
2895 phinited = 1;
2896 #if defined(WTF_CHANGES) && OS(DARWIN)
2897 FastMallocZone::init();
2898 #endif
2899 }
2900 }
2901
2902 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::NewHeap(ThreadIdentifier tid) {
2903 // Create the heap and add it to the linked list
2904 TCMalloc_ThreadCache *heap = threadheap_allocator.New();
2905 heap->Init(tid);
2906 heap->next_ = thread_heaps;
2907 heap->prev_ = NULL;
2908 if (thread_heaps != NULL) thread_heaps->prev_ = heap;
2909 thread_heaps = heap;
2910 thread_heap_count++;
2911 RecomputeThreadCacheSize();
2912 return heap;
2913 }
2914
2915 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetThreadHeap() {
2916 #ifdef HAVE_TLS
2917 // __thread is faster, but only when the kernel supports it
2918 if (KernelSupportsTLS())
2919 return threadlocal_heap;
2920 #elif COMPILER(MSVC)
2921 return static_cast<TCMalloc_ThreadCache*>(TlsGetValue(tlsIndex));
2922 #else
2923 return static_cast<TCMalloc_ThreadCache*>(pthread_getspecific(heap_key));
2924 #endif
2925 }
2926
2927 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCache() {
2928 TCMalloc_ThreadCache* ptr = NULL;
2929 if (!tsd_inited) {
2930 InitModule();
2931 } else {
2932 ptr = GetThreadHeap();
2933 }
2934 if (ptr == NULL) ptr = CreateCacheIfNecessary();
2935 return ptr;
2936 }
2937
2938 // In deletion paths, we do not try to create a thread-cache. This is
2939 // because we may be in the thread destruction code and may have
2940 // already cleaned up the cache for this thread.
2941 inline TCMalloc_ThreadCache* TCMalloc_ThreadCache::GetCacheIfPresent() {
2942 if (!tsd_inited) return NULL;
2943 void* const p = GetThreadHeap();
2944 return reinterpret_cast<TCMalloc_ThreadCache*>(p);
2945 }
2946
2947 void TCMalloc_ThreadCache::InitTSD() {
2948 ASSERT(!tsd_inited);
2949 pthread_key_create(&heap_key, DestroyThreadCache);
2950 #if COMPILER(MSVC)
2951 tlsIndex = TlsAlloc();
2952 #endif
2953 tsd_inited = true;
2954
2955 #if !COMPILER(MSVC)
2956 // We may have used a fake pthread_t for the main thread. Fix it.
2957 pthread_t zero;
2958 memset(&zero, 0, sizeof(zero));
2959 #endif
2960 #ifndef WTF_CHANGES
2961 SpinLockHolder h(&pageheap_lock);
2962 #else
2963 ASSERT(pageheap_lock.IsHeld());
2964 #endif
2965 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
2966 #if COMPILER(MSVC)
2967 if (h->tid_ == 0) {
2968 h->tid_ = GetCurrentThreadId();
2969 }
2970 #else
2971 if (pthread_equal(h->tid_, zero)) {
2972 h->tid_ = pthread_self();
2973 }
2974 #endif
2975 }
2976 }
2977
2978 TCMalloc_ThreadCache* TCMalloc_ThreadCache::CreateCacheIfNecessary() {
2979 // Initialize per-thread data if necessary
2980 TCMalloc_ThreadCache* heap = NULL;
2981 {
2982 SpinLockHolder lockholder(&pageheap_lock);
2983
2984 #if COMPILER(MSVC)
2985 DWORD me;
2986 if (!tsd_inited) {
2987 me = 0;
2988 } else {
2989 me = GetCurrentThreadId();
2990 }
2991 #else
2992 // Early on in glibc's life, we cannot even call pthread_self()
2993 pthread_t me;
2994 if (!tsd_inited) {
2995 memset(&me, 0, sizeof(me));
2996 } else {
2997 me = pthread_self();
2998 }
2999 #endif
3000
3001 // This may be a recursive malloc call from pthread_setspecific()
3002 // In that case, the heap for this thread has already been created
3003 // and added to the linked list. So we search for that first.
3004 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3005 #if COMPILER(MSVC)
3006 if (h->tid_ == me) {
3007 #else
3008 if (pthread_equal(h->tid_, me)) {
3009 #endif
3010 heap = h;
3011 break;
3012 }
3013 }
3014
3015 if (heap == NULL) heap = NewHeap(me);
3016 }
3017
3018 // We call pthread_setspecific() outside the lock because it may
3019 // call malloc() recursively. The recursive call will never get
3020 // here again because it will find the already allocated heap in the
3021 // linked list of heaps.
3022 if (!heap->in_setspecific_ && tsd_inited) {
3023 heap->in_setspecific_ = true;
3024 setThreadHeap(heap);
3025 }
3026 return heap;
3027 }
3028
3029 void TCMalloc_ThreadCache::BecomeIdle() {
3030 if (!tsd_inited) return; // No caches yet
3031 TCMalloc_ThreadCache* heap = GetThreadHeap();
3032 if (heap == NULL) return; // No thread cache to remove
3033 if (heap->in_setspecific_) return; // Do not disturb the active caller
3034
3035 heap->in_setspecific_ = true;
3036 pthread_setspecific(heap_key, NULL);
3037 #ifdef HAVE_TLS
3038 // Also update the copy in __thread
3039 threadlocal_heap = NULL;
3040 #endif
3041 heap->in_setspecific_ = false;
3042 if (GetThreadHeap() == heap) {
3043 // Somehow heap got reinstated by a recursive call to malloc
3044 // from pthread_setspecific. We give up in this case.
3045 return;
3046 }
3047
3048 // We can now get rid of the heap
3049 DeleteCache(heap);
3050 }
3051
3052 void TCMalloc_ThreadCache::DestroyThreadCache(void* ptr) {
3053 // Note that "ptr" cannot be NULL since pthread promises not
3054 // to invoke the destructor on NULL values, but for safety,
3055 // we check anyway.
3056 if (ptr == NULL) return;
3057 #ifdef HAVE_TLS
3058 // Prevent fast path of GetThreadHeap() from returning heap.
3059 threadlocal_heap = NULL;
3060 #endif
3061 DeleteCache(reinterpret_cast<TCMalloc_ThreadCache*>(ptr));
3062 }
3063
3064 void TCMalloc_ThreadCache::DeleteCache(TCMalloc_ThreadCache* heap) {
3065 // Remove all memory from heap
3066 heap->Cleanup();
3067
3068 // Remove from linked list
3069 SpinLockHolder h(&pageheap_lock);
3070 if (heap->next_ != NULL) heap->next_->prev_ = heap->prev_;
3071 if (heap->prev_ != NULL) heap->prev_->next_ = heap->next_;
3072 if (thread_heaps == heap) thread_heaps = heap->next_;
3073 thread_heap_count--;
3074 RecomputeThreadCacheSize();
3075
3076 threadheap_allocator.Delete(heap);
3077 }
3078
3079 void TCMalloc_ThreadCache::RecomputeThreadCacheSize() {
3080 // Divide available space across threads
3081 int n = thread_heap_count > 0 ? thread_heap_count : 1;
3082 size_t space = overall_thread_cache_size / n;
3083
3084 // Limit to allowed range
3085 if (space < kMinThreadCacheSize) space = kMinThreadCacheSize;
3086 if (space > kMaxThreadCacheSize) space = kMaxThreadCacheSize;
3087
3088 per_thread_cache_size = space;
3089 }
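// Illustrative sketch (not part of FastMalloc): the computation above divides the overall
// thread-cache budget evenly across live threads and clamps the result to a per-thread range.
// The two constants below are placeholders for kMinThreadCacheSize / kMaxThreadCacheSize,
// whose real values are defined elsewhere in this file.
#if 0
#include <cstddef>

static size_t perThreadCacheSize(size_t overallBudget, int threadCount)
{
    const size_t minThreadCacheSize = 64 * 1024;        // placeholder value
    const size_t maxThreadCacheSize = 2 * 1024 * 1024;  // placeholder value
    int n = threadCount > 0 ? threadCount : 1;          // avoid dividing by zero
    size_t space = overallBudget / n;
    if (space < minThreadCacheSize) space = minThreadCacheSize;
    if (space > maxThreadCacheSize) space = maxThreadCacheSize;
    return space;
}
#endif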
3090
3091 void TCMalloc_ThreadCache::Print() const {
3092 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3093 MESSAGE(" %5" PRIuS " : %4d len; %4d lo\n",
3094 ByteSizeForClass(cl),
3095 list_[cl].length(),
3096 list_[cl].lowwatermark());
3097 }
3098 }
3099
3100 // Extract interesting stats
3101 struct TCMallocStats {
3102 uint64_t system_bytes; // Bytes alloced from system
3103 uint64_t thread_bytes; // Bytes in thread caches
3104 uint64_t central_bytes; // Bytes in central cache
3105 uint64_t transfer_bytes; // Bytes in central transfer cache
3106 uint64_t pageheap_bytes; // Bytes in page heap
3107 uint64_t metadata_bytes; // Bytes alloced for metadata
3108 };
3109
3110 #ifndef WTF_CHANGES
3111 // Get stats into "r". Also get per-size-class counts if class_count != NULL
3112 static void ExtractStats(TCMallocStats* r, uint64_t* class_count) {
3113 r->central_bytes = 0;
3114 r->transfer_bytes = 0;
3115 for (int cl = 0; cl < kNumClasses; ++cl) {
3116 const int length = central_cache[cl].length();
3117 const int tc_length = central_cache[cl].tc_length();
3118 r->central_bytes += static_cast<uint64_t>(ByteSizeForClass(cl)) * length;
3119 r->transfer_bytes +=
3120 static_cast<uint64_t>(ByteSizeForClass(cl)) * tc_length;
3121 if (class_count) class_count[cl] = length + tc_length;
3122 }
3123
3124 // Add stats from per-thread heaps
3125 r->thread_bytes = 0;
3126 { // scope
3127 SpinLockHolder h(&pageheap_lock);
3128 for (TCMalloc_ThreadCache* h = thread_heaps; h != NULL; h = h->next_) {
3129 r->thread_bytes += h->Size();
3130 if (class_count) {
3131 for (size_t cl = 0; cl < kNumClasses; ++cl) {
3132 class_count[cl] += h->freelist_length(cl);
3133 }
3134 }
3135 }
3136 }
3137
3138 { //scope
3139 SpinLockHolder h(&pageheap_lock);
3140 r->system_bytes = pageheap->SystemBytes();
3141 r->metadata_bytes = metadata_system_bytes;
3142 r->pageheap_bytes = pageheap->FreeBytes();
3143 }
3144 }
3145 #endif
3146
3147 #ifndef WTF_CHANGES
3148 // WRITE stats to "out"
3149 static void DumpStats(TCMalloc_Printer* out, int level) {
3150 TCMallocStats stats;
3151 uint64_t class_count[kNumClasses];
3152 ExtractStats(&stats, (level >= 2 ? class_count : NULL));
3153
3154 if (level >= 2) {
3155 out->printf("------------------------------------------------\n");
3156 uint64_t cumulative = 0;
3157 for (int cl = 0; cl < kNumClasses; ++cl) {
3158 if (class_count[cl] > 0) {
3159 uint64_t class_bytes = class_count[cl] * ByteSizeForClass(cl);
3160 cumulative += class_bytes;
3161 out->printf("class %3d [ %8" PRIuS " bytes ] : "
3162 "%8" PRIu64 " objs; %5.1f MB; %5.1f cum MB\n",
3163 cl, ByteSizeForClass(cl),
3164 class_count[cl],
3165 class_bytes / 1048576.0,
3166 cumulative / 1048576.0);
3167 }
3168 }
3169
3170 SpinLockHolder h(&pageheap_lock);
3171 pageheap->Dump(out);
3172 }
3173
3174 const uint64_t bytes_in_use = stats.system_bytes
3175 - stats.pageheap_bytes
3176 - stats.central_bytes
3177 - stats.transfer_bytes
3178 - stats.thread_bytes;
3179
3180 out->printf("------------------------------------------------\n"
3181 "MALLOC: %12" PRIu64 " Heap size\n"
3182 "MALLOC: %12" PRIu64 " Bytes in use by application\n"
3183 "MALLOC: %12" PRIu64 " Bytes free in page heap\n"
3184 "MALLOC: %12" PRIu64 " Bytes free in central cache\n"
3185 "MALLOC: %12" PRIu64 " Bytes free in transfer cache\n"
3186 "MALLOC: %12" PRIu64 " Bytes free in thread caches\n"
3187 "MALLOC: %12" PRIu64 " Spans in use\n"
3188 "MALLOC: %12" PRIu64 " Thread heaps in use\n"
3189 "MALLOC: %12" PRIu64 " Metadata allocated\n"
3190 "------------------------------------------------\n",
3191 stats.system_bytes,
3192 bytes_in_use,
3193 stats.pageheap_bytes,
3194 stats.central_bytes,
3195 stats.transfer_bytes,
3196 stats.thread_bytes,
3197 uint64_t(span_allocator.inuse()),
3198 uint64_t(threadheap_allocator.inuse()),
3199 stats.metadata_bytes);
3200 }
3201
3202 static void PrintStats(int level) {
3203 const int kBufferSize = 16 << 10;
3204 char* buffer = new char[kBufferSize];
3205 TCMalloc_Printer printer(buffer, kBufferSize);
3206 DumpStats(&printer, level);
3207 write(STDERR_FILENO, buffer, strlen(buffer));
3208 delete[] buffer;
3209 }
3210
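// Returns a new[]-allocated array describing the currently sampled allocations;
// the caller owns it. Each record occupies 3 + depth slots: a count (always 1
// here), the sampled allocation's size, the stack depth, then that many program
// counters. The list is terminated by a 0 in the count slot.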
3211 static void** DumpStackTraces() {
3212 // Count how much space we need
3213 int needed_slots = 0;
3214 {
3215 SpinLockHolder h(&pageheap_lock);
3216 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3217 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3218 needed_slots += 3 + stack->depth;
3219 }
3220 needed_slots += 100; // Slop in case sample grows
3221 needed_slots += needed_slots/8; // An extra 12.5% slop
3222 }
3223
3224 void** result = new void*[needed_slots];
3225 if (result == NULL) {
3226 MESSAGE("tcmalloc: could not allocate %d slots for stack traces\n",
3227 needed_slots);
3228 return NULL;
3229 }
3230
3231 SpinLockHolder h(&pageheap_lock);
3232 int used_slots = 0;
3233 for (Span* s = sampled_objects.next; s != &sampled_objects; s = s->next) {
3234 ASSERT(used_slots < needed_slots); // Need to leave room for terminator
3235 StackTrace* stack = reinterpret_cast<StackTrace*>(s->objects);
3236 if (used_slots + 3 + stack->depth >= needed_slots) {
3237 // No more room
3238 break;
3239 }
3240
3241 result[used_slots+0] = reinterpret_cast<void*>(static_cast<uintptr_t>(1));
3242 result[used_slots+1] = reinterpret_cast<void*>(stack->size);
3243 result[used_slots+2] = reinterpret_cast<void*>(stack->depth);
3244 for (int d = 0; d < stack->depth; d++) {
3245 result[used_slots+3+d] = stack->stack[d];
3246 }
3247 used_slots += 3 + stack->depth;
3248 }
3249 result[used_slots] = reinterpret_cast<void*>(static_cast<uintptr_t>(0));
3250 return result;
3251 }
3252 #endif
3253
3254 #ifndef WTF_CHANGES
3255
3256 // TCMalloc's support for extra malloc interfaces
3257 class TCMallocImplementation : public MallocExtension {
3258 public:
3259 virtual void GetStats(char* buffer, int buffer_length) {
3260 ASSERT(buffer_length > 0);
3261 TCMalloc_Printer printer(buffer, buffer_length);
3262
3263 // Print level one stats unless lots of space is available
3264 if (buffer_length < 10000) {
3265 DumpStats(&printer, 1);
3266 } else {
3267 DumpStats(&printer, 2);
3268 }
3269 }
3270
3271 virtual void** ReadStackTraces() {
3272 return DumpStackTraces();
3273 }
3274
3275 virtual bool GetNumericProperty(const char* name, size_t* value) {
3276 ASSERT(name != NULL);
3277
3278 if (strcmp(name, "generic.current_allocated_bytes") == 0) {
3279 TCMallocStats stats;
3280 ExtractStats(&stats, NULL);
3281 *value = stats.system_bytes
3282 - stats.thread_bytes
3283 - stats.central_bytes
3284 - stats.pageheap_bytes;
3285 return true;
3286 }
3287
3288 if (strcmp(name, "generic.heap_size") == 0) {
3289 TCMallocStats stats;
3290 ExtractStats(&stats, NULL);
3291 *value = stats.system_bytes;
3292 return true;
3293 }
3294
3295 if (strcmp(name, "tcmalloc.slack_bytes") == 0) {
3296 // We assume that bytes in the page heap are not fragmented too
3297 // badly, and are therefore available for allocation.
3298 SpinLockHolder l(&pageheap_lock);
3299 *value = pageheap->FreeBytes();
3300 return true;
3301 }
3302
3303 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3304 SpinLockHolder l(&pageheap_lock);
3305 *value = overall_thread_cache_size;
3306 return true;
3307 }
3308
3309 if (strcmp(name, "tcmalloc.current_total_thread_cache_bytes") == 0) {
3310 TCMallocStats stats;
3311 ExtractStats(&stats, NULL);
3312 *value = stats.thread_bytes;
3313 return true;
3314 }
3315
3316 return false;
3317 }
3318
3319 virtual bool SetNumericProperty(const char* name, size_t value) {
3320 ASSERT(name != NULL);
3321
3322 if (strcmp(name, "tcmalloc.max_total_thread_cache_bytes") == 0) {
3323 // Clip the value to a reasonable range
3324 if (value < kMinThreadCacheSize) value = kMinThreadCacheSize;
3325 if (value > (1<<30)) value = (1<<30); // Limit to 1GB
3326
3327 SpinLockHolder l(&pageheap_lock);
3328 overall_thread_cache_size = static_cast<size_t>(value);
3329 TCMalloc_ThreadCache::RecomputeThreadCacheSize();
3330 return true;
3331 }
3332
3333 return false;
3334 }
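// Usage sketch (hypothetical caller; assumes the MallocExtension::instance()
// accessor from the standalone tcmalloc build is available):
//
//   size_t heapSize = 0;
//   if (MallocExtension::instance()->GetNumericProperty("generic.heap_size", &heapSize))
//     fprintf(stderr, "tcmalloc heap size: %zu bytes\n", heapSize);
//   // Cap the combined thread-cache size at 16 MB.
//   MallocExtension::instance()->SetNumericProperty(
//       "tcmalloc.max_total_thread_cache_bytes", 16u << 20);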
3335
3336 virtual void MarkThreadIdle() {
3337 TCMalloc_ThreadCache::BecomeIdle();
3338 }
3339
3340 virtual void ReleaseFreeMemory() {
3341 SpinLockHolder h(&pageheap_lock);
3342 pageheap->ReleaseFreePages();
3343 }
3344 };
3345 #endif
3346
3347 // The constructor allocates an object to ensure that initialization
3348 // runs before main(), and therefore we do not have a chance to become
3349 // multi-threaded before initialization. We also create the TSD key
3350 // here. Presumably by the time this constructor runs, glibc is in
3351 // good enough shape to handle pthread_key_create().
3352 //
3353 // The constructor also takes the opportunity to tell STL to use
3354 // tcmalloc. We want to do this early, before static constructors run, so
3355 // all user STL allocations go through tcmalloc (which works really
3356 // well for STL).
3357 //
3358 // The destructor prints stats when the program exits.
3359 class TCMallocGuard {
3360 public:
3361
3362 TCMallocGuard() {
3363 #ifdef HAVE_TLS // this is true if the cc/ld/libc combo support TLS
3364 // Check whether the kernel also supports TLS (needs to happen at runtime)
3365 CheckIfKernelSupportsTLS();
3366 #endif
3367 #ifndef WTF_CHANGES
3368 #ifdef WIN32 // patch the windows VirtualAlloc, etc.
3369 PatchWindowsFunctions(); // defined in windows/patch_functions.cc
3370 #endif
3371 #endif
3372 free(malloc(1));
3373 TCMalloc_ThreadCache::InitTSD();
3374 free(malloc(1));
3375 #ifndef WTF_CHANGES
3376 MallocExtension::Register(new TCMallocImplementation);
3377 #endif
3378 }
3379
3380 #ifndef WTF_CHANGES
3381 ~TCMallocGuard() {
3382 const char* env = getenv("MALLOCSTATS");
3383 if (env != NULL) {
3384 int level = atoi(env);
3385 if (level < 1) level = 1;
3386 PrintStats(level);
3387 }
3388 #ifdef WIN32
3389 UnpatchWindowsFunctions();
3390 #endif
3391 }
3392 #endif
3393 };
3394
3395 #ifndef WTF_CHANGES
3396 static TCMallocGuard module_enter_exit_hook;
3397 #endif
3398
3399
3400 //-------------------------------------------------------------------
3401 // Helpers for the exported routines below
3402 //-------------------------------------------------------------------
3403
3404 #ifndef WTF_CHANGES
3405
3406 static Span* DoSampledAllocation(size_t size) {
3407
3408 // Grab the stack trace outside the heap lock
3409 StackTrace tmp;
3410 tmp.depth = GetStackTrace(tmp.stack, kMaxStackDepth, 1);
3411 tmp.size = size;
3412
3413 SpinLockHolder h(&pageheap_lock);
3414 // Allocate span
3415 Span *span = pageheap->New(pages(size == 0 ? 1 : size));
3416 if (span == NULL) {
3417 return NULL;
3418 }
3419
3420 // Allocate stack trace
3421 StackTrace *stack = stacktrace_allocator.New();
3422 if (stack == NULL) {
3423 // Sampling failed because of lack of memory
3424 return span;
3425 }
3426
3427 *stack = tmp;
3428 span->sample = 1;
3429 span->objects = stack;
3430 DLL_Prepend(&sampled_objects, span);
3431
3432 return span;
3433 }
3434 #endif
3435
3436 static inline bool CheckCachedSizeClass(void *ptr) {
3437 PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3438 size_t cached_value = pageheap->GetSizeClassIfCached(p);
3439 return cached_value == 0 ||
3440 cached_value == pageheap->GetDescriptor(p)->sizeclass;
3441 }
3442
3443 static inline void* CheckedMallocResult(void *result)
3444 {
3445 ASSERT(result == 0 || CheckCachedSizeClass(result));
3446 return result;
3447 }
3448
3449 static inline void* SpanToMallocResult(Span *span) {
3450 ASSERT_SPAN_COMMITTED(span);
3451 pageheap->CacheSizeClass(span->start, 0);
3452 return
3453 CheckedMallocResult(reinterpret_cast<void*>(span->start << kPageShift));
3454 }
3455
3456 #ifdef WTF_CHANGES
3457 template <bool crashOnFailure>
3458 #endif
3459 static ALWAYS_INLINE void* do_malloc(size_t size) {
3460 void* ret = NULL;
3461
3462 #ifdef WTF_CHANGES
3463 ASSERT(!isForbidden());
3464 #endif
3465
3466 // The following call forces module initialization
3467 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3468 #ifndef WTF_CHANGES
3469 if ((FLAGS_tcmalloc_sample_parameter > 0) && heap->SampleAllocation(size)) {
3470 Span* span = DoSampledAllocation(size);
3471 if (span != NULL) {
3472 ret = SpanToMallocResult(span);
3473 }
3474 } else
3475 #endif
3476 if (size > kMaxSize) {
3477 // Use page-level allocator
3478 SpinLockHolder h(&pageheap_lock);
3479 Span* span = pageheap->New(pages(size));
3480 if (span != NULL) {
3481 ret = SpanToMallocResult(span);
3482 }
3483 } else {
3484 // The common case, and also the simplest. This just pops the
3485 // size-appropriate freelist, after replenishing it if it's empty.
3486 ret = CheckedMallocResult(heap->Allocate(size));
3487 }
3488 if (!ret) {
3489 #ifdef WTF_CHANGES
3490 if (crashOnFailure) // This branch should be optimized out by the compiler.
3491 CRASH();
3492 #else
3493 errno = ENOMEM;
3494 #endif
3495 }
3496 return ret;
3497 }
3498
3499 static ALWAYS_INLINE void do_free(void* ptr) {
3500 if (ptr == NULL) return;
3501 ASSERT(pageheap != NULL); // Should not call free() before malloc()
3502 const PageID p = reinterpret_cast<uintptr_t>(ptr) >> kPageShift;
3503 Span* span = NULL;
3504 size_t cl = pageheap->GetSizeClassIfCached(p);
3505
3506 if (cl == 0) {
3507 span = pageheap->GetDescriptor(p);
3508 cl = span->sizeclass;
3509 pageheap->CacheSizeClass(p, cl);
3510 }
3511 if (cl != 0) {
3512 #ifndef NO_TCMALLOC_SAMPLES
3513 ASSERT(!pageheap->GetDescriptor(p)->sample);
3514 #endif
3515 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCacheIfPresent();
3516 if (heap != NULL) {
3517 heap->Deallocate(ptr, cl);
3518 } else {
3519 // Delete directly into central cache
3520 SLL_SetNext(ptr, NULL);
3521 central_cache[cl].InsertRange(ptr, ptr, 1);
3522 }
3523 } else {
3524 SpinLockHolder h(&pageheap_lock);
3525 ASSERT(reinterpret_cast<uintptr_t>(ptr) % kPageSize == 0);
3526 ASSERT(span != NULL && span->start == p);
3527 #ifndef NO_TCMALLOC_SAMPLES
3528 if (span->sample) {
3529 DLL_Remove(span);
3530 stacktrace_allocator.Delete(reinterpret_cast<StackTrace*>(span->objects));
3531 span->objects = NULL;
3532 }
3533 #endif
3534 pageheap->Delete(span);
3535 }
3536 }
3537
3538 #ifndef WTF_CHANGES
3539 // For use by exported routines below that want specific alignments
3540 //
3541 // Note: this code can be slow, and can significantly fragment memory.
3542 // The expectation is that memalign/posix_memalign/valloc/pvalloc will
3543 // not be invoked very often. This requirement simplifies our
3544 // implementation and allows us to tune for expected allocation
3545 // patterns.
3546 static void* do_memalign(size_t align, size_t size) {
3547 ASSERT((align & (align - 1)) == 0);
3548 ASSERT(align > 0);
3549 if (pageheap == NULL) TCMalloc_ThreadCache::InitModule();
3550
3551 // Allocate at least one byte to avoid boundary conditions below
3552 if (size == 0) size = 1;
3553
3554 if (size <= kMaxSize && align < kPageSize) {
3555 // Search through acceptable size classes looking for one with
3556 // enough alignment. This depends on the fact that
3557 // InitSizeClasses() currently produces several size classes that
3558 // are aligned at powers of two. We will waste time and space if
3559 // we miss in the size class array, but that is deemed acceptable
3560 // since memalign() should be used rarely.
3561 size_t cl = SizeClass(size);
3562 while (cl < kNumClasses && ((class_to_size[cl] & (align - 1)) != 0)) {
3563 cl++;
3564 }
3565 if (cl < kNumClasses) {
3566 TCMalloc_ThreadCache* heap = TCMalloc_ThreadCache::GetCache();
3567 return CheckedMallocResult(heap->Allocate(class_to_size[cl]));
3568 }
3569 }
3570
3571 // We will allocate directly from the page heap
3572 SpinLockHolder h(&pageheap_lock);
3573
3574 if (align <= kPageSize) {
3575 // Any page-level allocation will be fine
3576 // TODO: We could put the rest of this page in the appropriate
3577 // TODO: cache but it does not seem worth it.
3578 Span* span = pageheap->New(pages(size));
3579 return span == NULL ? NULL : SpanToMallocResult(span);
3580 }
3581
3582 // Allocate extra pages and carve off an aligned portion
3583 const Length alloc = pages(size + align);
3584 Span* span = pageheap->New(alloc);
3585 if (span == NULL) return NULL;
3586
3587 // Skip starting portion so that we end up aligned
3588 Length skip = 0;
3589 while ((((span->start+skip) << kPageShift) & (align - 1)) != 0) {
3590 skip++;
3591 }
3592 ASSERT(skip < alloc);
3593 if (skip > 0) {
3594 Span* rest = pageheap->Split(span, skip);
3595 pageheap->Delete(span);
3596 span = rest;
3597 }
3598
3599 // Skip trailing portion that we do not need to return
3600 const Length needed = pages(size);
3601 ASSERT(span->length >= needed);
3602 if (span->length > needed) {
3603 Span* trailer = pageheap->Split(span, needed);
3604 pageheap->Delete(trailer);
3605 }
3606 return SpanToMallocResult(span);
3607 }
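// Worked example for the large-alignment path above (illustrative values,
// assuming 4 KB pages, i.e. kPageShift == 12): do_memalign(64 * 1024, 8 * 1024)
// allocates pages(8 KB + 64 KB) = 18 pages, advances 'skip' until
// ((span->start + skip) << kPageShift) is a multiple of 64 KB, returns the
// skipped prefix to the page heap, then splits off and frees everything past
// pages(8 KB) = 2 pages, handing back a 64 KB-aligned two-page span.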
3608 #endif
3609
3610 // Helpers for use by exported routines below:
3611
3612 #ifndef WTF_CHANGES
3613 static inline void do_malloc_stats() {
3614 PrintStats(1);
3615 }
3616 #endif
3617
3618 static inline int do_mallopt(int, int) {
3619 return 1; // Indicates error
3620 }
3621
3622 #ifdef HAVE_STRUCT_MALLINFO // mallinfo isn't defined on freebsd, for instance
3623 static inline struct mallinfo do_mallinfo() {
3624 TCMallocStats stats;
3625 ExtractStats(&stats, NULL);
3626
3627 // Just some of the fields are filled in.
3628 struct mallinfo info;
3629 memset(&info, 0, sizeof(info));
3630
3631 // Unfortunately, the struct contains "int" fields, so some of the
3632 // size values will be truncated.
3633 info.arena = static_cast<int>(stats.system_bytes);
3634 info.fsmblks = static_cast<int>(stats.thread_bytes
3635 + stats.central_bytes
3636 + stats.transfer_bytes);
3637 info.fordblks = static_cast<int>(stats.pageheap_bytes);
3638 info.uordblks = static_cast<int>(stats.system_bytes
3639 - stats.thread_bytes
3640 - stats.central_bytes
3641 - stats.transfer_bytes
3642 - stats.pageheap_bytes);
3643
3644 return info;
3645 }
3646 #endif
3647
3648 //-------------------------------------------------------------------
3649 // Exported routines
3650 //-------------------------------------------------------------------
3651
3652 // CAVEAT: The code structure below ensures that MallocHook methods are always
3653 // called from the stack frame of the invoked allocation function.
3654 // heap-checker.cc depends on this to start a stack trace from
3655 // the call to the (de)allocation function.
3656
3657 #ifndef WTF_CHANGES
3658 extern "C"
3659 #else
3660 #define do_malloc do_malloc<crashOnFailure>
3661
3662 template <bool crashOnFailure>
3663 void* malloc(size_t);
3664
3665 void* fastMalloc(size_t size)
3666 {
3667 return malloc<true>(size);
3668 }
3669
3670 TryMallocReturnValue tryFastMalloc(size_t size)
3671 {
3672 return malloc<false>(size);
3673 }
3674
3675 template <bool crashOnFailure>
3676 ALWAYS_INLINE
3677 #endif
3678 void* malloc(size_t size) {
3679 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3680 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= size) // If overflow would occur...
3681 return 0;
3682 size += sizeof(AllocAlignmentInteger);
3683 void* result = do_malloc(size);
3684 if (!result)
3685 return 0;
3686
3687 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3688 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3689 #else
3690 void* result = do_malloc(size);
3691 #endif
3692
3693 #ifndef WTF_CHANGES
3694 MallocHook::InvokeNewHook(result, size);
3695 #endif
3696 return result;
3697 }
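// When FAST_MALLOC_MATCH_VALIDATION is enabled, the returned block is preceded
// by one AllocAlignmentInteger holding AllocTypeMalloc:
//
//   [ AllocAlignmentInteger tag | user data ... ]
//                                 ^-- pointer handed to the caller
//
// free() below uses fastMallocMatchValidationValue() to step back to the tag and
// calls fastMallocMatchFailed() on a mismatch, catching allocate/free pairings
// that don't match.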
3698
3699 #ifndef WTF_CHANGES
3700 extern "C"
3701 #endif
3702 void free(void* ptr) {
3703 #ifndef WTF_CHANGES
3704 MallocHook::InvokeDeleteHook(ptr);
3705 #endif
3706
3707 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3708 if (!ptr)
3709 return;
3710
3711 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(ptr);
3712 if (*header != Internal::AllocTypeMalloc)
3713 Internal::fastMallocMatchFailed(ptr);
3714 do_free(header);
3715 #else
3716 do_free(ptr);
3717 #endif
3718 }
3719
3720 #ifndef WTF_CHANGES
3721 extern "C"
3722 #else
3723 template <bool crashOnFailure>
3724 void* calloc(size_t, size_t);
3725
3726 void* fastCalloc(size_t n, size_t elem_size)
3727 {
3728 return calloc<true>(n, elem_size);
3729 }
3730
3731 TryMallocReturnValue tryFastCalloc(size_t n, size_t elem_size)
3732 {
3733 return calloc<false>(n, elem_size);
3734 }
3735
3736 template <bool crashOnFailure>
3737 ALWAYS_INLINE
3738 #endif
3739 void* calloc(size_t n, size_t elem_size) {
3740 size_t totalBytes = n * elem_size;
3741
3742 // Protect against overflow
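// Illustrative values: on a 32-bit build, n = 0x20000001 and elem_size = 8 wrap
// to totalBytes = 8; totalBytes / elem_size is then 1 != n, so the check below
// rejects the request instead of returning an under-sized block.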
3743 if (n > 1 && elem_size && (totalBytes / elem_size) != n)
3744 return 0;
3745
3746 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3747 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= totalBytes) // If overflow would occur...
3748 return 0;
3749
3750 totalBytes += sizeof(AllocAlignmentInteger);
3751 void* result = do_malloc(totalBytes);
3752 if (!result)
3753 return 0;
3754
3755 memset(result, 0, totalBytes);
3756 *static_cast<AllocAlignmentInteger*>(result) = Internal::AllocTypeMalloc;
3757 result = static_cast<AllocAlignmentInteger*>(result) + 1;
3758 #else
3759 void* result = do_malloc(totalBytes);
3760 if (result != NULL) {
3761 memset(result, 0, totalBytes);
3762 }
3763 #endif
3764
3765 #ifndef WTF_CHANGES
3766 MallocHook::InvokeNewHook(result, totalBytes);
3767 #endif
3768 return result;
3769 }
3770
3771 // Since cfree isn't used anywhere, we don't compile it in.
3772 #ifndef WTF_CHANGES
3773 #ifndef WTF_CHANGES
3774 extern "C"
3775 #endif
3776 void cfree(void* ptr) {
3777 #ifndef WTF_CHANGES
3778 MallocHook::InvokeDeleteHook(ptr);
3779 #endif
3780 do_free(ptr);
3781 }
3782 #endif
3783
3784 #ifndef WTF_CHANGES
3785 extern "C"
3786 #else
3787 template <bool crashOnFailure>
3788 void* realloc(void*, size_t);
3789
3790 void* fastRealloc(void* old_ptr, size_t new_size)
3791 {
3792 return realloc<true>(old_ptr, new_size);
3793 }
3794
3795 TryMallocReturnValue tryFastRealloc(void* old_ptr, size_t new_size)
3796 {
3797 return realloc<false>(old_ptr, new_size);
3798 }
3799
3800 template <bool crashOnFailure>
3801 ALWAYS_INLINE
3802 #endif
3803 void* realloc(void* old_ptr, size_t new_size) {
3804 if (old_ptr == NULL) {
3805 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3806 void* result = malloc(new_size);
3807 #else
3808 void* result = do_malloc(new_size);
3809 #ifndef WTF_CHANGES
3810 MallocHook::InvokeNewHook(result, new_size);
3811 #endif
3812 #endif
3813 return result;
3814 }
3815 if (new_size == 0) {
3816 #ifndef WTF_CHANGES
3817 MallocHook::InvokeDeleteHook(old_ptr);
3818 #endif
3819 free(old_ptr);
3820 return NULL;
3821 }
3822
3823 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3824 if (std::numeric_limits<size_t>::max() - sizeof(AllocAlignmentInteger) <= new_size) // If overflow would occur...
3825 return 0;
3826 new_size += sizeof(AllocAlignmentInteger);
3827 AllocAlignmentInteger* header = Internal::fastMallocMatchValidationValue(old_ptr);
3828 if (*header != Internal::AllocTypeMalloc)
3829 Internal::fastMallocMatchFailed(old_ptr);
3830 old_ptr = header;
3831 #endif
3832
3833 // Get the size of the old entry
3834 const PageID p = reinterpret_cast<uintptr_t>(old_ptr) >> kPageShift;
3835 size_t cl = pageheap->GetSizeClassIfCached(p);
3836 Span *span = NULL;
3837 size_t old_size;
3838 if (cl == 0) {
3839 span = pageheap->GetDescriptor(p);
3840 cl = span->sizeclass;
3841 pageheap->CacheSizeClass(p, cl);
3842 }
3843 if (cl != 0) {
3844 old_size = ByteSizeForClass(cl);
3845 } else {
3846 ASSERT(span != NULL);
3847 old_size = span->length << kPageShift;
3848 }
3849
3850 // Reallocate if the new size is larger than the old size,
3851 // or if the new size is significantly smaller than the old size.
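// ("Significantly smaller" means AllocationSize(new_size) -- the size the
// allocator would actually hand out -- is below old_size, so copying into a
// fresh, tighter allocation genuinely returns memory.)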
3852 if ((new_size > old_size) || (AllocationSize(new_size) < old_size)) {
3853 // Need to reallocate
3854 void* new_ptr = do_malloc(new_size);
3855 if (new_ptr == NULL) {
3856 return NULL;
3857 }
3858 #ifndef WTF_CHANGES
3859 MallocHook::InvokeNewHook(new_ptr, new_size);
3860 #endif
3861 memcpy(new_ptr, old_ptr, ((old_size < new_size) ? old_size : new_size));
3862 #ifndef WTF_CHANGES
3863 MallocHook::InvokeDeleteHook(old_ptr);
3864 #endif
3865 // We could use a variant of do_free() that leverages the fact
3866 // that we already know the sizeclass of old_ptr. The benefit
3867 // would be small, so don't bother.
3868 do_free(old_ptr);
3869 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3870 new_ptr = static_cast<AllocAlignmentInteger*>(new_ptr) + 1;
3871 #endif
3872 return new_ptr;
3873 } else {
3874 #if ENABLE(FAST_MALLOC_MATCH_VALIDATION)
3875 old_ptr = static_cast<AllocAlignmentInteger*>(old_ptr) + 1; // Set old_ptr back to the user pointer.
3876 #endif
3877 return old_ptr;
3878 }
3879 }
3880
3881 #ifdef WTF_CHANGES
3882 #undef do_malloc
3883 #else
3884
3885 static SpinLock set_new_handler_lock = SPINLOCK_INITIALIZER;
3886
3887 static inline void* cpp_alloc(size_t size, bool nothrow) {
3888 for (;;) {
3889 void* p = do_malloc(size);
3890 #ifdef PREANSINEW
3891 return p;
3892 #else
3893 if (p == NULL) { // allocation failed
3894 // Get the current new handler. NB: this function is not
3895 // thread-safe. We make a feeble stab at making it so here, but
3896 // this lock only protects against tcmalloc interfering with
3897 // itself, not with other libraries calling set_new_handler.
3898 std::new_handler nh;
3899 {
3900 SpinLockHolder h(&set_new_handler_lock);
3901 nh = std::set_new_handler(0);
3902 (void) std::set_new_handler(nh);
3903 }
3904 // If no new_handler is established, the allocation failed.
3905 if (!nh) {
3906 if (nothrow) return 0;
3907 throw std::bad_alloc();
3908 }
3909 // Otherwise, try the new_handler. If it returns, retry the
3910 // allocation. If it throws std::bad_alloc, fail the allocation.
3911 // If it throws something else, don't interfere.
3912 try {
3913 (*nh)();
3914 } catch (const std::bad_alloc&) {
3915 if (!nothrow) throw;
3916 return p;
3917 }
3918 } else { // allocation success
3919 return p;
3920 }
3921 #endif
3922 }
3923 }
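// Illustration only (hypothetical handler, not part of this file): the retry
// loop above becomes useful when the embedder installs a new_handler that can
// release memory, e.g.
//
//   static void releaseCachesHandler() {
//     MallocExtension::instance()->ReleaseFreeMemory(); // assumes instance() exists
//   }
//   std::set_new_handler(releaseCachesHandler);
//
// cpp_alloc() calls the handler on failure and retries; with no handler set it
// throws std::bad_alloc (or returns 0 for the nothrow variants).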
3924
3925 void* operator new(size_t size) {
3926 void* p = cpp_alloc(size, false);
3927 // We keep this next instruction out of cpp_alloc for a reason: when
3928 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3929 // new call into cpp_alloc, which messes up our whole section-based
3930 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3931 // isn't the last thing this fn calls, and prevents the folding.
3932 MallocHook::InvokeNewHook(p, size);
3933 return p;
3934 }
3935
3936 void* operator new(size_t size, const std::nothrow_t&) __THROW {
3937 void* p = cpp_alloc(size, true);
3938 MallocHook::InvokeNewHook(p, size);
3939 return p;
3940 }
3941
3942 void operator delete(void* p) __THROW {
3943 MallocHook::InvokeDeleteHook(p);
3944 do_free(p);
3945 }
3946
3947 void operator delete(void* p, const std::nothrow_t&) __THROW {
3948 MallocHook::InvokeDeleteHook(p);
3949 do_free(p);
3950 }
3951
3952 void* operator new[](size_t size) {
3953 void* p = cpp_alloc(size, false);
3954 // We keep this next instruction out of cpp_alloc for a reason: when
3955 // it's in, and new just calls cpp_alloc, the optimizer may fold the
3956 // new call into cpp_alloc, which messes up our whole section-based
3957 // stacktracing (see ATTRIBUTE_SECTION, above). This ensures cpp_alloc
3958 // isn't the last thing this fn calls, and prevents the folding.
3959 MallocHook::InvokeNewHook(p, size);
3960 return p;
3961 }
3962
3963 void* operator new[](size_t size, const std::nothrow_t&) __THROW {
3964 void* p = cpp_alloc(size, true);
3965 MallocHook::InvokeNewHook(p, size);
3966 return p;
3967 }
3968
3969 void operator delete[](void* p) __THROW {
3970 MallocHook::InvokeDeleteHook(p);
3971 do_free(p);
3972 }
3973
3974 void operator delete[](void* p, const std::nothrow_t&) __THROW {
3975 MallocHook::InvokeDeleteHook(p);
3976 do_free(p);
3977 }
3978
3979 extern "C" void* memalign(size_t align, size_t size) __THROW {
3980 void* result = do_memalign(align, size);
3981 MallocHook::InvokeNewHook(result, size);
3982 return result;
3983 }
3984
3985 extern "C" int posix_memalign(void** result_ptr, size_t align, size_t size)
3986 __THROW {
3987 if (((align % sizeof(void*)) != 0) ||
3988 ((align & (align - 1)) != 0) ||
3989 (align == 0)) {
3990 return EINVAL;
3991 }
3992
3993 void* result = do_memalign(align, size);
3994 MallocHook::InvokeNewHook(result, size);
3995 if (result == NULL) {
3996 return ENOMEM;
3997 } else {
3998 *result_ptr = result;
3999 return 0;
4000 }
4001 }
4002
4003 static size_t pagesize = 0;
4004
4005 extern "C" void* valloc(size_t size) __THROW {
4006 // Allocate page-aligned object of length >= size bytes
4007 if (pagesize == 0) pagesize = getpagesize();
4008 void* result = do_memalign(pagesize, size);
4009 MallocHook::InvokeNewHook(result, size);
4010 return result;
4011 }
4012
4013 extern "C" void* pvalloc(size_t size) __THROW {
4014 // Round up size to a multiple of pagesize
4015 if (pagesize == 0) pagesize = getpagesize();
4016 size = (size + pagesize - 1) & ~(pagesize - 1);
4017 void* result = do_memalign(pagesize, size);
4018 MallocHook::InvokeNewHook(result, size);
4019 return result;
4020 }
4021
4022 extern "C" void malloc_stats(void) {
4023 do_malloc_stats();
4024 }
4025
4026 extern "C" int mallopt(int cmd, int value) {
4027 return do_mallopt(cmd, value);
4028 }
4029
4030 #ifdef HAVE_STRUCT_MALLINFO
4031 extern "C" struct mallinfo mallinfo(void) {
4032 return do_mallinfo();
4033 }
4034 #endif
4035
4036 //-------------------------------------------------------------------
4037 // Some library routines on RedHat 9 allocate memory using malloc()
4038 // and free it using __libc_free() (or vice-versa). Since we provide
4039 // our own implementations of malloc/free, we need to make sure that
4040 // the __libc_XXX variants (defined as part of glibc) also point to
4041 // the same implementations.
4042 //-------------------------------------------------------------------
4043
4044 #if defined(__GLIBC__)
4045 extern "C" {
4046 #if COMPILER(GCC) && !defined(__MACH__) && defined(HAVE___ATTRIBUTE__)
4047 // Potentially faster variants that use the gcc alias extension.
4048 // Mach-O (Darwin) does not support weak aliases, hence the __MACH__ check.
4049 # define ALIAS(x) __attribute__ ((weak, alias (x)))
4050 void* __libc_malloc(size_t size) ALIAS("malloc");
4051 void __libc_free(void* ptr) ALIAS("free");
4052 void* __libc_realloc(void* ptr, size_t size) ALIAS("realloc");
4053 void* __libc_calloc(size_t n, size_t size) ALIAS("calloc");
4054 void __libc_cfree(void* ptr) ALIAS("cfree");
4055 void* __libc_memalign(size_t align, size_t s) ALIAS("memalign");
4056 void* __libc_valloc(size_t size) ALIAS("valloc");
4057 void* __libc_pvalloc(size_t size) ALIAS("pvalloc");
4058 int __posix_memalign(void** r, size_t a, size_t s) ALIAS("posix_memalign");
4059 # undef ALIAS
4060 # else /* gcc alias extension not available */
4061 // Portable wrappers
4062 void* __libc_malloc(size_t size) { return malloc(size); }
4063 void __libc_free(void* ptr) { free(ptr); }
4064 void* __libc_realloc(void* ptr, size_t size) { return realloc(ptr, size); }
4065 void* __libc_calloc(size_t n, size_t size) { return calloc(n, size); }
4066 void __libc_cfree(void* ptr) { cfree(ptr); }
4067 void* __libc_memalign(size_t align, size_t s) { return memalign(align, s); }
4068 void* __libc_valloc(size_t size) { return valloc(size); }
4069 void* __libc_pvalloc(size_t size) { return pvalloc(size); }
4070 int __posix_memalign(void** r, size_t a, size_t s) {
4071 return posix_memalign(r, a, s);
4072 }
4073 # endif /* gcc alias extension */
4074 }
4075 #endif /* __GLIBC__ */
4076
4077 // Override __libc_memalign in libc on linux boxes specially.
4078 // They have a bug in libc that causes them to (very rarely) allocate
4079 // with __libc_memalign() yet deallocate with free() and the
4080 // definitions above don't catch it.
4081 // This function is an exception to the rule of calling MallocHook method
4082 // from the stack frame of the allocation function;
4083 // heap-checker handles this special case explicitly.
4084 static void *MemalignOverride(size_t align, size_t size, const void *caller)
4085 __THROW {
4086 void* result = do_memalign(align, size);
4087 MallocHook::InvokeNewHook(result, size);
4088 return result;
4089 }
4090 void *(*__memalign_hook)(size_t, size_t, const void *) = MemalignOverride;
4091
4092 #endif
4093
4094 #if defined(WTF_CHANGES) && OS(DARWIN)
4095
4096 class FreeObjectFinder {
4097 const RemoteMemoryReader& m_reader;
4098 HashSet<void*> m_freeObjects;
4099
4100 public:
4101 FreeObjectFinder(const RemoteMemoryReader& reader) : m_reader(reader) { }
4102
4103 void visit(void* ptr) { m_freeObjects.add(ptr); }
4104 bool isFreeObject(void* ptr) const { return m_freeObjects.contains(ptr); }
4105 bool isFreeObject(vm_address_t ptr) const { return isFreeObject(reinterpret_cast<void*>(ptr)); }
4106 size_t freeObjectCount() const { return m_freeObjects.size(); }
4107
4108 void findFreeObjects(TCMalloc_ThreadCache* threadCache)
4109 {
4110 for (; threadCache; threadCache = (threadCache->next_ ? m_reader(threadCache->next_) : 0))
4111 threadCache->enumerateFreeObjects(*this, m_reader);
4112 }
4113
4114 void findFreeObjects(TCMalloc_Central_FreeListPadded* centralFreeList, size_t numSizes, TCMalloc_Central_FreeListPadded* remoteCentralFreeList)
4115 {
4116 for (unsigned i = 0; i < numSizes; i++)
4117 centralFreeList[i].enumerateFreeObjects(*this, m_reader, remoteCentralFreeList + i);
4118 }
4119 };
4120
4121 class PageMapFreeObjectFinder {
4122 const RemoteMemoryReader& m_reader;
4123 FreeObjectFinder& m_freeObjectFinder;
4124
4125 public:
4126 PageMapFreeObjectFinder(const RemoteMemoryReader& reader, FreeObjectFinder& freeObjectFinder)
4127 : m_reader(reader)
4128 , m_freeObjectFinder(freeObjectFinder)
4129 { }
4130
4131 int visit(void* ptr) const
4132 {
4133 if (!ptr)
4134 return 1;
4135
4136 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4137 if (span->free) {
4138 void* ptr = reinterpret_cast<void*>(span->start << kPageShift);
4139 m_freeObjectFinder.visit(ptr);
4140 } else if (span->sizeclass) {
4141 // Walk the free list of the small-object span, keeping track of each object seen
4142 for (void* nextObject = span->objects; nextObject; nextObject = *m_reader(reinterpret_cast<void**>(nextObject)))
4143 m_freeObjectFinder.visit(nextObject);
4144 }
4145 return span->length;
4146 }
4147 };
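// Note: visit() here (and in PageMapMemoryUsageRecorder below) returns the
// number of pagemap entries covered -- span->length for a real span, 1 for an
// empty slot -- which presumably lets PageMap::visitValues() skip past the
// pages belonging to the span it just handled.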
4148
4149 class PageMapMemoryUsageRecorder {
4150 task_t m_task;
4151 void* m_context;
4152 unsigned m_typeMask;
4153 vm_range_recorder_t* m_recorder;
4154 const RemoteMemoryReader& m_reader;
4155 const FreeObjectFinder& m_freeObjectFinder;
4156
4157 HashSet<void*> m_seenPointers;
4158 Vector<Span*> m_coalescedSpans;
4159
4160 public:
4161 PageMapMemoryUsageRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader, const FreeObjectFinder& freeObjectFinder)
4162 : m_task(task)
4163 , m_context(context)
4164 , m_typeMask(typeMask)
4165 , m_recorder(recorder)
4166 , m_reader(reader)
4167 , m_freeObjectFinder(freeObjectFinder)
4168 { }
4169
4170 ~PageMapMemoryUsageRecorder()
4171 {
4172 ASSERT(!m_coalescedSpans.size());
4173 }
4174
4175 void recordPendingRegions()
4176 {
4177 Span* lastSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4178 vm_range_t ptrRange = { m_coalescedSpans[0]->start << kPageShift, 0 };
4179 ptrRange.size = (lastSpan->start << kPageShift) - ptrRange.address + (lastSpan->length * kPageSize);
4180
4181 // Mark the memory region the spans represent as a candidate for containing pointers
4182 if (m_typeMask & MALLOC_PTR_REGION_RANGE_TYPE)
4183 (*m_recorder)(m_task, m_context, MALLOC_PTR_REGION_RANGE_TYPE, &ptrRange, 1);
4184
4185 if (!(m_typeMask & MALLOC_PTR_IN_USE_RANGE_TYPE)) {
4186 m_coalescedSpans.clear();
4187 return;
4188 }
4189
4190 Vector<vm_range_t, 1024> allocatedPointers;
4191 for (size_t i = 0; i < m_coalescedSpans.size(); ++i) {
4192 Span *theSpan = m_coalescedSpans[i];
4193 if (theSpan->free)
4194 continue;
4195
4196 vm_address_t spanStartAddress = theSpan->start << kPageShift;
4197 vm_size_t spanSizeInBytes = theSpan->length * kPageSize;
4198
4199 if (!theSpan->sizeclass) {
4200 // If it's an allocated large object span, mark it as in use
4201 if (!m_freeObjectFinder.isFreeObject(spanStartAddress))
4202 allocatedPointers.append((vm_range_t){spanStartAddress, spanSizeInBytes});
4203 } else {
4204 const size_t objectSize = ByteSizeForClass(theSpan->sizeclass);
4205
4206 // Mark each allocated small object within the span as in use
4207 const vm_address_t endOfSpan = spanStartAddress + spanSizeInBytes;
4208 for (vm_address_t object = spanStartAddress; object + objectSize <= endOfSpan; object += objectSize) {
4209 if (!m_freeObjectFinder.isFreeObject(object))
4210 allocatedPointers.append((vm_range_t){object, objectSize});
4211 }
4212 }
4213 }
4214
4215 (*m_recorder)(m_task, m_context, MALLOC_PTR_IN_USE_RANGE_TYPE, allocatedPointers.data(), allocatedPointers.size());
4216
4217 m_coalescedSpans.clear();
4218 }
4219
4220 int visit(void* ptr)
4221 {
4222 if (!ptr)
4223 return 1;
4224
4225 Span* span = m_reader(reinterpret_cast<Span*>(ptr));
4226 if (!span->start)
4227 return 1;
4228
4229 if (m_seenPointers.contains(ptr))
4230 return span->length;
4231 m_seenPointers.add(ptr);
4232
4233 if (!m_coalescedSpans.size()) {
4234 m_coalescedSpans.append(span);
4235 return span->length;
4236 }
4237
4238 Span* previousSpan = m_coalescedSpans[m_coalescedSpans.size() - 1];
4239 vm_address_t previousSpanStartAddress = previousSpan->start << kPageShift;
4240 vm_size_t previousSpanSizeInBytes = previousSpan->length * kPageSize;
4241
4242 // If the new span is adjacent to the previous span, do nothing for now.
4243 vm_address_t spanStartAddress = span->start << kPageShift;
4244 if (spanStartAddress == previousSpanStartAddress + previousSpanSizeInBytes) {
4245 m_coalescedSpans.append(span);
4246 return span->length;
4247 }
4248
4249 // New span is not adjacent to previous span, so record the spans coalesced so far.
4250 recordPendingRegions();
4251 m_coalescedSpans.append(span);
4252
4253 return span->length;
4254 }
4255 };
4256
4257 class AdminRegionRecorder {
4258 task_t m_task;
4259 void* m_context;
4260 unsigned m_typeMask;
4261 vm_range_recorder_t* m_recorder;
4262 const RemoteMemoryReader& m_reader;
4263
4264 Vector<vm_range_t, 1024> m_pendingRegions;
4265
4266 public:
4267 AdminRegionRecorder(task_t task, void* context, unsigned typeMask, vm_range_recorder_t* recorder, const RemoteMemoryReader& reader)
4268 : m_task(task)
4269 , m_context(context)
4270 , m_typeMask(typeMask)
4271 , m_recorder(recorder)
4272 , m_reader(reader)
4273 { }
4274
4275 void recordRegion(vm_address_t ptr, size_t size)
4276 {
4277 if (m_typeMask & MALLOC_ADMIN_REGION_RANGE_TYPE)
4278 m_pendingRegions.append((vm_range_t){ ptr, size });
4279 }
4280
4281 void visit(void *ptr, size_t size)
4282 {
4283 recordRegion(reinterpret_cast<vm_address_t>(ptr), size);
4284 }
4285
4286 void recordPendingRegions()
4287 {
4288 if (m_pendingRegions.size()) {
4289 (*m_recorder)(m_task, m_context, MALLOC_ADMIN_REGION_RANGE_TYPE, m_pendingRegions.data(), m_pendingRegions.size());
4290 m_pendingRegions.clear();
4291 }
4292 }
4293
4294 ~AdminRegionRecorder()
4295 {
4296 ASSERT(!m_pendingRegions.size());
4297 }
4298 };
4299
4300 kern_return_t FastMallocZone::enumerate(task_t task, void* context, unsigned typeMask, vm_address_t zoneAddress, memory_reader_t reader, vm_range_recorder_t recorder)
4301 {
4302 RemoteMemoryReader memoryReader(task, reader);
4303
4304 InitSizeClasses();
4305
4306 FastMallocZone* mzone = memoryReader(reinterpret_cast<FastMallocZone*>(zoneAddress));
4307 TCMalloc_PageHeap* pageHeap = memoryReader(mzone->m_pageHeap);
4308 TCMalloc_ThreadCache** threadHeapsPointer = memoryReader(mzone->m_threadHeaps);
4309 TCMalloc_ThreadCache* threadHeaps = memoryReader(*threadHeapsPointer);
4310
4311 TCMalloc_Central_FreeListPadded* centralCaches = memoryReader(mzone->m_centralCaches, sizeof(TCMalloc_Central_FreeListPadded) * kNumClasses);
4312
4313 FreeObjectFinder finder(memoryReader);
4314 finder.findFreeObjects(threadHeaps);
4315 finder.findFreeObjects(centralCaches, kNumClasses, mzone->m_centralCaches);
4316
4317 TCMalloc_PageHeap::PageMap* pageMap = &pageHeap->pagemap_;
4318 PageMapFreeObjectFinder pageMapFinder(memoryReader, finder);
4319 pageMap->visitValues(pageMapFinder, memoryReader);
4320
4321 PageMapMemoryUsageRecorder usageRecorder(task, context, typeMask, recorder, memoryReader, finder);
4322 pageMap->visitValues(usageRecorder, memoryReader);
4323 usageRecorder.recordPendingRegions();
4324
4325 AdminRegionRecorder adminRegionRecorder(task, context, typeMask, recorder, memoryReader);
4326 pageMap->visitAllocations(adminRegionRecorder, memoryReader);
4327
4328 PageHeapAllocator<Span>* spanAllocator = memoryReader(mzone->m_spanAllocator);
4329 PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator = memoryReader(mzone->m_pageHeapAllocator);
4330
4331 spanAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4332 pageHeapAllocator->recordAdministrativeRegions(adminRegionRecorder, memoryReader);
4333
4334 adminRegionRecorder.recordPendingRegions();
4335
4336 return 0;
4337 }
4338
4339 size_t FastMallocZone::size(malloc_zone_t*, const void*)
4340 {
4341 return 0;
4342 }
4343
4344 void* FastMallocZone::zoneMalloc(malloc_zone_t*, size_t)
4345 {
4346 return 0;
4347 }
4348
4349 void* FastMallocZone::zoneCalloc(malloc_zone_t*, size_t, size_t)
4350 {
4351 return 0;
4352 }
4353
4354 void FastMallocZone::zoneFree(malloc_zone_t*, void* ptr)
4355 {
4356 // Due to <rdar://problem/5671357> zoneFree may be called by the system free even if the pointer
4357 // is not in this zone. When this happens, the pointer being freed was not allocated by any
4358 // zone so we need to print a useful error for the application developer.
4359 malloc_printf("*** error for object %p: pointer being freed was not allocated\n", ptr);
4360 }
4361
4362 void* FastMallocZone::zoneRealloc(malloc_zone_t*, void*, size_t)
4363 {
4364 return 0;
4365 }
4366
4367
4368 #undef malloc
4369 #undef free
4370 #undef realloc
4371 #undef calloc
4372
4373 extern "C" {
4374 malloc_introspection_t jscore_fastmalloc_introspection = { &FastMallocZone::enumerate, &FastMallocZone::goodSize, &FastMallocZone::check, &FastMallocZone::print,
4375 &FastMallocZone::log, &FastMallocZone::forceLock, &FastMallocZone::forceUnlock, &FastMallocZone::statistics
4376
4377 #if !defined(BUILDING_ON_TIGER) && !defined(BUILDING_ON_LEOPARD) && !OS(IPHONE_OS)
4378 , 0 // zone_locked will not be called on the zone unless it advertises itself as version five or higher.
4379 #endif
4380
4381 };
4382 }
4383
4384 FastMallocZone::FastMallocZone(TCMalloc_PageHeap* pageHeap, TCMalloc_ThreadCache** threadHeaps, TCMalloc_Central_FreeListPadded* centralCaches, PageHeapAllocator<Span>* spanAllocator, PageHeapAllocator<TCMalloc_ThreadCache>* pageHeapAllocator)
4385 : m_pageHeap(pageHeap)
4386 , m_threadHeaps(threadHeaps)
4387 , m_centralCaches(centralCaches)
4388 , m_spanAllocator(spanAllocator)
4389 , m_pageHeapAllocator(pageHeapAllocator)
4390 {
4391 memset(&m_zone, 0, sizeof(m_zone));
4392 m_zone.version = 4;
4393 m_zone.zone_name = "JavaScriptCore FastMalloc";
4394 m_zone.size = &FastMallocZone::size;
4395 m_zone.malloc = &FastMallocZone::zoneMalloc;
4396 m_zone.calloc = &FastMallocZone::zoneCalloc;
4397 m_zone.realloc = &FastMallocZone::zoneRealloc;
4398 m_zone.free = &FastMallocZone::zoneFree;
4399 m_zone.valloc = &FastMallocZone::zoneValloc;
4400 m_zone.destroy = &FastMallocZone::zoneDestroy;
4401 m_zone.introspect = &jscore_fastmalloc_introspection;
4402 malloc_zone_register(&m_zone);
4403 }
4404
4405
4406 void FastMallocZone::init()
4407 {
4408 static FastMallocZone zone(pageheap, &thread_heaps, static_cast<TCMalloc_Central_FreeListPadded*>(central_cache), &span_allocator, &threadheap_allocator);
4409 }
4410
4411 #endif
4412
4413 #if WTF_CHANGES
4414 void releaseFastMallocFreeMemory()
4415 {
4416 // Flush free memory in the current thread cache back to the page heap.
4417 // The low-watermark mechanism in Scavenge() prevents a full return on the first pass,
4418 // so a second pass is needed to flush everything.
4419 if (TCMalloc_ThreadCache* threadCache = TCMalloc_ThreadCache::GetCacheIfPresent()) {
4420 threadCache->Scavenge();
4421 threadCache->Scavenge();
4422 }
4423
4424 SpinLockHolder h(&pageheap_lock);
4425 pageheap->ReleaseFreePages();
4426 }
4427
4428 FastMallocStatistics fastMallocStatistics()
4429 {
4430 FastMallocStatistics statistics;
4431 {
4432 SpinLockHolder lockHolder(&pageheap_lock);
4433 statistics.heapSize = static_cast<size_t>(pageheap->SystemBytes());
4434 statistics.freeSizeInHeap = static_cast<size_t>(pageheap->FreeBytes());
4435 statistics.returnedSize = pageheap->ReturnedBytes();
4436 statistics.freeSizeInCaches = 0;
4437 for (TCMalloc_ThreadCache* threadCache = thread_heaps; threadCache ; threadCache = threadCache->next_)
4438 statistics.freeSizeInCaches += threadCache->Size();
4439 }
4440 for (unsigned cl = 0; cl < kNumClasses; ++cl) {
4441 const int length = central_cache[cl].length();
4442 const int tc_length = central_cache[cl].tc_length();
4443 statistics.freeSizeInCaches += ByteSizeForClass(cl) * (length + tc_length);
4444 }
4445 return statistics;
4446 }
4447
4448 } // namespace WTF
4449 #endif
4450
4451 #endif // FORCE_SYSTEM_MALLOC
4452