1 /* ----------------------------------------------------------------------------
2 Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
3 This is free software; you can redistribute it and/or modify it under the
4 terms of the MIT license. A copy of the license can be found in the file
5 "LICENSE" at the root of this distribution.
6 -----------------------------------------------------------------------------*/
7 #pragma once
8 #ifndef MIMALLOC_INTERNAL_H
9 #define MIMALLOC_INTERNAL_H
10
11 #include "mimalloc-types.h"
12
13 #if (MI_DEBUG>0)
14 #define mi_trace_message(...) _mi_trace_message(__VA_ARGS__)
15 #else
16 #define mi_trace_message(...)
17 #endif
18
19 #define MI_CACHE_LINE 64
20 #if defined(_MSC_VER)
21 #pragma warning(disable:4127) // suppress constant conditional warning (due to MI_SECURE paths)
22 #pragma warning(disable:26812) // unscoped enum warning
23 #define mi_decl_noinline __declspec(noinline)
24 #define mi_decl_thread __declspec(thread)
25 #define mi_decl_cache_align __declspec(align(MI_CACHE_LINE))
26 #elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
27 #define mi_decl_noinline __attribute__((noinline))
28 #define mi_decl_thread __thread
29 #define mi_decl_cache_align __attribute__((aligned(MI_CACHE_LINE)))
30 #else
31 #define mi_decl_noinline
32 #define mi_decl_thread __thread // hope for the best :-)
33 #define mi_decl_cache_align
34 #endif
35
36 #if defined(__EMSCRIPTEN__) && !defined(__wasi__)
37 #define __wasi__
38 #endif
39
40 #if defined(__cplusplus)
41 #define mi_decl_externc extern "C"
42 #else
43 #define mi_decl_externc
44 #endif
45
46 // "options.c"
47 void _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
48 void _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
49 void _mi_warning_message(const char* fmt, ...);
50 void _mi_verbose_message(const char* fmt, ...);
51 void _mi_trace_message(const char* fmt, ...);
52 void _mi_options_init(void);
53 void _mi_error_message(int err, const char* fmt, ...);
54
55 // random.c
56 void _mi_random_init(mi_random_ctx_t* ctx);
57 void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
58 uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
59 uintptr_t _mi_heap_random_next(mi_heap_t* heap);
60 uintptr_t _mi_os_random_weak(uintptr_t extra_seed);
61 static inline uintptr_t _mi_random_shuffle(uintptr_t x);
62
63 // init.c
64 extern mi_decl_cache_align mi_stats_t _mi_stats_main;
65 extern mi_decl_cache_align const mi_page_t _mi_page_empty;
66 bool _mi_is_main_thread(void);
67 size_t _mi_current_thread_count(void);
68 bool _mi_preloading(void); // true while the C runtime is not ready
69
70 // os.c
71 size_t _mi_os_page_size(void);
72 void _mi_os_init(void); // called from process init
73 void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data
74 void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data
75
76 bool _mi_os_protect(void* addr, size_t size);
77 bool _mi_os_unprotect(void* addr, size_t size);
78 bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
79 bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
80 bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
81 bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
82 size_t _mi_os_good_alloc_size(size_t size);
83 bool _mi_os_has_overcommit(void);
84
85 // arena.c
86 void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
87 void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
88 void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);
89
90 // "segment-cache.c"
91 void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
92 bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
93 void _mi_segment_map_allocated_at(const mi_segment_t* segment);
94 void _mi_segment_map_freed_at(const mi_segment_t* segment);
95
96 // "segment.c"
97 mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
98 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
99 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
100 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
101 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
102 void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
103
104 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
105 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
106 void _mi_abandoned_await_readers(void);
107
108
109
110 // "page.c"
111 void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_attr_malloc;
112
113 void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
114 void _mi_page_unfull(mi_page_t* page);
115 void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force); // free the page
116 void _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq); // abandon the page, to be picked up by another thread...
117 void _mi_heap_delayed_free(mi_heap_t* heap);
118 void _mi_heap_collect_retired(mi_heap_t* heap, bool force);
119
120 void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
121 size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
122 void _mi_deferred_free(mi_heap_t* heap, bool force);
123
124 void _mi_page_free_collect(mi_page_t* page,bool force);
125 void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page); // callback from segments
126
127 size_t _mi_bin_size(uint8_t bin); // for stats
128 uint8_t _mi_bin(size_t size); // for stats
129
130 // "heap.c"
131 void _mi_heap_destroy_pages(mi_heap_t* heap);
132 void _mi_heap_collect_abandon(mi_heap_t* heap);
133 void _mi_heap_set_default_direct(mi_heap_t* heap);
134
135 // "stats.c"
136 void _mi_stats_done(mi_stats_t* stats);
137
138 mi_msecs_t _mi_clock_now(void);
139 mi_msecs_t _mi_clock_end(mi_msecs_t start);
140 mi_msecs_t _mi_clock_start(void);
141
142 // "alloc.c"
143 void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept; // called from `_mi_malloc_generic`
144 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
145 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
146 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
147 bool _mi_free_delayed_block(mi_block_t* block);
148 void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);
149
150 #if MI_DEBUG>1
151 bool _mi_page_is_valid(mi_page_t* page);
152 #endif
153
154
155 // ------------------------------------------------------
156 // Branches
157 // ------------------------------------------------------
158
159 #if defined(__GNUC__) || defined(__clang__)
160 #define mi_unlikely(x) __builtin_expect((x),0)
161 #define mi_likely(x) __builtin_expect((x),1)
162 #else
163 #define mi_unlikely(x) (x)
164 #define mi_likely(x) (x)
165 #endif
166
167 #ifndef __has_builtin
168 #define __has_builtin(x) 0
169 #endif
170
171
172 /* -----------------------------------------------------------
173 Error codes passed to `_mi_fatal_error`
174 All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
175 For portability define undefined error codes using common Unix codes:
176 <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
177 ----------------------------------------------------------- */
178 #include <errno.h>
179 #ifndef EAGAIN // double free
180 #define EAGAIN (11)
181 #endif
182 #ifndef ENOMEM // out of memory
183 #define ENOMEM (12)
184 #endif
185 #ifndef EFAULT // corrupted free-list or meta-data
186 #define EFAULT (14)
187 #endif
188 #ifndef EINVAL // trying to free an invalid pointer
189 #define EINVAL (22)
190 #endif
191 #ifndef EOVERFLOW // count*size overflow
192 #define EOVERFLOW (75)
193 #endif
194
195
196 /* -----------------------------------------------------------
197 Inlined definitions
198 ----------------------------------------------------------- */
199 #define MI_UNUSED(x) (void)(x)
200 #if (MI_DEBUG>0)
201 #define MI_UNUSED_RELEASE(x)
202 #else
203 #define MI_UNUSED_RELEASE(x) MI_UNUSED(x)
204 #endif
205
206 #define MI_INIT4(x) x(),x(),x(),x()
207 #define MI_INIT8(x) MI_INIT4(x),MI_INIT4(x)
208 #define MI_INIT16(x) MI_INIT8(x),MI_INIT8(x)
209 #define MI_INIT32(x) MI_INIT16(x),MI_INIT16(x)
210 #define MI_INIT64(x) MI_INIT32(x),MI_INIT32(x)
211 #define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
212 #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
213
214
215 // Is `x` a power of two? (0 is considered a power of two)
// Is `x` a power of two? (0 is considered a power of two)
static inline bool _mi_is_power_of_two(uintptr_t x) {
  // clearing the lowest set bit leaves zero iff at most one bit was set
  const uintptr_t lowbit_cleared = x & (x - 1);
  return (lowbit_cleared == 0);
}
219
220 // Align upwards
// Align `sz` upwards to a multiple of `alignment` (must be non-zero).
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  const uintptr_t mask = alignment - 1;
  if ((alignment & mask) != 0) {
    // not a power of two: round up via division
    return (((sz + mask) / alignment) * alignment);
  }
  // power of two: add mask and clear the low bits
  return ((sz + mask) & ~mask);
}
231
232 // Align downwards
// Align `sz` downwards to a multiple of `alignment` (must be non-zero).
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  const uintptr_t mask = alignment - 1;
  if ((alignment & mask) != 0) {
    // not a power of two: truncate via division
    return ((sz / alignment) * alignment);
  }
  // power of two: clear the low bits
  return (sz & ~mask);
}
243
244 // Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
  mi_assert_internal(divider != 0);
  if (divider == 0) return size;  // defensive: avoid division by zero in release builds
  return ((size + divider - 1) / divider);
}
249
250 // Is memory zero initialized?
// Is memory zero initialized?
// Takes `const void*` since the memory is only read (callers passing
// non-const pointers are unaffected); the byte cast is hoisted out of the loop.
static inline bool mi_mem_is_zero(const void* p, size_t size) {
  const uint8_t* bytes = (const uint8_t*)p;
  for (size_t i = 0; i < size; i++) {
    if (bytes[i] != 0) return false;
  }
  return true;
}
257
258
259 // Align a byte size to a size in _machine words_,
260 // i.e. byte size == `wsize*sizeof(void*)`.
// Align a byte size to a size in _machine words_,
// i.e. byte size == `wsize*sizeof(void*)`.
static inline size_t _mi_wsize_from_size(size_t size) {
  mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
  const size_t wordsize = sizeof(uintptr_t);
  return ((size + wordsize - 1) / wordsize);
}
265
266 // Does malloc satisfy the alignment constraints already?
mi_malloc_satisfies_alignment(size_t alignment,size_t size)267 static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) {
268 return (alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)));
269 }
270
271 // Overflow detecting multiply
// Overflow detecting multiply
#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
#include <limits.h> // UINT_MAX, ULONG_MAX
#if defined(_CLOCK_T) // for Illumos
#undef _CLOCK_T
#endif
// Multiply `count*size` into `*total`; returns `true` if the multiplication
// overflowed. Selects the compiler builtin matching the width of `size_t`.
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
#if (SIZE_MAX == ULONG_MAX)
  return __builtin_umull_overflow(count, size, (unsigned long *)total);
#elif (SIZE_MAX == UINT_MAX)
  return __builtin_umul_overflow(count, size, (unsigned int *)total);
#else
  return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
#endif
}
#else /* __builtin_umul_overflow is unavailable */
// Portable fallback: if both operands are below sqrt(SIZE_MAX) the product
// cannot overflow; otherwise verify with a division after the multiply.
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))   // sqrt(SIZE_MAX)
  *total = count * size;
  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
          && size > 0 && (SIZE_MAX / size) < count);
}
#endif
294
295 // Safe multiply `count*size` into `total`; return `true` on overflow.
// Safe multiply `count*size` into `total`; return `true` on overflow.
static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
  if (count == 1) {  // quick check for the case where count is one (common for C++ allocators)
    *total = size;
    return false;
  }
  if (mi_unlikely(mi_mul_overflow(count, size, total))) {
    _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
    *total = SIZE_MAX;  // saturate so a subsequent allocation attempt surely fails
    return true;
  }
  return false;
}
308
309
310 /* ----------------------------------------------------------------------------------------
311 The thread local default heap: `_mi_get_default_heap` returns the thread local heap.
312 On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
313 __thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
314 that the storage will always be available (allocated on the thread stacks).
315 On some platforms though we cannot use that when overriding `malloc` since the underlying
316 TLS implementation (or the loader) will call itself `malloc` on a first access and recurse.
317 We try to circumvent this in an efficient way:
318 - macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
319 loader itself calls `malloc` even before the modules are initialized.
320 - OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
321 - DragonFly: the uniqueid use is buggy but kept for reference.
322 ------------------------------------------------------------------------------------------- */
323
324 extern const mi_heap_t _mi_heap_empty; // read-only empty heap, initial value of the thread local default heap
325 extern bool _mi_process_is_initialized;
326 mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap
327
328 #if defined(MI_MALLOC_OVERRIDE)
329 #if defined(__APPLE__) // macOS
330 #define MI_TLS_SLOT 89 // seems unused?
331 // #define MI_TLS_RECURSE_GUARD 1
332 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
333 // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
334 #elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
336 // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
337 #define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
338 #elif defined(__DragonFly__)
339 #warning "mimalloc is not working correctly on DragonFly yet."
340 //#define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
341 #endif
342 #endif
343
344 #if defined(MI_TLS_SLOT)
345 static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept; // forward declaration
346 #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
347 #include <pthread.h>
// Return the address of the default-heap pointer stored inside the pthread
// thread block at offset `MI_TLS_PTHREAD_SLOT_OFS` (used when regular TLS
// would recurse into `malloc`; see the comment block above).
static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
  pthread_t self = pthread_self();
#if defined(__DragonFly__)
  if (self==NULL) {
    // NOTE(review): this returns the address of a stack local, which dangles
    // after return -- kept as-is since the DragonFly path is explicitly marked
    // as not working (see the #warning above); confirm before relying on it.
    mi_heap_t* pheap_main = _mi_heap_main_get();
    return &pheap_main;
  }
#endif
  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
}
358 #elif defined(MI_TLS_PTHREAD)
359 #include <pthread.h>
360 extern pthread_key_t _mi_heap_default_key;
361 #endif
362
363 // Default heap to allocate from (if not using TLS- or pthread slots).
364 // Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`).
365 // This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined.
366 // However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
367 extern mi_decl_thread mi_heap_t* _mi_heap_default; // default heap to allocate from
368
369
// Return the thread-local default heap; never returns NULL (falls back to the
// read-only `_mi_heap_empty` until the thread's heap is initialized).
static inline mi_heap_t* mi_get_default_heap(void) {
#if defined(MI_TLS_SLOT)
  // macOS override: read a fixed OS TLS slot to avoid recursing into malloc
  // during loading (see the comment block above).
  mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
  if (mi_unlikely(heap == NULL)) { heap = (mi_heap_t*)&_mi_heap_empty; } //_mi_heap_empty_get(); }
  return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
  // OpenBSD override: the heap pointer lives at a fixed offset in the pthread block.
  mi_heap_t* heap = *mi_tls_pthread_heap_slot();
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#elif defined(MI_TLS_PTHREAD)
  // pthread-key fallback; before the key is created, use the main heap.
  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
  #if defined(MI_TLS_RECURSE_GUARD)
  // guard against TLS access before the C runtime is initialized
  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
  #endif
  // regular `__thread` TLS variable (most platforms)
  return _mi_heap_default;
#endif
}
388
mi_heap_is_default(const mi_heap_t * heap)389 static inline bool mi_heap_is_default(const mi_heap_t* heap) {
390 return (heap == mi_get_default_heap());
391 }
392
mi_heap_is_backing(const mi_heap_t * heap)393 static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
394 return (heap->tld->heap_backing == heap);
395 }
396
mi_heap_is_initialized(mi_heap_t * heap)397 static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
398 mi_assert_internal(heap != NULL);
399 return (heap != &_mi_heap_empty);
400 }
401
_mi_ptr_cookie(const void * p)402 static inline uintptr_t _mi_ptr_cookie(const void* p) {
403 extern mi_heap_t _mi_heap_main;
404 mi_assert_internal(_mi_heap_main.cookie != 0);
405 return ((uintptr_t)p ^ _mi_heap_main.cookie);
406 }
407
408 /* -----------------------------------------------------------
409 Pages
410 ----------------------------------------------------------- */
411
_mi_heap_get_free_small_page(mi_heap_t * heap,size_t size)412 static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
413 mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
414 const size_t idx = _mi_wsize_from_size(size);
415 mi_assert_internal(idx < MI_PAGES_DIRECT);
416 return heap->pages_free_direct[idx];
417 }
418
419 // Get the page belonging to a certain size class
_mi_get_free_small_page(size_t size)420 static inline mi_page_t* _mi_get_free_small_page(size_t size) {
421 return _mi_heap_get_free_small_page(mi_get_default_heap(), size);
422 }
423
424 // Segment that contains the pointer
_mi_ptr_segment(const void * p)425 static inline mi_segment_t* _mi_ptr_segment(const void* p) {
426 // mi_assert_internal(p != NULL);
427 return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
428 }
429
mi_slice_to_page(mi_slice_t * s)430 static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
431 mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
432 return (mi_page_t*)(s);
433 }
434
mi_page_to_slice(mi_page_t * p)435 static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
436 mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
437 return (mi_slice_t*)(p);
438 }
439
440 // Segment belonging to a page
_mi_page_segment(const mi_page_t * page)441 static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
442 mi_segment_t* segment = _mi_ptr_segment(page);
443 mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
444 return segment;
445 }
446
mi_slice_first(const mi_slice_t * slice)447 static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
448 mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
449 mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
450 mi_assert_internal(start->slice_offset == 0);
451 mi_assert_internal(start + start->slice_count > slice);
452 return start;
453 }
454
455 // Get the page containing the pointer
// Get the page containing the pointer.
// Maps `p` to its slice index within the segment, then walks back to the
// first slice of the span, which doubles as the page's meta-data.
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
  mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
  mi_assert_internal(idx < segment->slice_entries);
  mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
  mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
  mi_assert_internal(slice->slice_offset == 0);
  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
  return mi_slice_to_page(slice);
}
467
468 // Quick page start for initialized pages
// Quick page start for initialized pages (thin wrapper over the segment routine).
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
  uint8_t* const start = _mi_segment_page_start(segment, page, page_size);
  return start;
}

// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
  mi_segment_t* const segment = _mi_ptr_segment(p);
  return _mi_segment_page_of(segment, p);
}
477
478 // Get the block size of a page (special case for huge objects)
mi_page_block_size(const mi_page_t * page)479 static inline size_t mi_page_block_size(const mi_page_t* page) {
480 const size_t bsize = page->xblock_size;
481 mi_assert_internal(bsize > 0);
482 if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) {
483 return bsize;
484 }
485 else {
486 size_t psize;
487 _mi_segment_page_start(_mi_page_segment(page), page, &psize);
488 return psize;
489 }
490 }
491
492 // Get the usable block size of a page without fixed padding.
493 // This may still include internal padding due to alignment and rounding up size classes.
mi_page_usable_block_size(const mi_page_t * page)494 static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
495 return mi_page_block_size(page) - MI_PADDING_SIZE;
496 }
497
498 // size of a segment
// size of a segment in bytes
static inline size_t mi_segment_size(mi_segment_t* segment) {
  const size_t slices = segment->segment_slices;
  return (slices * MI_SEGMENT_SLICE_SIZE);
}

// one-past-the-end address of a segment
static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
  uint8_t* const base = (uint8_t*)segment;
  return (base + mi_segment_size(segment));
}
506
507 // Thread free access
mi_page_thread_free(const mi_page_t * page)508 static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
509 return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
510 }
511
mi_page_thread_free_flag(const mi_page_t * page)512 static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
513 return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
514 }
515
516 // Heap access
mi_page_heap(const mi_page_t * page)517 static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
518 return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
519 }
520
mi_page_set_heap(mi_page_t * page,mi_heap_t * heap)521 static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
522 mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
523 mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
524 }
525
526 // Thread free flag helpers
// Thread free flag helpers: a `mi_thread_free_t` packs a block pointer with a
// 2-bit `mi_delayed_t` flag in the low bits.
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
  return (mi_block_t*)(tf & ~0x03);
}
static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
  return (mi_delayed_t)(tf & 0x03);
}
static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
  const uintptr_t packed = ((uintptr_t)block | (uintptr_t)delayed);
  return (mi_thread_free_t)packed;
}
static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
  return mi_tf_make(mi_tf_block(tf), delayed);
}
static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
  return mi_tf_make(block, mi_tf_delayed(tf));
}
542
543 // are all blocks in a page freed?
544 // note: needs up-to-date used count, (as the `xthread_free` list may not be empty). see `_mi_page_collect_free`.
mi_page_all_free(const mi_page_t * page)545 static inline bool mi_page_all_free(const mi_page_t* page) {
546 mi_assert_internal(page != NULL);
547 return (page->used == 0);
548 }
549
550 // are there any available blocks?
mi_page_has_any_available(const mi_page_t * page)551 static inline bool mi_page_has_any_available(const mi_page_t* page) {
552 mi_assert_internal(page != NULL && page->reserved > 0);
553 return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
554 }
555
556 // are there immediately available blocks, i.e. blocks available on the free list.
mi_page_immediate_available(const mi_page_t * page)557 static inline bool mi_page_immediate_available(const mi_page_t* page) {
558 mi_assert_internal(page != NULL);
559 return (page->free != NULL);
560 }
561
562 // is more than 7/8th of a page in use?
mi_page_mostly_used(const mi_page_t * page)563 static inline bool mi_page_mostly_used(const mi_page_t* page) {
564 if (page==NULL) return true;
565 uint16_t frac = page->reserved / 8U;
566 return (page->reserved - page->used <= frac);
567 }
568
mi_page_queue(const mi_heap_t * heap,size_t size)569 static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
570 return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
571 }
572
573
574
575 //-----------------------------------------------------------
576 // Page flags
577 //-----------------------------------------------------------
mi_page_is_in_full(const mi_page_t * page)578 static inline bool mi_page_is_in_full(const mi_page_t* page) {
579 return page->flags.x.in_full;
580 }
581
mi_page_set_in_full(mi_page_t * page,bool in_full)582 static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
583 page->flags.x.in_full = in_full;
584 }
585
mi_page_has_aligned(const mi_page_t * page)586 static inline bool mi_page_has_aligned(const mi_page_t* page) {
587 return page->flags.x.has_aligned;
588 }
589
mi_page_set_has_aligned(mi_page_t * page,bool has_aligned)590 static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
591 page->flags.x.has_aligned = has_aligned;
592 }
593
594
595 /* -------------------------------------------------------------------
596 Encoding/Decoding the free list next pointers
597
598 This is to protect against buffer overflow exploits where the
599 free list is mutated. Many hardened allocators xor the next pointer `p`
600 with a secret key `k1`, as `p^k1`. This prevents overwriting with known
601 values but might be still too weak: if the attacker can guess
602 the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
603 Moreover, if multiple blocks can be read as well, the attacker can
604 xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
605 about the pointers (and subsequently `k1`).
606
607 Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
608 Since these operations are not associative, the above approaches do not
609 work so well any more even if the `p` can be guesstimated. For example,
610 for the read case we can subtract two entries to discard the `+k1` term,
611 but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
612 We include the left-rotation since xor and addition are otherwise linear
613 in the lowest bit. Finally, both keys are unique per page which reduces
614 the re-use of keys by a large factor.
615
616 We also pass a separate `null` value to be used as `NULL` or otherwise
617 `(k2<<<k1)+k1` would appear (too) often as a sentinel value.
618 ------------------------------------------------------------------- */
619
mi_is_in_same_segment(const void * p,const void * q)620 static inline bool mi_is_in_same_segment(const void* p, const void* q) {
621 return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
622 }
623
mi_is_in_same_page(const void * p,const void * q)624 static inline bool mi_is_in_same_page(const void* p, const void* q) {
625 mi_segment_t* segment = _mi_ptr_segment(p);
626 if (_mi_ptr_segment(q) != segment) return false;
627 // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
628 mi_page_t* page = _mi_segment_page_of(segment, p);
629 size_t psize;
630 uint8_t* start = _mi_segment_page_start(segment, page, &psize);
631 return (start <= (uint8_t*)q && (uint8_t*)q < start + psize);
632 }
633
mi_rotl(uintptr_t x,uintptr_t shift)634 static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
635 shift %= MI_INTPTR_BITS;
636 return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift))));
637 }
mi_rotr(uintptr_t x,uintptr_t shift)638 static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
639 shift %= MI_INTPTR_BITS;
640 return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift))));
641 }
642
// Decode an encoded free-list pointer: exact inverse of `mi_ptr_encode`,
// mapping the `null` sentinel back to NULL.
static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
  void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
  return (mi_unlikely(p==null) ? NULL : p);
}

// Encode a free-list pointer as `((p^k2)<<<k1)+k1` (see the comment block
// above); NULL is replaced by the `null` sentinel before encoding.
static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
  const uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p);
  return (mi_rotl(x ^ keys[1], keys[0]) + keys[0]);
}
652
mi_block_nextx(const void * null,const mi_block_t * block,const uintptr_t * keys)653 static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
654 #ifdef MI_ENCODE_FREELIST
655 return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
656 #else
657 MI_UNUSED(keys); MI_UNUSED(null);
658 return (mi_block_t*)block->next;
659 #endif
660 }
661
mi_block_set_nextx(const void * null,mi_block_t * block,const mi_block_t * next,const uintptr_t * keys)662 static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
663 #ifdef MI_ENCODE_FREELIST
664 block->next = mi_ptr_encode(null, next, keys);
665 #else
666 MI_UNUSED(keys); MI_UNUSED(null);
667 block->next = (mi_encoded_t)next;
668 #endif
669 }
670
// Get the next block in a page's free list. With an encoded free list
// this also validates the decoded pointer and returns NULL (after
// reporting an error) on suspected free-list corruption.
static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
#ifdef MI_ENCODE_FREELIST
  mi_block_t* next = mi_block_nextx(page,block,page->keys);  // `page` doubles as the NULL sentinel value
  // check for free list corruption: is `next` at least in the same page?
  // TODO: check if `next` is `page->block_size` aligned?
  if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
    _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
    next = NULL;
  }
  return next;
#else
  MI_UNUSED(page);
  return mi_block_nextx(page,block,NULL);
#endif
}
686
// Set the next block in a page's free list (encoding with the page
// keys when MI_ENCODE_FREELIST is enabled).
static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
#ifdef MI_ENCODE_FREELIST
  mi_block_set_nextx(page,block,next, page->keys);  // `page` doubles as the NULL sentinel value
#else
  MI_UNUSED(page);
  mi_block_set_nextx(page,block,next,NULL);
#endif
}
695
696
697 // -------------------------------------------------------------------
698 // commit mask
699 // -------------------------------------------------------------------
700
// Initialize a commit mask to all-zero (no blocks committed).
static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) {
  size_t field = 0;
  while (field < MI_COMMIT_MASK_FIELD_COUNT) {
    cm->mask[field] = 0;
    field++;
  }
}
706
// Initialize a commit mask to all-ones (every block committed).
static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) {
  const size_t ones = ~((size_t)0);
  for (size_t field = MI_COMMIT_MASK_FIELD_COUNT; field > 0; field--) {
    cm->mask[field - 1] = ones;
  }
}
712
mi_commit_mask_is_empty(const mi_commit_mask_t * cm)713 static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) {
714 for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
715 if (cm->mask[i] != 0) return false;
716 }
717 return true;
718 }
719
mi_commit_mask_is_full(const mi_commit_mask_t * cm)720 static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) {
721 for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
722 if (cm->mask[i] != ~((size_t)0)) return false;
723 }
724 return true;
725 }
726
727 // defined in `segment.c`:
728 size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total);
729 size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);
730
// Iterate over the runs of consecutive set bits in a commit mask:
// `idx` receives the start index of each run and `count` its length.
// Must be closed with `mi_commit_mask_foreach_end()`. Usage:
//   mi_commit_mask_foreach(cm, idx, count) { ... } mi_commit_mask_foreach_end()
#define mi_commit_mask_foreach(cm,idx,count) \
  idx = 0; \
  while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) { 
        
#define mi_commit_mask_foreach_end() \
  idx += count; \
  }
738
739
740
741
742 // -------------------------------------------------------------------
743 // Fast "random" shuffle
744 // -------------------------------------------------------------------
745
// One round of a fast (non-cryptographic) pseudo-random mixing function.
// 64-bit: the splitmix64 finalizer by Sebastiano Vigna,
//   see <http://xoshiro.di.unimi.it/splitmix64.c>
// 32-bit: the hash by Chris Wellons, see <https://nullprogram.com/blog/2018/07/31/>
static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
  if (x == 0) { x = 17; }   // avoid the all-zero fixed point
#if (MI_INTPTR_SIZE==8)
  x = (x ^ (x >> 30)) * 0xbf58476d1ce4e5b9UL;
  x = (x ^ (x >> 27)) * 0x94d049bb133111ebUL;
  x =  x ^ (x >> 31);
#elif (MI_INTPTR_SIZE==4)
  x = (x ^ (x >> 16)) * 0x7feb352dUL;
  x = (x ^ (x >> 15)) * 0x846ca68bUL;
  x =  x ^ (x >> 16);
#endif
  return x;
}
765
766 // -------------------------------------------------------------------
767 // Optimize numa node access for the common case (= one node)
768 // -------------------------------------------------------------------
769
770 int _mi_os_numa_node_get(mi_os_tld_t* tld);
771 size_t _mi_os_numa_node_count_get(void);
772
773 extern _Atomic(size_t) _mi_numa_node_count;
// Return the NUMA node of the current thread; fast path when the
// system has exactly one node (the common case).
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
  if (mi_unlikely(mi_atomic_load_relaxed(&_mi_numa_node_count) != 1)) {
    return _mi_os_numa_node_get(tld);
  }
  return 0;
}
_mi_os_numa_node_count(void)778 static inline size_t _mi_os_numa_node_count(void) {
779 const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
780 if (mi_likely(count>0)) return count;
781 else return _mi_os_numa_node_count_get();
782 }
783
784
785 // -------------------------------------------------------------------
786 // Getting the thread id should be performant as it is called in the
787 // fast path of `_mi_free` and we specialize for various platforms.
788 // -------------------------------------------------------------------
789 #if defined(_WIN32)
790 #define WIN32_LEAN_AND_MEAN
791 #include <windows.h>
// Fast thread id: the TEB (thread environment block) address is stable
// and unique per live thread, so it serves directly as the id.
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  // Windows: works on Intel and ARM in both 32- and 64-bit
  return (uintptr_t)NtCurrentTeb();
}
796
797 #elif defined(__GNUC__) && \
798 (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))
799
800 // TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf
// Read TLS slot `slot` directly from the thread control block (TCB)
// through the architecture's thread-pointer register, avoiding a
// library call. The outer #elif guard guarantees one branch below
// is selected, so `res` is always assigned.
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
  void* res;
  const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
  __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
  __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
  __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
#elif defined(__x86_64__)
  __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
  void** tcb; MI_UNUSED(ofs);
  // read the TCB pointer from the CP15 thread-id register, mask low flag bits
  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
  res = tcb[slot];
#elif defined(__aarch64__)
  void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
#else
  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
  res = tcb[slot];
#endif
  return res;
}
828
829 // setting is only used on macOSX for now
// Write TLS slot `slot` directly in the thread control block; setting
// is only used on macOSX for now. Counterpart of `mi_tls_slot`.
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
  const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOSX uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
  // fix: the store must target the memory output operand `%0`;
  // `%%fs:%1` would use the *value* operand as the address.
  __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
#elif defined(__x86_64__)
  // fix: store to `%0` (the slot), not `%1` (the value) -- see above.
  __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
  void** tcb; MI_UNUSED(ofs);
  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
  tcb[slot] = value;
#elif defined(__aarch64__)
  void** tcb; MI_UNUSED(ofs);
#if defined(__APPLE__) // M1, issue #343
  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
#else
  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
#endif
  tcb[slot] = value;
#endif
}
855
// Fast thread id derived from a TLS slot read via `mi_tls_slot`;
// the slot holds a per-thread pointer, which is unique per live thread.
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
  // on Android, slot 1 is the thread ID (pointer to pthread internal struct)
  return (uintptr_t)mi_tls_slot(1);
#else
  // in all our other targets, slot 0 is the pointer to the thread control block
  return (uintptr_t)mi_tls_slot(0);
#endif
}
865 #else
866 // otherwise use standard C
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  // Portable fallback: the address of the thread-local default heap is
  // unique per thread (each thread has its own `_mi_heap_default`).
  return (uintptr_t)&_mi_heap_default;
}
870 #endif
871
872 // -----------------------------------------------------------------------
873 // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
874 // -----------------------------------------------------------------------
875
876 #if defined(__GNUC__)
877
878 #include <limits.h> // LONG_MAX
879 #define MI_HAVE_FAST_BITSCAN
// Count leading zero bits (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;  // __builtin_clz* is undefined for 0
#if (INTPTR_MAX == LONG_MAX)
  return __builtin_clzl(x);   // uintptr_t fits in unsigned long here
#else
  return __builtin_clzll(x);
#endif
}
// Count trailing zero bits (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;  // __builtin_ctz* is undefined for 0
#if (INTPTR_MAX == LONG_MAX)
  return __builtin_ctzl(x);   // uintptr_t fits in unsigned long here
#else
  return __builtin_ctzll(x);
#endif
}
896
897 #elif defined(_MSC_VER)
898
899 #include <limits.h> // LONG_MAX
900 #define MI_HAVE_FAST_BITSCAN
// Count leading zero bits via the `_BitScanReverse` intrinsic
// (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;  // BitScan* leaves `idx` undefined when x==0
  unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
  _BitScanReverse(&idx, x);
#else
  _BitScanReverse64(&idx, x);
#endif
  // BitScanReverse yields the index of the highest set bit; convert to a leading-zero count
  return ((MI_INTPTR_BITS - 1) - idx);
}
// Count trailing zero bits via the `_BitScanForward` intrinsic
// (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;  // BitScan* leaves `idx` undefined when x==0
  unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
  _BitScanForward(&idx, x);
#else
  _BitScanForward64(&idx, x);
#endif
  // the index of the lowest set bit equals the trailing-zero count
  return idx;
}
921
922 #else
// Count trailing zero bits in a 32-bit value (returns 32 if `x` is zero).
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static inline size_t mi_ctz32(uint32_t x) {
  static const unsigned char debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x==0) return 32;
  // `(0 - x)` negates in unsigned arithmetic: avoids the signed-overflow UB
  // that `-(int32_t)x` had for x==0x80000000 (negating INT_MIN).
  // The `U` constant plus the explicit `uint32_t` cast keep the de Bruijn
  // product truncated to 32 bits, so `>> 27` always yields an index in
  // [0,31] -- the previous `UL` constant widened to 64 bits on LP64 and
  // could index far past the table.
  return debruijn[(uint32_t)((x & (0 - x)) * 0x077CB531U) >> 27];
}
// Count leading zero bits in a 32-bit value (returns 32 if `x` is zero).
// de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
static inline size_t mi_clz32(uint32_t x) {
  static const uint8_t debruijn[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
  };
  if (x==0) return 32;
  // smear the highest set bit into all lower positions (shifts 1,2,4,8,16)
  for (uint32_t shift = 1; shift <= 16; shift *= 2) {
    x |= (x >> shift);
  }
  return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
}
946
// Generic count-leading-zeros built on `mi_clz32`
// (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
  return mi_clz32((uint32_t)x);
#else
  // 64-bit: scan the high word first; `mi_clz32` returns 32 when the
  // high word is all zero, in which case we count into the low word.
  size_t count = mi_clz32((uint32_t)(x >> 32));
  if (count < 32) return count;
  return (32 + mi_clz32((uint32_t)x));
#endif
}
// Generic count-trailing-zeros built on `mi_ctz32`
// (returns MI_INTPTR_BITS if `x` is zero).
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
  return mi_ctz32((uint32_t)x);
#else
  // 64-bit: scan the low word first; `mi_ctz32` returns 32 when the
  // low word is all zero, in which case we count into the high word.
  size_t count = mi_ctz32((uint32_t)x);
  if (count < 32) return count;
  return (32 + mi_ctz32((uint32_t)(x>>32)));
#endif
}
967
968 #endif
969
970 // "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
mi_bsr(uintptr_t x)971 static inline size_t mi_bsr(uintptr_t x) {
972 return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
973 }
974
975
976 // ---------------------------------------------------------------------------------
977 // Provide our own `_mi_memcpy` for potential performance optimizations.
978 //
979 // For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
980 // we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
981 // (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
982 // ---------------------------------------------------------------------------------
983
984 #if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
985 #include <intrin.h>
986 #include <string.h>
987 extern bool _mi_cpu_has_fsrm;
// Copy `n` bytes; uses `rep movsb` (`__movsb`) when the cpu supports
// "fast short rep movsb" (FSRM), otherwise the regular `memcpy`.
// NOTE(review): `_mi_cpu_has_fsrm` is presumably initialized once at
// startup from CPUID -- verify where it is set before relying on it.
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  if (_mi_cpu_has_fsrm) {
    __movsb((unsigned char*)dst, (const unsigned char*)src, n);
  }
  else {
    memcpy(dst, src, n); // todo: use noinline?
  }
}
996 #else
997 #include <string.h>
// Portable fallback: delegate byte copies to the C runtime `memcpy`.
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  unsigned char* d = (unsigned char*)dst;
  const unsigned char* s = (const unsigned char*)src;
  memcpy(d, s, n);
}
1001 #endif
1002
1003
1004 // -------------------------------------------------------------------------------
1005 // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
1006 // This is used for example in `mi_realloc`.
1007 // -------------------------------------------------------------------------------
1008
1009 #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
1010 // On GCC/CLang we provide a hint that the pointers are word aligned.
1011 #include <string.h>
// Word-aligned copy: asserts the alignment contract, then hints the
// compiler that both pointers are MI_INTPTR_SIZE aligned so it can
// emit wider moves. Callers must guarantee the alignment.
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
  void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
  const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
  memcpy(adst, asrc, n);
}
1018 #else
1019 // Default fallback on `_mi_memcpy`
// Word-aligned copy without compiler hints: assert the alignment
// contract and fall back to `_mi_memcpy`.
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
  _mi_memcpy(dst, src, n);
}
1024 #endif
1025
1026
1027 #endif
1028