/* ----------------------------------------------------------------------------
Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/
#pragma once
#ifndef MIMALLOC_INTERNAL_H
#define MIMALLOC_INTERNAL_H

#include "mimalloc-types.h"

#if (MI_DEBUG>0)
#define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
#else
#define mi_trace_message(...)
#endif

#define MI_CACHE_LINE          64
#if defined(_MSC_VER)
#pragma warning(disable:4127)   // suppress constant conditional warning (due to MI_SECURE paths)
#pragma warning(disable:26812)  // unscoped enum warning
#define mi_decl_noinline        __declspec(noinline)
#define mi_decl_thread          __declspec(thread)
#define mi_decl_cache_align     __declspec(align(MI_CACHE_LINE))
#elif (defined(__GNUC__) && (__GNUC__ >= 3)) || defined(__clang__) // includes clang and icc
#define mi_decl_noinline        __attribute__((noinline))
#define mi_decl_thread          __thread
#define mi_decl_cache_align     __attribute__((aligned(MI_CACHE_LINE)))
#else
#define mi_decl_noinline
#define mi_decl_thread          __thread        // hope for the best :-)
#define mi_decl_cache_align
#endif

#if defined(__EMSCRIPTEN__) && !defined(__wasi__)
#define __wasi__
#endif

#if defined(__cplusplus)
#define mi_decl_externc       extern "C"
#else
#define mi_decl_externc
#endif

// "options.c"
void       _mi_fputs(mi_output_fun* out, void* arg, const char* prefix, const char* message);
void       _mi_fprintf(mi_output_fun* out, void* arg, const char* fmt, ...);
void       _mi_warning_message(const char* fmt, ...);
void       _mi_verbose_message(const char* fmt, ...);
void       _mi_trace_message(const char* fmt, ...);
void       _mi_options_init(void);
void       _mi_error_message(int err, const char* fmt, ...);

// random.c
void       _mi_random_init(mi_random_ctx_t* ctx);
void       _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t  _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t  _mi_heap_random_next(mi_heap_t* heap);
uintptr_t  _mi_os_random_weak(uintptr_t extra_seed);
static inline uintptr_t _mi_random_shuffle(uintptr_t x);

// init.c
extern mi_decl_cache_align mi_stats_t       _mi_stats_main;
extern mi_decl_cache_align const mi_page_t  _mi_page_empty;
bool       _mi_is_main_thread(void);
size_t     _mi_current_thread_count(void);
bool       _mi_preloading(void);  // true while the C runtime is not ready

// os.c
size_t     _mi_os_page_size(void);
void       _mi_os_init(void);                                      // called from process init
void*      _mi_os_alloc(size_t size, mi_stats_t* stats);           // to allocate thread local data
void       _mi_os_free(void* p, size_t size, mi_stats_t* stats);   // to free thread local data

bool       _mi_os_protect(void* addr, size_t size);
bool       _mi_os_unprotect(void* addr, size_t size);
bool       _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
bool       _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool       _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool       _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
size_t     _mi_os_good_alloc_size(size_t size);
bool       _mi_os_has_overcommit(void);

// arena.c
void*      _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void*      _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
void       _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);

// "segment-cache.c"
void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld);
bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
void       _mi_segment_map_allocated_at(const mi_segment_t* segment);
void       _mi_segment_map_freed_at(const mi_segment_t* segment);

// "segment.c"
mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void       _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void       _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool       _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void       _mi_segment_thread_collect(mi_segments_tld_t* tld);
void       _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);

uint8_t*   _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
void       _mi_abandoned_await_readers(void);



// "page.c"
void*      _mi_malloc_generic(mi_heap_t* heap, size_t size)  mi_attr_noexcept mi_attr_malloc;

void       _mi_page_retire(mi_page_t* page) mi_attr_noexcept;                  // free the page if there are no other pages with many free blocks
void       _mi_page_unfull(mi_page_t* page);
void       _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force);   // free the page
void       _mi_page_abandon(mi_page_t* page, mi_page_queue_t* pq);            // abandon the page, to be picked up by another thread...
void       _mi_heap_delayed_free(mi_heap_t* heap);
void       _mi_heap_collect_retired(mi_heap_t* heap, bool force);

void       _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never);
size_t     _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue_t* append);
void       _mi_deferred_free(mi_heap_t* heap, bool force);

void       _mi_page_free_collect(mi_page_t* page,bool force);
void       _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page);   // callback from segments

size_t     _mi_bin_size(uint8_t bin);           // for stats
uint8_t    _mi_bin(size_t size);                // for stats

// "heap.c"
void       _mi_heap_destroy_pages(mi_heap_t* heap);
void       _mi_heap_collect_abandon(mi_heap_t* heap);
void       _mi_heap_set_default_direct(mi_heap_t* heap);

// "stats.c"
void       _mi_stats_done(mi_stats_t* stats);

mi_msecs_t  _mi_clock_now(void);
mi_msecs_t  _mi_clock_end(mi_msecs_t start);
mi_msecs_t  _mi_clock_start(void);

// "alloc.c"
void*       _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept;  // called from `_mi_malloc_generic`
void*       _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero);
void*       _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero);
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool        _mi_free_delayed_block(mi_block_t* block);
void        _mi_block_zero_init(const mi_page_t* page, void* p, size_t size);

#if MI_DEBUG>1
bool        _mi_page_is_valid(mi_page_t* page);
#endif


// ------------------------------------------------------
// Branches
// ------------------------------------------------------

#if defined(__GNUC__) || defined(__clang__)
#define mi_unlikely(x)     __builtin_expect((x),0)
#define mi_likely(x)       __builtin_expect((x),1)
#else
#define mi_unlikely(x)     (x)
#define mi_likely(x)       (x)
#endif

#ifndef __has_builtin
#define __has_builtin(x)  0
#endif
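
// Illustrative sketch (not part of mimalloc): the hints mark the rare branch so the
// compiler keeps the common case on the fall-through path. The helper name
// `my_checked_div_example` is hypothetical.
static inline uintptr_t my_checked_div_example(uintptr_t x, uintptr_t d) {
  if (mi_unlikely(d == 0)) return 0;  // rare error path
  return x / d;                       // common case
}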


/* -----------------------------------------------------------
  Error codes passed to `_mi_fatal_error`
  All are recoverable but EFAULT is a serious error and aborts by default in secure mode.
  For portability define undefined error codes using common Unix codes:
  <https://www-numi.fnal.gov/offline_software/srt_public_context/WebDocs/Errors/unix_system_errors.html>
----------------------------------------------------------- */
#include <errno.h>
#ifndef EAGAIN         // double free
#define EAGAIN (11)
#endif
#ifndef ENOMEM         // out of memory
#define ENOMEM (12)
#endif
#ifndef EFAULT         // corrupted free-list or meta-data
#define EFAULT (14)
#endif
#ifndef EINVAL         // trying to free an invalid pointer
#define EINVAL (22)
#endif
#ifndef EOVERFLOW      // count*size overflow
#define EOVERFLOW (75)
#endif


/* -----------------------------------------------------------
  Inlined definitions
----------------------------------------------------------- */
#define MI_UNUSED(x)     (void)(x)
#if (MI_DEBUG>0)
#define MI_UNUSED_RELEASE(x)
#else
#define MI_UNUSED_RELEASE(x)  MI_UNUSED(x)
#endif

#define MI_INIT4(x)   x(),x(),x(),x()
#define MI_INIT8(x)   MI_INIT4(x),MI_INIT4(x)
#define MI_INIT16(x)  MI_INIT8(x),MI_INIT8(x)
#define MI_INIT32(x)  MI_INIT16(x),MI_INIT16(x)
#define MI_INIT64(x)  MI_INIT32(x),MI_INIT32(x)
#define MI_INIT128(x) MI_INIT64(x),MI_INIT64(x)
#define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x)
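
// Illustrative note (not from the original source): the MI_INITn macros expand an
// initializer expression n times and are used to keep large static initializers
// (as in `init.c`) readable. For example, assuming a hypothetical macro MI_EXAMPLE_NULL():
//
//   #define MI_EXAMPLE_NULL()  NULL
//   static void* my_example_slots[128] = { MI_INIT128(MI_EXAMPLE_NULL) };
//
// expands to 128 comma-separated NULL entries.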


// Is `x` a power of two? (0 is considered a power of two)
static inline bool _mi_is_power_of_two(uintptr_t x) {
  return ((x & (x - 1)) == 0);
}

// Align upwards
static inline uintptr_t _mi_align_up(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  uintptr_t mask = alignment - 1;
  if ((alignment & mask) == 0) {  // power of two?
    return ((sz + mask) & ~mask);
  }
  else {
    return (((sz + mask)/alignment)*alignment);
  }
}

// Align downwards
static inline uintptr_t _mi_align_down(uintptr_t sz, size_t alignment) {
  mi_assert_internal(alignment != 0);
  uintptr_t mask = alignment - 1;
  if ((alignment & mask) == 0) { // power of two?
    return (sz & ~mask);
  }
  else {
    return ((sz / alignment) * alignment);
  }
}

// Divide upwards: `s <= _mi_divide_up(s,d)*d < s+d`.
static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) {
  mi_assert_internal(divider != 0);
  return (divider == 0 ? size : ((size + divider - 1) / divider));
}
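
// Worked example (illustrative, assuming a 64-byte alignment):
//   _mi_align_up(100, 64)   == 128   // power-of-two path: (100 + 63) & ~63
//   _mi_align_down(100, 64) == 64    // 100 & ~63
//   _mi_divide_up(100, 64)  == 2     // (100 + 63) / 64, and indeed 100 <= 2*64 < 100+64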

// Is memory zero initialized?
static inline bool mi_mem_is_zero(void* p, size_t size) {
  for (size_t i = 0; i < size; i++) {
    if (((uint8_t*)p)[i] != 0) return false;
  }
  return true;
}


// Align a byte size to a size in _machine words_,
// i.e. byte size == `wsize*sizeof(void*)`.
static inline size_t _mi_wsize_from_size(size_t size) {
  mi_assert_internal(size <= SIZE_MAX - sizeof(uintptr_t));
  return (size + sizeof(uintptr_t) - 1) / sizeof(uintptr_t);
}

// Does malloc satisfy the alignment constraints already?
static inline bool mi_malloc_satisfies_alignment(size_t alignment, size_t size) {
  return (alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2)));
}

// Overflow detecting multiply
#if __has_builtin(__builtin_umul_overflow) || (defined(__GNUC__) && (__GNUC__ >= 5))
#include <limits.h>      // UINT_MAX, ULONG_MAX
#if defined(_CLOCK_T)    // for Illumos
#undef _CLOCK_T
#endif
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  #if (SIZE_MAX == ULONG_MAX)
    return __builtin_umull_overflow(count, size, (unsigned long *)total);
  #elif (SIZE_MAX == UINT_MAX)
    return __builtin_umul_overflow(count, size, (unsigned int *)total);
  #else
    return __builtin_umulll_overflow(count, size, (unsigned long long *)total);
  #endif
}
#else /* __builtin_umul_overflow is unavailable */
static inline bool mi_mul_overflow(size_t count, size_t size, size_t* total) {
  #define MI_MUL_NO_OVERFLOW ((size_t)1 << (4*sizeof(size_t)))  // sqrt(SIZE_MAX)
  *total = count * size;
  return ((size >= MI_MUL_NO_OVERFLOW || count >= MI_MUL_NO_OVERFLOW)
    && size > 0 && (SIZE_MAX / size) < count);
}
#endif

// Safe multiply `count*size` into `total`; return `true` on overflow.
static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* total) {
  if (count==1) {  // quick check for the case where count is one (common for C++ allocators)
    *total = size;
    return false;
  }
  else if (mi_unlikely(mi_mul_overflow(count, size, total))) {
    _mi_error_message(EOVERFLOW, "allocation request is too large (%zu * %zu bytes)\n", count, size);
    *total = SIZE_MAX;
    return true;
  }
  else return false;
}
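
// Illustrative sketch (not part of the mimalloc API): how a calloc-style entry point
// could use `mi_count_size_overflow` before allocating. The name
// `my_heap_calloc_example` is hypothetical and used only for this example.
static inline void* my_heap_calloc_example(mi_heap_t* heap, size_t count, size_t size) {
  size_t total;
  if (mi_count_size_overflow(count, size, &total)) return NULL;  // count*size overflowed
  return _mi_heap_malloc_zero(heap, total, true);                // allocate zero-initialized memory
}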


/* ----------------------------------------------------------------------------------------
The thread local default heap: `mi_get_default_heap` returns the thread local heap.
On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
that the storage will always be available (allocated on the thread stacks).
On some platforms though we cannot use that when overriding `malloc` since the underlying
TLS implementation (or the loader) will itself call `malloc` on first access and recurse.
We try to circumvent this in an efficient way:
- macOS  : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On macOS, the
           loader itself calls `malloc` even before the modules are initialized.
- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
- DragonFly: the uniqueid use is buggy but kept for reference.
------------------------------------------------------------------------------------------- */

extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
extern bool _mi_process_is_initialized;
mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing heap

#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
#define MI_TLS_SLOT               89  // seems unused?
// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
#define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
#elif defined(__DragonFly__)
#warning "mimalloc is not working correctly on DragonFly yet."
//#define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
#endif
#endif

#if defined(MI_TLS_SLOT)
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept;   // forward declaration
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
#include <pthread.h>
static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
  pthread_t self = pthread_self();
  #if defined(__DragonFly__)
  if (self==NULL) {
    mi_heap_t* pheap_main = _mi_heap_main_get();
    return &pheap_main;
  }
  #endif
  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
}
#elif defined(MI_TLS_PTHREAD)
#include <pthread.h>
extern pthread_key_t _mi_heap_default_key;
#endif

// Default heap to allocate from (if not using TLS- or pthread slots).
// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`).
// This thread local variable is only used when none of MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS is defined.
// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from


static inline mi_heap_t* mi_get_default_heap(void) {
#if defined(MI_TLS_SLOT)
  mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
  if (mi_unlikely(heap == NULL)) { heap = (mi_heap_t*)&_mi_heap_empty; } //_mi_heap_empty_get(); }
  return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
  mi_heap_t* heap = *mi_tls_pthread_heap_slot();
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#elif defined(MI_TLS_PTHREAD)
  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
  #if defined(MI_TLS_RECURSE_GUARD)
  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
  #endif
  return _mi_heap_default;
#endif
}

static inline bool mi_heap_is_default(const mi_heap_t* heap) {
  return (heap == mi_get_default_heap());
}

static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
  return (heap->tld->heap_backing == heap);
}

static inline bool mi_heap_is_initialized(mi_heap_t* heap) {
  mi_assert_internal(heap != NULL);
  return (heap != &_mi_heap_empty);
}

static inline uintptr_t _mi_ptr_cookie(const void* p) {
  extern mi_heap_t _mi_heap_main;
  mi_assert_internal(_mi_heap_main.cookie != 0);
  return ((uintptr_t)p ^ _mi_heap_main.cookie);
}

/* -----------------------------------------------------------
  Pages
----------------------------------------------------------- */

static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t size) {
  mi_assert_internal(size <= (MI_SMALL_SIZE_MAX + MI_PADDING_SIZE));
  const size_t idx = _mi_wsize_from_size(size);
  mi_assert_internal(idx < MI_PAGES_DIRECT);
  return heap->pages_free_direct[idx];
}

// Get the page belonging to a certain size class
static inline mi_page_t* _mi_get_free_small_page(size_t size) {
  return _mi_heap_get_free_small_page(mi_get_default_heap(), size);
}
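
// Illustrative sketch (not mimalloc's actual allocation entry point): the fast path for a
// small allocation looks up the thread-local default heap and its direct page for the size
// class, falling back to the generic path when that page has no free blocks. The helper
// `my_small_alloc_example` and its exact fallback behavior are hypothetical.
static inline void* my_small_alloc_example(size_t size) {
  mi_heap_t* heap = mi_get_default_heap();
  if (size > MI_SMALL_SIZE_MAX) {                                  // not a small size class
    return _mi_malloc_generic(heap, size + MI_PADDING_SIZE);
  }
  mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
  if (page->free != NULL) {
    return _mi_page_malloc(heap, page, size + MI_PADDING_SIZE);    // pop from the page free list
  }
  return _mi_malloc_generic(heap, size + MI_PADDING_SIZE);         // slow path: find or allocate a page
}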

// Segment that contains the pointer
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
  // mi_assert_internal(p != NULL);
  return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
}

static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
  mi_assert_internal(s->slice_offset== 0 && s->slice_count > 0);
  return (mi_page_t*)(s);
}

static inline mi_slice_t* mi_page_to_slice(mi_page_t* p) {
  mi_assert_internal(p->slice_offset== 0 && p->slice_count > 0);
  return (mi_slice_t*)(p);
}

// Segment belonging to a page
static inline mi_segment_t* _mi_page_segment(const mi_page_t* page) {
  mi_segment_t* segment = _mi_ptr_segment(page);
  mi_assert_internal(segment == NULL || ((mi_slice_t*)page >= segment->slices && (mi_slice_t*)page < segment->slices + segment->slice_entries));
  return segment;
}

static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
  mi_slice_t* start = (mi_slice_t*)((uint8_t*)slice - slice->slice_offset);
  mi_assert_internal(start >= _mi_ptr_segment(slice)->slices);
  mi_assert_internal(start->slice_offset == 0);
  mi_assert_internal(start + start->slice_count > slice);
  return start;
}

// Get the page containing the pointer
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
  ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
  mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
  size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
  mi_assert_internal(idx < segment->slice_entries);
  mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
  mi_slice_t* slice = mi_slice_first(slice0);  // adjust to the block that holds the page data
  mi_assert_internal(slice->slice_offset == 0);
  mi_assert_internal(slice >= segment->slices && slice < segment->slices + segment->slice_entries);
  return mi_slice_to_page(slice);
}

// Quick page start for initialized pages
static inline uint8_t* _mi_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size) {
  return _mi_segment_page_start(segment, page, page_size);
}

// Get the page containing the pointer
static inline mi_page_t* _mi_ptr_page(void* p) {
  return _mi_segment_page_of(_mi_ptr_segment(p), p);
}
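
// Illustrative sketch (not part of mimalloc): how the free path can go from a raw pointer
// to its metadata using the helpers above -- mask the pointer to find the segment, then
// index the slice map to find the owning page. The name `my_ptr_page_start_example` is
// hypothetical.
static inline uint8_t* my_ptr_page_start_example(void* p, size_t* page_size) {
  mi_segment_t* segment = _mi_ptr_segment(p);            // MI_SEGMENT_SIZE aligned block containing `p`
  mi_page_t*    page    = _mi_segment_page_of(segment, p);
  return _mi_page_start(segment, page, page_size);       // start of the page area and its size
}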

// Get the block size of a page (special case for huge objects)
static inline size_t mi_page_block_size(const mi_page_t* page) {
  const size_t bsize = page->xblock_size;
  mi_assert_internal(bsize > 0);
  if (mi_likely(bsize < MI_HUGE_BLOCK_SIZE)) {
    return bsize;
  }
  else {
    size_t psize;
    _mi_segment_page_start(_mi_page_segment(page), page, &psize);
    return psize;
  }
}

// Get the usable block size of a page without fixed padding.
// This may still include internal padding due to alignment and rounding up size classes.
static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
  return mi_page_block_size(page) - MI_PADDING_SIZE;
}

// size of a segment
static inline size_t mi_segment_size(mi_segment_t* segment) {
  return segment->segment_slices * MI_SEGMENT_SLICE_SIZE;
}

static inline uint8_t* mi_segment_end(mi_segment_t* segment) {
  return (uint8_t*)segment + mi_segment_size(segment);
}

// Thread free access
static inline mi_block_t* mi_page_thread_free(const mi_page_t* page) {
  return (mi_block_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & ~3);
}

static inline mi_delayed_t mi_page_thread_free_flag(const mi_page_t* page) {
  return (mi_delayed_t)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xthread_free) & 3);
}

// Heap access
static inline mi_heap_t* mi_page_heap(const mi_page_t* page) {
  return (mi_heap_t*)(mi_atomic_load_relaxed(&((mi_page_t*)page)->xheap));
}

static inline void mi_page_set_heap(mi_page_t* page, mi_heap_t* heap) {
  mi_assert_internal(mi_page_thread_free_flag(page) != MI_DELAYED_FREEING);
  mi_atomic_store_release(&page->xheap,(uintptr_t)heap);
}

// Thread free flag helpers
static inline mi_block_t* mi_tf_block(mi_thread_free_t tf) {
  return (mi_block_t*)(tf & ~0x03);
}
static inline mi_delayed_t mi_tf_delayed(mi_thread_free_t tf) {
  return (mi_delayed_t)(tf & 0x03);
}
static inline mi_thread_free_t mi_tf_make(mi_block_t* block, mi_delayed_t delayed) {
  return (mi_thread_free_t)((uintptr_t)block | (uintptr_t)delayed);
}
static inline mi_thread_free_t mi_tf_set_delayed(mi_thread_free_t tf, mi_delayed_t delayed) {
  return mi_tf_make(mi_tf_block(tf),delayed);
}
static inline mi_thread_free_t mi_tf_set_block(mi_thread_free_t tf, mi_block_t* block) {
  return mi_tf_make(block, mi_tf_delayed(tf));
}
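
// Illustrative note (not from the original source): `mi_thread_free_t` packs a block
// pointer and a 2-bit `mi_delayed_t` state into one word (blocks are aligned so the low
// two bits are free). A hypothetical sketch of pushing a block onto another thread's
// `xthread_free` list (the real code also encodes the `next` pointer, see further below):
//
//   mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free);
//   block->next = (mi_encoded_t)mi_tf_block(tfree);              // link to current head
//   mi_thread_free_t tfree_new = mi_tf_set_block(tfree, block);  // same flag, new head
//   // ...retry a compare-and-swap of `tfree` -> `tfree_new` on `page->xthread_free`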

// are all blocks in a page freed?
// note: needs an up-to-date `used` count (as the `xthread_free` list may not be empty); see `_mi_page_collect_free`.
static inline bool mi_page_all_free(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  return (page->used == 0);
}

// are there any available blocks?
static inline bool mi_page_has_any_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL && page->reserved > 0);
  return (page->used < page->reserved || (mi_page_thread_free(page) != NULL));
}

// are there immediately available blocks, i.e. blocks available on the free list?
static inline bool mi_page_immediate_available(const mi_page_t* page) {
  mi_assert_internal(page != NULL);
  return (page->free != NULL);
}

// is more than 7/8th of a page in use?
static inline bool mi_page_mostly_used(const mi_page_t* page) {
  if (page==NULL) return true;
  uint16_t frac = page->reserved / 8U;
  return (page->reserved - page->used <= frac);
}

static inline mi_page_queue_t* mi_page_queue(const mi_heap_t* heap, size_t size) {
  return &((mi_heap_t*)heap)->pages[_mi_bin(size)];
}



//-----------------------------------------------------------
// Page flags
//-----------------------------------------------------------
static inline bool mi_page_is_in_full(const mi_page_t* page) {
  return page->flags.x.in_full;
}

static inline void mi_page_set_in_full(mi_page_t* page, bool in_full) {
  page->flags.x.in_full = in_full;
}

static inline bool mi_page_has_aligned(const mi_page_t* page) {
  return page->flags.x.has_aligned;
}

static inline void mi_page_set_has_aligned(mi_page_t* page, bool has_aligned) {
  page->flags.x.has_aligned = has_aligned;
}


/* -------------------------------------------------------------------
Encoding/Decoding the free list next pointers

This is to protect against buffer overflow exploits where the
free list is mutated. Many hardened allocators xor the next pointer `p`
with a secret key `k1`, as `p^k1`. This prevents overwriting with known
values but might still be too weak: if the attacker can guess
the pointer `p` this can reveal `k1` (since `p^k1^p == k1`).
Moreover, if multiple blocks can be read as well, the attacker can
xor both as `(p1^k1) ^ (p2^k1) == p1^p2` which may reveal a lot
about the pointers (and subsequently `k1`).

Instead mimalloc uses an extra key `k2` and encodes as `((p^k2)<<<k1)+k1`.
Since these operations are not associative, the above approaches no longer
work as well, even if `p` can be guessed. For example,
for the read case we can subtract two entries to discard the `+k1` term,
but that leads to `((p1^k2)<<<k1) - ((p2^k2)<<<k1)` at best.
We include the left-rotation since xor and addition are otherwise linear
in the lowest bit. Finally, both keys are unique per page which reduces
the re-use of keys by a large factor.

We also pass a separate `null` value to be used as `NULL` or otherwise
`(k2<<<k1)+k1` would appear (too) often as a sentinel value.
------------------------------------------------------------------- */

static inline bool mi_is_in_same_segment(const void* p, const void* q) {
  return (_mi_ptr_segment(p) == _mi_ptr_segment(q));
}

static inline bool mi_is_in_same_page(const void* p, const void* q) {
  mi_segment_t* segment = _mi_ptr_segment(p);
  if (_mi_ptr_segment(q) != segment) return false;
  // assume q may be invalid // return (_mi_segment_page_of(segment, p) == _mi_segment_page_of(segment, q));
  mi_page_t* page = _mi_segment_page_of(segment, p);
  size_t psize;
  uint8_t* start = _mi_segment_page_start(segment, page, &psize);
  return (start <= (uint8_t*)q && (uint8_t*)q < start + psize);
}

static inline uintptr_t mi_rotl(uintptr_t x, uintptr_t shift) {
  shift %= MI_INTPTR_BITS;
  return (shift==0 ? x : ((x << shift) | (x >> (MI_INTPTR_BITS - shift))));
}
static inline uintptr_t mi_rotr(uintptr_t x, uintptr_t shift) {
  shift %= MI_INTPTR_BITS;
  return (shift==0 ? x : ((x >> shift) | (x << (MI_INTPTR_BITS - shift))));
}

static inline void* mi_ptr_decode(const void* null, const mi_encoded_t x, const uintptr_t* keys) {
  void* p = (void*)(mi_rotr(x - keys[0], keys[0]) ^ keys[1]);
  return (mi_unlikely(p==null) ? NULL : p);
}

static inline mi_encoded_t mi_ptr_encode(const void* null, const void* p, const uintptr_t* keys) {
  uintptr_t x = (uintptr_t)(mi_unlikely(p==NULL) ? null : p);
  return mi_rotl(x ^ keys[1], keys[0]) + keys[0];
}
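
// Worked example (illustrative): with keys `k` a next pointer `p` is encoded as
// `rotl(p ^ k[1], k[0]) + k[0]` and decoded as `rotr(enc - k[0], k[0]) ^ k[1]`, so for
// any block pointer `p` (with a matching `null` sentinel):
//
//   mi_ptr_decode(null, mi_ptr_encode(null, p, k), k) == p
//
// The per-page keys (`page->keys`) are initialized from the heap's random state.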

static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* block, const uintptr_t* keys ) {
  #ifdef MI_ENCODE_FREELIST
  return (mi_block_t*)mi_ptr_decode(null, block->next, keys);
  #else
  MI_UNUSED(keys); MI_UNUSED(null);
  return (mi_block_t*)block->next;
  #endif
}

static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
  #ifdef MI_ENCODE_FREELIST
  block->next = mi_ptr_encode(null, next, keys);
  #else
  MI_UNUSED(keys); MI_UNUSED(null);
  block->next = (mi_encoded_t)next;
  #endif
}

static inline mi_block_t* mi_block_next(const mi_page_t* page, const mi_block_t* block) {
  #ifdef MI_ENCODE_FREELIST
  mi_block_t* next = mi_block_nextx(page,block,page->keys);
  // check for free list corruption: is `next` at least in the same page?
  // TODO: check if `next` is `page->block_size` aligned?
  if (mi_unlikely(next!=NULL && !mi_is_in_same_page(block, next))) {
    _mi_error_message(EFAULT, "corrupted free list entry of size %zub at %p: value 0x%zx\n", mi_page_block_size(page), block, (uintptr_t)next);
    next = NULL;
  }
  return next;
  #else
  MI_UNUSED(page);
  return mi_block_nextx(page,block,NULL);
  #endif
}

static inline void mi_block_set_next(const mi_page_t* page, mi_block_t* block, const mi_block_t* next) {
  #ifdef MI_ENCODE_FREELIST
  mi_block_set_nextx(page,block,next, page->keys);
  #else
  MI_UNUSED(page);
  mi_block_set_nextx(page,block,next,NULL);
  #endif
}


// -------------------------------------------------------------------
// commit mask
// -------------------------------------------------------------------

static inline void mi_commit_mask_create_empty(mi_commit_mask_t* cm) {
  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
    cm->mask[i] = 0;
  }
}

static inline void mi_commit_mask_create_full(mi_commit_mask_t* cm) {
  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
    cm->mask[i] = ~((size_t)0);
  }
}

static inline bool mi_commit_mask_is_empty(const mi_commit_mask_t* cm) {
  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
    if (cm->mask[i] != 0) return false;
  }
  return true;
}

static inline bool mi_commit_mask_is_full(const mi_commit_mask_t* cm) {
  for (size_t i = 0; i < MI_COMMIT_MASK_FIELD_COUNT; i++) {
    if (cm->mask[i] != ~((size_t)0)) return false;
  }
  return true;
}

// defined in `segment.c`:
size_t _mi_commit_mask_committed_size(const mi_commit_mask_t* cm, size_t total);
size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx);

#define mi_commit_mask_foreach(cm,idx,count) \
  idx = 0; \
  while ((count = _mi_commit_mask_next_run(cm,&idx)) > 0) {

#define mi_commit_mask_foreach_end() \
    idx += count; \
  }
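
// Illustrative sketch (not part of mimalloc): iterating the runs of set bits in a commit
// mask with the foreach macros above. Each iteration yields a run of `count` consecutive
// committed slices starting at bit `idx`. The helper `my_count_committed_example` is
// hypothetical and used only for this example.
static inline size_t my_count_committed_example(const mi_commit_mask_t* cm) {
  size_t total = 0;
  size_t idx;
  size_t count;
  mi_commit_mask_foreach(cm, idx, count) {
    total += count;                 // `count` bits are set starting at bit `idx`
  }
  mi_commit_mask_foreach_end();
  return total;
}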




// -------------------------------------------------------------------
// Fast "random" shuffle
// -------------------------------------------------------------------

static inline uintptr_t _mi_random_shuffle(uintptr_t x) {
  if (x==0) { x = 17; }   // ensure we don't get stuck in generating zeros
#if (MI_INTPTR_SIZE==8)
  // by Sebastiano Vigna, see: <http://xoshiro.di.unimi.it/splitmix64.c>
  x ^= x >> 30;
  x *= 0xbf58476d1ce4e5b9UL;
  x ^= x >> 27;
  x *= 0x94d049bb133111ebUL;
  x ^= x >> 31;
#elif (MI_INTPTR_SIZE==4)
  // by Chris Wellons, see: <https://nullprogram.com/blog/2018/07/31/>
  x ^= x >> 16;
  x *= 0x7feb352dUL;
  x ^= x >> 15;
  x *= 0x846ca68bUL;
  x ^= x >> 16;
#endif
  return x;
}
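
// Illustrative sketch (not part of mimalloc): `_mi_random_shuffle` is a cheap bit-mixing
// step, useful for stretching one weak seed into a few distinct values (stronger
// randomness comes from `_mi_random_next` instead). `my_mix_seed_example` is hypothetical.
static inline uintptr_t my_mix_seed_example(uintptr_t seed, size_t rounds) {
  uintptr_t x = seed;
  for (size_t i = 0; i < rounds; i++) {
    x = _mi_random_shuffle(x);   // each round mixes the bits further
  }
  return x;
}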

// -------------------------------------------------------------------
// Optimize numa node access for the common case (= one node)
// -------------------------------------------------------------------

int    _mi_os_numa_node_get(mi_os_tld_t* tld);
size_t _mi_os_numa_node_count_get(void);

extern _Atomic(size_t) _mi_numa_node_count;
static inline int _mi_os_numa_node(mi_os_tld_t* tld) {
  if (mi_likely(mi_atomic_load_relaxed(&_mi_numa_node_count) == 1)) return 0;
  else return _mi_os_numa_node_get(tld);
}
static inline size_t _mi_os_numa_node_count(void) {
  const size_t count = mi_atomic_load_relaxed(&_mi_numa_node_count);
  if (mi_likely(count>0)) return count;
  else return _mi_os_numa_node_count_get();
}


// -------------------------------------------------------------------
// Getting the thread id should be performant as it is called in the
// fast path of `_mi_free` and we specialize for various platforms.
// -------------------------------------------------------------------
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#include <windows.h>
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  // Windows: works on Intel and ARM in both 32- and 64-bit
  return (uintptr_t)NtCurrentTeb();
}

#elif defined(__GNUC__) && \
      (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))

// TLS register on x86 is in the FS or GS register, see: https://akkadia.org/drepper/tls.pdf
static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
  void* res;
  const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
  __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
  __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOS uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
  __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
#elif defined(__x86_64__)
  __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
  void** tcb; MI_UNUSED(ofs);
  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
  res = tcb[slot];
#elif defined(__aarch64__)
  void** tcb; MI_UNUSED(ofs);
  #if defined(__APPLE__) // M1, issue #343
  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
  #else
  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
  #endif
  res = tcb[slot];
#endif
  return res;
}

// setting a TLS slot is only used on macOS for now
static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
  const size_t ofs = (slot*sizeof(void*));
#if defined(__i386__)
  __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
#elif defined(__APPLE__) && defined(__x86_64__)
  __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
#elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
  __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
#elif defined(__x86_64__)
  __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
#elif defined(__arm__)
  void** tcb; MI_UNUSED(ofs);
  __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
  tcb[slot] = value;
#elif defined(__aarch64__)
  void** tcb; MI_UNUSED(ofs);
  #if defined(__APPLE__) // M1, issue #343
  __asm__ volatile ("mrs %0, tpidrro_el0" : "=r" (tcb));
  tcb = (void**)((uintptr_t)tcb & ~0x07UL);  // clear lower 3 bits
  #else
  __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
  #endif
  tcb[slot] = value;
#endif
}

static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
#if defined(__BIONIC__) && (defined(__arm__) || defined(__aarch64__))
  // on Android, slot 1 is the thread ID (pointer to pthread internal struct)
  return (uintptr_t)mi_tls_slot(1);
#else
  // in all our other targets, slot 0 is the pointer to the thread control block
  return (uintptr_t)mi_tls_slot(0);
#endif
}
#else
// otherwise use standard C
static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
  return (uintptr_t)&_mi_heap_default;
}
#endif

// -----------------------------------------------------------------------
// Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
// -----------------------------------------------------------------------

#if defined(__GNUC__)

#include <limits.h>       // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (INTPTR_MAX == LONG_MAX)
  return __builtin_clzl(x);
#else
  return __builtin_clzll(x);
#endif
}
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (INTPTR_MAX == LONG_MAX)
  return __builtin_ctzl(x);
#else
  return __builtin_ctzll(x);
#endif
}

#elif defined(_MSC_VER)

#include <limits.h>       // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
  unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
  _BitScanReverse(&idx, x);
#else
  _BitScanReverse64(&idx, x);
#endif
  return ((MI_INTPTR_BITS - 1) - idx);
}
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
  unsigned long idx;
#if (INTPTR_MAX == LONG_MAX)
  _BitScanForward(&idx, x);
#else
  _BitScanForward64(&idx, x);
#endif
  return idx;
}

#else
static inline size_t mi_ctz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const unsigned char debruijn[32] = {
    0, 1, 28, 2, 29, 14, 24, 3, 30, 22, 20, 15, 25, 17, 4, 8,
    31, 27, 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9
  };
  if (x==0) return 32;
  return debruijn[((x & -(int32_t)x) * 0x077CB531UL) >> 27];
}
static inline size_t mi_clz32(uint32_t x) {
  // de Bruijn multiplication, see <http://supertech.csail.mit.edu/papers/debruijn.pdf>
  static const uint8_t debruijn[32] = {
    31, 22, 30, 21, 18, 10, 29, 2, 20, 17, 15, 13, 9, 6, 28, 1,
    23, 19, 11, 3, 16, 14, 7, 24, 12, 4, 8, 25, 5, 26, 27, 0
  };
  if (x==0) return 32;
  x |= x >> 1;
  x |= x >> 2;
  x |= x >> 4;
  x |= x >> 8;
  x |= x >> 16;
  return debruijn[(uint32_t)(x * 0x07C4ACDDUL) >> 27];
}

static inline size_t mi_clz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
  return mi_clz32((uint32_t)x);
#else
  size_t count = mi_clz32((uint32_t)(x >> 32));
  if (count < 32) return count;
  return (32 + mi_clz32((uint32_t)x));
#endif
}
static inline size_t mi_ctz(uintptr_t x) {
  if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
  return mi_ctz32((uint32_t)x);
#else
  size_t count = mi_ctz32((uint32_t)x);
  if (count < 32) return count;
  return (32 + mi_ctz32((uint32_t)(x>>32)));
#endif
}

#endif

// "bit scan reverse": Return index of the highest bit (or MI_INTPTR_BITS if `x` is zero)
static inline size_t mi_bsr(uintptr_t x) {
  return (x==0 ? MI_INTPTR_BITS : MI_INTPTR_BITS - 1 - mi_clz(x));
}
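
// Illustrative sketch (not part of mimalloc): `mi_bsr` gives the index of the highest set
// bit, which makes rounding up to the next power of two a one-liner. The name
// `my_next_pow2_example` is hypothetical; it assumes `x` stays below the top bit.
static inline size_t my_next_pow2_example(size_t x) {
  if (x <= 1) return 1;
  if (_mi_is_power_of_two(x)) return x;
  return ((size_t)1 << (mi_bsr(x) + 1));   // one past the highest set bit
}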


// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.
//
// For now, only on Windows with MSVC/clang-cl do we optimize to `rep movsb` if
// we happen to run on x86/x64 CPUs that have "fast short rep movsb" (FSRM) support
// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
// ---------------------------------------------------------------------------------

#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
#include <intrin.h>
#include <string.h>
extern bool _mi_cpu_has_fsrm;
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  if (_mi_cpu_has_fsrm) {
    __movsb((unsigned char*)dst, (const unsigned char*)src, n);
  }
  else {
    memcpy(dst, src, n); // todo: use noinline?
  }
}
#else
#include <string.h>
static inline void _mi_memcpy(void* dst, const void* src, size_t n) {
  memcpy(dst, src, n);
}
#endif


// -------------------------------------------------------------------------------
// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
// -------------------------------------------------------------------------------

#if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__)
// On GCC/Clang we provide a hint that the pointers are word aligned.
#include <string.h>
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
  void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE);
  const void* asrc = __builtin_assume_aligned(src, MI_INTPTR_SIZE);
  memcpy(adst, asrc, n);
}
#else
// Default fallback on `_mi_memcpy`
static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) {
  mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0));
  _mi_memcpy(dst, src, n);
}
#endif
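
// Illustrative sketch (not part of mimalloc): a reallocation-style copy can use the
// aligned variant since block starts are machine-word aligned. `my_grow_copy_example`
// is a hypothetical helper.
static inline void my_grow_copy_example(void* newp, const void* oldp, size_t old_size) {
  _mi_memcpy_aligned(newp, oldp, old_size);   // both pointers are word-aligned block starts
}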


#endif