/* ----------------------------------------------------------------------------
Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
"Arenas" are fixed areas of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.

Currently arenas are only used for huge OS page (1GiB) reservations;
otherwise allocation is delegated directly to the OS.
In the future, we can expose an API to manually add more kinds of arenas,
which is sometimes needed for embedded devices or shared memory, for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
on demand and be able to reuse them efficiently).

The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include <string.h>  // memset
#include <errno.h>   // ENOMEM

#include "bitmap.h"  // atomic bitmap


// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);

void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);

bool  _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool  _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);

/* -----------------------------------------------------------
  Arena allocation
----------------------------------------------------------- */

#define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE   (4*MI_SEGMENT_ALIGN)       // 32MiB
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)    // 16MiB
#define MI_MAX_ARENAS         (64)                       // not more than 256 (since we use 8 bits in the memid)

// A memory arena descriptor
typedef struct mi_arena_s {
  _Atomic(uint8_t*) start;              // the start of the memory area
  size_t   block_count;                 // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
  size_t   field_count;                 // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
  int      numa_node;                   // associated NUMA node
  bool     is_zero_init;                // is the arena zero initialized?
  bool     is_committed;                // is the memory fully committed? (if so, block_committed == NULL)
  bool     is_large;                    // large- or huge OS pages (always committed)
  _Atomic(uintptr_t) search_idx;        // optimization to start the search for free blocks
  mi_bitmap_field_t* blocks_dirty;      // are the blocks potentially non-zero?
  mi_bitmap_field_t* blocks_committed;  // if `!is_committed`, are the blocks committed?
  mi_bitmap_field_t  blocks_inuse[1];   // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;


// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0


/* -----------------------------------------------------------
  Arena allocations get a memory id where the lower 8 bits are
  the arena index +1, and the upper bits the block index.
----------------------------------------------------------- */

// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS   0

static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
  mi_assert_internal(arena_index < 0xFE);
  mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
  return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}

static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
  mi_assert_internal(memid != MI_MEMID_OS);
  *arena_index = (memid & 0xFF) - 1;
  *bitmap_index = (memid >> 8);
}
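
// As a worked example of the encoding above: a block at bitmap index 70 in the
// third registered arena (arena_index 2) gets memid (70 << 8) | (2+1) = 0x4603;
// decoding that memid yields arena_index 2 and bitmap_index 70 again. A memid of
// 0 (MI_MEMID_OS) always means the memory came directly from the OS.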

static size_t mi_block_count_of_size(size_t size) {
  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}
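
// For instance, a 40MiB request maps to mi_block_count_of_size(40MiB) = 2 arena
// blocks (64MiB claimed); requests below MI_ARENA_MIN_OBJ_SIZE (16MiB) never reach
// the arenas and are served directly by the OS (see `_mi_arena_alloc_aligned`).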

/* -----------------------------------------------------------
  Thread safe allocation in an arena
----------------------------------------------------------- */
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
  size_t idx = mi_atomic_load_acquire(&arena->search_idx); // start from last search
  if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
    mi_atomic_store_release(&arena->search_idx, idx); // start search from here next time
    return true;
  }
  return false;
}


/* -----------------------------------------------------------
  Arena Allocation
----------------------------------------------------------- */

static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
                                 bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_bitmap_index_t bitmap_index;
  if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;

  // claimed it! set the dirty bits (todo: no need for an atomic op here?)
  void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
  *memid = mi_arena_id_create(arena_index, bitmap_index);
  *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
  *large = arena->is_large;
  *is_pinned = (arena->is_large || arena->is_committed);
  if (arena->is_committed) {
    // always committed
    *commit = true;
  }
  else if (*commit) {
    // arena not committed as a whole, but commit requested: ensure commit now
    bool any_uncommitted;
    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
    if (any_uncommitted) {
      bool commit_zero;
      _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
      if (commit_zero) *is_zero = true;
    }
  }
  else {
    // no need to commit, but check if already fully committed
    *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
  }
  return p;
}

void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
                              size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *memid = MI_MEMID_OS;
  *is_zero = false;
  *is_pinned = false;

  // try to allocate in an arena if the alignment is small enough
  // and the object is not too large or too small.
  if (alignment <= MI_SEGMENT_ALIGN &&
      size >= MI_ARENA_MIN_OBJ_SIZE &&
      mi_atomic_load_relaxed(&mi_arena_count) > 0)
  {
    const size_t bcount = mi_block_count_of_size(size);
    const int numa_node = _mi_os_numa_node(tld); // current numa node

    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
    // try numa affine allocation
    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
      if (arena==NULL) break; // end reached
      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
          (*large || !arena->is_large))                          // large OS pages allowed, or arena is not large OS pages
      {
        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
        mi_assert_internal((uintptr_t)p % alignment == 0);
        if (p != NULL) return p;
      }
    }
    // try from another numa node instead..
    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
      if (arena==NULL) break; // end reached
      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
          (*large || !arena->is_large))                           // large OS pages allowed, or arena is not large OS pages
      {
        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
        mi_assert_internal((uintptr_t)p % alignment == 0);
        if (p != NULL) return p;
      }
    }
  }

  // finally, fall back to the OS
  if (mi_option_is_enabled(mi_option_limit_os_alloc)) {
    errno = ENOMEM;
    return NULL;
  }
  *is_zero = true;
  *memid = MI_MEMID_OS;
  void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats);
  if (p != NULL) *is_pinned = *large;
  return p;
}

void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld);
}
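
// A minimal usage sketch for internal callers (hypothetical; variable names are
// illustrative and a valid `mi_os_tld_t* tld` is assumed to be available):
//
//   bool commit = true, large = false, is_pinned = false, is_zero = false;
//   size_t memid;
//   void* p = _mi_arena_alloc(MI_ARENA_MIN_OBJ_SIZE, &commit, &large, &is_pinned, &is_zero, &memid, tld);
//   if (p != NULL) {
//     // ... use the memory ...
//     _mi_arena_free(p, MI_ARENA_MIN_OBJ_SIZE, memid, commit, tld->stats);
//   }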

/* -----------------------------------------------------------
  Arena free
----------------------------------------------------------- */

void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && stats != NULL);
  if (p==NULL) return;
  if (size==0) return;
  if (memid == MI_MEMID_OS) {
    // was a direct OS allocation, pass through
    _mi_os_free_ex(p, size, all_committed, stats);
  }
  else {
    // allocated in an arena
    size_t arena_idx;
    size_t bitmap_idx;
    mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
    mi_assert_internal(arena != NULL);
    const size_t blocks = mi_block_count_of_size(size);
    // checks
    if (arena == NULL) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
    mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
    if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
    // potentially decommit
    if (arena->is_committed) {
      mi_assert_internal(all_committed);
    }
    else {
      mi_assert_internal(arena->blocks_committed != NULL);
      _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails
      _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
    }
    // and make it available to others again
    bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
    if (!all_inuse) {
      _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size);
      return;
    }
  }
}

/* -----------------------------------------------------------
  Add an arena.
----------------------------------------------------------- */

static bool mi_arena_add(mi_arena_t* arena) {
  mi_assert_internal(arena != NULL);
  mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
  mi_assert_internal(arena->block_count > 0);

  uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
  if (i >= MI_MAX_ARENAS) {
    mi_atomic_decrement_acq_rel(&mi_arena_count);
    return false;
  }
  mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
  return true;
}

bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept
{
  if (is_large) {
    mi_assert_internal(is_committed);
    is_committed = true;
  }

  const size_t bcount  = mi_block_count_of_size(size);
  const size_t fields  = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
  const size_t bitmaps = (is_committed ? 2 : 3);
  const size_t asize   = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
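  // For a sense of scale (assuming 64-bit bitmap fields): managing a 1GiB area gives
  // bcount = 32 blocks and fields = 1, so asize is just sizeof(mi_arena_t) plus 2
  // (fully committed) or 3 (on-demand commit) bitmap fields, i.e. well under a page.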
  mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
  if (arena == NULL) return false;

  arena->block_count = bcount;
  arena->field_count = fields;
  arena->start = (uint8_t*)start;
  arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
  arena->is_large = is_large;
  arena->is_zero_init = is_zero;
  arena->is_committed = is_committed;
  arena->search_idx = 0;
  arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
  arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
  // the bitmaps are already zero initialized due to os_alloc
  // just claim leftover blocks if needed
  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
  mi_assert_internal(post >= 0);
  if (post > 0) {
    // don't use leftover bits at the end
    mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
    _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
  }

  mi_arena_add(arena);
  return true;
}

// Reserve a range of regular OS memory
int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept
{
  size = _mi_os_good_alloc_size(size);
  bool large = allow_large;
  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
  if (start==NULL) return ENOMEM;
  if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) {
    _mi_os_free_ex(start, size, commit, &_mi_stats_main);
    _mi_verbose_message("failed to reserve %zu KiB memory\n", _mi_divide_up(size,1024));
    return ENOMEM;
  }
  _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : "");
  return 0;
}
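
// For example, an application that wants to work within a fixed memory budget could
// reserve a committed 256MiB arena up front and then enable `mi_option_limit_os_alloc`
// so that large allocations fail instead of falling back to the OS (see the check in
// `_mi_arena_alloc_aligned` above). A sketch of such a setup:
//
//   mi_reserve_os_memory(256 * 1024 * 1024, true /* commit */, false /* allow_large */);
//   mi_option_set(mi_option_limit_os_alloc, 1);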


/* -----------------------------------------------------------
  Reserve a huge page arena.
----------------------------------------------------------- */
// reserve at a specific numa node
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
  if (pages==0) return 0;
  if (numa_node < -1) numa_node = -1;
  if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
  size_t hsize = 0;
  size_t pages_reserved = 0;
  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
  if (p==NULL || pages_reserved==0) {
    _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages);
    return ENOMEM;
  }
  _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);

  if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) {
    _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
    return ENOMEM;
  }
  return 0;
}


// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
  if (pages == 0) return 0;

  // pages per numa node
  size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
  if (numa_count <= 0) numa_count = 1;
  const size_t pages_per = pages / numa_count;
  const size_t pages_mod = pages % numa_count;
  const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);

  // reserve evenly among numa nodes
  for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per; // can be 0
    if (numa_node < pages_mod) node_pages++;
    int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
    if (err) return err;
    if (pages < node_pages) {
      pages = 0;
    }
    else {
      pages -= node_pages;
    }
  }

  return 0;
}
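
// As a worked example, reserving 10 pages across 4 numa nodes gives pages_per = 2 and
// pages_mod = 2, so nodes 0 and 1 receive 3 pages each and nodes 2 and 3 receive 2
// pages each. A call such as
//
//   mi_reserve_huge_os_pages_interleave(10, 4, 10000);
//
// would therefore attempt to reserve 10 one-GiB pages spread over 4 nodes, giving each
// per-node reservation a timeout of 10000/4 + 50 = 2550 ms.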

int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
  UNUSED(max_secs);
  _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
  if (pages_reserved != NULL) *pages_reserved = 0;
  int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
  return err;
}