/* ----------------------------------------------------------------------------
Copyright (c) 2019-2021, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
"Arenas" are fixed areas of OS memory from which we can allocate
large blocks (>= MI_ARENA_BLOCK_SIZE, 32MiB).
In contrast to the rest of mimalloc, the arenas are shared between
threads and need to be accessed using atomic operations.

Currently arenas are only used for huge OS page (1GiB) reservations;
otherwise we delegate to direct allocation from the OS.
In the future, we can expose an API to manually add more kinds of arenas,
which is sometimes needed for embedded devices or shared memory, for example.
(We can also employ this with WASI or `sbrk` systems to reserve large arenas
 on demand and be able to reuse them efficiently).

The arena allocation needs to be thread safe and we use an atomic
bitmap to allocate. The current implementation of the bitmap can
only do this within a field (`uintptr_t`) so we can allocate at most
blocks of 2GiB (64*32MiB) and no object can cross the boundary. This
can lead to fragmentation but fortunately most objects will be regions
of 256MiB in practice.
-----------------------------------------------------------------------------*/
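
/* ----------------------------------------------------------------------------
  Worked example of the above (illustrative only; assumes a 64-bit platform
  where a bitmap field has 64 bits): a 256MiB request spans 256MiB/32MiB = 8
  arena blocks, i.e. 8 consecutive bits claimed in one bitmap field, while a
  single field covers 64*32MiB = 2GiB of arena memory -- hence no allocation
  can cross a 2GiB field boundary.
-----------------------------------------------------------------------------*/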
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include <string.h>  // memset
#include <errno.h>   // ENOMEM

#include "bitmap.h"  // atomic bitmap


// os.c
void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
void  _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
void  _mi_os_free(void* p, size_t size, mi_stats_t* stats);

void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
void  _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);

bool  _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool  _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);

/* -----------------------------------------------------------
  Arena allocation
----------------------------------------------------------- */

#define MI_SEGMENT_ALIGN      MI_SEGMENT_SIZE
#define MI_ARENA_BLOCK_SIZE   (4*MI_SEGMENT_ALIGN)     // 32MiB
#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2)  // 16MiB
#define MI_MAX_ARENAS         (64)                     // not more than 256 (since we use 8 bits in the memid)

// A memory arena descriptor
typedef struct mi_arena_s {
  _Atomic(uint8_t*) start;                // the start of the memory area
  size_t   block_count;                   // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`)
  size_t   field_count;                   // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`)
  int      numa_node;                     // associated NUMA node
  bool     is_zero_init;                  // is the arena zero initialized?
  bool     is_committed;                  // is the memory fully committed? (if so, `blocks_committed == NULL`)
  bool     is_large;                      // large- or huge OS pages (always committed)
  _Atomic(uintptr_t) search_idx;          // optimization to start the search for free blocks
  mi_bitmap_field_t* blocks_dirty;        // are the blocks potentially non-zero?
  mi_bitmap_field_t* blocks_committed;    // if `!is_committed`, are the blocks committed?
  mi_bitmap_field_t  blocks_inuse[1];     // in-place bitmap of in-use blocks (of size `field_count`)
} mi_arena_t;
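
// Layout sketch (illustrative; see `mi_manage_os_memory` below): all bitmaps
// live in one allocation directly behind the descriptor, `field_count` fields
// each, so with `fields = field_count`:
//
//   blocks_inuse      at &blocks_inuse[0]         (in-use blocks, in-place)
//   blocks_dirty      = &blocks_inuse[fields]     (potentially non-zero blocks)
//   blocks_committed  = &blocks_inuse[2*fields]   (only present when !is_committed)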


// The available arenas
static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS];
static mi_decl_cache_align _Atomic(uintptr_t)   mi_arena_count; // = 0


/* -----------------------------------------------------------
  Arena allocations get a memory id where the lower 8 bits are
  the arena index +1, and the upper bits the block index.
----------------------------------------------------------- */

// Use `0` as a special id for direct OS allocated memory.
#define MI_MEMID_OS   0
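
// For example (illustrative only): an allocation in arena 0 at bitmap index 5
// gets memid = (5 << 8) | (0+1) = 0x501; decoding yields arena_index =
// (0x501 & 0xFF) - 1 = 0 and bitmap_index = 0x501 >> 8 = 5. Since the low byte
// is always at least 1, a valid arena memid can never equal MI_MEMID_OS.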

static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) {
  mi_assert_internal(arena_index < 0xFE);
  mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow?
  return ((bitmap_index << 8) | ((arena_index+1) & 0xFF));
}

static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) {
  mi_assert_internal(memid != MI_MEMID_OS);
  *arena_index = (memid & 0xFF) - 1;
  *bitmap_index = (memid >> 8);
}

static size_t mi_block_count_of_size(size_t size) {
  return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE);
}

/* -----------------------------------------------------------
  Thread safe allocation in an arena
----------------------------------------------------------- */
static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx)
{
  size_t idx = mi_atomic_load_acquire(&arena->search_idx);  // start from last search
  if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) {
    mi_atomic_store_release(&arena->search_idx, idx);  // start search from here next time
    return true;
  }
  return false;
}


/* -----------------------------------------------------------
  Arena Allocation
----------------------------------------------------------- */

static void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount,
                                 bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_bitmap_index_t bitmap_index;
  if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL;

  // claimed it! set the dirty bits (todo: no need for an atomic op here?)
  void* p    = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE);
  *memid     = mi_arena_id_create(arena_index, bitmap_index);
  *is_zero   = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL);
  *large     = arena->is_large;
  *is_pinned = (arena->is_large || arena->is_committed);
  if (arena->is_committed) {
    // always committed
    *commit = true;
  }
  else if (*commit) {
    // arena not committed as a whole, but commit requested: ensure commit now
    bool any_uncommitted;
    _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted);
    if (any_uncommitted) {
      bool commit_zero;
      _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats);
      if (commit_zero) *is_zero = true;
    }
  }
  else {
    // no need to commit, but check if already fully committed
    *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index);
  }
  return p;
}

void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
                              size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *memid   = MI_MEMID_OS;
  *is_zero = false;
  *is_pinned = false;

  // try to allocate in an arena if the alignment is small enough
  // and the object is not too large or too small.
  if (alignment <= MI_SEGMENT_ALIGN &&
      size >= MI_ARENA_MIN_OBJ_SIZE &&
      mi_atomic_load_relaxed(&mi_arena_count) > 0)
  {
    const size_t bcount = mi_block_count_of_size(size);
    const int numa_node = _mi_os_numa_node(tld); // current numa node

    mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
    // try numa affine allocation
    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
      if (arena==NULL) break; // end reached
      if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
      {
        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
        mi_assert_internal((uintptr_t)p % alignment == 0);
        if (p != NULL) return p;
      }
    }
    // try from another numa node instead..
    for (size_t i = 0; i < MI_MAX_ARENAS; i++) {
      mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
      if (arena==NULL) break; // end reached
      if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
          (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
      {
        void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld);
        mi_assert_internal((uintptr_t)p % alignment == 0);
        if (p != NULL) return p;
      }
    }
  }

  // finally, fall back to the OS
  if (mi_option_is_enabled(mi_option_limit_os_alloc)) {
    errno = ENOMEM;
    return NULL;
  }
  *is_zero = true;
  *memid   = MI_MEMID_OS;
  void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats);
  if (p != NULL) *is_pinned = *large;
  return p;
}

void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld);
}
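
/* -----------------------------------------------------------
  Minimal usage sketch of the allocation entry point above
  (illustrative only -- the real callers live in the segment
  layer; `tld` is assumed to be the caller's `mi_os_tld_t*`).
  Note that `commit` and `large` are in/out parameters: on
  entry they express the request, on return they report what
  was actually obtained.

    bool   commit = true;     // request committed memory
    bool   large  = false;    // do not allow large OS pages
    bool   is_pinned, is_zero;
    size_t memid;
    void*  p = _mi_arena_alloc(MI_ARENA_MIN_OBJ_SIZE, &commit, &large,
                               &is_pinned, &is_zero, &memid, tld);
    if (p != NULL) {
      // ... use the memory ...
      _mi_arena_free(p, MI_ARENA_MIN_OBJ_SIZE, memid, commit, tld->stats);
    }
----------------------------------------------------------- */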

/* -----------------------------------------------------------
  Arena free
----------------------------------------------------------- */

void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats) {
  mi_assert_internal(size > 0 && stats != NULL);
  if (p==NULL) return;
  if (size==0) return;
  if (memid == MI_MEMID_OS) {
    // was a direct OS allocation, pass through
    _mi_os_free_ex(p, size, all_committed, stats);
  }
  else {
    // allocated in an arena
    size_t arena_idx;
    size_t bitmap_idx;
    mi_arena_id_indices(memid, &arena_idx, &bitmap_idx);
    mi_assert_internal(arena_idx < MI_MAX_ARENAS);
    mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]);
    mi_assert_internal(arena != NULL);
    const size_t blocks = mi_block_count_of_size(size);
    // checks
    if (arena == NULL) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
    mi_assert_internal(arena->field_count > mi_bitmap_index_field(bitmap_idx));
    if (arena->field_count <= mi_bitmap_index_field(bitmap_idx)) {
      _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid);
      return;
    }
    // potentially decommit
    if (arena->is_committed) {
      mi_assert_internal(all_committed);
    }
    else {
      mi_assert_internal(arena->blocks_committed != NULL);
      _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails
      _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
    }
    // and make it available to others again
    bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
    if (!all_inuse) {
      _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size);
      return;
    }
  }
}

/* -----------------------------------------------------------
  Add an arena.
----------------------------------------------------------- */

static bool mi_arena_add(mi_arena_t* arena) {
  mi_assert_internal(arena != NULL);
  mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0);
  mi_assert_internal(arena->block_count > 0);

  uintptr_t i = mi_atomic_increment_acq_rel(&mi_arena_count);
  if (i >= MI_MAX_ARENAS) {
    mi_atomic_decrement_acq_rel(&mi_arena_count);
    return false;
  }
  mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena);
  return true;
}

bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept
{
  if (is_large) {
    mi_assert_internal(is_committed);
    is_committed = true;
  }

  const size_t bcount  = mi_block_count_of_size(size);
  const size_t fields  = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
  const size_t bitmaps = (is_committed ? 2 : 3);
  const size_t asize   = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
  mi_arena_t* arena    = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS?
  if (arena == NULL) return false;

  arena->block_count  = bcount;
  arena->field_count  = fields;
  arena->start        = (uint8_t*)start;
  arena->numa_node    = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1)
  arena->is_large     = is_large;
  arena->is_zero_init = is_zero;
  arena->is_committed = is_committed;
  arena->search_idx   = 0;
  arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap
  arena->blocks_committed = (is_committed ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap
  // the bitmaps are already zero initialized due to os_alloc
  // just claim leftover blocks if needed
  ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount;
  mi_assert_internal(post >= 0);
  if (post > 0) {
    // don't use leftover bits at the end
    mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post);
    _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL);
  }

  mi_arena_add(arena);
  return true;
}
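
// Worked example (illustrative only, 64-bit platform): managing a 4GiB
// uncommitted region gives bcount = 4GiB/32MiB = 128 blocks and
// fields = 128/64 = 2, so asize = sizeof(mi_arena_t) + 3*2*sizeof(mi_bitmap_field_t).
// For a 4000MiB region instead, bcount = 125 and post = 2*64 - 125 = 3, so the
// last 3 bits of `blocks_inuse` are pre-claimed and never handed out.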

// Reserve a range of regular OS memory
int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept
{
  size = _mi_os_good_alloc_size(size);
  bool large = allow_large;
  void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main);
  if (start==NULL) return ENOMEM;
  if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) {
    _mi_os_free_ex(start, size, commit, &_mi_stats_main);
    _mi_verbose_message("failed to reserve %zu KiB of memory\n", _mi_divide_up(size,1024));
    return ENOMEM;
  }
  _mi_verbose_message("reserved %zu KiB of memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : "");
  return 0;
}
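
// For example (illustrative only), reserving a 256MiB committed arena up front,
// allowing large OS pages if available:
//
//   int err = mi_reserve_os_memory(256*1024*1024, true /* commit */, true /* allow_large */);
//   if (err != 0) { /* arena not available; allocation falls back to the OS */ }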


/* -----------------------------------------------------------
  Reserve a huge page arena.
----------------------------------------------------------- */
// reserve at a specific numa node
int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept {
  if (pages==0) return 0;
  if (numa_node < -1) numa_node = -1;
  if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count();
  size_t hsize = 0;
  size_t pages_reserved = 0;
  void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize);
  if (p==NULL || pages_reserved==0) {
    _mi_warning_message("failed to reserve %zu GiB of huge pages\n", pages);
    return ENOMEM;
  }
  _mi_verbose_message("numa node %i: reserved %zu GiB of huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages);

  if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) {
    _mi_os_free_huge_pages(p, hsize, &_mi_stats_main);
    return ENOMEM;
  }
  return 0;
}


// reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected)
int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept {
  if (pages == 0) return 0;

  // pages per numa node
  size_t numa_count = (numa_nodes > 0 ? numa_nodes : _mi_os_numa_node_count());
  if (numa_count <= 0) numa_count = 1;
  const size_t pages_per = pages / numa_count;
  const size_t pages_mod = pages % numa_count;
  const size_t timeout_per = (timeout_msecs==0 ? 0 : (timeout_msecs / numa_count) + 50);

  // reserve evenly among numa nodes
  for (size_t numa_node = 0; numa_node < numa_count && pages > 0; numa_node++) {
    size_t node_pages = pages_per;  // can be 0
    if (numa_node < pages_mod) node_pages++;
    int err = mi_reserve_huge_os_pages_at(node_pages, (int)numa_node, timeout_per);
    if (err) return err;
    if (pages < node_pages) {
      pages = 0;
    }
    else {
      pages -= node_pages;
    }
  }

  return 0;
}
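
// For example (illustrative only): reserving 10 pages over 4 NUMA nodes gives
// pages_per = 2 and pages_mod = 2, so nodes 0..3 receive 3, 3, 2, and 2 pages.
// A typical call detects the node count automatically:
//
//   int err = mi_reserve_huge_os_pages_interleave(10, 0 /* detect nodes */, 10000 /* timeout in msecs */);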

int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserved) mi_attr_noexcept {
  UNUSED(max_secs);
  _mi_warning_message("mi_reserve_huge_os_pages is deprecated: use mi_reserve_huge_os_pages_interleave/at instead\n");
  if (pages_reserved != NULL) *pages_reserved = 0;
  int err = mi_reserve_huge_os_pages_interleave(pages, 0, (size_t)(max_secs * 1000.0));
  if (err==0 && pages_reserved!=NULL) *pages_reserved = pages;
  return err;
}