1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 2002-2018. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #define ERTS_WANT_MEM_MAPPERS
25 #include "sys.h"
26 #include "erl_process.h"
27 #include "atom.h"
28 #include "erl_mmap.h"
29 #include <stddef.h>
30
31 #ifdef HAVE_SYS_MMAN_H
32 #include <sys/mman.h>
33 #endif
34
/*
 * Revoke all access (read/write/execute) to the memory region
 * [p, p+size), turning it into a guard area that faults on any touch.
 *
 * NOTE(review): both VirtualProtect and mprotect operate on whole
 * pages; p and size are presumably page aligned — confirm at callers.
 *
 * Returns 0 on success, -1 on failure (errno is set on the POSIX and
 * fallback paths; on Windows consult GetLastError()).
 */
int erts_mem_guard(void *p, UWord size) {
#if defined(WIN32)
    DWORD oldProtect;
    BOOL success;

    /* First parameter of VirtualProtect is LPVOID, not LPVOID*;
     * the previous cast had one pointer level too many. */
    success = VirtualProtect((LPVOID)p,
                             size,
                             PAGE_NOACCESS,
                             &oldProtect);

    return success ? 0 : -1;
#elif defined(HAVE_SYS_MMAN_H)
    return mprotect(p, size, PROT_NONE);
#else
    errno = ENOTSUP;
    return -1;
#endif
}
53
54 #if HAVE_ERTS_MMAP
55
56 /* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
57
58 #if defined(DEBUG) || 0
59 # undef ERTS_MMAP_DEBUG
60 # define ERTS_MMAP_DEBUG
61 # ifndef ERTS_MMAP_OP_RINGBUF_SZ
62 # define ERTS_MMAP_OP_RINGBUF_SZ 100
63 # endif
64 #endif
65
66 #ifndef ERTS_MMAP_OP_RINGBUF_SZ
67 # define ERTS_MMAP_OP_RINGBUF_SZ 0
68 #endif
69
70 /* #define ERTS_MMAP_DEBUG_FILL_AREAS */
71
72 #ifdef ERTS_MMAP_DEBUG
73 # define ERTS_MMAP_ASSERT ERTS_ASSERT
74 #else
75 # define ERTS_MMAP_ASSERT(A) ((void) 1)
76 #endif
77
78 /*
79 * `mm->sa.bot` and `mm->sua.top` are read only after
80 * initialization, but the other pointers are not; i.e., only
81 * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
82 */
83 #define ERTS_MMAP_IN_SUPERCARRIER(PTR) \
84 (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
85 < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot))
86 #define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) \
87 (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
88 (((UWord) (PTR)) - ((UWord) mm->sa.bot) \
89 < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot)))
90 #define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) \
91 (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), \
92 (((UWord) (PTR)) - ((UWord) mm->sua.bot) \
93 < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot)))
94
95 UWord erts_page_inv_mask;
96
97 #if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
98 # undef RBT_DEBUG
99 # define RBT_DEBUG
100 #endif
101 #ifdef RBT_DEBUG
102 # define RBT_ASSERT ERTS_ASSERT
103 # define IF_RBT_DEBUG(C) C
104 #else
105 # define RBT_ASSERT(x)
106 # define IF_RBT_DEBUG(C)
107 #endif
108
/* Node of an intrusive red-black tree. The node itself carries no key;
 * it is embedded in an ErtsFreeSegDesc and the embedding descriptor's
 * start/end addresses supply the sort key (see *node_to_desc()). */
typedef struct RBTNode_ RBTNode;
struct RBTNode_ {
    UWord parent_and_color;        /* color in bit 0 of parent ptr */
    RBTNode *left;
    RBTNode *right;
};
115
116 #define RED_FLG (1)
117 #define IS_RED(N) ((N) && ((N)->parent_and_color & RED_FLG))
118 #define IS_BLACK(N) (!IS_RED(N))
119 #define SET_RED(N) ((N)->parent_and_color |= RED_FLG)
120 #define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG)
121
parent(RBTNode * node)122 static ERTS_INLINE RBTNode* parent(RBTNode* node)
123 {
124 return (RBTNode*) (node->parent_and_color & ~RED_FLG);
125 }
126
set_parent(RBTNode * node,RBTNode * parent)127 static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent)
128 {
129 RBT_ASSERT(!((UWord)parent & RED_FLG));
130 node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG);
131 }
132
parent_and_color(RBTNode * parent,int color)133 static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color)
134 {
135 RBT_ASSERT(!((UWord)parent & RED_FLG));
136 RBT_ASSERT(!(color & ~RED_FLG));
137 return ((UWord)parent) | color;
138 }
139
140
141 enum SortOrder {
142 ADDR_ORDER, /* only address order */
143 SA_SZ_ADDR_ORDER, /* first super-aligned size then address order */
144 SZ_REVERSE_ADDR_ORDER /* first size then reverse address order */
145 };
146 #ifdef HARD_DEBUG
147 static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"};
148 #endif
149
/* A red-black tree together with the ordering its nodes obey. */
typedef struct {
    RBTNode* root;
    enum SortOrder order;
}RBTree;
154
155 #ifdef HARD_DEBUG
156 # define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE)
157 # define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ)
158 static int rbt_assert_is_member(RBTNode* root, RBTNode* node);
159 static RBTNode* check_tree(RBTree* tree, Uint);
160 #else
161 # define HARD_CHECK_IS_MEMBER(ROOT,NODE)
162 # define HARD_CHECK_TREE(TREE,SZ)
163 #endif
164
165 #if ERTS_MMAP_OP_RINGBUF_SZ
166
167 static int mmap_op_ix;
168
169 typedef enum {
170 ERTS_OP_TYPE_NONE,
171 ERTS_OP_TYPE_MMAP,
172 ERTS_OP_TYPE_MUNMAP,
173 ERTS_OP_TYPE_MREMAP
174 } ErtsMMapOpType;
175
176 typedef struct {
177 ErtsMMapOpType type;
178 void *result;
179 UWord in_size;
180 UWord out_size;
181 void *old_ptr;
182 UWord old_size;
183 } ErtsMMapOp;
184
185 static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
186
187 #define ERTS_MMAP_OP_RINGBUF_INIT() \
188 do { \
189 int ix__; \
190 for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\
191 mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
192 mmap_ops[ix__].result = NULL; \
193 mmap_ops[ix__].in_size = 0; \
194 mmap_ops[ix__].out_size = 0; \
195 mmap_ops[ix__].old_ptr = NULL; \
196 mmap_ops[ix__].old_size = 0; \
197 } \
198 mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
199 } while (0)
200
201 #define ERTS_MMAP_OP_START(SZ) \
202 do { \
203 int ix__; \
204 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
205 mmap_op_ix = 0; \
206 ix__ = mmap_op_ix; \
207 mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \
208 mmap_ops[ix__].result = NULL; \
209 mmap_ops[ix__].in_size = (SZ); \
210 mmap_ops[ix__].out_size = 0; \
211 mmap_ops[ix__].old_ptr = NULL; \
212 mmap_ops[ix__].old_size = 0; \
213 } while (0)
214
215 #define ERTS_MMAP_OP_END(PTR, SZ) \
216 do { \
217 int ix__ = mmap_op_ix; \
218 mmap_ops[ix__].result = (PTR); \
219 mmap_ops[ix__].out_size = (SZ); \
220 } while (0)
221
222 #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
223 do { \
224 erts_mtx_lock(&mm->mtx); \
225 ERTS_MMAP_OP_START((IN_SZ)); \
226 ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
227 erts_mtx_unlock(&mm->mtx); \
228 } while (0)
229
230 #define ERTS_MUNMAP_OP(PTR, SZ) \
231 do { \
232 int ix__; \
233 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
234 mmap_op_ix = 0; \
235 ix__ = mmap_op_ix; \
236 mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \
237 mmap_ops[ix__].result = NULL; \
238 mmap_ops[ix__].in_size = 0; \
239 mmap_ops[ix__].out_size = 0; \
240 mmap_ops[ix__].old_ptr = (PTR); \
241 mmap_ops[ix__].old_size = (SZ); \
242 } while (0)
243
244 #define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
245 do { \
246 erts_mtx_lock(&mm->mtx); \
247 ERTS_MUNMAP_OP((PTR), (SZ)); \
248 erts_mtx_unlock(&mm->mtx); \
249 } while (0)
250
251 #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
252 do { \
253 int ix__; \
254 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
255 mmap_op_ix = 0; \
256 ix__ = mmap_op_ix; \
257 mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \
258 mmap_ops[ix__].result = NULL; \
259 mmap_ops[ix__].in_size = (IN_SZ); \
260 mmap_ops[ix__].out_size = (OLD_SZ); \
261 mmap_ops[ix__].old_ptr = (OLD_PTR); \
262 mmap_ops[ix__].old_size = (OLD_SZ); \
263 } while (0)
264
265 #define ERTS_MREMAP_OP_END(PTR, SZ) \
266 do { \
267 int ix__ = mmap_op_ix; \
268 mmap_ops[ix__].result = (PTR); \
269 mmap_ops[mmap_op_ix].out_size = (SZ); \
270 } while (0)
271
272 #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
273 do { \
274 erts_mtx_lock(&mm->mtx); \
275 ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
276 ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
277 erts_mtx_unlock(&mm->mtx); \
278 } while (0)
279
280 #define ERTS_MMAP_OP_ABORT() \
281 do { \
282 int ix__ = mmap_op_ix; \
283 mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
284 mmap_ops[ix__].result = NULL; \
285 mmap_ops[ix__].in_size = 0; \
286 mmap_ops[ix__].out_size = 0; \
287 mmap_ops[ix__].old_ptr = NULL; \
288 mmap_ops[ix__].old_size = 0; \
289 if (--mmap_op_ix < 0) \
290 mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
291 } while (0)
292
293 #else
294
295 #define ERTS_MMAP_OP_RINGBUF_INIT()
296 #define ERTS_MMAP_OP_START(SZ)
297 #define ERTS_MMAP_OP_END(PTR, SZ)
298 #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ)
299 #define ERTS_MUNMAP_OP(PTR, SZ)
300 #define ERTS_MUNMAP_OP_LCK(PTR, SZ)
301 #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ)
302 #define ERTS_MREMAP_OP_END(PTR, SZ)
303 #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ)
304 #define ERTS_MMAP_OP_ABORT()
305
306 #endif
307
/* Descriptor of one free segment [start,end). Each descriptor is
 * linked into two trees at once: an address-ordered one (anode) and a
 * size-ordered one (snode). When the descriptor itself is on the
 * descriptor free list, 'start' is reused as the list link. */
typedef struct {
    RBTNode snode;      /* node in 'stree' */
    RBTNode anode;      /* node in 'atree' */
    char* start;
    char* end;
}ErtsFreeSegDesc;
314
/* A set of disjoint, non-adjacent free segments, indexed both by
 * size (stree) and by address (atree). 'nseg' counts the segments. */
typedef struct {
    RBTree stree;       /* size ordered tree */
    RBTree atree;       /* address ordered tree */
    Uint nseg;
}ErtsFreeSegMap;
320
/* State of one memory mapper instance (e.g. the default mapper or the
 * literal-area mapper). Guarded by 'mtx' except where noted above at
 * ERTS_MMAP_IN_SUPERCARRIER(). */
struct ErtsMemMapper_ {
    /* Hooks to commit/decommit physical memory for a reserved range. */
    int (*reserve_physical)(char *, UWord);
    void (*unreserve_physical)(char *, UWord);
    int supercarrier;          /* boolean: supercarrier in use */
    int no_os_mmap;            /* boolean: direct OS mmap disallowed */
    /*
     * Super unaligned area is located above super aligned
     * area. That is, `sa.bot` is beginning of the super
     * carrier, `sua.top` is the end of the super carrier,
     * and sa.top and sua.bot moves towards eachother.
     */
    struct {
	char *top;
	char *bot;
	ErtsFreeSegMap map;
    } sua;
    struct {
	char *top;
	char *bot;
	ErtsFreeSegMap map;
    } sa;
#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
    int mmap_fd;               /* /dev/zero fd when anonymous mmap is absent */
#endif
    erts_mtx_t mtx;
    /* Pool of ErtsFreeSegDesc descriptors. */
    struct {
	char *free_list;       /* singly linked via ErtsFreeSegDesc.start */
	char *unused_start;    /* not-yet-carved descriptor area */
	char *unused_end;
	char *new_area_hint;
	Uint reserved;
    } desc;
    /* Counters. */
    struct {
	UWord free_seg_descs;
	struct {
	    UWord curr;
	    UWord max;
	} free_segs;
    } no;
    /* Size bookkeeping (bytes). */
    struct {
	struct {
	    UWord total;
	    struct {
		UWord total;
		UWord sa;
		UWord sua;
	    } used;
	} supercarrier;
	struct {
	    UWord used;
	} os;
    } size;
};
374
375 ErtsMemMapper erts_dflt_mmapper;
376
377 #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
378 ErtsMemMapper erts_literal_mmapper;
379 char* erts_literals_start;
380 UWord erts_literals_size;
381 #endif
382
383
384 #define ERTS_MMAP_SIZE_SC_SA_INC(SZ) \
385 do { \
386 mm->size.supercarrier.used.total += (SZ); \
387 mm->size.supercarrier.used.sa += (SZ); \
388 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
389 <= mm->size.supercarrier.total); \
390 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa \
391 <= mm->size.supercarrier.used.total); \
392 } while (0)
393 #define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) \
394 do { \
395 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
396 mm->size.supercarrier.used.total -= (SZ); \
397 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa >= (SZ)); \
398 mm->size.supercarrier.used.sa -= (SZ); \
399 } while (0)
400 #define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) \
401 do { \
402 mm->size.supercarrier.used.total += (SZ); \
403 mm->size.supercarrier.used.sua += (SZ); \
404 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total \
405 <= mm->size.supercarrier.total); \
406 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua \
407 <= mm->size.supercarrier.used.total); \
408 } while (0)
409 #define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) \
410 do { \
411 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); \
412 mm->size.supercarrier.used.total -= (SZ); \
413 ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua >= (SZ)); \
414 mm->size.supercarrier.used.sua -= (SZ); \
415 } while (0)
416 #define ERTS_MMAP_SIZE_OS_INC(SZ) \
417 do { \
418 ERTS_MMAP_ASSERT(mm->size.os.used + (SZ) >= (SZ)); \
419 mm->size.os.used += (SZ); \
420 } while (0)
421 #define ERTS_MMAP_SIZE_OS_DEC(SZ) \
422 do { \
423 ERTS_MMAP_ASSERT(mm->size.os.used >= (SZ)); \
424 mm->size.os.used -= (SZ); \
425 } while (0)
426
427
/* Carve the memory [start,end) into as many ErtsFreeSegDesc
 * descriptors as fit and push them all onto mm->desc.free_list.
 * The free list is threaded through each descriptor's 'start' field.
 * Does nothing if the area is too small for even one descriptor. */
static void
add_free_desc_area(ErtsMemMapper* mm, char *start, char *end)
{
    /* end == NULL is allowed and means "wrapped around address space" */
    ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
    if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
	UWord no;
	ErtsFreeSegDesc *prev_desc, *desc;
	char *desc_end;

	no = 1;
	/* First descriptor links to the current head of the free list. */
	prev_desc = (ErtsFreeSegDesc *) start;
	prev_desc->start = mm->desc.free_list;
	desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
	desc_end = start + 2*sizeof(ErtsFreeSegDesc);

	/* Each subsequent descriptor links to the previous one. */
	while (desc_end <= end) {
	    desc->start = (char *) prev_desc;
	    prev_desc = desc;
	    desc = (ErtsFreeSegDesc *) desc_end;
	    desc_end += sizeof(ErtsFreeSegDesc);
	    no++;
	}
	mm->desc.free_list = (char *) prev_desc;
	mm->no.free_seg_descs += no;
    }
}
454
/* Convert one page of the not-yet-used descriptor area into free
 * descriptors. Returns the new head of the descriptor free list, or
 * NULL if no unused area remains. */
static ErtsFreeSegDesc *
add_unused_free_desc_area(ErtsMemMapper* mm)
{
    char *ptr;
    if (!mm->desc.unused_start)
	return NULL;

    ERTS_MMAP_ASSERT(mm->desc.unused_end);
    ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
		     <= mm->desc.unused_end - mm->desc.unused_start);

    /* Carve exactly one page into descriptors. */
    ptr = mm->desc.unused_start + ERTS_PAGEALIGNED_SIZE;
    add_free_desc_area(mm, mm->desc.unused_start, ptr);

    /* Keep the remainder only if at least one more page is left. */
    if ((mm->desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
	mm->desc.unused_start = ptr;
    else
	mm->desc.unused_end = mm->desc.unused_start = NULL;

    ERTS_MMAP_ASSERT(mm->desc.free_list);
    return (ErtsFreeSegDesc *) mm->desc.free_list;
}
477
478 static ERTS_INLINE ErtsFreeSegDesc *
alloc_desc(ErtsMemMapper * mm)479 alloc_desc(ErtsMemMapper* mm)
480 {
481 ErtsFreeSegDesc *res;
482 res = (ErtsFreeSegDesc *) mm->desc.free_list;
483 if (!res) {
484 res = add_unused_free_desc_area(mm);
485 if (!res)
486 return NULL;
487 }
488 mm->desc.free_list = res->start;
489 ASSERT(mm->no.free_segs.curr < mm->no.free_seg_descs);
490 mm->no.free_segs.curr++;
491 if (mm->no.free_segs.max < mm->no.free_segs.curr)
492 mm->no.free_segs.max = mm->no.free_segs.curr;
493 return res;
494 }
495
496 static ERTS_INLINE void
free_desc(ErtsMemMapper * mm,ErtsFreeSegDesc * desc)497 free_desc(ErtsMemMapper* mm, ErtsFreeSegDesc *desc)
498 {
499 desc->start = mm->desc.free_list;
500 mm->desc.free_list = (char *) desc;
501 ERTS_MMAP_ASSERT(mm->no.free_segs.curr > 0);
502 mm->no.free_segs.curr--;
503 }
504
anode_to_desc(RBTNode * anode)505 static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
506 {
507 return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode));
508 }
509
snode_to_desc(RBTNode * snode)510 static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode)
511 {
512 return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode));
513 }
514
node_to_desc(enum SortOrder order,RBTNode * node)515 static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node)
516 {
517 return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node);
518 }
519
usable_size(enum SortOrder order,ErtsFreeSegDesc * desc)520 static ERTS_INLINE SWord usable_size(enum SortOrder order,
521 ErtsFreeSegDesc* desc)
522 {
523 return ((order == SA_SZ_ADDR_ORDER) ?
524 ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start)
525 : desc->end - desc->start);
526 }
527
528 #ifdef HARD_DEBUG
cmp_nodes(enum SortOrder order,RBTNode * lhs,RBTNode * rhs)529 static ERTS_INLINE SWord cmp_nodes(enum SortOrder order,
530 RBTNode* lhs, RBTNode* rhs)
531 {
532 ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs);
533 ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs);
534 RBT_ASSERT(lhs != rhs);
535 if (order != ADDR_ORDER) {
536 SWord diff = usable_size(order, ldesc) - usable_size(order, rdesc);
537 if (diff) return diff;
538 }
539 if (order != SZ_REVERSE_ADDR_ORDER) {
540 return (char*)ldesc->start - (char*)rdesc->start;
541 }
542 else {
543 return (char*)rdesc->start - (char*)ldesc->start;
544 }
545 }
546 #endif /* HARD_DEBUG */
547
cmp_with_node(enum SortOrder order,SWord sz,char * addr,RBTNode * rhs)548 static ERTS_INLINE SWord cmp_with_node(enum SortOrder order,
549 SWord sz, char* addr, RBTNode* rhs)
550 {
551 ErtsFreeSegDesc* rdesc;
552 if (order != ADDR_ORDER) {
553 SWord diff;
554 rdesc = snode_to_desc(rhs);
555 diff = sz - usable_size(order, rdesc);
556 if (diff) return diff;
557 }
558 else
559 rdesc = anode_to_desc(rhs);
560
561 if (order != SZ_REVERSE_ADDR_ORDER)
562 return addr - (char*)rdesc->start;
563 else
564 return (char*)rdesc->start - addr;
565 }
566
567
568 static ERTS_INLINE void
left_rotate(RBTNode ** root,RBTNode * x)569 left_rotate(RBTNode **root, RBTNode *x)
570 {
571 RBTNode *y = x->right;
572 x->right = y->left;
573 if (y->left)
574 set_parent(y->left, x);
575 set_parent(y, parent(x));
576 if (!parent(y)) {
577 RBT_ASSERT(*root == x);
578 *root = y;
579 }
580 else if (x == parent(x)->left)
581 parent(x)->left = y;
582 else {
583 RBT_ASSERT(x == parent(x)->right);
584 parent(x)->right = y;
585 }
586 y->left = x;
587 set_parent(x, y);
588 }
589
590 static ERTS_INLINE void
right_rotate(RBTNode ** root,RBTNode * x)591 right_rotate(RBTNode **root, RBTNode *x)
592 {
593 RBTNode *y = x->left;
594 x->left = y->right;
595 if (y->right)
596 set_parent(y->right, x);
597 set_parent(y, parent(x));
598 if (!parent(y)) {
599 RBT_ASSERT(*root == x);
600 *root = y;
601 }
602 else if (x == parent(x)->right)
603 parent(x)->right = y;
604 else {
605 RBT_ASSERT(x == parent(x)->left);
606 parent(x)->left = y;
607 }
608 y->right = x;
609 set_parent(x, y);
610 }
611
612 /*
613 * Replace node x with node y
614 * NOTE: segment descriptor of y is not changed
615 */
static ERTS_INLINE void
replace(RBTNode **root, RBTNode *x, RBTNode *y)
{
    /* Redirect the parent's (or root's) child link from x to y. */
    if (!parent(x)) {
	RBT_ASSERT(*root == x);
	*root = y;
    }
    else if (x == parent(x)->left)
	parent(x)->left = y;
    else {
	RBT_ASSERT(x == parent(x)->right);
	parent(x)->right = y;
    }
    /* Re-parent x's children to y. */
    if (x->left) {
	RBT_ASSERT(parent(x->left) == x);
	set_parent(x->left, y);
    }
    if (x->right) {
	RBT_ASSERT(parent(x->right) == x);
	set_parent(x->right, y);
    }

    /* Finally copy x's links and color onto y; must come after the
     * rewiring above, which reads x's (still intact) fields. */
    y->parent_and_color	= x->parent_and_color;
    y->right = x->right;
    y->left = x->left;
}
643
/* Restore the red-black properties after 'node' (red) has been linked
 * in with a red parent. Classic CLRS insert fixup: move the red-red
 * violation up the tree, or resolve it locally with at most two
 * rotations and stop. */
static void
tree_insert_fixup(RBTNode** root, RBTNode *node)
{
    RBTNode *x = node, *y, *papa_x, *granpa_x;

    /*
     * Rearrange the tree so that it satisfies the Red-Black Tree properties
     */

    papa_x = parent(x);
    RBT_ASSERT(x != *root && IS_RED(papa_x));
    do {

	/*
	 * x and its parent are both red. Move the red pair up the tree
	 * until we get to the root or until we can separate them.
	 */

	granpa_x = parent(papa_x);
	RBT_ASSERT(IS_RED(x));
	RBT_ASSERT(IS_BLACK(granpa_x));
	RBT_ASSERT(granpa_x);

	if (papa_x == granpa_x->left) {
	    y = granpa_x->right;   /* y = uncle of x */
	    if (IS_RED(y)) {
		/* Case 1: red uncle — recolor and move violation up. */
		SET_BLACK(y);
		SET_BLACK(papa_x);
		SET_RED(granpa_x);
		x = granpa_x;
	    }
	    else {

		if (x == papa_x->right) {
		    /* Case 2: x is an "inner" child — rotate to case 3. */
		    left_rotate(root, papa_x);
		    papa_x = x;
		    x = papa_x->left;
		}

		RBT_ASSERT(x == granpa_x->left->left);
		RBT_ASSERT(IS_RED(x));
		RBT_ASSERT(IS_RED(papa_x));
		RBT_ASSERT(IS_BLACK(granpa_x));
		RBT_ASSERT(IS_BLACK(y));

		/* Case 3: recolor and rotate grandparent; tree is fixed. */
		SET_BLACK(papa_x);
		SET_RED(granpa_x);
		right_rotate(root, granpa_x);

		RBT_ASSERT(x == parent(x)->left);
		RBT_ASSERT(IS_RED(x));
		RBT_ASSERT(IS_RED(parent(x)->right));
		RBT_ASSERT(IS_BLACK(parent(x)));
		break;
	    }
	}
	else {
	    /* Mirror image of the branch above. */
	    RBT_ASSERT(papa_x == granpa_x->right);
	    y = granpa_x->left;
	    if (IS_RED(y)) {
		SET_BLACK(y);
		SET_BLACK(papa_x);
		SET_RED(granpa_x);
		x = granpa_x;
	    }
	    else {

		if (x == papa_x->left) {
		    right_rotate(root, papa_x);
		    papa_x = x;
		    x = papa_x->right;
		}

		RBT_ASSERT(x == granpa_x->right->right);
		RBT_ASSERT(IS_RED(x));
		RBT_ASSERT(IS_RED(papa_x));
		RBT_ASSERT(IS_BLACK(granpa_x));
		RBT_ASSERT(IS_BLACK(y));

		SET_BLACK(papa_x);
		SET_RED(granpa_x);
		left_rotate(root, granpa_x);

		RBT_ASSERT(x == parent(x)->right);
		RBT_ASSERT(IS_RED(x));
		RBT_ASSERT(IS_RED(parent(x)->left));
		RBT_ASSERT(IS_BLACK(parent(x)));
		break;
	    }
	}
    } while (x != *root && (papa_x=parent(x), IS_RED(papa_x)));

    /* The root is always black. */
    SET_BLACK(*root);
}
738
/* Unlink 'del' from 'tree' and rebalance. Standard CLRS deletion:
 * splice out the node (or its in-order successor), then, if a black
 * node was removed, push the resulting "extra black" up the tree.
 * A stack-allocated sentinel (null_x) stands in for a missing child
 * so the fixup loop has a node to work with; it is unlinked again
 * before returning. */
static void
rbt_delete(RBTree* tree, RBTNode* del)
{
    Uint spliced_is_black;
    RBTNode *x, *y, *z = del, *papa_y;
    RBTNode null_x; /* null_x is used to get the fixup started when we
		       splice out a node without children. */

    HARD_CHECK_IS_MEMBER(tree->root, del);
    HARD_CHECK_TREE(tree, 0);

    null_x.parent_and_color = parent_and_color(NULL, !RED_FLG);

    /* Remove node from tree... */

    /* Find node to splice out */
    if (!z->left || !z->right)
	y = z;
    else
	/* Set y to z:s successor */
	for(y = z->right; y->left; y = y->left)
	    ;
    /* splice out y */
    x = y->left ? y->left : y->right;
    spliced_is_black = IS_BLACK(y);
    papa_y = parent(y);
    if (x) {
	set_parent(x, papa_y);
    }
    else if (spliced_is_black) {
	/* No child to take y's place; link in the sentinel so the
	 * fixup loop below has a (black) node to start from. */
	x = &null_x;
	x->right = x->left = NULL;
	x->parent_and_color = parent_and_color(papa_y, !RED_FLG);
	y->left = x;
    }

    if (!papa_y) {
	RBT_ASSERT(tree->root == y);
	tree->root = x;
    }
    else {
	if (y == papa_y->left) {
	    papa_y->left = x;
	}
	else {
	    RBT_ASSERT(y == papa_y->right);
	    papa_y->right = x;
	}
    }
    if (y != z) {
	/* We spliced out the successor of z; replace z by the successor */
	RBT_ASSERT(z != &null_x);
	replace(&tree->root, z, y);
    }

    if (spliced_is_black) {
	RBTNode* papa_x;
	/* We removed a black node which makes the resulting tree
	   violate the Red-Black Tree properties. Fixup tree... */

	papa_x = parent(x);
	while (IS_BLACK(x) && papa_x) {

	    /*
	     * x has an "extra black" which we move up the tree
	     * until we reach the root or until we can get rid of it.
	     *
	     * y is the sibbling of x
	     */

	    if (x == papa_x->left) {
		y = papa_x->right;
		RBT_ASSERT(y);
		if (IS_RED(y)) {
		    /* Red sibling: rotate to get a black sibling. */
		    RBT_ASSERT(y->right);
		    RBT_ASSERT(y->left);
		    SET_BLACK(y);
		    RBT_ASSERT(IS_BLACK(papa_x));
		    SET_RED(papa_x);
		    left_rotate(&tree->root, papa_x);
		    RBT_ASSERT(papa_x == parent(x));
		    y = papa_x->right;
		}
		RBT_ASSERT(y);
		RBT_ASSERT(IS_BLACK(y));
		if (IS_BLACK(y->left) && IS_BLACK(y->right)) {
		    /* Both nephews black: recolor and move up. */
		    SET_RED(y);
		}
		else {
		    if (IS_BLACK(y->right)) {
			/* Near nephew red: rotate sibling first. */
			SET_BLACK(y->left);
			SET_RED(y);
			right_rotate(&tree->root, y);
			RBT_ASSERT(papa_x == parent(x));
			y = papa_x->right;
		    }
		    RBT_ASSERT(y);
		    if (IS_RED(papa_x)) {

			SET_BLACK(papa_x);
			SET_RED(y);
		    }
		    RBT_ASSERT(y->right);
		    SET_BLACK(y->right);
		    left_rotate(&tree->root, papa_x);
		    x = tree->root;
		    break;
		}
	    }
	    else {
		/* Mirror image of the branch above. */
		RBT_ASSERT(x == papa_x->right);
		y = papa_x->left;
		RBT_ASSERT(y);
		if (IS_RED(y)) {
		    RBT_ASSERT(y->right);
		    RBT_ASSERT(y->left);
		    SET_BLACK(y);
		    RBT_ASSERT(IS_BLACK(papa_x));
		    SET_RED(papa_x);
		    right_rotate(&tree->root, papa_x);
		    RBT_ASSERT(papa_x == parent(x));
		    y = papa_x->left;
		}
		RBT_ASSERT(y);
		RBT_ASSERT(IS_BLACK(y));
		if (IS_BLACK(y->right) && IS_BLACK(y->left)) {
		    SET_RED(y);
		}
		else {
		    if (IS_BLACK(y->left)) {
			SET_BLACK(y->right);
			SET_RED(y);
			left_rotate(&tree->root, y);
			RBT_ASSERT(papa_x == parent(x));
			y = papa_x->left;
		    }
		    RBT_ASSERT(y);
		    if (IS_RED(papa_x)) {
			SET_BLACK(papa_x);
			SET_RED(y);
		    }
		    RBT_ASSERT(y->left);
		    SET_BLACK(y->left);
		    right_rotate(&tree->root, papa_x);
		    x = tree->root;
		    break;
		}
	    }
	    x = papa_x;
	    papa_x = parent(x);
	}
	SET_BLACK(x);

	/* Unlink the sentinel again if it was linked in above. */
	papa_x = parent(&null_x);
	if (papa_x) {
	    if (papa_x->left == &null_x)
		papa_x->left = NULL;
	    else {
		RBT_ASSERT(papa_x->right == &null_x);
		papa_x->right = NULL;
	    }
	    RBT_ASSERT(!null_x.left);
	    RBT_ASSERT(!null_x.right);
	}
	else if (tree->root == &null_x) {
	    tree->root = NULL;
	    RBT_ASSERT(!null_x.left);
	    RBT_ASSERT(!null_x.right);
	}
    }
    HARD_CHECK_TREE(tree, 0);
}
911
912
/* Insert 'node' into 'tree' according to tree->order. The node is
 * linked in red at a leaf position found by ordinary binary-search
 * descent, after which tree_insert_fixup() restores the red-black
 * properties if the new parent is also red. The tree must not already
 * contain an equal key (cmp_with_node() never returns 0 for members). */
static void
rbt_insert(RBTree* tree, RBTNode* node)
{
#ifdef RBT_DEBUG
    ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL;
#endif
    ErtsFreeSegDesc* desc = node_to_desc(tree->order, node);
    char* seg_addr = desc->start;
    SWord seg_sz = desc->end - desc->start;

    HARD_CHECK_TREE(tree, 0);

    node->left = NULL;
    node->right = NULL;

    if (!tree->root) {
	/* Empty tree: new node becomes the (black) root. */
	node->parent_and_color = parent_and_color(NULL, !RED_FLG);
	tree->root = node;
    }
    else {
	RBTNode *x = tree->root;
	while (1) {
	    SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x);
	    if (diff < 0) {
		IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x));
		if (!x->left) {
		    node->parent_and_color = parent_and_color(x, RED_FLG);
		    x->left = node;
		    break;
		}
		x = x->left;
	    }
	    else {
		RBT_ASSERT(diff > 0);
		IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x));
		if (!x->right) {
		    node->parent_and_color = parent_and_color(x, RED_FLG);
		    x->right = node;
		    break;
		}
		x = x->right;
	    }
	}

	RBT_ASSERT(parent(node));
#ifdef RBT_DEBUG
	/* In the address tree, neighbors must not touch the new
	 * segment (free segments are kept non-adjacent). */
	if (tree->order == ADDR_ORDER) {
	    RBT_ASSERT(!dbg_under || dbg_under->end < desc->start);
	    RBT_ASSERT(!dbg_over || dbg_over->start > desc->end);
	}
#endif
	RBT_ASSERT(IS_RED(node));
	if (IS_RED(parent(node)))
	    tree_insert_fixup(&tree->root, node);
    }
    HARD_CHECK_TREE(tree, 0);
}
970
971 /*
972 * Traverse tree in (reverse) sorting order
973 */
974 static void
rbt_foreach_node(RBTree * tree,void (* fn)(RBTNode *,void *),void * arg,int reverse)975 rbt_foreach_node(RBTree* tree,
976 void (*fn)(RBTNode*,void*),
977 void* arg, int reverse)
978 {
979 #ifdef HARD_DEBUG
980 Sint blacks = -1;
981 Sint curr_blacks = 1;
982 Uint depth = 1;
983 Uint max_depth = 0;
984 Uint node_cnt = 0;
985 #endif
986 enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state;
987 RBTNode *x = tree->root;
988
989 RBT_ASSERT(!x || !parent(x));
990
991 state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
992 while (x) {
993 switch (state) {
994 case RECURSE_LEFT:
995 if (x->left) {
996 RBT_ASSERT(parent(x->left) == x);
997 #ifdef HARD_DEBUG
998 ++depth;
999 if (IS_BLACK(x->left))
1000 curr_blacks++;
1001 #endif
1002 x = x->left;
1003 state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
1004 }
1005 else {
1006 #ifdef HARD_DEBUG
1007 if (blacks < 0)
1008 blacks = curr_blacks;
1009 RBT_ASSERT(blacks == curr_blacks);
1010 #endif
1011 state = reverse ? RETURN_TO_PARENT : DO_NODE;
1012 }
1013 break;
1014
1015 case DO_NODE:
1016 #ifdef HARD_DEBUG
1017 ++node_cnt;
1018 if (depth > max_depth)
1019 max_depth = depth;
1020 #endif
1021 (*fn) (x, arg); /* Do it! */
1022 state = reverse ? RECURSE_LEFT : RECURSE_RIGHT;
1023 break;
1024
1025 case RECURSE_RIGHT:
1026 if (x->right) {
1027 RBT_ASSERT(parent(x->right) == x);
1028 #ifdef HARD_DEBUG
1029 ++depth;
1030 if (IS_BLACK(x->right))
1031 curr_blacks++;
1032 #endif
1033 x = x->right;
1034 state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
1035 }
1036 else {
1037 #ifdef HARD_DEBUG
1038 if (blacks < 0)
1039 blacks = curr_blacks;
1040 RBT_ASSERT(blacks == curr_blacks);
1041 #endif
1042 state = reverse ? DO_NODE : RETURN_TO_PARENT;
1043 }
1044 break;
1045
1046 case RETURN_TO_PARENT:
1047 #ifdef HARD_DEBUG
1048 if (IS_BLACK(x))
1049 curr_blacks--;
1050 --depth;
1051 #endif
1052 if (parent(x)) {
1053 if (x == parent(x)->left) {
1054 state = reverse ? RETURN_TO_PARENT : DO_NODE;
1055 }
1056 else {
1057 RBT_ASSERT(x == parent(x)->right);
1058 state = reverse ? DO_NODE : RETURN_TO_PARENT;
1059 }
1060 }
1061 x = parent(x);
1062 break;
1063 }
1064 }
1065 #ifdef HARD_DEBUG
1066 RBT_ASSERT(depth == 0 || (!tree->root && depth==1));
1067 RBT_ASSERT(curr_blacks == 0);
1068 RBT_ASSERT((1 << (max_depth/2)) <= node_cnt);
1069 #endif
1070 }
1071
1072 #if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG)
/* In-order predecessor of 'node', or NULL if it is the smallest. */
static RBTNode* rbt_prev_node(RBTNode* node)
{
    RBTNode* walk;

    /* With a left subtree, the predecessor is its rightmost node. */
    if (node->left) {
        walk = node->left;
        while (walk->right)
            walk = walk->right;
        return walk;
    }
    /* Otherwise climb until we arrive from a right child. */
    for (walk = node; parent(walk); walk = parent(walk)) {
        if (parent(walk)->right == walk)
            return parent(walk);
    }
    return NULL;
}
/* In-order successor of 'node', or NULL if it is the largest. */
static RBTNode* rbt_next_node(RBTNode* node)
{
    RBTNode* walk;

    /* With a right subtree, the successor is its leftmost node. */
    if (node->right) {
        walk = node->right;
        while (walk->left)
            walk = walk->left;
        return walk;
    }
    /* Otherwise climb until we arrive from a left child. */
    for (walk = node; parent(walk); walk = parent(walk)) {
        if (parent(walk)->left == walk)
            return parent(walk);
    }
    return NULL;
}
1101 #endif /* RBT_DEBUG || HARD_DEBUG_MSEG */
1102
1103
1104 /* The API to keep track of a bunch of separated (free) segments
1105 (non-overlapping and non-adjacent).
1106 */
1107 static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder);
1108 static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end,
1109 ErtsFreeSegDesc** under, ErtsFreeSegDesc** over);
1110 static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
1111 static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
1112 static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*);
1113 static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz);
1114
1115
/* Reset 'map' to empty. The address tree is always ADDR_ORDER; the
 * size tree uses the caller-supplied ordering. */
static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order)
{
    map->nseg = 0;
    map->atree.order = ADDR_ORDER;
    map->atree.root = NULL;
    map->stree.order = order;
    map->stree.root = NULL;
}
1124
/* Lookup directly adjacent free segments to the given area [start->end].
 * The given area must not contain any free segments.
 */
/* On return *under is the free segment ending exactly at 'start' (or
 * NULL) and *over the one beginning exactly at 'end' (or NULL). One
 * descent of the address tree finds both, since at each node the
 * search area lies entirely on one side. */
static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end,
                              ErtsFreeSegDesc** under, ErtsFreeSegDesc** over)
{
    RBTNode* x = map->atree.root;

    *under = NULL;
    *over = NULL;
    while (x) {
	if (start < anode_to_desc(x)->start) {
	    /* Area lies entirely below this segment. */
	    RBT_ASSERT(end <= anode_to_desc(x)->start);
	    if (end == anode_to_desc(x)->start) {
		RBT_ASSERT(!*over);
		*over = anode_to_desc(x);
	    }
	    x = x->left;
	}
	else {
	    /* Area lies entirely above this segment. */
	    RBT_ASSERT(start >= anode_to_desc(x)->end);
	    if (start == anode_to_desc(x)->end) {
		RBT_ASSERT(!*under);
		*under = anode_to_desc(x);
	    }
	    x = x->right;
	}
    }
}
1154
1155 /* Initialize 'desc' and insert as new free segment [start->end].
1156 * The new segment must not contain or be adjacent to any free segment in 'map'.
1157 */
insert_free_seg(ErtsFreeSegMap * map,ErtsFreeSegDesc * desc,char * start,char * end)1158 static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
1159 char* start, char* end)
1160 {
1161 desc->start = start;
1162 desc->end = end;
1163 rbt_insert(&map->atree, &desc->anode);
1164 rbt_insert(&map->stree, &desc->snode);
1165 map->nseg++;
1166 }
1167
/* Resize existing free segment 'desc' to [start->end].
 * The new segment location must overlap the old location and
 * it must not contain or be adjacent to any other free segment in 'map'.
 */
/* Since the new range overlaps the old one, the address-tree position
 * is unchanged and only the size tree must be re-keyed (delete, update
 * the bounds, re-insert). */
static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
                            char* start, char* end)
{
#ifdef RBT_DEBUG
    RBTNode* prev = rbt_prev_node(&desc->anode);
    RBTNode* next = rbt_next_node(&desc->anode);
    RBT_ASSERT(!prev || anode_to_desc(prev)->end < start);
    RBT_ASSERT(!next || anode_to_desc(next)->start > end);
#endif
    rbt_delete(&map->stree, &desc->snode);
    desc->start = start;
    desc->end = end;
    rbt_insert(&map->stree, &desc->snode);
}
1186
1187 /* Delete existing free segment 'desc' from 'map'.
1188 */
static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc)
{
    /* Unlink from both trees; the descriptor itself is not freed here —
     * ownership passes back to the caller. */
    map->nseg--;
    rbt_delete(&map->atree, &desc->anode);
    rbt_delete(&map->stree, &desc->snode);
}
1195
1196 /* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes.
1197 */
lookup_free_seg(ErtsFreeSegMap * map,SWord need_sz)1198 static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz)
1199 {
1200 RBTNode* x = map->stree.root;
1201 ErtsFreeSegDesc* best_desc = NULL;
1202 const enum SortOrder order = map->stree.order;
1203
1204 while (x) {
1205 ErtsFreeSegDesc* desc = snode_to_desc(x);
1206 SWord seg_sz = usable_size(order, desc);
1207
1208 if (seg_sz < need_sz) {
1209 x = x->right;
1210 }
1211 else {
1212 best_desc = desc;
1213 x = x->left;
1214 }
1215 }
1216 return best_desc;
1217 }
1218
/* Traversal state for build_free_seg_tuple(): the owning process, the
 * current heap write pointer, and the list accumulated so far. */
struct build_arg_t
{
    Process* p;   /* process whose heap we are building on */
    Eterm* hp;    /* next free heap word; advanced by the callback */
    Eterm acc;    /* list head built so far (starts as NIL) */
};
1225
/* rbt_foreach_node() callback: prepend a {Start, End} tuple describing
 * one free segment onto the list in 'arg' (a struct build_arg_t).
 *
 * erts_bld_uword() may build boxed terms at a->hp for the two addresses;
 * the 2-tuple then takes 3 words and the cons cell 2 more, so the heap
 * pointer is advanced accordingly after each construction.
 */
static void build_free_seg_tuple(RBTNode* node, void* arg)
{
    struct build_arg_t* a = (struct build_arg_t*)arg;
    ErtsFreeSegDesc* desc = anode_to_desc(node);
    Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start);
    Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end);
    Eterm tpl = TUPLE2(a->hp, start, end);

    a->hp += 3;                      /* tuple header + 2 elements */
    a->acc = CONS(a->hp, tpl, a->acc);
    a->hp += 2;                      /* cons cell */
}
1238
/* Build an Erlang list of {Start, End} tuples, one per free segment in
 * 'map', on the heap of process 'p'.
 *
 * Heap space is over-allocated using a per-segment worst-case estimate
 * and the unused tail is handed back with HRelease() afterwards.
 */
static
Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
{
    struct build_arg_t barg;
    Eterm* hp_end;
    const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */

    barg.p = p;    /* note: not read by build_free_seg_tuple() itself */
    barg.hp = HAlloc(p, may_need);
    hp_end = barg.hp + may_need;
    barg.acc = NIL;
    /* Address-order traversal; elements are prepended, so the resulting
     * list order is the reverse of the traversal order. */
    rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1);

    RBT_ASSERT(barg.hp <= hp_end);
    HRelease(p, hp_end, barg.hp);
    return barg.acc;
}
1256
1257 #if ERTS_HAVE_OS_MMAP
1258 /* Implementation of os_mmap()/os_munmap()/os_mremap()... */
1259
1260 #if HAVE_MMAP
1261 # define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
1262 # if defined(MAP_ANONYMOUS)
1263 # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
1264 # define ERTS_MMAP_FD (-1)
1265 # elif defined(MAP_ANON)
1266 # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
1267 # define ERTS_MMAP_FD (-1)
1268 # else
1269 # define ERTS_MMAP_FLAGS (MAP_PRIVATE)
1270 # define ERTS_MMAP_FD mm->mmap_fd
1271 # endif
1272 #endif
1273
/* Map 'size' bytes of fresh read/write memory from the OS.
 *
 * 'hint_ptr' is a placement hint passed through to mmap(). When
 * 'try_superalign' is set and MAP_ALIGN is available (e.g. Solaris),
 * the address argument is instead used to request superalignment.
 * Returns NULL on failure.
 */
static ERTS_INLINE void *
os_mmap(void *hint_ptr, UWord size, int try_superalign)
{
#if HAVE_MMAP
    void *res;
#ifdef MAP_ALIGN
    /* With MAP_ALIGN the addr argument carries the required alignment,
     * not a placement hint. */
    if (try_superalign)
        res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT,
                   ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
    else
#endif
        res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT,
                   ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
    if (res == MAP_FAILED)
        return NULL;
    return res;
#elif HAVE_VIRTUALALLOC
    return (void *) VirtualAlloc(NULL, (SIZE_T) size,
                                 MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
#else
# error "missing mmap() or similar"
#endif
}
1297
/* Give a mapping back to the OS. The result is only checked in debug
 * builds; 'res' is compiled in solely for the assertion.
 *
 * NOTE(review): the VirtualFree branch declares 'res' under #ifdef DEBUG
 * but asserts it under ERTS_MMAP_ASSERT (active when ERTS_MMAP_DEBUG is
 * defined) — presumably the two are always defined together on Windows;
 * verify before enabling ERTS_MMAP_DEBUG without DEBUG there.
 */
static ERTS_INLINE void
os_munmap(void *ptr, UWord size)
{
#if HAVE_MMAP
#ifdef ERTS_MMAP_DEBUG
    int res =
#endif
        munmap(ptr, size);
    ERTS_MMAP_ASSERT(res == 0);
#elif HAVE_VIRTUALALLOC
#ifdef DEBUG
    BOOL res =
#endif
        VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE);
    ERTS_MMAP_ASSERT(res != 0);
#else
# error "missing munmap() or similar"
#endif
}
1317
1318 #ifdef ERTS_HAVE_OS_MREMAP
1319 # if HAVE_MREMAP
1320 # if defined(__NetBSD__)
1321 # define ERTS_MREMAP_FLAGS (0)
1322 # else
1323 # define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE)
1324 # endif
1325 # endif
/* Resize (and possibly move) an existing OS mapping. Returns the new
 * base address, or NULL on failure. 'try_superalign' is accepted for
 * interface symmetry but not used by this implementation.
 *
 * NetBSD's mremap() takes an extra "new address" argument instead of a
 * MAYMOVE flag, hence the conditional argument list (see the
 * ERTS_MREMAP_FLAGS definitions above).
 */
static ERTS_INLINE void *
os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
{
    void *new_seg;
#if HAVE_MREMAP
    new_seg = mremap(ptr, (size_t) old_size,
# if defined(__NetBSD__)
                     NULL,
# endif
                     (size_t) new_size, ERTS_MREMAP_FLAGS);
    if (new_seg == (void *) MAP_FAILED)
        return NULL;
    return new_seg;
#else
# error "missing mremap() or similar"
#endif
}
1343 #endif
1344
1345 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
1346 #if HAVE_MMAP
1347
1348 #define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
1349 #define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
1350 #define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
1351 #if defined(__FreeBSD__)
1352 #define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
1353 #else
1354 #define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
1355 #endif /* __FreeBSD__ */
1356 #define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
1357 #if defined(__FreeBSD__)
1358 #define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS)
1359 #else
1360 #define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
1361 #endif /* __FreeBSD__ */
1362
/* Commit physical backing for [ptr, ptr+size), a range inside a region
 * created with os_mmap_virtual(), by remapping it read/write in place
 * (MAP_FIXED). Returns 1 on success, 0 on failure.
 */
static int
os_reserve_physical(char *ptr, UWord size)
{
    void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT,
                     ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
    if (res == (void *) MAP_FAILED)
        return 0;
    return 1;
}
1372
/* Release physical backing for [ptr, ptr+size) by remapping it PROT_NONE
 * in place (MAP_FIXED, and MAP_NORESERVE where available). The virtual
 * range stays reserved. Aborts the emulator on failure, since the range
 * would otherwise be left in an undefined state.
 */
static void
os_unreserve_physical(char *ptr, UWord size)
{
    void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
                     ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
    if (res == (void *) MAP_FAILED)
        erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
}
1381
1382 static void *
os_mmap_virtual(char * ptr,UWord size)1383 os_mmap_virtual(char *ptr, UWord size)
1384 {
1385 int flags = ERTS_MMAP_VIRTUAL_FLAGS;
1386 void* res;
1387
1388 res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
1389 flags, ERTS_MMAP_FD, 0);
1390 if (res == (void *) MAP_FAILED)
1391 return NULL;
1392 return res;
1393 }
1394
1395 #else
1396 #error "Missing reserve/unreserve physical memory implementation"
1397 #endif
1398 #endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */
1399
1400 #endif /* ERTS_HAVE_OS_MMAP */
1401
/* reserve_physical callback used when the whole supercarrier is always
 * physically reserved: nothing to do, always succeeds.
 */
static int reserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
    Uint32 *p;
    Uint32 *limit = (Uint32 *) (ptr + size);

    /* Verify the area still carries the "unreserved" fill pattern,
     * then stamp it with the "reserved" pattern. */
    for (p = (Uint32 *) ptr; p < limit; p++)
        ERTS_MMAP_ASSERT(*p == (Uint32) 0xdeadbeef);
    for (p = (Uint32 *) ptr; p < limit; p++)
        *p = (Uint32) 0xfeedfeed;
#endif
    return 1;
}
1414
/* unreserve_physical counterpart of reserve_noop(): nothing to do. */
static void unreserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
    Uint32 *p;
    Uint32 *limit = (Uint32 *) (ptr + size);

    /* Mark the area with the "unreserved" fill pattern so a later
     * reserve_noop() can assert it was not touched meanwhile. */
    for (p = (Uint32 *) ptr; p < limit; p++)
        *p = (Uint32) 0xdeadbeef;
#endif
}
1424
/* Allocate a free segment descriptor and insert [start, end) as a free
 * segment in 'map'.
 *
 * Returns 0 on success. When all descriptor pools are exhausted and no
 * new descriptor area can be obtained elsewhere, the first page of the
 * segment being inserted is sacrificed for descriptors instead, and
 * ERTS_PAGEALIGNED_SIZE is returned: the number of bytes at 'start'
 * that must remain physically reserved (not part of the free segment).
 */
static UWord
alloc_desc_insert_free_seg(ErtsMemMapper* mm,
                           ErtsFreeSegMap *map, char* start, char* end)
{
    char *ptr;
    ErtsFreeSegMap *da_map;
    ErtsFreeSegDesc *desc = alloc_desc(mm);
    if (desc) {
        insert_free_seg(map, desc, start, end);
        return 0;
    }

    /*
     * Ahh; ran out of free segment descriptors.
     *
     * First try to map a new page...
     */

#if ERTS_HAVE_OS_MMAP
    if (!mm->no_os_mmap) {
        ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0);
        if (ptr) {
            mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
            ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
            add_free_desc_area(mm, ptr, ptr+ERTS_PAGEALIGNED_SIZE);
            desc = alloc_desc(mm);
            ERTS_MMAP_ASSERT(desc);
            insert_free_seg(map, desc, start, end);
            return 0;
        }
    }
#endif

    /*
     * ...then try to find a good place in the supercarrier...
     */
    /* Prefer stealing a page from the superunaligned area; fall back to
     * the superaligned area. */
    da_map = &mm->sua.map;
    desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
    if (desc) {
        if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
            ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
        else
            desc = NULL;

    }
    else {
        da_map = &mm->sa.map;
        desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
        if (desc) {
            if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
                ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
            else
                desc = NULL;
        }
    }
    if (desc) {
        /* Turn the first page of the found free segment into a
         * descriptor area and shrink (or remove) that free segment. */
        char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
        add_free_desc_area(mm, desc->start, da_end);
        if (da_end != desc->end)
            resize_free_seg(da_map, desc, da_end, desc->end);
        else {
            delete_free_seg(da_map, desc);
            free_desc(mm, desc);
        }

        desc = alloc_desc(mm);
        ERTS_MMAP_ASSERT(desc);
        insert_free_seg(map, desc, start, end);
        return 0;
    }

    /*
     * ... and then as last resort use the first page of the
     * free segment we are trying to insert for free descriptors.
     */
    ptr = start + ERTS_PAGEALIGNED_SIZE;
    ERTS_MMAP_ASSERT(ptr <= end);

    add_free_desc_area(mm, start, ptr);

    if (ptr != end) {
        desc = alloc_desc(mm);
        ERTS_MMAP_ASSERT(desc);
        insert_free_seg(map, desc, ptr, end);
    }

    return ERTS_PAGEALIGNED_SIZE;
}
1513
/* Allocate a mapping of (at least) *sizep bytes.
 *
 * Unless ERTS_MMAPFLG_OS_ONLY is set, the premapped supercarrier is
 * tried first: superunaligned requests are served from the "sua" end,
 * superaligned requests (ERTS_MMAPFLG_SUPERALIGNED) from the "sa" end.
 * Unless ERTS_MMAPFLG_SUPERCARRIER_ONLY is set, plain OS mapping is the
 * fallback.
 *
 * On success the aligned-up size actually mapped is written back to
 * *sizep and the segment address returned; on failure *sizep is set to
 * 0 and NULL is returned.
 */
void *
erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep)
{
    char *seg;
    UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);

    /* Map in premapped supercarrier */
    if (mm->supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
        char *end;
        ErtsFreeSegDesc *desc;
        Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);

        erts_mtx_lock(&mm->mtx);

        ERTS_MMAP_OP_START(*sizep);

        if (!superaligned) {
            /* Page-aligned request: free sua segment, or grow the sua
             * area downwards into the gap between sa.top and sua.bot. */
            desc = lookup_free_seg(&mm->sua.map, asize);
            if (desc) {
                seg = desc->start;
                end = seg+asize;
                if (!mm->reserve_physical(seg, asize))
                    goto supercarrier_reserve_failure;
                if (desc->end == end) {
                    delete_free_seg(&mm->sua.map, desc);
                    free_desc(mm, desc);
                }
                else {
                    ERTS_MMAP_ASSERT(end < desc->end);
                    resize_free_seg(&mm->sua.map, desc, end, desc->end);
                }
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                goto supercarrier_success;
            }

            if (asize <= mm->sua.bot - mm->sa.top) {
                if (!mm->reserve_physical(mm->sua.bot - asize, asize))
                    goto supercarrier_reserve_failure;
                mm->sua.bot -= asize;
                seg = mm->sua.bot;
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                goto supercarrier_success;
            }
        }

        /* From here on the request is served superaligned (also used as
         * fallback for page-aligned requests that did not fit above). */
        asize = ERTS_SUPERALIGNED_CEILING(asize);

        desc = lookup_free_seg(&mm->sa.map, asize);
        if (desc) {
            char *start = seg = desc->start;
            seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
            end = seg+asize;
            if (!mm->reserve_physical(start, (UWord) (end - start)))
                goto supercarrier_reserve_failure;
            ERTS_MMAP_SIZE_SC_SA_INC(asize);
            /* Any unaligned head [start, seg) is put back as a free
             * (superunaligned) segment or unreserved again. */
            if (desc->end == end) {
                if (start != seg)
                    resize_free_seg(&mm->sa.map, desc, start, seg);
                else {
                    delete_free_seg(&mm->sa.map, desc);
                    free_desc(mm, desc);
                }
            }
            else {
                ERTS_MMAP_ASSERT(end < desc->end);
                resize_free_seg(&mm->sa.map, desc, end, desc->end);
                if (start != seg) {
                    UWord ad_sz;
                    ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                       start, seg);
                    start += ad_sz;
                    if (start != seg)
                        mm->unreserve_physical(start, (UWord) (seg - start));
                }
            }
            goto supercarrier_success;
        }

        if (superaligned) {
            /* Grow the superaligned area upwards... */
            char *start = mm->sa.top;
            seg = (char *) ERTS_SUPERALIGNED_CEILING(start);

            if (asize + (seg - start) <= mm->sua.bot - start) {
                end = seg + asize;
                if (!mm->reserve_physical(start, (UWord) (end - start)))
                    goto supercarrier_reserve_failure;
                mm->sa.top = end;
                ERTS_MMAP_SIZE_SC_SA_INC(asize);
                if (start != seg) {
                    UWord ad_sz;
                    ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                       start, seg);
                    start += ad_sz;
                    if (start != seg)
                        mm->unreserve_physical(start, (UWord) (seg - start));
                }
                goto supercarrier_success;
            }

            /* ...or carve a superaligned piece out of a large enough
             * superunaligned free segment (extra superalignment slack
             * is required to guarantee an aligned cut). */
            desc = lookup_free_seg(&mm->sua.map, asize + ERTS_SUPERALIGNED_SIZE);
            if (desc) {
                char *org_start = desc->start;
                char *org_end = desc->end;

                seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
                end = seg + asize;
                if (!mm->reserve_physical(seg, (UWord) (org_end - seg)))
                    goto supercarrier_reserve_failure;
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                if (org_start != seg) {
                    ERTS_MMAP_ASSERT(org_start < seg);
                    resize_free_seg(&mm->sua.map, desc, org_start, seg);
                    desc = NULL;
                }
                if (end != org_end) {
                    UWord ad_sz = 0;
                    ERTS_MMAP_ASSERT(end < org_end);
                    if (desc)
                        resize_free_seg(&mm->sua.map, desc, end, org_end);
                    else
                        ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                           end, org_end);
                    end += ad_sz;
                    if (end != org_end)
                        mm->unreserve_physical(end,
                                               (UWord) (org_end - end));
                }
                goto supercarrier_success;
            }
        }

        ERTS_MMAP_OP_ABORT();
        erts_mtx_unlock(&mm->mtx);
    }

#if ERTS_HAVE_OS_MMAP
    /* Map using OS primitives */
    if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) {
        if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
            seg = os_mmap(NULL, asize, 0);
            if (!seg)
                goto failure;
        }
        else {
            asize = ERTS_SUPERALIGNED_CEILING(*sizep);
            seg = os_mmap(NULL, asize, 1);
            if (!seg)
                goto failure;

            /* The OS gave us an unaligned mapping: over-allocate by one
             * superalignment unit, keep the aligned middle and unmap the
             * unaligned head and tail. */
            if (!ERTS_IS_SUPERALIGNED(seg)) {
                char *ptr;
                UWord sz;

                os_munmap(seg, asize);

                ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1);
                if (!ptr)
                    goto failure;

                seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr);
                sz = (UWord) (seg - ptr);
                ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE);
                if (sz)
                    os_munmap(ptr, sz);
                sz = ERTS_SUPERALIGNED_SIZE - sz;
                if (sz)
                    os_munmap(seg+asize, sz);
            }
        }

        ERTS_MMAP_OP_LCK(seg, *sizep, asize);
        ERTS_MMAP_SIZE_OS_INC(asize);
        *sizep = asize;
        return (void *) seg;
    }
failure:
#endif
    ERTS_MMAP_OP_LCK(NULL, *sizep, 0);
    *sizep = 0;
    return NULL;

supercarrier_success:

#ifdef ERTS_MMAP_DEBUG
    if (ERTS_MMAPFLG_SUPERALIGNED & flags) {
        ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg));
        ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
    }
    else {
        ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg));
        ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
    }
#endif

    ERTS_MMAP_OP_END(seg, asize);
    erts_mtx_unlock(&mm->mtx);

    *sizep = asize;
    return (void *) seg;

supercarrier_reserve_failure:
    erts_mtx_unlock(&mm->mtx);
    *sizep = 0;
    return NULL;
}
1719
/* Release the mapping [ptr, ptr+size) previously obtained from
 * erts_mmap().
 *
 * OS mappings are simply munmap():ed. Supercarrier segments are returned
 * to the free segment map of their area, coalescing with any directly
 * adjacent free segments, and their physical memory is unreserved —
 * except for any leading part that had to be sacrificed for new free
 * segment descriptors (see alloc_desc_insert_free_seg()).
 */
void
erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size)
{
    ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
    ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));

    if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
        ERTS_MMAP_ASSERT(!mm->no_os_mmap);
#if ERTS_HAVE_OS_MMAP
        ERTS_MUNMAP_OP_LCK(ptr, size);
        ERTS_MMAP_SIZE_OS_DEC(size);
        os_munmap(ptr, size);
#endif
    }
    else {
        char *start, *end;
        ErtsFreeSegMap *map;
        ErtsFreeSegDesc *prev, *next, *desc;
        UWord ad_sz = 0;

        ERTS_MMAP_ASSERT(mm->supercarrier);

        start = (char *) ptr;
        end = start + size;

        erts_mtx_lock(&mm->mtx);

        ERTS_MUNMAP_OP(ptr, size);

        if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {

            map = &mm->sa.map;
            adjacent_free_seg(map, start, end, &prev, &next);

            ERTS_MMAP_SIZE_SC_SA_DEC(size);
            /* Freeing at the top of the superaligned area: shrink the
             * area (absorbing a free segment just below it) instead of
             * recording a new free segment. */
            if (end == mm->sa.top) {
                ERTS_MMAP_ASSERT(!next);
                if (prev) {
                    start = prev->start;
                    delete_free_seg(map, prev);
                    free_desc(mm, prev);
                }
                mm->sa.top = start;
                goto supercarrier_success;
            }
        }
        else {
            map = &mm->sua.map;
            adjacent_free_seg(map, start, end, &prev, &next);

            ERTS_MMAP_SIZE_SC_SUA_DEC(size);
            /* Mirror image of the sa case: shrink the superunaligned
             * area upwards when freeing at its bottom. */
            if (start == mm->sua.bot) {
                ERTS_MMAP_ASSERT(!prev);
                if (next) {
                    end = next->end;
                    delete_free_seg(map, next);
                    free_desc(mm, next);
                }
                mm->sua.bot = end;
                goto supercarrier_success;
            }
        }

        desc = NULL;

        /* Coalesce with adjacent free segments, reusing one of their
         * descriptors when possible. */
        if (next) {
            ERTS_MMAP_ASSERT(end < next->end);
            end = next->end;
            if (prev) {
                delete_free_seg(map, next);
                free_desc(mm, next);
                goto save_prev;
            }
            desc = next;
        } else if (prev) {
        save_prev:
            ERTS_MMAP_ASSERT(prev->start < start);
            start = prev->start;
            desc = prev;
        }

        if (desc)
            resize_free_seg(map, desc, start, end);
        else
            ad_sz = alloc_desc_insert_free_seg(mm, map, start, end);

    supercarrier_success: {
            UWord unres_sz;

            /* The first ad_sz bytes (if any) were consumed for free
             * segment descriptors and must stay physically reserved. */
            ERTS_MMAP_ASSERT(size >= ad_sz);
            unres_sz = size - ad_sz;
            if (unres_sz)
                mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz);

            erts_mtx_unlock(&mm->mtx);
        }
    }
}
1818
1819 static void *
remap_move(ErtsMemMapper * mm,Uint32 flags,void * ptr,UWord old_size,UWord * sizep)1820 remap_move(ErtsMemMapper* mm,
1821 Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
1822 {
1823 UWord size = *sizep;
1824 void *new_ptr = erts_mmap(mm, flags, &size);
1825 if (!new_ptr)
1826 return NULL;
1827 *sizep = size;
1828 if (old_size < size)
1829 size = old_size;
1830 sys_memcpy(new_ptr, ptr, (size_t) size);
1831 erts_munmap(mm, flags, ptr, old_size);
1832 return new_ptr;
1833 }
1834
1835 void *
erts_mremap(ErtsMemMapper * mm,Uint32 flags,void * ptr,UWord old_size,UWord * sizep)1836 erts_mremap(ErtsMemMapper* mm,
1837 Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
1838 {
1839 void *new_ptr = NULL;
1840 Uint32 superaligned;
1841 UWord asize;
1842
1843 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1844 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1845 ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep));
1846
1847 if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
1848
1849 ERTS_MMAP_ASSERT(!mm->no_os_mmap);
1850
1851 if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mm->supercarrier) {
1852 new_ptr = remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
1853 ptr, old_size, sizep);
1854 if (new_ptr)
1855 return new_ptr;
1856 }
1857
1858 if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) {
1859 ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size);
1860 return NULL;
1861 }
1862
1863 #if defined(ERTS_HAVE_OS_MREMAP) || defined(ERTS_HAVE_GENUINE_OS_MMAP)
1864 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
1865
1866 if (superaligned) {
1867 asize = ERTS_SUPERALIGNED_CEILING(*sizep);
1868 if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) {
1869 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1870 *sizep = asize;
1871 return ptr;
1872 }
1873 }
1874 else {
1875 asize = ERTS_PAGEALIGNED_CEILING(*sizep);
1876 if (asize == old_size) {
1877 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1878 *sizep = asize;
1879 return ptr;
1880 }
1881 }
1882
1883 #ifdef ERTS_HAVE_GENUINE_OS_MMAP
1884 if (asize < old_size
1885 && (!superaligned
1886 || ERTS_IS_SUPERALIGNED(ptr))) {
1887 UWord um_sz;
1888 new_ptr = ((char *) ptr) + asize;
1889 ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr);
1890 um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr);
1891 ERTS_MMAP_SIZE_OS_DEC(um_sz);
1892 os_munmap(new_ptr, um_sz);
1893 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1894 *sizep = asize;
1895 return ptr;
1896 }
1897 #endif
1898 #ifdef ERTS_HAVE_OS_MREMAP
1899 if (superaligned) {
1900 return remap_move(mm, flags, new_ptr, old_size, sizep);
1901 } else {
1902 new_ptr = os_mremap(ptr, old_size, asize, 0);
1903 if (!new_ptr)
1904 return NULL;
1905 if (asize > old_size)
1906 ERTS_MMAP_SIZE_OS_INC(asize - old_size);
1907 else
1908 ERTS_MMAP_SIZE_OS_DEC(old_size - asize);
1909 ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize);
1910 *sizep = asize;
1911 return new_ptr;
1912 }
1913 #endif
1914 #endif
1915 }
1916 else { /* In super carrier */
1917 char *start, *end, *new_end;
1918 ErtsFreeSegMap *map;
1919 ErtsFreeSegDesc *prev, *next;
1920 UWord ad_sz = 0;
1921
1922 ERTS_MMAP_ASSERT(mm->supercarrier);
1923
1924 if (ERTS_MMAPFLG_OS_ONLY & flags)
1925 return remap_move(mm, flags, ptr, old_size, sizep);
1926
1927 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
1928
1929 asize = (superaligned
1930 ? ERTS_SUPERALIGNED_CEILING(*sizep)
1931 : ERTS_PAGEALIGNED_CEILING(*sizep));
1932
1933 erts_mtx_lock(&mm->mtx);
1934
1935 if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
1936 ? (!superaligned && lookup_free_seg(&mm->sua.map, asize))
1937 : (superaligned && lookup_free_seg(&mm->sa.map, asize))) {
1938 erts_mtx_unlock(&mm->mtx);
1939 /*
1940 * Segment currently in wrong area (due to a previous memory
1941 * shortage), move it to the right area.
1942 * (remap_move() will succeed)
1943 */
1944 return remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
1945 ptr, old_size, sizep);
1946 }
1947
1948 ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
1949
1950 if (asize == old_size) {
1951 new_ptr = ptr;
1952 goto supercarrier_resize_success;
1953 }
1954
1955 start = (char *) ptr;
1956 end = start + old_size;
1957 new_end = start+asize;
1958
1959 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1960 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1961 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
1962
1963 if (asize < old_size) {
1964 UWord unres_sz;
1965 new_ptr = ptr;
1966 if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
1967 map = &mm->sua.map;
1968 ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
1969 }
1970 else {
1971 if (end == mm->sa.top) {
1972 mm->sa.top = new_end;
1973 mm->unreserve_physical(((char *) ptr) + asize,
1974 old_size - asize);
1975 goto supercarrier_resize_success;
1976 }
1977 ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
1978 map = &mm->sa.map;
1979 }
1980
1981 adjacent_free_seg(map, start, end, &prev, &next);
1982
1983 if (next)
1984 resize_free_seg(map, next, new_end, next->end);
1985 else
1986 ad_sz = alloc_desc_insert_free_seg(mm, map, new_end, end);
1987 ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
1988 unres_sz = old_size - asize - ad_sz;
1989 if (unres_sz)
1990 mm->unreserve_physical(((char *) ptr) + asize + ad_sz,
1991 unres_sz);
1992 goto supercarrier_resize_success;
1993 }
1994
1995 if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
1996 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1997 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1998 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
1999
2000 adjacent_free_seg(&mm->sua.map, start, end, &prev, &next);
2001
2002 if (next && new_end <= next->end) {
2003 if (!mm->reserve_physical(((char *) ptr) + old_size,
2004 asize - old_size))
2005 goto supercarrier_reserve_failure;
2006 if (new_end < next->end)
2007 resize_free_seg(&mm->sua.map, next, new_end, next->end);
2008 else {
2009 delete_free_seg(&mm->sua.map, next);
2010 free_desc(mm, next);
2011 }
2012 new_ptr = ptr;
2013 ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
2014 goto supercarrier_resize_success;
2015 }
2016 }
2017 else { /* Superaligned area */
2018
2019 if (end == mm->sa.top) {
2020 if (new_end <= mm->sua.bot) {
2021 if (!mm->reserve_physical(((char *) ptr) + old_size,
2022 asize - old_size))
2023 goto supercarrier_reserve_failure;
2024 mm->sa.top = new_end;
2025 new_ptr = ptr;
2026 ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
2027 goto supercarrier_resize_success;
2028 }
2029 }
2030 else {
2031 adjacent_free_seg(&mm->sa.map, start, end, &prev, &next);
2032 if (next && new_end <= next->end) {
2033 if (!mm->reserve_physical(((char *) ptr) + old_size,
2034 asize - old_size))
2035 goto supercarrier_reserve_failure;
2036 if (new_end < next->end)
2037 resize_free_seg(&mm->sa.map, next, new_end, next->end);
2038 else {
2039 delete_free_seg(&mm->sa.map, next);
2040 free_desc(mm, next);
2041 }
2042 new_ptr = ptr;
2043 ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
2044 goto supercarrier_resize_success;
2045 }
2046 }
2047 }
2048
2049 ERTS_MMAP_OP_ABORT();
2050 erts_mtx_unlock(&mm->mtx);
2051
2052 /* Failed to resize... */
2053 }
2054
2055 return remap_move(mm, flags, ptr, old_size, sizep);
2056
2057 supercarrier_resize_success:
2058
2059 #ifdef ERTS_MMAP_DEBUG
2060 if ((ERTS_MMAPFLG_SUPERALIGNED & flags)
2061 || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) {
2062 ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr));
2063 ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
2064 }
2065 else {
2066 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr));
2067 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
2068 }
2069 #endif
2070
2071 ERTS_MREMAP_OP_END(new_ptr, asize);
2072 erts_mtx_unlock(&mm->mtx);
2073
2074 *sizep = asize;
2075 return new_ptr;
2076
2077 supercarrier_reserve_failure:
2078 ERTS_MREMAP_OP_END(NULL, old_size);
2079 erts_mtx_unlock(&mm->mtx);
2080 *sizep = old_size;
2081 return NULL;
2082
2083 }
2084
/* Return non-zero when 'ptr' lies inside the supercarrier range. */
int erts_mmap_in_supercarrier(ErtsMemMapper* mm, void *ptr)
{
    (void) mm; /* kept for interface symmetry; the check is global */
    return ERTS_MMAP_IN_SUPERCARRIER(ptr);
}
2089
/* Atoms used when building erts_mmap info terms. Filled in lazily by
 * init_atoms(); 'init_mutex' serializes the one-time initialization. */
static struct {
    Eterm options;
    Eterm total;
    Eterm total_sa;
    Eterm total_sua;
    Eterm used;
    Eterm used_sa;
    Eterm used_sua;
    Eterm max;
    Eterm allocated;
    Eterm reserved;
    Eterm sizes;
    Eterm free_segs;
    Eterm supercarrier;
    Eterm os;
    Eterm scs;
    Eterm sco;
    Eterm scrpm;
    Eterm scrfsd;

    int is_initialized;      /* set once all atoms above are created */
    erts_mtx_t init_mutex;   /* guards the lazy initialization */
}am;
2113
atom_init(Eterm * atom,char * name)2114 static void ERTS_INLINE atom_init(Eterm *atom, char *name)
2115 {
2116 *atom = am_atom_put(name, strlen(name));
2117 }
2118 #define AM_INIT(AM) atom_init(&am.AM, #AM)
2119
init_atoms(void)2120 static void init_atoms(void)
2121 {
2122 erts_mtx_lock(&am.init_mutex);
2123
2124 if (!am.is_initialized) {
2125 AM_INIT(options);
2126 AM_INIT(total);
2127 AM_INIT(total_sa);
2128 AM_INIT(total_sua);
2129 AM_INIT(used);
2130 AM_INIT(used_sa);
2131 AM_INIT(used_sua);
2132 AM_INIT(max);
2133 AM_INIT(allocated);
2134 AM_INIT(reserved);
2135 AM_INIT(sizes);
2136 AM_INIT(free_segs);
2137 AM_INIT(supercarrier);
2138 AM_INIT(os);
2139 AM_INIT(scs);
2140 AM_INIT(sco);
2141 AM_INIT(scrpm);
2142 AM_INIT(scrfsd);
2143 am.is_initialized = 1;
2144 }
2145 erts_mtx_unlock(&am.init_mutex);
2146 };
2147
2148
2149 #ifdef HARD_DEBUG_MSEG
2150 static void hard_dbg_mseg_init(void);
2151 #endif
2152
2153 void
erts_mmap_init(ErtsMemMapper * mm,ErtsMMapInit * init)2154 erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init)
2155 {
2156 static int is_first_call = 1;
2157 char *start = NULL, *end = NULL;
2158 UWord pagesize;
2159 int virtual_map = 0;
2160
2161 (void)virtual_map;
2162
2163 #if defined(__WIN32__)
2164 {
2165 SYSTEM_INFO sysinfo;
2166 GetSystemInfo(&sysinfo);
2167 pagesize = (UWord) sysinfo.dwPageSize;
2168 }
2169 #elif defined(_SC_PAGESIZE)
2170 pagesize = (UWord) sysconf(_SC_PAGESIZE);
2171 #elif defined(HAVE_GETPAGESIZE)
2172 pagesize = (UWord) getpagesize();
2173 #else
2174 # error "Do not know how to get page size"
2175 #endif
2176 #if defined(HARD_DEBUG) || 0
2177 erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs);
2178 erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco);
2179 erts_fprintf(stderr, "erts_mmap: scrfsd = %i\n", init->scrfsd);
2180 #endif
2181 erts_page_inv_mask = pagesize - 1;
2182 if (pagesize & erts_page_inv_mask)
2183 erts_exit(1, "erts_mmap: Invalid pagesize: %bpu\n",
2184 pagesize);
2185
2186 ERTS_MMAP_OP_RINGBUF_INIT();
2187
2188 mm->supercarrier = 0;
2189 mm->reserve_physical = reserve_noop;
2190 mm->unreserve_physical = unreserve_noop;
2191
2192 #if HAVE_MMAP && !defined(MAP_ANON)
2193 mm->mmap_fd = open("/dev/zero", O_RDWR);
2194 if (mm->mmap_fd < 0)
2195 erts_exit(1, "erts_mmap: Failed to open /dev/zero\n");
2196 #endif
2197
2198 erts_mtx_init(&mm->mtx, "erts_mmap", NIL,
2199 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
2200 if (is_first_call) {
2201 erts_mtx_init(&am.init_mutex, "mmap_init_atoms", NIL,
2202 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
2203 }
2204
2205 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2206 if (init->virtual_range.start) {
2207 char *ptr;
2208 UWord sz;
2209 ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
2210 end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
2211 sz = end - ptr;
2212 start = os_mmap_virtual(ptr, sz);
2213 if (!start || start > ptr || start >= end)
2214 erts_exit(1,
2215 "erts_mmap: Failed to create virtual range for super carrier\n");
2216 sz = start - ptr;
2217 if (sz)
2218 os_munmap(end, sz);
2219 mm->reserve_physical = os_reserve_physical;
2220 mm->unreserve_physical = os_unreserve_physical;
2221 virtual_map = 1;
2222 }
2223 else
2224 #endif
2225 if (init->predefined_area.start) {
2226 start = init->predefined_area.start;
2227 end = init->predefined_area.end;
2228 if (end != (void *) 0 && end < start)
2229 end = start;
2230 }
2231 #if ERTS_HAVE_OS_MMAP
2232 else if (init->scs) {
2233 UWord sz;
2234 sz = ERTS_PAGEALIGNED_CEILING(init->scs);
2235 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2236 if (!init->scrpm) {
2237 start = os_mmap_virtual(NULL, sz);
2238 mm->reserve_physical = os_reserve_physical;
2239 mm->unreserve_physical = os_unreserve_physical;
2240 virtual_map = 1;
2241 }
2242 else
2243 #endif
2244 {
2245 /*
2246 * The whole supercarrier will by physically
2247 * reserved all the time.
2248 */
2249 start = os_mmap(NULL, sz, 1);
2250 }
2251 if (!start)
2252 erts_exit(1,
2253 "erts_mmap: Failed to create super carrier of size %bpu MB\n",
2254 init->scs/1024/1024);
2255 end = start + sz;
2256 #ifdef ERTS_MMAP_DEBUG_FILL_AREAS
2257 if (!virtual_map) {
2258 Uint32 *uip;
2259
2260 for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++)
2261 *uip = (Uint32) 0xdeadbeef;
2262 }
2263 #endif
2264 }
2265 #endif
2266
2267 mm->no.free_seg_descs = 0;
2268 mm->no.free_segs.curr = 0;
2269 mm->no.free_segs.max = 0;
2270
2271 mm->size.supercarrier.total = 0;
2272 mm->size.supercarrier.used.total = 0;
2273 mm->size.supercarrier.used.sa = 0;
2274 mm->size.supercarrier.used.sua = 0;
2275 mm->size.os.used = 0;
2276
2277 mm->desc.new_area_hint = NULL;
2278
2279 if (!start) {
2280 mm->sa.bot = NULL;
2281 mm->sua.top = NULL;
2282 mm->sa.bot = NULL;
2283 mm->sua.top = NULL;
2284 mm->no_os_mmap = 0;
2285 mm->supercarrier = 0;
2286 }
2287 else {
2288 size_t desc_size;
2289
2290 mm->no_os_mmap = init->sco;
2291
2292 desc_size = init->scrfsd;
2293 if (desc_size < 100)
2294 desc_size = 100;
2295 desc_size *= sizeof(ErtsFreeSegDesc);
2296 if ((desc_size
2297 + ERTS_SUPERALIGNED_SIZE
2298 + ERTS_PAGEALIGNED_SIZE) > end - start)
2299 erts_exit(1, "erts_mmap: No space for segments in super carrier\n");
2300
2301 mm->sa.bot = start;
2302 mm->sa.bot += desc_size;
2303 mm->sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mm->sa.bot);
2304 mm->sa.top = mm->sa.bot;
2305 mm->sua.top = end;
2306 mm->sua.bot = mm->sua.top;
2307
2308 mm->size.supercarrier.used.total += (UWord) (mm->sa.bot - start);
2309
2310 mm->desc.free_list = NULL;
2311 mm->desc.reserved = 0;
2312
2313 if (end == (void *) 0) {
2314 /*
2315 * Very unlikely, but we need a guarantee
2316 * that `mm->sua.top` always will
2317 * compare as larger than all segment pointers
2318 * into the super carrier...
2319 */
2320 mm->sua.top -= ERTS_PAGEALIGNED_SIZE;
2321 mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
2322 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2323 if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE))
2324 #endif
2325 add_free_desc_area(mm, mm->sua.top, end);
2326 mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc);
2327 }
2328
2329 mm->size.supercarrier.total = (UWord) (mm->sua.top - start);
2330
2331 /*
2332 * Area before (and after) super carrier
2333 * will be used for free segment descritors.
2334 */
2335 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2336 if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start))
2337 erts_exit(1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
2338 #endif
2339 mm->desc.unused_start = start;
2340 mm->desc.unused_end = mm->sa.bot;
2341 mm->desc.reserved += ((mm->desc.unused_end - start)
2342 / sizeof(ErtsFreeSegDesc));
2343
2344 init_free_seg_map(&mm->sa.map, SA_SZ_ADDR_ORDER);
2345 init_free_seg_map(&mm->sua.map, SZ_REVERSE_ADDR_ORDER);
2346
2347 mm->supercarrier = 1;
2348
2349 mm->desc.new_area_hint = end;
2350
2351 }
2352
2353 #if !ERTS_HAVE_OS_MMAP
2354 mm->no_os_mmap = 1;
2355 #endif
2356
2357 #ifdef HARD_DEBUG_MSEG
2358 hard_dbg_mseg_init();
2359 #endif
2360
2361 #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
2362 if (mm == &erts_literal_mmapper) {
2363 erts_literals_start = erts_literal_mmapper.sa.bot;
2364 erts_literals_size = erts_literal_mmapper.sua.top - erts_literals_start;
2365 }
2366 #endif
2367 is_first_call = 0;
2368 }
2369
2370
2371 static ERTS_INLINE void
add_2tup(Uint ** hpp,Uint * szp,Eterm * lp,Eterm el1,Eterm el2)2372 add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
2373 {
2374 *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
2375 }
2376
erts_mmap_info(ErtsMemMapper * mm,fmtfn_t * print_to_p,void * print_to_arg,Eterm ** hpp,Uint * szp,struct erts_mmap_info_struct * emis)2377 Eterm erts_mmap_info(ErtsMemMapper* mm,
2378 fmtfn_t *print_to_p,
2379 void *print_to_arg,
2380 Eterm** hpp, Uint* szp,
2381 struct erts_mmap_info_struct* emis)
2382 {
2383 Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua };
2384 Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
2385 Eterm group[2];
2386 Eterm group_tags[] = { am.sizes, am.free_segs };
2387 Eterm list[3];
2388 Eterm list_tags[3]; /* { am.options, am.supercarrier, am.os } */
2389 int lix = 0;
2390 Eterm res = THE_NON_VALUE;
2391
2392 if (!hpp) {
2393 erts_mtx_lock(&mm->mtx);
2394 emis->sizes[0] = mm->size.supercarrier.total;
2395 emis->sizes[1] = mm->sa.top - mm->sa.bot;
2396 emis->sizes[2] = mm->sua.top - mm->sua.bot;
2397 emis->sizes[3] = mm->size.supercarrier.used.total;
2398 emis->sizes[4] = mm->size.supercarrier.used.sa;
2399 emis->sizes[5] = mm->size.supercarrier.used.sua;
2400
2401 emis->segs[0] = mm->no.free_segs.curr;
2402 emis->segs[1] = mm->no.free_segs.max;
2403 emis->segs[2] = mm->no.free_seg_descs;
2404 emis->segs[3] = mm->desc.reserved;
2405 emis->segs[4] = mm->sa.map.nseg;
2406 emis->segs[5] = mm->sua.map.nseg;
2407
2408 emis->os_used = mm->size.os.used;
2409 erts_mtx_unlock(&mm->mtx);
2410 }
2411
2412 list[lix] = erts_mmap_info_options(mm, "option ", print_to_p, print_to_arg,
2413 hpp, szp);
2414 list_tags[lix] = am.options;
2415 lix++;
2416
2417
2418 if (print_to_p) {
2419 fmtfn_t to = *print_to_p;
2420 void *arg = print_to_arg;
2421 if (mm->supercarrier) {
2422 const char* prefix = "supercarrier ";
2423 erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
2424 erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
2425 erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]);
2426 erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]);
2427 erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]);
2428 erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]);
2429 erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]);
2430 erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]);
2431 erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]);
2432 erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]);
2433 erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
2434 erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
2435 }
2436 if (!mm->no_os_mmap) {
2437 erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
2438 }
2439 }
2440
2441
2442 if (hpp || szp) {
2443 if (!am.is_initialized) {
2444 init_atoms();
2445 }
2446
2447 if (mm->supercarrier) {
2448 group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
2449 sizeof(size_tags)/sizeof(Eterm),
2450 size_tags, emis->sizes);
2451 group[1] = erts_bld_atom_uword_2tup_list(hpp, szp,
2452 sizeof(seg_tags)/sizeof(Eterm),
2453 seg_tags, emis->segs);
2454 list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group);
2455 list_tags[lix] = am.supercarrier;
2456 lix++;
2457 }
2458
2459 if (!mm->no_os_mmap) {
2460 group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
2461 1, &am.used, &emis->os_used);
2462 list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
2463 list_tags[lix] = am.os;
2464 lix++;
2465 }
2466 res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list);
2467 }
2468 return res;
2469 }
2470
erts_mmap_info_options(ErtsMemMapper * mm,char * prefix,fmtfn_t * print_to_p,void * print_to_arg,Uint ** hpp,Uint * szp)2471 Eterm erts_mmap_info_options(ErtsMemMapper* mm,
2472 char *prefix,
2473 fmtfn_t *print_to_p,
2474 void *print_to_arg,
2475 Uint **hpp,
2476 Uint *szp)
2477 {
2478 const UWord scs = mm->sua.top - mm->sa.bot;
2479 const Eterm sco = mm->no_os_mmap ? am_true : am_false;
2480 const Eterm scrpm = (mm->reserve_physical == reserve_noop) ? am_true : am_false;
2481 Eterm res = THE_NON_VALUE;
2482
2483 if (print_to_p) {
2484 fmtfn_t to = *print_to_p;
2485 void *arg = print_to_arg;
2486 erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
2487 if (mm->supercarrier) {
2488 erts_print(to, arg, "%ssco: %T\n", prefix, sco);
2489 erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
2490 erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mm->desc.reserved);
2491 }
2492 }
2493
2494 if (hpp || szp) {
2495 if (!am.is_initialized) {
2496 init_atoms();
2497 }
2498
2499 res = NIL;
2500 if (mm->supercarrier) {
2501 add_2tup(hpp, szp, &res, am.scrfsd,
2502 erts_bld_uint(hpp,szp, mm->desc.reserved));
2503 add_2tup(hpp, szp, &res, am.scrpm, scrpm);
2504 add_2tup(hpp, szp, &res, am.sco, sco);
2505 }
2506 add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs));
2507 }
2508 return res;
2509 }
2510
2511 #endif /* HAVE_ERTS_MMAP */
2512
erts_mmap_debug_info(Process * p)2513 Eterm erts_mmap_debug_info(Process* p)
2514 {
2515 #if HAVE_ERTS_MMAP
2516 ErtsMemMapper* mm = &erts_dflt_mmapper;
2517
2518 if (mm->supercarrier) {
2519 ERTS_DECL_AM(sabot);
2520 ERTS_DECL_AM(satop);
2521 ERTS_DECL_AM(suabot);
2522 ERTS_DECL_AM(suatop);
2523 Eterm sa_list, sua_list, list;
2524 Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop };
2525 UWord values[4];
2526 Eterm *hp, *hp_end;
2527 Uint may_need;
2528
2529 erts_mtx_lock(&mm->mtx);
2530 values[0] = (UWord)mm->sa.bot;
2531 values[1] = (UWord)mm->sa.top;
2532 values[2] = (UWord)mm->sua.bot;
2533 values[3] = (UWord)mm->sua.top;
2534 sa_list = build_free_seg_list(p, &mm->sa.map);
2535 sua_list = build_free_seg_list(p, &mm->sua.map);
2536 erts_mtx_unlock(&mm->mtx);
2537
2538 may_need = 4*(2+3+2) + 2*(2+3);
2539 hp = HAlloc(p, may_need);
2540 hp_end = hp + may_need;
2541
2542 list = erts_bld_atom_uword_2tup_list(&hp, NULL,
2543 sizeof(values)/sizeof(*values),
2544 tags, values);
2545
2546 sa_list = TUPLE2(hp, ERTS_MAKE_AM("sa_free_segs"), sa_list); hp+=3;
2547 sua_list = TUPLE2(hp, ERTS_MAKE_AM("sua_free_segs"), sua_list); hp+=3;
2548 list = CONS(hp, sua_list, list); hp+=2;
2549 list = CONS(hp, sa_list, list); hp+=2;
2550
2551 ASSERT(hp <= hp_end);
2552 HRelease(p, hp_end, hp);
2553 return list;
2554 }
2555 #endif
2556 return am_undefined;
2557 }
2558
2559
2560 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
2561 * Debug functions *
2562 \* */
2563
2564
2565 #ifdef HARD_DEBUG
2566
rbt_assert_is_member(RBTNode * root,RBTNode * node)2567 static int rbt_assert_is_member(RBTNode* root, RBTNode* node)
2568 {
2569 while (node != root) {
2570 RBT_ASSERT(parent(node));
2571 RBT_ASSERT(parent(node)->left == node || parent(node)->right == node);
2572 node = parent(node);
2573 }
2574 return 1;
2575 }
2576
2577
2578 #if 0
2579 # define PRINT_TREE
2580 #else
2581 # undef PRINT_TREE
2582 #endif
2583
2584 #ifdef PRINT_TREE
2585 static void print_tree(enum SortOrder order, RBTNode*);
2586 #endif
2587
2588 /*
2589 * Checks that the order between parent and children are correct,
2590 * and that the Red-Black Tree properies are satisfied. if size > 0,
2591 * check_tree() returns the node that satisfies "address order first fit"
2592 *
2593 * The Red-Black Tree properies are:
2594 * 1. Every node is either red or black.
2595 * 2. Every leaf (NIL) is black.
2596 * 3. If a node is red, then both its children are black.
2597 * 4. Every simple path from a node to a descendant leaf
2598 * contains the same number of black nodes.
2599 *
2600 */
2601
/* Accumulator passed to check_node_callback() during a full tree walk. */
struct check_arg_t {
    RBTree* tree;               /* tree being verified */
    ErtsFreeSegDesc* prev_seg;  /* previous segment (ADDR_ORDER disjointness check) */
    Uint size;                  /* wanted size; 0 disables the fit search */
    RBTNode *res;               /* best fit candidate found so far */
};
static void check_node_callback(RBTNode* x, void* arg);
2609
2610
2611 static RBTNode *
check_tree(RBTree * tree,Uint size)2612 check_tree(RBTree* tree, Uint size)
2613 {
2614 struct check_arg_t carg;
2615 carg.tree = tree;
2616 carg.prev_seg = NULL;
2617 carg.size = size;
2618 carg.res = NULL;
2619
2620 #ifdef PRINT_TREE
2621 print_tree(tree->order, tree->root);
2622 #endif
2623
2624 if (!tree->root)
2625 return NULL;
2626
2627 RBT_ASSERT(IS_BLACK(tree->root));
2628 RBT_ASSERT(!parent(tree->root));
2629
2630 rbt_foreach_node(tree, check_node_callback, &carg, 0);
2631
2632 return carg.res;
2633 }
2634
check_node_callback(RBTNode * x,void * arg)2635 static void check_node_callback(RBTNode* x, void* arg)
2636 {
2637 struct check_arg_t* a = (struct check_arg_t*) arg;
2638 ErtsFreeSegDesc* seg;
2639
2640 if (IS_RED(x)) {
2641 RBT_ASSERT(IS_BLACK(x->right));
2642 RBT_ASSERT(IS_BLACK(x->left));
2643 }
2644
2645 RBT_ASSERT(parent(x) || x == a->tree->root);
2646
2647 if (x->left) {
2648 RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0);
2649 }
2650 if (x->right) {
2651 RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0);
2652 }
2653
2654 seg = node_to_desc(a->tree->order, x);
2655 RBT_ASSERT(seg->start < seg->end);
2656 if (a->size && (seg->end - seg->start) >= a->size) {
2657 if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) {
2658 a->res = x;
2659 }
2660 }
2661 if (a->tree->order == ADDR_ORDER) {
2662 RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start);
2663 a->prev_seg = seg;
2664 }
2665 }
2666
2667 #endif /* HARD_DEBUG */
2668
2669
2670 #ifdef PRINT_TREE
2671 #define INDENT_STEP 2
2672
2673 #include <stdio.h>
2674
2675 static void
print_tree_aux(enum SortOrder order,RBTNode * x,int indent)2676 print_tree_aux(enum SortOrder order, RBTNode *x, int indent)
2677 {
2678 int i;
2679
2680 if (x) {
2681 ErtsFreeSegDesc* desc = node_to_desc(order, x);
2682 print_tree_aux(order, x->right, indent + INDENT_STEP);
2683 for (i = 0; i < indent; i++) {
2684 putc(' ', stderr);
2685 }
2686 fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n",
2687 IS_BLACK(x) ? "BLACK" : "RED",
2688 desc->end - desc->start, desc->start, desc->end, desc);
2689 print_tree_aux(order, x->left, indent + INDENT_STEP);
2690 }
2691 }
2692
2693
2694 static void
print_tree(enum SortOrder order,RBTNode * root)2695 print_tree(enum SortOrder order, RBTNode* root)
2696 {
2697 fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]);
2698 print_tree_aux(order, root, 0);
2699 fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]);
2700 }
2701
2702 #endif /* PRINT_TREE */
2703
2704
2705 #ifdef FREE_SEG_API_SMOKE_TEST
2706
test_it(void)2707 void test_it(void)
2708 {
2709 ErtsFreeSegMap map;
2710 ErtsFreeSegDesc *desc, *under, *over, *d1, *d2;
2711 const int i = 1; /* reverse addr order */
2712
2713 {
2714 init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER);
2715
2716 insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000);
2717 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2718 insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000);
2719 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2720 insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000);
2721 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2722 insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000);
2723 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2724
2725 desc = lookup_free_seg(&map, 0x500);
2726 ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L));
2727
2728 desc = lookup_free_seg(&map, 0x1500);
2729 ERTS_ASSERT(desc->start == (char*)0x15000);
2730
2731 adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over);
2732 ERTS_ASSERT(!under && !over);
2733
2734 adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over);
2735 ERTS_ASSERT(!under && over->start == (char*)0x8000);
2736
2737 adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over);
2738 ERTS_ASSERT(under->end == (char*)0x10000 && !over);
2739
2740 adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over);
2741 ERTS_ASSERT(!under && !over);
2742
2743 adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over);
2744 ERTS_ASSERT(!under && over && over->start == (char*)0x11000);
2745
2746 adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over);
2747 ERTS_ASSERT(under && under->end == (char*)0x12000 && !over);
2748
2749 adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over);
2750 ERTS_ASSERT(under && under->end == (char*)0x12000 &&
2751 over && over->start == (char*)0x13000);
2752
2753 adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over);
2754 ERTS_ASSERT(!under && over && over->start == (char*)0x13000);
2755
2756 d1 = lookup_free_seg(&map, 0x500);
2757 ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L));
2758
2759 resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end);
2760 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2761
2762 d2 = lookup_free_seg(&map, 0x1200);
2763 ERTS_ASSERT(d2 == d1);
2764
2765 delete_free_seg(&map, d1);
2766 HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
2767
2768 d1 = lookup_free_seg(&map, 0x1200);
2769 ERTS_ASSERT(d1->start == (char*)0x15000);
2770 }
2771 }
2772
2773 #endif /* FREE_SEG_API_SMOKE_TEST */
2774
2775
2776 #ifdef HARD_DEBUG_MSEG
2777
2778 /*
2779 * Debug stuff used by erl_mseg to check that it does the right thing.
2780 * The reason for keeping it here is that we (ab)use the rb-tree code
2781 * for keeping track of *allocated* segments.
2782 */
2783
/* Slim stand-in for ErtsFreeSegDesc used only by the debug tree of
 * allocated segments; the pointer fix-up lives in hard_dbg_alloc_desc(). */
typedef struct ErtsFreeSegDesc_fake_ {
    /*RBTNode snode;    Save memory by skipping unused size tree node */
    RBTNode anode;      /* node in 'atree' */
    union {
        char* start;
        struct ErtsFreeSegDesc_fake_* next_free;
    }u;
    char* end;
}ErtsFreeSegDesc_fake;

/* Static descriptor pool and head of its intrusive free list */
static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000];
static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first;
/* Address ordered tree over all segments currently tracked */
RBTree hard_dbg_mseg_tree;

/* Protects hard_dbg_mseg_tree and the descriptor free list */
static erts_mtx_t hard_dbg_mseg_mtx;
2799
hard_dbg_mseg_init(void)2800 static void hard_dbg_mseg_init(void)
2801 {
2802 ErtsFreeSegDesc_fake* p;
2803
2804 erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg", NIL,
2805 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG);
2806 hard_dbg_mseg_tree.root = NULL;
2807 hard_dbg_mseg_tree.order = ADDR_ORDER;
2808
2809 p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) /
2810 sizeof(*hard_dbg_mseg_desc_pool)) - 1];
2811 p->u.next_free = NULL;
2812 while (--p >= hard_dbg_mseg_desc_pool) {
2813 p->u.next_free = (p+1);
2814 }
2815 hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0];
2816 }
2817
hard_dbg_alloc_desc(void)2818 static ErtsFreeSegDesc* hard_dbg_alloc_desc(void)
2819 {
2820 ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first;
2821 ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors");
2822 hard_dbg_mseg_desc_first = p->u.next_free;
2823
2824 /* Creative pointer arithmetic to return something that looks like
2825 * a ErtsFreeSegDesc as long as we don't use the absent 'snode'.
2826 */
2827 return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode));
2828 }
2829
hard_dbg_free_desc(ErtsFreeSegDesc * desc)2830 static void hard_dbg_free_desc(ErtsFreeSegDesc* desc)
2831 {
2832 ErtsFreeSegDesc_fake* p = (ErtsFreeSegDesc_fake*) &desc->anode;
2833 memset(p, 0xfe, sizeof(*p));
2834 p->u.next_free = hard_dbg_mseg_desc_first;
2835 hard_dbg_mseg_desc_first = p;
2836 }
2837
check_seg_writable(void * seg,UWord sz)2838 static void check_seg_writable(void* seg, UWord sz)
2839 {
2840 UWord* seg_end = (UWord*)((char*)seg + sz);
2841 volatile UWord* p;
2842 ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg));
2843 ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz));
2844 for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) {
2845 UWord write_back = *p;
2846 *p = 0xfade2b1acc;
2847 *p = write_back;
2848 }
2849 }
2850
hard_dbg_insert_mseg(void * seg,UWord sz)2851 void hard_dbg_insert_mseg(void* seg, UWord sz)
2852 {
2853 check_seg_writable(seg, sz);
2854 erts_mtx_lock(&hard_dbg_mseg_mtx);
2855 {
2856 ErtsFreeSegDesc *desc = hard_dbg_alloc_desc();
2857 RBTNode *prev, *next;
2858 desc->start = (char*)seg;
2859 desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */
2860 rbt_insert(&hard_dbg_mseg_tree, &desc->anode);
2861 prev = rbt_prev_node(&desc->anode);
2862 next = rbt_next_node(&desc->anode);
2863 ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start);
2864 ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end);
2865 }
2866 erts_mtx_unlock(&hard_dbg_mseg_mtx);
2867 }
2868
hard_dbg_lookup_seg_at(RBTree * tree,char * start)2869 static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start)
2870 {
2871 RBTNode* x = tree->root;
2872
2873 while (x) {
2874 ErtsFreeSegDesc* desc = anode_to_desc(x);
2875 if (start < desc->start) {
2876 x = x->left;
2877 }
2878 else if (start > desc->start) {
2879 ERTS_ASSERT(start > desc->end);
2880 x = x->right;
2881 }
2882 else
2883 return desc;
2884 }
2885 return NULL;
2886 }
2887
hard_dbg_remove_mseg(void * seg,UWord sz)2888 void hard_dbg_remove_mseg(void* seg, UWord sz)
2889 {
2890 check_seg_writable(seg, sz);
2891 erts_mtx_lock(&hard_dbg_mseg_mtx);
2892 {
2893 ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg);
2894 ERTS_ASSERT(desc);
2895 ERTS_ASSERT(desc->start == (char*)seg);
2896 ERTS_ASSERT(desc->end == (char*)seg + sz - 1);
2897
2898 rbt_delete(&hard_dbg_mseg_tree, &desc->anode);
2899 hard_dbg_free_desc(desc);
2900 }
2901 erts_mtx_unlock(&hard_dbg_mseg_mtx);
2902 }
2903
2904 #endif /* HARD_DEBUG_MSEG */
2905