1 /*
2 * %CopyrightBegin%
3 *
4 * Copyright Ericsson AB 2002-2018. All Rights Reserved.
5 *
6 * Licensed under the Apache License, Version 2.0 (the "License");
7 * you may not use this file except in compliance with the License.
8 * You may obtain a copy of the License at
9 *
10 * http://www.apache.org/licenses/LICENSE-2.0
11 *
12 * Unless required by applicable law or agreed to in writing, software
13 * distributed under the License is distributed on an "AS IS" BASIS,
14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15 * See the License for the specific language governing permissions and
16 * limitations under the License.
17 *
18 * %CopyrightEnd%
19 */
20 #ifdef HAVE_CONFIG_H
21 # include "config.h"
22 #endif
23
24 #define ERTS_WANT_MEM_MAPPERS
25 #include "sys.h"
26 #include "erl_process.h"
27 #include "atom.h"
28 #include "erl_mmap.h"
29 #include <stddef.h>
30
31 #if HAVE_ERTS_MMAP
32
33 /* #define ERTS_MMAP_OP_RINGBUF_SZ 100 */
34
35 #if defined(DEBUG) || 0
36 # undef ERTS_MMAP_DEBUG
37 # define ERTS_MMAP_DEBUG
38 # ifndef ERTS_MMAP_OP_RINGBUF_SZ
39 # define ERTS_MMAP_OP_RINGBUF_SZ 100
40 # endif
41 #endif
42
43 #ifndef ERTS_MMAP_OP_RINGBUF_SZ
44 # define ERTS_MMAP_OP_RINGBUF_SZ 0
45 #endif
46
47 /* #define ERTS_MMAP_DEBUG_FILL_AREAS */
48
49 #ifdef ERTS_MMAP_DEBUG
50 # define ERTS_MMAP_ASSERT ERTS_ASSERT
51 #else
52 # define ERTS_MMAP_ASSERT(A) ((void) 1)
53 #endif
54
55 /*
56 * `mm->sa.bot` and `mm->sua.top` are read only after
57 * initialization, but the other pointers are not; i.e., only
58 * ERTS_MMAP_IN_SUPERCARRIER() is allowed without the mutex held.
59 */
/* True iff PTR lies anywhere inside the supercarrier [sa.bot, sua.top).
 * Uses a single unsigned compare to cover both bounds; safe without the
 * mutex since sa.bot and sua.top are read-only after init (see above). */
#define ERTS_MMAP_IN_SUPERCARRIER(PTR) 				\
    (((UWord) (PTR)) - ((UWord) mm->sa.bot) 			\
     < ((UWord) mm->sua.top) - ((UWord) mm->sa.bot))
/* True iff PTR is in the superaligned area [sa.bot, sa.top).
 * sa.top is mutable, so the lock checker asserts mm->mtx is held. */
#define ERTS_MMAP_IN_SUPERALIGNED_AREA(PTR) 			\
    (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), 		\
     (((UWord) (PTR)) - ((UWord) mm->sa.bot) 			\
      < ((UWord) mm->sa.top) - ((UWord) mm->sa.bot)))
/* True iff PTR is in the superunaligned area [sua.bot, sua.top).
 * sua.bot is mutable, so the lock checker asserts mm->mtx is held. */
#define ERTS_MMAP_IN_SUPERUNALIGNED_AREA(PTR) 			\
    (ERTS_LC_ASSERT(erts_lc_mtx_is_locked(&mm->mtx)), 		\
     (((UWord) (PTR)) - ((UWord) mm->sua.bot) 			\
      < ((UWord) mm->sua.top) - ((UWord) mm->sua.bot)))
71
72 UWord erts_page_inv_mask;
73
74 #if defined(DEBUG) || defined(ERTS_MMAP_DEBUG)
75 # undef RBT_DEBUG
76 # define RBT_DEBUG
77 #endif
78 #ifdef RBT_DEBUG
79 # define RBT_ASSERT ERTS_ASSERT
80 # define IF_RBT_DEBUG(C) C
81 #else
82 # define RBT_ASSERT(x)
83 # define IF_RBT_DEBUG(C)
84 #endif
85
/* Red-black tree node. Embedded twice in each ErtsFreeSegDesc (one node
 * per tree the descriptor is a member of). The node color is packed into
 * bit 0 of the parent pointer; use parent()/set_parent() and the
 * IS_RED/SET_RED family of macros to access the fields. */
typedef struct RBTNode_ RBTNode;
struct RBTNode_ {
    UWord parent_and_color;    /* parent ptr; color in bit 0 */
    RBTNode *left;
    RBTNode *right;
};
92
/* Node color lives in bit 0 of 'parent_and_color'.
 * A NULL node (external leaf) counts as black, per red-black tree rules. */
#define RED_FLG      (1)
#define IS_RED(N)    ((N) && ((N)->parent_and_color & RED_FLG))
#define IS_BLACK(N)  (!IS_RED(N))
#define SET_RED(N)   ((N)->parent_and_color |= RED_FLG)
#define SET_BLACK(N) ((N)->parent_and_color &= ~RED_FLG)
98
parent(RBTNode * node)99 static ERTS_INLINE RBTNode* parent(RBTNode* node)
100 {
101 return (RBTNode*) (node->parent_and_color & ~RED_FLG);
102 }
103
set_parent(RBTNode * node,RBTNode * parent)104 static ERTS_INLINE void set_parent(RBTNode* node, RBTNode* parent)
105 {
106 RBT_ASSERT(!((UWord)parent & RED_FLG));
107 node->parent_and_color = ((UWord)parent) | (node->parent_and_color & RED_FLG);
108 }
109
parent_and_color(RBTNode * parent,int color)110 static ERTS_INLINE UWord parent_and_color(RBTNode* parent, int color)
111 {
112 RBT_ASSERT(!((UWord)parent & RED_FLG));
113 RBT_ASSERT(!(color & ~RED_FLG));
114 return ((UWord)parent) | color;
115 }
116
117
/* Key orderings used by the free-segment trees. The address tree always
 * uses ADDR_ORDER; the size tree uses one of the size-first orders. */
enum SortOrder {
    ADDR_ORDER,            /* only address order */
    SA_SZ_ADDR_ORDER,      /* first super-aligned size then address order */
    SZ_REVERSE_ADDR_ORDER  /* first size then reverse address order */
};
#ifdef HARD_DEBUG
static const char* sort_order_names[] = {"Address","SuperAlignedSize-Address","Size-RevAddress"};
#endif
126
/* A red-black tree root together with the key ordering its nodes obey. */
typedef struct {
    RBTNode* root;
    enum SortOrder order;
}RBTree;
131
132 #ifdef HARD_DEBUG
133 # define HARD_CHECK_IS_MEMBER(ROOT,NODE) rbt_assert_is_member(ROOT,NODE)
134 # define HARD_CHECK_TREE(TREE,SZ) check_tree(TREE, SZ)
135 static int rbt_assert_is_member(RBTNode* root, RBTNode* node);
136 static RBTNode* check_tree(RBTree* tree, Uint);
137 #else
138 # define HARD_CHECK_IS_MEMBER(ROOT,NODE)
139 # define HARD_CHECK_TREE(TREE,SZ)
140 #endif
141
142 #if ERTS_MMAP_OP_RINGBUF_SZ
143
144 static int mmap_op_ix;
145
146 typedef enum {
147 ERTS_OP_TYPE_NONE,
148 ERTS_OP_TYPE_MMAP,
149 ERTS_OP_TYPE_MUNMAP,
150 ERTS_OP_TYPE_MREMAP
151 } ErtsMMapOpType;
152
153 typedef struct {
154 ErtsMMapOpType type;
155 void *result;
156 UWord in_size;
157 UWord out_size;
158 void *old_ptr;
159 UWord old_size;
160 } ErtsMMapOp;
161
162 static ErtsMMapOp mmap_ops[ERTS_MMAP_OP_RINGBUF_SZ];
163
164 #define ERTS_MMAP_OP_RINGBUF_INIT() \
165 do { \
166 int ix__; \
167 for (ix__ = 0; ix__ < ERTS_MMAP_OP_RINGBUF_SZ; ix__++) {\
168 mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
169 mmap_ops[ix__].result = NULL; \
170 mmap_ops[ix__].in_size = 0; \
171 mmap_ops[ix__].out_size = 0; \
172 mmap_ops[ix__].old_ptr = NULL; \
173 mmap_ops[ix__].old_size = 0; \
174 } \
175 mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
176 } while (0)
177
178 #define ERTS_MMAP_OP_START(SZ) \
179 do { \
180 int ix__; \
181 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
182 mmap_op_ix = 0; \
183 ix__ = mmap_op_ix; \
184 mmap_ops[ix__].type = ERTS_OP_TYPE_MMAP; \
185 mmap_ops[ix__].result = NULL; \
186 mmap_ops[ix__].in_size = (SZ); \
187 mmap_ops[ix__].out_size = 0; \
188 mmap_ops[ix__].old_ptr = NULL; \
189 mmap_ops[ix__].old_size = 0; \
190 } while (0)
191
192 #define ERTS_MMAP_OP_END(PTR, SZ) \
193 do { \
194 int ix__ = mmap_op_ix; \
195 mmap_ops[ix__].result = (PTR); \
196 mmap_ops[ix__].out_size = (SZ); \
197 } while (0)
198
199 #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ) \
200 do { \
201 erts_mtx_lock(&mm->mtx); \
202 ERTS_MMAP_OP_START((IN_SZ)); \
203 ERTS_MMAP_OP_END((RES), (OUT_SZ)); \
204 erts_mtx_unlock(&mm->mtx); \
205 } while (0)
206
207 #define ERTS_MUNMAP_OP(PTR, SZ) \
208 do { \
209 int ix__; \
210 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
211 mmap_op_ix = 0; \
212 ix__ = mmap_op_ix; \
213 mmap_ops[ix__].type = ERTS_OP_TYPE_MUNMAP; \
214 mmap_ops[ix__].result = NULL; \
215 mmap_ops[ix__].in_size = 0; \
216 mmap_ops[ix__].out_size = 0; \
217 mmap_ops[ix__].old_ptr = (PTR); \
218 mmap_ops[ix__].old_size = (SZ); \
219 } while (0)
220
221 #define ERTS_MUNMAP_OP_LCK(PTR, SZ) \
222 do { \
223 erts_mtx_lock(&mm->mtx); \
224 ERTS_MUNMAP_OP((PTR), (SZ)); \
225 erts_mtx_unlock(&mm->mtx); \
226 } while (0)
227
228 #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ) \
229 do { \
230 int ix__; \
231 if (++mmap_op_ix >= ERTS_MMAP_OP_RINGBUF_SZ) \
232 mmap_op_ix = 0; \
233 ix__ = mmap_op_ix; \
234 mmap_ops[ix__].type = ERTS_OP_TYPE_MREMAP; \
235 mmap_ops[ix__].result = NULL; \
236 mmap_ops[ix__].in_size = (IN_SZ); \
237 mmap_ops[ix__].out_size = (OLD_SZ); \
238 mmap_ops[ix__].old_ptr = (OLD_PTR); \
239 mmap_ops[ix__].old_size = (OLD_SZ); \
240 } while (0)
241
/* Record the result of the mremap operation started by
 * ERTS_MREMAP_OP_START() in the current ring-buffer slot.
 * Consistency fix: use the ix__ snapshot for both stores (as
 * ERTS_MMAP_OP_END does) instead of re-reading mmap_op_ix. */
#define ERTS_MREMAP_OP_END(PTR, SZ) 				\
    do { 							\
	int ix__ = mmap_op_ix; 					\
	mmap_ops[ix__].result = (PTR); 				\
	mmap_ops[ix__].out_size = (SZ); 			\
    } while (0)
248
249 #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ) \
250 do { \
251 erts_mtx_lock(&mm->mtx); \
252 ERTS_MREMAP_OP_START((OLD_PTR), (OLD_SZ), (IN_SZ)); \
253 ERTS_MREMAP_OP_END((RES), (OUT_SZ)); \
254 erts_mtx_unlock(&mm->mtx); \
255 } while (0)
256
257 #define ERTS_MMAP_OP_ABORT() \
258 do { \
259 int ix__ = mmap_op_ix; \
260 mmap_ops[ix__].type = ERTS_OP_TYPE_NONE; \
261 mmap_ops[ix__].result = NULL; \
262 mmap_ops[ix__].in_size = 0; \
263 mmap_ops[ix__].out_size = 0; \
264 mmap_ops[ix__].old_ptr = NULL; \
265 mmap_ops[ix__].old_size = 0; \
266 if (--mmap_op_ix < 0) \
267 mmap_op_ix = ERTS_MMAP_OP_RINGBUF_SZ-1; \
268 } while (0)
269
270 #else
271
272 #define ERTS_MMAP_OP_RINGBUF_INIT()
273 #define ERTS_MMAP_OP_START(SZ)
274 #define ERTS_MMAP_OP_END(PTR, SZ)
275 #define ERTS_MMAP_OP_LCK(RES, IN_SZ, OUT_SZ)
276 #define ERTS_MUNMAP_OP(PTR, SZ)
277 #define ERTS_MUNMAP_OP_LCK(PTR, SZ)
278 #define ERTS_MREMAP_OP_START(OLD_PTR, OLD_SZ, IN_SZ)
279 #define ERTS_MREMAP_OP_END(PTR, SZ)
280 #define ERTS_MREMAP_OP_LCK(RES, OLD_PTR, OLD_SZ, IN_SZ, OUT_SZ)
281 #define ERTS_MMAP_OP_ABORT()
282
283 #endif
284
/* Descriptor for one free segment [start,end) of the supercarrier.
 * Each descriptor is a member of two trees at once (size and address
 * order). NOTE: while a descriptor sits on the mapper's descriptor
 * free list, its 'start' field is reused as the next-pointer
 * (see add_free_desc_area()/free_desc()). */
typedef struct {
    RBTNode snode;      /* node in 'stree' */
    RBTNode anode;      /* node in 'atree' */
    char* start;
    char* end;
}ErtsFreeSegDesc;
291
/* The set of free segments, maintained in two orders: by size for
 * best-fit lookup and by address for coalescing with neighbours. */
typedef struct {
    RBTree stree;       /* size ordered tree */
    RBTree atree;       /* address ordered tree */
    Uint nseg;          /* number of segments (same in both trees) */
}ErtsFreeSegMap;
297
/* State of one memory mapper instance (e.g. erts_dflt_mmapper). */
struct ErtsMemMapper_ {
    int (*reserve_physical)(char *, UWord);    /* commit physical memory */
    void (*unreserve_physical)(char *, UWord); /* release physical memory */
    int supercarrier;   /* boolean: a supercarrier is in use */
    int no_os_mmap;     /* boolean: direct OS mmap not allowed */
    /*
     * Super unaligned area is located above super aligned
     * area. That is, `sa.bot` is beginning of the super
     * carrier, `sua.top` is the end of the super carrier,
     * and sa.top and sua.bot moves towards eachother.
     */
    struct {
	char *top;
	char *bot;
	ErtsFreeSegMap map;
    } sua;
    struct {
	char *top;
	char *bot;
	ErtsFreeSegMap map;
    } sa;
#if HAVE_MMAP && (!defined(MAP_ANON) && !defined(MAP_ANONYMOUS))
    int mmap_fd;        /* fd backing mmap when no anonymous mapping exists */
#endif
    erts_mtx_t mtx;     /* guards mutable mapper state; see comment at
			   ERTS_MMAP_IN_SUPERCARRIER() above */
    struct {
	char *free_list;     /* free ErtsFreeSegDesc list, linked via 'start' */
	char *unused_start;  /* page-aligned area not yet carved into descs */
	char *unused_end;
	char *new_area_hint;
	Uint reserved;
    } desc;
    struct {
	UWord free_seg_descs;   /* total descriptors ever created */
	struct {
	    UWord curr;         /* descriptors currently in use */
	    UWord max;          /* high-water mark of 'curr' */
	} free_segs;
    } no;
    struct {
	struct {
	    UWord total;
	    struct {
		UWord total;
		UWord sa;       /* used by superaligned allocations */
		UWord sua;      /* used by superunaligned allocations */
	    } used;
	} supercarrier;
	struct {
	    UWord used;         /* bytes mapped directly from the OS */
	} os;
    } size;
};
351
352 ErtsMemMapper erts_dflt_mmapper;
353
354 #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
355 ErtsMemMapper erts_literal_mmapper;
356 char* erts_literals_start;
357 UWord erts_literals_size;
358 #endif
359
360
/*
 * Accounting helpers for mm->size. These read-modify-write shared
 * counters; NOTE(review): callers appear to hold mm->mtx — confirm.
 */
/* Superaligned supercarrier allocation grew by SZ bytes. */
#define ERTS_MMAP_SIZE_SC_SA_INC(SZ) 					\
    do { 								\
	mm->size.supercarrier.used.total += (SZ); 			\
	mm->size.supercarrier.used.sa += (SZ); 				\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total 		\
			 <= mm->size.supercarrier.total); 		\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa 			\
			 <= mm->size.supercarrier.used.total); 		\
    } while (0)
/* Superaligned supercarrier allocation shrank by SZ bytes. */
#define ERTS_MMAP_SIZE_SC_SA_DEC(SZ) 					\
    do { 								\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); 	\
	mm->size.supercarrier.used.total -= (SZ); 			\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sa >= (SZ)); 	\
	mm->size.supercarrier.used.sa -= (SZ); 				\
    } while (0)
/* Superunaligned supercarrier allocation grew by SZ bytes. */
#define ERTS_MMAP_SIZE_SC_SUA_INC(SZ) 					\
    do { 								\
	mm->size.supercarrier.used.total += (SZ); 			\
	mm->size.supercarrier.used.sua += (SZ); 			\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total 		\
			 <= mm->size.supercarrier.total); 		\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua 		\
			 <= mm->size.supercarrier.used.total); 		\
    } while (0)
/* Superunaligned supercarrier allocation shrank by SZ bytes. */
#define ERTS_MMAP_SIZE_SC_SUA_DEC(SZ) 					\
    do { 								\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.total >= (SZ)); 	\
	mm->size.supercarrier.used.total -= (SZ); 			\
	ERTS_MMAP_ASSERT(mm->size.supercarrier.used.sua >= (SZ)); 	\
	mm->size.supercarrier.used.sua -= (SZ); 			\
    } while (0)
/* Memory mapped directly from the OS grew by SZ bytes
 * (the first assert catches unsigned wrap-around). */
#define ERTS_MMAP_SIZE_OS_INC(SZ) 					\
    do { 								\
	ERTS_MMAP_ASSERT(mm->size.os.used + (SZ) >= (SZ)); 		\
	mm->size.os.used += (SZ); 					\
    } while (0)
/* Memory mapped directly from the OS shrank by SZ bytes. */
#define ERTS_MMAP_SIZE_OS_DEC(SZ) 					\
    do { 								\
	ERTS_MMAP_ASSERT(mm->size.os.used >= (SZ)); 			\
	mm->size.os.used -= (SZ); 					\
    } while (0)
403
404
405 static void
add_free_desc_area(ErtsMemMapper * mm,char * start,char * end)406 add_free_desc_area(ErtsMemMapper* mm, char *start, char *end)
407 {
408 ERTS_MMAP_ASSERT(end == (void *) 0 || end > start);
409 if (sizeof(ErtsFreeSegDesc) <= ((UWord) end) - ((UWord) start)) {
410 UWord no;
411 ErtsFreeSegDesc *prev_desc, *desc;
412 char *desc_end;
413
414 no = 1;
415 prev_desc = (ErtsFreeSegDesc *) start;
416 prev_desc->start = mm->desc.free_list;
417 desc = (ErtsFreeSegDesc *) (start + sizeof(ErtsFreeSegDesc));
418 desc_end = start + 2*sizeof(ErtsFreeSegDesc);
419
420 while (desc_end <= end) {
421 desc->start = (char *) prev_desc;
422 prev_desc = desc;
423 desc = (ErtsFreeSegDesc *) desc_end;
424 desc_end += sizeof(ErtsFreeSegDesc);
425 no++;
426 }
427 mm->desc.free_list = (char *) prev_desc;
428 mm->no.free_seg_descs += no;
429 }
430 }
431
432 static ErtsFreeSegDesc *
add_unused_free_desc_area(ErtsMemMapper * mm)433 add_unused_free_desc_area(ErtsMemMapper* mm)
434 {
435 char *ptr;
436 if (!mm->desc.unused_start)
437 return NULL;
438
439 ERTS_MMAP_ASSERT(mm->desc.unused_end);
440 ERTS_MMAP_ASSERT(ERTS_PAGEALIGNED_SIZE
441 <= mm->desc.unused_end - mm->desc.unused_start);
442
443 ptr = mm->desc.unused_start + ERTS_PAGEALIGNED_SIZE;
444 add_free_desc_area(mm, mm->desc.unused_start, ptr);
445
446 if ((mm->desc.unused_end - ptr) >= ERTS_PAGEALIGNED_SIZE)
447 mm->desc.unused_start = ptr;
448 else
449 mm->desc.unused_end = mm->desc.unused_start = NULL;
450
451 ERTS_MMAP_ASSERT(mm->desc.free_list);
452 return (ErtsFreeSegDesc *) mm->desc.free_list;
453 }
454
455 static ERTS_INLINE ErtsFreeSegDesc *
alloc_desc(ErtsMemMapper * mm)456 alloc_desc(ErtsMemMapper* mm)
457 {
458 ErtsFreeSegDesc *res;
459 res = (ErtsFreeSegDesc *) mm->desc.free_list;
460 if (!res) {
461 res = add_unused_free_desc_area(mm);
462 if (!res)
463 return NULL;
464 }
465 mm->desc.free_list = res->start;
466 ASSERT(mm->no.free_segs.curr < mm->no.free_seg_descs);
467 mm->no.free_segs.curr++;
468 if (mm->no.free_segs.max < mm->no.free_segs.curr)
469 mm->no.free_segs.max = mm->no.free_segs.curr;
470 return res;
471 }
472
473 static ERTS_INLINE void
free_desc(ErtsMemMapper * mm,ErtsFreeSegDesc * desc)474 free_desc(ErtsMemMapper* mm, ErtsFreeSegDesc *desc)
475 {
476 desc->start = mm->desc.free_list;
477 mm->desc.free_list = (char *) desc;
478 ERTS_MMAP_ASSERT(mm->no.free_segs.curr > 0);
479 mm->no.free_segs.curr--;
480 }
481
anode_to_desc(RBTNode * anode)482 static ERTS_INLINE ErtsFreeSegDesc* anode_to_desc(RBTNode* anode)
483 {
484 return (ErtsFreeSegDesc*) ((char*)anode - offsetof(ErtsFreeSegDesc, anode));
485 }
486
snode_to_desc(RBTNode * snode)487 static ERTS_INLINE ErtsFreeSegDesc* snode_to_desc(RBTNode* snode)
488 {
489 return (ErtsFreeSegDesc*) ((char*)snode - offsetof(ErtsFreeSegDesc, snode));
490 }
491
node_to_desc(enum SortOrder order,RBTNode * node)492 static ERTS_INLINE ErtsFreeSegDesc* node_to_desc(enum SortOrder order, RBTNode* node)
493 {
494 return order==ADDR_ORDER ? anode_to_desc(node) : snode_to_desc(node);
495 }
496
usable_size(enum SortOrder order,ErtsFreeSegDesc * desc)497 static ERTS_INLINE SWord usable_size(enum SortOrder order,
498 ErtsFreeSegDesc* desc)
499 {
500 return ((order == SA_SZ_ADDR_ORDER) ?
501 ERTS_SUPERALIGNED_FLOOR(desc->end) - ERTS_SUPERALIGNED_CEILING(desc->start)
502 : desc->end - desc->start);
503 }
504
#ifdef HARD_DEBUG
/* Three-way comparison of two distinct tree nodes under 'order':
 * negative when lhs sorts first, positive when rhs sorts first. */
static ERTS_INLINE SWord cmp_nodes(enum SortOrder order,
                                   RBTNode* lhs, RBTNode* rhs)
{
    ErtsFreeSegDesc* ldesc = node_to_desc(order, lhs);
    ErtsFreeSegDesc* rdesc = node_to_desc(order, rhs);

    RBT_ASSERT(lhs != rhs);
    if (order != ADDR_ORDER) {
        /* Size is the primary key for the size-ordered trees. */
        SWord sz_diff = usable_size(order, ldesc) - usable_size(order, rdesc);
        if (sz_diff)
            return sz_diff;
    }
    /* Address is the tie-breaker; reversed for SZ_REVERSE_ADDR_ORDER. */
    if (order == SZ_REVERSE_ADDR_ORDER)
        return (char*)rdesc->start - (char*)ldesc->start;
    return (char*)ldesc->start - (char*)rdesc->start;
}
#endif /* HARD_DEBUG */
524
cmp_with_node(enum SortOrder order,SWord sz,char * addr,RBTNode * rhs)525 static ERTS_INLINE SWord cmp_with_node(enum SortOrder order,
526 SWord sz, char* addr, RBTNode* rhs)
527 {
528 ErtsFreeSegDesc* rdesc;
529 if (order != ADDR_ORDER) {
530 SWord diff;
531 rdesc = snode_to_desc(rhs);
532 diff = sz - usable_size(order, rdesc);
533 if (diff) return diff;
534 }
535 else
536 rdesc = anode_to_desc(rhs);
537
538 if (order != SZ_REVERSE_ADDR_ORDER)
539 return addr - (char*)rdesc->start;
540 else
541 return (char*)rdesc->start - addr;
542 }
543
544
545 static ERTS_INLINE void
left_rotate(RBTNode ** root,RBTNode * x)546 left_rotate(RBTNode **root, RBTNode *x)
547 {
548 RBTNode *y = x->right;
549 x->right = y->left;
550 if (y->left)
551 set_parent(y->left, x);
552 set_parent(y, parent(x));
553 if (!parent(y)) {
554 RBT_ASSERT(*root == x);
555 *root = y;
556 }
557 else if (x == parent(x)->left)
558 parent(x)->left = y;
559 else {
560 RBT_ASSERT(x == parent(x)->right);
561 parent(x)->right = y;
562 }
563 y->left = x;
564 set_parent(x, y);
565 }
566
567 static ERTS_INLINE void
right_rotate(RBTNode ** root,RBTNode * x)568 right_rotate(RBTNode **root, RBTNode *x)
569 {
570 RBTNode *y = x->left;
571 x->left = y->right;
572 if (y->right)
573 set_parent(y->right, x);
574 set_parent(y, parent(x));
575 if (!parent(y)) {
576 RBT_ASSERT(*root == x);
577 *root = y;
578 }
579 else if (x == parent(x)->right)
580 parent(x)->right = y;
581 else {
582 RBT_ASSERT(x == parent(x)->left);
583 parent(x)->left = y;
584 }
585 y->right = x;
586 set_parent(x, y);
587 }
588
589 /*
590 * Replace node x with node y
591 * NOTE: segment descriptor of y is not changed
592 */
593 static ERTS_INLINE void
replace(RBTNode ** root,RBTNode * x,RBTNode * y)594 replace(RBTNode **root, RBTNode *x, RBTNode *y)
595 {
596
597 if (!parent(x)) {
598 RBT_ASSERT(*root == x);
599 *root = y;
600 }
601 else if (x == parent(x)->left)
602 parent(x)->left = y;
603 else {
604 RBT_ASSERT(x == parent(x)->right);
605 parent(x)->right = y;
606 }
607 if (x->left) {
608 RBT_ASSERT(parent(x->left) == x);
609 set_parent(x->left, y);
610 }
611 if (x->right) {
612 RBT_ASSERT(parent(x->right) == x);
613 set_parent(x->right, y);
614 }
615
616 y->parent_and_color = x->parent_and_color;
617 y->right = x->right;
618 y->left = x->left;
619 }
620
/* Restore the red-black invariants after 'node' has been linked in as a
 * red leaf whose parent is also red. Classic RB-INSERT-FIXUP: bubble the
 * red-red violation upward (recolor when the uncle is red, rotate and
 * stop when it is black). */
static void
tree_insert_fixup(RBTNode** root, RBTNode *node)
{
    RBTNode *x = node, *y, *papa_x, *granpa_x;

    /*
     * Rearrange the tree so that it satisfies the Red-Black Tree properties
     */

    papa_x = parent(x);
    RBT_ASSERT(x != *root && IS_RED(papa_x));
    do {

        /*
         * x and its parent are both red. Move the red pair up the tree
         * until we get to the root or until we can separate them.
         */

        granpa_x = parent(papa_x);
        RBT_ASSERT(IS_RED(x));
        RBT_ASSERT(IS_BLACK(granpa_x));
        RBT_ASSERT(granpa_x);

        if (papa_x == granpa_x->left) {
            y = granpa_x->right;    /* y is x's uncle */
            if (IS_RED(y)) {
                /* Case 1: red uncle -- recolor and continue from grandparent */
                SET_BLACK(y);
                SET_BLACK(papa_x);
                SET_RED(granpa_x);
                x = granpa_x;
            }
            else {

                if (x == papa_x->right) {
                    /* Case 2: inner child -- rotate into the outer position */
                    left_rotate(root, papa_x);
                    papa_x = x;
                    x = papa_x->left;
                }

                RBT_ASSERT(x == granpa_x->left->left);
                RBT_ASSERT(IS_RED(x));
                RBT_ASSERT(IS_RED(papa_x));
                RBT_ASSERT(IS_BLACK(granpa_x));
                RBT_ASSERT(IS_BLACK(y));

                /* Case 3: outer child -- recolor and rotate; violation fixed */
                SET_BLACK(papa_x);
                SET_RED(granpa_x);
                right_rotate(root, granpa_x);

                RBT_ASSERT(x == parent(x)->left);
                RBT_ASSERT(IS_RED(x));
                RBT_ASSERT(IS_RED(parent(x)->right));
                RBT_ASSERT(IS_BLACK(parent(x)));
                break;
            }
        }
        else {
            /* Mirror image of the three cases above */
            RBT_ASSERT(papa_x == granpa_x->right);
            y = granpa_x->left;
            if (IS_RED(y)) {
                SET_BLACK(y);
                SET_BLACK(papa_x);
                SET_RED(granpa_x);
                x = granpa_x;
            }
            else {

                if (x == papa_x->left) {
                    right_rotate(root, papa_x);
                    papa_x = x;
                    x = papa_x->right;
                }

                RBT_ASSERT(x == granpa_x->right->right);
                RBT_ASSERT(IS_RED(x));
                RBT_ASSERT(IS_RED(papa_x));
                RBT_ASSERT(IS_BLACK(granpa_x));
                RBT_ASSERT(IS_BLACK(y));

                SET_BLACK(papa_x);
                SET_RED(granpa_x);
                left_rotate(root, granpa_x);

                RBT_ASSERT(x == parent(x)->right);
                RBT_ASSERT(IS_RED(x));
                RBT_ASSERT(IS_RED(parent(x)->left));
                RBT_ASSERT(IS_BLACK(parent(x)));
                break;
            }
        }
    } while (x != *root && (papa_x=parent(x), IS_RED(papa_x)));

    /* The root is always black. */
    SET_BLACK(*root);
}
715
/* Unlink 'del' from 'tree' and rebalance. Classic RB-DELETE: splice out
 * the node (or its in-order successor when it has two children), then,
 * if the spliced-out node was black, run the fixup loop to restore the
 * black-height invariant. A stack-allocated dummy node (null_x) stands
 * in for the missing child so the fixup has something to start from. */
static void
rbt_delete(RBTree* tree, RBTNode* del)
{
    Uint spliced_is_black;
    RBTNode *x, *y, *z = del, *papa_y;
    RBTNode null_x; /* null_x is used to get the fixup started when we
                       splice out a node without children. */

    HARD_CHECK_IS_MEMBER(tree->root, del);
    HARD_CHECK_TREE(tree, 0);

    null_x.parent_and_color = parent_and_color(NULL, !RED_FLG);

    /* Remove node from tree... */

    /* Find node to splice out */
    if (!z->left || !z->right)
        y = z;
    else
        /* Set y to z:s successor */
        for(y = z->right; y->left; y = y->left);
    /* splice out y */
    x = y->left ? y->left : y->right;
    spliced_is_black = IS_BLACK(y);
    papa_y = parent(y);
    if (x) {
        set_parent(x, papa_y);
    }
    else if (spliced_is_black) {
        /* y had no children; substitute the dummy so fixup can run. */
        x = &null_x;
        x->right = x->left = NULL;
        x->parent_and_color = parent_and_color(papa_y, !RED_FLG);
        y->left = x;
    }

    if (!papa_y) {
        RBT_ASSERT(tree->root == y);
        tree->root = x;
    }
    else {
        if (y == papa_y->left) {
            papa_y->left = x;
        }
        else {
            RBT_ASSERT(y == papa_y->right);
            papa_y->right = x;
        }
    }
    if (y != z) {
        /* We spliced out the successor of z; replace z by the successor */
        RBT_ASSERT(z != &null_x);
        replace(&tree->root, z, y);
    }

    if (spliced_is_black) {
        RBTNode* papa_x;
        /* We removed a black node which makes the resulting tree
           violate the Red-Black Tree properties. Fixup tree... */

        papa_x = parent(x);
        while (IS_BLACK(x) && papa_x) {

            /*
             * x has an "extra black" which we move up the tree
             * until we reach the root or until we can get rid of it.
             *
             * y is the sibbling of x
             */

            if (x == papa_x->left) {
                y = papa_x->right;
                RBT_ASSERT(y);
                if (IS_RED(y)) {
                    /* Case 1: red sibling -- rotate to get a black sibling */
                    RBT_ASSERT(y->right);
                    RBT_ASSERT(y->left);
                    SET_BLACK(y);
                    RBT_ASSERT(IS_BLACK(papa_x));
                    SET_RED(papa_x);
                    left_rotate(&tree->root, papa_x);
                    RBT_ASSERT(papa_x == parent(x));
                    y = papa_x->right;
                }
                RBT_ASSERT(y);
                RBT_ASSERT(IS_BLACK(y));
                if (IS_BLACK(y->left) && IS_BLACK(y->right)) {
                    /* Case 2: both nephews black -- push blackness up */
                    SET_RED(y);
                }
                else {
                    if (IS_BLACK(y->right)) {
                        /* Case 3: inner nephew red -- rotate toward outer */
                        SET_BLACK(y->left);
                        SET_RED(y);
                        right_rotate(&tree->root, y);
                        RBT_ASSERT(papa_x == parent(x));
                        y = papa_x->right;
                    }
                    RBT_ASSERT(y);
                    if (IS_RED(papa_x)) {

                        SET_BLACK(papa_x);
                        SET_RED(y);
                    }
                    /* Case 4: outer nephew red -- final rotate, done */
                    RBT_ASSERT(y->right);
                    SET_BLACK(y->right);
                    left_rotate(&tree->root, papa_x);
                    x = tree->root;
                    break;
                }
            }
            else {
                /* Mirror image of the four cases above */
                RBT_ASSERT(x == papa_x->right);
                y = papa_x->left;
                RBT_ASSERT(y);
                if (IS_RED(y)) {
                    RBT_ASSERT(y->right);
                    RBT_ASSERT(y->left);
                    SET_BLACK(y);
                    RBT_ASSERT(IS_BLACK(papa_x));
                    SET_RED(papa_x);
                    right_rotate(&tree->root, papa_x);
                    RBT_ASSERT(papa_x == parent(x));
                    y = papa_x->left;
                }
                RBT_ASSERT(y);
                RBT_ASSERT(IS_BLACK(y));
                if (IS_BLACK(y->right) && IS_BLACK(y->left)) {
                    SET_RED(y);
                }
                else {
                    if (IS_BLACK(y->left)) {
                        SET_BLACK(y->right);
                        SET_RED(y);
                        left_rotate(&tree->root, y);
                        RBT_ASSERT(papa_x == parent(x));
                        y = papa_x->left;
                    }
                    RBT_ASSERT(y);
                    if (IS_RED(papa_x)) {
                        SET_BLACK(papa_x);
                        SET_RED(y);
                    }
                    RBT_ASSERT(y->left);
                    SET_BLACK(y->left);
                    right_rotate(&tree->root, papa_x);
                    x = tree->root;
                    break;
                }
            }
            x = papa_x;
            papa_x = parent(x);
        }
        SET_BLACK(x);

        /* Unlink the dummy node again, wherever it ended up. */
        papa_x = parent(&null_x);
        if (papa_x) {
            if (papa_x->left == &null_x)
                papa_x->left = NULL;
            else {
                RBT_ASSERT(papa_x->right == &null_x);
                papa_x->right = NULL;
            }
            RBT_ASSERT(!null_x.left);
            RBT_ASSERT(!null_x.right);
        }
        else if (tree->root == &null_x) {
            tree->root = NULL;
            RBT_ASSERT(!null_x.left);
            RBT_ASSERT(!null_x.right);
        }
    }
    HARD_CHECK_TREE(tree, 0);
}
887
888
/* Insert 'node' into 'tree' according to the tree's sort order.
 * The node is linked in as a red leaf at the ordinary BST position and
 * tree_insert_fixup() is called if that creates a red-red violation.
 * The key (segment address and size) is taken from the node's
 * enclosing descriptor, which must already be initialized. */
static void
rbt_insert(RBTree* tree, RBTNode* node)
{
#ifdef RBT_DEBUG
    ErtsFreeSegDesc *dbg_under=NULL, *dbg_over=NULL;
#endif
    ErtsFreeSegDesc* desc = node_to_desc(tree->order, node);
    char* seg_addr = desc->start;
    SWord seg_sz = desc->end - desc->start;

    HARD_CHECK_TREE(tree, 0);

    node->left = NULL;
    node->right = NULL;

    if (!tree->root) {
        /* First node: becomes the (black) root. */
        node->parent_and_color = parent_and_color(NULL, !RED_FLG);
        tree->root = node;
    }
    else {
        RBTNode *x = tree->root;
        /* Ordinary BST descent to find the insertion point. */
        while (1) {
            SWord diff = cmp_with_node(tree->order, seg_sz, seg_addr, x);
            if (diff < 0) {
                IF_RBT_DEBUG(dbg_over = node_to_desc(tree->order, x));
                if (!x->left) {
                    node->parent_and_color = parent_and_color(x, RED_FLG);
                    x->left = node;
                    break;
                }
                x = x->left;
            }
            else {
                /* Keys are unique (segment addresses never collide). */
                RBT_ASSERT(diff > 0);
                IF_RBT_DEBUG(dbg_under = node_to_desc(tree->order, x));
                if (!x->right) {
                    node->parent_and_color = parent_and_color(x, RED_FLG);
                    x->right = node;
                    break;
                }
                x = x->right;
            }
        }

        RBT_ASSERT(parent(node));
#ifdef RBT_DEBUG
        if (tree->order == ADDR_ORDER) {
            /* Free segments must stay non-overlapping and non-adjacent. */
            RBT_ASSERT(!dbg_under || dbg_under->end < desc->start);
            RBT_ASSERT(!dbg_over || dbg_over->start > desc->end);
        }
#endif
        RBT_ASSERT(IS_RED(node));
        if (IS_RED(parent(node)))
            tree_insert_fixup(&tree->root, node);
    }
    HARD_CHECK_TREE(tree, 0);
}
946
947 /*
948 * Traverse tree in (reverse) sorting order
949 */
/*
 * Traverse tree in (reverse) sorting order
 *
 * Iterative in-order traversal without an explicit stack: the parent
 * pointers stored in each node are used to walk back up, and a small
 * state machine tracks whether we are descending left, visiting the
 * node, descending right, or returning to the parent. Under HARD_DEBUG
 * the walk also verifies black-height and depth invariants.
 */
static void
rbt_foreach_node(RBTree* tree,
                 void (*fn)(RBTNode*,void*),
                 void* arg, int reverse)
{
#ifdef HARD_DEBUG
    Sint blacks = -1;       /* black height seen at the first leaf */
    Sint curr_blacks = 1;
    Uint depth = 1;
    Uint max_depth = 0;
    Uint node_cnt = 0;
#endif
    enum { RECURSE_LEFT, DO_NODE, RECURSE_RIGHT, RETURN_TO_PARENT }state;
    RBTNode *x = tree->root;

    RBT_ASSERT(!x || !parent(x));

    state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
    while (x) {
        switch (state) {
        case RECURSE_LEFT:
            if (x->left) {
                RBT_ASSERT(parent(x->left) == x);
#ifdef HARD_DEBUG
                ++depth;
                if (IS_BLACK(x->left))
                    curr_blacks++;
#endif
                x = x->left;
                state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
            }
            else {
#ifdef HARD_DEBUG
                /* Every root-to-leaf path must contain the same number
                 * of black nodes. */
                if (blacks < 0)
                    blacks = curr_blacks;
                RBT_ASSERT(blacks == curr_blacks);
#endif
                state = reverse ? RETURN_TO_PARENT : DO_NODE;
            }
            break;

        case DO_NODE:
#ifdef HARD_DEBUG
            ++node_cnt;
            if (depth > max_depth)
                max_depth = depth;
#endif
            (*fn) (x, arg); /* Do it! */
            state = reverse ? RECURSE_LEFT : RECURSE_RIGHT;
            break;

        case RECURSE_RIGHT:
            if (x->right) {
                RBT_ASSERT(parent(x->right) == x);
#ifdef HARD_DEBUG
                ++depth;
                if (IS_BLACK(x->right))
                    curr_blacks++;
#endif
                x = x->right;
                state = reverse ? RECURSE_RIGHT : RECURSE_LEFT;
            }
            else {
#ifdef HARD_DEBUG
                if (blacks < 0)
                    blacks = curr_blacks;
                RBT_ASSERT(blacks == curr_blacks);
#endif
                state = reverse ? DO_NODE : RETURN_TO_PARENT;
            }
            break;

        case RETURN_TO_PARENT:
#ifdef HARD_DEBUG
            if (IS_BLACK(x))
                curr_blacks--;
            --depth;
#endif
            if (parent(x)) {
                /* Coming up from a left child means this parent is the
                 * next in-order node (mirrored when reversed). */
                if (x == parent(x)->left) {
                    state = reverse ? RETURN_TO_PARENT : DO_NODE;
                }
                else {
                    RBT_ASSERT(x == parent(x)->right);
                    state = reverse ? DO_NODE : RETURN_TO_PARENT;
                }
            }
            x = parent(x);
            break;
        }
    }
#ifdef HARD_DEBUG
    RBT_ASSERT(depth == 0 || (!tree->root && depth==1));
    RBT_ASSERT(curr_blacks == 0);
    RBT_ASSERT((1 << (max_depth/2)) <= node_cnt);
#endif
}
1047
1048 #if defined(RBT_DEBUG) || defined(HARD_DEBUG_MSEG)
/* In-order predecessor of 'node', or NULL if node is the smallest. */
static RBTNode* rbt_prev_node(RBTNode* node)
{
    RBTNode* x;

    if (node->left) {
        /* Predecessor is the rightmost node of the left subtree. */
        x = node->left;
        while (x->right)
            x = x->right;
        return x;
    }
    /* Otherwise: first ancestor reached from a right child. */
    for (x = node; parent(x); x = parent(x)) {
        if (x == parent(x)->right)
            return parent(x);
    }
    return NULL;
}
/* In-order successor of 'node', or NULL if node is the largest. */
static RBTNode* rbt_next_node(RBTNode* node)
{
    RBTNode* x;

    if (node->right) {
        /* Successor is the leftmost node of the right subtree. */
        x = node->right;
        while (x->left)
            x = x->left;
        return x;
    }
    /* Otherwise: first ancestor reached from a left child. */
    for (x = node; parent(x); x = parent(x)) {
        if (x == parent(x)->left)
            return parent(x);
    }
    return NULL;
}
1077 #endif /* RBT_DEBUG || HARD_DEBUG_MSEG */
1078
1079
1080 /* The API to keep track of a bunch of separated (free) segments
1081 (non-overlapping and non-adjacent).
1082 */
1083 static void init_free_seg_map(ErtsFreeSegMap*, enum SortOrder);
1084 static void adjacent_free_seg(ErtsFreeSegMap*, char* start, char* end,
1085 ErtsFreeSegDesc** under, ErtsFreeSegDesc** over);
1086 static void insert_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
1087 static void resize_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*, char* start, char* end);
1088 static void delete_free_seg(ErtsFreeSegMap*, ErtsFreeSegDesc*);
1089 static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap*, SWord sz);
1090
1091
/* Initialize an empty free-segment map. The address tree always uses
 * ADDR_ORDER; the size tree uses the caller-supplied order. */
static void init_free_seg_map(ErtsFreeSegMap* map, enum SortOrder order)
{
    map->nseg = 0;
    map->atree.order = ADDR_ORDER;
    map->atree.root = NULL;
    map->stree.order = order;
    map->stree.root = NULL;
}
1100
1101 /* Lookup directly adjacent free segments to the given area [start->end].
1102 * The given area must not contain any free segments.
1103 */
static void adjacent_free_seg(ErtsFreeSegMap* map, char* start, char* end,
                              ErtsFreeSegDesc** under, ErtsFreeSegDesc** over)
{
    RBTNode* x = map->atree.root;

    *under = NULL;
    *over = NULL;
    /* One descent of the address tree finds both neighbours, if any:
     * a segment ending exactly at 'start' and one beginning at 'end'. */
    while (x) {
        ErtsFreeSegDesc* desc = anode_to_desc(x);
        if (start < desc->start) {
            RBT_ASSERT(end <= desc->start);
            if (end == desc->start) {
                RBT_ASSERT(!*over);
                *over = desc;
            }
            x = x->left;
        }
        else {
            RBT_ASSERT(start >= desc->end);
            if (start == desc->end) {
                RBT_ASSERT(!*under);
                *under = desc;
            }
            x = x->right;
        }
    }
}
1130
1131 /* Initialize 'desc' and insert as new free segment [start->end].
1132 * The new segment must not contain or be adjacent to any free segment in 'map'.
1133 */
static void insert_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
                            char* start, char* end)
{
    desc->end = end;
    desc->start = start;
    /* Keep both views (size order and address order) in sync. */
    rbt_insert(&map->stree, &desc->snode);
    rbt_insert(&map->atree, &desc->anode);
    map->nseg++;
}
1143
1144 /* Resize existing free segment 'desc' to [start->end].
1145 * The new segment location must overlap the old location and
1146 * it must not contain or be adjacent to any other free segment in 'map'.
1147 */
static void resize_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc,
                            char* start, char* end)
{
#ifdef RBT_DEBUG
    RBTNode* prev = rbt_prev_node(&desc->anode);
    RBTNode* next = rbt_next_node(&desc->anode);
    RBT_ASSERT(!prev || anode_to_desc(prev)->end < start);
    RBT_ASSERT(!next || anode_to_desc(next)->start > end);
#endif
    /* The overlap requirement means the segment keeps its place in the
     * address tree; only the size tree key changes, so re-insert there. */
    rbt_delete(&map->stree, &desc->snode);
    desc->end = end;
    desc->start = start;
    rbt_insert(&map->stree, &desc->snode);
}
1162
1163 /* Delete existing free segment 'desc' from 'map'.
1164 */
delete_free_seg(ErtsFreeSegMap * map,ErtsFreeSegDesc * desc)1165 static void delete_free_seg(ErtsFreeSegMap* map, ErtsFreeSegDesc* desc)
1166 {
1167 rbt_delete(&map->atree, &desc->anode);
1168 rbt_delete(&map->stree, &desc->snode);
1169 map->nseg--;
1170 }
1171
1172 /* Lookup a free segment in 'map' with a size of at least 'need_sz' usable bytes.
1173 */
lookup_free_seg(ErtsFreeSegMap * map,SWord need_sz)1174 static ErtsFreeSegDesc* lookup_free_seg(ErtsFreeSegMap* map, SWord need_sz)
1175 {
1176 RBTNode* x = map->stree.root;
1177 ErtsFreeSegDesc* best_desc = NULL;
1178 const enum SortOrder order = map->stree.order;
1179
1180 while (x) {
1181 ErtsFreeSegDesc* desc = snode_to_desc(x);
1182 SWord seg_sz = usable_size(order, desc);
1183
1184 if (seg_sz < need_sz) {
1185 x = x->right;
1186 }
1187 else {
1188 best_desc = desc;
1189 x = x->left;
1190 }
1191 }
1192 return best_desc;
1193 }
1194
/* Accumulator threaded through rbt_foreach_node() while building the
 * Erlang list of free segments (see build_free_seg_list()). */
struct build_arg_t
{
    Process* p;   /* process owning the heap we build on */
    Eterm* hp;    /* next free heap word */
    Eterm acc;    /* list constructed so far */
};
1201
/* rbt_foreach_node() callback: prepend a {Start, End} tuple describing
 * one free segment onto the accumulated list in 'arg'. */
static void build_free_seg_tuple(RBTNode* node, void* arg)
{
    struct build_arg_t* a = (struct build_arg_t*)arg;
    ErtsFreeSegDesc* desc = anode_to_desc(node);
    /* Build the segment boundaries as (possibly boxed) integers. */
    Eterm start= erts_bld_uword(&a->hp, NULL, (UWord)desc->start);
    Eterm end = erts_bld_uword(&a->hp, NULL, (UWord)desc->end);
    Eterm tpl = TUPLE2(a->hp, start, end);

    a->hp += 3;                        /* TUPLE2 consumed 3 heap words */
    a->acc = CONS(a->hp, tpl, a->acc);
    a->hp += 2;                        /* CONS consumed 2 heap words */
}
1214
/* Build an Erlang list of {Start, End} tuples, one per free segment in
 * 'map', allocated on the heap of process 'p'. */
static
Eterm build_free_seg_list(Process* p, ErtsFreeSegMap* map)
{
    struct build_arg_t barg;
    Eterm* hp_end;
    /* Worst case per segment: 2 words (cons) + 3 (tuple) + 2*2 (two
     * boxed uwords). */
    const Uint may_need = map->nseg * (2 + 3 + 2*2); /* cons + tuple + bigs */

    barg.p = p;
    barg.hp = HAlloc(p, may_need);
    hp_end = barg.hp + may_need;
    barg.acc = NIL;
    /* NOTE(review): the trailing '1' presumably selects iteration
     * direction of rbt_foreach_node(); confirm against its definition. */
    rbt_foreach_node(&map->atree, build_free_seg_tuple, &barg, 1);

    RBT_ASSERT(barg.hp <= hp_end);
    /* Hand any unused heap words back to the process. */
    HRelease(p, hp_end, barg.hp);
    return barg.acc;
}
1232
1233 #if ERTS_HAVE_OS_MMAP
1234 /* Implementation of os_mmap()/os_munmap()/os_mremap()... */
1235
1236 #if HAVE_MMAP
1237 # define ERTS_MMAP_PROT (PROT_READ|PROT_WRITE)
1238 # if defined(MAP_ANONYMOUS)
1239 # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
1240 # define ERTS_MMAP_FD (-1)
1241 # elif defined(MAP_ANON)
1242 # define ERTS_MMAP_FLAGS (MAP_ANON|MAP_PRIVATE)
1243 # define ERTS_MMAP_FD (-1)
1244 # else
1245 # define ERTS_MMAP_FLAGS (MAP_PRIVATE)
1246 # define ERTS_MMAP_FD mm->mmap_fd
1247 # endif
1248 #endif
1249
/* Map 'size' bytes of fresh read/write memory from the OS.  When
 * 'try_superalign' is set, request superaligned placement where the
 * platform supports it (MAP_ALIGN).  Returns NULL on failure.
 */
static ERTS_INLINE void *
os_mmap(void *hint_ptr, UWord size, int try_superalign)
{
#if HAVE_MMAP
    void *res;
#ifdef MAP_ALIGN
    if (try_superalign)
        /* With MAP_ALIGN (Solaris) the address argument is interpreted
         * as the requested alignment rather than a placement hint. */
        res = mmap((void *) ERTS_SUPERALIGNED_SIZE, size, ERTS_MMAP_PROT,
                   ERTS_MMAP_FLAGS|MAP_ALIGN, ERTS_MMAP_FD, 0);
    else
#endif
        res = mmap((void *) hint_ptr, size, ERTS_MMAP_PROT,
                   ERTS_MMAP_FLAGS, ERTS_MMAP_FD, 0);
    if (res == MAP_FAILED)
        return NULL;
    return res;
#elif HAVE_VIRTUALALLOC
    return (void *) VirtualAlloc(NULL, (SIZE_T) size,
                                 MEM_COMMIT|MEM_RESERVE, PAGE_READWRITE);
#else
#  error "missing mmap() or similar"
#endif
}
1273
/* Return a previously OS-mapped region to the OS.  Success is asserted
 * in debug builds; failure is silently ignored otherwise. */
static ERTS_INLINE void
os_munmap(void *ptr, UWord size)
{
#if HAVE_MMAP
#ifdef ERTS_MMAP_DEBUG
    int res =
#endif
        munmap(ptr, size);
    ERTS_MMAP_ASSERT(res == 0);
#elif HAVE_VIRTUALALLOC
#ifdef DEBUG
    BOOL res =
#endif
        /* MEM_RELEASE requires size 0; the whole allocation is freed. */
        VirtualFree((LPVOID) ptr, (SIZE_T) 0, MEM_RELEASE);
    ERTS_MMAP_ASSERT(res != 0);
#else
#  error "missing munmap() or similar"
#endif
}
1293
1294 #ifdef ERTS_HAVE_OS_MREMAP
1295 # if HAVE_MREMAP
1296 # if defined(__NetBSD__)
1297 # define ERTS_MREMAP_FLAGS (0)
1298 # else
1299 # define ERTS_MREMAP_FLAGS (MREMAP_MAYMOVE)
1300 # endif
1301 # endif
/* Resize the OS mapping at 'ptr' from 'old_size' to 'new_size' bytes,
 * possibly moving it (MREMAP_MAYMOVE, except on NetBSD).  The
 * 'try_superalign' argument is accepted for interface symmetry but is
 * not used here.  Returns the (possibly new) address, or NULL on
 * failure. */
static ERTS_INLINE void *
os_mremap(void *ptr, UWord old_size, UWord new_size, int try_superalign)
{
    void *new_seg;
#if HAVE_MREMAP
    new_seg = mremap(ptr, (size_t) old_size,
#  if defined(__NetBSD__)
                     /* NetBSD's mremap() takes a placement hint here. */
                     NULL,
#  endif
                     (size_t) new_size, ERTS_MREMAP_FLAGS);
    if (new_seg == (void *) MAP_FAILED)
        return NULL;
    return new_seg;
#else
#  error "missing mremap() or similar"
#endif
}
1319 #endif
1320
1321 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
1322 #if HAVE_MMAP
1323
1324 #define ERTS_MMAP_RESERVE_PROT (ERTS_MMAP_PROT)
1325 #define ERTS_MMAP_RESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
1326 #define ERTS_MMAP_UNRESERVE_PROT (PROT_NONE)
1327 #if defined(__FreeBSD__)
1328 #define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_FIXED)
1329 #else
1330 #define ERTS_MMAP_UNRESERVE_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE|MAP_FIXED)
1331 #endif /* __FreeBSD__ */
1332 #define ERTS_MMAP_VIRTUAL_PROT (PROT_NONE)
1333 #if defined(__FreeBSD__)
1334 #define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS)
1335 #else
1336 #define ERTS_MMAP_VIRTUAL_FLAGS (ERTS_MMAP_FLAGS|MAP_NORESERVE)
1337 #endif /* __FreeBSD__ */
1338
/* Give [ptr, ptr+size) read/write physical backing by remapping it over
 * the existing reservation with MAP_FIXED.  Returns 1 on success,
 * 0 on failure. */
static int
os_reserve_physical(char *ptr, UWord size)
{
    void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_RESERVE_PROT,
                     ERTS_MMAP_RESERVE_FLAGS, ERTS_MMAP_FD, 0);
    if (res == (void *) MAP_FAILED)
        return 0;
    return 1;
}
1348
/* Drop the physical backing of [ptr, ptr+size) by remapping it
 * PROT_NONE (and MAP_NORESERVE where available) over itself.  Aborts
 * the emulator on failure, since a lost reservation would corrupt the
 * supercarrier's bookkeeping. */
static void
os_unreserve_physical(char *ptr, UWord size)
{
    void *res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_UNRESERVE_PROT,
                     ERTS_MMAP_UNRESERVE_FLAGS, ERTS_MMAP_FD, 0);
    if (res == (void *) MAP_FAILED)
        erts_exit(ERTS_ABORT_EXIT, "Failed to unreserve memory");
}
1357
1358 static void *
os_mmap_virtual(char * ptr,UWord size)1359 os_mmap_virtual(char *ptr, UWord size)
1360 {
1361 int flags = ERTS_MMAP_VIRTUAL_FLAGS;
1362 void* res;
1363
1364 res = mmap((void *) ptr, (size_t) size, ERTS_MMAP_VIRTUAL_PROT,
1365 flags, ERTS_MMAP_FD, 0);
1366 if (res == (void *) MAP_FAILED)
1367 return NULL;
1368 return res;
1369 }
1370
1371 #else
1372 #error "Missing reserve/unreserve physical memory implementation"
1373 #endif
1374 #endif /* ERTS_HAVE_OS_RESERVE_PHYSICAL_MEMORY */
1375
1376 #endif /* ERTS_HAVE_OS_MMAP */
1377
/* reserve_physical callback used when no physical reservation step is
 * needed; always "succeeds".  In debug-fill builds it also checks that
 * the area still carries the "free" fill pattern and repaints it as
 * "in use". */
static int reserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
    Uint32 *uip, *end = (Uint32 *) (ptr + size);

    /* The area must still be painted "free"... */
    for (uip = (Uint32 *) ptr; uip < end; uip++)
        ERTS_MMAP_ASSERT(*uip == (Uint32) 0xdeadbeef);
    /* ...now mark it "in use". */
    for (uip = (Uint32 *) ptr; uip < end; uip++)
        *uip = (Uint32) 0xfeedfeed;
#endif
    return 1;
}
1390
/* unreserve_physical callback counterpart of reserve_noop(); a no-op
 * except that debug-fill builds repaint the area as "free". */
static void unreserve_noop(char *ptr, UWord size)
{
#ifdef ERTS_MMAP_DEBUG_FILL_AREAS
    Uint32 *uip, *end = (Uint32 *) (ptr + size);

    for (uip = (Uint32 *) ptr; uip < end; uip++)
        *uip = (Uint32) 0xdeadbeef;
#endif
}
1400
/* Insert [start,end) as a free segment into 'map', allocating a fresh
 * descriptor for it.  If the descriptor pool is empty, descriptor space
 * is acquired, in order of preference, from: a newly OS-mapped page, a
 * page carved out of a free segment in the supercarrier, or -- as a
 * last resort -- the first page of [start,end) itself.
 *
 * Returns the number of leading bytes of [start,end) consumed for
 * descriptor storage: 0 normally, ERTS_PAGEALIGNED_SIZE in the
 * last-resort case.  The caller must not unreserve or reuse those
 * bytes. */
static UWord
alloc_desc_insert_free_seg(ErtsMemMapper* mm,
                           ErtsFreeSegMap *map, char* start, char* end)
{
    char *ptr;
    ErtsFreeSegMap *da_map;
    ErtsFreeSegDesc *desc = alloc_desc(mm);
    if (desc) {
        insert_free_seg(map, desc, start, end);
        return 0;
    }

    /*
     * Ahh; ran out of free segment descriptors.
     *
     * First try to map a new page...
     */

#if ERTS_HAVE_OS_MMAP
    if (!mm->no_os_mmap) {
        ptr = os_mmap(mm->desc.new_area_hint, ERTS_PAGEALIGNED_SIZE, 0);
        if (ptr) {
            mm->desc.new_area_hint = ptr+ERTS_PAGEALIGNED_SIZE;
            ERTS_MMAP_SIZE_OS_INC(ERTS_PAGEALIGNED_SIZE);
            add_free_desc_area(mm, ptr, ptr+ERTS_PAGEALIGNED_SIZE);
            desc = alloc_desc(mm);
            ERTS_MMAP_ASSERT(desc);
            insert_free_seg(map, desc, start, end);
            return 0;
        }
    }
#endif

    /*
     * ...then try to find a good place in the supercarrier...
     */
    da_map = &mm->sua.map;
    desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
    if (desc) {
        if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
            ERTS_MMAP_SIZE_SC_SUA_INC(ERTS_PAGEALIGNED_SIZE);
        else
            desc = NULL;

    }
    else {
        da_map = &mm->sa.map;
        desc = lookup_free_seg(da_map, ERTS_PAGEALIGNED_SIZE);
        if (desc) {
            if (mm->reserve_physical(desc->start, ERTS_PAGEALIGNED_SIZE))
                ERTS_MMAP_SIZE_SC_SA_INC(ERTS_PAGEALIGNED_SIZE);
            else
                desc = NULL;
        }
    }
    if (desc) {
        /* Turn the first page of the found free segment into descriptor
         * storage, then shrink (or delete) that segment accordingly. */
        char *da_end = desc->start + ERTS_PAGEALIGNED_SIZE;
        add_free_desc_area(mm, desc->start, da_end);
        if (da_end != desc->end)
            resize_free_seg(da_map, desc, da_end, desc->end);
        else {
            delete_free_seg(da_map, desc);
            free_desc(mm, desc);
        }

        desc = alloc_desc(mm);
        ERTS_MMAP_ASSERT(desc);
        insert_free_seg(map, desc, start, end);
        return 0;
    }

    /*
     * ... and then as last resort use the first page of the
     * free segment we are trying to insert for free descriptors.
     */
    ptr = start + ERTS_PAGEALIGNED_SIZE;
    ERTS_MMAP_ASSERT(ptr <= end);

    add_free_desc_area(mm, start, ptr);

    if (ptr != end) {
        desc = alloc_desc(mm);
        ERTS_MMAP_ASSERT(desc);
        insert_free_seg(map, desc, ptr, end);
    }

    return ERTS_PAGEALIGNED_SIZE;
}
1489
/* Allocate a mapping of (at least) *sizep bytes; *sizep is updated to
 * the actual page- or superaligned size.  Allocation is attempted from
 * the supercarrier first (unless ERTS_MMAPFLG_OS_ONLY), then directly
 * from the OS (unless ERTS_MMAPFLG_SUPERCARRIER_ONLY).  Returns NULL
 * with *sizep set to 0 on failure. */
void *
erts_mmap(ErtsMemMapper* mm, Uint32 flags, UWord *sizep)
{
    char *seg;
    UWord asize = ERTS_PAGEALIGNED_CEILING(*sizep);

    /* Map in premapped supercarrier */
    if (mm->supercarrier && !(ERTS_MMAPFLG_OS_ONLY & flags)) {
        char *end;
        ErtsFreeSegDesc *desc;
        Uint32 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);

        erts_mtx_lock(&mm->mtx);

        ERTS_MMAP_OP_START(*sizep);

        if (!superaligned) {
            /* Page-aligned request: try a free segment in the
             * superunaligned (sua) area first... */
            desc = lookup_free_seg(&mm->sua.map, asize);
            if (desc) {
                seg = desc->start;
                end = seg+asize;
                if (!mm->reserve_physical(seg, asize))
                    goto supercarrier_reserve_failure;
                if (desc->end == end) {
                    /* Exact fit; free segment fully consumed. */
                    delete_free_seg(&mm->sua.map, desc);
                    free_desc(mm, desc);
                }
                else {
                    ERTS_MMAP_ASSERT(end < desc->end);
                    resize_free_seg(&mm->sua.map, desc, end, desc->end);
                }
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                goto supercarrier_success;
            }

            /* ...or grow the sua area downwards into the gap between
             * sa.top and sua.bot. */
            if (asize <= mm->sua.bot - mm->sa.top) {
                if (!mm->reserve_physical(mm->sua.bot - asize, asize))
                    goto supercarrier_reserve_failure;
                mm->sua.bot -= asize;
                seg = mm->sua.bot;
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                goto supercarrier_success;
            }
        }

        /* Superaligned allocation (also the fallback for page-aligned
         * requests that did not fit above). */
        asize = ERTS_SUPERALIGNED_CEILING(asize);

        desc = lookup_free_seg(&mm->sa.map, asize);
        if (desc) {
            char *start = seg = desc->start;
            seg = (char *) ERTS_SUPERALIGNED_CEILING(seg);
            end = seg+asize;
            if (!mm->reserve_physical(start, (UWord) (end - start)))
                goto supercarrier_reserve_failure;
            ERTS_MMAP_SIZE_SC_SA_INC(asize);
            if (desc->end == end) {
                if (start != seg)
                    /* Keep the alignment padding as a smaller free seg. */
                    resize_free_seg(&mm->sa.map, desc, start, seg);
                else {
                    delete_free_seg(&mm->sa.map, desc);
                    free_desc(mm, desc);
                }
            }
            else {
                ERTS_MMAP_ASSERT(end < desc->end);
                resize_free_seg(&mm->sa.map, desc, end, desc->end);
                if (start != seg) {
                    /* Alignment padding before 'seg' is handed to the
                     * sua map; alloc_desc_insert_free_seg() may keep the
                     * first 'ad_sz' bytes for descriptor storage. */
                    UWord ad_sz;
                    ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                       start, seg);
                    start += ad_sz;
                    if (start != seg)
                        mm->unreserve_physical(start, (UWord) (seg - start));
                }
            }
            goto supercarrier_success;
        }

        if (superaligned) {
            /* Grow the sa area upwards from sa.top. */
            char *start = mm->sa.top;
            seg = (char *) ERTS_SUPERALIGNED_CEILING(start);

            if (asize + (seg - start) <= mm->sua.bot - start) {
                end = seg + asize;
                if (!mm->reserve_physical(start, (UWord) (end - start)))
                    goto supercarrier_reserve_failure;
                mm->sa.top = end;
                ERTS_MMAP_SIZE_SC_SA_INC(asize);
                if (start != seg) {
                    UWord ad_sz;
                    ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                       start, seg);
                    start += ad_sz;
                    if (start != seg)
                        mm->unreserve_physical(start, (UWord) (seg - start));
                }
                goto supercarrier_success;
            }

            /* Last resort: take a superaligned slice out of a sua free
             * segment large enough to guarantee alignment. */
            desc = lookup_free_seg(&mm->sua.map, asize + ERTS_SUPERALIGNED_SIZE);
            if (desc) {
                char *org_start = desc->start;
                char *org_end = desc->end;

                seg = (char *) ERTS_SUPERALIGNED_CEILING(org_start);
                end = seg + asize;
                if (!mm->reserve_physical(seg, (UWord) (org_end - seg)))
                    goto supercarrier_reserve_failure;
                ERTS_MMAP_SIZE_SC_SUA_INC(asize);
                if (org_start != seg) {
                    ERTS_MMAP_ASSERT(org_start < seg);
                    /* Descriptor now tracks the leading padding only. */
                    resize_free_seg(&mm->sua.map, desc, org_start, seg);
                    desc = NULL;
                }
                if (end != org_end) {
                    UWord ad_sz = 0;
                    ERTS_MMAP_ASSERT(end < org_end);
                    if (desc)
                        resize_free_seg(&mm->sua.map, desc, end, org_end);
                    else
                        ad_sz = alloc_desc_insert_free_seg(mm, &mm->sua.map,
                                                           end, org_end);
                    end += ad_sz;
                    if (end != org_end)
                        mm->unreserve_physical(end,
                                               (UWord) (org_end - end));
                }
                goto supercarrier_success;
            }
        }

        ERTS_MMAP_OP_ABORT();
        erts_mtx_unlock(&mm->mtx);
    }

#if ERTS_HAVE_OS_MMAP
    /* Map using OS primitives */
    if (!(ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) && !mm->no_os_mmap) {
        if (!(ERTS_MMAPFLG_SUPERALIGNED & flags)) {
            seg = os_mmap(NULL, asize, 0);
            if (!seg)
                goto failure;
        }
        else {
            asize = ERTS_SUPERALIGNED_CEILING(*sizep);
            seg = os_mmap(NULL, asize, 1);
            if (!seg)
                goto failure;

            if (!ERTS_IS_SUPERALIGNED(seg)) {
                /* The OS did not give us alignment; overmap by one
                 * superaligned unit and trim both ends down to an
                 * aligned segment of the requested size. */
                char *ptr;
                UWord sz;

                os_munmap(seg, asize);

                ptr = os_mmap(NULL, asize + ERTS_SUPERALIGNED_SIZE, 1);
                if (!ptr)
                    goto failure;

                seg = (char *) ERTS_SUPERALIGNED_CEILING(ptr);
                sz = (UWord) (seg - ptr);
                ERTS_MMAP_ASSERT(sz <= ERTS_SUPERALIGNED_SIZE);
                if (sz)
                    os_munmap(ptr, sz);
                sz = ERTS_SUPERALIGNED_SIZE - sz;
                if (sz)
                    os_munmap(seg+asize, sz);
            }
        }

        ERTS_MMAP_OP_LCK(seg, *sizep, asize);
        ERTS_MMAP_SIZE_OS_INC(asize);
        *sizep = asize;
        return (void *) seg;
    }
failure:
#endif
    ERTS_MMAP_OP_LCK(NULL, *sizep, 0);
    *sizep = 0;
    return NULL;

supercarrier_success:

#ifdef ERTS_MMAP_DEBUG
    if (ERTS_MMAPFLG_SUPERALIGNED & flags) {
        ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(seg));
        ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
    }
    else {
        ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(seg));
        ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
    }
#endif

    ERTS_MMAP_OP_END(seg, asize);
    erts_mtx_unlock(&mm->mtx);

    *sizep = asize;
    return (void *) seg;

supercarrier_reserve_failure:
    erts_mtx_unlock(&mm->mtx);
    *sizep = 0;
    return NULL;
}
1695
/* Unmap [ptr, ptr+size).  OS mappings are returned to the OS directly;
 * supercarrier segments are turned back into free segments (coalescing
 * with adjacent free segments) and their physical backing is
 * unreserved. */
void
erts_munmap(ErtsMemMapper* mm, Uint32 flags, void *ptr, UWord size)
{
    ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
    ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(size));

    if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
        ERTS_MMAP_ASSERT(!mm->no_os_mmap);
#if ERTS_HAVE_OS_MMAP
        ERTS_MUNMAP_OP_LCK(ptr, size);
        ERTS_MMAP_SIZE_OS_DEC(size);
        os_munmap(ptr, size);
#endif
    }
    else {
        char *start, *end;
        ErtsFreeSegMap *map;
        ErtsFreeSegDesc *prev, *next, *desc;
        UWord ad_sz = 0;

        ERTS_MMAP_ASSERT(mm->supercarrier);

        start = (char *) ptr;
        end = start + size;

        erts_mtx_lock(&mm->mtx);

        ERTS_MUNMAP_OP(ptr, size);

        if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {

            map = &mm->sa.map;
            adjacent_free_seg(map, start, end, &prev, &next);

            ERTS_MMAP_SIZE_SC_SA_DEC(size);
            if (end == mm->sa.top) {
                /* Freeing at the top of the sa area: lower sa.top,
                 * swallowing an adjacent lower free segment if any. */
                ERTS_MMAP_ASSERT(!next);
                if (prev) {
                    start = prev->start;
                    delete_free_seg(map, prev);
                    free_desc(mm, prev);
                }
                mm->sa.top = start;
                goto supercarrier_success;
            }
        }
        else {
            map = &mm->sua.map;
            adjacent_free_seg(map, start, end, &prev, &next);

            ERTS_MMAP_SIZE_SC_SUA_DEC(size);
            if (start == mm->sua.bot) {
                /* Freeing at the bottom of the sua area: raise sua.bot,
                 * swallowing an adjacent higher free segment if any. */
                ERTS_MMAP_ASSERT(!prev);
                if (next) {
                    end = next->end;
                    delete_free_seg(map, next);
                    free_desc(mm, next);
                }
                mm->sua.bot = end;
                goto supercarrier_success;
            }
        }

        desc = NULL;

        /* Coalesce with any adjacent free segment(s), reusing one of
         * their descriptors when possible (note: the 'save_prev' label
         * is jumped to from the both-neighbors case above it). */
        if (next) {
            ERTS_MMAP_ASSERT(end < next->end);
            end = next->end;
            if (prev) {
                delete_free_seg(map, next);
                free_desc(mm, next);
                goto save_prev;
            }
            desc = next;
        } else if (prev) {
        save_prev:
            ERTS_MMAP_ASSERT(prev->start < start);
            start = prev->start;
            desc = prev;
        }

        if (desc)
            resize_free_seg(map, desc, start, end);
        else
            ad_sz = alloc_desc_insert_free_seg(mm, map, start, end);

    supercarrier_success: {
            UWord unres_sz;

            /* alloc_desc_insert_free_seg() may have kept 'ad_sz' bytes
             * for descriptor storage; only unreserve the remainder. */
            ERTS_MMAP_ASSERT(size >= ad_sz);
            unres_sz = size - ad_sz;
            if (unres_sz)
                mm->unreserve_physical(((char *) ptr) + ad_sz, unres_sz);

            erts_mtx_unlock(&mm->mtx);
        }
    }
}
1794
1795 static void *
remap_move(ErtsMemMapper * mm,Uint32 flags,void * ptr,UWord old_size,UWord * sizep)1796 remap_move(ErtsMemMapper* mm,
1797 Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
1798 {
1799 UWord size = *sizep;
1800 void *new_ptr = erts_mmap(mm, flags, &size);
1801 if (!new_ptr)
1802 return NULL;
1803 *sizep = size;
1804 if (old_size < size)
1805 size = old_size;
1806 sys_memcpy(new_ptr, ptr, (size_t) size);
1807 erts_munmap(mm, flags, ptr, old_size);
1808 return new_ptr;
1809 }
1810
1811 void *
erts_mremap(ErtsMemMapper * mm,Uint32 flags,void * ptr,UWord old_size,UWord * sizep)1812 erts_mremap(ErtsMemMapper* mm,
1813 Uint32 flags, void *ptr, UWord old_size, UWord *sizep)
1814 {
1815 void *new_ptr;
1816 Uint32 superaligned;
1817 UWord asize;
1818
1819 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1820 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1821 ERTS_MMAP_ASSERT(sizep && ERTS_IS_PAGEALIGNED(*sizep));
1822
1823 if (!ERTS_MMAP_IN_SUPERCARRIER(ptr)) {
1824
1825 ERTS_MMAP_ASSERT(!mm->no_os_mmap);
1826
1827 if (!(ERTS_MMAPFLG_OS_ONLY & flags) && mm->supercarrier) {
1828 new_ptr = remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
1829 ptr, old_size, sizep);
1830 if (new_ptr)
1831 return new_ptr;
1832 }
1833
1834 if (ERTS_MMAPFLG_SUPERCARRIER_ONLY & flags) {
1835 ERTS_MREMAP_OP_LCK(NULL, ptr, old_size, *sizep, old_size);
1836 return NULL;
1837 }
1838
1839 #if defined(ERTS_HAVE_OS_MREMAP) || defined(ERTS_HAVE_GENUINE_OS_MMAP)
1840 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
1841
1842 if (superaligned) {
1843 asize = ERTS_SUPERALIGNED_CEILING(*sizep);
1844 if (asize == old_size && ERTS_IS_SUPERALIGNED(ptr)) {
1845 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1846 *sizep = asize;
1847 return ptr;
1848 }
1849 }
1850 else {
1851 asize = ERTS_PAGEALIGNED_CEILING(*sizep);
1852 if (asize == old_size) {
1853 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1854 *sizep = asize;
1855 return ptr;
1856 }
1857 }
1858
1859 #ifdef ERTS_HAVE_GENUINE_OS_MMAP
1860 if (asize < old_size
1861 && (!superaligned
1862 || ERTS_IS_SUPERALIGNED(ptr))) {
1863 UWord um_sz;
1864 new_ptr = ((char *) ptr) + asize;
1865 ERTS_MMAP_ASSERT((((char *)ptr) + old_size) > (char *) new_ptr);
1866 um_sz = (UWord) ((((char *) ptr) + old_size) - (char *) new_ptr);
1867 ERTS_MMAP_SIZE_OS_DEC(um_sz);
1868 os_munmap(new_ptr, um_sz);
1869 ERTS_MREMAP_OP_LCK(ptr, ptr, old_size, *sizep, asize);
1870 *sizep = asize;
1871 return ptr;
1872 }
1873 #endif
1874 #ifdef ERTS_HAVE_OS_MREMAP
1875 if (superaligned)
1876 return remap_move(mm, flags, new_ptr, old_size, sizep);
1877 else {
1878 new_ptr = os_mremap(ptr, old_size, asize, 0);
1879 if (!new_ptr)
1880 return NULL;
1881 if (asize > old_size)
1882 ERTS_MMAP_SIZE_OS_INC(asize - old_size);
1883 else
1884 ERTS_MMAP_SIZE_OS_DEC(old_size - asize);
1885 ERTS_MREMAP_OP_LCK(new_ptr, ptr, old_size, *sizep, asize);
1886 *sizep = asize;
1887 return new_ptr;
1888 }
1889 #endif
1890 #endif
1891 }
1892 else { /* In super carrier */
1893 char *start, *end, *new_end;
1894 ErtsFreeSegMap *map;
1895 ErtsFreeSegDesc *prev, *next;
1896 UWord ad_sz = 0;
1897
1898 ERTS_MMAP_ASSERT(mm->supercarrier);
1899
1900 if (ERTS_MMAPFLG_OS_ONLY & flags)
1901 return remap_move(mm, flags, ptr, old_size, sizep);
1902
1903 superaligned = (ERTS_MMAPFLG_SUPERALIGNED & flags);
1904
1905 asize = (superaligned
1906 ? ERTS_SUPERALIGNED_CEILING(*sizep)
1907 : ERTS_PAGEALIGNED_CEILING(*sizep));
1908
1909 erts_mtx_lock(&mm->mtx);
1910
1911 if (ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)
1912 ? (!superaligned && lookup_free_seg(&mm->sua.map, asize))
1913 : (superaligned && lookup_free_seg(&mm->sa.map, asize))) {
1914 erts_mtx_unlock(&mm->mtx);
1915 /*
1916 * Segment currently in wrong area (due to a previous memory
1917 * shortage), move it to the right area.
1918 * (remap_move() will succeed)
1919 */
1920 return remap_move(mm, ERTS_MMAPFLG_SUPERCARRIER_ONLY|flags,
1921 ptr, old_size, sizep);
1922 }
1923
1924 ERTS_MREMAP_OP_START(ptr, old_size, *sizep);
1925
1926 if (asize == old_size) {
1927 new_ptr = ptr;
1928 goto supercarrier_resize_success;
1929 }
1930
1931 start = (char *) ptr;
1932 end = start + old_size;
1933 new_end = start+asize;
1934
1935 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1936 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1937 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
1938
1939 if (asize < old_size) {
1940 UWord unres_sz;
1941 new_ptr = ptr;
1942 if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
1943 map = &mm->sua.map;
1944 ERTS_MMAP_SIZE_SC_SUA_DEC(old_size - asize);
1945 }
1946 else {
1947 if (end == mm->sa.top) {
1948 mm->sa.top = new_end;
1949 mm->unreserve_physical(((char *) ptr) + asize,
1950 old_size - asize);
1951 goto supercarrier_resize_success;
1952 }
1953 ERTS_MMAP_SIZE_SC_SA_DEC(old_size - asize);
1954 map = &mm->sa.map;
1955 }
1956
1957 adjacent_free_seg(map, start, end, &prev, &next);
1958
1959 if (next)
1960 resize_free_seg(map, next, new_end, next->end);
1961 else
1962 ad_sz = alloc_desc_insert_free_seg(mm, map, new_end, end);
1963 ERTS_MMAP_ASSERT(old_size - asize >= ad_sz);
1964 unres_sz = old_size - asize - ad_sz;
1965 if (unres_sz)
1966 mm->unreserve_physical(((char *) ptr) + asize + ad_sz,
1967 unres_sz);
1968 goto supercarrier_resize_success;
1969 }
1970
1971 if (!ERTS_MMAP_IN_SUPERALIGNED_AREA(ptr)) {
1972 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(ptr));
1973 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(old_size));
1974 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
1975
1976 adjacent_free_seg(&mm->sua.map, start, end, &prev, &next);
1977
1978 if (next && new_end <= next->end) {
1979 if (!mm->reserve_physical(((char *) ptr) + old_size,
1980 asize - old_size))
1981 goto supercarrier_reserve_failure;
1982 if (new_end < next->end)
1983 resize_free_seg(&mm->sua.map, next, new_end, next->end);
1984 else {
1985 delete_free_seg(&mm->sua.map, next);
1986 free_desc(mm, next);
1987 }
1988 new_ptr = ptr;
1989 ERTS_MMAP_SIZE_SC_SUA_INC(asize - old_size);
1990 goto supercarrier_resize_success;
1991 }
1992 }
1993 else { /* Superaligned area */
1994
1995 if (end == mm->sa.top) {
1996 if (new_end <= mm->sua.bot) {
1997 if (!mm->reserve_physical(((char *) ptr) + old_size,
1998 asize - old_size))
1999 goto supercarrier_reserve_failure;
2000 mm->sa.top = new_end;
2001 new_ptr = ptr;
2002 ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
2003 goto supercarrier_resize_success;
2004 }
2005 }
2006 else {
2007 adjacent_free_seg(&mm->sa.map, start, end, &prev, &next);
2008 if (next && new_end <= next->end) {
2009 if (!mm->reserve_physical(((char *) ptr) + old_size,
2010 asize - old_size))
2011 goto supercarrier_reserve_failure;
2012 if (new_end < next->end)
2013 resize_free_seg(&mm->sa.map, next, new_end, next->end);
2014 else {
2015 delete_free_seg(&mm->sa.map, next);
2016 free_desc(mm, next);
2017 }
2018 new_ptr = ptr;
2019 ERTS_MMAP_SIZE_SC_SA_INC(asize - old_size);
2020 goto supercarrier_resize_success;
2021 }
2022 }
2023 }
2024
2025 ERTS_MMAP_OP_ABORT();
2026 erts_mtx_unlock(&mm->mtx);
2027
2028 /* Failed to resize... */
2029 }
2030
2031 return remap_move(mm, flags, ptr, old_size, sizep);
2032
2033 supercarrier_resize_success:
2034
2035 #ifdef ERTS_MMAP_DEBUG
2036 if ((ERTS_MMAPFLG_SUPERALIGNED & flags)
2037 || ERTS_MMAP_IN_SUPERALIGNED_AREA(new_ptr)) {
2038 ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(new_ptr));
2039 ERTS_MMAP_ASSERT(ERTS_IS_SUPERALIGNED(asize));
2040 }
2041 else {
2042 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(new_ptr));
2043 ERTS_MMAP_ASSERT(ERTS_IS_PAGEALIGNED(asize));
2044 }
2045 #endif
2046
2047 ERTS_MREMAP_OP_END(new_ptr, asize);
2048 erts_mtx_unlock(&mm->mtx);
2049
2050 *sizep = asize;
2051 return new_ptr;
2052
2053 supercarrier_reserve_failure:
2054 ERTS_MREMAP_OP_END(NULL, old_size);
2055 erts_mtx_unlock(&mm->mtx);
2056 *sizep = old_size;
2057 return NULL;
2058
2059 }
2060
/* True if 'ptr' lies inside the supercarrier address range.  Safe to
 * call without holding mm->mtx: sa.bot and sua.top are read-only after
 * initialization (see macro comment at top of file). */
int erts_mmap_in_supercarrier(ErtsMemMapper* mm, void *ptr)
{
    return ERTS_MMAP_IN_SUPERCARRIER(ptr);
}
2065
/* Atoms used when building erts_mmap_info() results; created lazily by
 * init_atoms(). */
static struct {
    Eterm options;
    Eterm total;
    Eterm total_sa;
    Eterm total_sua;
    Eterm used;
    Eterm used_sa;
    Eterm used_sua;
    Eterm max;
    Eterm allocated;
    Eterm reserved;
    Eterm sizes;
    Eterm free_segs;
    Eterm supercarrier;
    Eterm os;
    Eterm scs;
    Eterm sco;
    Eterm scrpm;
    Eterm scrfsd;

    int is_initialized;       /* guarded by init_mutex */
    erts_mtx_t init_mutex;
}am;
2089
/* Create (or look up) the atom named 'name' and store it in *atom. */
static void ERTS_INLINE atom_init(Eterm *atom, char *name)
{
    *atom = am_atom_put(name, strlen(name));
}
2094 #define AM_INIT(AM) atom_init(&am.AM, #AM)
2095
init_atoms(void)2096 static void init_atoms(void)
2097 {
2098 erts_mtx_lock(&am.init_mutex);
2099
2100 if (!am.is_initialized) {
2101 AM_INIT(options);
2102 AM_INIT(total);
2103 AM_INIT(total_sa);
2104 AM_INIT(total_sua);
2105 AM_INIT(used);
2106 AM_INIT(used_sa);
2107 AM_INIT(used_sua);
2108 AM_INIT(max);
2109 AM_INIT(allocated);
2110 AM_INIT(reserved);
2111 AM_INIT(sizes);
2112 AM_INIT(free_segs);
2113 AM_INIT(supercarrier);
2114 AM_INIT(os);
2115 AM_INIT(scs);
2116 AM_INIT(sco);
2117 AM_INIT(scrpm);
2118 AM_INIT(scrfsd);
2119 am.is_initialized = 1;
2120 }
2121 erts_mtx_unlock(&am.init_mutex);
2122 };
2123
2124
2125 #ifdef HARD_DEBUG_MSEG
2126 static void hard_dbg_mseg_init(void);
2127 #endif
2128
2129 void
erts_mmap_init(ErtsMemMapper * mm,ErtsMMapInit * init)2130 erts_mmap_init(ErtsMemMapper* mm, ErtsMMapInit *init)
2131 {
2132 static int is_first_call = 1;
2133 int virtual_map = 0;
2134 char *start = NULL, *end = NULL;
2135 UWord pagesize;
2136 #if defined(__WIN32__)
2137 SYSTEM_INFO sysinfo;
2138 GetSystemInfo(&sysinfo);
2139 pagesize = (UWord) sysinfo.dwPageSize;
2140 #elif defined(_SC_PAGESIZE)
2141 pagesize = (UWord) sysconf(_SC_PAGESIZE);
2142 #elif defined(HAVE_GETPAGESIZE)
2143 pagesize = (UWord) getpagesize();
2144 #else
2145 # error "Do not know how to get page size"
2146 #endif
2147 #if defined(HARD_DEBUG) || 0
2148 erts_fprintf(stderr, "erts_mmap: scs = %bpu\n", init->scs);
2149 erts_fprintf(stderr, "erts_mmap: sco = %i\n", init->sco);
2150 erts_fprintf(stderr, "erts_mmap: scrfsd = %i\n", init->scrfsd);
2151 #endif
2152 erts_page_inv_mask = pagesize - 1;
2153 if (pagesize & erts_page_inv_mask)
2154 erts_exit(1, "erts_mmap: Invalid pagesize: %bpu\n",
2155 pagesize);
2156
2157 ERTS_MMAP_OP_RINGBUF_INIT();
2158
2159 mm->supercarrier = 0;
2160 mm->reserve_physical = reserve_noop;
2161 mm->unreserve_physical = unreserve_noop;
2162
2163 #if HAVE_MMAP && !defined(MAP_ANON)
2164 mm->mmap_fd = open("/dev/zero", O_RDWR);
2165 if (mm->mmap_fd < 0)
2166 erts_exit(1, "erts_mmap: Failed to open /dev/zero\n");
2167 #endif
2168
2169 erts_mtx_init(&mm->mtx, "erts_mmap", NIL,
2170 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
2171 if (is_first_call) {
2172 erts_mtx_init(&am.init_mutex, "mmap_init_atoms", NIL,
2173 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_GENERIC);
2174 }
2175
2176 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2177 if (init->virtual_range.start) {
2178 char *ptr;
2179 UWord sz;
2180 ptr = (char *) ERTS_PAGEALIGNED_CEILING(init->virtual_range.start);
2181 end = (char *) ERTS_PAGEALIGNED_FLOOR(init->virtual_range.end);
2182 sz = end - ptr;
2183 start = os_mmap_virtual(ptr, sz);
2184 if (!start || start > ptr || start >= end)
2185 erts_exit(1,
2186 "erts_mmap: Failed to create virtual range for super carrier\n");
2187 sz = start - ptr;
2188 if (sz)
2189 os_munmap(end, sz);
2190 mm->reserve_physical = os_reserve_physical;
2191 mm->unreserve_physical = os_unreserve_physical;
2192 virtual_map = 1;
2193 }
2194 else
2195 #endif
2196 if (init->predefined_area.start) {
2197 start = init->predefined_area.start;
2198 end = init->predefined_area.end;
2199 if (end != (void *) 0 && end < start)
2200 end = start;
2201 }
2202 #if ERTS_HAVE_OS_MMAP
2203 else if (init->scs) {
2204 UWord sz;
2205 sz = ERTS_PAGEALIGNED_CEILING(init->scs);
2206 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2207 if (!init->scrpm) {
2208 start = os_mmap_virtual(NULL, sz);
2209 mm->reserve_physical = os_reserve_physical;
2210 mm->unreserve_physical = os_unreserve_physical;
2211 virtual_map = 1;
2212 }
2213 else
2214 #endif
2215 {
2216 /*
2217 * The whole supercarrier will by physically
2218 * reserved all the time.
2219 */
2220 start = os_mmap(NULL, sz, 1);
2221 }
2222 if (!start)
2223 erts_exit(1,
2224 "erts_mmap: Failed to create super carrier of size %bpu MB\n",
2225 init->scs/1024/1024);
2226 end = start + sz;
2227 #ifdef ERTS_MMAP_DEBUG_FILL_AREAS
2228 if (!virtual_map) {
2229 Uint32 *uip;
2230
2231 for (uip = (Uint32 *) start; uip < (Uint32 *) end; uip++)
2232 *uip = (Uint32) 0xdeadbeef;
2233 }
2234 #endif
2235 }
2236 #endif
2237
2238 mm->no.free_seg_descs = 0;
2239 mm->no.free_segs.curr = 0;
2240 mm->no.free_segs.max = 0;
2241
2242 mm->size.supercarrier.total = 0;
2243 mm->size.supercarrier.used.total = 0;
2244 mm->size.supercarrier.used.sa = 0;
2245 mm->size.supercarrier.used.sua = 0;
2246 mm->size.os.used = 0;
2247
2248 mm->desc.new_area_hint = NULL;
2249
2250 if (!start) {
2251 mm->sa.bot = NULL;
2252 mm->sua.top = NULL;
2253 mm->sa.bot = NULL;
2254 mm->sua.top = NULL;
2255 mm->no_os_mmap = 0;
2256 mm->supercarrier = 0;
2257 }
2258 else {
2259 size_t desc_size;
2260
2261 mm->no_os_mmap = init->sco;
2262
2263 desc_size = init->scrfsd;
2264 if (desc_size < 100)
2265 desc_size = 100;
2266 desc_size *= sizeof(ErtsFreeSegDesc);
2267 if ((desc_size
2268 + ERTS_SUPERALIGNED_SIZE
2269 + ERTS_PAGEALIGNED_SIZE) > end - start)
2270 erts_exit(1, "erts_mmap: No space for segments in super carrier\n");
2271
2272 mm->sa.bot = start;
2273 mm->sa.bot += desc_size;
2274 mm->sa.bot = (char *) ERTS_SUPERALIGNED_CEILING(mm->sa.bot);
2275 mm->sa.top = mm->sa.bot;
2276 mm->sua.top = end;
2277 mm->sua.bot = mm->sua.top;
2278
2279 mm->size.supercarrier.used.total += (UWord) (mm->sa.bot - start);
2280
2281 mm->desc.free_list = NULL;
2282 mm->desc.reserved = 0;
2283
2284 if (end == (void *) 0) {
2285 /*
2286 * Very unlikely, but we need a guarantee
2287 * that `mm->sua.top` always will
2288 * compare as larger than all segment pointers
2289 * into the super carrier...
2290 */
2291 mm->sua.top -= ERTS_PAGEALIGNED_SIZE;
2292 mm->size.supercarrier.used.total += ERTS_PAGEALIGNED_SIZE;
2293 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2294 if (!virtual_map || os_reserve_physical(mm->sua.top, ERTS_PAGEALIGNED_SIZE))
2295 #endif
2296 add_free_desc_area(mm, mm->sua.top, end);
2297 mm->desc.reserved += (end - mm->sua.top) / sizeof(ErtsFreeSegDesc);
2298 }
2299
2300 mm->size.supercarrier.total = (UWord) (mm->sua.top - start);
2301
    /*
     * Area before (and after) super carrier
     * will be used for free segment descriptors.
     */
2306 #ifdef ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION
2307 if (virtual_map && !os_reserve_physical(start, mm->sa.bot - start))
2308 erts_exit(1, "erts_mmap: Failed to reserve physical memory for descriptors\n");
2309 #endif
2310 mm->desc.unused_start = start;
2311 mm->desc.unused_end = mm->sa.bot;
2312 mm->desc.reserved += ((mm->desc.unused_end - start)
2313 / sizeof(ErtsFreeSegDesc));
2314
2315 init_free_seg_map(&mm->sa.map, SA_SZ_ADDR_ORDER);
2316 init_free_seg_map(&mm->sua.map, SZ_REVERSE_ADDR_ORDER);
2317
2318 mm->supercarrier = 1;
2319
2320 mm->desc.new_area_hint = end;
2321
2322 }
2323
2324 #if !ERTS_HAVE_OS_MMAP
2325 mm->no_os_mmap = 1;
2326 #endif
2327
2328 #ifdef HARD_DEBUG_MSEG
2329 hard_dbg_mseg_init();
2330 #endif
2331
2332 #if defined(ARCH_64) && defined(ERTS_HAVE_OS_PHYSICAL_MEMORY_RESERVATION)
2333 if (mm == &erts_literal_mmapper) {
2334 erts_literals_start = erts_literal_mmapper.sa.bot;
2335 erts_literals_size = erts_literal_mmapper.sua.top - erts_literals_start;
2336 }
2337 #endif
2338 is_first_call = 0;
2339 }
2340
2341
2342 static ERTS_INLINE void
add_2tup(Uint ** hpp,Uint * szp,Eterm * lp,Eterm el1,Eterm el2)2343 add_2tup(Uint **hpp, Uint *szp, Eterm *lp, Eterm el1, Eterm el2)
2344 {
2345 *lp = erts_bld_cons(hpp, szp, erts_bld_tuple(hpp, szp, 2, el1, el2), *lp);
2346 }
2347
/*
 * Return information about memory mapper 'mm' as an Erlang term and/or
 * print the same information via '*print_to_p'.
 *
 * Term building follows the usual erts_bld_* two-pass protocol: the
 * caller first invokes with hpp == NULL (szp set) to compute the heap
 * size needed, then again with hpp set to actually build the term.
 * The mapper state is snapshotted into 'emis' (under mm->mtx) only in
 * the first pass (hpp == NULL), so both passes — and any printout —
 * report one consistent snapshot.
 */
Eterm erts_mmap_info(ErtsMemMapper* mm,
                     fmtfn_t *print_to_p,
                     void *print_to_arg,
                     Eterm** hpp, Uint* szp,
                     struct erts_mmap_info_struct* emis)
{
    /* Atom tags, index-matched with emis->sizes[] and emis->segs[]. */
    Eterm size_tags[] = { am.total, am.total_sa, am.total_sua, am.used, am.used_sa, am.used_sua };
    Eterm seg_tags[] = { am.used, am.max, am.allocated, am.reserved, am.used_sa, am.used_sua };
    Eterm group[2];
    Eterm group_tags[] = { am.sizes, am.free_segs };
    Eterm list[3];
    Eterm list_tags[3]; /* { am.options, am.supercarrier, am.os } */
    int lix = 0;
    Eterm res = THE_NON_VALUE;

    if (!hpp) {
        /* First pass: snapshot mapper state under the lock. */
        erts_mtx_lock(&mm->mtx);
        emis->sizes[0] = mm->size.supercarrier.total;
        emis->sizes[1] = mm->sa.top - mm->sa.bot;   /* superaligned area */
        emis->sizes[2] = mm->sua.top - mm->sua.bot; /* superunaligned area */
        emis->sizes[3] = mm->size.supercarrier.used.total;
        emis->sizes[4] = mm->size.supercarrier.used.sa;
        emis->sizes[5] = mm->size.supercarrier.used.sua;

        emis->segs[0] = mm->no.free_segs.curr;
        emis->segs[1] = mm->no.free_segs.max;
        emis->segs[2] = mm->no.free_seg_descs;
        emis->segs[3] = mm->desc.reserved;
        emis->segs[4] = mm->sa.map.nseg;
        emis->segs[5] = mm->sua.map.nseg;

        emis->os_used = mm->size.os.used;
        erts_mtx_unlock(&mm->mtx);
    }

    /* The "options" entry is always present in the result list. */
    list[lix] = erts_mmap_info_options(mm, "option ", print_to_p, print_to_arg,
                                       hpp, szp);
    list_tags[lix] = am.options;
    lix++;


    if (print_to_p) {
        /* Print the snapshot taken above. */
        fmtfn_t to = *print_to_p;
        void *arg = print_to_arg;
        if (mm->supercarrier) {
            const char* prefix = "supercarrier ";
            erts_print(to, arg, "%stotal size: %bpu\n", prefix, emis->sizes[0]);
            erts_print(to, arg, "%stotal sa size: %bpu\n", prefix, emis->sizes[1]);
            erts_print(to, arg, "%stotal sua size: %bpu\n", prefix, emis->sizes[2]);
            erts_print(to, arg, "%sused size: %bpu\n", prefix, emis->sizes[3]);
            erts_print(to, arg, "%sused sa size: %bpu\n", prefix, emis->sizes[4]);
            erts_print(to, arg, "%sused sua size: %bpu\n", prefix, emis->sizes[5]);
            erts_print(to, arg, "%sused free segs: %bpu\n", prefix, emis->segs[0]);
            erts_print(to, arg, "%smax free segs: %bpu\n", prefix, emis->segs[1]);
            erts_print(to, arg, "%sallocated free segs: %bpu\n", prefix, emis->segs[2]);
            erts_print(to, arg, "%sreserved free segs: %bpu\n", prefix, emis->segs[3]);
            erts_print(to, arg, "%ssa free segs: %bpu\n", prefix, emis->segs[4]);
            erts_print(to, arg, "%ssua free segs: %bpu\n", prefix, emis->segs[5]);
        }
        if (!mm->no_os_mmap) {
            erts_print(to, arg, "os mmap size used: %bpu\n", emis->os_used);
        }
    }


    if (hpp || szp) {
        if (!am.is_initialized) {
            init_atoms();
        }

        if (mm->supercarrier) {
            group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
                                                     sizeof(size_tags)/sizeof(Eterm),
                                                     size_tags, emis->sizes);
            group[1] = erts_bld_atom_uword_2tup_list(hpp, szp,
                                                     sizeof(seg_tags)/sizeof(Eterm),
                                                     seg_tags, emis->segs);
            list[lix] = erts_bld_2tup_list(hpp, szp, 2, group_tags, group);
            list_tags[lix] = am.supercarrier;
            lix++;
        }

        if (!mm->no_os_mmap) {
            /* Only group_tags[0] (am.sizes) is used for this entry. */
            group[0] = erts_bld_atom_uword_2tup_list(hpp, szp,
                                                     1, &am.used, &emis->os_used);
            list[lix] = erts_bld_2tup_list(hpp, szp, 1, group_tags, group);
            list_tags[lix] = am.os;
            lix++;
        }
        res = erts_bld_2tup_list(hpp, szp, lix, list_tags, list);
    }
    return res;
}
2441
erts_mmap_info_options(ErtsMemMapper * mm,char * prefix,fmtfn_t * print_to_p,void * print_to_arg,Uint ** hpp,Uint * szp)2442 Eterm erts_mmap_info_options(ErtsMemMapper* mm,
2443 char *prefix,
2444 fmtfn_t *print_to_p,
2445 void *print_to_arg,
2446 Uint **hpp,
2447 Uint *szp)
2448 {
2449 const UWord scs = mm->sua.top - mm->sa.bot;
2450 const Eterm sco = mm->no_os_mmap ? am_true : am_false;
2451 const Eterm scrpm = (mm->reserve_physical == reserve_noop) ? am_true : am_false;
2452 Eterm res = THE_NON_VALUE;
2453
2454 if (print_to_p) {
2455 fmtfn_t to = *print_to_p;
2456 void *arg = print_to_arg;
2457 erts_print(to, arg, "%sscs: %bpu\n", prefix, scs);
2458 if (mm->supercarrier) {
2459 erts_print(to, arg, "%ssco: %T\n", prefix, sco);
2460 erts_print(to, arg, "%sscrpm: %T\n", prefix, scrpm);
2461 erts_print(to, arg, "%sscrfsd: %beu\n", prefix, mm->desc.reserved);
2462 }
2463 }
2464
2465 if (hpp || szp) {
2466 if (!am.is_initialized) {
2467 init_atoms();
2468 }
2469
2470 res = NIL;
2471 if (mm->supercarrier) {
2472 add_2tup(hpp, szp, &res, am.scrfsd,
2473 erts_bld_uint(hpp,szp, mm->desc.reserved));
2474 add_2tup(hpp, szp, &res, am.scrpm, scrpm);
2475 add_2tup(hpp, szp, &res, am.sco, sco);
2476 }
2477 add_2tup(hpp, szp, &res, am.scs, erts_bld_uword(hpp, szp, scs));
2478 }
2479 return res;
2480 }
2481
2482 #endif /* HAVE_ERTS_MMAP */
2483
/*
 * Build a debug info list for the default mapper's supercarrier:
 * its area boundaries (sabot/satop/suabot/suatop) plus the free
 * segment lists of the superaligned (sa) and superunaligned (sua)
 * areas.  Returns am_undefined when there is no supercarrier (or
 * when erts_mmap is not available at all).
 */
Eterm erts_mmap_debug_info(Process* p)
{
#if HAVE_ERTS_MMAP
    ErtsMemMapper* mm = &erts_dflt_mmapper;

    if (mm->supercarrier) {
        ERTS_DECL_AM(sabot);
        ERTS_DECL_AM(satop);
        ERTS_DECL_AM(suabot);
        ERTS_DECL_AM(suatop);
        Eterm sa_list, sua_list, list;
        Eterm tags[] = { AM_sabot, AM_satop, AM_suabot, AM_suatop };
        UWord values[4];
        Eterm *hp, *hp_end;
        Uint may_need;

        /* Snapshot boundaries and free-segment lists under the mapper
         * lock; build_free_seg_list() allocates on p's heap itself. */
        erts_mtx_lock(&mm->mtx);
        values[0] = (UWord)mm->sa.bot;
        values[1] = (UWord)mm->sa.top;
        values[2] = (UWord)mm->sua.bot;
        values[3] = (UWord)mm->sua.top;
        sa_list = build_free_seg_list(p, &mm->sa.map);
        sua_list = build_free_seg_list(p, &mm->sua.map);
        erts_mtx_unlock(&mm->mtx);

        /* Heap-word budget: 4 tagged values (cons 2 + tuple 3 +
         * presumably 2 extra words per possibly-boxed uword — TODO
         * confirm against erts_bld_atom_uword_2tup_list) plus two
         * tagged lists (tuple 3 + cons 2 each). */
        may_need = 4*(2+3+2) + 2*(2+3);
        hp = HAlloc(p, may_need);
        hp_end = hp + may_need;

        list = erts_bld_atom_uword_2tup_list(&hp, NULL,
                                             sizeof(values)/sizeof(*values),
                                             tags, values);

        sa_list = TUPLE2(hp, ERTS_MAKE_AM("sa_free_segs"), sa_list); hp+=3;
        sua_list = TUPLE2(hp, ERTS_MAKE_AM("sua_free_segs"), sua_list); hp+=3;
        list = CONS(hp, sua_list, list); hp+=2;
        list = CONS(hp, sa_list, list); hp+=2;

        /* Give back any unused reserved heap words. */
        ASSERT(hp <= hp_end);
        HRelease(p, hp_end, hp);
        return list;
    }
#endif
    return am_undefined;
}
2529
2530
2531 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *\
2532 * Debug functions *
2533 \* */
2534
2535
2536 #ifdef HARD_DEBUG
2537
/* Assert that 'node' is reachable from 'root' via parent links, and
 * that every node on the way is a proper child of its parent.
 * Always returns 1 so it can be used inside RBT_ASSERT(). */
static int rbt_assert_is_member(RBTNode* root, RBTNode* node)
{
    RBTNode *cur;

    for (cur = node; cur != root; cur = parent(cur)) {
        RBT_ASSERT(parent(cur));
        RBT_ASSERT(parent(cur)->left == cur || parent(cur)->right == cur);
    }
    return 1;
}
2547
2548
2549 #if 0
2550 # define PRINT_TREE
2551 #else
2552 # undef PRINT_TREE
2553 #endif
2554
2555 #ifdef PRINT_TREE
2556 static void print_tree(enum SortOrder order, RBTNode*);
2557 #endif
2558
/*
 * Checks that the order between parent and children is correct,
 * and that the Red-Black Tree properties are satisfied. If size > 0,
 * check_tree() returns the node that satisfies "address order first fit"
 *
 * The Red-Black Tree properties are:
 *   1. Every node is either red or black.
 *   2. Every leaf (NIL) is black.
 *   3. If a node is red, then both its children are black.
 *   4. Every simple path from a node to a descendant leaf
 *      contains the same number of black nodes.
 *
 */
2572
/* State threaded through check_tree() -> check_node_callback(). */
struct check_arg_t {
    RBTree* tree;              /* tree being verified */
    ErtsFreeSegDesc* prev_seg; /* previously visited segment (ADDR_ORDER only) */
    Uint size;                 /* if > 0: search for first fit of this size */
    RBTNode *res;              /* best first-fit candidate found so far */
};
static void check_node_callback(RBTNode* x, void* arg);
2580
2581
2582 static RBTNode *
check_tree(RBTree * tree,Uint size)2583 check_tree(RBTree* tree, Uint size)
2584 {
2585 struct check_arg_t carg;
2586 carg.tree = tree;
2587 carg.prev_seg = NULL;
2588 carg.size = size;
2589 carg.res = NULL;
2590
2591 #ifdef PRINT_TREE
2592 print_tree(tree->order, tree->root);
2593 #endif
2594
2595 if (!tree->root)
2596 return NULL;
2597
2598 RBT_ASSERT(IS_BLACK(tree->root));
2599 RBT_ASSERT(!parent(tree->root));
2600
2601 rbt_foreach_node(tree, check_node_callback, &carg, 0);
2602
2603 return carg.res;
2604 }
2605
/*
 * Per-node callback for check_tree().  Verifies red-black property 3
 * (a red node has black children), parent/child ordering and segment
 * well-formedness, and records in a->res the candidate that can hold
 * a->size bytes and comes first in tree order ("first fit").
 */
static void check_node_callback(RBTNode* x, void* arg)
{
    struct check_arg_t* a = (struct check_arg_t*) arg;
    ErtsFreeSegDesc* seg;

    if (IS_RED(x)) {
        /* Property 3: both children of a red node must be black. */
        RBT_ASSERT(IS_BLACK(x->right));
        RBT_ASSERT(IS_BLACK(x->left));
    }

    /* Only the root may lack a parent. */
    RBT_ASSERT(parent(x) || x == a->tree->root);

    /* Children must be ordered relative to their parent. */
    if (x->left) {
        RBT_ASSERT(cmp_nodes(a->tree->order, x->left, x) < 0);
    }
    if (x->right) {
        RBT_ASSERT(cmp_nodes(a->tree->order, x->right, x) > 0);
    }

    seg = node_to_desc(a->tree->order, x);
    /* A free segment must have a strictly positive size. */
    RBT_ASSERT(seg->start < seg->end);
    if (a->size && (seg->end - seg->start) >= a->size) {
        /* Large enough; keep the candidate that is first in tree order. */
        if (!a->res || cmp_nodes(a->tree->order, x, a->res) < 0) {
            a->res = x;
        }
    }
    if (a->tree->order == ADDR_ORDER) {
        /* Address-ordered trees must hold disjoint, ascending segments. */
        RBT_ASSERT(!a->prev_seg || a->prev_seg->end < seg->start);
        a->prev_seg = seg;
    }
}
2637
2638 #endif /* HARD_DEBUG */
2639
2640
2641 #ifdef PRINT_TREE
2642 #define INDENT_STEP 2
2643
2644 #include <stdio.h>
2645
2646 static void
print_tree_aux(enum SortOrder order,RBTNode * x,int indent)2647 print_tree_aux(enum SortOrder order, RBTNode *x, int indent)
2648 {
2649 int i;
2650
2651 if (x) {
2652 ErtsFreeSegDesc* desc = node_to_desc(order, x);
2653 print_tree_aux(order, x->right, indent + INDENT_STEP);
2654 for (i = 0; i < indent; i++) {
2655 putc(' ', stderr);
2656 }
2657 fprintf(stderr, "%s: sz=%lx [%p - %p] desc=%p\r\n",
2658 IS_BLACK(x) ? "BLACK" : "RED",
2659 desc->end - desc->start, desc->start, desc->end, desc);
2660 print_tree_aux(order, x->left, indent + INDENT_STEP);
2661 }
2662 }
2663
2664
2665 static void
print_tree(enum SortOrder order,RBTNode * root)2666 print_tree(enum SortOrder order, RBTNode* root)
2667 {
2668 fprintf(stderr, " --- %s ordered tree begin ---\r\n", sort_order_names[order]);
2669 print_tree_aux(order, root, 0);
2670 fprintf(stderr, " --- %s ordered tree end ---\r\n", sort_order_names[order]);
2671 }
2672
2673 #endif /* PRINT_TREE */
2674
2675
2676 #ifdef FREE_SEG_API_SMOKE_TEST
2677
/*
 * Smoke test of the free-segment map API using hard coded addresses.
 * Compiled only under FREE_SEG_API_SMOKE_TEST; not called in production.
 * Exercises insert, size lookup, neighbor lookup, resize and delete,
 * checking both internal trees after every mutation.
 */
void test_it(void)
{
    ErtsFreeSegMap map;
    ErtsFreeSegDesc *desc, *under, *over, *d1, *d2;
    const int i = 1; /* reverse addr order */

    {
        init_free_seg_map(&map, SZ_REVERSE_ADDR_ORDER);

        /* Populate the map with four disjoint segments. */
        insert_free_seg(&map, alloc_desc(), (char*)0x11000, (char*)0x12000);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
        insert_free_seg(&map, alloc_desc(), (char*)0x13000, (char*)0x14000);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
        insert_free_seg(&map, alloc_desc(), (char*)0x15000, (char*)0x17000);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);
        insert_free_seg(&map, alloc_desc(), (char*)0x8000, (char*)0x10000);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);

        /* lookup_free_seg(): pick a segment by size, result depends on
         * the configured sort order (reverse addr order here, see 'i'). */
        desc = lookup_free_seg(&map, 0x500);
        ERTS_ASSERT(desc->start == (char*)(i?0x13000L:0x11000L));

        desc = lookup_free_seg(&map, 0x1500);
        ERTS_ASSERT(desc->start == (char*)0x15000);

        /* adjacent_free_seg(): find free segments directly bordering
         * a given range (under = ends at range start, over = begins
         * at range end). */
        adjacent_free_seg(&map, (char*)0x6666, (char*)0x7777, &under, &over);
        ERTS_ASSERT(!under && !over);

        adjacent_free_seg(&map, (char*)0x6666, (char*)0x8000, &under, &over);
        ERTS_ASSERT(!under && over->start == (char*)0x8000);

        adjacent_free_seg(&map, (char*)0x10000, (char*)0x10500, &under, &over);
        ERTS_ASSERT(under->end == (char*)0x10000 && !over);

        adjacent_free_seg(&map, (char*)0x10100, (char*)0x10500, &under, &over);
        ERTS_ASSERT(!under && !over);

        adjacent_free_seg(&map, (char*)0x10100, (char*)0x11000, &under, &over);
        ERTS_ASSERT(!under && over && over->start == (char*)0x11000);

        adjacent_free_seg(&map, (char*)0x12000, (char*)0x12500, &under, &over);
        ERTS_ASSERT(under && under->end == (char*)0x12000 && !over);

        adjacent_free_seg(&map, (char*)0x12000, (char*)0x13000, &under, &over);
        ERTS_ASSERT(under && under->end == (char*)0x12000 &&
                    over && over->start == (char*)0x13000);

        adjacent_free_seg(&map, (char*)0x12500, (char*)0x13000, &under, &over);
        ERTS_ASSERT(!under && over && over->start == (char*)0x13000);

        /* Grow a segment downwards in place, then delete it. */
        d1 = lookup_free_seg(&map, 0x500);
        ERTS_ASSERT(d1->start == (char*)(i?0x13000L:0x11000L));

        resize_free_seg(&map, d1, d1->start - 0x800, (char*)d1->end);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);

        /* After growing it must satisfy a larger size request. */
        d2 = lookup_free_seg(&map, 0x1200);
        ERTS_ASSERT(d2 == d1);

        delete_free_seg(&map, d1);
        HARD_CHECK_TREE(&map.atree, 0); HARD_CHECK_TREE(&map.stree, 0);

        /* The next candidate for that size is now a different segment. */
        d1 = lookup_free_seg(&map, 0x1200);
        ERTS_ASSERT(d1->start == (char*)0x15000);
    }
}
2743
2744 #endif /* FREE_SEG_API_SMOKE_TEST */
2745
2746
2747 #ifdef HARD_DEBUG_MSEG
2748
2749 /*
2750 * Debug stuff used by erl_mseg to check that it does the right thing.
2751 * The reason for keeping it here is that we (ab)use the rb-tree code
2752 * for keeping track of *allocated* segments.
2753 */
2754
/* Slimmed stand-in for ErtsFreeSegDesc: identical layout from 'anode'
 * on, but without the size-tree node ('snode'), which this debug code
 * never uses.  hard_dbg_alloc_desc()/hard_dbg_free_desc() convert
 * between the two via offsetof(ErtsFreeSegDesc, anode). */
typedef struct ErtsFreeSegDesc_fake_ {
    /*RBTNode snode;    Save memory by skipping unused size tree node */
    RBTNode anode;      /* node in 'atree' */
    union {
        char* start;    /* segment start, valid while allocated */
        struct ErtsFreeSegDesc_fake_* next_free; /* free-list link, valid while free */
    }u;
    char* end;
}ErtsFreeSegDesc_fake;

/* Fixed-size descriptor pool and its free-list head. */
static ErtsFreeSegDesc_fake hard_dbg_mseg_desc_pool[10000];
static ErtsFreeSegDesc_fake* hard_dbg_mseg_desc_first;
/* Address-ordered tree of all segments currently allocated by erl_mseg. */
RBTree hard_dbg_mseg_tree;

/* Protects the pool free list and the tree above. */
static erts_mtx_t hard_dbg_mseg_mtx;
2770
hard_dbg_mseg_init(void)2771 static void hard_dbg_mseg_init(void)
2772 {
2773 ErtsFreeSegDesc_fake* p;
2774
2775 erts_mtx_init(&hard_dbg_mseg_mtx, "hard_dbg_mseg", NIL,
2776 ERTS_LOCK_FLAGS_PROPERTY_STATIC | ERTS_LOCK_FLAGS_CATEGORY_DEBUG);
2777 hard_dbg_mseg_tree.root = NULL;
2778 hard_dbg_mseg_tree.order = ADDR_ORDER;
2779
2780 p = &hard_dbg_mseg_desc_pool[(sizeof(hard_dbg_mseg_desc_pool) /
2781 sizeof(*hard_dbg_mseg_desc_pool)) - 1];
2782 p->u.next_free = NULL;
2783 while (--p >= hard_dbg_mseg_desc_pool) {
2784 p->u.next_free = (p+1);
2785 }
2786 hard_dbg_mseg_desc_first = &hard_dbg_mseg_desc_pool[0];
2787 }
2788
/*
 * Pop a descriptor from the pool free list (aborts when exhausted).
 * Pair with hard_dbg_free_desc(), which reverses the offset trick below.
 */
static ErtsFreeSegDesc* hard_dbg_alloc_desc(void)
{
    ErtsFreeSegDesc_fake* p = hard_dbg_mseg_desc_first;
    ERTS_ASSERT(p || !"HARD_DEBUG_MSEG: Out of mseg descriptors");
    hard_dbg_mseg_desc_first = p->u.next_free;

    /* Creative pointer arithmetic to return something that looks like
     * a ErtsFreeSegDesc as long as we don't use the absent 'snode'.
     */
    return (ErtsFreeSegDesc*) ((char*)p - offsetof(ErtsFreeSegDesc,anode));
}
2800
/* Return a descriptor from hard_dbg_alloc_desc() to the pool free list. */
static void hard_dbg_free_desc(ErtsFreeSegDesc* desc)
{
    /* Reverse the offset applied in hard_dbg_alloc_desc(): 'anode' is
     * the first member of the fake descriptor. */
    ErtsFreeSegDesc_fake* fake = (ErtsFreeSegDesc_fake*) &desc->anode;

    memset(fake, 0xfe, sizeof(*fake)); /* poison to catch stale use */
    fake->u.next_free = hard_dbg_mseg_desc_first;
    hard_dbg_mseg_desc_first = fake;
}
2808
/*
 * Probe that the whole page-aligned segment [seg, seg+sz) is mapped
 * and writable: touch one word per page (the stride presumably equals
 * the page size in UWords — derived from ERTS_INV_PAGEALIGNED_MASK),
 * write a probe pattern and restore the original value.  The volatile
 * pointer keeps the compiler from eliding the probe stores.
 */
static void check_seg_writable(void* seg, UWord sz)
{
    UWord* seg_end = (UWord*)((char*)seg + sz);
    volatile UWord* p;
    ERTS_ASSERT(ERTS_IS_PAGEALIGNED(seg));
    ERTS_ASSERT(ERTS_IS_PAGEALIGNED(sz));
    for (p=(UWord*)seg; p<seg_end; p += (ERTS_INV_PAGEALIGNED_MASK+1)/sizeof(UWord)) {
        UWord write_back = *p;
        *p = 0xfade2b1acc;   /* probe write */
        *p = write_back;     /* restore original contents */
    }
}
2821
/*
 * Record that segment [seg, seg+sz) was handed out by erl_mseg.
 * Verifies the whole segment is writable and that it does not overlap
 * any previously recorded segment.
 */
void hard_dbg_insert_mseg(void* seg, UWord sz)
{
    check_seg_writable(seg, sz);
    erts_mtx_lock(&hard_dbg_mseg_mtx);
    {
        ErtsFreeSegDesc *desc = hard_dbg_alloc_desc();
        RBTNode *prev, *next;
        desc->start = (char*)seg;
        desc->end = desc->start + sz - 1; /* -1 to allow adjacent segments in tree */
        rbt_insert(&hard_dbg_mseg_tree, &desc->anode);
        /* Overlap check against the address-ordered neighbors. */
        prev = rbt_prev_node(&desc->anode);
        next = rbt_next_node(&desc->anode);
        ERTS_ASSERT(!prev || anode_to_desc(prev)->end < desc->start);
        ERTS_ASSERT(!next || anode_to_desc(next)->start > desc->end);
    }
    erts_mtx_unlock(&hard_dbg_mseg_mtx);
}
2839
hard_dbg_lookup_seg_at(RBTree * tree,char * start)2840 static ErtsFreeSegDesc* hard_dbg_lookup_seg_at(RBTree* tree, char* start)
2841 {
2842 RBTNode* x = tree->root;
2843
2844 while (x) {
2845 ErtsFreeSegDesc* desc = anode_to_desc(x);
2846 if (start < desc->start) {
2847 x = x->left;
2848 }
2849 else if (start > desc->start) {
2850 ERTS_ASSERT(start > desc->end);
2851 x = x->right;
2852 }
2853 else
2854 return desc;
2855 }
2856 return NULL;
2857 }
2858
/*
 * Record that segment [seg, seg+sz) is being returned by erl_mseg.
 * Asserts that a segment with exactly this start and size was
 * previously inserted via hard_dbg_insert_mseg().
 */
void hard_dbg_remove_mseg(void* seg, UWord sz)
{
    check_seg_writable(seg, sz);
    erts_mtx_lock(&hard_dbg_mseg_mtx);
    {
        ErtsFreeSegDesc* desc = hard_dbg_lookup_seg_at(&hard_dbg_mseg_tree, (char*)seg);
        ERTS_ASSERT(desc);
        ERTS_ASSERT(desc->start == (char*)seg);
        ERTS_ASSERT(desc->end == (char*)seg + sz - 1); /* same -1 as at insert */

        rbt_delete(&hard_dbg_mseg_tree, &desc->anode);
        hard_dbg_free_desc(desc);
    }
    erts_mtx_unlock(&hard_dbg_mseg_mtx);
}
2874
2875 #endif /* HARD_DEBUG_MSEG */
2876