/*
** Bundled memory allocator.
**
** Beware: this is a HEAVILY CUSTOMIZED version of dlmalloc.
** The original bears the following remark:
**
**   This is a version (aka dlmalloc) of malloc/free/realloc written by
**   Doug Lea and released to the public domain, as explained at
**   https://creativecommons.org/licenses/publicdomain.
**
**   * Version pre-2.8.4 Wed Mar 29 19:46:29 2006    (dl at gee)
**
** No additional copyright is claimed over the customizations.
** Please do NOT bother the original author about this version here!
**
** If you want to use dlmalloc in another project, you should get
** the original from: ftp://gee.cs.oswego.edu/pub/misc/
** For thread-safe derivatives, take a look at:
** - ptmalloc: https://www.malloc.de/
** - nedmalloc: https://www.nedprod.com/programs/portable/nedmalloc/
*/

#define lj_alloc_c
#define LUA_CORE

/* To get the mremap prototype. Must be defined before any system includes. */
#if defined(__linux__) && !defined(_GNU_SOURCE)
#define _GNU_SOURCE
#endif

#include "lj_def.h"
#include "lj_arch.h"
#include "lj_alloc.h"
#include "lj_prng.h"

#ifndef LUAJIT_USE_SYSMALLOC

#define MAX_SIZE_T		(~(size_t)0)
#define MALLOC_ALIGNMENT	((size_t)8U)

#define DEFAULT_GRANULARITY	((size_t)128U * (size_t)1024U)
#define DEFAULT_TRIM_THRESHOLD	((size_t)2U * (size_t)1024U * (size_t)1024U)
#define DEFAULT_MMAP_THRESHOLD	((size_t)128U * (size_t)1024U)
#define MAX_RELEASE_CHECK_RATE	255

/* ------------------- size_t and alignment properties -------------------- */

/* The byte and bit size of a size_t */
#define SIZE_T_SIZE		(sizeof(size_t))
#define SIZE_T_BITSIZE		(sizeof(size_t) << 3)

/* Some constants coerced to size_t */
/* Annoying but necessary to avoid errors on some platforms */
#define SIZE_T_ZERO		((size_t)0)
#define SIZE_T_ONE		((size_t)1)
#define SIZE_T_TWO		((size_t)2)
#define TWO_SIZE_T_SIZES	(SIZE_T_SIZE<<1)
#define FOUR_SIZE_T_SIZES	(SIZE_T_SIZE<<2)
#define SIX_SIZE_T_SIZES	(FOUR_SIZE_T_SIZES+TWO_SIZE_T_SIZES)

/* The bit mask value corresponding to MALLOC_ALIGNMENT */
#define CHUNK_ALIGN_MASK	(MALLOC_ALIGNMENT - SIZE_T_ONE)

/* the number of bytes to offset an address to align it */
#define align_offset(A)\
 ((((size_t)(A) & CHUNK_ALIGN_MASK) == 0)? 0 :\
  ((MALLOC_ALIGNMENT - ((size_t)(A) & CHUNK_ALIGN_MASK)) & CHUNK_ALIGN_MASK))
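/* Worked example (illustrative only): with MALLOC_ALIGNMENT = 8,
** align_offset(0x1005) = (8 - (0x1005 & 7)) & 7 = 3, i.e. three bytes of
** padding bring the address up to the next 8-byte boundary, while
** align_offset(0x1008) = 0 since that address is already aligned.
*/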

/* -------------------------- MMAP support ------------------------------- */

#define MFAIL			((void *)(MAX_SIZE_T))
#define CMFAIL			((char *)(MFAIL)) /* defined for convenience */

#define IS_DIRECT_BIT		(SIZE_T_ONE)


/* Determine system-specific block allocation method. */
#if LJ_TARGET_WINDOWS

#define WIN32_LEAN_AND_MEAN
#include <windows.h>

#define LJ_ALLOC_VIRTUALALLOC	1

#if LJ_64 && !LJ_GC64
#define LJ_ALLOC_NTAVM		1
#endif

#else

#include <errno.h>
/* If this include fails, then rebuild with: -DLUAJIT_USE_SYSMALLOC */
#include <sys/mman.h>

#define LJ_ALLOC_MMAP		1

#if LJ_64

#define LJ_ALLOC_MMAP_PROBE	1

#if LJ_GC64
#define LJ_ALLOC_MBITS		47	/* 128 TB in LJ_GC64 mode. */
#elif LJ_TARGET_X64 && LJ_HASJIT
/* Due to limitations in the x64 compiler backend. */
#define LJ_ALLOC_MBITS		31	/* 2 GB on x64 with !LJ_GC64. */
#else
#define LJ_ALLOC_MBITS		32	/* 4 GB on other archs with !LJ_GC64. */
#endif

#endif

#if LJ_64 && !LJ_GC64 && defined(MAP_32BIT)
#define LJ_ALLOC_MMAP32		1
#endif

#if LJ_TARGET_LINUX
#define LJ_ALLOC_MREMAP		1
#endif

#endif


#if LJ_ALLOC_VIRTUALALLOC

#if LJ_ALLOC_NTAVM
/* Undocumented, but hey, that's what we all love so much about Windows. */
typedef long (*PNTAVM)(HANDLE handle, void **addr, ULONG_PTR zbits,
		       size_t *size, ULONG alloctype, ULONG prot);
static PNTAVM ntavm;

/* Number of top bits of the lower 32 bits of an address that must be zero.
** Apparently 0 gives us full 64 bit addresses and 1 gives us the lower 2GB.
*/
#define NTAVM_ZEROBITS		1

static void init_mmap(void)
{
  ntavm = (PNTAVM)GetProcAddress(GetModuleHandleA("ntdll.dll"),
				 "NtAllocateVirtualMemory");
}
#define INIT_MMAP()	init_mmap()

/* Win64 32 bit MMAP via NtAllocateVirtualMemory. */
static void *mmap_plain(size_t size)
{
  DWORD olderr = GetLastError();
  void *ptr = NULL;
  long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
		  MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
  SetLastError(olderr);
  return st == 0 ? ptr : MFAIL;
}

/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static void *direct_mmap(size_t size)
{
  DWORD olderr = GetLastError();
  void *ptr = NULL;
  long st = ntavm(INVALID_HANDLE_VALUE, &ptr, NTAVM_ZEROBITS, &size,
		  MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN, PAGE_READWRITE);
  SetLastError(olderr);
  return st == 0 ? ptr : MFAIL;
}

#else

/* Win32 MMAP via VirtualAlloc */
static void *mmap_plain(size_t size)
{
  DWORD olderr = GetLastError();
  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT, PAGE_READWRITE);
  SetLastError(olderr);
  return ptr ? ptr : MFAIL;
}

/* For direct MMAP, use MEM_TOP_DOWN to minimize interference */
static void *direct_mmap(size_t size)
{
  DWORD olderr = GetLastError();
  void *ptr = LJ_WIN_VALLOC(0, size, MEM_RESERVE|MEM_COMMIT|MEM_TOP_DOWN,
			    PAGE_READWRITE);
  SetLastError(olderr);
  return ptr ? ptr : MFAIL;
}

#endif

#define CALL_MMAP(prng, size)	mmap_plain(size)
#define DIRECT_MMAP(prng, size)	direct_mmap(size)
/* This function supports releasing coalesced segments. */
static int CALL_MUNMAP(void *ptr, size_t size)
{
  DWORD olderr = GetLastError();
  MEMORY_BASIC_INFORMATION minfo;
  char *cptr = (char *)ptr;
  while (size) {
    if (VirtualQuery(cptr, &minfo, sizeof(minfo)) == 0)
      return -1;
    if (minfo.BaseAddress != cptr || minfo.AllocationBase != cptr ||
	minfo.State != MEM_COMMIT || minfo.RegionSize > size)
      return -1;
    if (VirtualFree(cptr, 0, MEM_RELEASE) == 0)
      return -1;
    cptr += minfo.RegionSize;
    size -= minfo.RegionSize;
  }
  SetLastError(olderr);
  return 0;
}

#elif LJ_ALLOC_MMAP

#define MMAP_PROT		(PROT_READ|PROT_WRITE)
#if !defined(MAP_ANONYMOUS) && defined(MAP_ANON)
#define MAP_ANONYMOUS		MAP_ANON
#endif
#define MMAP_FLAGS		(MAP_PRIVATE|MAP_ANONYMOUS)

#if LJ_ALLOC_MMAP_PROBE

#ifdef MAP_TRYFIXED
#define MMAP_FLAGS_PROBE	(MMAP_FLAGS|MAP_TRYFIXED)
#else
#define MMAP_FLAGS_PROBE	MMAP_FLAGS
#endif

#define LJ_ALLOC_MMAP_PROBE_MAX		30
#define LJ_ALLOC_MMAP_PROBE_LINEAR	5

#define LJ_ALLOC_MMAP_PROBE_LOWER	((uintptr_t)0x4000)

static void *mmap_probe(PRNGState *rs, size_t size)
{
  /* Hint for next allocation. Doesn't need to be thread-safe. */
  static uintptr_t hint_addr = 0;
  int olderr = errno;
  int retry;
  for (retry = 0; retry < LJ_ALLOC_MMAP_PROBE_MAX; retry++) {
    void *p = mmap((void *)hint_addr, size, MMAP_PROT, MMAP_FLAGS_PROBE, -1, 0);
    uintptr_t addr = (uintptr_t)p;
    if ((addr >> LJ_ALLOC_MBITS) == 0 && addr >= LJ_ALLOC_MMAP_PROBE_LOWER &&
	((addr + size) >> LJ_ALLOC_MBITS) == 0) {
      /* We got a suitable address. Bump the hint address. */
      hint_addr = addr + size;
      errno = olderr;
      return p;
    }
    if (p != MFAIL) {
      munmap(p, size);
    } else if (errno == ENOMEM) {
      return MFAIL;
    }
    if (hint_addr) {
      /* First, try linear probing. */
      if (retry < LJ_ALLOC_MMAP_PROBE_LINEAR) {
	hint_addr += 0x1000000;
	if (((hint_addr + size) >> LJ_ALLOC_MBITS) != 0)
	  hint_addr = 0;
	continue;
      } else if (retry == LJ_ALLOC_MMAP_PROBE_LINEAR) {
	/* Next, try a no-hint probe to get back an ASLR address. */
	hint_addr = 0;
	continue;
      }
    }
    /* Finally, try pseudo-random probing. */
    do {
      hint_addr = lj_prng_u64(rs) & (((uintptr_t)1<<LJ_ALLOC_MBITS)-LJ_PAGESIZE);
    } while (hint_addr < LJ_ALLOC_MMAP_PROBE_LOWER);
  }
  errno = olderr;
  return MFAIL;
}
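/* In short: plain mmap() cannot be asked for "any address below
** 1<<LJ_ALLOC_MBITS" portably, so the loop above passes a hint address and
** verifies the kernel's answer, escalating from linear probing near the
** last success, to one unhinted (ASLR) attempt, to PRNG-chosen hints.
** An address is accepted only if the whole [addr, addr+size) range lies
** below the LJ_ALLOC_MBITS boundary and above LJ_ALLOC_MMAP_PROBE_LOWER.
*/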

#endif

#if LJ_ALLOC_MMAP32

#if LJ_TARGET_SOLARIS
#define LJ_ALLOC_MMAP32_START	((uintptr_t)0x1000)
#else
#define LJ_ALLOC_MMAP32_START	((uintptr_t)0)
#endif

#if LJ_ALLOC_MMAP_PROBE
static void *mmap_map32(PRNGState *rs, size_t size)
#else
static void *mmap_map32(size_t size)
#endif
{
#if LJ_ALLOC_MMAP_PROBE
  static int fallback = 0;
  if (fallback)
    return mmap_probe(rs, size);
#endif
  {
    int olderr = errno;
    void *ptr = mmap((void *)LJ_ALLOC_MMAP32_START, size, MMAP_PROT, MAP_32BIT|MMAP_FLAGS, -1, 0);
    errno = olderr;
    /* This only allows 1GB on Linux. So fall back to probing to get 2GB. */
#if LJ_ALLOC_MMAP_PROBE
    if (ptr == MFAIL) {
      fallback = 1;
      return mmap_probe(rs, size);
    }
#endif
    return ptr;
  }
}

#endif

#if LJ_ALLOC_MMAP32
#if LJ_ALLOC_MMAP_PROBE
#define CALL_MMAP(prng, size)	mmap_map32(prng, size)
#else
#define CALL_MMAP(prng, size)	mmap_map32(size)
#endif
#elif LJ_ALLOC_MMAP_PROBE
#define CALL_MMAP(prng, size)	mmap_probe(prng, size)
#else
static void *mmap_plain(size_t size)
{
  int olderr = errno;
  void *ptr = mmap(NULL, size, MMAP_PROT, MMAP_FLAGS, -1, 0);
  errno = olderr;
  return ptr;
}
#define CALL_MMAP(prng, size)	mmap_plain(size)
#endif

#if LJ_64 && !LJ_GC64 && ((defined(__FreeBSD__) && __FreeBSD__ < 10) || defined(__FreeBSD_kernel__)) && !LJ_TARGET_PS4

#include <sys/resource.h>

static void init_mmap(void)
{
  struct rlimit rlim;
  rlim.rlim_cur = rlim.rlim_max = 0x10000;
  setrlimit(RLIMIT_DATA, &rlim);  /* Ignore result. May fail later. */
}
#define INIT_MMAP()	init_mmap()

#endif

static int CALL_MUNMAP(void *ptr, size_t size)
{
  int olderr = errno;
  int ret = munmap(ptr, size);
  errno = olderr;
  return ret;
}

#if LJ_ALLOC_MREMAP
/* Need to define _GNU_SOURCE to get the mremap prototype. */
static void *CALL_MREMAP_(void *ptr, size_t osz, size_t nsz, int flags)
{
  int olderr = errno;
  ptr = mremap(ptr, osz, nsz, flags);
  errno = olderr;
  return ptr;
}

#define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv))
#define CALL_MREMAP_NOMOVE	0
#define CALL_MREMAP_MAYMOVE	1
#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64)
#define CALL_MREMAP_MV		CALL_MREMAP_NOMOVE
#else
#define CALL_MREMAP_MV		CALL_MREMAP_MAYMOVE
#endif
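/* Note: with !LJ_GC64 on 64-bit targets the heap must stay inside the low
** 2^31/2^32 bytes (see LJ_ALLOC_MBITS above); a moving mremap() could
** return an address outside that range, hence CALL_MREMAP_NOMOVE there.
** The NOMOVE/MAYMOVE values match Linux's MREMAP_MAYMOVE encoding (0/1).
*/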
#endif

#endif


#ifndef INIT_MMAP
#define INIT_MMAP()		((void)0)
#endif

#ifndef DIRECT_MMAP
#define DIRECT_MMAP(prng, s)	CALL_MMAP(prng, s)
#endif

#ifndef CALL_MREMAP
#define CALL_MREMAP(addr, osz, nsz, mv) ((void)osz, MFAIL)
#endif

/* -----------------------  Chunk representations ------------------------ */

struct malloc_chunk {
  size_t               prev_foot;  /* Size of previous chunk (if free).  */
  size_t               head;       /* Size and inuse bits. */
  struct malloc_chunk *fd;         /* double links -- used only if free. */
  struct malloc_chunk *bk;
};

typedef struct malloc_chunk  mchunk;
typedef struct malloc_chunk *mchunkptr;
typedef struct malloc_chunk *sbinptr;  /* The type of bins of chunks */
typedef size_t bindex_t;               /* Described below */
typedef unsigned int binmap_t;         /* Described below */
typedef unsigned int flag_t;           /* The type of various bit flag sets */

/* ------------------- Chunks sizes and alignments ----------------------- */

#define MCHUNK_SIZE		(sizeof(mchunk))

#define CHUNK_OVERHEAD		(SIZE_T_SIZE)

/* Direct chunks need a second word of overhead ... */
#define DIRECT_CHUNK_OVERHEAD	(TWO_SIZE_T_SIZES)
/* ... and additional padding for fake next-chunk at foot */
#define DIRECT_FOOT_PAD		(FOUR_SIZE_T_SIZES)

/* The smallest size we can malloc is an aligned minimal chunk */
#define MIN_CHUNK_SIZE\
  ((MCHUNK_SIZE + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* conversion from malloc headers to user pointers, and back */
#define chunk2mem(p)		((void *)((char *)(p) + TWO_SIZE_T_SIZES))
#define mem2chunk(mem)		((mchunkptr)((char *)(mem) - TWO_SIZE_T_SIZES))
/* chunk associated with aligned address A */
#define align_as_chunk(A)	(mchunkptr)((A) + align_offset(chunk2mem(A)))
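/* A sketch of the layout these macros imply (an in-use chunk). The user
** pointer sits two words past the chunk base, and an in-use chunk may
** reuse the next chunk's prev_foot word as payload, which is why
** CHUNK_OVERHEAD is only a single size_t:
**
**   chunk-> +------------------------------------+
**           | prev_foot (valid only if !pinuse)  |
**           +------------------------------------+
**           | head: chunk size | CINUSE | PINUSE |
**   mem ->  +------------------------------------+
**           | payload ... (the fd/bk links       |
**           |  overlay this space when free)     |
**           +------------------------------------+
*/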

/* Bounds on request (not chunk) sizes. */
#define MAX_REQUEST		((~MIN_CHUNK_SIZE+1) << 2)
#define MIN_REQUEST		(MIN_CHUNK_SIZE - CHUNK_OVERHEAD - SIZE_T_ONE)

/* pad request bytes into a usable size */
#define pad_request(req) \
   (((req) + CHUNK_OVERHEAD + CHUNK_ALIGN_MASK) & ~CHUNK_ALIGN_MASK)

/* pad request, checking for minimum (but not maximum) */
#define request2size(req) \
  (((req) < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(req))
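/* Worked example (illustrative only): on a 64-bit build MCHUNK_SIZE is 32,
** so MIN_CHUNK_SIZE is 32, CHUNK_OVERHEAD is 8 and CHUNK_ALIGN_MASK is 7.
** Then request2size(100) = (100+8+7) & ~7 = 112, while request2size(1)
** falls below MIN_REQUEST and is bumped to MIN_CHUNK_SIZE = 32.
*/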

/* ------------------ Operations on head and foot fields ----------------- */

#define PINUSE_BIT		(SIZE_T_ONE)
#define CINUSE_BIT		(SIZE_T_TWO)
#define INUSE_BITS		(PINUSE_BIT|CINUSE_BIT)

/* Head value for fenceposts */
#define FENCEPOST_HEAD		(INUSE_BITS|SIZE_T_SIZE)

/* extraction of fields from head words */
#define cinuse(p)		((p)->head & CINUSE_BIT)
#define pinuse(p)		((p)->head & PINUSE_BIT)
#define chunksize(p)		((p)->head & ~(INUSE_BITS))

#define clear_pinuse(p)		((p)->head &= ~PINUSE_BIT)
#define clear_cinuse(p)		((p)->head &= ~CINUSE_BIT)

/* Treat space at ptr +/- offset as a chunk */
#define chunk_plus_offset(p, s)		((mchunkptr)(((char *)(p)) + (s)))
#define chunk_minus_offset(p, s)	((mchunkptr)(((char *)(p)) - (s)))

/* Ptr to next or previous physical malloc_chunk. */
#define next_chunk(p)	((mchunkptr)(((char *)(p)) + ((p)->head & ~INUSE_BITS)))
#define prev_chunk(p)	((mchunkptr)(((char *)(p)) - ((p)->prev_foot) ))

/* extract next chunk's pinuse bit */
#define next_pinuse(p)	((next_chunk(p)->head) & PINUSE_BIT)

/* Get/set size at footer */
#define get_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot)
#define set_foot(p, s)	(((mchunkptr)((char *)(p) + (s)))->prev_foot = (s))

/* Set size, pinuse bit, and foot */
#define set_size_and_pinuse_of_free_chunk(p, s)\
  ((p)->head = (s|PINUSE_BIT), set_foot(p, s))

/* Set size, pinuse bit, foot, and clear next pinuse */
#define set_free_with_pinuse(p, s, n)\
  (clear_pinuse(n), set_size_and_pinuse_of_free_chunk(p, s))

#define is_direct(p)\
  (!((p)->head & PINUSE_BIT) && ((p)->prev_foot & IS_DIRECT_BIT))

/* Get the internal overhead associated with chunk p */
#define overhead_for(p)\
 (is_direct(p)? DIRECT_CHUNK_OVERHEAD : CHUNK_OVERHEAD)

/* ---------------------- Overlaid data structures ----------------------- */

struct malloc_tree_chunk {
  /* The first four fields must be compatible with malloc_chunk */
  size_t                    prev_foot;
  size_t                    head;
  struct malloc_tree_chunk *fd;
  struct malloc_tree_chunk *bk;

  struct malloc_tree_chunk *child[2];
  struct malloc_tree_chunk *parent;
  bindex_t                  index;
};

typedef struct malloc_tree_chunk  tchunk;
typedef struct malloc_tree_chunk *tchunkptr;
typedef struct malloc_tree_chunk *tbinptr; /* The type of bins of trees */

/* A little helper macro for trees */
#define leftmost_child(t) ((t)->child[0] != 0? (t)->child[0] : (t)->child[1])

/* ----------------------------- Segments -------------------------------- */

struct malloc_segment {
  char        *base;             /* base address */
  size_t       size;             /* allocated size */
  struct malloc_segment *next;   /* ptr to next segment */
};

typedef struct malloc_segment  msegment;
typedef struct malloc_segment *msegmentptr;

/* ---------------------------- malloc_state ----------------------------- */

/* Bin types, widths and sizes */
#define NSMALLBINS		(32U)
#define NTREEBINS		(32U)
#define SMALLBIN_SHIFT		(3U)
#define SMALLBIN_WIDTH		(SIZE_T_ONE << SMALLBIN_SHIFT)
#define TREEBIN_SHIFT		(8U)
#define MIN_LARGE_SIZE		(SIZE_T_ONE << TREEBIN_SHIFT)
#define MAX_SMALL_SIZE		(MIN_LARGE_SIZE - SIZE_T_ONE)
#define MAX_SMALL_REQUEST  (MAX_SMALL_SIZE - CHUNK_ALIGN_MASK - CHUNK_OVERHEAD)
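/* In concrete numbers: smallbins index chunk sizes in 8-byte steps below
** MIN_LARGE_SIZE = 256 (bins below MIN_CHUNK_SIZE stay unused), and with
** 8-byte alignment and one word of overhead, MAX_SMALL_REQUEST works out
** to 255 - 7 - 8 = 240 on a 64-bit build. Larger sizes go to treebins.
*/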

struct malloc_state {
  binmap_t   smallmap;
  binmap_t   treemap;
  size_t     dvsize;
  size_t     topsize;
  mchunkptr  dv;
  mchunkptr  top;
  size_t     trim_check;
  size_t     release_checks;
  mchunkptr  smallbins[(NSMALLBINS+1)*2];
  tbinptr    treebins[NTREEBINS];
  msegment   seg;
  PRNGState  *prng;
};

typedef struct malloc_state *mstate;

#define is_initialized(M)	((M)->top != 0)

/* -------------------------- system alloc setup ------------------------- */

/* page-align a size */
#define page_align(S)\
 (((S) + (LJ_PAGESIZE - SIZE_T_ONE)) & ~(LJ_PAGESIZE - SIZE_T_ONE))

/* granularity-align a size */
#define granularity_align(S)\
  (((S) + (DEFAULT_GRANULARITY - SIZE_T_ONE))\
   & ~(DEFAULT_GRANULARITY - SIZE_T_ONE))

#if LJ_TARGET_WINDOWS
#define mmap_align(S)	granularity_align(S)
#else
#define mmap_align(S)	page_align(S)
#endif

/* True if segment S holds address A */
#define segment_holds(S, A)\
  ((char *)(A) >= S->base && (char *)(A) < S->base + S->size)

/* Return segment holding given address */
static msegmentptr segment_holding(mstate m, char *addr)
{
  msegmentptr sp = &m->seg;
  for (;;) {
    if (addr >= sp->base && addr < sp->base + sp->size)
      return sp;
    if ((sp = sp->next) == 0)
      return 0;
  }
}

/* Return true if segment contains a segment link */
static int has_segment_link(mstate m, msegmentptr ss)
{
  msegmentptr sp = &m->seg;
  for (;;) {
    if ((char *)sp >= ss->base && (char *)sp < ss->base + ss->size)
      return 1;
    if ((sp = sp->next) == 0)
      return 0;
  }
}

/*
  TOP_FOOT_SIZE is padding at the end of a segment, including space
  that may be needed to place segment records and fenceposts when new
  noncontiguous segments are added.
*/
#define TOP_FOOT_SIZE\
  (align_offset(TWO_SIZE_T_SIZES)+pad_request(sizeof(struct malloc_segment))+MIN_CHUNK_SIZE)

/* ---------------------------- Indexing Bins ---------------------------- */

#define is_small(s)		(((s) >> SMALLBIN_SHIFT) < NSMALLBINS)
#define small_index(s)		((s)  >> SMALLBIN_SHIFT)
#define small_index2size(i)	((i)  << SMALLBIN_SHIFT)
#define MIN_SMALL_INDEX		(small_index(MIN_CHUNK_SIZE))
/* Addressing by index. Each smallbin header is overlaid on the smallbins
** array so that only its fd/bk fields are actually stored: bin i starts
** at &smallbins[i<<1], placing its fd/bk at slots (i<<1)+2 and (i<<1)+3,
** while its (never accessed) prev_foot/head alias the previous bin.
*/
#define smallbin_at(M, i)	((sbinptr)((char *)&((M)->smallbins[(i)<<1])))
#define treebin_at(M,i)		(&((M)->treebins[i]))

/* assign tree index for size S to variable I */
#define compute_tree_index(S, I)\
{\
  unsigned int X = (unsigned int)(S >> TREEBIN_SHIFT);\
  if (X == 0) {\
    I = 0;\
  } else if (X > 0xFFFF) {\
    I = NTREEBINS-1;\
  } else {\
    unsigned int K = lj_fls(X);\
    I = (bindex_t)((K << 1) + ((S >> (K + (TREEBIN_SHIFT-1)) & 1)));\
  }\
}
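/* Worked example (illustrative only): for S = 1536, X = 1536>>8 = 6, so
** K = lj_fls(6) = 2 and I = (2<<1) + ((1536>>9) & 1) = 4+1 = 5. Each
** power-of-two size range thus splits into two bins; cross-checking with
** minsize_for_tree_index below, bin 5 starts at (1<<10)|(1<<9) = 1536.
*/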

/* Bit representing maximum resolved size in a treebin at i */
#define bit_for_tree_index(i) \
   (i == NTREEBINS-1)? (SIZE_T_BITSIZE-1) : (((i) >> 1) + TREEBIN_SHIFT - 2)

/* Shift placing maximum resolved bit in a treebin at i as sign bit */
#define leftshift_for_tree_index(i) \
   ((i == NTREEBINS-1)? 0 : \
    ((SIZE_T_BITSIZE-SIZE_T_ONE) - (((i) >> 1) + TREEBIN_SHIFT - 2)))

/* The size of the smallest chunk held in bin with index i */
#define minsize_for_tree_index(i) \
   ((SIZE_T_ONE << (((i) >> 1) + TREEBIN_SHIFT)) |  \
   (((size_t)((i) & SIZE_T_ONE)) << (((i) >> 1) + TREEBIN_SHIFT - 1)))

/* ------------------------ Operations on bin maps ----------------------- */

/* bit corresponding to given index */
#define idx2bit(i)		((binmap_t)(1) << (i))

/* Mark/Clear bits with given index */
#define mark_smallmap(M,i)	((M)->smallmap |=  idx2bit(i))
#define clear_smallmap(M,i)	((M)->smallmap &= ~idx2bit(i))
#define smallmap_is_marked(M,i)	((M)->smallmap &   idx2bit(i))

#define mark_treemap(M,i)	((M)->treemap  |=  idx2bit(i))
#define clear_treemap(M,i)	((M)->treemap  &= ~idx2bit(i))
#define treemap_is_marked(M,i)	((M)->treemap  &   idx2bit(i))

/* mask with all bits to left of least bit of x on */
#define left_bits(x)		((x<<1) | (~(x<<1)+1))
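/* Worked example (illustrative only): left_bits(0x10) = 0x20 | -0x20 =
** 0xFFFFFFE0, i.e. all binmap bits strictly above bit 4. This is what
** lets the malloc fast path jump straight to the next nonempty bin of
** at least the required size with a single lj_ffs().
*/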

/* Set cinuse bit and pinuse bit of next chunk */
#define set_inuse(M,p,s)\
  ((p)->head = (((p)->head & PINUSE_BIT)|s|CINUSE_BIT),\
  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

/* Set cinuse and pinuse of this chunk and pinuse of next chunk */
#define set_inuse_and_pinuse(M,p,s)\
  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT),\
  ((mchunkptr)(((char *)(p)) + (s)))->head |= PINUSE_BIT)

/* Set size, cinuse and pinuse bit of this chunk */
#define set_size_and_pinuse_of_inuse_chunk(M, p, s)\
  ((p)->head = (s|PINUSE_BIT|CINUSE_BIT))

/* ----------------------- Operations on smallbins ----------------------- */

/* Link a free chunk into a smallbin  */
#define insert_small_chunk(M, P, S) {\
  bindex_t I = small_index(S);\
  mchunkptr B = smallbin_at(M, I);\
  mchunkptr F = B;\
  if (!smallmap_is_marked(M, I))\
    mark_smallmap(M, I);\
  else\
    F = B->fd;\
  B->fd = P;\
  F->bk = P;\
  P->fd = F;\
  P->bk = B;\
}

/* Unlink a chunk from a smallbin  */
#define unlink_small_chunk(M, P, S) {\
  mchunkptr F = P->fd;\
  mchunkptr B = P->bk;\
  bindex_t I = small_index(S);\
  if (F == B) {\
    clear_smallmap(M, I);\
  } else {\
    F->bk = B;\
    B->fd = F;\
  }\
}

/* Unlink the first chunk from a smallbin */
#define unlink_first_small_chunk(M, B, P, I) {\
  mchunkptr F = P->fd;\
  if (B == F) {\
    clear_smallmap(M, I);\
  } else {\
    B->fd = F;\
    F->bk = B;\
  }\
}

/* Replace dv node, binning the old one */
/* Used only when dvsize known to be small */
#define replace_dv(M, P, S) {\
  size_t DVS = M->dvsize;\
  if (DVS != 0) {\
    mchunkptr DV = M->dv;\
    insert_small_chunk(M, DV, DVS);\
  }\
  M->dvsize = S;\
  M->dv = P;\
}
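/* The "dv" (designated victim) is dlmalloc's preferred chunk for servicing
** small requests that don't have an exact-fit bin: typically the remainder
** most recently split off. Keeping one such chunk outside the bins helps
** locality and avoids re-binning the remainder after every split.
*/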

/* ------------------------- Operations on trees ------------------------- */

/* Insert chunk into tree */
#define insert_large_chunk(M, X, S) {\
  tbinptr *H;\
  bindex_t I;\
  compute_tree_index(S, I);\
  H = treebin_at(M, I);\
  X->index = I;\
  X->child[0] = X->child[1] = 0;\
  if (!treemap_is_marked(M, I)) {\
    mark_treemap(M, I);\
    *H = X;\
    X->parent = (tchunkptr)H;\
    X->fd = X->bk = X;\
  } else {\
    tchunkptr T = *H;\
    size_t K = S << leftshift_for_tree_index(I);\
    for (;;) {\
      if (chunksize(T) != S) {\
	tchunkptr *C = &(T->child[(K >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1]);\
	K <<= 1;\
	if (*C != 0) {\
	  T = *C;\
	} else {\
	  *C = X;\
	  X->parent = T;\
	  X->fd = X->bk = X;\
	  break;\
	}\
      } else {\
	tchunkptr F = T->fd;\
	T->fd = F->bk = X;\
	X->fd = F;\
	X->bk = T;\
	X->parent = 0;\
	break;\
      }\
    }\
  }\
}

#define unlink_large_chunk(M, X) {\
  tchunkptr XP = X->parent;\
  tchunkptr R;\
  if (X->bk != X) {\
    tchunkptr F = X->fd;\
    R = X->bk;\
    F->bk = R;\
    R->fd = F;\
  } else {\
    tchunkptr *RP;\
    if (((R = *(RP = &(X->child[1]))) != 0) ||\
	((R = *(RP = &(X->child[0]))) != 0)) {\
      tchunkptr *CP;\
      while ((*(CP = &(R->child[1])) != 0) ||\
	     (*(CP = &(R->child[0])) != 0)) {\
	R = *(RP = CP);\
      }\
      *RP = 0;\
    }\
  }\
  if (XP != 0) {\
    tbinptr *H = treebin_at(M, X->index);\
    if (X == *H) {\
      if ((*H = R) == 0) \
	clear_treemap(M, X->index);\
    } else {\
      if (XP->child[0] == X) \
	XP->child[0] = R;\
      else \
	XP->child[1] = R;\
    }\
    if (R != 0) {\
      tchunkptr C0, C1;\
      R->parent = XP;\
      if ((C0 = X->child[0]) != 0) {\
	R->child[0] = C0;\
	C0->parent = R;\
      }\
      if ((C1 = X->child[1]) != 0) {\
	R->child[1] = C1;\
	C1->parent = R;\
      }\
    }\
  }\
}

/* Relays to large vs small bin operations */

#define insert_chunk(M, P, S)\
  if (is_small(S)) { insert_small_chunk(M, P, S)\
  } else { tchunkptr TP = (tchunkptr)(P); insert_large_chunk(M, TP, S); }

#define unlink_chunk(M, P, S)\
  if (is_small(S)) { unlink_small_chunk(M, P, S)\
  } else { tchunkptr TP = (tchunkptr)(P); unlink_large_chunk(M, TP); }

/* -----------------------  Direct-mmapping chunks ----------------------- */

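/* Requests at or above DEFAULT_MMAP_THRESHOLD bypass the bins entirely and
** get their own mmap()ed region. The alignment offset is stashed in
** prev_foot together with IS_DIRECT_BIT, so free() can recover the region
** base, and two fencepost headers terminate the region in lieu of a real
** next chunk.
*/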
static void *direct_alloc(mstate m, size_t nb)
{
  size_t mmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
  if (LJ_LIKELY(mmsize > nb)) {     /* Check for wrap around 0 */
    char *mm = (char *)(DIRECT_MMAP(m->prng, mmsize));
    if (mm != CMFAIL) {
      size_t offset = align_offset(chunk2mem(mm));
      size_t psize = mmsize - offset - DIRECT_FOOT_PAD;
      mchunkptr p = (mchunkptr)(mm + offset);
      p->prev_foot = offset | IS_DIRECT_BIT;
      p->head = psize|CINUSE_BIT;
      chunk_plus_offset(p, psize)->head = FENCEPOST_HEAD;
      chunk_plus_offset(p, psize+SIZE_T_SIZE)->head = 0;
      return chunk2mem(p);
    }
  }
  UNUSED(m);
  return NULL;
}

static mchunkptr direct_resize(mchunkptr oldp, size_t nb)
{
  size_t oldsize = chunksize(oldp);
  if (is_small(nb)) /* Can't shrink direct regions below small size */
    return NULL;
  /* Keep old chunk if big enough but not too big */
  if (oldsize >= nb + SIZE_T_SIZE &&
      (oldsize - nb) <= (DEFAULT_GRANULARITY >> 1)) {
    return oldp;
  } else {
    size_t offset = oldp->prev_foot & ~IS_DIRECT_BIT;
    size_t oldmmsize = oldsize + offset + DIRECT_FOOT_PAD;
    size_t newmmsize = mmap_align(nb + SIX_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
    char *cp = (char *)CALL_MREMAP((char *)oldp - offset,
				   oldmmsize, newmmsize, CALL_MREMAP_MV);
    if (cp != CMFAIL) {
      mchunkptr newp = (mchunkptr)(cp + offset);
      size_t psize = newmmsize - offset - DIRECT_FOOT_PAD;
      newp->head = psize|CINUSE_BIT;
      chunk_plus_offset(newp, psize)->head = FENCEPOST_HEAD;
      chunk_plus_offset(newp, psize+SIZE_T_SIZE)->head = 0;
      return newp;
    }
  }
  return NULL;
}

/* -------------------------- mspace management -------------------------- */

/* Initialize top chunk and its size */
static void init_top(mstate m, mchunkptr p, size_t psize)
{
  /* Ensure alignment */
  size_t offset = align_offset(chunk2mem(p));
  p = (mchunkptr)((char *)p + offset);
  psize -= offset;

  m->top = p;
  m->topsize = psize;
  p->head = psize | PINUSE_BIT;
  /* set size of fake trailing chunk holding overhead space only once */
  chunk_plus_offset(p, psize)->head = TOP_FOOT_SIZE;
  m->trim_check = DEFAULT_TRIM_THRESHOLD; /* reset on each update */
}

/* Initialize bins for a new mstate that is otherwise zeroed out */
static void init_bins(mstate m)
{
  /* Establish circular links for smallbins */
  bindex_t i;
  for (i = 0; i < NSMALLBINS; i++) {
    sbinptr bin = smallbin_at(m,i);
    bin->fd = bin->bk = bin;
  }
}

/* Allocate chunk and prepend remainder with chunk in successor base. */
static void *prepend_alloc(mstate m, char *newbase, char *oldbase, size_t nb)
{
  mchunkptr p = align_as_chunk(newbase);
  mchunkptr oldfirst = align_as_chunk(oldbase);
  size_t psize = (size_t)((char *)oldfirst - (char *)p);
  mchunkptr q = chunk_plus_offset(p, nb);
  size_t qsize = psize - nb;
  set_size_and_pinuse_of_inuse_chunk(m, p, nb);

  /* consolidate remainder with first chunk of old base */
  if (oldfirst == m->top) {
    size_t tsize = m->topsize += qsize;
    m->top = q;
    q->head = tsize | PINUSE_BIT;
  } else if (oldfirst == m->dv) {
    size_t dsize = m->dvsize += qsize;
    m->dv = q;
    set_size_and_pinuse_of_free_chunk(q, dsize);
  } else {
    if (!cinuse(oldfirst)) {
      size_t nsize = chunksize(oldfirst);
      unlink_chunk(m, oldfirst, nsize);
      oldfirst = chunk_plus_offset(oldfirst, nsize);
      qsize += nsize;
    }
    set_free_with_pinuse(q, qsize, oldfirst);
    insert_chunk(m, q, qsize);
  }

  return chunk2mem(p);
}

/* Add a segment to hold a new noncontiguous region */
static void add_segment(mstate m, char *tbase, size_t tsize)
{
  /* Determine locations and sizes of segment, fenceposts, old top */
  char *old_top = (char *)m->top;
  msegmentptr oldsp = segment_holding(m, old_top);
  char *old_end = oldsp->base + oldsp->size;
  size_t ssize = pad_request(sizeof(struct malloc_segment));
  char *rawsp = old_end - (ssize + FOUR_SIZE_T_SIZES + CHUNK_ALIGN_MASK);
  size_t offset = align_offset(chunk2mem(rawsp));
  char *asp = rawsp + offset;
  char *csp = (asp < (old_top + MIN_CHUNK_SIZE))? old_top : asp;
  mchunkptr sp = (mchunkptr)csp;
  msegmentptr ss = (msegmentptr)(chunk2mem(sp));
  mchunkptr tnext = chunk_plus_offset(sp, ssize);
  mchunkptr p = tnext;

  /* reset top to new space */
  init_top(m, (mchunkptr)tbase, tsize - TOP_FOOT_SIZE);

  /* Set up segment record */
  set_size_and_pinuse_of_inuse_chunk(m, sp, ssize);
  *ss = m->seg; /* Push current record */
  m->seg.base = tbase;
  m->seg.size = tsize;
  m->seg.next = ss;

  /* Insert trailing fenceposts */
  for (;;) {
    mchunkptr nextp = chunk_plus_offset(p, SIZE_T_SIZE);
    p->head = FENCEPOST_HEAD;
    if ((char *)(&(nextp->head)) < old_end)
      p = nextp;
    else
      break;
  }

  /* Insert the rest of old top into a bin as an ordinary free chunk */
  if (csp != old_top) {
    mchunkptr q = (mchunkptr)old_top;
    size_t psize = (size_t)(csp - old_top);
    mchunkptr tn = chunk_plus_offset(q, psize);
    set_free_with_pinuse(q, psize, tn);
    insert_chunk(m, q, psize);
  }
}

/* -------------------------- System allocation -------------------------- */

static void *alloc_sys(mstate m, size_t nb)
{
  char *tbase = CMFAIL;
  size_t tsize = 0;

  /* Directly map large chunks */
  if (LJ_UNLIKELY(nb >= DEFAULT_MMAP_THRESHOLD)) {
    void *mem = direct_alloc(m, nb);
    if (mem != 0)
      return mem;
  }

  {
    size_t req = nb + TOP_FOOT_SIZE + SIZE_T_ONE;
    size_t rsize = granularity_align(req);
    if (LJ_LIKELY(rsize > nb)) { /* Fail if wraps around zero */
      char *mp = (char *)(CALL_MMAP(m->prng, rsize));
      if (mp != CMFAIL) {
	tbase = mp;
	tsize = rsize;
      }
    }
  }

  if (tbase != CMFAIL) {
    msegmentptr sp = &m->seg;
    /* Try to merge with an existing segment */
    while (sp != 0 && tbase != sp->base + sp->size)
      sp = sp->next;
    if (sp != 0 && segment_holds(sp, m->top)) { /* append */
      sp->size += tsize;
      init_top(m, m->top, m->topsize + tsize);
    } else {
      sp = &m->seg;
      while (sp != 0 && sp->base != tbase + tsize)
	sp = sp->next;
      if (sp != 0) {
	char *oldbase = sp->base;
	sp->base = tbase;
	sp->size += tsize;
	return prepend_alloc(m, tbase, oldbase, nb);
      } else {
	add_segment(m, tbase, tsize);
      }
    }

    if (nb < m->topsize) { /* Allocate from new or extended top space */
      size_t rsize = m->topsize -= nb;
      mchunkptr p = m->top;
      mchunkptr r = m->top = chunk_plus_offset(p, nb);
      r->head = rsize | PINUSE_BIT;
      set_size_and_pinuse_of_inuse_chunk(m, p, nb);
      return chunk2mem(p);
    }
  }

  return NULL;
}

/* -----------------------  system deallocation -------------------------- */

/* Unmap and unlink any mmapped segments that don't contain used chunks */
static size_t release_unused_segments(mstate m)
{
  size_t released = 0;
  size_t nsegs = 0;
  msegmentptr pred = &m->seg;
  msegmentptr sp = pred->next;
  while (sp != 0) {
    char *base = sp->base;
    size_t size = sp->size;
    msegmentptr next = sp->next;
    nsegs++;
    {
      mchunkptr p = align_as_chunk(base);
      size_t psize = chunksize(p);
      /* Can unmap if first chunk holds entire segment and not pinned */
      if (!cinuse(p) && (char *)p + psize >= base + size - TOP_FOOT_SIZE) {
	tchunkptr tp = (tchunkptr)p;
	if (p == m->dv) {
	  m->dv = 0;
	  m->dvsize = 0;
	} else {
	  unlink_large_chunk(m, tp);
	}
	if (CALL_MUNMAP(base, size) == 0) {
	  released += size;
	  /* unlink obsoleted record */
	  sp = pred;
	  sp->next = next;
	} else { /* back out if cannot unmap */
	  insert_large_chunk(m, tp, psize);
	}
      }
    }
    pred = sp;
    sp = next;
  }
  /* Reset check counter */
  m->release_checks = nsegs > MAX_RELEASE_CHECK_RATE ?
		      nsegs : MAX_RELEASE_CHECK_RATE;
  return released;
}

static int alloc_trim(mstate m, size_t pad)
{
  size_t released = 0;
  if (pad < MAX_REQUEST && is_initialized(m)) {
    pad += TOP_FOOT_SIZE; /* ensure enough room for segment overhead */

    if (m->topsize > pad) {
      /* Shrink top space in granularity-size units, keeping at least one */
      size_t unit = DEFAULT_GRANULARITY;
      size_t extra = ((m->topsize - pad + (unit - SIZE_T_ONE)) / unit -
		      SIZE_T_ONE) * unit;
      msegmentptr sp = segment_holding(m, (char *)m->top);

      if (sp->size >= extra &&
	  !has_segment_link(m, sp)) { /* can't shrink if pinned */
	size_t newsize = sp->size - extra;
	/* Prefer mremap, fall back to munmap */
	if ((CALL_MREMAP(sp->base, sp->size, newsize, CALL_MREMAP_NOMOVE) != MFAIL) ||
	    (CALL_MUNMAP(sp->base + newsize, extra) == 0)) {
	  released = extra;
	}
      }

      if (released != 0) {
	sp->size -= released;
	init_top(m, m->top, m->topsize - released);
      }
    }

    /* Unmap any unused mmapped segments */
    released += release_unused_segments(m);

    /* On failure, disable autotrim to avoid repeated failed future calls */
    if (released == 0 && m->topsize > m->trim_check)
      m->trim_check = MAX_SIZE_T;
  }

  return (released != 0)? 1 : 0;
}

/* ---------------------------- malloc support --------------------------- */

/* allocate a large request from the best fitting chunk in a treebin */
static void *tmalloc_large(mstate m, size_t nb)
{
  tchunkptr v = 0;
  size_t rsize = ~nb+1; /* Unsigned negation */
  tchunkptr t;
  bindex_t idx;
  compute_tree_index(nb, idx);

  if ((t = *treebin_at(m, idx)) != 0) {
    /* Traverse tree for this bin looking for node with size == nb */
    size_t sizebits = nb << leftshift_for_tree_index(idx);
    tchunkptr rst = 0;  /* The deepest untaken right subtree */
    for (;;) {
      tchunkptr rt;
      size_t trem = chunksize(t) - nb;
      if (trem < rsize) {
	v = t;
	if ((rsize = trem) == 0)
	  break;
      }
      rt = t->child[1];
      t = t->child[(sizebits >> (SIZE_T_BITSIZE-SIZE_T_ONE)) & 1];
      if (rt != 0 && rt != t)
	rst = rt;
      if (t == 0) {
	t = rst; /* set t to least subtree holding sizes > nb */
	break;
      }
      sizebits <<= 1;
    }
  }

  if (t == 0 && v == 0) { /* set t to root of next non-empty treebin */
    binmap_t leftbits = left_bits(idx2bit(idx)) & m->treemap;
    if (leftbits != 0)
      t = *treebin_at(m, lj_ffs(leftbits));
  }

  while (t != 0) { /* find smallest of tree or subtree */
    size_t trem = chunksize(t) - nb;
    if (trem < rsize) {
      rsize = trem;
      v = t;
    }
    t = leftmost_child(t);
  }

  /* If dv is a better fit, return NULL so malloc will use it */
  if (v != 0 && rsize < (size_t)(m->dvsize - nb)) {
    mchunkptr r = chunk_plus_offset(v, nb);
    unlink_large_chunk(m, v);
    if (rsize < MIN_CHUNK_SIZE) {
      set_inuse_and_pinuse(m, v, (rsize + nb));
    } else {
      set_size_and_pinuse_of_inuse_chunk(m, v, nb);
      set_size_and_pinuse_of_free_chunk(r, rsize);
      insert_chunk(m, r, rsize);
    }
    return chunk2mem(v);
  }
  return NULL;
}

/* allocate a small request from the best fitting chunk in a treebin */
static void *tmalloc_small(mstate m, size_t nb)
{
  tchunkptr t, v;
  mchunkptr r;
  size_t rsize;
  bindex_t i = lj_ffs(m->treemap);

  v = t = *treebin_at(m, i);
  rsize = chunksize(t) - nb;

  while ((t = leftmost_child(t)) != 0) {
    size_t trem = chunksize(t) - nb;
    if (trem < rsize) {
      rsize = trem;
      v = t;
    }
  }

  r = chunk_plus_offset(v, nb);
  unlink_large_chunk(m, v);
  if (rsize < MIN_CHUNK_SIZE) {
    set_inuse_and_pinuse(m, v, (rsize + nb));
  } else {
    set_size_and_pinuse_of_inuse_chunk(m, v, nb);
    set_size_and_pinuse_of_free_chunk(r, rsize);
    replace_dv(m, r, rsize);
  }
  return chunk2mem(v);
}

/* ----------------------------------------------------------------------- */

void *lj_alloc_create(PRNGState *rs)
{
  size_t tsize = DEFAULT_GRANULARITY;
  char *tbase;
  INIT_MMAP();
  UNUSED(rs);
  tbase = (char *)(CALL_MMAP(rs, tsize));
  if (tbase != CMFAIL) {
    size_t msize = pad_request(sizeof(struct malloc_state));
    mchunkptr mn;
    mchunkptr msp = align_as_chunk(tbase);
    mstate m = (mstate)(chunk2mem(msp));
    memset(m, 0, msize);
    msp->head = (msize|PINUSE_BIT|CINUSE_BIT);
    m->seg.base = tbase;
    m->seg.size = tsize;
    m->release_checks = MAX_RELEASE_CHECK_RATE;
    init_bins(m);
    mn = next_chunk(mem2chunk(m));
    init_top(m, mn, (size_t)((tbase + tsize) - (char *)mn) - TOP_FOOT_SIZE);
    return m;
  }
  return NULL;
}

void lj_alloc_setprng(void *msp, PRNGState *rs)
{
  mstate ms = (mstate)msp;
  ms->prng = rs;
}

void lj_alloc_destroy(void *msp)
{
  mstate ms = (mstate)msp;
  msegmentptr sp = &ms->seg;
  while (sp != 0) {
    char *base = sp->base;
    size_t size = sp->size;
    sp = sp->next;
    CALL_MUNMAP(base, size);
  }
}

static LJ_NOINLINE void *lj_alloc_malloc(void *msp, size_t nsize)
{
  mstate ms = (mstate)msp;
  void *mem;
  size_t nb;
  if (nsize <= MAX_SMALL_REQUEST) {
    bindex_t idx;
    binmap_t smallbits;
    nb = (nsize < MIN_REQUEST)? MIN_CHUNK_SIZE : pad_request(nsize);
    idx = small_index(nb);
    smallbits = ms->smallmap >> idx;

    if ((smallbits & 0x3U) != 0) { /* Remainderless fit to a smallbin. */
      mchunkptr b, p;
      idx += ~smallbits & 1;       /* Uses next bin if idx empty */
      b = smallbin_at(ms, idx);
      p = b->fd;
      unlink_first_small_chunk(ms, b, p, idx);
      set_inuse_and_pinuse(ms, p, small_index2size(idx));
      mem = chunk2mem(p);
      return mem;
    } else if (nb > ms->dvsize) {
      if (smallbits != 0) { /* Use chunk in next nonempty smallbin */
	mchunkptr b, p, r;
	size_t rsize;
	binmap_t leftbits = (smallbits << idx) & left_bits(idx2bit(idx));
	bindex_t i = lj_ffs(leftbits);
	b = smallbin_at(ms, i);
	p = b->fd;
	unlink_first_small_chunk(ms, b, p, i);
	rsize = small_index2size(i) - nb;
	/* A fit here cannot be remainderless if size_t is 4 bytes */
	if (SIZE_T_SIZE != 4 && rsize < MIN_CHUNK_SIZE) {
	  set_inuse_and_pinuse(ms, p, small_index2size(i));
	} else {
	  set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
	  r = chunk_plus_offset(p, nb);
	  set_size_and_pinuse_of_free_chunk(r, rsize);
	  replace_dv(ms, r, rsize);
	}
	mem = chunk2mem(p);
	return mem;
      } else if (ms->treemap != 0 && (mem = tmalloc_small(ms, nb)) != 0) {
	return mem;
      }
    }
  } else if (nsize >= MAX_REQUEST) {
    nb = MAX_SIZE_T; /* Too big to allocate. Force failure (in sys alloc) */
  } else {
    nb = pad_request(nsize);
    if (ms->treemap != 0 && (mem = tmalloc_large(ms, nb)) != 0) {
      return mem;
    }
  }

  if (nb <= ms->dvsize) {
    size_t rsize = ms->dvsize - nb;
    mchunkptr p = ms->dv;
    if (rsize >= MIN_CHUNK_SIZE) { /* split dv */
      mchunkptr r = ms->dv = chunk_plus_offset(p, nb);
      ms->dvsize = rsize;
      set_size_and_pinuse_of_free_chunk(r, rsize);
      set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
    } else { /* exhaust dv */
      size_t dvs = ms->dvsize;
      ms->dvsize = 0;
      ms->dv = 0;
      set_inuse_and_pinuse(ms, p, dvs);
    }
    mem = chunk2mem(p);
    return mem;
  } else if (nb < ms->topsize) { /* Split top */
    size_t rsize = ms->topsize -= nb;
    mchunkptr p = ms->top;
    mchunkptr r = ms->top = chunk_plus_offset(p, nb);
    r->head = rsize | PINUSE_BIT;
    set_size_and_pinuse_of_inuse_chunk(ms, p, nb);
    mem = chunk2mem(p);
    return mem;
  }
  return alloc_sys(ms, nb);
}

static LJ_NOINLINE void *lj_alloc_free(void *msp, void *ptr)
{
  if (ptr != 0) {
    mchunkptr p = mem2chunk(ptr);
    mstate fm = (mstate)msp;
    size_t psize = chunksize(p);
    mchunkptr next = chunk_plus_offset(p, psize);
    if (!pinuse(p)) {
      size_t prevsize = p->prev_foot;
      if ((prevsize & IS_DIRECT_BIT) != 0) {
	prevsize &= ~IS_DIRECT_BIT;
	psize += prevsize + DIRECT_FOOT_PAD;
	CALL_MUNMAP((char *)p - prevsize, psize);
	return NULL;
      } else {
	mchunkptr prev = chunk_minus_offset(p, prevsize);
	psize += prevsize;
	p = prev;
	/* consolidate backward */
	if (p != fm->dv) {
	  unlink_chunk(fm, p, prevsize);
	} else if ((next->head & INUSE_BITS) == INUSE_BITS) {
	  fm->dvsize = psize;
	  set_free_with_pinuse(p, psize, next);
	  return NULL;
	}
      }
    }
    if (!cinuse(next)) {  /* consolidate forward */
      if (next == fm->top) {
	size_t tsize = fm->topsize += psize;
	fm->top = p;
	p->head = tsize | PINUSE_BIT;
	if (p == fm->dv) {
	  fm->dv = 0;
	  fm->dvsize = 0;
	}
	if (tsize > fm->trim_check)
	  alloc_trim(fm, 0);
	return NULL;
      } else if (next == fm->dv) {
	size_t dsize = fm->dvsize += psize;
	fm->dv = p;
	set_size_and_pinuse_of_free_chunk(p, dsize);
	return NULL;
      } else {
	size_t nsize = chunksize(next);
	psize += nsize;
	unlink_chunk(fm, next, nsize);
	set_size_and_pinuse_of_free_chunk(p, psize);
	if (p == fm->dv) {
	  fm->dvsize = psize;
	  return NULL;
	}
      }
    } else {
      set_free_with_pinuse(p, psize, next);
    }

    if (is_small(psize)) {
      insert_small_chunk(fm, p, psize);
    } else {
      tchunkptr tp = (tchunkptr)p;
      insert_large_chunk(fm, tp, psize);
      if (--fm->release_checks == 0)
	release_unused_segments(fm);
    }
  }
  return NULL;
}

static LJ_NOINLINE void *lj_alloc_realloc(void *msp, void *ptr, size_t nsize)
{
  if (nsize >= MAX_REQUEST) {
    return NULL;
  } else {
    mstate m = (mstate)msp;
    mchunkptr oldp = mem2chunk(ptr);
    size_t oldsize = chunksize(oldp);
    mchunkptr next = chunk_plus_offset(oldp, oldsize);
    mchunkptr newp = 0;
    size_t nb = request2size(nsize);

    /* Try to either shrink or extend into top. Else malloc-copy-free */
    if (is_direct(oldp)) {
      newp = direct_resize(oldp, nb);  /* this may return NULL. */
    } else if (oldsize >= nb) { /* already big enough */
      size_t rsize = oldsize - nb;
      newp = oldp;
      if (rsize >= MIN_CHUNK_SIZE) {
	mchunkptr rem = chunk_plus_offset(newp, nb);
	set_inuse(m, newp, nb);
	set_inuse(m, rem, rsize);
	lj_alloc_free(m, chunk2mem(rem));
      }
    } else if (next == m->top && oldsize + m->topsize > nb) {
      /* Expand into top */
      size_t newsize = oldsize + m->topsize;
      size_t newtopsize = newsize - nb;
      mchunkptr newtop = chunk_plus_offset(oldp, nb);
      set_inuse(m, oldp, nb);
      newtop->head = newtopsize | PINUSE_BIT;
      m->top = newtop;
      m->topsize = newtopsize;
      newp = oldp;
    }

    if (newp != 0) {
      return chunk2mem(newp);
    } else {
      void *newmem = lj_alloc_malloc(m, nsize);
      if (newmem != 0) {
	size_t oc = oldsize - overhead_for(oldp);
	memcpy(newmem, ptr, oc < nsize ? oc : nsize);
	lj_alloc_free(m, ptr);
      }
      return newmem;
    }
  }
}

void *lj_alloc_f(void *msp, void *ptr, size_t osize, size_t nsize)
{
  (void)osize;
  if (nsize == 0) {
    return lj_alloc_free(msp, ptr);
  } else if (ptr == NULL) {
    return lj_alloc_malloc(msp, nsize);
  } else {
    return lj_alloc_realloc(msp, ptr, nsize);
  }
}
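/* Example usage (a hedged sketch; lj_alloc_f follows the lua_Alloc
** convention, and the PRNGState rs is assumed to have been seeded by the
** caller, e.g. via lj_prng_seed_secure()):
**
**   void *msp = lj_alloc_create(&rs);
**   lj_alloc_setprng(msp, &rs);
**   void *p = lj_alloc_f(msp, NULL, 0, 100);   (malloc 100 bytes)
**   p = lj_alloc_f(msp, p, 100, 200);          (grow to 200 bytes)
**   lj_alloc_f(msp, p, 200, 0);                (free)
**   lj_alloc_destroy(msp);
*/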

#endif