/* Alternative malloc implementation for multiple threads without
lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

#ifdef _MSC_VER
/* Enable full aliasing on MSVC */
/*#pragma optimize("a", on)*/
#endif

/*#define FULLSANITYCHECKS*/

#include "nedmalloc.h"
#if defined(WIN32)
 #include <malloc.h>
#endif
#define MSPACES 1
#define ONLY_MSPACES 1
#ifndef USE_LOCKS
 #define USE_LOCKS 1
#endif
#define FOOTERS 1           /* Need to enable footers so frees lock the right mspace */
#undef DEBUG				/* dlmalloc wants DEBUG either 0 or 1 */
#ifdef _DEBUG
 #define DEBUG 1
#else
 #define DEBUG 0
#endif
#ifdef NDEBUG               /* Disable assert checking on release builds */
 #undef DEBUG
#endif
/* The default of 64Kb means we spend too much time kernel-side */
#ifndef DEFAULT_GRANULARITY
#define DEFAULT_GRANULARITY (1*1024*1024)
#endif
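/* Purely illustrative: since the macro is wrapped in #ifndef above, the
   granularity can also be overridden from the compiler command line, e.g.
   -DDEFAULT_GRANULARITY=4194304, to trade fewer kernel round-trips for a
   larger footprint. */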
/*#define USE_SPIN_LOCKS 0*/


/*#define FORCEINLINE*/
#include "malloc.c.h"
#ifdef NDEBUG               /* Disable assert checking on release builds */
 #undef DEBUG
#endif

/* The maximum number of threads that can concurrently use a pool */
#ifndef MAXTHREADSINPOOL
#define MAXTHREADSINPOOL 16
#endif
/* The maximum number of threadcaches which can be allocated */
#ifndef THREADCACHEMAXCACHES
#define THREADCACHEMAXCACHES 256
#endif
/* The maximum size to be allocated from the thread cache */
#ifndef THREADCACHEMAX
#define THREADCACHEMAX 8192
#endif
#if 0
/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
#define THREADCACHEMAXBINS ((13-4)*2)
#else
/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
#define THREADCACHEMAXBINS (13-4)
#endif
/* Point at which the free space in a thread cache is garbage collected */
#ifndef THREADCACHEMAXFREESPACE
#define THREADCACHEMAXFREESPACE (512*1024)
#endif
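/* With the defaults above, the per-thread cache keeps power-of-two bins
   covering 16 to 8192 bytes (THREADCACHEMAXBINS+1 = 10 size classes) and
   starts garbage collecting itself once it holds THREADCACHEMAXFREESPACE
   (512Kb) of freed blocks. */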


#ifdef WIN32
 #define TLSVAR			DWORD
 #define TLSALLOC(k)	(*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
 #define TLSFREE(k)		(!TlsFree(k))
 #define TLSGET(k)		TlsGetValue(k)
 #define TLSSET(k, a)	(!TlsSetValue(k, a))
 #ifdef DEBUG
static LPVOID ChkedTlsGetValue(DWORD idx)
{
	LPVOID ret=TlsGetValue(idx);
	assert(S_OK==GetLastError());
	return ret;
}
  #undef TLSGET
  #define TLSGET(k) ChkedTlsGetValue(k)
 #endif
#else
 #define TLSVAR			pthread_key_t
 #define TLSALLOC(k)	pthread_key_create(k, 0)
 #define TLSFREE(k)		pthread_key_delete(k)
 #define TLSGET(k)		pthread_getspecific(k)
 #define TLSSET(k, a)	pthread_setspecific(k, a)
#endif
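/* Note that TLSALLOC, TLSFREE and TLSSET evaluate to non-zero on failure on
   both platforms, hence the "if(TLSSET(...)) abort();" pattern used below. */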

#if 0
/* Only enable if testing with valgrind. Causes misoperation */
#define mspace_malloc(p, s) malloc(s)
#define mspace_realloc(p, m, s) realloc(m, s)
#define mspace_calloc(p, n, s) calloc(n, s)
#define mspace_free(p, m) free(m)
#endif


#if defined(__cplusplus)
#if !defined(NO_NED_NAMESPACE)
namespace nedalloc {
#else
extern "C" {
#endif
#endif

size_t nedblksize(void *mem) THROWSPEC
{
#if 0
	/* Only enable if testing with valgrind. Causes misoperation */
	return THREADCACHEMAX;
#else
	if(mem)
	{
		mchunkptr p=mem2chunk(mem);
		assert(cinuse(p));	/* If this fails, someone tried to free a block twice */
		if(cinuse(p))
			return chunksize(p)-overhead_for(p);
	}
	return 0;
#endif
}

void nedsetvalue(void *v) THROWSPEC					{ nedpsetvalue(0, v); }
void * nedmalloc(size_t size) THROWSPEC				{ return nedpmalloc(0, size); }
void * nedcalloc(size_t no, size_t size) THROWSPEC	{ return nedpcalloc(0, no, size); }
void * nedrealloc(void *mem, size_t size) THROWSPEC	{ return nedprealloc(0, mem, size); }
void   nedfree(void *mem) THROWSPEC					{ nedpfree(0, mem); }
void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
int    nedposix_memalign(void** p, size_t alignment, size_t bytes) THROWSPEC { *p = nedmemalign(alignment, bytes); return 0; }
#if !NO_MALLINFO
struct mallinfo nedmallinfo(void) THROWSPEC			{ return nedpmallinfo(0); }
#endif
int    nedmallopt(int parno, int value) THROWSPEC	{ return nedpmallopt(0, parno, value); }
int    nedmalloc_trim(size_t pad) THROWSPEC			{ return nedpmalloc_trim(0, pad); }
void   nedmalloc_stats(void) THROWSPEC					{ nedpmalloc_stats(0); }
size_t nedmalloc_footprint(void) THROWSPEC				{ return nedpmalloc_footprint(0); }
void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC	{ return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC	{ return nedpindependent_comalloc(0, elems, sizes, chunks); }
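/* The forwarders above are drop-in replacements for the standard allocation
   API, all operating on the lazily initialised system pool. Illustrative
   usage (a sketch, not part of the original sources):

       char *buf=(char *) nedmalloc(256);
       buf=(char *) nedrealloc(buf, 512);
       nedfree(buf);

   Dedicated pools are created with nedcreatepool() and accessed through the
   nedp*() variants defined further below. */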

struct threadcacheblk_t;
typedef struct threadcacheblk_t threadcacheblk;
struct threadcacheblk_t
{	/* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
#ifdef FULLSANITYCHECKS
	unsigned int magic;
#endif
	unsigned int lastUsed, size;
	threadcacheblk *next, *prev;
};
typedef struct threadcache_t
{
#ifdef FULLSANITYCHECKS
	unsigned int magic1;
#endif
	int mymspace;						/* Last mspace entry this thread used */
	long threadid;
	unsigned int mallocs, frees, successes;
	size_t freeInCache;					/* How much free space is stored in this cache */
	threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
#ifdef FULLSANITYCHECKS
	unsigned int magic2;
#endif
} threadcache;
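/* bins[] stores two pointers per size class: bins[idx*2] is the head (most
   recently freed block) and bins[idx*2+1] the tail (oldest block) of a doubly
   linked list, so allocations pop from the front while garbage collection
   trims from the back. */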
struct nedpool_t
{
	MLOCK_T mutex;
	void *uservalue;
	int threads;						/* Max entries in m to use */
	threadcache *caches[THREADCACHEMAXCACHES];
	TLSVAR mycache;						/* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
	mstate m[MAXTHREADSINPOOL+1];		/* mspace entries for this pool */
};
static nedpool syspool;

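/* Returns the position of the highest set bit of (size>>4), i.e.
   floor(log2(size))-4 for sizes of 16 bytes and up: 16-31 => 0, 32-63 => 1,
   ..., 8192 => 9. Callers round the size to the bin boundary themselves. */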
static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
{	/* 8=1000	16=10000	20=10100	24=11000	32=100000	48=110000	4096=1000000000000 */
	unsigned int topbit, size=(unsigned int)(_size>>4);
	/* 16=1		20=1	24=1	32=10	48=11	64=100	96=110	128=1000	4096=100000000 */

#if defined(__GNUC__)
	topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
	{
	    unsigned long bsrTopBit;

	    _BitScanReverse(&bsrTopBit, size);

	    topbit = bsrTopBit;
	}
#else
#if 0
	union {
		unsigned asInt[2];
		double asDouble;
	};
	int n;

	asDouble = (double)size + 0.5;
	topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
#else
	{
		unsigned int x=size;
		x = x | (x >> 1);
		x = x | (x >> 2);
		x = x | (x >> 4);
		x = x | (x >> 8);
		x = x | (x >>16);
		x = ~x;
		x = x - ((x >> 1) & 0x55555555);
		x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
		x = (x + (x >> 4)) & 0x0F0F0F0F;
		x = x + (x << 8);
		x = x + (x << 16);
		topbit=31 - (x >> 24);
	}
#endif
#endif
	return topbit;
}


#ifdef FULLSANITYCHECKS
static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
{
	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
	if(ptr[0] && ptr[1])
	{
		assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
		assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
		assert(!ptr[0]->prev);
		assert(!ptr[1]->next);
		if(ptr[0]==ptr[1])
		{
			assert(!ptr[0]->next);
			assert(!ptr[1]->prev);
		}
	}
}
static void tcfullsanitycheck(threadcache *tc) THROWSPEC
{
	threadcacheblk **tcbptr=tc->bins;
	int n;
	for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
	{
		threadcacheblk *b, *ob=0;
		tcsanitycheck(tcbptr);
		for(b=tcbptr[0]; b; ob=b, b=b->next)
		{
			assert(*(unsigned int *) "NEDN"==b->magic);
			assert(!ob || ob->next==b);
			assert(!ob || b->prev==ob);
		}
	}
}
#endif

static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
{
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache)
	{
		threadcacheblk **tcbptr=tc->bins;
		int n;
		for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
		{
			threadcacheblk **tcb=tcbptr+1;		/* come from oldest end of list */
			/*tcsanitycheck(tcbptr);*/
			for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
			{
				threadcacheblk *f=*tcb;
				size_t blksize=f->size; /*nedblksize(f);*/
				assert(blksize<=nedblksize(f));
				assert(blksize);
#ifdef FULLSANITYCHECKS
				assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
#endif
				*tcb=(*tcb)->prev;
				if(*tcb)
					(*tcb)->next=0;
				else
					*tcbptr=0;
				tc->freeInCache-=blksize;
				assert((long) tc->freeInCache>=0);
				mspace_free(0, f);
				/*tcsanitycheck(tcbptr);*/
			}
		}
	}
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
}
static void DestroyCaches(nedpool *p) THROWSPEC
{
	if(p->caches)
	{
		threadcache *tc;
		int n;
		for(n=0; n<THREADCACHEMAXCACHES; n++)
		{
			if((tc=p->caches[n]))
			{
				tc->frees++;
				RemoveCacheEntries(p, tc, 0);
				assert(!tc->freeInCache);
				tc->mymspace=-1;
				tc->threadid=0;
				mspace_free(0, tc);
				p->caches[n]=0;
			}
		}
	}
}

static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
{
	threadcache *tc=0;
	int n, end;
	ACQUIRE_LOCK(&p->mutex);
	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
	if(THREADCACHEMAXCACHES==n)
	{	/* List exhausted, so disable for this thread */
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
	tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
	if(!tc)
	{
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
#ifdef FULLSANITYCHECKS
	tc->magic1=*(unsigned int *)"NEDMALC1";
	tc->magic2=*(unsigned int *)"NEDMALC2";
#endif
	tc->threadid=(long)(size_t)CURRENT_THREAD;
	for(end=0; p->m[end]; end++);
	tc->mymspace=tc->threadid % end;
	RELEASE_LOCK(&p->mutex);
	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
	return tc;
}

static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
{
	void *ret=0;
	unsigned int bestsize;
	unsigned int idx=size2binidx(*size);
	size_t blksize=0;
	threadcacheblk *blk, **binsptr;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(*size>bestsize)
	{
		idx++;
		bestsize+=bestsize>>1;
	}
	if(*size>bestsize)
	{
		idx++;
		bestsize=1<<(4+(idx>>1));
	}
#else
	if(*size>bestsize)
	{
		idx++;
		bestsize<<=1;
	}
#endif
	assert(bestsize>=*size);
	if(*size<bestsize) *size=bestsize;
	assert(*size<=THREADCACHEMAX);
	assert(idx<=THREADCACHEMAXBINS);
	binsptr=&tc->bins[idx*2];
	/* Try to match close, but move up a bin if necessary */
	blk=*binsptr;
	if(!blk || blk->size<*size)
	{	/* Bump it up a bin */
		if(idx<THREADCACHEMAXBINS)
		{
			idx++;
			binsptr+=2;
			blk=*binsptr;
		}
	}
	if(blk)
	{
		blksize=blk->size; /*nedblksize(blk);*/
		assert(nedblksize(blk)>=blksize);
		assert(blksize>=*size);
		if(blk->next)
			blk->next->prev=0;
		*binsptr=blk->next;
		if(!*binsptr)
			binsptr[1]=0;
#ifdef FULLSANITYCHECKS
		blk->magic=0;
#endif
		assert(binsptr[0]!=blk && binsptr[1]!=blk);
		assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
		/*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
		ret=(void *) blk;
	}
	++tc->mallocs;
	if(ret)
	{
		assert(blksize>=*size);
		++tc->successes;
		tc->freeInCache-=blksize;
		assert((long) tc->freeInCache>=0);
	}
#if defined(DEBUG) && 0
	if(!(tc->mallocs & 0xfff))
	{
		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	return ret;
}
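/* Evicts cached blocks by age: the first pass only frees blocks not reused
   within the last THREADCACHEMAXFREESPACE/8192 (= 64) frees, then the age
   threshold is halved until the cache drops below THREADCACHEMAXFREESPACE,
   so the most recently used blocks are the last to go. */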
static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
{
	unsigned int age=THREADCACHEMAXFREESPACE/8192;
	/*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
	while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
	{
		RemoveCacheEntries(p, tc, age);
		/*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
		age>>=1;
	}
	/*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
}
static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
{
	unsigned int bestsize;
	unsigned int idx=size2binidx(size);
	threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
	{	/* Make sure this is a valid memory block */
	    mchunkptr p  = mem2chunk(mem);
	    mstate fm = get_mstate_for(p);
	    if (!ok_magic(fm)) {
	      USAGE_ERROR_ACTION(fm, p);
	      return;
	    }
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#if 0
	/* Finer grained bin fit */
	idx<<=1;
	if(size>bestsize)
	{
		unsigned int biggerbestsize=bestsize+bestsize<<1;
		if(size>=biggerbestsize)
		{
			idx++;
			bestsize=biggerbestsize;
		}
	}
#endif
	if(bestsize!=size)	/* dlmalloc can round up, so we round down to preserve indexing */
		size=bestsize;
	binsptr=&tc->bins[idx*2];
	assert(idx<=THREADCACHEMAXBINS);
	if(tck==*binsptr)
	{
		fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
		abort();
	}
#ifdef FULLSANITYCHECKS
	tck->magic=*(unsigned int *) "NEDN";
#endif
	tck->lastUsed=++tc->frees;
	tck->size=(unsigned int) size;
	tck->next=*binsptr;
	tck->prev=0;
	if(tck->next)
		tck->next->prev=tck;
	else
		binsptr[1]=tck;
	assert(!*binsptr || (*binsptr)->size==tck->size);
	*binsptr=tck;
	assert(tck==tc->bins[idx*2]);
	assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
	/*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
	tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
#if 1
	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
		ReleaseFreeInCache(p, tc, mymspace);
#endif
}




static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
{	/* threads is -1 for system pool */
	ensure_initialization();
	ACQUIRE_MALLOC_GLOBAL_LOCK();
	if(p->threads) goto done;
	if(INITIAL_LOCK(&p->mutex)) goto err;
	if(TLSALLOC(&p->mycache)) goto err;
	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
	p->m[0]->extp=p;
	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
done:
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 1;
err:
	if(threads<0)
		abort();			/* If you can't allocate for system pool, we're screwed */
	DestroyCaches(p);
	if(p->m[0])
	{
		destroy_mspace(p->m[0]);
		p->m[0]=0;
	}
	if(p->mycache)
	{
		if(TLSFREE(p->mycache)) abort();
		p->mycache=0;
	}
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 0;
}
static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
{	/* Gets called when thread's last used mspace is in use. The strategy
	is to run through the list of all available mspaces looking for an
	unlocked one and if we fail, we create a new one so long as we don't
	exceed p->threads */
	int n, end;
	for(n=end=*lastUsed+1; p->m[n]; end=++n)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	for(n=0; n<*lastUsed && p->m[n]; n++)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	if(end<p->threads)
	{
		mstate temp;
		if(!(temp=(mstate) create_mspace(size, 1)))
			goto badexit;
		/* Now we're ready to modify the lists, we lock */
		ACQUIRE_LOCK(&p->mutex);
		while(p->m[end] && end<p->threads)
			end++;
		if(end>=p->threads)
		{	/* Drat, must destroy it now */
			RELEASE_LOCK(&p->mutex);
			destroy_mspace((mspace) temp);
			goto badexit;
		}
		/* We really want to make sure this goes into memory now but we
		have to be careful of breaking aliasing rules, so write it twice */
		{
			volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
			*_m=(p->m[end]=temp);
		}
		ACQUIRE_LOCK(&p->m[end]->mutex);
		/*printf("Created mspace idx %d\n", end);*/
		RELEASE_LOCK(&p->mutex);
		n=end;
		goto found;
	}
	/* Let it lock on the last one it used */
badexit:
	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
	return p->m[*lastUsed];
found:
	*lastUsed=n;
	if(tc)
		tc->mymspace=n;
	else
	{
		if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
	}
	return p->m[n];
}

nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
{
	nedpool *ret;
	if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
	if(!InitPool(ret, capacity, threads))
	{
		nedpfree(0, ret);
		return 0;
	}
	return ret;
}
void neddestroypool(nedpool *p) THROWSPEC
{
	int n;
	ACQUIRE_LOCK(&p->mutex);
	DestroyCaches(p);
	for(n=0; p->m[n]; n++)
	{
		destroy_mspace(p->m[n]);
		p->m[n]=0;
	}
	RELEASE_LOCK(&p->mutex);
	if(TLSFREE(p->mycache)) abort();
	nedpfree(0, p);
}

void nedpsetvalue(nedpool *p, void *v) THROWSPEC
{
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	p->uservalue=v;
}
void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
{
	nedpool *np=0;
	mchunkptr mcp=mem2chunk(mem);
	mstate fm;
	if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
	if(!cinuse(mcp)) return 0;
	if(!next_pinuse(mcp)) return 0;
	if(!is_mmapped(mcp) && !pinuse(mcp))
	{
		if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
	}
	fm=get_mstate_for(mcp);
	if(!ok_magic(fm)) return 0;
	if(!ok_address(fm, mcp)) return 0;
	if(!fm->extp) return 0;
	np=(nedpool *) fm->extp;
	if(p) *p=np;
	return np->uservalue;
}

void neddisablethreadcache(nedpool *p) THROWSPEC
{
	int mycache;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET(p->mycache);
	if(!mycache)
	{	/* Set to mspace 0 */
		if(TLSSET(p->mycache, (void *)-1)) abort();
	}
	else if(mycache>0)
	{	/* Set to last used mspace (a stored value of -(n+1) selects mspace n) */
		threadcache *tc=p->caches[mycache-1];
#if defined(DEBUG)
		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
#endif
		if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace-1))) abort();
		tc->frees++;
		RemoveCacheEntries(p, tc, 0);
		assert(!tc->freeInCache);
		tc->mymspace=-1;
		tc->threadid=0;
		mspace_free(0, p->caches[mycache-1]);
		p->caches[mycache-1]=0;
	}
}

#define GETMSPACE(m,p,tc,ms,s,action)           \
  do                                            \
  {                                             \
    mstate m = GetMSpace((p),(tc),(ms),(s));    \
    action;                                     \
    RELEASE_LOCK(&m->mutex);                    \
  } while (0)
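/* GETMSPACE runs "action" with "m" bound to a locked, ready-to-use mspace and
   releases the lock afterwards; for example nedpmalloc() below uses it as

       GETMSPACE(m, p, tc, mymspace, size,
                 ret=mspace_malloc(m, size));
*/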

static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
{	/* Returns a locked and ready for use mspace */
	mstate m=p->m[mymspace];
	assert(m);
	if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
	/*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
	return m;
}
static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
{
	int mycache;
	if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
	if(!*p)
	{
		*p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET((*p)->mycache);
	if(mycache>0)
	{
		*tc=(*p)->caches[mycache-1];
		*mymspace=(*tc)->mymspace;
	}
	else if(!mycache)
	{
		*tc=AllocCache(*p);
		if(!*tc)
		{	/* Disable */
			if(TLSSET((*p)->mycache, (void *)-1)) abort();
			*mymspace=0;
		}
		else
			*mymspace=(*tc)->mymspace;
	}
	else
	{
		*tc=0;
		*mymspace=-mycache-1;
	}
	assert(*mymspace>=0);
	assert(!*tc || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
#ifdef FULLSANITYCHECKS
	if(*tc)
	{
		if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
		{
			abort();
		}
	}
#endif
}

void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		ret=threadcache_malloc(p, tc, &size);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
	GETMSPACE(m, p, tc, mymspace, size,
		  ret=mspace_malloc(m, size));
	}
	return ret;
}
void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
{
	size_t rsize=size*no;
	void *ret=0;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &rsize);
#if THREADCACHEMAX
	if(tc && rsize<=THREADCACHEMAX)
	{	/* Use the thread cache */
		if((ret=threadcache_malloc(p, tc, &rsize)))
			memset(ret, 0, rsize);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
	GETMSPACE(m, p, tc, mymspace, rsize,
		  ret=mspace_calloc(m, 1, rsize));
	}
	return ret;
}
void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	if(!mem) return nedpmalloc(p, size);
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		size_t memsize=nedblksize(mem);
		assert(memsize);
		if((ret=threadcache_malloc(p, tc, &size)))
		{
			memcpy(ret, mem, memsize<size ? memsize : size);
			if(memsize<=THREADCACHEMAX)
				threadcache_free(p, tc, mymspace, mem, memsize);
			else
				mspace_free(0, mem);
		}
	}
#endif
	if(!ret)
	{	/* Reallocs always happen in the mspace they happened in, so skip
		locking the preferred mspace for this thread */
		ret=mspace_realloc(0, mem, size);
	}
	return ret;
}
void   nedpfree(nedpool *p, void *mem) THROWSPEC
{	/* Frees always happen in the mspace they happened in, so skip
	locking the preferred mspace for this thread */
	threadcache *tc;
	int mymspace;
	size_t memsize;
	assert(mem);
	GetThreadCache(&p, &tc, &mymspace, 0);
#if THREADCACHEMAX
	memsize=nedblksize(mem);
	assert(memsize);
	if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
		threadcache_free(p, tc, mymspace, mem, memsize);
	else
#endif
		mspace_free(0, mem);
}
void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
{
	void *ret;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &bytes);
	{	/* Use this thread's mspace */
	GETMSPACE(m, p, tc, mymspace, bytes,
		  ret=mspace_memalign(m, alignment, bytes));
	}
	return ret;
}
#if !NO_MALLINFO
struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
{
	int n;
	struct mallinfo ret={0};
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		struct mallinfo t=mspace_mallinfo(p->m[n]);
		ret.arena+=t.arena;
		ret.ordblks+=t.ordblks;
		ret.hblkhd+=t.hblkhd;
		ret.usmblks+=t.usmblks;
		ret.uordblks+=t.uordblks;
		ret.fordblks+=t.fordblks;
		ret.keepcost+=t.keepcost;
	}
	return ret;
}
#endif
int    nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{
	return mspace_mallopt(parno, value);
}
int    nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
{
	int n, ret=0;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		ret+=mspace_trim(p->m[n], pad);
	}
	return ret;
}
void   nedpmalloc_stats(nedpool *p) THROWSPEC
{
	int n;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		mspace_malloc_stats(p->m[n]);
	}
}
size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
{
	size_t ret=0;
	int n;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		ret+=mspace_footprint(p->m[n]);
	}
	return ret;
}
void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &elemsize);
	GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
	      ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
	return ret;
}
void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
	if(!adjustedsizes) return 0;
	for(i=0; i<elems; i++)
		adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
	GetThreadCache(&p, &tc, &mymspace, 0);
	GETMSPACE(m, p, tc, mymspace, 0,
	      ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
	return ret;
}

#if defined(__cplusplus)
}
#endif