/* Alternative malloc implementation for multiple threads without
lock contention based on dlmalloc. (C) 2005-2006 Niall Douglas

Boost Software License - Version 1.0 - August 17th, 2003

Permission is hereby granted, free of charge, to any person or organization
obtaining a copy of the software and accompanying documentation covered by
this license (the "Software") to use, reproduce, display, distribute,
execute, and transmit the Software, and to prepare derivative works of the
Software, and to permit third-parties to whom the Software is furnished to
do so, all subject to the following:

The copyright notices in the Software and this entire statement, including
the above license grant, this restriction and the following disclaimer,
must be included in all copies of the Software, in whole or in part, and
all derivative works of the Software, unless such copies or derivative
works are solely in the form of machine-executable object code generated by
a source language processor.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE, TITLE AND NON-INFRINGEMENT. IN NO EVENT
SHALL THE COPYRIGHT HOLDERS OR ANYONE DISTRIBUTING THE SOFTWARE BE LIABLE
FOR ANY DAMAGES OR OTHER LIABILITY, WHETHER IN CONTRACT, TORT OR OTHERWISE,
ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
DEALINGS IN THE SOFTWARE.
*/

#ifdef _MSC_VER
/* Enable full aliasing on MSVC */
/*#pragma optimize("a", on)*/
#endif

/*#define FULLSANITYCHECKS*/

#include "nedmalloc.h"
#if defined(WIN32)
#include <malloc.h>
#endif
#define MSPACES 1
#define ONLY_MSPACES 1
#ifndef USE_LOCKS
#define USE_LOCKS 1
#endif
#define FOOTERS 1 /* Need to enable footers so frees lock the right mspace */
#undef DEBUG      /* dlmalloc wants DEBUG either 0 or 1 */
#ifdef _DEBUG
#define DEBUG 1
#else
#define DEBUG 0
#endif
#ifdef NDEBUG /* Disable assert checking on release builds */
#undef DEBUG
#endif
/* The default of 64Kb means we spend too much time kernel-side */
#ifndef DEFAULT_GRANULARITY
#define DEFAULT_GRANULARITY (1*1024*1024)
#endif
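
/* Illustrative note, not part of the original source: because of the
   #ifndef guard above, DEFAULT_GRANULARITY can be overridden at build time
   without editing this file, e.g. with a POSIX-style compiler driver:

       cc -DDEFAULT_GRANULARITY="(4*1024*1024)" -c nedmalloc.c

   A larger granularity means fewer kernel transitions as an mspace grows,
   at the cost of reserving address space in coarser steps. */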
/*#define USE_SPIN_LOCKS 0*/


/*#define FORCEINLINE*/
#include "malloc.c.h"
#ifdef NDEBUG /* Disable assert checking on release builds */
#undef DEBUG
#endif

/* The maximum concurrent threads in a pool possible */
#ifndef MAXTHREADSINPOOL
#define MAXTHREADSINPOOL 16
#endif
/* The maximum number of threadcaches which can be allocated */
#ifndef THREADCACHEMAXCACHES
#define THREADCACHEMAXCACHES 256
#endif
/* The maximum size to be allocated from the thread cache */
#ifndef THREADCACHEMAX
#define THREADCACHEMAX 8192
#endif
#if 0
/* The number of cache entries for finer grained bins. This is (topbitpos(THREADCACHEMAX)-4)*2 */
#define THREADCACHEMAXBINS ((13-4)*2)
#else
/* The number of cache entries. This is (topbitpos(THREADCACHEMAX)-4) */
#define THREADCACHEMAXBINS (13-4)
#endif
/* Point at which the free space in a thread cache is garbage collected */
#ifndef THREADCACHEMAXFREESPACE
#define THREADCACHEMAXFREESPACE (512*1024)
#endif


#ifdef WIN32
#define TLSVAR DWORD
#define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
#define TLSFREE(k) (!TlsFree(k))
#define TLSGET(k) TlsGetValue(k)
#define TLSSET(k, a) (!TlsSetValue(k, a))
#ifdef DEBUG
static LPVOID ChkedTlsGetValue(DWORD idx)
{
  LPVOID ret=TlsGetValue(idx);
  assert(S_OK==GetLastError());
  return ret;
}
#undef TLSGET
#define TLSGET(k) ChkedTlsGetValue(k)
#endif
#else
#define TLSVAR pthread_key_t
#define TLSALLOC(k) pthread_key_create(k, 0)
#define TLSFREE(k) pthread_key_delete(k)
#define TLSGET(k) pthread_getspecific(k)
#define TLSSET(k, a) pthread_setspecific(k, a)
#endif
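
/* A minimal sketch (not part of the original source) of how the TLS macro
   layer above is used; "mykey" and "myptr" are illustrative names. All
   the macros share the convention that TLSALLOC/TLSSET/TLSFREE return
   zero on success and non-zero on failure, on both Win32 and pthreads:

       TLSVAR mykey;
       if(TLSALLOC(&mykey)) abort();      (allocate a key)
       if(TLSSET(mykey, myptr)) abort();  (store a per-thread value)
       myptr=TLSGET(mykey);               (fetch it back)
       if(TLSFREE(mykey)) abort();        (release the key)
*/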

#if 0
/* Only enable if testing with valgrind. Causes misoperation */
#define mspace_malloc(p, s) malloc(s)
#define mspace_realloc(p, m, s) realloc(m, s)
#define mspace_calloc(p, n, s) calloc(n, s)
#define mspace_free(p, m) free(m)
#endif


#if defined(__cplusplus)
#if !defined(NO_NED_NAMESPACE)
namespace nedalloc {
#else
extern "C" {
#endif
#endif

size_t nedblksize(void *mem) THROWSPEC
{
#if 0
  /* Only enable if testing with valgrind. Causes misoperation */
  return THREADCACHEMAX;
#else
  if(mem)
  {
    mchunkptr p=mem2chunk(mem);
    assert(cinuse(p)); /* If this fails, someone tried to free a block twice */
    if(cinuse(p))
      return chunksize(p)-overhead_for(p);
  }
  return 0;
#endif
}

void nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); }
void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); }
void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); }
void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); }
void nedfree(void *mem) THROWSPEC { nedpfree(0, mem); }
void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
/* Per POSIX, allocation failure must be reported via the return code; ENOMEM
   comes from <errno.h>, which malloc.c.h includes unless LACKS_ERRNO_H is set */
int nedposix_memalign(void **p, size_t alignment, size_t bytes) THROWSPEC { *p = nedmemalign(alignment, bytes); return *p ? 0 : ENOMEM; }
#if !NO_MALLINFO
struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); }
#endif
int nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); }
int nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); }
void nedmalloc_stats(void) THROWSPEC { nedpmalloc_stats(0); }
size_t nedmalloc_footprint(void) THROWSPEC { return nedpmalloc_footprint(0); }
void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); }
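
/* Usage sketch (illustrative, not part of the library): the wrappers above
   forward to their nedp* counterparts with a null pool argument, which
   selects the shared system pool ("syspool") defined below.

       void *a=nedmalloc(100);
       a=nedrealloc(a, 200);
       nedfree(a);
*/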

struct threadcacheblk_t;
typedef struct threadcacheblk_t threadcacheblk;
struct threadcacheblk_t
{ /* Keep less than 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
#ifdef FULLSANITYCHECKS
  unsigned int magic;
#endif
  unsigned int lastUsed, size;
  threadcacheblk *next, *prev;
};
typedef struct threadcache_t
{
#ifdef FULLSANITYCHECKS
  unsigned int magic1;
#endif
  int mymspace;       /* Last mspace entry this thread used */
  long threadid;
  unsigned int mallocs, frees, successes;
  size_t freeInCache; /* How much free space is stored in this cache */
  threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
#ifdef FULLSANITYCHECKS
  unsigned int magic2;
#endif
} threadcache;
struct nedpool_t
{
  MLOCK_T mutex;
  void *uservalue;
  int threads;        /* Max entries in m to use */
  threadcache *caches[THREADCACHEMAXCACHES];
  TLSVAR mycache;     /* Thread cache for this thread. 0 for unset, negative for use mspace-1 directly, otherwise is cache-1 */
  mstate m[MAXTHREADSINPOOL+1]; /* mspace entries for this pool */
};
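
/* Worked examples (not in the original source) of the "mycache" encoding
   described above, as decoded by GetThreadCache() further down:
     value  0 -> no thread cache assigned to this thread yet
     value  3 -> use p->caches[2]        (cache index  = value-1)
     value -3 -> caching disabled; use p->m[2] directly
                                         (mspace index = -value-1) */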
static nedpool syspool;

static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
{ /* 8=1000 16=10000 20=10100 24=11000 32=100000 48=110000 4096=1000000000000 */
  unsigned int topbit, size=(unsigned int)(_size>>4);
  /* 16=1 20=1 24=1 32=10 48=11 64=100 96=110 128=1000 4096=100000000 */

#if defined(__GNUC__)
  topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
  {
    unsigned long bsrTopBit;

    _BitScanReverse(&bsrTopBit, size);

    topbit = bsrTopBit;
  }
#else
#if 0
  union {
    unsigned asInt[2];
    double asDouble;
  };
  int n;

  asDouble = (double)size + 0.5;
  topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
#else
  {
    unsigned int x=size;
    x = x | (x >> 1);
    x = x | (x >> 2);
    x = x | (x >> 4);
    x = x | (x >> 8);
    x = x | (x >>16);
    x = ~x;
    x = x - ((x >> 1) & 0x55555555);
    x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
    x = (x + (x >> 4)) & 0x0F0F0F0F;
    x = x + (x << 8);
    x = x + (x << 16);
    topbit=31 - (x >> 24);
  }
#endif
#endif
  return topbit;
}
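
/* Worked example (not in the original source): for _size==96, size becomes
   96>>4==6 (binary 110), so topbit==2 and callers derive a best-fit bin
   size of 1<<(2+4)==64; since 96>64 they then bump the index once, giving
   the 128 byte bin. For _size==4096, size==256 and topbit==8, i.e. the
   4096 byte bin exactly. */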


#ifdef FULLSANITYCHECKS
static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
{
  assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
  if(ptr[0] && ptr[1])
  {
    assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
    assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
    assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
    assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
    assert(!ptr[0]->prev);
    assert(!ptr[1]->next);
    if(ptr[0]==ptr[1])
    {
      assert(!ptr[0]->next);
      assert(!ptr[1]->prev);
    }
  }
}
static void tcfullsanitycheck(threadcache *tc) THROWSPEC
{
  threadcacheblk **tcbptr=tc->bins;
  int n;
  for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
  {
    threadcacheblk *b, *ob=0;
    tcsanitycheck(tcbptr);
    for(b=tcbptr[0]; b; ob=b, b=b->next)
    {
      assert(*(unsigned int *) "NEDN"==b->magic);
      assert(!ob || ob->next==b);
      assert(!ob || b->prev==ob);
    }
  }
}
#endif

static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
{
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
  if(tc->freeInCache)
  {
    threadcacheblk **tcbptr=tc->bins;
    int n;
    for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
    {
      threadcacheblk **tcb=tcbptr+1; /* come from oldest end of list */
      /*tcsanitycheck(tcbptr);*/
      for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
      {
        threadcacheblk *f=*tcb;
        size_t blksize=f->size; /*nedblksize(f);*/
        assert(blksize<=nedblksize(f));
        assert(blksize);
#ifdef FULLSANITYCHECKS
        assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
#endif
        *tcb=(*tcb)->prev;
        if(*tcb)
          (*tcb)->next=0;
        else
          *tcbptr=0;
        tc->freeInCache-=blksize;
        assert((long) tc->freeInCache>=0);
        mspace_free(0, f);
        /*tcsanitycheck(tcbptr);*/
      }
    }
  }
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
}
static void DestroyCaches(nedpool *p) THROWSPEC
{
  if(p->caches)
  {
    threadcache *tc;
    int n;
    for(n=0; n<THREADCACHEMAXCACHES; n++)
    {
      if((tc=p->caches[n]))
      {
        tc->frees++;
        RemoveCacheEntries(p, tc, 0);
        assert(!tc->freeInCache);
        tc->mymspace=-1;
        tc->threadid=0;
        mspace_free(0, tc);
        p->caches[n]=0;
      }
    }
  }
}

static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
{
  threadcache *tc=0;
  int n, end;
  ACQUIRE_LOCK(&p->mutex);
  for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
  if(THREADCACHEMAXCACHES==n)
  { /* List exhausted, so disable for this thread */
    RELEASE_LOCK(&p->mutex);
    return 0;
  }
  tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
  if(!tc)
  {
    RELEASE_LOCK(&p->mutex);
    return 0;
  }
#ifdef FULLSANITYCHECKS
  tc->magic1=*(unsigned int *)"NEDMALC1";
  tc->magic2=*(unsigned int *)"NEDMALC2";
#endif
  tc->threadid=(long)(size_t)CURRENT_THREAD;
  for(end=0; p->m[end]; end++);
  tc->mymspace=(int)((size_t)tc->threadid % (size_t)end); /* cast before the modulo: a pointer-derived thread id may be negative as a long, and a negative % would yield a negative index */
  RELEASE_LOCK(&p->mutex);
  if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
  return tc;
}

static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
{
  void *ret=0;
  unsigned int bestsize;
  unsigned int idx=size2binidx(*size);
  size_t blksize=0;
  threadcacheblk *blk, **binsptr;
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
  /* Calculate best fit bin size */
  bestsize=1<<(idx+4);
#if 0
  /* Finer grained bin fit */
  idx<<=1;
  if(*size>bestsize)
  {
    idx++;
    bestsize+=bestsize>>1;
  }
  if(*size>bestsize)
  {
    idx++;
    bestsize=1<<(4+(idx>>1));
  }
#else
  if(*size>bestsize)
  {
    idx++;
    bestsize<<=1;
  }
#endif
  assert(bestsize>=*size);
  if(*size<bestsize) *size=bestsize;
  assert(*size<=THREADCACHEMAX);
  assert(idx<=THREADCACHEMAXBINS);
  binsptr=&tc->bins[idx*2];
  /* Try to match close, but move up a bin if necessary */
  blk=*binsptr;
  if(!blk || blk->size<*size)
  { /* Bump it up a bin */
    if(idx<THREADCACHEMAXBINS)
    {
      idx++;
      binsptr+=2;
      blk=*binsptr;
    }
  }
  if(blk)
  {
    blksize=blk->size; /*nedblksize(blk);*/
    assert(nedblksize(blk)>=blksize);
    assert(blksize>=*size);
    if(blk->next)
      blk->next->prev=0;
    *binsptr=blk->next;
    if(!*binsptr)
      binsptr[1]=0;
#ifdef FULLSANITYCHECKS
    blk->magic=0;
#endif
    assert(binsptr[0]!=blk && binsptr[1]!=blk);
    assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
    /*printf("malloc: %p, %p, %p, %lu\n", p, tc, blk, (long) size);*/
    ret=(void *) blk;
  }
  ++tc->mallocs;
  if(ret)
  {
    assert(blksize>=*size);
    ++tc->successes;
    tc->freeInCache-=blksize;
    assert((long) tc->freeInCache>=0);
  }
#if defined(DEBUG) && 0
  if(!(tc->mallocs & 0xfff))
  {
    printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
           (float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
  }
#endif
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
  return ret;
}
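
/* Illustrative trace (not in the original source) of the lookup above: a
   request for 600 bytes rounds up to the 1024 byte bin. If that bin is
   empty, or its head block is smaller than the rounded request, the 2048
   byte bin is tried once before giving up, in which case the caller falls
   through to a real mspace allocation. */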
static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
{
  unsigned int age=THREADCACHEMAXFREESPACE/8192;
  /*ACQUIRE_LOCK(&p->m[mymspace]->mutex);*/
  while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
  {
    RemoveCacheEntries(p, tc, age);
    /*printf("*** Removing cache entries older than %u (%u)\n", age, (unsigned int) tc->freeInCache);*/
    age>>=1;
  }
  /*RELEASE_LOCK(&p->m[mymspace]->mutex);*/
}
static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
{
  unsigned int bestsize;
  unsigned int idx=size2binidx(size);
  threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
  assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
  { /* Make sure this is a valid memory block */
    mchunkptr p = mem2chunk(mem);
    mstate fm = get_mstate_for(p);
    if (!ok_magic(fm)) {
      USAGE_ERROR_ACTION(fm, p);
      return;
    }
  }
#endif
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
  /* Calculate best fit bin size */
  bestsize=1<<(idx+4);
#if 0
  /* Finer grained bin fit */
  idx<<=1;
  if(size>bestsize)
  {
    unsigned int biggerbestsize=bestsize+(bestsize>>1); /* a 1.5x bump matching threadcache_malloc(); the original "bestsize+bestsize<<1" parsed as "(2*bestsize)<<1" because + binds tighter than << */
    if(size>=biggerbestsize)
    {
      idx++;
      bestsize=biggerbestsize;
    }
  }
#endif
  if(bestsize!=size) /* dlmalloc can round up, so we round down to preserve indexing */
    size=bestsize;
  binsptr=&tc->bins[idx*2];
  assert(idx<=THREADCACHEMAXBINS);
  if(tck==*binsptr)
  {
    fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", tck);
    abort();
  }
#ifdef FULLSANITYCHECKS
  tck->magic=*(unsigned int *) "NEDN";
#endif
  tck->lastUsed=++tc->frees;
  tck->size=(unsigned int) size;
  tck->next=*binsptr;
  tck->prev=0;
  if(tck->next)
    tck->next->prev=tck;
  else
    binsptr[1]=tck;
  assert(!*binsptr || (*binsptr)->size==tck->size);
  *binsptr=tck;
  assert(tck==tc->bins[idx*2]);
  assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
  /*printf("free: %p, %p, %p, %lu\n", p, tc, mem, (long) size);*/
  tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
  tcfullsanitycheck(tc);
#endif
#if 1
  if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
    ReleaseFreeInCache(p, tc, mymspace);
#endif
}




static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
{ /* threads is -1 for system pool */
  ensure_initialization();
  ACQUIRE_MALLOC_GLOBAL_LOCK();
  if(p->threads) goto done;
  if(INITIAL_LOCK(&p->mutex)) goto err;
  if(TLSALLOC(&p->mycache)) goto err;
  if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
  p->m[0]->extp=p;
  p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
done:
  RELEASE_MALLOC_GLOBAL_LOCK();
  return 1;
err:
  if(threads<0)
    abort(); /* If you can't allocate for the system pool, we're screwed */
  DestroyCaches(p);
  if(p->m[0])
  {
    destroy_mspace(p->m[0]);
    p->m[0]=0;
  }
  if(p->mycache)
  {
    if(TLSFREE(p->mycache)) abort();
    p->mycache=0;
  }
  RELEASE_MALLOC_GLOBAL_LOCK();
  return 0;
}
static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
{ /* Gets called when a thread's last used mspace is in use. The strategy
     is to run through the list of all available mspaces looking for an
     unlocked one and if we fail, we create a new one so long as we don't
     exceed p->threads */
  int n, end;
  for(n=end=*lastUsed+1; p->m[n]; end=++n)
  {
    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  }
  for(n=0; n<*lastUsed && p->m[n]; n++)
  {
    if(TRY_LOCK(&p->m[n]->mutex)) goto found;
  }
  if(end<p->threads)
  {
    mstate temp;
    if(!(temp=(mstate) create_mspace(size, 1)))
      goto badexit;
    /* Now we're ready to modify the lists, we lock */
    ACQUIRE_LOCK(&p->mutex);
    while(p->m[end] && end<p->threads)
      end++;
    if(end>=p->threads)
    { /* Drat, must destroy it now */
      RELEASE_LOCK(&p->mutex);
      destroy_mspace((mspace) temp);
      goto badexit;
    }
    /* We really want to make sure this goes into memory now but we
       have to be careful of breaking aliasing rules, so write it twice */
    {
      volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
      *_m=(p->m[end]=temp);
    }
    ACQUIRE_LOCK(&p->m[end]->mutex);
    /*printf("Created mspace idx %d\n", end);*/
    RELEASE_LOCK(&p->mutex);
    n=end;
    goto found;
  }
  /* Let it lock on the last one it used */
badexit:
  ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);
  return p->m[*lastUsed];
found:
  *lastUsed=n;
  if(tc)
    tc->mymspace=n;
  else
  {
    if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
  }
  return p->m[n];
}
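
/* Note (added commentary, paraphrasing the in-function comment above):
   other threads scan p->m[] while holding only their own mspace locks, so
   the store of the new mspace pointer must actually reach memory before
   the pool lock is dropped. Writing once through a volatile-qualified
   alias and once through the ordinary lvalue coaxes the compiler into
   emitting the store without formally breaking strict aliasing rules. */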

nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
{
  nedpool *ret;
  if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
  if(!InitPool(ret, capacity, threads))
  {
    nedpfree(0, ret);
    return 0;
  }
  return ret;
}
void neddestroypool(nedpool *p) THROWSPEC
{
  int n;
  ACQUIRE_LOCK(&p->mutex);
  DestroyCaches(p);
  for(n=0; p->m[n]; n++)
  {
    destroy_mspace(p->m[n]);
    p->m[n]=0;
  }
  RELEASE_LOCK(&p->mutex);
  if(TLSFREE(p->mycache)) abort();
  nedpfree(0, p);
}
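
/* A minimal pool lifecycle sketch (illustrative, not part of the library);
   per InitPool() above, capacity 0 requests the default initial mspace
   size and threads 0 means "up to MAXTHREADSINPOOL":

       nedpool *pool=nedcreatepool(0, 0);
       void *x=nedpmalloc(pool, 256);
       nedpfree(pool, x);
       neddestroypool(pool);
*/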

void nedpsetvalue(nedpool *p, void *v) THROWSPEC
{
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  p->uservalue=v;
}
void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
{
  nedpool *np=0;
  mchunkptr mcp=mem2chunk(mem);
  mstate fm;
  if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
  if(!cinuse(mcp)) return 0;
  if(!next_pinuse(mcp)) return 0;
  if(!is_mmapped(mcp) && !pinuse(mcp))
  {
    if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
  }
  fm=get_mstate_for(mcp);
  if(!ok_magic(fm)) return 0;
  if(!ok_address(fm, mcp)) return 0;
  if(!fm->extp) return 0;
  np=(nedpool *) fm->extp;
  if(p) *p=np;
  return np->uservalue;
}

void neddisablethreadcache(nedpool *p) THROWSPEC
{
  int mycache;
  if(!p)
  {
    p=&syspool;
    if(!syspool.threads) InitPool(&syspool, 0, -1);
  }
  mycache=(int)(size_t) TLSGET(p->mycache);
  if(!mycache)
  { /* Set to mspace 0 */
    if(TLSSET(p->mycache, (void *)-1)) abort();
  }
  else if(mycache>0)
  { /* Set to last used mspace */
    threadcache *tc=p->caches[mycache-1];
#if defined(DEBUG)
    printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
           100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
#endif
    /* Mspace indices are encoded as -(index+1); a plain -tc->mymspace would
       be off by one and would collide with the "unset" value 0 when the
       last used mspace was index 0 */
    if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace-1))) abort();
    tc->frees++;
    RemoveCacheEntries(p, tc, 0);
    assert(!tc->freeInCache);
    tc->mymspace=-1;
    tc->threadid=0;
    mspace_free(0, p->caches[mycache-1]);
    p->caches[mycache-1]=0;
  }
}

#define GETMSPACE(m,p,tc,ms,s,action)         \
  do                                          \
  {                                           \
    mstate m = GetMSpace((p),(tc),(ms),(s));  \
    action;                                   \
    RELEASE_LOCK(&m->mutex);                  \
  } while (0)
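
/* Roughly what an invocation expands to (illustrative):

       GETMSPACE(m, p, tc, mymspace, size, ret=mspace_malloc(m, size));

   becomes a do/while(0) block that binds "m" to a locked mspace obtained
   from GetMSpace(), runs the action statement, then releases the lock.
   The do/while(0) wrapper makes the whole expansion behave as a single
   statement followed by a semicolon. */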

static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
{ /* Returns a locked and ready for use mspace */
  mstate m=p->m[mymspace];
  assert(m);
  if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
  /*assert(IS_LOCKED(&p->m[mymspace]->mutex));*/
  return m;
}
static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
{
  int mycache;
  if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
  if(!*p)
  {
    *p=&syspool;
    if(!syspool.threads) InitPool(&syspool, 0, -1);
  }
  mycache=(int)(size_t) TLSGET((*p)->mycache);
  if(mycache>0)
  {
    *tc=(*p)->caches[mycache-1];
    *mymspace=(*tc)->mymspace;
  }
  else if(!mycache)
  {
    *tc=AllocCache(*p);
    if(!*tc)
    { /* Disable */
      if(TLSSET((*p)->mycache, (void *)-1)) abort();
      *mymspace=0;
    }
    else
      *mymspace=(*tc)->mymspace;
  }
  else
  {
    *tc=0;
    *mymspace=-mycache-1;
  }
  assert(*mymspace>=0);
  assert(!*tc || (long)(size_t)CURRENT_THREAD==(*tc)->threadid); /* *tc is legitimately 0 when caching is disabled for this thread */
#ifdef FULLSANITYCHECKS
  if(*tc)
  {
    if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
    {
      abort();
    }
  }
#endif
}

void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
{
  void *ret=0;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
  if(tc && size<=THREADCACHEMAX)
  { /* Use the thread cache */
    ret=threadcache_malloc(p, tc, &size);
  }
#endif
  if(!ret)
  { /* Use this thread's mspace */
    GETMSPACE(m, p, tc, mymspace, size,
              ret=mspace_malloc(m, size));
  }
  return ret;
}
void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
{
  size_t rsize=size*no;
  void *ret=0;
  threadcache *tc;
  int mymspace;
  if(no && rsize/no!=size) return 0; /* size_t overflow in no*size would otherwise hand back an undersized block via the thread cache */
  GetThreadCache(&p, &tc, &mymspace, &rsize);
#if THREADCACHEMAX
  if(tc && rsize<=THREADCACHEMAX)
  { /* Use the thread cache */
    if((ret=threadcache_malloc(p, tc, &rsize)))
      memset(ret, 0, rsize);
  }
#endif
  if(!ret)
  { /* Use this thread's mspace */
    GETMSPACE(m, p, tc, mymspace, rsize,
              ret=mspace_calloc(m, 1, rsize));
  }
  return ret;
}
void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
  void *ret=0;
  threadcache *tc;
  int mymspace;
  if(!mem) return nedpmalloc(p, size);
  GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
  if(tc && size && size<=THREADCACHEMAX)
  { /* Use the thread cache */
    size_t memsize=nedblksize(mem);
    assert(memsize);
    if((ret=threadcache_malloc(p, tc, &size)))
    {
      memcpy(ret, mem, memsize<size ? memsize : size);
      if(memsize<=THREADCACHEMAX)
        threadcache_free(p, tc, mymspace, mem, memsize);
      else
        mspace_free(0, mem);
    }
  }
#endif
  if(!ret)
  { /* Reallocs always happen in the mspace they happened in, so skip
       locking the preferred mspace for this thread */
    ret=mspace_realloc(0, mem, size);
  }
  return ret;
}
void nedpfree(nedpool *p, void *mem) THROWSPEC
{ /* Frees always happen in the mspace they happened in, so skip
     locking the preferred mspace for this thread */
  threadcache *tc;
  int mymspace;
  size_t memsize;
  assert(mem);
  GetThreadCache(&p, &tc, &mymspace, 0);
#if THREADCACHEMAX
  memsize=nedblksize(mem);
  assert(memsize);
  if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
    threadcache_free(p, tc, mymspace, mem, memsize);
  else
#endif
    mspace_free(0, mem);
}
void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
{
  void *ret;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &bytes);
  { /* Use this thread's mspace */
    GETMSPACE(m, p, tc, mymspace, bytes,
              ret=mspace_memalign(m, alignment, bytes));
  }
  return ret;
}
#if !NO_MALLINFO
struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
{
  int n;
  struct mallinfo ret={0};
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    struct mallinfo t=mspace_mallinfo(p->m[n]);
    ret.arena+=t.arena;
    ret.ordblks+=t.ordblks;
    ret.hblkhd+=t.hblkhd;
    ret.usmblks+=t.usmblks;
    ret.uordblks+=t.uordblks;
    ret.fordblks+=t.fordblks;
    ret.keepcost+=t.keepcost;
  }
  return ret;
}
#endif
int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{
  return mspace_mallopt(parno, value);
}
int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
{
  int n, ret=0;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    ret+=mspace_trim(p->m[n], pad);
  }
  return ret;
}
void nedpmalloc_stats(nedpool *p) THROWSPEC
{
  int n;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    mspace_malloc_stats(p->m[n]);
  }
}
size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
{
  size_t ret=0;
  int n;
  if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
  for(n=0; p->m[n]; n++)
  {
    ret+=mspace_footprint(p->m[n]);
  }
  return ret;
}
void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
  void **ret;
  threadcache *tc;
  int mymspace;
  GetThreadCache(&p, &tc, &mymspace, &elemsize);
  GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
            ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
  return ret;
}
void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
  void **ret;
  threadcache *tc;
  int mymspace;
  size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
  if(!adjustedsizes) return 0;
  for(i=0; i<elems; i++)
    adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
  GetThreadCache(&p, &tc, &mymspace, 0);
  GETMSPACE(m, p, tc, mymspace, 0,
            ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
  return ret;
}
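
/* A minimal sketch of nedpindependent_comalloc() usage (illustrative, not
   part of the library). Per the dlmalloc contract it wraps: each element
   is freed individually, and because "chunks" is passed as null here the
   returned pointer array is itself dynamically allocated and must be
   freed too.

       size_t sizes[3] = { 24, 56, 88 };
       void **blocks = nedpindependent_comalloc(0, 3, sizes, 0);
       if(blocks)
       {
           nedpfree(0, blocks[0]);
           nedpfree(0, blocks[1]);
           nedpfree(0, blocks[2]);
           nedpfree(0, blocks);
       }
*/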

#if defined(__cplusplus)
}
#endif