#ifndef DEFAULT_GRANULARITY
/* The default granularity with which memory is claimed from the system */
#define DEFAULT_GRANULARITY (1*1024*1024)
#endif
#ifndef MAXTHREADSINPOOL
/* The maximum number of threads allowed into a pool */
#define MAXTHREADSINPOOL 16
#endif
#ifndef THREADCACHEMAXCACHES
/* The maximum number of thread caches which can be allocated */
#define THREADCACHEMAXCACHES 256
#endif
#ifndef THREADCACHEMAX
/* The maximum size to be allocated from the thread cache */
#define THREADCACHEMAX 8192
#endif
#ifdef FINEGRAINEDBINS
/* The number of cache entries for finer grained bins: (topbitpos(THREADCACHEMAX)-4)*2 */
#define THREADCACHEMAXBINS ((13-4)*2)
#else
/* The number of cache entries: topbitpos(THREADCACHEMAX)-4 */
#define THREADCACHEMAXBINS (13-4)
#endif
#ifndef THREADCACHEMAXFREESPACE
/* The maximum amount of free space a thread cache may hold before being trimmed */
#define THREADCACHEMAXFREESPACE (512*1024)
#endif
/* Win32 thread local storage: the TLS* macros return nonzero on failure */
#define TLSALLOC(k) (*(k)=TlsAlloc(), TLS_OUT_OF_INDEXES==*(k))
#define TLSFREE(k) (!TlsFree(k))
#define TLSGET(k) TlsGetValue(k)
#define TLSSET(k, a) (!TlsSetValue(k, a))
#ifdef DEBUG
/* Debug builds additionally check that TlsGetValue() itself succeeded */
static LPVOID ChkedTlsGetValue(DWORD idx)
{
	LPVOID ret=TlsGetValue(idx);
	assert(S_OK==GetLastError());
	return ret;
}
#undef TLSGET
#define TLSGET(k) ChkedTlsGetValue(k)
#endif
#else /* !WIN32: map the same TLS abstraction onto pthread keys */
#define TLSVAR pthread_key_t
#define TLSALLOC(k) pthread_key_create(k, 0)
#define TLSFREE(k) pthread_key_delete(k)
#define TLSGET(k) pthread_getspecific(k)
#define TLSSET(k, a) pthread_setspecific(k, a)
#endif
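/* Illustrative sketch (not part of the original source): how the TLS
   abstraction above is used. TLSALLOC, TLSFREE and TLSSET return nonzero
   on failure on both platforms; TLSGET returns the stored pointer. The
   key name is hypothetical. */
#if 0
static TLSVAR examplekey;
static void tlsexample(void)
{
	if(TLSALLOC(&examplekey)) abort();
	if(TLSSET(examplekey, (void *)(size_t) 42)) abort();
	assert((void *)(size_t) 42==TLSGET(examplekey));
	if(TLSFREE(examplekey)) abort();
}
#endif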
/* In this configuration the mspace calls map straight onto the C library allocator */
#define mspace_malloc(p, s) malloc(s)
#define mspace_realloc(p, m, s) realloc(m, s)
#define mspace_calloc(p, n, s) calloc(n, s)
#define mspace_free(p, m) free(m)
#if defined(__cplusplus)
#if !defined(NO_NED_NAMESPACE)
namespace nedalloc {
#else
extern "C" {
#endif
#endif

/* Returns the usable size of the allocation at mem */
size_t nedblksize(void *mem) THROWSPEC
{
	/* ... */
	return THREADCACHEMAX;
	/* ... */
	mchunkptr p=mem2chunk(mem);
	/* ... */
	return chunksize(p)-overhead_for(p);
	/* ... */
}
/* The non-pool API: each call simply forwards to the pool API with the
   system pool (pool 0) selected */
void   nedsetvalue(void *v) THROWSPEC { nedpsetvalue(0, v); }
void * nedmalloc(size_t size) THROWSPEC { return nedpmalloc(0, size); }
void * nedcalloc(size_t no, size_t size) THROWSPEC { return nedpcalloc(0, no, size); }
void * nedrealloc(void *mem, size_t size) THROWSPEC { return nedprealloc(0, mem, size); }
void   nedfree(void *mem) THROWSPEC { nedpfree(0, mem); }
void * nedmemalign(size_t alignment, size_t bytes) THROWSPEC { return nedpmemalign(0, alignment, bytes); }
#if !NO_MALLINFO
struct mallinfo nedmallinfo(void) THROWSPEC { return nedpmallinfo(0); }
#endif
int    nedmallopt(int parno, int value) THROWSPEC { return nedpmallopt(0, parno, value); }
int    nedmalloc_trim(size_t pad) THROWSPEC { return nedpmalloc_trim(0, pad); }
void   nedmalloc_stats(void) THROWSPEC { nedpmalloc_stats(0); }
size_t nedmalloc_footprint(void) THROWSPEC { return nedpmalloc_footprint(0); }
void **nedindependent_calloc(size_t elemsno, size_t elemsize, void **chunks) THROWSPEC { return nedpindependent_calloc(0, elemsno, elemsize, chunks); }
void **nedindependent_comalloc(size_t elems, size_t *sizes, void **chunks) THROWSPEC { return nedpindependent_comalloc(0, elems, sizes, chunks); }
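/* Illustrative usage sketch (not part of the original source): because the
   unprefixed API above simply forwards to the system pool, nedmalloc can
   stand in directly for the C allocator. The example function is
   hypothetical. */
#if 0
#include <string.h>
static int example(void)
{
	char *buf=(char *) nedmalloc(64), *tmp;
	if(!buf) return -1;
	memcpy(buf, "hello", 6);
	if(!(tmp=(char *) nedrealloc(buf, 4096))) { nedfree(buf); return -1; }
	nedfree(tmp);
	return 0;
}
#endif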
struct threadcacheblk_t;
typedef struct threadcacheblk_t threadcacheblk;
struct threadcacheblk_t
{	/* Keep below 16 bytes on 32 bit systems and 32 bytes on 64 bit systems */
#ifdef FULLSANITYCHECKS
	unsigned int magic;
#endif
	unsigned int lastUsed, size;
	threadcacheblk *next, *prev;
};
typedef struct threadcache_t
{
#ifdef FULLSANITYCHECKS
	unsigned int magic1;
#endif
	int mymspace;			/* Last mspace entry this thread used */
	long threadid;
	unsigned int mallocs, frees, successes;
	size_t freeInCache;		/* How much free space is stored in this cache */
	threadcacheblk *bins[(THREADCACHEMAXBINS+1)*2];
#ifdef FULLSANITYCHECKS
	unsigned int magic2;
#endif
} threadcache;
typedef struct nedpool_t
{
	MLOCK_T mutex;
	void *uservalue;
	int threads;			/* Max entries in m to use */
	threadcache *caches[THREADCACHEMAXCACHES];
	TLSVAR mycache;			/* 0 = unset, negative -(n+1) = no cache, use mspace n directly, positive n = caches[n-1] */
	mstate m[MAXTHREADSINPOOL+1];	/* mspace entries for this pool */
} nedpool;
static nedpool syspool;
static FORCEINLINE unsigned int size2binidx(size_t _size) THROWSPEC
{	/* Map a size to its bin index: drop the bottom four bits, then find
	   the position of the topmost set bit of what remains */
	unsigned int topbit, size=(unsigned int)(_size>>4);

#if defined(__GNUC__)
	topbit = sizeof(size)*__CHAR_BIT__ - 1 - __builtin_clz(size);
#elif defined(_MSC_VER) && _MSC_VER>=1300
	{
		unsigned long bsrTopBit;

		_BitScanReverse(&bsrTopBit, size);

		topbit = bsrTopBit;
	}
#else
	/* Portable fallbacks. One approach reads the top bit straight out of
	   the exponent of a double (via a union of unsigned asInt[2] and
	   double asDouble): */
	/* ... */
	asDouble = (double)size + 0.5;
	topbit = (asInt[!FOX_BIGENDIAN] >> 20) - 1023;
	/* ... */
	/* The branch free alternative smears the top bit downwards, inverts,
	   then locates the top bit with a population count: */
	/* ... */
	x = x - ((x >> 1) & 0x55555555);
	x = (x & 0x33333333) + ((x >> 2) & 0x33333333);
	x = (x + (x >> 4)) & 0x0F0F0F0F;
	/* ... */
	topbit=31 - (x >> 24);
#endif
	return topbit;
}
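/* Worked example: size2binidx(100) computes 100>>4 = 6 (binary 110), whose
   top set bit is bit 2, so it returns 2. Sizes 16-31 map to index 0, 32-63
   to 1, 64-127 to 2, and so on. Callers guarantee a minimum of
   sizeof(threadcacheblk) bytes, so size is never zero here. */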
#ifdef FULLSANITYCHECKS
/* Checks that a bin's head and tail pointers are mutually consistent */
static void tcsanitycheck(threadcacheblk **ptr) THROWSPEC
{
	assert((ptr[0] && ptr[1]) || (!ptr[0] && !ptr[1]));
	if(ptr[0] && ptr[1])
	{
		assert(nedblksize(ptr[0])>=sizeof(threadcacheblk));
		assert(nedblksize(ptr[1])>=sizeof(threadcacheblk));
		assert(*(unsigned int *) "NEDN"==ptr[0]->magic);
		assert(*(unsigned int *) "NEDN"==ptr[1]->magic);
		assert(!ptr[0]->prev);
		assert(!ptr[1]->next);
		if(ptr[0]==ptr[1])
		{
			assert(!ptr[0]->next);
			assert(!ptr[1]->prev);
		}
	}
}
static void tcfullsanitycheck(threadcache *tc) THROWSPEC
{
	threadcacheblk **tcbptr=tc->bins;
	int n;
	for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
	{
		threadcacheblk *b, *ob=0;
		tcsanitycheck(tcbptr);
		for(b=tcbptr[0]; b; ob=b, b=b->next)
		{
			assert(*(unsigned int *) "NEDN"==b->magic);
			assert(!ob || ob->next==b);
			assert(!ob || b->prev==ob);
		}
	}
}
#endif
/* Releases blocks which have not been used within the last "age" frees
   back to their mspace. An age of zero empties the cache entirely. */
static NOINLINE void RemoveCacheEntries(nedpool *p, threadcache *tc, unsigned int age) THROWSPEC
{
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache)
	{
		threadcacheblk **tcbptr=tc->bins;
		int n;
		for(n=0; n<=THREADCACHEMAXBINS; n++, tcbptr+=2)
		{
			threadcacheblk **tcb=tcbptr+1;	/* come from oldest end of list */
			for(; *tcb && tc->frees-(*tcb)->lastUsed>=age; )
			{
				threadcacheblk *f=*tcb;
				size_t blksize=f->size;
				assert(blksize<=nedblksize(f));
				assert(blksize);
#ifdef FULLSANITYCHECKS
				assert(*(unsigned int *) "NEDN"==(*tcb)->magic);
#endif
				/* Unlink from the aged end of the bin */
				*tcb=(*tcb)->prev;
				if(*tcb)
					(*tcb)->next=0;
				else
					*tcbptr=0;
				tc->freeInCache-=blksize;
				assert((long) tc->freeInCache>=0);
				mspace_free(0, f);
			}
		}
	}
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
}
static void DestroyCaches(nedpool *p) THROWSPEC
{
	threadcache *tc;
	int n;
	for(n=0; n<THREADCACHEMAXCACHES; n++)
	{
		if((tc=p->caches[n]))
		{
			RemoveCacheEntries(p, tc, 0);
			assert(!tc->freeInCache);
			mspace_free(0, tc);
			p->caches[n]=0;
		}
	}
}
static NOINLINE threadcache *AllocCache(nedpool *p) THROWSPEC
{
	threadcache *tc=0;
	int n, end;
	ACQUIRE_LOCK(&p->mutex);
	for(n=0; n<THREADCACHEMAXCACHES && p->caches[n]; n++);
	if(THREADCACHEMAXCACHES==n)
	{	/* List exhausted, so disable the cache for this thread */
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
	tc=p->caches[n]=(threadcache *) mspace_calloc(p->m[0], 1, sizeof(threadcache));
	if(!tc)
	{
		RELEASE_LOCK(&p->mutex);
		return 0;
	}
#ifdef FULLSANITYCHECKS
	tc->magic1=*(unsigned int *)"NEDMALC1";
	tc->magic2=*(unsigned int *)"NEDMALC2";
#endif
	tc->threadid=(long)(size_t)CURRENT_THREAD;
	for(end=0; p->m[end]; end++);
	tc->mymspace=tc->threadid % end;
	RELEASE_LOCK(&p->mutex);
	if(TLSSET(p->mycache, (void *)(size_t)(n+1))) abort();
	return tc;
}
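/* Note: tc->mymspace=tc->threadid % end assigns each new thread an mspace
   round-robin over those currently existing in the pool, e.g. with four
   mspaces a thread id of 6 starts out on mspace 2. The cache slot index n
   is stored in TLS biased by +1 so that zero can mean "no cache yet". */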
static void *threadcache_malloc(nedpool *p, threadcache *tc, size_t *size) THROWSPEC
{
	void *ret=0;
	unsigned int bestsize;
	unsigned int idx=size2binidx(*size);
	size_t blksize=0;
	threadcacheblk *blk, **binsptr;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#ifdef FINEGRAINEDBINS
	/* Finer grained bin fit: intermediate bins at 1.5 times each power of two */
	idx<<=1;
	if(*size>bestsize)
	{
		idx++;
		bestsize+=bestsize>>1;
	}
	if(*size>bestsize)
	{
		idx++;
		bestsize=1<<(4+(idx>>1));
	}
#else
	if(*size>bestsize)
	{
		idx++;
		bestsize<<=1;
	}
#endif
	assert(bestsize>=*size);
	if(*size<bestsize) *size=bestsize;
	assert(*size<=THREADCACHEMAX);
	assert(idx<=THREADCACHEMAXBINS);
	binsptr=&tc->bins[idx*2];
	/* Try to match close, but move up a bin if necessary */
	blk=*binsptr;
	if(!blk || blk->size<*size)
	{	/* Bump it up a bin */
		if(idx<THREADCACHEMAXBINS)
		{
			idx++;
			binsptr+=2;
			blk=*binsptr;
		}
	}
	if(blk)
	{	/* Unlink the block from the head of its bin */
		blksize=blk->size;
		assert(nedblksize(blk)>=blksize);
		assert(blksize>=*size);
		if(blk->next)
			blk->next->prev=0;
		*binsptr=blk->next;
		if(!*binsptr)
			binsptr[1]=0;
#ifdef FULLSANITYCHECKS
		blk->magic=0;
#endif
		assert(binsptr[0]!=blk && binsptr[1]!=blk);
		assert(nedblksize(blk)>=sizeof(threadcacheblk) && nedblksize(blk)<=THREADCACHEMAX+CHUNK_OVERHEAD);
		ret=(void *) blk;
	}
	++tc->mallocs;
	if(ret)
	{
		assert(blksize>=*size);
		++tc->successes;
		tc->freeInCache-=blksize;
		assert((long) tc->freeInCache>=0);
	}
#if defined(DEBUG) && 0
	if(!(tc->mallocs & 0xfff))
	{
		printf("*** threadcache=%u, mallocs=%u (%f), free=%u (%f), freeInCache=%u\n", (unsigned int) tc->threadid, tc->mallocs,
			(float) tc->successes/tc->mallocs, tc->frees, (float) tc->successes/tc->frees, (unsigned int) tc->freeInCache);
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	return ret;
}
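/* Worked example (assuming FINEGRAINEDBINS): a request for 100 bytes gives
   idx=size2binidx(100)=2 and bestsize=1<<(2+4)=64. The finer grained fit
   doubles idx to 4; 100>64 bumps idx to 5 with bestsize=64+32=96; 100>96
   bumps idx to 6 with bestsize=1<<(4+(6>>1))=128. The request is therefore
   rounded up to 128 bytes and served from bin 6. */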
static NOINLINE void ReleaseFreeInCache(nedpool *p, threadcache *tc, int mymspace) THROWSPEC
{
	unsigned int age=THREADCACHEMAXFREESPACE/8192;
	while(age && tc->freeInCache>=THREADCACHEMAXFREESPACE)
	{
		RemoveCacheEntries(p, tc, age);
		age>>=1;
	}
}
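/* Worked example: with THREADCACHEMAXFREESPACE at its default 512Kb the
   starting age is 512*1024/8192 = 64, so the first pass only frees blocks
   untouched for the last 64 frees; each pass halves the age, stopping once
   the cache drops below the limit or the age reaches zero. */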
static void threadcache_free(nedpool *p, threadcache *tc, int mymspace, void *mem, size_t size) THROWSPEC
{
	unsigned int bestsize;
	unsigned int idx=size2binidx(size);
	threadcacheblk **binsptr, *tck=(threadcacheblk *) mem;
	assert(size>=sizeof(threadcacheblk) && size<=THREADCACHEMAX+CHUNK_OVERHEAD);
#ifdef DEBUG
	{	/* Make sure this is a valid memory block */
		mchunkptr p  = mem2chunk(mem);
		mstate fm = get_mstate_for(p);
		if(!ok_magic(fm))
		{
			USAGE_ERROR_ACTION(fm, p);
			return;
		}
	}
#endif
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	/* Calculate best fit bin size */
	bestsize=1<<(idx+4);
#ifdef FINEGRAINEDBINS
	/* Finer grained bin fit: the intermediate bin holds blocks of at least
	   1.5 times bestsize, mirroring the sizing in threadcache_malloc() */
	idx<<=1;
	{
		unsigned int biggerbestsize=bestsize+(bestsize>>1);
		if(size>=biggerbestsize)
		{
			idx++;
			bestsize=biggerbestsize;
		}
	}
#endif
	if(bestsize!=size)	/* dlmalloc can round up, so we round down to preserve indexing */
		size=bestsize;
	binsptr=&tc->bins[idx*2];
	assert(idx<=THREADCACHEMAXBINS);
	if(tck==*binsptr)
	{	/* Catches the common case of an immediate double free */
		fprintf(stderr, "Attempt to free already freed memory block %p - aborting!\n", (void *)tck);
		abort();
	}
#ifdef FULLSANITYCHECKS
	tck->magic=*(unsigned int *) "NEDN";
#endif
	tck->lastUsed=++tc->frees;
	tck->size=(unsigned int) size;
	/* Insert at the head of the bin's doubly linked list */
	tck->next=*binsptr;
	tck->prev=0;
	if(tck->next)
		tck->next->prev=tck;
	else
		binsptr[1]=tck;
	assert(!*binsptr || (*binsptr)->size==tck->size);
	*binsptr=tck;
	assert(tck==tc->bins[idx*2]);
	assert(tc->bins[idx*2+1]==tck || binsptr[0]->next->prev==tck);
	tc->freeInCache+=size;
#ifdef FULLSANITYCHECKS
	tcfullsanitycheck(tc);
#endif
	if(tc->freeInCache>=THREADCACHEMAXFREESPACE)
		ReleaseFreeInCache(p, tc, mymspace);
}
static NOINLINE int InitPool(nedpool *p, size_t capacity, int threads) THROWSPEC
{	/* threads is -1 for the system pool */
	ensure_initialization();
	ACQUIRE_MALLOC_GLOBAL_LOCK();
	if(p->threads) goto done;	/* Already initialised */
	if(INITIAL_LOCK(&p->mutex)) goto err;
	if(TLSALLOC(&p->mycache)) goto err;
	if(!(p->m[0]=(mstate) create_mspace(capacity, 1))) goto err;
	p->m[0]->extp=p;		/* Link the mspace back to its owning pool */
	p->threads=(threads<1 || threads>MAXTHREADSINPOOL) ? MAXTHREADSINPOOL : threads;
done:
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 1;
err:
	if(threads<0)
		abort();		/* If we cannot allocate the system pool, we are stuck */
	if(p->m[0])
	{
		destroy_mspace(p->m[0]);
		p->m[0]=0;
	}
	if(p->mycache)
	{
		if(TLSFREE(p->mycache)) abort();
		p->mycache=0;
	}
	RELEASE_MALLOC_GLOBAL_LOCK();
	return 0;
}
static NOINLINE mstate FindMSpace(nedpool *p, threadcache *tc, int *lastUsed, size_t size) THROWSPEC
{	/* Gets called when the thread's last used mspace is in use. Run through
	   the list of all available mspaces looking for an unlocked one; if
	   that fails, create a new one so long as p->threads isn't exceeded. */
	int n, end;
	for(n=end=*lastUsed+1; p->m[n]; end=++n)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	for(n=0; n<*lastUsed && p->m[n]; n++)
	{
		if(TRY_LOCK(&p->m[n]->mutex)) goto found;
	}
	if(end<p->threads)
	{
		mstate temp;
		if(!(temp=(mstate) create_mspace(size, 1)))
			goto badexit;
		/* Now we're ready to modify the lists, we lock */
		ACQUIRE_LOCK(&p->mutex);
		while(p->m[end] && end<p->threads)
			end++;
		if(end>=p->threads)
		{	/* Another thread just filled the last slot, so destroy ours */
			RELEASE_LOCK(&p->mutex);
			destroy_mspace((mspace) temp);
			goto badexit;
		}
		/* Write through a volatile pointer to make sure the new entry hits
		   memory without falling foul of aliasing rules */
		{
			volatile struct malloc_state **_m=(volatile struct malloc_state **) &p->m[end];
			*_m=(p->m[end]=temp);
		}
		ACQUIRE_LOCK(&p->m[end]->mutex);
		RELEASE_LOCK(&p->mutex);
		n=end;
		goto found;
	}
badexit:
	ACQUIRE_LOCK(&p->m[*lastUsed]->mutex);	/* Nothing available: wait it out on the old mspace */
	return p->m[*lastUsed];
found:
	*lastUsed=n;
	if(tc)
		tc->mymspace=n;
	else if(TLSSET(p->mycache, (void *)(size_t)(-(n+1)))) abort();
	return p->m[n];
}
nedpool *nedcreatepool(size_t capacity, int threads) THROWSPEC
{
	nedpool *ret;
	if(!(ret=(nedpool *) nedpcalloc(0, 1, sizeof(nedpool)))) return 0;
	if(!InitPool(ret, capacity, threads))
	{
		nedpfree(0, ret);
		return 0;
	}
	return ret;
}
void neddestroypool(nedpool *p) THROWSPEC
{
	int n;
	ACQUIRE_LOCK(&p->mutex);
	DestroyCaches(p);
	for(n=0; p->m[n]; n++)
	{
		destroy_mspace(p->m[n]);
		p->m[n]=0;
	}
	RELEASE_LOCK(&p->mutex);
	if(TLSFREE(p->mycache)) abort();
	nedpfree(0, p);
}
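/* Illustrative usage sketch (not part of the original source): a private
   pool with roughly 1Mb of initial capacity shared by up to two threads.
   The function is hypothetical. */
#if 0
static void poolexample(void)
{
	nedpool *pool=nedcreatepool(1024*1024, 2);
	void *mem;
	if(!pool) abort();
	mem=nedpmalloc(pool, 256);
	nedpfree(pool, mem);
	neddestroypool(pool);	/* releases everything still allocated in the pool */
}
#endif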
void nedpsetvalue(nedpool *p, void *v) THROWSPEC
{
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	p->uservalue=v;
}
void *nedgetvalue(nedpool **p, void *mem) THROWSPEC
{	/* Walk back from mem to its owning pool, sanity checking the chunk on
	   the way; returns 0 if mem doesn't look like a live nedmalloc block */
	nedpool *np=0;
	mchunkptr mcp=mem2chunk(mem);
	mstate fm;
	if(!(is_aligned(chunk2mem(mcp))) && mcp->head != FENCEPOST_HEAD) return 0;
	if(!cinuse(mcp)) return 0;
	if(!next_pinuse(mcp)) return 0;
	if(!is_mmapped(mcp) && !pinuse(mcp))
	{
		if(next_chunk(prev_chunk(mcp))!=mcp) return 0;
	}
	fm=get_mstate_for(mcp);
	if(!ok_magic(fm)) return 0;
	if(!ok_address(fm, mcp)) return 0;
	if(!fm->extp) return 0;
	np=(nedpool *) fm->extp;
	if(p) *p=np;
	return np->uservalue;
}
void neddisablethreadcache(nedpool *p) THROWSPEC
{
	int mycache;
	if(!p)
	{
		p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET(p->mycache);
	if(!mycache)
	{	/* Set to mspace 0 */
		if(TLSSET(p->mycache, (void *)-1)) abort();
	}
	else if(mycache>0)
	{	/* Set to last used mspace, then empty and free the cache */
		threadcache *tc=p->caches[mycache-1];
#ifdef DEBUG
		printf("Threadcache utilisation: %lf%% in cache with %lf%% lost to other threads\n",
			100.0*tc->successes/tc->mallocs, 100.0*((double) tc->mallocs-tc->frees)/tc->mallocs);
#endif
		if(TLSSET(p->mycache, (void *)(size_t)(-tc->mymspace))) abort();
		RemoveCacheEntries(p, tc, 0);
		assert(!tc->freeInCache);
		mspace_free(0, p->caches[mycache-1]);
		p->caches[mycache-1]=0;
	}
}
#define GETMSPACE(m,p,tc,ms,s,action)            \
	do                                           \
	{                                            \
		mstate m = GetMSpace((p),(tc),(ms),(s)); \
		action;                                  \
		RELEASE_LOCK(&m->mutex);                 \
	} while (0)

static FORCEINLINE mstate GetMSpace(nedpool *p, threadcache *tc, int mymspace, size_t size) THROWSPEC
{	/* Returns a locked and ready for use mspace */
	mstate m=p->m[mymspace];
	assert(m);
	if(!TRY_LOCK(&p->m[mymspace]->mutex)) m=FindMSpace(p, tc, &mymspace, size);
	return m;
}
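/* For reference, a call such as GETMSPACE(m, p, tc, mymspace, size,
   ret=mspace_malloc(m, size)) expands to roughly:

	do
	{
		mstate m = GetMSpace((p),(tc),(mymspace),(size));
		ret=mspace_malloc(m, size);
		RELEASE_LOCK(&m->mutex);
	} while (0);

   so the action always runs with the chosen mspace's lock held, and the
   lock is always released afterwards. */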
static FORCEINLINE void GetThreadCache(nedpool **p, threadcache **tc, int *mymspace, size_t *size) THROWSPEC
{	/* Fetches this thread's cache and preferred mspace, initialising the
	   system pool and allocating a cache on first use */
	int mycache;
	if(size && *size<sizeof(threadcacheblk)) *size=sizeof(threadcacheblk);
	if(!*p)
	{
		*p=&syspool;
		if(!syspool.threads) InitPool(&syspool, 0, -1);
	}
	mycache=(int)(size_t) TLSGET((*p)->mycache);
	if(mycache>0)
	{	/* Already have a cache */
		*tc=(*p)->caches[mycache-1];
		*mymspace=(*tc)->mymspace;
	}
	else if(!mycache)
	{	/* First use by this thread: try to allocate a cache */
		*tc=AllocCache(*p);
		if(!*tc)
		{	/* Disable the cache for this thread */
			if(TLSSET((*p)->mycache, (void *)-1)) abort();
			*mymspace=0;
		}
		else
			*mymspace=(*tc)->mymspace;
	}
	else
	{	/* Cache disabled: use the mspace encoded in the TLS value */
		*tc=0;
		*mymspace=-mycache-1;
	}
	assert(*mymspace>=0);
	assert(!*tc || (long)(size_t)CURRENT_THREAD==(*tc)->threadid);
#ifdef FULLSANITYCHECKS
	if(*tc)
	{
		if(*(unsigned int *)"NEDMALC1"!=(*tc)->magic1 || *(unsigned int *)"NEDMALC2"!=(*tc)->magic2)
			abort();
	}
#endif
}
void * nedpmalloc(nedpool *p, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size<=THREADCACHEMAX)
	{	/* Use the thread cache */
		ret=threadcache_malloc(p, tc, &size);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
		GETMSPACE(m, p, tc, mymspace, size,
			ret=mspace_malloc(m, size));
	}
	return ret;
}
void * nedpcalloc(nedpool *p, size_t no, size_t size) THROWSPEC
{
	size_t rsize=size*no;	/* NOTE: no overflow check on the multiply */
	void *ret=0;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &rsize);
#if THREADCACHEMAX
	if(tc && rsize<=THREADCACHEMAX)
	{	/* Use the thread cache, zeroing the block ourselves */
		if((ret=threadcache_malloc(p, tc, &rsize)))
			memset(ret, 0, rsize);
	}
#endif
	if(!ret)
	{	/* Use this thread's mspace */
		GETMSPACE(m, p, tc, mymspace, rsize,
			ret=mspace_calloc(m, 1, rsize));
	}
	return ret;
}
void * nedprealloc(nedpool *p, void *mem, size_t size) THROWSPEC
{
	void *ret=0;
	threadcache *tc;
	int mymspace;
	if(!mem) return nedpmalloc(p, size);
	GetThreadCache(&p, &tc, &mymspace, &size);
#if THREADCACHEMAX
	if(tc && size && size<=THREADCACHEMAX)
	{	/* Serve from the thread cache: allocate anew, copy, then recycle
		   the old block */
		size_t memsize=nedblksize(mem);
		assert(memsize);
		if((ret=threadcache_malloc(p, tc, &size)))
		{
			memcpy(ret, mem, memsize<size ? memsize : size);
			if(memsize<=THREADCACHEMAX)
				threadcache_free(p, tc, mymspace, mem, memsize);
			else
				mspace_free(0, mem);
		}
	}
#endif
	if(!ret)
	{	/* Reallocs always happen in the mspace they were allocated in, so
		   skip locking the preferred mspace for this thread */
		ret=mspace_realloc(0, mem, size);
	}
	return ret;
}
void nedpfree(nedpool *p, void *mem) THROWSPEC
{	/* Frees always happen in the mspace they were allocated in, so skip
	   locking the preferred mspace for this thread */
	threadcache *tc;
	int mymspace;
	size_t memsize;
	assert(mem);
	GetThreadCache(&p, &tc, &mymspace, 0);
#if THREADCACHEMAX
	memsize=nedblksize(mem);
	assert(memsize);
	if(mem && tc && memsize<=(THREADCACHEMAX+CHUNK_OVERHEAD))
		threadcache_free(p, tc, mymspace, mem, memsize);
	else
#endif
		mspace_free(0, mem);
}
void * nedpmemalign(nedpool *p, size_t alignment, size_t bytes) THROWSPEC
{
	void *ret;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &bytes);
	{	/* Aligned allocations always bypass the thread cache */
		GETMSPACE(m, p, tc, mymspace, bytes,
			ret=mspace_memalign(m, alignment, bytes));
	}
	return ret;
}
#if !NO_MALLINFO
struct mallinfo nedpmallinfo(nedpool *p) THROWSPEC
{	/* Sum the statistics over every mspace in the pool */
	int n;
	struct mallinfo ret={0};
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		struct mallinfo t=mspace_mallinfo(p->m[n]);
		ret.arena+=t.arena;
		ret.ordblks+=t.ordblks;
		ret.hblkhd+=t.hblkhd;
		ret.usmblks+=t.usmblks;
		ret.uordblks+=t.uordblks;
		ret.fordblks+=t.fordblks;
		ret.keepcost+=t.keepcost;
	}
	return ret;
}
#endif
int nedpmallopt(nedpool *p, int parno, int value) THROWSPEC
{	/* mallopt parameters are global across all mspaces */
	return mspace_mallopt(parno, value);
}
int nedpmalloc_trim(nedpool *p, size_t pad) THROWSPEC
{
	int n, ret=0;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		ret+=mspace_trim(p->m[n], pad);
	}
	return ret;
}
void nedpmalloc_stats(nedpool *p) THROWSPEC
{
	int n;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		mspace_malloc_stats(p->m[n]);
	}
}
size_t nedpmalloc_footprint(nedpool *p) THROWSPEC
{
	size_t ret=0;
	int n;
	if(!p) { p=&syspool; if(!syspool.threads) InitPool(&syspool, 0, -1); }
	for(n=0; p->m[n]; n++)
	{
		ret+=mspace_footprint(p->m[n]);
	}
	return ret;
}
void **nedpindependent_calloc(nedpool *p, size_t elemsno, size_t elemsize, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	GetThreadCache(&p, &tc, &mymspace, &elemsize);
	GETMSPACE(m, p, tc, mymspace, elemsno*elemsize,
		ret=mspace_independent_calloc(m, elemsno, elemsize, chunks));
	return ret;
}
void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void **chunks) THROWSPEC
{
	void **ret;
	threadcache *tc;
	int mymspace;
	/* Round every requested size up to the minimum cacheable size so the
	   blocks remain safe to pass through the thread cache later; alloca()
	   keeps the scratch array off the heap being operated on */
	size_t i, *adjustedsizes=(size_t *) alloca(elems*sizeof(size_t));
	if(!adjustedsizes) return 0;
	for(i=0; i<elems; i++)
		adjustedsizes[i]=sizes[i]<sizeof(threadcacheblk) ? sizeof(threadcacheblk) : sizes[i];
	GetThreadCache(&p, &tc, &mymspace, 0);
	GETMSPACE(m, p, tc, mymspace, 0,
		ret=mspace_independent_comalloc(m, elems, adjustedsizes, chunks));
	return ret;
}
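/* Illustrative usage sketch (not part of the original source): allocating
   three differently sized blocks in one underlying allocation; each block
   may still be freed individually. The function is hypothetical. */
#if 0
static void comallocexample(nedpool *pool)
{
	size_t sizes[3]={24, 56, 120};
	void *chunks[3];
	if(!nedpindependent_comalloc(pool, 3, sizes, chunks)) return;
	nedpfree(pool, chunks[1]);	/* blocks are independently freeable */
	nedpfree(pool, chunks[0]);
	nedpfree(pool, chunks[2]);
}
#endif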
#if defined(__cplusplus)
}
#endif