1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
/* To prevent loading the dynamic TBBmalloc library at startup,
   which is not needed for the whitebox test */
19 #define __TBB_SOURCE_DIRECTLY_INCLUDED 1
20 
// According to the C99 standard, INTPTR_MIN is defined for C++
// only if __STDC_LIMIT_MACROS is pre-defined
23 #define __STDC_LIMIT_MACROS 1
24 
25 #define HARNESS_TBBMALLOC_THREAD_SHUTDOWN 1
26 
27 #include "harness.h"
28 #include "harness_barrier.h"
29 
// Avoid depending on the ITT notification support
31 #ifdef DO_ITT_NOTIFY
32 #undef DO_ITT_NOTIFY
33 #endif
34 
35 #define __TBB_MALLOC_WHITEBOX_TEST 1 // to get access to allocator internals
// helps to trigger a rare race condition
37 #define WhiteboxTestingYield() (__TBB_Yield(), __TBB_Yield(), __TBB_Yield(), __TBB_Yield())
38 
39 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
40 // 2571 is variable has not been declared with compatible "target" attribute
41 // 3218 is class/struct may fail when offloaded because this field is misaligned
42 //         or contains data that is misaligned
43     #pragma warning(push)
44     #pragma warning(disable:2571 3218)
45 #endif
46 #define protected public
47 #define private public
48 #include "../tbbmalloc/frontend.cpp"
49 #undef protected
50 #undef private
51 #if __INTEL_COMPILER && __TBB_MIC_OFFLOAD
52     #pragma warning(pop)
53 #endif
54 #include "../tbbmalloc/backend.cpp"
55 #include "../tbbmalloc/backref.cpp"
56 
57 namespace tbbmalloc_whitebox {
58     size_t locGetProcessed = 0;
59     size_t locPutProcessed = 0;
60 }
61 #include "../tbbmalloc/large_objects.cpp"
62 #include "../tbbmalloc/tbbmalloc.cpp"
63 
64 const int LARGE_MEM_SIZES_NUM = 10;
65 
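// Helper used by the large object cache test below: allocates an array of ints
// with scalable_malloc, fills it with one random value, and can later verify
// the contents and free the memory.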
66 class AllocInfo {
67     int *p;
68     int val;
69     int size;
70 public:
    AllocInfo() : p(NULL), val(0), size(0) {}
    explicit AllocInfo(int sz) : p((int*)scalable_malloc(sz*sizeof(int))),
73                                    val(rand()), size(sz) {
74         ASSERT(p, NULL);
75         for (int k=0; k<size; k++)
76             p[k] = val;
77     }
    void check() const {
79         for (int k=0; k<size; k++)
80             ASSERT(p[k] == val, NULL);
81     }
    void clear() {
83         scalable_free(p);
84     }
85 };
86 
87 class SimpleBarrier: NoAssign {
88 protected:
89     static Harness::SpinBarrier barrier;
90 public:
    static void initBarrier(unsigned thrds) { barrier.initialize(thrds); }
92 };
93 
94 Harness::SpinBarrier SimpleBarrier::barrier;
95 
96 class TestLargeObjCache: public SimpleBarrier {
97 public:
98     static int largeMemSizes[LARGE_MEM_SIZES_NUM];
99 
    TestLargeObjCache( ) {}
101 
    void operator()( int /*mynum*/ ) const {
103         AllocInfo allocs[LARGE_MEM_SIZES_NUM];
104 
        // push the large object cache to its maximal limit
106         for (int i=0; i<2; i++) {
107             const int sizes[] = { MByte/sizeof(int),
108                                   (MByte-2*LargeObjectCache::LargeBSProps::CacheStep)/sizeof(int) };
109             for (int q=0; q<2; q++) {
110                 size_t curr = 0;
111                 for (int j=0; j<LARGE_MEM_SIZES_NUM; j++, curr++)
112                     new (allocs+curr) AllocInfo(sizes[q]);
113 
114                 for (size_t j=0; j<curr; j++) {
115                     allocs[j].check();
116                     allocs[j].clear();
117                 }
118             }
119         }
120 
121         barrier.wait();
122 
123         // check caching correctness
124         for (int i=0; i<1000; i++) {
125             size_t curr = 0;
126             for (int j=0; j<LARGE_MEM_SIZES_NUM-1; j++, curr++)
127                 new (allocs+curr) AllocInfo(largeMemSizes[j]);
128 
129             new (allocs+curr)
130                 AllocInfo((int)(4*minLargeObjectSize +
131                                 2*minLargeObjectSize*(1.*rand()/RAND_MAX)));
132             curr++;
133 
134             for (size_t j=0; j<curr; j++) {
135                 allocs[j].check();
136                 allocs[j].clear();
137             }
138         }
139     }
140 };
141 
142 int TestLargeObjCache::largeMemSizes[LARGE_MEM_SIZES_NUM];
143 
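// Stresses the large object cache (LOC) from several threads: first pushes it
// to its maximal limit, then repeatedly allocates, checks, and frees a mix of
// large sizes to verify that cached objects come back with intact contents.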
void TestLargeObjectCache()
145 {
146     for (int i=0; i<LARGE_MEM_SIZES_NUM; i++)
147         TestLargeObjCache::largeMemSizes[i] =
148             (int)(minLargeObjectSize + 2*minLargeObjectSize*(1.*rand()/RAND_MAX));
149 
150     for( int p=MaxThread; p>=MinThread; --p ) {
151         TestLargeObjCache::initBarrier( p );
152         NativeParallelFor( p, TestLargeObjCache() );
153     }
154 }
155 
156 #if MALLOC_CHECK_RECURSION
157 
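// Exercises the startup allocator (StartupBlock) that serves requests made
// before tbbmalloc is fully initialized: blocks of random size below
// minLargeObjectSize are allocated, checked for alignment and content,
// and then freed in both forward and reverse order.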
158 class TestStartupAlloc: public SimpleBarrier {
159     struct TestBlock {
160         void *ptr;
161         size_t sz;
162     };
163     static const int ITERS = 100;
164 public:
    TestStartupAlloc() {}
    void operator()(int) const {
167         TestBlock blocks1[ITERS], blocks2[ITERS];
168 
169         barrier.wait();
170 
171         for (int i=0; i<ITERS; i++) {
172             blocks1[i].sz = rand() % minLargeObjectSize;
173             blocks1[i].ptr = StartupBlock::allocate(blocks1[i].sz);
174             ASSERT(blocks1[i].ptr && StartupBlock::msize(blocks1[i].ptr)>=blocks1[i].sz
175                    && 0==(uintptr_t)blocks1[i].ptr % sizeof(void*), NULL);
176             memset(blocks1[i].ptr, i, blocks1[i].sz);
177         }
178         for (int i=0; i<ITERS; i++) {
179             blocks2[i].sz = rand() % minLargeObjectSize;
180             blocks2[i].ptr = StartupBlock::allocate(blocks2[i].sz);
181             ASSERT(blocks2[i].ptr && StartupBlock::msize(blocks2[i].ptr)>=blocks2[i].sz
182                    && 0==(uintptr_t)blocks2[i].ptr % sizeof(void*), NULL);
183             memset(blocks2[i].ptr, i, blocks2[i].sz);
184 
185             for (size_t j=0; j<blocks1[i].sz; j++)
186                 ASSERT(*((char*)blocks1[i].ptr+j) == i, NULL);
187             Block *block = (Block *)alignDown(blocks1[i].ptr, slabSize);
188             ((StartupBlock *)block)->free(blocks1[i].ptr);
189         }
190         for (int i=ITERS-1; i>=0; i--) {
191             for (size_t j=0; j<blocks2[i].sz; j++)
192                 ASSERT(*((char*)blocks2[i].ptr+j) == i, NULL);
193             Block *block = (Block *)alignDown(blocks2[i].ptr, slabSize);
194             ((StartupBlock *)block)->free(blocks2[i].ptr);
195         }
196     }
197 };
198 
199 #endif /* MALLOC_CHECK_RECURSION */
200 
201 #include <deque>
202 
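// Allocates up to ITERS backreference indices (all available ones when
// ITERS == 0), binds each of them to a dummy byte via setBackRef, verifies the
// mapping with getBackRef, and releases them with removeBackRef.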
203 template<int ITERS>
204 class BackRefWork: NoAssign {
205     struct TestBlock {
206         BackRefIdx idx;
207         char       data;
        TestBlock(BackRefIdx idx_) : idx(idx_) {}
209     };
210 public:
    BackRefWork() {}
    void operator()(int) const {
213         size_t cnt;
        // the container must not invalidate pointers to already stored elements, hence std::deque
215         std::deque<TestBlock> blocks;
216 
217         // for ITERS==0 consume all available backrefs
218         for (cnt=0; !ITERS || cnt<ITERS; cnt++) {
219             BackRefIdx idx = BackRefIdx::newBackRef(/*largeObj=*/false);
220             if (idx.isInvalid())
221                 break;
222             blocks.push_back(TestBlock(idx));
223             setBackRef(blocks.back().idx, &blocks.back().data);
224         }
225         for (size_t i=0; i<cnt; i++)
226             ASSERT((Block*)&blocks[i].data == getBackRef(blocks[i].idx), NULL);
227         for (size_t i=cnt; i>0; i--)
228             removeBackRef(blocks[i-1].idx);
229     }
230 };
231 
232 class LocalCachesHit: NoAssign {
233     // set ITERS to trigger possible leak of backreferences
234     // during cleanup on cache overflow and on thread termination
235     static const int ITERS = 2*(FreeBlockPool::POOL_HIGH_MARK +
236                                 LocalLOC::LOC_HIGH_MARK);
237 public:
    LocalCachesHit() {}
    void operator()(int) const {
240         void *objsSmall[ITERS], *objsLarge[ITERS];
241 
242         for (int i=0; i<ITERS; i++) {
243             objsSmall[i] = scalable_malloc(minLargeObjectSize-1);
244             objsLarge[i] = scalable_malloc(minLargeObjectSize);
245         }
246         for (int i=0; i<ITERS; i++) {
247             scalable_free(objsSmall[i]);
248             scalable_free(objsLarge[i]);
249         }
250     }
251 };
252 
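// Counts the backreferences currently allocated by walking the master table;
// used to detect backreference leaks around the tests below.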
static size_t allocatedBackRefCount()
254 {
255     size_t cnt = 0;
256     for (int i=0; i<=backRefMaster->lastUsed; i++)
257         cnt += backRefMaster->backRefBl[i]->allocatedCount;
258     return cnt;
259 }
260 
261 class TestInvalidBackrefs: public SimpleBarrier {
262 #if __ANDROID__
    // Android requires fewer iterations due to the lack of virtual memory.
264     static const int BACKREF_GROWTH_ITERS = 50*1024;
265 #else
266     static const int BACKREF_GROWTH_ITERS = 200*1024;
267 #endif
268 
269     static tbb::atomic<bool> backrefGrowthDone;
270     static void *ptrs[BACKREF_GROWTH_ITERS];
271 public:
    TestInvalidBackrefs() {}
    void operator()(int id) const {
274 
275         if (!id) {
276             backrefGrowthDone = false;
277             barrier.wait();
278 
279             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
280                 ptrs[i] = scalable_malloc(minLargeObjectSize);
281             backrefGrowthDone = true;
282             for (int i=0; i<BACKREF_GROWTH_ITERS; i++)
283                 scalable_free(ptrs[i]);
284         } else {
285             void *p2 = scalable_malloc(minLargeObjectSize-1);
286             char *p1 = (char*)scalable_malloc(minLargeObjectSize-1);
287             LargeObjectHdr *hdr =
288                 (LargeObjectHdr*)(p1+minLargeObjectSize-1 - sizeof(LargeObjectHdr));
289             hdr->backRefIdx.master = 7;
290             hdr->backRefIdx.largeObj = 1;
291             hdr->backRefIdx.offset = 2000;
292 
293             barrier.wait();
294 
295             while (!backrefGrowthDone) {
296                 scalable_free(p2);
297                 p2 = scalable_malloc(minLargeObjectSize-1);
298             }
299             scalable_free(p1);
300             scalable_free(p2);
301         }
302     }
303 };
304 
305 tbb::atomic<bool> TestInvalidBackrefs::backrefGrowthDone;
306 void *TestInvalidBackrefs::ptrs[BACKREF_GROWTH_ITERS];
307 
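// Checks that backreference usage returns to its initial level after parallel
// allocation of backrefs, after exercising per-thread caches, and in the
// presence of deliberately corrupted (invalid) BackRefIdx values.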
void TestBackRef() {
309     size_t beforeNumBackRef, afterNumBackRef;
310 
311     beforeNumBackRef = allocatedBackRefCount();
312     for( int p=MaxThread; p>=MinThread; --p )
313         NativeParallelFor( p, BackRefWork<2*BR_MAX_CNT+2>() );
314     afterNumBackRef = allocatedBackRefCount();
315     ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
316 
317     // lastUsed marks peak resource consumption. As we allocate below the mark,
318     // it must not move up, otherwise there is a resource leak.
319     int sustLastUsed = backRefMaster->lastUsed;
320     NativeParallelFor( 1, BackRefWork<2*BR_MAX_CNT+2>() );
321     ASSERT(sustLastUsed == backRefMaster->lastUsed, "backreference leak detected");
322 
    // check for leaks of back references while per-thread caches are in use;
    // a warm-up run is needed to cover the bootStrapMalloc call
325     NativeParallelFor( 1, LocalCachesHit() );
326     beforeNumBackRef = allocatedBackRefCount();
327     NativeParallelFor( 2, LocalCachesHit() );
328     int res = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
329     ASSERT(res == TBBMALLOC_OK, NULL);
330     afterNumBackRef = allocatedBackRefCount();
331     ASSERT(beforeNumBackRef>=afterNumBackRef, "backreference leak detected");
332 
    // This is a regression test against a race condition between backreference
    // extension and the check for an invalid BackRefIdx.
    // When detecting whether an object is large or small, scalable_free checks
    // for large objects first, so there is a chance to prepend a small object
    // with a seemingly valid BackRefIdx for large objects and thus trigger the bug.
338     TestInvalidBackrefs::initBarrier(MaxThread);
339     NativeParallelFor( MaxThread, TestInvalidBackrefs() );
    // Consume all available backrefs and check that they work correctly.
    // For now, test 32-bit machines only, because on 64-bit the memory consumption is too high.
342     if (sizeof(uintptr_t) == 4)
343         NativeParallelFor( MaxThread, BackRefWork<0>() );
344 }
345 
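// Memory-source callbacks for a fixed-size memory pool: getMem hands out chunks
// of a static 8MB buffer and putMem deliberately releases nothing.
// A pool on top of such callbacks is typically used as follows
// (a minimal sketch; TestPools below does essentially this):
//
//     rml::MemPoolPolicy pol(getMem, putMem);
//     rml::MemoryPool *pool;
//     pool_create_v1(/*pool_id=*/0, &pol, &pool);
//     void *p = pool_malloc(pool, 1024);
//     pool_free(pool, p);
//     pool_destroy(pool);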
void *getMem(intptr_t /*pool_id*/, size_t &bytes)
347 {
348     const size_t BUF_SIZE = 8*1024*1024;
349     static char space[BUF_SIZE];
350     static size_t pos;
351 
352     if (pos + bytes > BUF_SIZE)
353         return NULL;
354 
355     void *ret = space + pos;
356     pos += bytes;
357 
358     return ret;
359 }
360 
int putMem(intptr_t /*pool_id*/, void* /*raw_ptr*/, size_t /*raw_bytes*/)
362 {
363     return 0;
364 }
365 
366 struct MallocPoolHeader {
367     void  *rawPtr;
368     size_t userSize;
369 };
370 
void *getMallocMem(intptr_t /*pool_id*/, size_t &bytes)
372 {
373     void *rawPtr = malloc(bytes+sizeof(MallocPoolHeader));
374     void *ret = (void *)((uintptr_t)rawPtr+sizeof(MallocPoolHeader));
375 
376     MallocPoolHeader *hdr = (MallocPoolHeader*)ret-1;
377     hdr->rawPtr = rawPtr;
378     hdr->userSize = bytes;
379 
380     return ret;
381 }
382 
int putMallocMem(intptr_t /*pool_id*/, void *ptr, size_t bytes)
384 {
385     MallocPoolHeader *hdr = (MallocPoolHeader*)ptr-1;
386     ASSERT(bytes == hdr->userSize, "Invalid size in pool callback.");
387     free(hdr->rawPtr);
388 
389     return 0;
390 }
391 
392 class StressLOCacheWork: NoAssign {
393     rml::MemoryPool *my_mallocPool;
394 public:
    StressLOCacheWork(rml::MemoryPool *mallocPool) : my_mallocPool(mallocPool) {}
    void operator()(int) const {
397         for (size_t sz=minLargeObjectSize; sz<1*1024*1024;
398              sz+=LargeObjectCache::LargeBSProps::CacheStep) {
399             void *ptr = pool_malloc(my_mallocPool, sz);
400             ASSERT(ptr, "Memory was not allocated");
401             memset(ptr, sz, sz);
402             pool_free(my_mallocPool, ptr);
403         }
404     }
405 };
406 
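// Verifies pool creation/destruction, checks that the large object cache
// reports correct sizes back to the user-provided putMallocMem callback,
// validates usedSize/cachedSize accounting and the LOC bitmask, and stresses
// a LocalLOC larger than the production one.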
void TestPools() {
408     rml::MemPoolPolicy pol(getMem, putMem);
409     size_t beforeNumBackRef, afterNumBackRef;
410 
411     rml::MemoryPool *pool1;
412     rml::MemoryPool *pool2;
413     pool_create_v1(0, &pol, &pool1);
414     pool_create_v1(0, &pol, &pool2);
415     pool_destroy(pool1);
416     pool_destroy(pool2);
417 
418     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
419     beforeNumBackRef = allocatedBackRefCount();
420     rml::MemoryPool *fixedPool;
421 
422     pool_create_v1(0, &pol, &fixedPool);
423     pol.pAlloc = getMallocMem;
424     pol.pFree = putMallocMem;
425     pol.granularity = 8;
426     rml::MemoryPool *mallocPool;
427 
428     pool_create_v1(0, &pol, &mallocPool);
/* Check that the large object cache (LOC) returns the correct size for cached objects.
   Objects of passBackendSz bytes are cached in the LOC but bypass the backend, so
   their memory is requested directly from the allocation callback.
   Objects of anotherLOCBinSz bytes must fit into another LOC bin, so that their
   allocation/releasing leads to cache cleanup.
   All this is expected to lead to the release of the passBackendSz-byte object
   from the LOC during LOC cleanup, and putMallocMem checks that the returned size
   is correct.
*/
438     const size_t passBackendSz = Backend::maxBinned_HugePage+1,
439         anotherLOCBinSz = minLargeObjectSize+1;
440     for (int i=0; i<10; i++) { // run long enough to be cached
441         void *p = pool_malloc(mallocPool, passBackendSz);
442         ASSERT(p, "Memory was not allocated");
443         pool_free(mallocPool, p);
444     }
    // run long enough for the passBackendSz allocation to be cleaned from the cache
    // and returned back to putMallocMem for size checking
447     for (int i=0; i<1000; i++) {
448         void *p = pool_malloc(mallocPool, anotherLOCBinSz);
449         ASSERT(p, "Memory was not allocated");
450         pool_free(mallocPool, p);
451     }
452 
453     void *smallObj =  pool_malloc(fixedPool, 10);
454     ASSERT(smallObj, "Memory was not allocated");
455     memset(smallObj, 1, 10);
456     void *ptr = pool_malloc(fixedPool, 1024);
457     ASSERT(ptr, "Memory was not allocated");
458     memset(ptr, 1, 1024);
459     void *largeObj = pool_malloc(fixedPool, minLargeObjectSize);
460     ASSERT(largeObj, "Memory was not allocated");
461     memset(largeObj, 1, minLargeObjectSize);
462     ptr = pool_malloc(fixedPool, minLargeObjectSize);
463     ASSERT(ptr, "Memory was not allocated");
464     memset(ptr, minLargeObjectSize, minLargeObjectSize);
465     pool_malloc(fixedPool, 10*minLargeObjectSize); // no leak for unsuccessful allocations
466     pool_free(fixedPool, smallObj);
467     pool_free(fixedPool, largeObj);
468 
    // provoke large object cache cleanup and hope no leaks occur
470     for( int p=MaxThread; p>=MinThread; --p )
471         NativeParallelFor( p, StressLOCacheWork(mallocPool) );
472     pool_destroy(mallocPool);
473     pool_destroy(fixedPool);
474 
475     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, NULL);
476     afterNumBackRef = allocatedBackRefCount();
477     ASSERT(beforeNumBackRef==afterNumBackRef, "backreference leak detected");
478 
479     {
480         // test usedSize/cachedSize and LOC bitmask correctness
481         void *p[5];
482         pool_create_v1(0, &pol, &mallocPool);
483         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
484         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
485         p[3] = pool_malloc(mallocPool, minLargeObjectSize+2*LargeCacheStep);
486         for (int i=0; i<10; i++) {
487             p[0] = pool_malloc(mallocPool, minLargeObjectSize);
488             p[1] = pool_malloc(mallocPool, minLargeObjectSize+LargeCacheStep);
489             pool_free(mallocPool, p[0]);
490             pool_free(mallocPool, p[1]);
491         }
492         ASSERT(loc->getUsedSize(), NULL);
493         pool_free(mallocPool, p[3]);
494         ASSERT(loc->getLOCSize() < 3*(minLargeObjectSize+LargeCacheStep), NULL);
495         const size_t maxLocalLOCSize = LocalLOCImpl<3,30>::getMaxSize();
496         ASSERT(loc->getUsedSize() <= maxLocalLOCSize, NULL);
497         for (int i=0; i<3; i++)
498             p[i] = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
499         size_t currUser = loc->getUsedSize();
500         ASSERT(!loc->getLOCSize() && currUser >= 3*(minLargeObjectSize+LargeCacheStep), NULL);
501         p[4] = pool_malloc(mallocPool, minLargeObjectSize+3*LargeCacheStep);
502         ASSERT(loc->getUsedSize() - currUser >= minLargeObjectSize+3*LargeCacheStep, NULL);
503         pool_free(mallocPool, p[4]);
504         ASSERT(loc->getUsedSize() <= currUser+maxLocalLOCSize, NULL);
505         pool_reset(mallocPool);
506         ASSERT(!loc->getLOCSize() && !loc->getUsedSize(), NULL);
507         pool_destroy(mallocPool);
508     }
    // To test the LOC we need bigger lists than are released by the LocalLOC
    //   used in production code. Create a special LocalLOC.
511     {
512         LocalLOCImpl<2, 20> lLOC;
513         pool_create_v1(0, &pol, &mallocPool);
514         rml::internal::ExtMemoryPool *mPool = &((rml::internal::MemoryPool*)mallocPool)->extMemPool;
515         const LargeObjectCache *loc = &((rml::internal::MemoryPool*)mallocPool)->extMemPool.loc;
516         const int LargeCacheStep = LargeObjectCache::LargeBSProps::CacheStep;
517         for (int i=0; i<22; i++) {
518             void *o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
519             bool ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
520             ASSERT(ret, NULL);
521 
522             o = pool_malloc(mallocPool, minLargeObjectSize+i*LargeCacheStep);
523             ret = lLOC.put(((LargeObjectHdr*)o - 1)->memoryBlock, mPool);
524             ASSERT(ret, NULL);
525         }
526         lLOC.externalCleanup(mPool);
527         ASSERT(!loc->getUsedSize(), NULL);
528 
529         pool_destroy(mallocPool);
530     }
531 }
532 
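// Verifies that __TBB_malloc_safer_msize rejects pointers that merely look like
// small or large tbbmalloc objects (forged headers and backreferences) while
// still accepting genuine allocations.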
void TestObjectRecognition() {
534     size_t headersSize = sizeof(LargeMemoryBlock)+sizeof(LargeObjectHdr);
535     unsigned falseObjectSize = 113; // unsigned is the type expected by getObjectSize
536     size_t obtainedSize;
537 
538     ASSERT(sizeof(BackRefIdx)==sizeof(uintptr_t), "Unexpected size of BackRefIdx");
539     ASSERT(getObjectSize(falseObjectSize)!=falseObjectSize, "Error in test: bad choice for false object size");
540 
541     void* mem = scalable_malloc(2*slabSize);
542     ASSERT(mem, "Memory was not allocated");
543     Block* falseBlock = (Block*)alignUp((uintptr_t)mem, slabSize);
544     falseBlock->objectSize = falseObjectSize;
545     char* falseSO = (char*)falseBlock + falseObjectSize*7;
546     ASSERT(alignDown(falseSO, slabSize)==(void*)falseBlock, "Error in test: false object offset is too big");
547 
548     void* bufferLOH = scalable_malloc(2*slabSize + headersSize);
549     ASSERT(bufferLOH, "Memory was not allocated");
550     LargeObjectHdr* falseLO =
551         (LargeObjectHdr*)alignUp((uintptr_t)bufferLOH + headersSize, slabSize);
552     LargeObjectHdr* headerLO = (LargeObjectHdr*)falseLO-1;
553     headerLO->memoryBlock = (LargeMemoryBlock*)bufferLOH;
554     headerLO->memoryBlock->unalignedSize = 2*slabSize + headersSize;
555     headerLO->memoryBlock->objectSize = slabSize + headersSize;
556     headerLO->backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
557     setBackRef(headerLO->backRefIdx, headerLO);
558     ASSERT(scalable_msize(falseLO) == slabSize + headersSize,
559            "Error in test: LOH falsification failed");
560     removeBackRef(headerLO->backRefIdx);
561 
562     const int NUM_OF_IDX = BR_MAX_CNT+2;
563     BackRefIdx idxs[NUM_OF_IDX];
564     for (int cnt=0; cnt<2; cnt++) {
565         for (int master = -10; master<10; master++) {
566             falseBlock->backRefIdx.master = (uint16_t)master;
567             headerLO->backRefIdx.master = (uint16_t)master;
568 
569             for (int bl = -10; bl<BR_MAX_CNT+10; bl++) {
570                 falseBlock->backRefIdx.offset = (uint16_t)bl;
571                 headerLO->backRefIdx.offset = (uint16_t)bl;
572 
573                 for (int largeObj = 0; largeObj<2; largeObj++) {
574                     falseBlock->backRefIdx.largeObj = largeObj;
575                     headerLO->backRefIdx.largeObj = largeObj;
576 
577                     obtainedSize = __TBB_malloc_safer_msize(falseSO, NULL);
578                     ASSERT(obtainedSize==0, "Incorrect pointer accepted");
579                     obtainedSize = __TBB_malloc_safer_msize(falseLO, NULL);
580                     ASSERT(obtainedSize==0, "Incorrect pointer accepted");
581                 }
582             }
583         }
584         if (cnt == 1) {
585             for (int i=0; i<NUM_OF_IDX; i++)
586                 removeBackRef(idxs[i]);
587             break;
588         }
589         for (int i=0; i<NUM_OF_IDX; i++) {
590             idxs[i] = BackRefIdx::newBackRef(/*largeObj=*/false);
591             setBackRef(idxs[i], NULL);
592         }
593     }
594     char *smallPtr = (char*)scalable_malloc(falseObjectSize);
595     obtainedSize = __TBB_malloc_safer_msize(smallPtr, NULL);
596     ASSERT(obtainedSize==getObjectSize(falseObjectSize), "Correct pointer not accepted?");
597     scalable_free(smallPtr);
598 
599     obtainedSize = __TBB_malloc_safer_msize(mem, NULL);
600     ASSERT(obtainedSize>=2*slabSize, "Correct pointer not accepted?");
601     scalable_free(mem);
602     scalable_free(bufferLOH);
603 }
604 
605 class TestBackendWork: public SimpleBarrier {
606     struct TestBlock {
607         intptr_t   data;
608         BackRefIdx idx;
609     };
610     static const int ITERS = 20;
611 
612     rml::internal::Backend *backend;
613 public:
    TestBackendWork(rml::internal::Backend *bknd) : backend(bknd) {}
    void operator()(int) const {
616         barrier.wait();
617 
618         for (int i=0; i<ITERS; i++) {
619             BlockI *slabBlock = backend->getSlabBlock(1);
620             ASSERT(slabBlock, "Memory was not allocated");
621             uintptr_t prevBlock = (uintptr_t)slabBlock;
622             backend->putSlabBlock(slabBlock);
623 
624             LargeMemoryBlock *largeBlock = backend->getLargeBlock(16*1024);
625             ASSERT(largeBlock, "Memory was not allocated");
626             ASSERT((uintptr_t)largeBlock != prevBlock,
627                     "Large block cannot be reused from slab memory, only in fixed_pool case.");
628             backend->putLargeBlock(largeBlock);
629         }
630     }
631 };
632 
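// Exercises the backend directly: concurrent slab/large block allocation and
// release (a regression test for a synchronization race), plus a check that
// getTotalMemSize() grows and shrinks together with a large block.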
void TestBackend()
634 {
635     rml::MemPoolPolicy pol(getMallocMem, putMallocMem);
636     rml::MemoryPool *mPool;
637     pool_create_v1(0, &pol, &mPool);
638     rml::internal::ExtMemoryPool *ePool = &((rml::internal::MemoryPool*)mPool)->extMemPool;
639     rml::internal::Backend *backend = &ePool->backend;
640 
641     for( int p=MaxThread; p>=MinThread; --p ) {
        // regression test against a race condition in backend synchronization,
        // triggered only when the WhiteboxTestingYield() call yields
644         for (int i=0; i<100; i++) {
645             TestBackendWork::initBarrier(p);
646             NativeParallelFor( p, TestBackendWork(backend) );
647         }
648     }
649 
650     BlockI *block = backend->getSlabBlock(1);
651     ASSERT(block, "Memory was not allocated");
652     backend->putSlabBlock(block);
653 
    // Check that the backend increases and decreases the amount of allocated memory as memory is allocated and freed.
655     const size_t memSize0 = backend->getTotalMemSize();
656     LargeMemoryBlock *lmb = backend->getLargeBlock(4*MByte);
657     ASSERT( lmb, ASSERT_TEXT );
658 
659     const size_t memSize1 = backend->getTotalMemSize();
    ASSERT( (intptr_t)(memSize1-memSize0) >= 4*MByte, "The backend has not increased the amount of used memory." );
661 
662     backend->putLargeBlock(lmb);
663     const size_t memSize2 = backend->getTotalMemSize();
    ASSERT( memSize2 == memSize0, "The backend has not decreased the amount of used memory." );
665 
666     pool_destroy(mPool);
667 }
668 
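// Sanity check of BitMaskMin: getMinTrue(pos) must return the index of the
// first set bit at or after pos, or -1 when there is none.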
void TestBitMask()
670 {
671     BitMaskMin<256> mask;
672 
673     mask.reset();
674     mask.set(10, 1);
675     mask.set(5, 1);
676     mask.set(1, 1);
677     ASSERT(mask.getMinTrue(2) == 5, NULL);
678 
679     mask.reset();
680     mask.set(0, 1);
681     mask.set(64, 1);
682     mask.set(63, 1);
683     mask.set(200, 1);
684     mask.set(255, 1);
685     ASSERT(mask.getMinTrue(0) == 0, NULL);
686     ASSERT(mask.getMinTrue(1) == 63, NULL);
687     ASSERT(mask.getMinTrue(63) == 63, NULL);
688     ASSERT(mask.getMinTrue(64) == 64, NULL);
689     ASSERT(mask.getMinTrue(101) == 200, NULL);
690     ASSERT(mask.getMinTrue(201) == 255, NULL);
691     mask.set(255, 0);
692     ASSERT(mask.getMinTrue(201) == -1, NULL);
693 }
694 
size_t getMemSize()
696 {
697     return defaultMemPool->extMemPool.backend.getTotalMemSize();
698 }
699 
700 class CheckNotCached {
701     static size_t memSize;
702 public:
    void operator() () const {
704         int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
705         ASSERT(res == TBBMALLOC_OK, NULL);
706         if (memSize==(size_t)-1) {
707             memSize = getMemSize();
708         } else {
709             ASSERT(getMemSize() == memSize, NULL);
710             memSize=(size_t)-1;
711         }
712     }
713 };
714 
715 size_t CheckNotCached::memSize = (size_t)-1;
716 
717 class RunTestHeapLimit: public SimpleBarrier {
718 public:
    void operator()( int /*mynum*/ ) const {
        // Provoke bootstrap heap initialization before recording memory size.
        // NOTE: The initialization must be done with a "large" object, because
        // a "small" object allocation pins a slab as the active block, and such
        // a block cannot be released from a foreign thread.
725         scalable_free(scalable_malloc(minLargeObjectSize));
726         barrier.wait(CheckNotCached());
727         for (size_t n = minLargeObjectSize; n < 5*1024*1024; n += 128*1024)
728             scalable_free(scalable_malloc(n));
729         barrier.wait(CheckNotCached());
730     }
731 };
732 
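// Checks that TBBMALLOC_SET_SOFT_HEAP_LIMIT with a tiny limit effectively
// disables caching: a large enough request must provoke an OS request rather
// than being served from caches, and the total memory size must return to its
// initial value afterwards.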
void TestHeapLimit()
734 {
735     if(!isMallocInitialized()) doInitialization();
736     // tiny limit to stop caching
737     int res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
738     ASSERT(res == TBBMALLOC_OK, NULL);
739      // Provoke bootstrap heap initialization before recording memory size.
740     scalable_free(scalable_malloc(8));
741     size_t n, sizeBefore = getMemSize();
742 
    // Try to provoke a call to the OS for memory, to check that
    // requests are not fulfilled from caches.
    // A single call is not enough here because of backend fragmentation.
746     for (n = minLargeObjectSize; n < 10*1024*1024; n += 16*1024) {
747         void *p = scalable_malloc(n);
748         bool leave = (sizeBefore != getMemSize());
749         scalable_free(p);
750         if (leave)
751             break;
752         ASSERT(sizeBefore == getMemSize(), "No caching expected");
753     }
754     ASSERT(n < 10*1024*1024, "scalable_malloc doesn't provoke OS request for memory, "
755            "is some internal cache still used?");
756 
757     for( int p=MaxThread; p>=MinThread; --p ) {
758         RunTestHeapLimit::initBarrier( p );
759         NativeParallelFor( p, RunTestHeapLimit() );
760     }
    // setting the soft limit also tries to match it (releasing cached memory), so call it here
762     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 1);
763     ASSERT(res == TBBMALLOC_OK, NULL);
764     size_t m = getMemSize();
765     ASSERT(sizeBefore == m, NULL);
766     // restore default
767     res = scalable_allocation_mode(TBBMALLOC_SET_SOFT_HEAP_LIMIT, 0);
768     ASSERT(res == TBBMALLOC_OK, NULL);
769 }
770 
void checkNoHugePages()
772 {
773     ASSERT(!hugePages.isEnabled, "scalable_allocation_mode "
774            "must have priority over environment variable");
775 }
776 
777 /*---------------------------------------------------------------------------*/
778 // The regression test against bugs in TBBMALLOC_CLEAN_ALL_BUFFERS allocation command.
779 // The idea is to allocate and deallocate a set of objects randomly in parallel.
780 // For large sizes (16K), it forces conflicts in backend during coalescing.
781 // For small sizes (4K), it forces cross-thread deallocations and then orphaned slabs.
782 // Global cleanup should process orphaned slabs and the queue of postponed coalescing
783 // requests, otherwise it will not be able to unmap all unused memory.
784 
785 const int num_allocs = 10*1024;
786 void *ptrs[num_allocs];
787 tbb::atomic<int> alloc_counter;
788 
inline void multiThreadAlloc(size_t alloc_size) {
790     for( int i = alloc_counter++; i < num_allocs; i = alloc_counter++ ) {
791        ptrs[i] = scalable_malloc( alloc_size );
792        ASSERT( ptrs[i] != NULL, "scalable_malloc returned zero." );
793     }
794 }
inline void crossThreadDealloc() {
796     for( int i = --alloc_counter; i >= 0; i = --alloc_counter ) {
797        if (i < num_allocs) scalable_free( ptrs[i] );
798     }
799 }
800 
801 template<int AllocSize>
802 struct TestCleanAllBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
804         barrier.wait();
805         multiThreadAlloc(AllocSize);
806         barrier.wait();
807         crossThreadDealloc();
808     }
809 };
810 
811 template<int AllocSize>
void TestCleanAllBuffers() {
813     const int num_threads = 8;
814     // Clean up if something was allocated before the test
815     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
816 
817     size_t memory_in_use_before = getMemSize();
818     alloc_counter = 0;
819     TestCleanAllBuffersBody<AllocSize>::initBarrier(num_threads);
820 
821     NativeParallelFor(num_threads, TestCleanAllBuffersBody<AllocSize>());
822     // TODO: reproduce the bug conditions more reliably
823     if ( defaultMemPool->extMemPool.backend.coalescQ.blocksToFree == NULL )
824         REMARK( "Warning: The queue of postponed coalescing requests is empty. Unable to create the condition for bug reproduction.\n" );
825     int result = scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
826     ASSERT( result == TBBMALLOC_OK, "The cleanup request has not cleaned anything." );
827     size_t memory_in_use_after = getMemSize();
828 
829     size_t memory_leak = memory_in_use_after - memory_in_use_before;
830     REMARK( "memory_in_use_before = %ld\nmemory_in_use_after = %ld\n", memory_in_use_before, memory_in_use_after );
831     ASSERT( memory_leak == 0, "Cleanup was unable to release all allocated memory." );
832 }
833 
//! Force cross-thread deallocation of small objects to create a set of privatizable slab blocks.
//! The TBBMALLOC_CLEAN_THREAD_BUFFERS command has to privatize all of these blocks.
836 struct TestCleanThreadBuffersBody : public SimpleBarrier {
    void operator() ( int ) const {
838         barrier.wait();
839         multiThreadAlloc(2*1024);
840         barrier.wait();
841         crossThreadDealloc();
842         barrier.wait();
843         int result = scalable_allocation_command(TBBMALLOC_CLEAN_THREAD_BUFFERS,0);
844         ASSERT(result == TBBMALLOC_OK, "Per-thread clean request has not cleaned anything.");
845 
846         // Check that TLS was cleaned fully
847         TLSData *tlsCurr = defaultMemPool->getTLS(/*create=*/false);
848         for (int i = 0; i < numBlockBinLimit; i++) {
849             ASSERT(!(tlsCurr->bin[i].activeBlk), "Some bin was not cleaned.");
850         }
851         ASSERT(!(tlsCurr->lloc.head), "Local LOC was not cleaned.");
852         ASSERT(!(tlsCurr->freeSlabBlocks.head), "Free Block pool was not cleaned.");
853     }
854 };
855 
void TestCleanThreadBuffers() {
857     const int num_threads = 8;
858     // Clean up if something was allocated before the test
859     scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS,0);
860 
861     alloc_counter = 0;
862     TestCleanThreadBuffersBody::initBarrier(num_threads);
863     NativeParallelFor(num_threads, TestCleanThreadBuffersBody());
864 }
865 
866 /*---------------------------------------------------------------------------*/
867 /*------------------------- Large Object Cache tests ------------------------*/
868 #if _MSC_VER==1600 || _MSC_VER==1500
869     // ignore C4275: non dll-interface class 'stdext::exception' used as
870     // base for dll-interface class 'std::bad_cast'
871     #pragma warning (disable: 4275)
872 #endif
873 #include <vector>
874 #include <list>
875 #include __TBB_STD_SWAP_HEADER
876 
877 // default constructor of CacheBin
878 template<typename Props>
rml::internal::LargeObjectCacheImpl<Props>::CacheBin::CacheBin() {}
880 
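// CacheBinModel mirrors the aging/cleanup logic of one LargeObjectCache bin:
// every get()/putList() updates the emulated counters exactly as the real
// CacheBin is expected to, and check() compares the two field by field.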
881 template<typename Props>
882 class CacheBinModel {
883 
884     typedef typename rml::internal::LargeObjectCacheImpl<Props>::CacheBin CacheBinType;
885 
886     // The emulated cache bin.
887     CacheBinType cacheBinModel;
888     // The reference to real cache bin inside the large object cache.
889     CacheBinType &cacheBin;
890 
891     const size_t size;
892 
    // only the put timestamps of cached objects are stored, not the objects themselves
894     std::list<uintptr_t> objects;
895 
    void doCleanup() {
897         if ( cacheBinModel.cachedSize > Props::TooLargeFactor*cacheBinModel.usedSize ) tooLargeLOC++;
898         else tooLargeLOC = 0;
899 
900         if (tooLargeLOC>3 && cacheBinModel.ageThreshold)
901             cacheBinModel.ageThreshold = (cacheBinModel.ageThreshold + cacheBinModel.meanHitRange)/2;
902 
903         uintptr_t currTime = cacheCurrTime;
904         while (!objects.empty() && (intptr_t)(currTime - objects.front()) > cacheBinModel.ageThreshold) {
905             cacheBinModel.cachedSize -= size;
906             cacheBinModel.lastCleanedAge = objects.front();
907             objects.pop_front();
908         }
909 
910         cacheBinModel.oldest = objects.empty() ? 0 : objects.front();
911     }
912 
913 public:
    CacheBinModel(CacheBinType &_cacheBin, size_t allocSize) : cacheBin(_cacheBin), size(allocSize) {
915         cacheBinModel.oldest = cacheBin.oldest;
916         cacheBinModel.lastCleanedAge = cacheBin.lastCleanedAge;
917         cacheBinModel.ageThreshold = cacheBin.ageThreshold;
918         cacheBinModel.usedSize = cacheBin.usedSize;
919         cacheBinModel.cachedSize = cacheBin.cachedSize;
920         cacheBinModel.meanHitRange = cacheBin.meanHitRange;
921         cacheBinModel.lastGet = cacheBin.lastGet;
922     }
    void get() {
924         uintptr_t currTime = ++cacheCurrTime;
925 
926         if ( objects.empty() ) {
927             const uintptr_t sinceLastGet = currTime - cacheBinModel.lastGet;
928             if ( ( cacheBinModel.ageThreshold && sinceLastGet > Props::LongWaitFactor*cacheBinModel.ageThreshold ) ||
929                  ( cacheBinModel.lastCleanedAge && sinceLastGet > Props::LongWaitFactor*(cacheBinModel.lastCleanedAge - cacheBinModel.lastGet) ) )
930                 cacheBinModel.lastCleanedAge = cacheBinModel.ageThreshold = 0;
931 
932             if (cacheBinModel.lastCleanedAge)
933                 cacheBinModel.ageThreshold = Props::OnMissFactor*(currTime - cacheBinModel.lastCleanedAge);
934         } else {
935             uintptr_t obj_age = objects.back();
936             objects.pop_back();
937             if ( objects.empty() ) cacheBinModel.oldest = 0;
938 
939             intptr_t hitRange = currTime - obj_age;
940             cacheBinModel.meanHitRange = cacheBinModel.meanHitRange? (cacheBinModel.meanHitRange + hitRange)/2 : hitRange;
941 
942             cacheBinModel.cachedSize -= size;
943         }
944 
945         cacheBinModel.usedSize += size;
946         cacheBinModel.lastGet = currTime;
947 
948         if ( currTime % rml::internal::cacheCleanupFreq == 0 ) doCleanup();
949     }
950 
    void putList( int num ) {
952         uintptr_t currTime = cacheCurrTime;
953         cacheCurrTime += num;
954 
955         cacheBinModel.usedSize -= num*size;
956 
957         bool cleanUpNeeded = false;
958         if ( !cacheBinModel.lastCleanedAge ) {
959             cacheBinModel.lastCleanedAge = ++currTime;
960             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
961             num--;
962         }
963 
964         for ( int i=1; i<=num; ++i ) {
965             currTime+=1;
966             cleanUpNeeded |= currTime % rml::internal::cacheCleanupFreq == 0;
967             if ( objects.empty() )
968                 cacheBinModel.oldest = currTime;
969             objects.push_back(currTime);
970         }
971 
972         cacheBinModel.cachedSize += num*size;
973 
974         if ( cleanUpNeeded ) doCleanup();
975     }
976 
    void check() {
978         ASSERT(cacheBinModel.oldest == cacheBin.oldest, ASSERT_TEXT);
979         ASSERT(cacheBinModel.lastCleanedAge == cacheBin.lastCleanedAge, ASSERT_TEXT);
980         ASSERT(cacheBinModel.ageThreshold == cacheBin.ageThreshold, ASSERT_TEXT);
981         ASSERT(cacheBinModel.usedSize == cacheBin.usedSize, ASSERT_TEXT);
982         ASSERT(cacheBinModel.cachedSize == cacheBin.cachedSize, ASSERT_TEXT);
983         ASSERT(cacheBinModel.meanHitRange == cacheBin.meanHitRange, ASSERT_TEXT);
984         ASSERT(cacheBinModel.lastGet == cacheBin.lastGet, ASSERT_TEXT);
985     }
986 
987     static uintptr_t cacheCurrTime;
988     static intptr_t tooLargeLOC;
989 };
990 
991 template<typename Props> uintptr_t CacheBinModel<Props>::cacheCurrTime;
992 template<typename Props> intptr_t CacheBinModel<Props>::tooLargeLOC;
993 
994 template <typename Scenario>
void LOCModelTester() {
996     defaultMemPool->extMemPool.loc.cleanAll();
997     defaultMemPool->extMemPool.loc.reset();
998 
999     const size_t size = 16 * 1024;
1000     const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1001     const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1002     const int binIdx = defaultMemPool->extMemPool.loc.largeCache.sizeToIdx( allocationSize );
1003 
1004     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::cacheCurrTime = defaultMemPool->extMemPool.loc.cacheCurrTime;
1005     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps>::tooLargeLOC = defaultMemPool->extMemPool.loc.largeCache.tooLargeLOC;
1006     CacheBinModel<rml::internal::LargeObjectCache::LargeCacheTypeProps> cacheBinModel(defaultMemPool->extMemPool.loc.largeCache.bin[binIdx], allocationSize);
1007 
1008     Scenario scen;
1009     for (rml::internal::LargeMemoryBlock *lmb = scen.next(); (intptr_t)lmb != (intptr_t)-1; lmb = scen.next()) {
1010         if ( lmb ) {
1011             int num=1;
1012             for (rml::internal::LargeMemoryBlock *curr = lmb; curr->next; curr=curr->next) num+=1;
1013             defaultMemPool->extMemPool.freeLargeObject(lmb);
1014             cacheBinModel.putList(num);
1015         } else {
1016             scen.saveLmb(defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize));
1017             cacheBinModel.get();
1018         }
1019 
1020         cacheBinModel.check();
1021     }
1022 }
1023 
1024 class TestBootstrap {
1025     bool allocating;
1026     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1027 public:
    TestBootstrap() : allocating(true) {}
1029 
    rml::internal::LargeMemoryBlock* next() {
1031         if ( allocating )
1032             return NULL;
1033         if ( !lmbArray.empty() ) {
1034             rml::internal::LargeMemoryBlock *ret = lmbArray.back();
1035             lmbArray.pop_back();
1036             return ret;
1037         }
1038         return (rml::internal::LargeMemoryBlock*)-1;
1039     }
1040 
    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1042         lmb->next = NULL;
1043         lmbArray.push_back(lmb);
1044         if ( lmbArray.size() == 1000 ) allocating = false;
1045     }
1046 };
1047 
1048 class TestRandom {
1049     std::vector<rml::internal::LargeMemoryBlock*> lmbArray;
1050     int numOps;
1051 public:
    TestRandom() : numOps(100000) {
1053         srand(1234);
1054     }
1055 
    rml::internal::LargeMemoryBlock* next() {
1057         if ( numOps-- ) {
1058             if ( lmbArray.empty() || rand() / (RAND_MAX>>1) == 0 )
1059                 return NULL;
1060             size_t ind = rand()%lmbArray.size();
1061             if ( ind != lmbArray.size()-1 ) std::swap(lmbArray[ind],lmbArray[lmbArray.size()-1]);
1062             rml::internal::LargeMemoryBlock *lmb = lmbArray.back();
1063             lmbArray.pop_back();
1064             return lmb;
1065         }
1066         return (rml::internal::LargeMemoryBlock*)-1;
1067     }
1068 
    void saveLmb( rml::internal::LargeMemoryBlock *lmb ) {
1070         lmb->next = NULL;
1071         lmbArray.push_back(lmb);
1072     }
1073 };
1074 
1075 class TestCollapsingMallocFree : public SimpleBarrier {
1076 public:
1077     static const int NUM_ALLOCS = 100000;
1078     const int num_threads;
1079 
    TestCollapsingMallocFree( int _num_threads ) : num_threads(_num_threads) {
1081         initBarrier( num_threads );
1082     }
1083 
    void operator() ( int ) const {
1085         const size_t size = 16 * 1024;
1086         const size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1087         const size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1088 
1089         barrier.wait();
1090         for ( int i=0; i<NUM_ALLOCS; ++i ) {
1091             defaultMemPool->extMemPool.freeLargeObject(
1092                 defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize) );
1093         }
1094     }
1095 
    void check() {
1097         ASSERT( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed, ASSERT_TEXT );
        ASSERT( tbbmalloc_whitebox::locGetProcessed < num_threads*NUM_ALLOCS, "Not a single Malloc/Free pair was collapsed." );
1099     }
1100 };
1101 
1102 class TestCollapsingBootstrap : public SimpleBarrier {
1103     class CheckNumAllocs {
1104         const int num_threads;
1105     public:
        CheckNumAllocs( int _num_threads ) : num_threads(_num_threads) {}
        void operator()() const {
1108             ASSERT( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS, ASSERT_TEXT );
1109             ASSERT( tbbmalloc_whitebox::locPutProcessed == 0, ASSERT_TEXT );
1110         }
1111     };
1112 public:
1113     static const int NUM_ALLOCS = 1000;
1114     const int num_threads;
1115 
    TestCollapsingBootstrap( int _num_threads ) : num_threads(_num_threads) {
1117         initBarrier( num_threads );
1118     }
1119 
    void operator() ( int ) const {
1121         const size_t size = 16 * 1024;
1122         size_t headersSize = sizeof(rml::internal::LargeMemoryBlock)+sizeof(rml::internal::LargeObjectHdr);
1123         size_t allocationSize = LargeObjectCache::alignToBin(size+headersSize+rml::internal::largeObjectAlignment);
1124 
1125         barrier.wait();
1126         rml::internal::LargeMemoryBlock *lmbArray[NUM_ALLOCS];
1127         for ( int i=0; i<NUM_ALLOCS; ++i )
1128             lmbArray[i] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
1129 
1130         barrier.wait(CheckNumAllocs(num_threads));
1131         for ( int i=0; i<NUM_ALLOCS; ++i )
1132             defaultMemPool->extMemPool.freeLargeObject( lmbArray[i] );
1133     }
1134 
    void check() {
1136         ASSERT( tbbmalloc_whitebox::locGetProcessed == tbbmalloc_whitebox::locPutProcessed, ASSERT_TEXT );
1137         ASSERT( tbbmalloc_whitebox::locGetProcessed == num_threads*NUM_ALLOCS, ASSERT_TEXT );
1138     }
1139 };
1140 
1141 template <typename Scenario>
void LOCCollapsingTester( int num_threads ) {
1143     tbbmalloc_whitebox::locGetProcessed = 0;
1144     tbbmalloc_whitebox::locPutProcessed = 0;
1145     defaultMemPool->extMemPool.loc.cleanAll();
1146     defaultMemPool->extMemPool.loc.reset();
1147 
1148     Scenario scen(num_threads);
1149     NativeParallelFor(num_threads, scen);
1150 
1151     scen.check();
1152 }
1153 
void TestLOC() {
1155     LOCModelTester<TestBootstrap>();
1156     LOCModelTester<TestRandom>();
1157 
1158     const int num_threads = 16;
1159     LOCCollapsingTester<TestCollapsingBootstrap>( num_threads );
1160     if ( num_threads > 1 ) {
1161         REMARK( "num_threads = %d\n", num_threads );
1162         LOCCollapsingTester<TestCollapsingMallocFree>( num_threads );
1163     } else {
1164         REPORT( "Warning: concurrency is too low for TestMallocFreeCollapsing ( num_threads = %d )\n", num_threads );
1165     }
1166 }
1167 /*---------------------------------------------------------------------------*/
1168 
void *findCacheLine(void *p) {
1170     return (void*)alignDown((uintptr_t)p, estimatedCacheLineSize);
1171 }
1172 
1173 // test that internals of Block are at expected cache lines
void TestSlabAlignment() {
1175     const size_t min_sz = 8;
1176     const int space = 2*16*1024; // fill at least 2 slabs
1177     void *pointers[space / min_sz];  // the worst case is min_sz byte object
1178 
1179     for (size_t sz = min_sz; sz <= 64; sz *= 2) {
1180         for (size_t i = 0; i < space/sz; i++) {
1181             pointers[i] = scalable_malloc(sz);
1182             Block *block = (Block *)alignDown(pointers[i], slabSize);
1183             MALLOC_ASSERT(findCacheLine(&block->isFull) != findCacheLine(pointers[i]),
1184                           "A user object must not share a cache line with slab control structures.");
1185             MALLOC_ASSERT(findCacheLine(&block->next) != findCacheLine(&block->nextPrivatizable),
1186                           "GlobalBlockFields and LocalBlockFields must be on different cache lines.");
1187         }
1188         for (size_t i = 0; i < space/sz; i++)
1189             scalable_free(pointers[i]);
1190     }
1191 }
1192 
1193 #include "harness_memory.h"
1194 
1195 // TODO: Consider adding Huge Pages support on macOS (special mmap flag).
// Transparent Huge Pages support could be enabled through a different system parsing mechanism,
// because there is no /proc/meminfo on macOS
1198 #if __linux__
void TestTHP() {
1200     // Get backend from default memory pool
1201     rml::internal::Backend *backend = &(defaultMemPool->extMemPool.backend);
1202 
1203     // Configure malloc to use huge pages
1204     scalable_allocation_mode(USE_HUGE_PAGES, 1);
1205     MALLOC_ASSERT(hugePages.isEnabled, "Huge pages should be enabled via scalable_allocation_mode");
1206 
1207     const int HUGE_PAGE_SIZE = 2 * 1024 * 1024;
1208 
1209     // allocCount transparent huge pages should be allocated
1210     const int allocCount = 10;
1211 
1212     // Allocate huge page aligned memory regions to track system
1213     // counters for transparent huge pages
1214     void*  allocPtrs[allocCount];
1215 
1216     // Wait for the system to update process memory info files after other tests
1217     Harness::Sleep(4000);
1218 
1219     // Parse system info regarding current THP status
1220     size_t currentSystemTHPCount = getSystemTHPCount();
1221     size_t currentSystemTHPAllocatedSize = getSystemTHPAllocatedSize();
1222 
1223     for (int i = 0; i < allocCount; i++) {
        // Allocation size has to be aligned on the page size
1225         size_t allocSize = HUGE_PAGE_SIZE - (i * 1000);
1226 
1227         // Map memory
1228         allocPtrs[i] = backend->allocRawMem(allocSize);
1229 
1230         MALLOC_ASSERT(allocPtrs[i], "Allocation not succeeded.");
        MALLOC_ASSERT(allocSize == HUGE_PAGE_SIZE,
            "Allocation size has to be aligned on the Huge Page size internally.");
1233 
1234         // First touch policy - no real pages allocated by OS without accessing the region
1235         memset(allocPtrs[i], 1, allocSize);
1236 
1237         MALLOC_ASSERT(isAligned(allocPtrs[i], HUGE_PAGE_SIZE),
1238             "The pointer returned by scalable_malloc is not aligned on huge page size.");
1239     }
1240 
1241     // Wait for the system to update process memory info files after allocations
1242     Harness::Sleep(4000);
1243 
    // Generally, the kernel tries to allocate transparent huge pages, but sometimes it cannot do so
    // (tested on SLES 11/12), so consider these system info checks a remark only.
    // Also, some systems can allocate more memory than needed in the background (tested on Ubuntu 14.04).
1247     size_t newSystemTHPCount = getSystemTHPCount();
1248     size_t newSystemTHPAllocatedSize = getSystemTHPAllocatedSize();
1249     if ((newSystemTHPCount - currentSystemTHPCount) < allocCount
1250             && (newSystemTHPAllocatedSize - currentSystemTHPAllocatedSize) / (2 * 1024) < allocCount) {
1251         REPORT( "Warning: the system didn't allocate needed amount of THPs.\n" );
1252     }
1253 
1254     // Test memory unmap
1255     for (int i = 0; i < allocCount; i++) {
1256         MALLOC_ASSERT(backend->freeRawMem(allocPtrs[i], HUGE_PAGE_SIZE),
1257                 "Something went wrong during raw memory free");
1258     }
1259 }
1260 #endif // __linux__
1261 
inline size_t getStabilizedMemUsage() {
1263     for (int i = 0; i < 3; i++) GetMemoryUsage();
1264     return GetMemoryUsage();
1265 }
1266 
inline void* reallocAndRetrieve(void* origPtr, size_t reallocSize, size_t& origBlockSize, size_t& reallocBlockSize) {
1268     rml::internal::LargeMemoryBlock* origLmb = ((rml::internal::LargeObjectHdr *)origPtr - 1)->memoryBlock;
1269     origBlockSize = origLmb->unalignedSize;
1270 
1271     void* reallocPtr = rml::internal::reallocAligned(defaultMemPool, origPtr, reallocSize, 0);
1272 
1273     // Retrieved reallocated block information
1274     rml::internal::LargeMemoryBlock* reallocLmb = ((rml::internal::LargeObjectHdr *)reallocPtr - 1)->memoryBlock;
1275     reallocBlockSize = reallocLmb->unalignedSize;
1276 
1277     return reallocPtr;
1278 }
1279 
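// Shrinks a 100MB object step by step (each step at least halving the size)
// while caches are drained, and checks that the block size and, when
// measurable, the process memory usage actually decrease.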
void TestReallocDecreasing() {
1281 
1282     /* Testing that actual reallocation happens for large objects that do not fit the backend cache
1283        but decrease in size by a factor of >= 2. */
1284 
1285     size_t startSize = 100 * 1024 * 1024;
1286     size_t maxBinnedSize = defaultMemPool->extMemPool.backend.getMaxBinnedSize();
1287     void*  origPtr = scalable_malloc(startSize);
1288     void*  reallocPtr = NULL;
1289 
1290     // Realloc on 1MB less size
1291     size_t origBlockSize = 42;
1292     size_t reallocBlockSize = 43;
1293     reallocPtr = reallocAndRetrieve(origPtr, startSize - 1 * 1024 * 1024, origBlockSize, reallocBlockSize);
1294     MALLOC_ASSERT(origBlockSize == reallocBlockSize, "Reallocated block size shouldn't change");
1295     MALLOC_ASSERT(reallocPtr == origPtr, "Original pointer shouldn't change");
1296 
    // Repeatedly shrink the allocation while its size is still above the maximum binned size
1298     size_t reallocSize = (startSize / 2) - 1000; // exact realloc
1299     while(reallocSize > maxBinnedSize) {
1300 
1301         // Prevent huge/large objects caching
1302         defaultMemPool->extMemPool.loc.cleanAll();
1303         // Prevent local large object caching
1304         TLSData *tls = defaultMemPool->getTLS(/*create=*/false);
1305         tls->lloc.externalCleanup(&defaultMemPool->extMemPool);
1306 
1307         size_t sysMemUsageBefore = getStabilizedMemUsage();
1308         size_t totalMemSizeBefore = defaultMemPool->extMemPool.backend.getTotalMemSize();
1309 
1310         reallocPtr = reallocAndRetrieve(origPtr, reallocSize, origBlockSize, reallocBlockSize);
1311 
        MALLOC_ASSERT(origBlockSize > reallocBlockSize, "Reallocated block size should decrease.");
1313 
1314         size_t sysMemUsageAfter = getStabilizedMemUsage();
1315         size_t totalMemSizeAfter = defaultMemPool->extMemPool.backend.getTotalMemSize();
1316 
        // Skip the check if backend caching occurred or system memory usage info could not be read
        if (totalMemSizeBefore > totalMemSizeAfter && sysMemUsageAfter != 0 && sysMemUsageBefore != 0) {
            MALLOC_ASSERT(sysMemUsageBefore > sysMemUsageAfter, "Memory was not released");
1320         }
1321 
1322         origPtr = reallocPtr;
1323         reallocSize = (reallocSize / 2) - 1000; // exact realloc
1324     }
1325     scalable_free(reallocPtr);
1326 
1327     /* TODO: Decreasing reallocation of large objects that fit backend cache */
1328     /* TODO: Small objects decreasing reallocation test */
1329 }
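
// Worked example of the halving sequence above (illustrative, values rounded):
//     100 MB -> ~50 MB -> ~25 MB -> ~12.5 MB -> ... until the size drops below
//     backend.getMaxBinnedSize(). Each step shrinks the request by a factor of >= 2,
//     which is exactly the condition under which the test expects a real reallocation
//     (a new, smaller LargeMemoryBlock) rather than shrinking in place.
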
#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)

#include "../src/tbbmalloc/tbb_function_replacement.cpp"
#include <string>
namespace FunctionReplacement {
    FunctionInfo funcInfo = { "funcname","dllname" };
    char **func_replacement_log;
    int status;

    void LogCleanup() {
        // Free all allocated memory
        for (unsigned i = 0; i < Log::record_number; i++){
            HeapFree(GetProcessHeap(), 0, Log::records[i]);
        }
        for (unsigned i = 0; i < Log::RECORDS_COUNT + 1; i++){
            Log::records[i] = NULL;
        }
        Log::replacement_status = true;
        Log::record_number = 0;
    }

    void TestEmptyLog() {
        status = TBB_malloc_replacement_log(&func_replacement_log);

        ASSERT(status == -1, "Status is true, but log is empty");
        ASSERT(*func_replacement_log == NULL, "Log must be empty");
    }

    void TestLogOverload() {
        for (int i = 0; i < 1000; i++)
            Log::record(funcInfo, "opcode string", true);

        status = TBB_malloc_replacement_log(&func_replacement_log);
        // Find the last record
        for (; *(func_replacement_log + 1) != 0; func_replacement_log++) {}

        std::string last_line(*func_replacement_log);
        ASSERT(status == 0, "False status, but all functions found");
        ASSERT(last_line.compare("Log was truncated.") == 0, "Log overflow was not handled");

        // Change status
        Log::record(funcInfo, "opcode string", false);
        status = TBB_malloc_replacement_log(NULL);
        ASSERT(status == -1, "Status is true, but we have a false search case");

        LogCleanup();
    }

    void TestFalseSearchCase() {
        Log::record(funcInfo, "opcode string", false);
        std::string expected_line = "Fail: "+ std::string(funcInfo.funcName) + " (" +
                         std::string(funcInfo.dllName) + "), byte pattern: <opcode string>";

        status = TBB_malloc_replacement_log(&func_replacement_log);

        ASSERT(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
        ASSERT(status == -1, "Status is true, but we have a false search case");
        LogCleanup();
    }

    void TestWrongFunctionInDll(){
        HMODULE ucrtbase_handle = GetModuleHandle("ucrtbase.dll");
        if (ucrtbase_handle) {
            IsPrologueKnown("ucrtbase.dll", "fake_function", NULL, ucrtbase_handle);
            std::string expected_line = "Fail: fake_function (ucrtbase.dll), byte pattern: <unknown>";

            status = TBB_malloc_replacement_log(&func_replacement_log);

            ASSERT(expected_line.compare(*func_replacement_log) == 0, "Wrong last string content");
            ASSERT(status == -1, "Status is true, but we have a false search case");
            LogCleanup();
        } else {
            REMARK("Cannot find ucrtbase.dll on the system, test skipped!\n");
        }
    }
}

void TestFunctionReplacementLog() {
    using namespace FunctionReplacement;
    // Do not reorder the test cases
    TestEmptyLog();
    TestLogOverload();
    TestFalseSearchCase();
    TestWrongFunctionInDll();
}
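
// Usage sketch for applications (illustrative, mirrors the checks above): the log is a
// NULL-terminated array of strings, and the call returns 0 only when every function
// was successfully replaced.
//     char** log;
//     int rc = TBB_malloc_replacement_log(&log);
//     for (char** line = log; *line != NULL; ++line)
//         printf("%s\n", *line);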

#endif /*!__TBB_WIN8UI_SUPPORT && defined(_WIN32)*/

#include <cmath> // pow function

// Huge objects cache: the formula Size = MinSize * 2 ^ (Index / StepFactor) gives the bin size,
// but it does not match our sizeToIdx approximation algorithm, where the steps between major
// (power of 2) sizes are equal. Used internally for the test. Static cast to avoid warnings.
inline size_t hocIdxToSizeFormula(int idx) {
    return static_cast<size_t>(float(rml::internal::LargeObjectCache::maxLargeSize) *
        pow(2, float(idx) / float(rml::internal::LargeObjectCache::HugeBSProps::StepFactor)));
}
// Large objects cache arithmetic progression
inline size_t locIdxToSizeFormula(int idx) {
    return rml::internal::LargeObjectCache::LargeBSProps::MinSize +
        (idx * rml::internal::LargeObjectCache::LargeBSProps::CacheStep);
}
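
// Illustrative sketch (not called from TestMain): the two formulas above imply two simple
// properties of the cache bin layout - large bins grow by a constant CacheStep, while huge
// bin sizes roughly double every StepFactor bins. The 1.9..2.1 tolerance is an assumption
// of this sketch (it only absorbs float rounding), not a documented guarantee.
inline void illustrateBinSizeProgression() {
    using namespace rml::internal;
    // Arithmetic progression for the large object cache bins
    MALLOC_ASSERT(locIdxToSizeFormula(1) - locIdxToSizeFormula(0) ==
                  size_t(LargeObjectCache::LargeBSProps::CacheStep),
                  "Adjacent large cache bins are expected to differ by CacheStep");
    // Geometric progression for the huge object cache bins
    const int step = LargeObjectCache::HugeBSProps::StepFactor;
    double ratio = double(hocIdxToSizeFormula(step)) / double(hocIdxToSizeFormula(0));
    MALLOC_ASSERT(ratio > 1.9 && ratio < 2.1,
                  "Huge cache bin sizes are expected to double every StepFactor bins");
}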

template <typename CacheType>
void TestLOCacheBinsConverterImpl(int idx, size_t checkingSize) {
    size_t alignedSize = CacheType::alignToBin(checkingSize);
    MALLOC_ASSERT(alignedSize >= checkingSize, "Size is not correctly aligned");
    int calcIdx = CacheType::sizeToIdx(alignedSize);
    MALLOC_ASSERT(calcIdx == idx, "Index was not correctly calculated from size");
}

void TestLOCacheBinsConverter(){
    typedef rml::internal::LargeObjectCache::LargeCacheType LargeCacheType;
    typedef rml::internal::LargeObjectCache::HugeCacheType HugeCacheType;

    size_t checkingSize = 0;
    for (int idx = 0; idx < LargeCacheType::numBins; idx++) {
        checkingSize = locIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<LargeCacheType>(idx, checkingSize);
    }
    for (int idx = 0; idx < HugeCacheType::numBins; idx++) {
        checkingSize = hocIdxToSizeFormula(idx);
        TestLOCacheBinsConverterImpl<HugeCacheType>(idx, checkingSize);
    }
}

struct HOThresholdTester {
    LargeObjectCache* loc;
    size_t hugeSize;

    static const size_t sieveSize = LargeObjectCache::defaultMaxHugeSize;
    // Sieve starts from 64MB (24-th cache bin), enough to check a 4-bin radius range
    // while keeping memory consumption decent (especially for 32-bit arch)
    static const int MIN_BIN_IDX = 20;
    static const int MAX_BIN_IDX = 28;

    enum CleanupType {
        NO_CLEANUP,
        REGULAR_CLEANUP,
        HARD_CLEANUP
    };

    void populateCache() {
        LargeMemoryBlock* loArray[MAX_BIN_IDX - MIN_BIN_IDX];
        // To avoid the consequences of backend::softCacheCleanup (cleanup by isLOCToolarge),
        // first allocate all objects and only then cache them at once.
        // Moreover, because the first cached item is still dropped from the cache due to the lack of history,
        // redo the allocation twice.
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t allocationSize = alignedSizeFromIdx(idx);
            int localIdx = idx - MIN_BIN_IDX;
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
            MALLOC_ASSERT(loArray[localIdx], "Large object was not allocated.");
            loc->put(loArray[localIdx]);
            loArray[localIdx] = defaultMemPool->extMemPool.mallocLargeObject(defaultMemPool, allocationSize);
        }
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            loc->put(loArray[idx - MIN_BIN_IDX]);
        }
    }
    void clean(bool all) {
        if (all) {
            // Bypass any threshold and clean all bins
            loc->cleanAll();
        } else {
            // Regular cleanup should do nothing for bins above the threshold. The decreasing cleanup
            // is used in the test to be sure that all objects below defaultMaxHugeSize (sieveSize) are cleaned
            loc->regularCleanup();
            loc->decreasingCleanup();
        }
    }
    void check(CleanupType type) {
        for (int idx = MIN_BIN_IDX; idx < MAX_BIN_IDX; ++idx) {
            size_t objectSize = alignedSizeFromIdx(idx);
            // Objects below the sieve threshold and above the huge object threshold should stay cached
            // (others should be sieved), unless the whole cache is dropped. Regular cleanup drops
            // only objects below the sieve size.
            if (type == NO_CLEANUP && sizeInCacheRange(objectSize)) {
                MALLOC_ASSERT(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't be.");
            } else if (type == REGULAR_CLEANUP && (objectSize >= hugeSize)) {
                MALLOC_ASSERT(objectInCacheBin(idx, objectSize), "Object was released from cache, it shouldn't be.");
            } else { // HARD_CLEANUP
                MALLOC_ASSERT(cacheBinEmpty(idx), "Object is still cached.");
            }
        }
    }

private:
    bool cacheBinEmpty(int idx) {
        return (loc->hugeCache.bin[idx].cachedSize == 0 && loc->hugeCache.bin[idx].get() == NULL);
    }
    bool objectInCacheBin(int idx, size_t size) {
        return (loc->hugeCache.bin[idx].cachedSize != 0 && loc->hugeCache.bin[idx].cachedSize % size == 0);
    }
    bool sizeInCacheRange(size_t size) {
        return size <= sieveSize || size >= hugeSize;
    }
    size_t alignedSizeFromIdx(int idx) {
        return rml::internal::LargeObjectCache::alignToBin(hocIdxToSizeFormula(idx));
    }
};
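
// Summary of the cache state verified by check() above
// (sieveSize == defaultMaxHugeSize, hugeSize == the configured huge object threshold):
//     NO_CLEANUP      - objects with size <= sieveSize or size >= hugeSize stay cached
//     REGULAR_CLEANUP - only objects with size >= hugeSize stay cached
//     HARD_CLEANUP    - nothing stays cached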

// The huge size threshold should be set before the test, either through the scalable API
// (TBBMALLOC_SET_HUGE_SIZE_THRESHOLD) or the environment variable
void TestHugeSizeThresholdImpl(LargeObjectCache* loc, size_t hugeSize, bool fullTesting) {
    HOThresholdTester test = {loc, hugeSize};
    test.populateCache();
    // Check the default sieve value
    test.check(HOThresholdTester::NO_CLEANUP);

    if(fullTesting) {
        // Check that objects above the threshold stay in the cache after a regular cleanup
        test.clean(/*all*/false);
        test.check(HOThresholdTester::REGULAR_CLEANUP);
    }
    // Check that all objects are dropped from the cache after a hard cleanup (ignoring the huge objects threshold)
    test.clean(/*all*/true);
    test.check(HOThresholdTester::HARD_CLEANUP);
    // Restore previous settings
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
}

/*
 *  Test for the default huge size and the behaviour when huge object settings are defined
 */
void TestHugeSizeThreshold() {
    // Clean up if something was allocated before the test and reset the cache state
    scalable_allocation_command(TBBMALLOC_CLEAN_ALL_BUFFERS, 0);
    LargeObjectCache* loc = &defaultMemPool->extMemPool.loc;
    // Restore default settings just in case
    loc->setHugeSizeThreshold(LargeObjectCache::maxHugeSize);
    loc->reset();
    // First, check the default huge size value (with the max huge object threshold).
    // Everything larger than this value should be released to the OS without caching.
    TestHugeSizeThresholdImpl(loc, loc->hugeSizeThreshold, false);
    // Then set the huge object threshold.
    // All objects with sizes above the threshold will be released only after a hard cleanup.
#if !__TBB_WIN8UI_SUPPORT
    // Unit testing for the environment variable
    Harness::SetEnv("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD","67108864");
    // The large object cache reads the threshold environment variable during initialization.
    // Reset the value before the test.
    loc->hugeSizeThreshold = 0;
    loc->init(&defaultMemPool->extMemPool);
    TestHugeSizeThresholdImpl(loc, 64 * MByte, true);
#endif
    // Unit testing for scalable_allocation_mode
    scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 56 * MByte);
    TestHugeSizeThresholdImpl(loc, 56 * MByte, true);
}
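
// Usage sketch for applications (illustrative values): the same threshold exercised by the
// test above can be configured programmatically or via the environment before the first allocation:
//     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 32 * MByte);
//     // or: export TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=33554432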

int TestMain () {
    scalable_allocation_mode(USE_HUGE_PAGES, 0);
#if !__TBB_WIN8UI_SUPPORT
    Harness::SetEnv("TBB_MALLOC_USE_HUGE_PAGES","yes");
#endif
    checkNoHugePages();
    // backreference requires that initialization has been done
    if(!isMallocInitialized()) doInitialization();
    checkNoHugePages();
    // to succeed, leak detection must be the 1st memory-intensive test
    TestBackRef();
    TestCleanAllBuffers<4*1024>();
    TestCleanAllBuffers<16*1024>();
    TestCleanThreadBuffers();
    TestPools();
    TestBackend();

#if MALLOC_CHECK_RECURSION
    for( int p=MaxThread; p>=MinThread; --p ) {
        TestStartupAlloc::initBarrier( p );
        NativeParallelFor( p, TestStartupAlloc() );
        ASSERT(!firstStartupBlock, "Startup heap memory leak detected");
    }
#endif

    TestLargeObjectCache();
    TestObjectRecognition();
    TestBitMask();
    TestHeapLimit();
    TestLOC();
    TestSlabAlignment();
    TestReallocDecreasing();
    TestLOCacheBinsConverter();
    TestHugeSizeThreshold();

#if __linux__
    if (isTHPEnabledOnMachine()) {
        TestTHP();
    } else {
        REMARK("Transparent Huge Pages are not supported on the system - test skipped\n");
    }
#endif

#if !__TBB_WIN8UI_SUPPORT && defined(_WIN32)
    TestFunctionReplacementLog();
#endif
    return Harness::Done;
}