1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #include "tbbmalloc_internal.h"
18 #include "tbb/tbb_environment.h"
19 
20 /******************************* Allocation of large objects *********************************************/
21 
22 namespace rml {
23 namespace internal {
24 
25 /* ---------------------------- Large Object cache init section ---------------------------------------- */
26 
init(ExtMemoryPool * memPool)27 void LargeObjectCache::init(ExtMemoryPool *memPool)
28 {
29     extMemPool = memPool;
30     // scalable_allocation_mode can be called before allocator initialization, respect this manual request
31     if (hugeSizeThreshold == 0) {
32         // Huge size threshold initialization if environment variable was set
33         long requestedThreshold = tbb::internal::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
34         // Read valid env or initialize by default with max possible values
35         if (requestedThreshold != -1) {
36             setHugeSizeThreshold(requestedThreshold);
37         } else {
38             setHugeSizeThreshold(maxHugeSize);
39         }
40     }
41 }
42 
43 /* ----------------------------- Huge size threshold settings ----------------------------------------- */
44 
setHugeSizeThreshold(size_t value)45 void LargeObjectCache::setHugeSizeThreshold(size_t value)
46 {
47     // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
48     if (value <= maxHugeSize) {
49         hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;
50 
51         // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
52         largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
53         hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
54     }
55 }
56 
sizeInCacheRange(size_t size)57 bool LargeObjectCache::sizeInCacheRange(size_t size)
58 {
59     return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
60 }
61 
62 /* ----------------------------------------------------------------------------------------------------- */
63 
64 /* The functor called by the aggregator for the operation list */
/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;       // bin all batched operations target
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask; // non-empty-bin mask of the owning cache
    const int idx;                                                   // index of the bin inside the cache

    LargeMemoryBlock *toRelease; // blocks evicted while processing; caller returns them to the backend
    bool needCleanup;            // true if a cache-wide cleanup should follow (see isCleanupNeeded)
    uintptr_t currTime;          // cache time at the end of the processed batch

    /* Do preprocessing under the operation list. */
    /* All the OP_PUT_LIST operations are merged in the one operation.
       All OP_GET operations are merged with the OP_PUT_LIST operations but
       it demands the update of the moving average value in the bin.
       Only the last OP_CLEAN_TO_THRESHOLD operation has sense.
       The OP_CLEAN_ALL operation also should be performed only once.
       Moreover it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const  bin;

        /* Contains the relative time in the operation list.
           It counts in the reverse order since the aggregator also
           provides operations in the reverse order. */
        uintptr_t lclTime;

        /* opGet contains only OP_GET operations which cannot be merge with OP_PUT operations
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operations */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* if the OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation* opGet, uintptr_t currTime);
        void addToPutList( LargeMemoryBlock *head, LargeMemoryBlock *tail, int num );

    public:
        // NOTE(review): lastGet, tail and putListNum are not initialized here -
        // lastGet is assigned before first read (when lastGetOpTime is first
        // set), and tail/putListNum are assigned in addToPutList before head
        // becomes non-NULL; confirm before relying on them elsewhere.
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false)  {}
        void operator()(CacheBinOperation* opList);
        // lclTime is counted down from 0, so its negation is the batch length.
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};
137 
138 /* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */
139 
// The list of structures which describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res; // out: block taken from the bin (left NULL on a miss)
    size_t size;            // requested object size
    uintptr_t currTime;     // filled during preprocessing with the local operation time
};
147 
// Return a list of blocks to the cache bin.
struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head; // singly-linked (via next) list of blocks to cache
};
152 
// Evict blocks older than the bin's age threshold.
struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res; // out: detached list of evicted blocks
    uintptr_t currTime;     // cache time the ages are measured against
};
158 
// Evict everything from the bin.
struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res; // out: detached list of all previously cached blocks
};
163 
// Account a change of the bin's used size (deltas are summed during preprocessing).
struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size; // usedSize change to apply
};
168 
// Storage large/aligned enough for any of the operation payloads above.
// All members are private: the union only reserves space; access always
// goes through opCast() on the enclosing CacheBinOperation.
union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};
177 
178 // Forward declarations
179 template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);
180 
// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    // Copy the typed payload into the untyped data storage and tag the
    // operation; the initial status defaults to CBST_WAIT.
    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data; // payload; read/written only through opCast()

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};
197 
198 // The opCast function can be the member of CacheBinOperation but it will have
199 // small stylistic ambiguity: it will look like a getter (with a cast) for the
200 // CacheBinOperation::data data member but it should return a reference to
201 // simplify the code from a lot of getter/setter calls. So the global cast in
202 // the style of static_cast (or reinterpret_cast) seems to be more readable and
203 // have more explicit semantic.
204 template <typename OpTypeData>
opCast(CacheBinOperation & op)205 OpTypeData& opCast(CacheBinOperation &op) {
206     return *reinterpret_cast<OpTypeData*>(&op.data);
207 }
208 
209 /* ------------------------------------------------------------------------ */
210 
#if __TBB_MALLOC_LOCACHE_STAT
// Global counters for large-object-cache statistics: call/hit counts and
// allocated/hit volume in KB.
intptr_t mallocCalls, cacheHits;
intptr_t memAllocKB, memHitKB;
#endif
215 
/* Compare two cache timestamps treating them as points on a wrapping
   (modulo 2^N) clock: a precedes b when b is no more than half of the
   value space ahead of a. */
inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    const bool ordinary = a < b && (b - a < UINTPTR_MAX/2);
    const bool wrapped  = a > b && (a - b > UINTPTR_MAX/2);
    return ordinary || wrapped;
}
221 
222 /* ----------------------------------- Operation processing methods ------------------------------------ */
223 
/* Mark the operation as completed so the thread that submitted it can
   proceed. FencedStore is used so the status update is not reordered with
   the preceding writes to the operation's result fields. */
template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    FencedStore( (intptr_t&)(op->status), CBST_DONE );
}
229 
230 template<typename Props> void CacheBinFunctor<Props>::
addOpToOpList(CacheBinOperation * op,CacheBinOperation ** opList) const231     OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
232 {
233     op->next = *opList;
234     *opList = op;
235 }
236 
/* Try to satisfy a queued OP_GET directly from the pending put-list.
   On success the head block is handed to the get, the operation is
   committed, and the bin's mean hit range is updated with the local-time
   distance between the put and the get. Returns false when the put-list
   is empty. */
template<typename Props> bool CacheBinFunctor<Props>::
    OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}
255 
256 template<typename Props> void CacheBinFunctor<Props>::
addToPutList(LargeMemoryBlock * h,LargeMemoryBlock * t,int num)257     OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
258 {
259     if ( head ) {
260         MALLOC_ASSERT( tail, ASSERT_TEXT );
261         tail->next = h;
262         h->prev = tail;
263         tail = t;
264         putListNum += num;
265     } else {
266         head = h;
267         tail = t;
268         putListNum = num;
269     }
270 }
271 
/* Walk the aggregated operation list (provided newest-first) and
   classify/merge the operations so the main functor has minimal work to do:
   gets are paired with puts where possible, used-size deltas are summed,
   and clean requests are reduced to a single effective one. */
template<typename Props> void CacheBinFunctor<Props>::
    OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                // Try to satisfy the get from a pending put right away;
                // otherwise queue it for processing against the bin.
                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = NULL;

                int num = 0;
                do {
                    // we do not kept prev pointers during assigning blocks to bins, set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing
                    // when global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != NULL);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                // Newly available blocks may satisfy previously queued gets.
                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            // Merge all used-size deltas into one; can complete immediately.
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            // Clean-all subsumes any clean-to-threshold requests.
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}
354 
/* Execute the batched operation list for the bin: preprocess/merge the
   operations, then apply the surviving gets, puts and cleans to the bin
   while converting the batch-local (negative) times to global cache time. */
template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        // Reserve a slice of the global cache clock for this batch.
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        // Bin ran dry: remaining gets miss; record the miss
                        // time so the bin can age/forget its adaptive state.
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = NULL;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        // The first clean operation carries the merged request; the rest of
        // the clean list only needs to be committed.
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != NULL) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    // Apply the merged used-size delta, if any.
    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
432 /* ----------------------------------------------------------------------------------------------------- */
433 /* --------------------------- Methods for creating and executing operations --------------------------- */
/* Submit op to the bin's aggregator. When this thread ends up executing the
   aggregated batch, finish the deferred work the functor collected: return
   evicted blocks to the backend and, if the batch advanced the cache clock
   far enough, run a regular cache cleanup. */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}
448 
/* Take a cached block of the given size from the bin through the aggregator.
   Returns NULL on a cache miss. */
template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=NULL;
    // currTime is left zero-initialized; it is filled in during preprocessing.
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}
458 
/* Hand a list of blocks (all of this bin's size) over to the cache.
   The operation descriptor is placement-new'ed right after the first block's
   LargeMemoryBlock header (the assertion guarantees it fits), so no extra
   allocation is needed. CBST_NOWAIT submits the operation without waiting
   for completion — NOTE(review): confirm against the aggregator semantics. */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize, "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}
468 
/* Age-based cleanup entry point: if the bin's oldest block exceeds the
   bin's age threshold, run a CLEAN_TO_THRESHOLD operation through the
   aggregator and return the evicted blocks to the backend.
   Returns true if anything was released. */
template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    /* oldest may be more recent then age, that's why cast to signed type
       was used. age overflow is also processed correctly. */
    if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    // Return every evicted block to the backend allocator.
    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}
491 
/* Flush the whole bin: run a CLEAN_ALL operation through the aggregator and
   return every cached block to the backend. Returns true if any block was
   released. The assertion checks the evicted list stays ordered from newest
   to oldest age. */
template<typename Props> bool LargeObjectCacheImpl<Props>::
    CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    if (last) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}
514 
/* Account a used-size change for the bin through the aggregator
   (deltas from concurrent callers are summed during preprocessing). */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}
522 
523 /* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */
524 
/* Add the list [head; tail] of num equal-sized blocks to the bin.
   May decline to cache one block (the first release of a size below the
   huge threshold) and returns it for the caller to free; returns NULL when
   everything was cached. Must be called under the aggregator. */
template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize -= num*size;
    MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = NULL;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // 1st object of such size was released.
        // Do not cache it, and remember when this occurs
        // to take into account during cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = NULL;
        else
            head = NULL;
        num--;
    }
    if (num) {
        // add [head;tail] list to cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last) {
            MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
            oldest = tail->age;
            last = tail;
        }

        cachedSize += num*size;
    }

    // No used object, and nothing in the bin, mark the bin as empty
    if (!usedSize && !first)
        bitMask->set(idx, false);

    return toRelease;
}
568 
569 template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
get()570     CacheBin::get()
571 {
572     LargeMemoryBlock *result=first;
573     if (result) {
574         first = result->next;
575         if (first)
576             first->prev = NULL;
577         else {
578             last = NULL;
579             oldest = 0;
580         }
581     }
582 
583     return result;
584 }
585 
/* Reset the bin's adaptive state (ageThreshold, lastCleanedAge) when the bin
   has not been hit for much longer than its usual hit interval. */
template<typename Props> void LargeObjectCacheImpl<Props>::
    CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely-used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early and
    // so prevents good caching, while if too high, caching blocks
    // with unrelated usage pattern occurs.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    if (ageThreshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
    else if (lastCleanedAge)
        // No threshold yet: fall back to the age of the first uncached block.
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold = 0;
    }

}
609 
/* Detach from the tail (oldest end) of the bin every block whose age relative
   to currTime exceeds ageThreshold, and return that list (NULL when nothing
   qualifies). Must be called under the aggregator. */
template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
    CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent then age, that's why cast to signed type
    was used. age overflow is also processed correctly. */
    if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    // Walk from the oldest block towards the newest, dropping expired ones.
    do {
#if MALLOC_DEBUG
        // check that list ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
            ASSERT_TEXT);
        nextAge = last->age;
#endif
        cachedSize -= last->unalignedSize;
        last = last->prev;
    } while (last && (intptr_t)(currTime - last->age) > ageThreshold);

    LargeMemoryBlock *toRelease = NULL;
    if (last) {
        // Some blocks survived: cut the list after the new last block.
        toRelease = last->next;
        oldest = last->age;
        last->next = NULL;
    } else {
        // Everything expired: the bin is now empty.
        toRelease = first;
        first = NULL;
        oldest = 0;
        if (!usedSize)
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}
648 
649 template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
cleanAll(BinBitMask * bitMask,int idx)650     CacheBin::cleanAll(BinBitMask *bitMask, int idx)
651 {
652     if (!last) return NULL;
653 
654     LargeMemoryBlock *toRelease = first;
655     last = NULL;
656     first = NULL;
657     oldest = 0;
658     cachedSize = 0;
659     if (!usedSize)
660         bitMask->set(idx, false);
661 
662     return toRelease;
663 }
664 
665 /* ----------------------------------------------------------------------------------------------------- */
666 
667 template<typename Props> size_t LargeObjectCacheImpl<Props>::
reportStat(int num,FILE * f)668     CacheBin::reportStat(int num, FILE *f)
669 {
670 #if __TBB_MALLOC_LOCACHE_STAT
671     if (first)
672         printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
673                num, num*Props::CacheStep+Props::MinSize,
674                cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
675 #else
676     suppress_unused_warning(num);
677     suppress_unused_warning(f);
678 #endif
679     return cachedSize;
680 }
681 
682 // Release objects from cache blocks that are older than ageThreshold
// Release objects from cache blocks that are older than ageThreshold.
// When the cache has stayed too large for a while, restarts the pass in
// threshold-decreasing mode. Returns true if any memory was released.
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // Threshold settings is below this cache or starts from zero index
    if (hugeSizeThresholdIdx == 0) return false;

    // Starting searching for bin that is less than huge size threshold (can be cleaned-up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    // Visit only non-empty bins, from the largest cleanable one downwards.
    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC > 2 && binsSummary.isLOCTooLarge()) {
            // if LOC is too large for quite long time, decrease the threshold
            // based on bin hit statistics.
            // For this, redo cleanup from the beginning.
            // Note: on this iteration total usedSz can be not too large
            // in comparison to total cachedSz, as we calculated it only
            // partially. We are ok with it.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to find if LOC was too large for some time continuously,
    // so OK with races between incrementing and zeroing, but incrementing
    // must be atomic.
    if (binsSummary.isLOCTooLarge())
        AtomicIncrement(tooLargeLOC);
    else
        tooLargeLOC = 0;
    return released;
}
725 
726 template<typename Props>
cleanAll(ExtMemoryPool * extMemPool)727 bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
728 {
729     bool released = false;
730     for (int i = numBins-1; i >= 0; i--) {
731         released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
732     }
733     return released;
734 }
735 
736 template<typename Props>
reset()737 void LargeObjectCacheImpl<Props>::reset() {
738     tooLargeLOC = 0;
739     for (int i = numBins-1; i >= 0; i--)
740         bin[i].init();
741     bitMask.reset();
742 }
743 
744 #if __TBB_MALLOC_WHITEBOX_TEST
745 template<typename Props>
getLOCSize() const746 size_t LargeObjectCacheImpl<Props>::getLOCSize() const
747 {
748     size_t size = 0;
749     for (int i = numBins-1; i >= 0; i--)
750         size += bin[i].getSize();
751     return size;
752 }
753 
getLOCSize() const754 size_t LargeObjectCache::getLOCSize() const
755 {
756     return largeCache.getLOCSize() + hugeCache.getLOCSize();
757 }
758 
759 template<typename Props>
getUsedSize() const760 size_t LargeObjectCacheImpl<Props>::getUsedSize() const
761 {
762     size_t size = 0;
763     for (int i = numBins-1; i >= 0; i--)
764         size += bin[i].getUsedSize();
765     return size;
766 }
767 
getUsedSize() const768 size_t LargeObjectCache::getUsedSize() const
769 {
770     return largeCache.getUsedSize() + hugeCache.getUsedSize();
771 }
772 #endif // __TBB_MALLOC_WHITEBOX_TEST
773 
/* Decide whether advancing the cache clock by `range` starting at `currTime`
   should trigger a regular cleanup: the step is at least cacheCleanupFreq,
   the counter overflows, or the step crosses a multiple of cacheCleanupFreq. */
inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow, 0 is power of 2, do cleanup
        // (prev;prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
781 
// Run one cleanup pass over both sub-caches at cache time 'currTime'.
// With doThreshDecr set, the pass additionally lowers the bins' aging
// thresholds (aggressive cleanup); otherwise per-thread caches are first
// marked unused so stale entries age out.
bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    // Non-short-circuit '|' is deliberate: both sub-caches must be cleaned
    // even when the first one already released memory.
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
        | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}
789 
decreasingCleanup()790 bool LargeObjectCache::decreasingCleanup()
791 {
792     return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/true);
793 }
794 
regularCleanup()795 bool LargeObjectCache::regularCleanup()
796 {
797     return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/false);
798 }
799 
cleanAll()800 bool LargeObjectCache::cleanAll()
801 {
802     return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
803 }
804 
// Reinitialize both sub-caches to their initial empty state.
void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}
810 
811 template<typename Props>
get(ExtMemoryPool * extMemoryPool,size_t size)812 LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
813 {
814     int idx = Props::sizeToIdx(size);
815 
816     LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);
817 
818     if (lmb) {
819         MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
820         STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
821     }
822     return lmb;
823 }
824 
// Adjust the used-size accounting of the bin that owns 'size'.
template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    // For 'decrease' the unsigned negation wraps around; presumably
    // updateUsedSize treats the delta as signed — TODO confirm against
    // CacheBin::updateUsedSize.
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}
832 
833 #if __TBB_MALLOC_LOCACHE_STAT
834 template<typename Props>
reportStat(FILE * f)835 void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
836 {
837     size_t cachedSize = 0;
838     for (int i=0; i<numBins; i++)
839         cachedSize += bin[i].reportStat(i, f);
840     fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
841 }
842 
reportStat(FILE * f)843 void LargeObjectCache::reportStat(FILE *f)
844 {
845     largeCache.reportStat(f);
846     hugeCache.reportStat(f);
847     fprintf(f, "cache time %lu\n", cacheCurrTime);
848 }
849 #endif
850 
851 template<typename Props>
putList(ExtMemoryPool * extMemPool,LargeMemoryBlock * toCache)852 void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
853 {
854     int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);
855 
856     MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
857     bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
858 }
859 
// Route a used-size accounting update to the sub-cache that owns 'size'.
void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
    // NOTE(review): sizes >= maxHugeSize fall through with no accounting —
    // presumably such blocks are never cached (cf. sizeInCacheRange checks
    // in put/get); confirm this is intentional.
}
867 
getCurrTime()868 uintptr_t LargeObjectCache::getCurrTime()
869 {
870     return (uintptr_t)AtomicIncrement((intptr_t&)cacheCurrTime);
871 }
872 
getCurrTimeRange(uintptr_t range)873 uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
874 {
875     return (uintptr_t)AtomicAdd((intptr_t&)cacheCurrTime, range) + 1;
876 }
877 
// Account for a reallocation: the old size leaves the cache statistics and
// the new size enters them.
void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    // NOTE(review): only newSize is bin-aligned here — presumably oldSize is
    // already an aligned unalignedSize from the block header; confirm.
    updateCacheState(increase, alignToBin(newSize));
}
883 
alignToBin(size_t size)884 size_t LargeObjectCache::alignToBin(size_t size) {
885     return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
886 }
887 
888 // Used for internal purpose
sizeToIdx(size_t size)889 int LargeObjectCache::sizeToIdx(size_t size)
890 {
891     MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
892     return size < maxLargeSize ?
893         LargeCacheType::sizeToIdx(size) :
894         LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
895 }
896 
// Return a list of large blocks to the cache. Blocks that fall into the same
// bin are regrouped into one sub-list and handed to that bin in a single
// call; blocks outside the cacheable size range go straight to the backend.
void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    // 'curr' is the head of the sub-list being built; 'toProcess' is the
    // remainder of the original list still awaiting classification.
    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks fitting to same bin. Not use more efficient sorting
        // algorithm because list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                // Append b to the sub-list and splice it out of the
                // remaining doubly-linked list.
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        // Terminate the sub-list and dispatch it to the owning sub-cache.
        tail->next = NULL;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}
934 
put(LargeMemoryBlock * largeBlock)935 void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
936 {
937     size_t blockSize = largeBlock->unalignedSize;
938     if (sizeInCacheRange(blockSize)) {
939         largeBlock->next = NULL;
940         if (blockSize < maxLargeSize)
941             largeCache.putList(extMemPool, largeBlock);
942         else
943             hugeCache.putList(extMemPool, largeBlock);
944     } else {
945         extMemPool->backend.returnLargeObject(largeBlock);
946     }
947 }
948 
get(size_t size)949 LargeMemoryBlock *LargeObjectCache::get(size_t size)
950 {
951     MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
952     if (sizeInCacheRange(size)) {
953         return size < maxLargeSize ?
954             largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
955     }
956     return NULL;
957 }
958 
// Allocate a large object of 'allocationSize' bytes: first try the large
// object cache, then fall back to a fresh block from the backend.
// Returns NULL on failure (no backreference slot or no backend memory).
LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    AtomicIncrement(mallocCalls);
    AtomicAdd(memAllocKB, allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        // Cache miss: reserve a backreference slot before asking the backend.
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            // NOTE(review): compensates accounting presumably done inside
            // loc.get() on the miss path — confirm against CacheBin::get.
            loc.updateCacheState(decrease, allocationSize);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        AtomicIncrement(cacheHits);
        AtomicAdd(memHitKB, allocationSize/1024);
#endif
    }
    return lmb;
}
989 
// Release a single large block: hand it to the large object cache, which
// either keeps it or returns it to the backend.
void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}
994 
// Release a linked list of large blocks via the large object cache.
void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}
999 
// Gentle cleanup: run the LOC's regular (non-aggressive) cleanup pass.
// Returns true if any memory was released.
bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}
1004 
// Aggressive cleanup: flush every caching layer down to the backend.
// Returns true if any layer released memory. The order of the steps below
// is significant.
bool ExtMemoryPool::hardCachesCleanup()
{
    // thread-local caches must be cleaned before LOC,
    // because object from thread-local cache can be released to LOC
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    // Backend goes last so it can coalesce and release what the layers
    // above just returned to it.
    ret |= backend.clean();
    return ret;
}
1015 
1016 #if BACKEND_HAS_MREMAP
// Grow/shrink a large object in place via the backend's mremap support.
// Returns the (possibly moved) user pointer, or NULL if remap failed.
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    // Capture the old block's size before remapping: the header at 'ptr'
    // may no longer be mapped once backend.remap succeeds.
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        // On success, re-read the header at the new location and fix up the
        // cache's used-size accounting.
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
1027 #endif /* BACKEND_HAS_MREMAP */
1028 
1029 /*********** End allocation of large objects **********/
1030 
1031 } // namespace internal
1032 } // namespace rml
1033 
1034