/*
    Copyright (c) 2005-2020 Intel Corporation

    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
    You may obtain a copy of the License at

        http://www.apache.org/licenses/LICENSE-2.0

    Unless required by applicable law or agreed to in writing, software
    distributed under the License is distributed on an "AS IS" BASIS,
    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    See the License for the specific language governing permissions and
    limitations under the License.
*/

#include "tbbmalloc_internal.h"
#include "tbb/tbb_environment.h"

/******************************* Allocation of large objects *********************************************/

namespace rml {
namespace internal {

/* ---------------------------- Large Object cache init section ---------------------------------------- */
void LargeObjectCache::init(ExtMemoryPool *memPool)
{
    extMemPool = memPool;
    // scalable_allocation_mode can be called before allocator initialization; respect this manual request
    if (hugeSizeThreshold == 0) {
        // Initialize the huge size threshold if the environment variable was set
        long requestedThreshold = tbb::internal::GetIntegralEnvironmentVariable("TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD");
        // Read a valid environment value, or initialize with the maximum possible value by default
        if (requestedThreshold != -1) {
            setHugeSizeThreshold(requestedThreshold);
        } else {
            setHugeSizeThreshold(maxHugeSize);
        }
    }
}
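
// A minimal configuration sketch (the 4 MB value is only an illustration): per
// the comment above, the threshold can come either from the environment or
// from a manual scalable_allocation_mode request made before the first
// allocation, assuming the TBBMALLOC_SET_HUGE_SIZE_THRESHOLD mode parameter:
//
//     // shell: export TBB_MALLOC_SET_HUGE_SIZE_THRESHOLD=4194304
//     // or programmatically:
//     #include "tbb/scalable_allocator.h"
//     scalable_allocation_mode(TBBMALLOC_SET_HUGE_SIZE_THRESHOLD, 4*1024*1024);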

/* ----------------------------- Huge size threshold settings ----------------------------------------- */

void LargeObjectCache::setHugeSizeThreshold(size_t value)
{
    // Valid in the huge cache range: [MaxLargeSize, MaxHugeSize].
    if (value <= maxHugeSize) {
        hugeSizeThreshold = value >= maxLargeSize ? alignToBin(value) : maxLargeSize;

        // Calculate local indexes for the global threshold size (for fast search inside a regular cleanup)
        largeCache.hugeSizeThresholdIdx = LargeCacheType::numBins;
        hugeCache.hugeSizeThresholdIdx = HugeCacheType::sizeToIdx(hugeSizeThreshold);
    }
}

bool LargeObjectCache::sizeInCacheRange(size_t size)
{
    return size <= maxHugeSize && (size <= defaultMaxHugeSize || size >= hugeSizeThreshold);
}
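
// Illustration (the numbers are hypothetical, not the configured constants):
// with defaultMaxHugeSize at 8 MB and hugeSizeThreshold raised to 64 MB, a
// 4 MB block is cacheable (below defaultMaxHugeSize) and a 64 MB block is
// cacheable (at or above the threshold), but a 16 MB block falls into the gap
// between the two and is returned straight to the backend.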

/* ----------------------------------------------------------------------------------------------------- */

/* The functor called by the aggregator for the operation list */
template<typename Props>
class CacheBinFunctor {
    typename LargeObjectCacheImpl<Props>::CacheBin *const bin;
    ExtMemoryPool *const extMemPool;
    typename LargeObjectCacheImpl<Props>::BinBitMask *const bitMask;
    const int idx;

    LargeMemoryBlock *toRelease;
    bool needCleanup;
    uintptr_t currTime;

    /* Preprocessing over the operation list:
       all OP_PUT_LIST operations are merged into one operation;
       all OP_GET operations are merged with the OP_PUT_LIST operations, but
       this requires updating the moving average value in the bin;
       only the last OP_CLEAN_TO_THRESHOLD operation is meaningful;
       the OP_CLEAN_ALL operation should also be performed only once,
       and moreover it cancels the OP_CLEAN_TO_THRESHOLD operation. */
    class OperationPreprocessor {
        // TODO: remove the dependency on CacheBin.
        typename LargeObjectCacheImpl<Props>::CacheBin *const bin;

        /* Contains the relative time in the operation list.
           It counts in reverse order since the aggregator also
           provides operations in reverse order. */
        uintptr_t lclTime;

        /* opGet contains only the OP_GET operations that cannot be merged with OP_PUT operations;
           opClean contains all OP_CLEAN_TO_THRESHOLD and OP_CLEAN_ALL operations. */
        CacheBinOperation *opGet, *opClean;
        /* The time of the last OP_CLEAN_TO_THRESHOLD operation. */
        uintptr_t cleanTime;

        /* lastGetOpTime - the time of the last OP_GET operation.
           lastGet - the same meaning as CacheBin::lastGet. */
        uintptr_t lastGetOpTime, lastGet;

        /* The total sum of all usedSize changes requested with CBOP_UPDATE_USED_SIZE operations. */
        size_t updateUsedSize;

        /* The list of blocks for the OP_PUT_LIST operation. */
        LargeMemoryBlock *head, *tail;
        int putListNum;

        /* Whether OP_CLEAN_ALL is requested. */
        bool isCleanAll;

        inline void commitOperation(CacheBinOperation *op) const;
        inline void addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const;
        bool getFromPutList(CacheBinOperation *opGet, uintptr_t currTime);
        void addToPutList(LargeMemoryBlock *head, LargeMemoryBlock *tail, int num);

    public:
        OperationPreprocessor(typename LargeObjectCacheImpl<Props>::CacheBin *bin) :
            bin(bin), lclTime(0), opGet(NULL), opClean(NULL), cleanTime(0),
            lastGetOpTime(0), updateUsedSize(0), head(NULL), isCleanAll(false) {}
        void operator()(CacheBinOperation* opList);
        uintptr_t getTimeRange() const { return -lclTime; }

        friend class CacheBinFunctor;
    };

public:
    CacheBinFunctor(typename LargeObjectCacheImpl<Props>::CacheBin *bin, ExtMemoryPool *extMemPool,
                    typename LargeObjectCacheImpl<Props>::BinBitMask *bitMask, int idx) :
        bin(bin), extMemPool(extMemPool), bitMask(bitMask), idx(idx), toRelease(NULL), needCleanup(false) {}
    void operator()(CacheBinOperation* opList);

    bool isCleanupNeeded() const { return needCleanup; }
    LargeMemoryBlock *getToRelease() const { return toRelease; }
    uintptr_t getCurrTime() const { return currTime; }
};

/* ---------------- Cache Bin Aggregator Operation Helpers ---------------- */

// The list of structures that describe the operation data
struct OpGet {
    static const CacheBinOperationType type = CBOP_GET;
    LargeMemoryBlock **res;
    size_t size;
    uintptr_t currTime;
};

struct OpPutList {
    static const CacheBinOperationType type = CBOP_PUT_LIST;
    LargeMemoryBlock *head;
};

struct OpCleanToThreshold {
    static const CacheBinOperationType type = CBOP_CLEAN_TO_THRESHOLD;
    LargeMemoryBlock **res;
    uintptr_t currTime;
};

struct OpCleanAll {
    static const CacheBinOperationType type = CBOP_CLEAN_ALL;
    LargeMemoryBlock **res;
};

struct OpUpdateUsedSize {
    static const CacheBinOperationType type = CBOP_UPDATE_USED_SIZE;
    size_t size;
};

union CacheBinOperationData {
private:
    OpGet opGet;
    OpPutList opPutList;
    OpCleanToThreshold opCleanToThreshold;
    OpCleanAll opCleanAll;
    OpUpdateUsedSize opUpdateUsedSize;
};

// Forward declarations
template <typename OpTypeData> OpTypeData& opCast(CacheBinOperation &op);

// Describes the aggregator operation
struct CacheBinOperation : public MallocAggregatedOperation<CacheBinOperation>::type {
    CacheBinOperationType type;

    template <typename OpTypeData>
    CacheBinOperation(OpTypeData &d, CacheBinOperationStatus st = CBST_WAIT) {
        opCast<OpTypeData>(*this) = d;
        type = OpTypeData::type;
        MallocAggregatedOperation<CacheBinOperation>::type::status = st;
    }
private:
    CacheBinOperationData data;

    template <typename OpTypeData>
    friend OpTypeData& opCast(CacheBinOperation &op);
};

// The opCast function could be a member of CacheBinOperation, but that would
// create a small stylistic ambiguity: it would look like a getter (with a cast)
// for the CacheBinOperation::data member, yet it must return a reference to
// spare the code a lot of getter/setter calls. So a global cast in the style
// of static_cast (or reinterpret_cast) seems more readable and has more
// explicit semantics.
template <typename OpTypeData>
OpTypeData& opCast(CacheBinOperation &op) {
    return *reinterpret_cast<OpTypeData*>(&op.data);
}
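
// Usage sketch for the cast: the payload of an operation whose type tag is
// known can be read and written through the returned reference, e.g.
//
//     OpGet &getData = opCast<OpGet>(*op); // valid only when op->type == CBOP_GET
//     *getData.res = block;                // 'block' stands for any LargeMemoryBlock*
//                                          // the handler has produced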

/* ------------------------------------------------------------------------ */

#if __TBB_MALLOC_LOCACHE_STAT
intptr_t mallocCalls, cacheHits;
intptr_t memAllocKB, memHitKB;
#endif

inline bool lessThanWithOverflow(intptr_t a, intptr_t b)
{
    return (a < b && (b - a < UINTPTR_MAX/2)) ||
           (a > b && (a - b > UINTPTR_MAX/2));
}
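
// Wraparound illustration: cache timestamps may overflow, so a plain a < b is
// not enough. If a was taken just before the counter wrapped and b just after,
// then numerically a > b while a - b exceeds half the value range, so the
// second clause still reports a as "earlier" than b. The ordering is reliable
// as long as the two timestamps are less than half the range apart.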

/* ----------------------------------- Operation processing methods ------------------------------------ */

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::commitOperation(CacheBinOperation *op) const
{
    FencedStore( (intptr_t&)(op->status), CBST_DONE );
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::addOpToOpList(CacheBinOperation *op, CacheBinOperation **opList) const
{
    op->next = *opList;
    *opList = op;
}

template<typename Props> bool CacheBinFunctor<Props>::
OperationPreprocessor::getFromPutList(CacheBinOperation *opGet, uintptr_t currTime)
{
    if ( head ) {
        uintptr_t age = head->age;
        LargeMemoryBlock *next = head->next;
        *opCast<OpGet>(*opGet).res = head;
        commitOperation( opGet );
        head = next;
        putListNum--;
        MALLOC_ASSERT( putListNum>=0, ASSERT_TEXT );

        // use moving average with current hit interval
        bin->updateMeanHitRange( currTime - age );
        return true;
    }
    return false;
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::addToPutList(LargeMemoryBlock *h, LargeMemoryBlock *t, int num)
{
    if ( head ) {
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = h;
        h->prev = tail;
        tail = t;
        putListNum += num;
    } else {
        head = h;
        tail = t;
        putListNum = num;
    }
}

template<typename Props> void CacheBinFunctor<Props>::
OperationPreprocessor::operator()(CacheBinOperation* opList)
{
    for ( CacheBinOperation *op = opList, *opNext; op; op = opNext ) {
        opNext = op->next;
        switch ( op->type ) {
        case CBOP_GET:
            {
                lclTime--;
                if ( !lastGetOpTime ) {
                    lastGetOpTime = lclTime;
                    lastGet = 0;
                } else if ( !lastGet ) lastGet = lclTime;

                if ( !getFromPutList(op,lclTime) ) {
                    opCast<OpGet>(*op).currTime = lclTime;
                    addOpToOpList( op, &opGet );
                }
            }
            break;

        case CBOP_PUT_LIST:
            {
                LargeMemoryBlock *head = opCast<OpPutList>(*op).head;
                LargeMemoryBlock *curr = head, *prev = NULL;

                int num = 0;
                do {
                    // prev pointers were not kept while assigning blocks to bins; set them now
                    curr->prev = prev;

                    // Save the local times to the memory blocks. Local times are necessary
                    // for the getFromPutList function, which updates the hit range value in
                    // CacheBin when OP_GET and OP_PUT_LIST operations are merged successfully.
                    // The age will be updated to the correct global time after preprocessing,
                    // when the global cache time is updated.
                    curr->age = --lclTime;

                    prev = curr;
                    num += 1;

                    STAT_increment(getThreadId(), ThreadCommonCounters, cacheLargeObj);
                } while ((curr = curr->next) != NULL);

                LargeMemoryBlock *tail = prev;
                addToPutList(head, tail, num);

                while ( opGet ) {
                    CacheBinOperation *next = opGet->next;
                    if ( !getFromPutList(opGet, opCast<OpGet>(*opGet).currTime) )
                        break;
                    opGet = next;
                }
            }
            break;

        case CBOP_UPDATE_USED_SIZE:
            updateUsedSize += opCast<OpUpdateUsedSize>(*op).size;
            commitOperation( op );
            break;

        case CBOP_CLEAN_ALL:
            isCleanAll = true;
            addOpToOpList( op, &opClean );
            break;

        case CBOP_CLEAN_TO_THRESHOLD:
            {
                uintptr_t currTime = opCast<OpCleanToThreshold>(*op).currTime;
                // We don't worry about currTime overflow since it is a rare
                // occurrence and doesn't affect correctness
                cleanTime = cleanTime < currTime ? currTime : cleanTime;
                addOpToOpList( op, &opClean );
            }
            break;

        default:
            MALLOC_ASSERT( false, "Unknown operation." );
        }
    }
    MALLOC_ASSERT( !( opGet && head ), "Not all put/get pairs are processed!" );
}
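
// A small, hypothetical trace of the merging rules above: suppose a batch
// carries one OP_PUT_LIST with blocks b1->b2 plus two OP_GET requests. Any
// OP_GET seen while the put list is still empty is parked on opGet; once the
// put list gains blocks, parked gets are satisfied from it via getFromPutList
// and committed without ever touching the bin itself. If both gets are served
// this way, the whole batch reduces to bookkeeping plus whatever OP_CLEAN_*
// operations remain on opClean.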

template<typename Props> void CacheBinFunctor<Props>::operator()(CacheBinOperation* opList)
{
    MALLOC_ASSERT( opList, "Empty operation list is passed into operation handler." );

    OperationPreprocessor prep(bin);
    prep(opList);

    if ( uintptr_t timeRange = prep.getTimeRange() ) {
        uintptr_t startTime = extMemPool->loc.getCurrTimeRange(timeRange);
        // endTime is used as the current (base) time since the local time is negative.
        uintptr_t endTime = startTime + timeRange;

        if ( prep.lastGetOpTime && prep.lastGet ) bin->setLastGet(prep.lastGet+endTime);

        if ( CacheBinOperation *opGet = prep.opGet ) {
            bool isEmpty = false;
            do {
#if __TBB_MALLOC_WHITEBOX_TEST
                tbbmalloc_whitebox::locGetProcessed++;
#endif
                const OpGet &opGetData = opCast<OpGet>(*opGet);
                if ( !isEmpty ) {
                    if ( LargeMemoryBlock *res = bin->get() ) {
                        uintptr_t getTime = opGetData.currTime + endTime;
                        // use moving average with current hit interval
                        bin->updateMeanHitRange( getTime - res->age);
                        bin->updateCachedSize( -opGetData.size );
                        *opGetData.res = res;
                    } else {
                        isEmpty = true;
                        uintptr_t lastGetOpTime = prep.lastGetOpTime+endTime;
                        bin->forgetOutdatedState(lastGetOpTime);
                        bin->updateAgeThreshold(lastGetOpTime);
                    }
                }

                CacheBinOperation *opNext = opGet->next;
                bin->updateUsedSize( opGetData.size, bitMask, idx );
                prep.commitOperation( opGet );
                opGet = opNext;
            } while ( opGet );
            if ( prep.lastGetOpTime )
                bin->setLastGet( prep.lastGetOpTime + endTime );
        } else if ( LargeMemoryBlock *curr = prep.head ) {
            curr->prev = NULL;
            while ( curr ) {
                // Update local times to global times
                curr->age += endTime;
                curr=curr->next;
            }
#if __TBB_MALLOC_WHITEBOX_TEST
            tbbmalloc_whitebox::locPutProcessed+=prep.putListNum;
#endif
            toRelease = bin->putList(prep.head, prep.tail, bitMask, idx, prep.putListNum, extMemPool->loc.hugeSizeThreshold);
        }
        needCleanup = extMemPool->loc.isCleanupNeededOnRange(timeRange, startTime);
        currTime = endTime - 1;
    }

    if ( CacheBinOperation *opClean = prep.opClean ) {
        if ( prep.isCleanAll )
            *opCast<OpCleanAll>(*opClean).res = bin->cleanAll(bitMask, idx);
        else
            *opCast<OpCleanToThreshold>(*opClean).res = bin->cleanToThreshold(prep.cleanTime, bitMask, idx);

        CacheBinOperation *opNext = opClean->next;
        prep.commitOperation( opClean );

        while ((opClean = opNext) != NULL) {
            opNext = opClean->next;
            prep.commitOperation(opClean);
        }
    }

    if ( size_t size = prep.updateUsedSize )
        bin->updateUsedSize(size, bitMask, idx);
}
/* ----------------------------------------------------------------------------------------------------- */
/* --------------------------- Methods for creating and executing operations --------------------------- */

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime)
{
    CacheBinFunctor<Props> func( this, extMemPool, bitMask, idx );
    aggregator.execute( op, func, longLifeTime );

    if ( LargeMemoryBlock *toRelease = func.getToRelease()) {
        extMemPool->backend.returnLargeObject(toRelease);
    }

    if ( func.isCleanupNeeded() ) {
        extMemPool->loc.doCleanup( func.getCurrTime(), /*doThreshDecr=*/false);
    }
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *lmb=NULL;
    OpGet data = {&lmb, size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
    return lmb;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx)
{
    MALLOC_ASSERT(sizeof(LargeMemoryBlock)+sizeof(CacheBinOperation)<=head->unalignedSize,
                  "CacheBinOperation is too large to be placed in LargeMemoryBlock!");

    OpPutList data = {head};
    CacheBinOperation *op = new (head+1) CacheBinOperation(data, CBST_NOWAIT);
    ExecuteOperation( op, extMemPool, bitMask, idx, false );
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
CacheBin::cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    /* oldest may be more recent than currTime; that is why a cast to a signed
       type is used. Age overflow is also handled correctly. */
    if (last && (intptr_t)(currTime - oldest) > ageThreshold) {
        OpCleanToThreshold data = {&toRelease, currTime};
        CacheBinOperation op(data);
        ExecuteOperation( &op, extMemPool, bitMask, idx );
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> bool LargeObjectCacheImpl<Props>::
CacheBin::releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx)
{
    LargeMemoryBlock *toRelease = NULL;

    if (last) {
        OpCleanAll data = {&toRelease};
        CacheBinOperation op(data);
        ExecuteOperation(&op, extMemPool, bitMask, idx);
    }
    bool released = toRelease;

    Backend *backend = &extMemPool->backend;
    while ( toRelease ) {
        LargeMemoryBlock *helper = toRelease->next;
        MALLOC_ASSERT(!helper || lessThanWithOverflow(helper->age, toRelease->age),
                      ASSERT_TEXT);
        backend->returnLargeObject(toRelease);
        toRelease = helper;
    }
    return released;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx)
{
    OpUpdateUsedSize data = {size};
    CacheBinOperation op(data);
    ExecuteOperation( &op, extMemPool, bitMask, idx );
}

/* ------------------------------ Unsafe methods used with the aggregator ------------------------------ */

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask, int idx, int num, size_t hugeSizeThreshold)
{
    size_t size = head->unalignedSize;
    usedSize -= num*size;
    MALLOC_ASSERT( !last || (last->age != 0 && last->age != -1U), ASSERT_TEXT );
    MALLOC_ASSERT( (tail==head && num==1) || (tail!=head && num>1), ASSERT_TEXT );
    LargeMemoryBlock *toRelease = NULL;
    if (size < hugeSizeThreshold && !lastCleanedAge) {
        // The first object of this size was released.
        // Do not cache it, and remember when this occurred
        // so it can be taken into account on a cache miss.
        lastCleanedAge = tail->age;
        toRelease = tail;
        tail = tail->prev;
        if (tail)
            tail->next = NULL;
        else
            head = NULL;
        num--;
    }
    if (num) {
        // add the [head;tail] list to the cache
        MALLOC_ASSERT( tail, ASSERT_TEXT );
        tail->next = first;
        if (first)
            first->prev = tail;
        first = head;
        if (!last) {
            MALLOC_ASSERT(0 == oldest, ASSERT_TEXT);
            oldest = tail->age;
            last = tail;
        }

        cachedSize += num*size;
    }

    // No objects in use and nothing in the bin; mark the bin as empty
    if (!usedSize && !first)
        bitMask->set(idx, false);

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::get()
{
    LargeMemoryBlock *result=first;
    if (result) {
        first = result->next;
        if (first)
            first->prev = NULL;
        else {
            last = NULL;
            oldest = 0;
        }
    }

    return result;
}

template<typename Props> void LargeObjectCacheImpl<Props>::
CacheBin::forgetOutdatedState(uintptr_t currTime)
{
    // If the time since the last get is LongWaitFactor times more than ageThreshold
    // for the bin, treat the bin as rarely used and forget everything we know
    // about it.
    // If LongWaitFactor is too small, we forget too early and so prevent
    // useful caching; if it is too high, we end up caching blocks with
    // unrelated usage patterns.
    const uintptr_t sinceLastGet = currTime - lastGet;
    bool doCleanup = false;

    if (ageThreshold)
        doCleanup = sinceLastGet > Props::LongWaitFactor * ageThreshold;
    else if (lastCleanedAge)
        doCleanup = sinceLastGet > Props::LongWaitFactor * (lastCleanedAge - lastGet);

    if (doCleanup) {
        lastCleanedAge = 0;
        ageThreshold = 0;
    }
}
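
// Illustration (the numbers are hypothetical): with Props::LongWaitFactor == 8
// and ageThreshold == 1000 cache-time ticks, a bin that has served no get for
// more than 8000 ticks is treated as rarely used, and its adaptive state
// (lastCleanedAge, ageThreshold) is dropped so that stale history does not
// steer future caching decisions.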

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx)
{
    /* oldest may be more recent than currTime; that is why a cast to a signed
       type is used. Age overflow is also handled correctly. */
    if ( !last || (intptr_t)(currTime - last->age) < ageThreshold ) return NULL;

#if MALLOC_DEBUG
    uintptr_t nextAge = 0;
#endif
    do {
#if MALLOC_DEBUG
        // check that the list is ordered
        MALLOC_ASSERT(!nextAge || lessThanWithOverflow(nextAge, last->age),
                      ASSERT_TEXT);
        nextAge = last->age;
#endif
        cachedSize -= last->unalignedSize;
        last = last->prev;
    } while (last && (intptr_t)(currTime - last->age) > ageThreshold);

    LargeMemoryBlock *toRelease = NULL;
    if (last) {
        toRelease = last->next;
        oldest = last->age;
        last->next = NULL;
    } else {
        toRelease = first;
        first = NULL;
        oldest = 0;
        if (!usedSize)
            bitMask->set(idx, false);
    }
    MALLOC_ASSERT( toRelease, ASSERT_TEXT );
    lastCleanedAge = toRelease->age;

    return toRelease;
}

template<typename Props> LargeMemoryBlock *LargeObjectCacheImpl<Props>::
CacheBin::cleanAll(BinBitMask *bitMask, int idx)
{
    if (!last) return NULL;

    LargeMemoryBlock *toRelease = first;
    last = NULL;
    first = NULL;
    oldest = 0;
    cachedSize = 0;
    if (!usedSize)
        bitMask->set(idx, false);

    return toRelease;
}

/* ----------------------------------------------------------------------------------------------------- */

template<typename Props> size_t LargeObjectCacheImpl<Props>::
CacheBin::reportStat(int num, FILE *f)
{
#if __TBB_MALLOC_LOCACHE_STAT
    if (first)
        printf("%d(%lu): total %lu KB thr %ld lastCln %lu oldest %lu\n",
               num, num*Props::CacheStep+Props::MinSize,
               cachedSize/1024, ageThreshold, lastCleanedAge, oldest);
#else
    suppress_unused_warning(num);
    suppress_unused_warning(f);
#endif
    return cachedSize;
}

// Release objects from cache blocks that are older than ageThreshold
template<typename Props>
bool LargeObjectCacheImpl<Props>::regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currTime, bool doThreshDecr)
{
    bool released = false;
    BinsSummary binsSummary;

    // A zero threshold index means the huge size threshold lies at or below
    // this cache's first bin, so there is nothing to clean up here.
    if (hugeSizeThresholdIdx == 0) return false;

    // Start the search from the last bin below the huge size threshold
    // (only those bins can be cleaned up)
    int startSearchIdx = hugeSizeThresholdIdx - 1;

    for (int i = bitMask.getMaxTrue(startSearchIdx); i >= 0; i = bitMask.getMaxTrue(i-1)) {
        bin[i].updateBinsSummary(&binsSummary);
        if (!doThreshDecr && tooLargeLOC > 2 && binsSummary.isLOCTooLarge()) {
            // If the LOC has been too large for quite a long time, decrease
            // the thresholds based on bin hit statistics.
            // For this, redo the cleanup from the beginning.
            // Note: on this iteration the total usedSz may not look large
            // in comparison to the total cachedSz, as it has been computed
            // only partially so far. That is acceptable.
            i = bitMask.getMaxTrue(startSearchIdx)+1;
            doThreshDecr = true;
            binsSummary.reset();
            continue;
        }
        if (doThreshDecr)
            bin[i].decreaseThreshold();

        if (bin[i].cleanToThreshold(extMemPool, &bitMask, currTime, i)) {
            released = true;
        }
    }
    // We want to detect whether the LOC stayed too large continuously for some
    // time, so races between incrementing and zeroing are acceptable, but the
    // increment itself must be atomic.
    if (binsSummary.isLOCTooLarge())
        AtomicIncrement(tooLargeLOC);
    else
        tooLargeLOC = 0;
    return released;
}

template<typename Props>
bool LargeObjectCacheImpl<Props>::cleanAll(ExtMemoryPool *extMemPool)
{
    bool released = false;
    for (int i = numBins-1; i >= 0; i--) {
        released |= bin[i].releaseAllToBackend(extMemPool, &bitMask, i);
    }
    return released;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::reset() {
    tooLargeLOC = 0;
    for (int i = numBins-1; i >= 0; i--)
        bin[i].init();
    bitMask.reset();
}

#if __TBB_MALLOC_WHITEBOX_TEST
template<typename Props>
size_t LargeObjectCacheImpl<Props>::getLOCSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getSize();
    return size;
}

size_t LargeObjectCache::getLOCSize() const
{
    return largeCache.getLOCSize() + hugeCache.getLOCSize();
}

template<typename Props>
size_t LargeObjectCacheImpl<Props>::getUsedSize() const
{
    size_t size = 0;
    for (int i = numBins-1; i >= 0; i--)
        size += bin[i].getUsedSize();
    return size;
}

size_t LargeObjectCache::getUsedSize() const
{
    return largeCache.getUsedSize() + hugeCache.getUsedSize();
}
#endif // __TBB_MALLOC_WHITEBOX_TEST

inline bool LargeObjectCache::isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime)
{
    return range >= cacheCleanupFreq
        || currTime+range < currTime-1 // overflow; 0 is a power of 2, so do a cleanup
        // (prev; prev+range] contains n*cacheCleanupFreq
        || alignUp(currTime, cacheCleanupFreq)<currTime+range;
}
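
// Worked example (illustrative numbers): with cacheCleanupFreq == 256, a call
// with currTime == 1000 and range == 40 covers the interval (1000; 1040].
// alignUp(1000, 256) == 1024 < 1040, i.e. the interval contains a multiple of
// the cleanup frequency, so a cleanup is due. With range == 10 the interval
// (1000; 1010] contains no such multiple and no cleanup is triggered.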

bool LargeObjectCache::doCleanup(uintptr_t currTime, bool doThreshDecr)
{
    if (!doThreshDecr)
        extMemPool->allLocalCaches.markUnused();
    return largeCache.regularCleanup(extMemPool, currTime, doThreshDecr)
         | hugeCache.regularCleanup(extMemPool, currTime, doThreshDecr);
}

bool LargeObjectCache::decreasingCleanup()
{
    return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/true);
}

bool LargeObjectCache::regularCleanup()
{
    return doCleanup(FencedLoad((intptr_t&)cacheCurrTime), /*doThreshDecr=*/false);
}

bool LargeObjectCache::cleanAll()
{
    return largeCache.cleanAll(extMemPool) | hugeCache.cleanAll(extMemPool);
}

void LargeObjectCache::reset()
{
    largeCache.reset();
    hugeCache.reset();
}

template<typename Props>
LargeMemoryBlock *LargeObjectCacheImpl<Props>::get(ExtMemoryPool *extMemoryPool, size_t size)
{
    int idx = Props::sizeToIdx(size);

    LargeMemoryBlock *lmb = bin[idx].get(extMemoryPool, size, &bitMask, idx);

    if (lmb) {
        MALLOC_ITT_SYNC_ACQUIRED(bin+idx);
        STAT_increment(getThreadId(), ThreadCommonCounters, allocCachedLargeObj);
    }
    return lmb;
}

template<typename Props>
void LargeObjectCacheImpl<Props>::updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size)
{
    int idx = Props::sizeToIdx(size);
    MALLOC_ASSERT(idx<numBins, ASSERT_TEXT);
    bin[idx].updateUsedSize(extMemPool, op==decrease? -size : size, &bitMask, idx);
}

#if __TBB_MALLOC_LOCACHE_STAT
template<typename Props>
void LargeObjectCacheImpl<Props>::reportStat(FILE *f)
{
    size_t cachedSize = 0;
    for (int i=0; i<numBins; i++)
        cachedSize += bin[i].reportStat(i, f);
    fprintf(f, "total LOC size %lu MB\n", cachedSize/1024/1024);
}

void LargeObjectCache::reportStat(FILE *f)
{
    largeCache.reportStat(f);
    hugeCache.reportStat(f);
    fprintf(f, "cache time %lu\n", cacheCurrTime);
}
#endif

template<typename Props>
void LargeObjectCacheImpl<Props>::putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *toCache)
{
    int toBinIdx = Props::sizeToIdx(toCache->unalignedSize);

    MALLOC_ITT_SYNC_RELEASING(bin+toBinIdx);
    bin[toBinIdx].putList(extMemPool, toCache, &bitMask, toBinIdx);
}

void LargeObjectCache::updateCacheState(DecreaseOrIncrease op, size_t size)
{
    if (size < maxLargeSize)
        largeCache.updateCacheState(extMemPool, op, size);
    else if (size < maxHugeSize)
        hugeCache.updateCacheState(extMemPool, op, size);
}

uintptr_t LargeObjectCache::getCurrTime()
{
    return (uintptr_t)AtomicIncrement((intptr_t&)cacheCurrTime);
}

uintptr_t LargeObjectCache::getCurrTimeRange(uintptr_t range)
{
    return (uintptr_t)AtomicAdd((intptr_t&)cacheCurrTime, range) + 1;
}

void LargeObjectCache::registerRealloc(size_t oldSize, size_t newSize)
{
    updateCacheState(decrease, oldSize);
    updateCacheState(increase, alignToBin(newSize));
}

size_t LargeObjectCache::alignToBin(size_t size) {
    return size < maxLargeSize ? LargeCacheType::alignToBin(size) : HugeCacheType::alignToBin(size);
}

// Used for internal purposes
int LargeObjectCache::sizeToIdx(size_t size)
{
    MALLOC_ASSERT(size <= maxHugeSize, ASSERT_TEXT);
    return size < maxLargeSize ?
        LargeCacheType::sizeToIdx(size) :
        LargeCacheType::numBins + HugeCacheType::sizeToIdx(size);
}
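
// Mapping sketch (the bin geometry is configuration-dependent; the numbers
// here are only an illustration): a size below maxLargeSize maps into the
// large cache, e.g. sizeToIdx might return bin 5 of largeCache for a 24 KB
// block, while a size at or above maxLargeSize lands past all large bins, at
// LargeCacheType::numBins plus the huge cache's own index, so both caches
// share one flat index space for internal bookkeeping.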

void LargeObjectCache::putList(LargeMemoryBlock *list)
{
    LargeMemoryBlock *toProcess, *n;

    for (LargeMemoryBlock *curr = list; curr; curr = toProcess) {
        LargeMemoryBlock *tail = curr;
        toProcess = curr->next;
        if (!sizeInCacheRange(curr->unalignedSize)) {
            extMemPool->backend.returnLargeObject(curr);
            continue;
        }
        int currIdx = sizeToIdx(curr->unalignedSize);

        // Find all blocks that fit the same bin. A more efficient sorting
        // algorithm is not used because the list is short (commonly,
        // LocalLOC's HIGH_MARK-LOW_MARK, i.e. 24 items).
        for (LargeMemoryBlock *b = toProcess; b; b = n) {
            n = b->next;
            if (sizeToIdx(b->unalignedSize) == currIdx) {
                tail->next = b;
                tail = b;
                if (toProcess == b)
                    toProcess = toProcess->next;
                else {
                    b->prev->next = b->next;
                    if (b->next)
                        b->next->prev = b->prev;
                }
            }
        }
        tail->next = NULL;
        if (curr->unalignedSize < maxLargeSize)
            largeCache.putList(extMemPool, curr);
        else
            hugeCache.putList(extMemPool, curr);
    }
}

void LargeObjectCache::put(LargeMemoryBlock *largeBlock)
{
    size_t blockSize = largeBlock->unalignedSize;
    if (sizeInCacheRange(blockSize)) {
        largeBlock->next = NULL;
        if (blockSize < maxLargeSize)
            largeCache.putList(extMemPool, largeBlock);
        else
            hugeCache.putList(extMemPool, largeBlock);
    } else {
        extMemPool->backend.returnLargeObject(largeBlock);
    }
}

LargeMemoryBlock *LargeObjectCache::get(size_t size)
{
    MALLOC_ASSERT( size >= minLargeSize, ASSERT_TEXT );
    if (sizeInCacheRange(size)) {
        return size < maxLargeSize ?
            largeCache.get(extMemPool, size) : hugeCache.get(extMemPool, size);
    }
    return NULL;
}

LargeMemoryBlock *ExtMemoryPool::mallocLargeObject(MemoryPool *pool, size_t allocationSize)
{
#if __TBB_MALLOC_LOCACHE_STAT
    AtomicIncrement(mallocCalls);
    AtomicAdd(memAllocKB, allocationSize/1024);
#endif
    LargeMemoryBlock* lmb = loc.get(allocationSize);
    if (!lmb) {
        BackRefIdx backRefIdx = BackRefIdx::newBackRef(/*largeObj=*/true);
        if (backRefIdx.isInvalid())
            return NULL;

        // unalignedSize is set in getLargeBlock
        lmb = backend.getLargeBlock(allocationSize);
        if (!lmb) {
            removeBackRef(backRefIdx);
            loc.updateCacheState(decrease, allocationSize);
            return NULL;
        }
        lmb->backRefIdx = backRefIdx;
        lmb->pool = pool;
        STAT_increment(getThreadId(), ThreadCommonCounters, allocNewLargeObj);
    } else {
#if __TBB_MALLOC_LOCACHE_STAT
        AtomicIncrement(cacheHits);
        AtomicAdd(memHitKB, allocationSize/1024);
#endif
    }
    return lmb;
}

void ExtMemoryPool::freeLargeObject(LargeMemoryBlock *mBlock)
{
    loc.put(mBlock);
}

void ExtMemoryPool::freeLargeObjectList(LargeMemoryBlock *head)
{
    loc.putList(head);
}

bool ExtMemoryPool::softCachesCleanup()
{
    return loc.regularCleanup();
}

bool ExtMemoryPool::hardCachesCleanup()
{
    // Thread-local caches must be cleaned before the LOC, because objects
    // from thread-local caches can be released to the LOC.
    bool ret = releaseAllLocalCaches();
    ret |= orphanedBlocks.cleanup(&backend);
    ret |= loc.cleanAll();
    ret |= backend.clean();
    return ret;
}

#if BACKEND_HAS_MREMAP
void *ExtMemoryPool::remap(void *ptr, size_t oldSize, size_t newSize, size_t alignment)
{
    const size_t oldUnalignedSize = ((LargeObjectHdr*)ptr - 1)->memoryBlock->unalignedSize;
    void *o = backend.remap(ptr, oldSize, newSize, alignment);
    if (o) {
        LargeMemoryBlock *lmb = ((LargeObjectHdr*)o - 1)->memoryBlock;
        loc.registerRealloc(oldUnalignedSize, lmb->unalignedSize);
    }
    return o;
}
#endif /* BACKEND_HAS_MREMAP */

/*********** End allocation of large objects **********/

} // namespace internal
} // namespace rml