1 /*
2     Copyright (c) 2005-2020 Intel Corporation
3 
4     Licensed under the Apache License, Version 2.0 (the "License");
5     you may not use this file except in compliance with the License.
6     You may obtain a copy of the License at
7 
8         http://www.apache.org/licenses/LICENSE-2.0
9 
10     Unless required by applicable law or agreed to in writing, software
11     distributed under the License is distributed on an "AS IS" BASIS,
12     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13     See the License for the specific language governing permissions and
14     limitations under the License.
15 */
16 
17 #ifndef __TBB_tbbmalloc_internal_H
18     #error tbbmalloc_internal.h must be included at this point
19 #endif
20 
21 #ifndef __TBB_large_objects_H
22 #define __TBB_large_objects_H
23 
24 //! The list of possible Cache Bin Aggregator operations.
25 /*  Declared here to avoid Solaris Studio* 12.2 "multiple definitions" error */
26 enum CacheBinOperationType {
27     CBOP_INVALID = 0,
28     CBOP_GET,
29     CBOP_PUT_LIST,
30     CBOP_CLEAN_TO_THRESHOLD,
31     CBOP_CLEAN_ALL,
32     CBOP_UPDATE_USED_SIZE
33 };
34 
35 // The Cache Bin Aggregator operation status list.
36 // CBST_NOWAIT can be specified for non-blocking operations.
37 enum CacheBinOperationStatus {
38     CBST_WAIT = 0,
39     CBST_NOWAIT,
40     CBST_DONE
41 };
42 
43 /*
44  * Bins that grow with arithmetic step
45  */
46 template<size_t MIN_SIZE, size_t MAX_SIZE>
47 struct LargeBinStructureProps {
48 public:
49     static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
50     static const size_t   CacheStep = 8 * 1024;
51     static const unsigned NumBins = (MaxSize - MinSize) / CacheStep;
52 
alignToBinLargeBinStructureProps53     static size_t alignToBin(size_t size) {
54         return alignUp(size, CacheStep);
55     }
56 
sizeToIdxLargeBinStructureProps57     static int sizeToIdx(size_t size) {
58         MALLOC_ASSERT(MinSize <= size && size < MaxSize, ASSERT_TEXT);
59         MALLOC_ASSERT(size % CacheStep == 0, ASSERT_TEXT);
60         return (size - MinSize) / CacheStep;
61     }
62 };
63 
64 /*
65  * Bins that grow with special geometric progression.
66  */
67 template<size_t MIN_SIZE, size_t MAX_SIZE>
68 struct HugeBinStructureProps {
69 
70 private:
71     // Sizes grow with the following formula: Size = MinSize * (2 ^ (Index / StepFactor))
72     // There are StepFactor bins (8 be default) between each power of 2 bin
73     static const int MaxSizeExp    = Log2<MAX_SIZE>::value;
74     static const int MinSizeExp    = Log2<MIN_SIZE>::value;
75     static const int StepFactor    = 8;
76     static const int StepFactorExp = Log2<StepFactor>::value;
77 
78 public:
79     static const size_t   MinSize = MIN_SIZE, MaxSize = MAX_SIZE;
80     static const unsigned NumBins = (MaxSizeExp - MinSizeExp) * StepFactor;
81 
alignToBinHugeBinStructureProps82     static size_t alignToBin(size_t size) {
83         size_t minorStepExp = BitScanRev(size) - StepFactorExp;
84         return alignUp(size, 1ULL << minorStepExp);
85     }
86 
87     // Sizes between the power of 2 values are aproximated to StepFactor.
sizeToIdxHugeBinStructureProps88     static int sizeToIdx(size_t size) {
89         MALLOC_ASSERT(MinSize <= size && size <= MaxSize, ASSERT_TEXT);
90         int sizeExp = (int)BitScanRev(size); // same as __TBB_Log2
91         size_t majorStepSize = 1ULL << sizeExp;
92         int minorStepExp = sizeExp - StepFactorExp;
93         int minorIdx = (size - majorStepSize) >> minorStepExp;
94         MALLOC_ASSERT(size == majorStepSize + ((size_t)minorIdx << minorStepExp),
95             "Size is not aligned on the bin");
96         return StepFactor * (sizeExp - MinSizeExp) + minorIdx;
97     }
98 };
99 
100 /*
101  * Cache properties accessor
102  *
103  * TooLargeFactor -- when cache size treated "too large" in comparison to user data size
104  * OnMissFactor -- If cache miss occurred and cache was cleaned,
105  *                 set ageThreshold to OnMissFactor * the difference
106  *                 between current time and last time cache was cleaned.
107  * LongWaitFactor -- to detect rarely-used bins and forget about their usage history
108  */
109 template<typename StructureProps, int TOO_LARGE, int ON_MISS, int LONG_WAIT>
110 struct LargeObjectCacheProps : public StructureProps {
111     static const int TooLargeFactor = TOO_LARGE, OnMissFactor = ON_MISS, LongWaitFactor = LONG_WAIT;
112 };
113 
114 template<typename Props>
115 class LargeObjectCacheImpl {
116 private:
117 
118     // Current sizes of used and cached objects. It's calculated while we are
119     // traversing bins, and used for isLOCTooLarge() check at the same time.
120     class BinsSummary {
121         size_t usedSz;
122         size_t cachedSz;
123     public:
BinsSummary()124         BinsSummary() : usedSz(0), cachedSz(0) {}
125         // "too large" criteria
isLOCTooLarge()126         bool isLOCTooLarge() const { return cachedSz > Props::TooLargeFactor * usedSz; }
update(size_t usedSize,size_t cachedSize)127         void update(size_t usedSize, size_t cachedSize) {
128             usedSz += usedSize;
129             cachedSz += cachedSize;
130         }
reset()131         void reset() { usedSz = cachedSz = 0; }
132     };
133 
134 public:
135     // The number of bins to cache large/huge objects.
136     static const uint32_t numBins = Props::NumBins;
137 
138     typedef BitMaskMax<numBins> BinBitMask;
139 
140     // 2-linked list of same-size cached blocks ordered by age (oldest on top)
141     // TODO: are we really want the list to be 2-linked? This allows us
142     // reduce memory consumption and do less operations under lock.
143     // TODO: try to switch to 32-bit logical time to save space in CacheBin
144     // and move bins to different cache lines.
145     class CacheBin {
146     private:
147         LargeMemoryBlock *first,
148                          *last;
149         /* age of an oldest block in the list; equal to last->age, if last defined,
150             used for quick checking it without acquiring the lock. */
151         uintptr_t         oldest;
152         /* currAge when something was excluded out of list because of the age,
153          not because of cache hit */
154         uintptr_t         lastCleanedAge;
155         /* Current threshold value for the blocks of a particular size.
156          Set on cache miss. */
157         intptr_t          ageThreshold;
158 
159         /* total size of all objects corresponding to the bin and allocated by user */
160         size_t            usedSize,
161         /* total size of all objects cached in the bin */
162                           cachedSize;
163         /* mean time of presence of block in the bin before successful reuse */
164         intptr_t          meanHitRange;
165         /* time of last get called for the bin */
166         uintptr_t         lastGet;
167 
168         typename MallocAggregator<CacheBinOperation>::type aggregator;
169 
170         void ExecuteOperation(CacheBinOperation *op, ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx, bool longLifeTime = true);
171 
172         /* should be placed in zero-initialized memory, ctor not needed. */
173         CacheBin();
174 
175     public:
init()176         void init() {
177             memset(this, 0, sizeof(CacheBin));
178         }
179 
180         /* ---------- Cache accessors ---------- */
181         void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *head, BinBitMask *bitMask, int idx);
182         LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
183 
184         /* ---------- Cleanup functions -------- */
185         bool cleanToThreshold(ExtMemoryPool *extMemPool, BinBitMask *bitMask, uintptr_t currTime, int idx);
186         bool releaseAllToBackend(ExtMemoryPool *extMemPool, BinBitMask *bitMask, int idx);
187         /* ------------------------------------- */
188 
189         void updateUsedSize(ExtMemoryPool *extMemPool, size_t size, BinBitMask *bitMask, int idx);
decreaseThreshold()190         void decreaseThreshold() {
191             if (ageThreshold)
192                 ageThreshold = (ageThreshold + meanHitRange) / 2;
193         }
updateBinsSummary(BinsSummary * binsSummary)194         void updateBinsSummary(BinsSummary *binsSummary) const {
195             binsSummary->update(usedSize, cachedSize);
196         }
getSize()197         size_t getSize() const { return cachedSize; }
getUsedSize()198         size_t getUsedSize() const { return usedSize; }
199         size_t reportStat(int num, FILE *f);
200 
201         /* --------- Unsafe methods used with the aggregator ------- */
202         void forgetOutdatedState(uintptr_t currTime);
203         LargeMemoryBlock *putList(LargeMemoryBlock *head, LargeMemoryBlock *tail, BinBitMask *bitMask,
204                 int idx, int num, size_t hugeObjectThreshold);
205         LargeMemoryBlock *get();
206         LargeMemoryBlock *cleanToThreshold(uintptr_t currTime, BinBitMask *bitMask, int idx);
207         LargeMemoryBlock *cleanAll(BinBitMask *bitMask, int idx);
updateUsedSize(size_t size,BinBitMask * bitMask,int idx)208         void updateUsedSize(size_t size, BinBitMask *bitMask, int idx) {
209             if (!usedSize) bitMask->set(idx, true);
210             usedSize += size;
211             if (!usedSize && !first) bitMask->set(idx, false);
212         }
updateMeanHitRange(intptr_t hitRange)213         void updateMeanHitRange( intptr_t hitRange ) {
214             hitRange = hitRange >= 0 ? hitRange : 0;
215             meanHitRange = meanHitRange ? (meanHitRange + hitRange) / 2 : hitRange;
216         }
updateAgeThreshold(uintptr_t currTime)217         void updateAgeThreshold( uintptr_t currTime ) {
218             if (lastCleanedAge)
219                 ageThreshold = Props::OnMissFactor*(currTime - lastCleanedAge);
220         }
updateCachedSize(size_t size)221         void updateCachedSize(size_t size) {
222             cachedSize += size;
223         }
setLastGet(uintptr_t newLastGet)224         void setLastGet( uintptr_t newLastGet ) {
225             lastGet = newLastGet;
226         }
227         /* -------------------------------------------------------- */
228     };
229 
230     // Huge bins index for fast regular cleanup searching in case of
231     // the "huge size threshold" setting defined
232     intptr_t     hugeSizeThresholdIdx;
233 
234 private:
235     // How many times LOC was "too large"
236     intptr_t     tooLargeLOC;
237     // for fast finding of used bins and bins with non-zero usedSize;
238     // indexed from the end, as we need largest 1st
239     BinBitMask   bitMask;
240     // bins with lists of recently freed large blocks cached for re-use
241     CacheBin bin[numBins];
242 
243 public:
244     /* ------------ CacheBin structure dependent stuff ------------ */
alignToBin(size_t size)245     static size_t alignToBin(size_t size) {
246         return Props::alignToBin(size);
247     }
sizeToIdx(size_t size)248     static int sizeToIdx(size_t size) {
249         return Props::sizeToIdx(size);
250     }
251 
252     /* --------- Main cache functions (put, get object) ------------ */
253     void putList(ExtMemoryPool *extMemPool, LargeMemoryBlock *largeBlock);
254     LargeMemoryBlock *get(ExtMemoryPool *extMemPool, size_t size);
255 
256     /* ------------------------ Cleanup ---------------------------- */
257     bool regularCleanup(ExtMemoryPool *extMemPool, uintptr_t currAge, bool doThreshDecr);
258     bool cleanAll(ExtMemoryPool *extMemPool);
259 
260     /* -------------------------- Other ---------------------------- */
261     void updateCacheState(ExtMemoryPool *extMemPool, DecreaseOrIncrease op, size_t size);
262 
263     void reset();
264     void reportStat(FILE *f);
265 #if __TBB_MALLOC_WHITEBOX_TEST
266     size_t getLOCSize() const;
267     size_t getUsedSize() const;
268 #endif
269 };
270 
271 class LargeObjectCache {
272 private:
273     // Large bins [minLargeSize, maxLargeSize)
274     // Huge bins [maxLargeSize, maxHugeSize)
275     static const size_t minLargeSize = 8 * 1024,
276                         maxLargeSize = 8 * 1024 * 1024,
277                         // Cache memory up to 1TB (or 2GB for 32-bit arch), but sieve objects from the special threshold
278                         maxHugeSize = tbb::internal::select_size_t_constant<2147483648U, 1099511627776ULL>::value;
279 
280 public:
281     // Upper bound threshold for caching size. After that size all objects sieve through cache
282     // By default - 64MB, previous value was 129MB (needed by some Intel(R) Math Kernel Library (Intel(R) MKL) benchmarks)
283     static const size_t defaultMaxHugeSize = 64UL * 1024UL * 1024UL;
284     // After that size large object interpreted as huge and does not participate in regular cleanup.
285     // Can be changed during the program execution.
286     size_t hugeSizeThreshold;
287 
288 private:
289     // Large objects cache properties
290     typedef LargeBinStructureProps<minLargeSize, maxLargeSize> LargeBSProps;
291     typedef LargeObjectCacheProps<LargeBSProps, 2, 2, 16> LargeCacheTypeProps;
292 
293     // Huge objects cache properties
294     typedef HugeBinStructureProps<maxLargeSize, maxHugeSize> HugeBSProps;
295     typedef LargeObjectCacheProps<HugeBSProps, 1, 1, 4> HugeCacheTypeProps;
296 
297     // Cache implementation type with properties
298     typedef LargeObjectCacheImpl< LargeCacheTypeProps > LargeCacheType;
299     typedef LargeObjectCacheImpl< HugeCacheTypeProps > HugeCacheType;
300 
301     // Beginning of largeCache is more actively used and smaller than hugeCache,
302     // so put hugeCache first to prevent false sharing
303     // with LargeObjectCache's predecessor
304     HugeCacheType hugeCache;
305     LargeCacheType largeCache;
306 
307     /* logical time, incremented on each put/get operation
308        To prevent starvation between pools, keep separately for each pool.
309        Overflow is OK, as we only want difference between
310        its current value and some recent.
311 
312        Both malloc and free should increment logical time, as in
313        a different case multiple cached blocks would have same age,
314        and accuracy of predictors suffers.
315     */
316     uintptr_t cacheCurrTime;
317 
318     // Memory pool that owns this LargeObjectCache.
319     // strict 1:1 relation, never changed
320     ExtMemoryPool *extMemPool;
321 
322     // Returns artificial bin index,
323     // it's used only during sorting and never saved
324     static int sizeToIdx(size_t size);
325 
326     // Our friends
327     friend class Backend;
328 
329 public:
330     void init(ExtMemoryPool *memPool);
331 
332     // Item accessors
333     void put(LargeMemoryBlock *largeBlock);
334     void putList(LargeMemoryBlock *head);
335     LargeMemoryBlock *get(size_t size);
336 
337     void updateCacheState(DecreaseOrIncrease op, size_t size);
338     bool isCleanupNeededOnRange(uintptr_t range, uintptr_t currTime);
339 
340     // Cleanup operations
341     bool doCleanup(uintptr_t currTime, bool doThreshDecr);
342     bool decreasingCleanup();
343     bool regularCleanup();
344     bool cleanAll();
345     void reset();
346 
347     void reportStat(FILE *f);
348 #if __TBB_MALLOC_WHITEBOX_TEST
349     size_t getLOCSize() const;
350     size_t getUsedSize() const;
351 #endif
352 
353     // Cache deals with exact-fit sizes, so need to align each size
354     // to the specific bin when put object to cache
355     static size_t alignToBin(size_t size);
356 
357     void setHugeSizeThreshold(size_t value);
358 
359     // Check if we should cache or sieve this size
360     bool sizeInCacheRange(size_t size);
361 
362     uintptr_t getCurrTime();
363     uintptr_t getCurrTimeRange(uintptr_t range);
364     void registerRealloc(size_t oldSize, size_t newSize);
365 };
366 
367 #endif // __TBB_large_objects_H
368 
369