/****************************************************************************
 * Copyright (C) 2014-2015 Intel Corporation.   All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * @file arena.h
 *
 * @brief Arena memory manager
 *        The arena is convenient and fast for managing allocations that are
 *        associated with an operation and can all be freed at once when that
 *        operation has completed. Allocations are cheap since most of the time
 *        they are simply an increment of an offset. There is no need to free
 *        individual allocations; all of the arena memory can be freed at once.
 *
 ******************************************************************************/
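//
// Minimal usage sketch (illustrative only; the variable names and sizes below
// are hypothetical, not part of this header):
//
//     TArena<> arena;                                  // uses DefaultAllocator
//     void*    pVerts = arena.AllocAligned(1024, 64);
//     void*    pMisc  = arena.Alloc(256);
//     // ... use the memory for the duration of the operation ...
//     arena.Reset();   // releases everything at once (keeps the current block)
//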
#pragma once

#include <mutex>
#include <algorithm>
#include <atomic>
#include "core/utils.h"

static const size_t ARENA_BLOCK_ALIGN = 64;

struct ArenaBlock
{
    size_t      blockSize = 0;
    ArenaBlock* pNext     = nullptr;
};
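// Each block handed out by an allocator reserves its first ARENA_BLOCK_ALIGN
// bytes for the ArenaBlock header; arena allocations are carved out after that
// offset, which is why the header must fit within ARENA_BLOCK_ALIGN.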
static_assert(sizeof(ArenaBlock) <= ARENA_BLOCK_ALIGN, "Increase BLOCK_ALIGN size");

class DefaultAllocator
{
public:
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));

        ArenaBlock* p = new (AlignedMalloc(size, align)) ArenaBlock();
        p->blockSize  = size;
        return p;
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            SWR_ASSUME_ASSERT(pMem->blockSize < size_t(0xdddddddd));
            AlignedFree(pMem);
        }
    }
};

// Caching Allocator for Arena
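// Freed blocks are kept in size buckets and handed back out by AllocateAligned()
// instead of hitting the system allocator. Blocks not reused by the time
// FreeOldBlocks() is called are aged into an "old" list and released once the
// old list grows past MAX_UNUSED_SIZE.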
template <uint32_t NumBucketsT = 8, uint32_t StartBucketBitT = 12>
struct CachingAllocatorT : DefaultAllocator
{
    ArenaBlock* AllocateAligned(size_t size, size_t align)
    {
        SWR_ASSUME_ASSERT(size >= sizeof(ArenaBlock));
        SWR_ASSUME_ASSERT(size <= uint32_t(-1));

        uint32_t bucket = GetBucketId(size);

        {
            // search cached blocks
            std::lock_guard<std::mutex> l(m_mutex);
            ArenaBlock*                 pPrevBlock = &m_cachedBlocks[bucket];
            ArenaBlock*                 pBlock     = SearchBlocks(pPrevBlock, size, align);

            if (pBlock)
            {
                m_cachedSize -= pBlock->blockSize;
                if (pBlock == m_pLastCachedBlocks[bucket])
                {
                    m_pLastCachedBlocks[bucket] = pPrevBlock;
                }
            }
            else
            {
                pPrevBlock = &m_oldCachedBlocks[bucket];
                pBlock     = SearchBlocks(pPrevBlock, size, align);

                if (pBlock)
                {
                    m_oldCachedSize -= pBlock->blockSize;
                    if (pBlock == m_pOldLastCachedBlocks[bucket])
                    {
                        m_pOldLastCachedBlocks[bucket] = pPrevBlock;
                    }
                }
            }

            if (pBlock)
            {
                assert(pPrevBlock && pPrevBlock->pNext == pBlock);
                pPrevBlock->pNext = pBlock->pNext;
                pBlock->pNext     = nullptr;

                return pBlock;
            }

            m_totalAllocated += size;

#if 0
            {
                static uint32_t count = 0;
                char buf[128];
                sprintf_s(buf, "Arena Alloc %d 0x%llx bytes - 0x%llx total\n", ++count, uint64_t(size), uint64_t(m_totalAllocated));
                OutputDebugStringA(buf);
            }
#endif
        }

        if (bucket && bucket < (CACHE_NUM_BUCKETS - 1))
        {
            // Make all blocks in this bucket the same size
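            // (e.g. with the default StartBucketBitT of 12, a 9000-byte request
            // lands in bucket 1 and is rounded up to 16KB, so cached blocks
            // within a bucket are interchangeable)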
            size = size_t(1) << (bucket + 1 + CACHE_START_BUCKET_BIT);
        }

        return this->DefaultAllocator::AllocateAligned(size, align);
    }

    void Free(ArenaBlock* pMem)
    {
        if (pMem)
        {
            std::unique_lock<std::mutex> l(m_mutex);
            InsertCachedBlock(GetBucketId(pMem->blockSize), pMem);
        }
    }

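    // Each call ages blocks cached since the previous call into the "old" lists;
    // old blocks are only returned to the system once their combined size exceeds
    // MAX_UNUSED_SIZE, so steady-state reuse avoids churning the system allocator.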
    void FreeOldBlocks()
    {
        if (!m_cachedSize)
        {
            return;
        }
        std::lock_guard<std::mutex> l(m_mutex);

        bool doFree = (m_oldCachedSize > MAX_UNUSED_SIZE);

        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            if (doFree)
            {
                ArenaBlock* pBlock = m_oldCachedBlocks[i].pNext;
                while (pBlock)
                {
                    ArenaBlock* pNext = pBlock->pNext;
                    m_oldCachedSize -= pBlock->blockSize;
                    m_totalAllocated -= pBlock->blockSize;
                    this->DefaultAllocator::Free(pBlock);
                    pBlock = pNext;
                }
                m_oldCachedBlocks[i].pNext = nullptr;
                m_pOldLastCachedBlocks[i]  = &m_oldCachedBlocks[i];
            }

            if (m_pLastCachedBlocks[i] != &m_cachedBlocks[i])
            {
                if (i && i < (CACHE_NUM_BUCKETS - 1))
                {
                    // We know that all blocks are the same size.
                    // Just move the list over.
                    m_pLastCachedBlocks[i]->pNext = m_oldCachedBlocks[i].pNext;
                    m_oldCachedBlocks[i].pNext    = m_cachedBlocks[i].pNext;
                    m_cachedBlocks[i].pNext       = nullptr;
                    if (m_pOldLastCachedBlocks[i]->pNext)
                    {
                        m_pOldLastCachedBlocks[i] = m_pLastCachedBlocks[i];
                    }
                    m_pLastCachedBlocks[i] = &m_cachedBlocks[i];
                }
                else
                {
                    // The end buckets can have variable sized lists.
                    // Insert each block based on size
                    ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
                    while (pBlock)
                    {
                        ArenaBlock* pNext = pBlock->pNext;
                        pBlock->pNext     = nullptr;
                        m_cachedSize -= pBlock->blockSize;
                        InsertCachedBlock<true>(i, pBlock);
                        pBlock = pNext;
                    }

                    m_pLastCachedBlocks[i]  = &m_cachedBlocks[i];
                    m_cachedBlocks[i].pNext = nullptr;
                }
            }
        }

        m_oldCachedSize += m_cachedSize;
        m_cachedSize = 0;
    }

    CachingAllocatorT()
    {
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            m_pLastCachedBlocks[i]    = &m_cachedBlocks[i];
            m_pOldLastCachedBlocks[i] = &m_oldCachedBlocks[i];
        }
    }

    ~CachingAllocatorT()
    {
        // Free all cached blocks
        for (uint32_t i = 0; i < CACHE_NUM_BUCKETS; ++i)
        {
            ArenaBlock* pBlock = m_cachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
            pBlock = m_oldCachedBlocks[i].pNext;
            while (pBlock)
            {
                ArenaBlock* pNext = pBlock->pNext;
                this->DefaultAllocator::Free(pBlock);
                pBlock = pNext;
            }
        }
    }

private:
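    // Maps a block size to its cache bucket: with the defaults (8 buckets starting
    // at bit 12), bucket 0 holds blocks up to 8KB, bucket 1 up to 16KB, and so on,
    // with the last bucket catching everything larger. (If BitScanReverseSizeT is
    // not available, everything lands in bucket 0.)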
    static uint32_t GetBucketId(size_t blockSize)
    {
        uint32_t bucketId = 0;

#if defined(BitScanReverseSizeT)
        BitScanReverseSizeT((unsigned long*)&bucketId, (blockSize - 1) >> CACHE_START_BUCKET_BIT);
        bucketId = std::min<uint32_t>(bucketId, CACHE_NUM_BUCKETS - 1);
#endif

        return bucketId;
    }

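    // Inserts pNewBlock into the chosen bucket's list, keeping the list sorted
    // largest-first so SearchBlocks() can stop at the first block that is too small.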
    template <bool OldBlockT = false>
    void InsertCachedBlock(uint32_t bucketId, ArenaBlock* pNewBlock)
    {
        SWR_ASSUME_ASSERT(bucketId < CACHE_NUM_BUCKETS);

        ArenaBlock* pPrevBlock =
            OldBlockT ? &m_oldCachedBlocks[bucketId] : &m_cachedBlocks[bucketId];
        ArenaBlock* pBlock = pPrevBlock->pNext;

        while (pBlock)
        {
            if (pNewBlock->blockSize >= pBlock->blockSize)
            {
                // Insert here
                break;
            }
            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        // Insert into list
        SWR_ASSUME_ASSERT(pPrevBlock);
        pPrevBlock->pNext = pNewBlock;
        pNewBlock->pNext  = pBlock;

        if (OldBlockT)
        {
            if (m_pOldLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pOldLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_oldCachedSize += pNewBlock->blockSize;
        }
        else
        {
            if (m_pLastCachedBlocks[bucketId] == pPrevBlock)
            {
                m_pLastCachedBlocks[bucketId] = pNewBlock;
            }

            m_cachedSize += pNewBlock->blockSize;
        }
    }

    static ArenaBlock* SearchBlocks(ArenaBlock*& pPrevBlock, size_t blockSize, size_t align)
    {
        ArenaBlock* pBlock          = pPrevBlock->pNext;
        ArenaBlock* pPotentialBlock = nullptr;
        ArenaBlock* pPotentialPrev  = nullptr;

        while (pBlock)
        {
            if (pBlock->blockSize >= blockSize)
            {
                if (pBlock == AlignUp(pBlock, align))
                {
                    if (pBlock->blockSize == blockSize)
                    {
                        // Won't find a better match
                        break;
                    }

                    // We could use this as it is larger than we wanted, but
                    // continue to search for a better match
                    pPotentialBlock = pBlock;
                    pPotentialPrev  = pPrevBlock;
                }
            }
            else
            {
                // Blocks are sorted by size (biggest first)
                // So, if we get here, there are no blocks
                // large enough, fall through to allocation.
                pBlock = nullptr;
                break;
            }

            pPrevBlock = pBlock;
            pBlock     = pBlock->pNext;
        }

        if (!pBlock)
        {
            // Couldn't find an exact match, use next biggest size
            pBlock     = pPotentialBlock;
            pPrevBlock = pPotentialPrev;
        }

        return pBlock;
    }

    // buckets, for block sizes < (1 << (start+1)), < (1 << (start+2)), ...
    static const uint32_t CACHE_NUM_BUCKETS      = NumBucketsT;
    static const uint32_t CACHE_START_BUCKET_BIT = StartBucketBitT;
    static const size_t   MAX_UNUSED_SIZE        = sizeof(MEGABYTE);

    ArenaBlock  m_cachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pLastCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock  m_oldCachedBlocks[CACHE_NUM_BUCKETS];
    ArenaBlock* m_pOldLastCachedBlocks[CACHE_NUM_BUCKETS];
    std::mutex  m_mutex;

    size_t m_totalAllocated = 0;

    size_t m_cachedSize    = 0;
    size_t m_oldCachedSize = 0;
};
typedef CachingAllocatorT<> CachingAllocator;

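// TArena is a simple bump allocator layered on top of an allocator T: requests
// are carved out of the current block by advancing m_offset, and a new block of
// at least BlockSizeT bytes is chained in when the current one runs out.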
template <typename T = DefaultAllocator, size_t BlockSizeT = 128 * sizeof(KILOBYTE)>
class TArena
{
public:
    TArena(T& in_allocator) : m_allocator(in_allocator) {}
    TArena() : m_allocator(m_defAllocator) {}
    ~TArena() { Reset(true); }

    void* AllocAligned(size_t size, size_t align)
    {
        if (0 == size)
        {
            return nullptr;
        }

        SWR_ASSERT(align <= ARENA_BLOCK_ALIGN);

        if (m_pCurBlock)
        {
            ArenaBlock* pCurBlock = m_pCurBlock;
            size_t      offset    = AlignUp(m_offset, align);

            if ((offset + size) <= pCurBlock->blockSize)
            {
                void* pMem = PtrAdd(pCurBlock, offset);
                m_offset   = offset + size;
                return pMem;
            }

            // Not enough memory in this block, fall through to allocate
            // a new block
        }

        static const size_t ArenaBlockSize = BlockSizeT;
        size_t              blockSize      = std::max(size + ARENA_BLOCK_ALIGN, ArenaBlockSize);

        // Add in one BLOCK_ALIGN unit to store ArenaBlock in.
        blockSize = AlignUp(blockSize, ARENA_BLOCK_ALIGN);

        ArenaBlock* pNewBlock = m_allocator.AllocateAligned(
            blockSize, ARENA_BLOCK_ALIGN); // Arena blocks are always simd byte aligned.
        SWR_ASSERT(pNewBlock != nullptr);

        if (pNewBlock != nullptr)
        {
            m_offset         = ARENA_BLOCK_ALIGN;
            pNewBlock->pNext = m_pCurBlock;

            m_pCurBlock = pNewBlock;
        }

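        // Retry: the new block (if the allocation succeeded) is now at the head
        // of the list and the request is satisfied from it.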
        return AllocAligned(size, align);
    }

    void* Alloc(size_t size) { return AllocAligned(size, 1); }

    void* AllocAlignedSync(size_t size, size_t align)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = AllocAligned(size, align);
        m_mutex.unlock();

        return pAlloc;
    }

    void* AllocSync(size_t size)
    {
        void* pAlloc = nullptr;

        m_mutex.lock();
        pAlloc = Alloc(size);
        m_mutex.unlock();

        return pAlloc;
    }

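    // Releases every block except the most recent one (which is kept for reuse)
    // unless removeAll is true, and rewinds the bump offset to the start.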
    void Reset(bool removeAll = false)
    {
        m_offset = ARENA_BLOCK_ALIGN;

        if (m_pCurBlock)
        {
            ArenaBlock* pUsedBlocks = m_pCurBlock->pNext;
            m_pCurBlock->pNext      = nullptr;
            while (pUsedBlocks)
            {
                ArenaBlock* pBlock = pUsedBlocks;
                pUsedBlocks        = pBlock->pNext;

                m_allocator.Free(pBlock);
            }

            if (removeAll)
            {
                m_allocator.Free(m_pCurBlock);
                m_pCurBlock = nullptr;
            }
        }
    }

    bool IsEmpty()
    {
        return (m_pCurBlock == nullptr) ||
               (m_offset == ARENA_BLOCK_ALIGN && m_pCurBlock->pNext == nullptr);
    }

private:
    ArenaBlock* m_pCurBlock = nullptr;
    size_t      m_offset    = ARENA_BLOCK_ALIGN;

    /// @note Mutex is only used by sync allocation functions.
    std::mutex m_mutex;

    DefaultAllocator m_defAllocator;
    T&               m_allocator;
};

using StdArena     = TArena<DefaultAllocator>;
using CachingArena = TArena<CachingAllocator>;
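
// Illustrative sketch of sharing one caching allocator across arenas (the names
// below are hypothetical, not part of this header): a long-lived CachingAllocator
// owns the block cache, short-lived CachingArena instances borrow it, and
// FreeOldBlocks() is called at a quiet point to trim blocks that are no longer
// being reused.
//
//     CachingAllocator g_blockCache;                 // shared, long-lived
//
//     void ProcessOperation()
//     {
//         CachingArena arena(g_blockCache);          // per-operation arena
//         void* pWork = arena.AllocAligned(4096, 64);
//         // ... build and consume the operation's data ...
//     }   // ~TArena() -> Reset(true) returns all blocks to g_blockCache
//
//     void EndOfFrame()
//     {
//         g_blockCache.FreeOldBlocks();              // age / trim cached blocks
//     }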