//===-- primary64.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_PRIMARY64_H_
#define SCUDO_PRIMARY64_H_

#include "bytemap.h"
#include "common.h"
#include "list.h"
#include "local_cache.h"
#include "memtag.h"
#include "options.h"
#include "release.h"
#include "stats.h"
#include "string_utils.h"

namespace scudo {

// SizeClassAllocator64 is an allocator tuned for 64-bit address space.
//
// It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided
// into Regions, one per size class. Note that the base of that mapping is
// random (based on the platform-specific map() capabilities), and that each
// Region actually starts at a random offset from its base.
//
// Regions are mapped incrementally on demand to fulfill allocation requests,
// those mappings being split into equally sized Blocks based on the size class
// they belong to. The Blocks created are shuffled to prevent predictable
// address patterns (the predictability increases with the size of the Blocks).
//
// The 1st Region (for size class 0) holds the TransferBatches. This is a
// structure used to transfer arrays of available pointers from the size class
// freelist to the thread-specific freelist, and back.
//
// The memory used by this allocator is never unmapped, but can be partially
// released if the platform allows for it.
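//
// As an illustrative sketch only (the concrete SizeClassMap and RegionSizeLog
// used in production live in the allocator configuration, not here), a
// Primary could be instantiated along the lines of:
//
//   typedef SizeClassAllocator64<DefaultSizeClassMap, /*RegionSizeLog=*/32U>
//       Primary;
//
// With RegionSizeLog == 32, each Region spans 4GB of reserved address space,
// and Primary::CacheT is the matching thread-local cache type.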

template <class SizeClassMapT, uptr RegionSizeLog,
          s32 MinReleaseToOsIntervalMs = INT32_MIN,
          s32 MaxReleaseToOsIntervalMs = INT32_MAX,
          bool MaySupportMemoryTagging = false>
class SizeClassAllocator64 {
public:
  typedef SizeClassMapT SizeClassMap;
  typedef SizeClassAllocator64<
      SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs,
      MaxReleaseToOsIntervalMs, MaySupportMemoryTagging>
      ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
  typedef typename CacheT::TransferBatch TransferBatch;
  static const bool SupportsMemoryTagging =
      MaySupportMemoryTagging && archSupportsMemoryTagging();

  static uptr getSizeByClassId(uptr ClassId) {
    return (ClassId == SizeClassMap::BatchClassId)
               ? sizeof(TransferBatch)
               : SizeClassMap::getSizeByClassId(ClassId);
  }

  static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }

  void initLinkerInitialized(s32 ReleaseToOsInterval) {
    // Reserve the space required for the Primary.
    PrimaryBase = reinterpret_cast<uptr>(
        map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));

    u32 Seed;
    const u64 Time = getMonotonicTime();
    if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
      Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
    const uptr PageSize = getPageSizeCached();
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      // The actual start of a region is offset by a random number of pages.
      Region->RegionBeg =
          getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
      Region->RandState = getRandomU32(&Seed);
      // Releasing smaller size classes doesn't necessarily yield a meaningful
      // RSS impact: there are more blocks per page, they are randomized
      // around, and thus pages are less likely to be entirely empty. On top
      // of this, attempting to release those requires more iterations and
      // memory accesses, which ends up being fairly costly. The current lower
      // limit is mostly arbitrary and based on empirical observations.
      // TODO(kostyak): make the lower limit a runtime option
      Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
                           (getSizeByClassId(I) >= (PageSize / 32));
      if (Region->CanRelease)
        Region->ReleaseInfo.LastReleaseAtNs = Time;
    }
    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));

    if (SupportsMemoryTagging && systemSupportsMemoryTagging())
      Options.set(OptionBit::UseMemoryTagging);
  }
  void init(s32 ReleaseToOsInterval) {
    memset(this, 0, sizeof(*this));
    initLinkerInitialized(ReleaseToOsInterval);
  }

  void unmapTestOnly() {
    unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data);
  }

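  // Pops a TransferBatch of blocks for ClassId off the region freelist,
  // populating the freelist from newly mapped memory if it is empty. Returns
  // nullptr if the region is exhausted or a batch could not be created.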
  TransferBatch *popBatch(CacheT *C, uptr ClassId) {
    DCHECK_LT(ClassId, NumClasses);
    RegionInfo *Region = getRegionInfo(ClassId);
    ScopedLock L(Region->Mutex);
    TransferBatch *B = Region->FreeList.front();
    if (B) {
      Region->FreeList.pop_front();
    } else {
      B = populateFreeList(C, ClassId, Region);
      if (UNLIKELY(!B))
        return nullptr;
    }
    DCHECK_GT(B->getCount(), 0);
    Region->Stats.PoppedBlocks += B->getCount();
    return B;
  }

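  // Pushes a non-empty TransferBatch back onto the region freelist, and
  // potentially releases unused pages back to the OS for eligible classes.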
  void pushBatch(uptr ClassId, TransferBatch *B) {
    DCHECK_GT(B->getCount(), 0);
    RegionInfo *Region = getRegionInfo(ClassId);
    ScopedLock L(Region->Mutex);
    Region->FreeList.push_front(B);
    Region->Stats.PushedBlocks += B->getCount();
    if (Region->CanRelease)
      releaseToOSMaybe(Region, ClassId);
  }

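  // Acquires all the per-class region mutexes, blocking any further operation
  // on the Primary until enable() is called.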
  void disable() {
    // The BatchClassId must be locked last since other classes can use it.
    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
        continue;
      getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
    }
    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
  }

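  // Releases the mutexes acquired by disable(), in the reverse locking order.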
  void enable() {
    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
    for (uptr I = 0; I < NumClasses; I++) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      getRegionInfo(I)->Mutex.unlock();
    }
  }

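  // Calls Callback on the start address of every block carved out of every
  // region (except the batch class), whether the block is allocated or free.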
  template <typename F> void iterateOverBlocks(F Callback) {
    for (uptr I = 0; I < NumClasses; I++) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      const RegionInfo *Region = getRegionInfo(I);
      const uptr BlockSize = getSizeByClassId(I);
      const uptr From = Region->RegionBeg;
      const uptr To = From + Region->AllocatedUser;
      for (uptr Block = From; Block < To; Block += BlockSize)
        Callback(Block);
    }
  }

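  // Appends aggregated, then per-class, statistics to Str. The RSS figure is
  // currently reported as 0 (see the TODO below).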
  void getStats(ScopedString *Str) {
    // TODO(kostyak): get the RSS per region.
    uptr TotalMapped = 0;
    uptr PoppedBlocks = 0;
    uptr PushedBlocks = 0;
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      if (Region->MappedUser)
        TotalMapped += Region->MappedUser;
      PoppedBlocks += Region->Stats.PoppedBlocks;
      PushedBlocks += Region->Stats.PushedBlocks;
    }
    Str->append("Stats: SizeClassAllocator64: %zuM mapped (%zuM rss) in %zu "
                "allocations; remains %zu\n",
                TotalMapped >> 20, 0, PoppedBlocks,
                PoppedBlocks - PushedBlocks);

    for (uptr I = 0; I < NumClasses; I++)
      getStats(Str, I, 0);
  }

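  // Sets a runtime option. Only Option::ReleaseInterval is acted upon by the
  // Primary; the value is clamped to the template-provided
  // [MinReleaseToOsIntervalMs, MaxReleaseToOsIntervalMs] range.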
  bool setOption(Option O, sptr Value) {
    if (O == Option::ReleaseInterval) {
      const s32 Interval =
          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
              MinReleaseToOsIntervalMs);
      atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval);
      return true;
    }
    // Not supported by the Primary, but not an error either.
    return true;
  }

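  // Forcibly attempts to release the free blocks of every class back to the
  // OS, bypassing the usual heuristics. Returns the number of bytes released.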
  uptr releaseToOS() {
    uptr TotalReleasedBytes = 0;
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      ScopedLock L(Region->Mutex);
      TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
    }
    return TotalReleasedBytes;
  }

  static bool useMemoryTagging(Options Options) {
    return SupportsMemoryTagging && Options.get(OptionBit::UseMemoryTagging);
  }
  void disableMemoryTagging() { Options.clear(OptionBit::UseMemoryTagging); }

  const char *getRegionInfoArrayAddress() const {
    return reinterpret_cast<const char *>(RegionInfoArray);
  }

  static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }

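  // Given a raw copy of the RegionInfo array (as returned by
  // getRegionInfoArrayAddress()) and an address, finds the block nearest to
  // that address (within 8192 bytes) and returns its region and block bounds.
  // Returns a zeroed BlockInfo if no block is close enough.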
  static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
    const RegionInfo *RegionInfoArray =
        reinterpret_cast<const RegionInfo *>(RegionInfoData);
    uptr ClassId;
    uptr MinDistance = -1UL;
    for (uptr I = 0; I != NumClasses; ++I) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      uptr Begin = RegionInfoArray[I].RegionBeg;
      uptr End = Begin + RegionInfoArray[I].AllocatedUser;
      if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I))
        continue;
      uptr RegionDistance;
      if (Begin <= Ptr) {
        if (Ptr < End)
          RegionDistance = 0;
        else
          RegionDistance = Ptr - End;
      } else {
        RegionDistance = Begin - Ptr;
      }

      if (RegionDistance < MinDistance) {
        MinDistance = RegionDistance;
        ClassId = I;
      }
    }

    BlockInfo B = {};
    if (MinDistance <= 8192) {
      B.RegionBegin = RegionInfoArray[ClassId].RegionBeg;
      B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser;
      B.BlockSize = SizeClassMap::getSizeByClassId(ClassId);
      B.BlockBegin =
          B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) *
                               sptr(B.BlockSize));
      while (B.BlockBegin < B.RegionBegin)
        B.BlockBegin += B.BlockSize;
      while (B.RegionEnd < B.BlockBegin + B.BlockSize)
        B.BlockBegin -= B.BlockSize;
    }
    return B;
  }

  AtomicOptions Options;

private:
  static const uptr RegionSize = 1UL << RegionSizeLog;
  static const uptr NumClasses = SizeClassMap::NumClasses;
  static const uptr PrimarySize = RegionSize * NumClasses;

  // Call map for user memory with at least this size.
  static const uptr MapSizeIncrement = 1UL << 18;
  // Fill at most this number of batches from the newly mapped memory.
  static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;

  struct RegionStats {
    uptr PoppedBlocks;
    uptr PushedBlocks;
  };

  struct ReleaseToOsInfo {
    uptr PushedBlocksAtLastRelease;
    uptr RangesReleased;
    uptr LastReleasedBytes;
    u64 LastReleaseAtNs;
  };

  struct UnpaddedRegionInfo {
    HybridMutex Mutex;
    SinglyLinkedList<TransferBatch> FreeList;
    RegionStats Stats;
    bool CanRelease;
    bool Exhausted;
    u32 RandState;
    uptr RegionBeg;
    uptr MappedUser;    // Bytes mapped for user memory.
    uptr AllocatedUser; // Bytes allocated for user memory.
    MapPlatformData Data;
    ReleaseToOsInfo ReleaseInfo;
  };
  struct RegionInfo : UnpaddedRegionInfo {
    char Padding[SCUDO_CACHE_LINE_SIZE -
                 (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)];
  };
  static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");

  uptr PrimaryBase;
  MapPlatformData Data;
  atomic_s32 ReleaseToOsIntervalMs;
  alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];

  RegionInfo *getRegionInfo(uptr ClassId) {
    DCHECK_LT(ClassId, NumClasses);
    return &RegionInfoArray[ClassId];
  }

  uptr getRegionBaseByClassId(uptr ClassId) const {
    return PrimaryBase + (ClassId << RegionSizeLog);
  }

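  // Maps more memory for the given region if needed, carves the newly mapped
  // space into Blocks, shuffles them, packs them into TransferBatches pushed
  // onto the region freelist, and returns one batch to the caller. Returns
  // nullptr if the region is exhausted or a batch could not be allocated.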
  NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
                                           RegionInfo *Region) {
    const uptr Size = getSizeByClassId(ClassId);
    const u32 MaxCount = TransferBatch::getMaxCached(Size);

    const uptr RegionBeg = Region->RegionBeg;
    const uptr MappedUser = Region->MappedUser;
    const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size;
    // Map more space for blocks, if necessary.
    if (UNLIKELY(TotalUserBytes > MappedUser)) {
      // Do the mmap for the user memory.
      const uptr UserMapSize =
          roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement);
      const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
      if (RegionBase + MappedUser + UserMapSize > RegionSize) {
        if (!Region->Exhausted) {
          Region->Exhausted = true;
          ScopedString Str(1024);
          getStats(&Str);
          Str.append(
              "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
              RegionSize >> 20, Size);
          Str.output();
        }
        return nullptr;
      }
      if (MappedUser == 0)
        Region->Data = Data;
      if (!map(reinterpret_cast<void *>(RegionBeg + MappedUser), UserMapSize,
               "scudo:primary",
               MAP_ALLOWNOMEM | MAP_RESIZABLE |
                   (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0),
               &Region->Data))
        return nullptr;
      Region->MappedUser += UserMapSize;
      C->getStats().add(StatMapped, UserMapSize);
    }

    const u32 NumberOfBlocks = Min(
        MaxNumBatches * MaxCount,
        static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
    DCHECK_GT(NumberOfBlocks, 0);

    constexpr u32 ShuffleArraySize =
        MaxNumBatches * TransferBatch::MaxNumCached;
    void *ShuffleArray[ShuffleArraySize];
    DCHECK_LE(NumberOfBlocks, ShuffleArraySize);

    uptr P = RegionBeg + Region->AllocatedUser;
    for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
      ShuffleArray[I] = reinterpret_cast<void *>(P);
    // No need to shuffle the blocks of the batch size class.
    if (ClassId != SizeClassMap::BatchClassId)
      shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState);
    for (u32 I = 0; I < NumberOfBlocks;) {
      TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]);
      if (UNLIKELY(!B))
        return nullptr;
      const u32 N = Min(MaxCount, NumberOfBlocks - I);
      B->setFromArray(&ShuffleArray[I], N);
      Region->FreeList.push_back(B);
      I += N;
    }
    TransferBatch *B = Region->FreeList.front();
    Region->FreeList.pop_front();
    DCHECK(B);
    DCHECK_GT(B->getCount(), 0);

    const uptr AllocatedUser = Size * NumberOfBlocks;
    C->getStats().add(StatFree, AllocatedUser);
    Region->AllocatedUser += AllocatedUser;

    return B;
  }

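  // Appends the statistics line for a single class to Str. Rss is passed in
  // by the caller (currently always 0, pending per-region RSS accounting).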
  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
    RegionInfo *Region = getRegionInfo(ClassId);
    if (Region->MappedUser == 0)
      return;
    const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks;
    const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId);
    Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
                "inuse: %6zu total: %6zu rss: %6zuK releases: %6zu last "
                "released: %6zuK region: 0x%zx (0x%zx)\n",
                Region->Exhausted ? "F" : " ", ClassId,
                getSizeByClassId(ClassId), Region->MappedUser >> 10,
                Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks, InUse,
                TotalChunks, Rss >> 10, Region->ReleaseInfo.RangesReleased,
                Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg,
                getRegionBaseByClassId(ClassId));
  }

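  // Attempts to release the unused pages backing free blocks of the region
  // back to the OS. The attempt is skipped when there is too little free
  // memory or nothing new has been pushed since the last release; unless
  // Force is true, additional heuristics on block size and a minimum interval
  // between releases also apply. Returns the number of bytes released.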
  NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                 bool Force = false) {
    const uptr BlockSize = getSizeByClassId(ClassId);
    const uptr PageSize = getPageSizeCached();

    CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
    const uptr BytesInFreeList =
        Region->AllocatedUser -
        (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
    if (BytesInFreeList < PageSize)
      return 0; // No chance to release anything.
    const uptr BytesPushed = (Region->Stats.PushedBlocks -
                              Region->ReleaseInfo.PushedBlocksAtLastRelease) *
                             BlockSize;
    if (BytesPushed < PageSize)
      return 0; // Nothing new to release.

    // Releasing smaller blocks is expensive, so we want to make sure that a
    // significant number of bytes are free, and that a good number of blocks
    // have been pushed to the freelist, before attempting to release.
    if (BlockSize < PageSize / 16U) {
      if (!Force && BytesPushed < Region->AllocatedUser / 16U)
        return 0;
      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
          (100U - 1U - BlockSize / 16U))
        return 0;
    }
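    // (For example, with 4096-byte pages and 64-byte blocks, the threshold
    // above works out to 100 - 1 - 64 / 16 = 95% of AllocatedUser free.)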

    if (!Force) {
      const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
      if (IntervalMs < 0)
        return 0;
      if (Region->ReleaseInfo.LastReleaseAtNs +
              static_cast<u64>(IntervalMs) * 1000000 >
          getMonotonicTime()) {
        return 0; // Memory was returned recently.
      }
    }

    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
    ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
    releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
                          Region->AllocatedUser, 1U, BlockSize, &Recorder,
                          SkipRegion);

    if (Recorder.getReleasedRangesCount() > 0) {
      Region->ReleaseInfo.PushedBlocksAtLastRelease =
          Region->Stats.PushedBlocks;
      Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
      Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
    }
    Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
    return Recorder.getReleasedBytes();
  }
};

} // namespace scudo

#endif // SCUDO_PRIMARY64_H_