//===-- primary64.h ---------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef SCUDO_PRIMARY64_H_
#define SCUDO_PRIMARY64_H_

#include "bytemap.h"
#include "common.h"
#include "list.h"
#include "local_cache.h"
#include "memtag.h"
#include "options.h"
#include "release.h"
#include "stats.h"
#include "string_utils.h"

namespace scudo {

// SizeClassAllocator64 is an allocator tuned for 64-bit address space.
//
// It starts by reserving NumClasses * 2^RegionSizeLog bytes, equally divided
// into Regions, specific to each size class. Note that the base of that
// mapping is random (based on the platform specific map() capabilities), and
// that each Region actually starts at a random offset from its base.
//
// Regions are mapped incrementally on demand to fulfill allocation requests,
// those mappings being split into equally sized Blocks based on the size class
// they belong to. The Blocks created are shuffled to prevent predictable
// address patterns (the predictability increases with the size of the Blocks).
//
// The 1st Region (for size class 0) holds the TransferBatches. This is a
// structure used to transfer arrays of available pointers from the size class
// freelist to the thread specific freelist, and back.
//
// The memory used by this allocator is never unmapped, but can be partially
// released if the platform allows for it.
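//
// A minimal usage sketch (illustrative only: the size class map, the 30-bit
// region size and the interval value below are assumptions, not
// requirements):
//
//   typedef SizeClassAllocator64<DefaultSizeClassMap, 30U> Primary;
//   Primary Allocator;
//   Allocator.init(/*ReleaseToOsInterval=*/1000);
//   typename Primary::CacheT Cache;
//   Cache.init(/*S=*/nullptr, &Allocator);
//   void *P = Cache.allocate(ClassId); // ClassId derived from the size.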

template <class SizeClassMapT, uptr RegionSizeLog,
          s32 MinReleaseToOsIntervalMs = INT32_MIN,
          s32 MaxReleaseToOsIntervalMs = INT32_MAX,
          bool MaySupportMemoryTagging = false>
class SizeClassAllocator64 {
public:
  typedef SizeClassMapT SizeClassMap;
  typedef SizeClassAllocator64<
      SizeClassMap, RegionSizeLog, MinReleaseToOsIntervalMs,
      MaxReleaseToOsIntervalMs, MaySupportMemoryTagging>
      ThisT;
  typedef SizeClassAllocatorLocalCache<ThisT> CacheT;
  typedef typename CacheT::TransferBatch TransferBatch;
  static const bool SupportsMemoryTagging =
      MaySupportMemoryTagging && archSupportsMemoryTagging();

  static uptr getSizeByClassId(uptr ClassId) {
    return (ClassId == SizeClassMap::BatchClassId)
               ? sizeof(TransferBatch)
               : SizeClassMap::getSizeByClassId(ClassId);
  }

  static bool canAllocate(uptr Size) { return Size <= SizeClassMap::MaxSize; }

  void initLinkerInitialized(s32 ReleaseToOsInterval) {
    // Reserve the space required for the Primary.
    PrimaryBase = reinterpret_cast<uptr>(
        map(nullptr, PrimarySize, "scudo:primary", MAP_NOACCESS, &Data));

    u32 Seed;
    const u64 Time = getMonotonicTime();
    if (UNLIKELY(!getRandom(reinterpret_cast<void *>(&Seed), sizeof(Seed))))
      Seed = static_cast<u32>(Time ^ (PrimaryBase >> 12));
    const uptr PageSize = getPageSizeCached();
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      // The actual start of a region is offset by a random number of pages.
      Region->RegionBeg =
          getRegionBaseByClassId(I) + (getRandomModN(&Seed, 16) + 1) * PageSize;
      Region->RandState = getRandomU32(&Seed);
      // Releasing smaller size classes doesn't necessarily yield a meaningful
      // RSS impact: there are more blocks per page, they are randomized
      // around, and thus pages are less likely to be entirely empty. On top
      // of this, attempting to release those requires more iterations and
      // memory accesses which ends up being fairly costly. The current lower
      // limit is mostly arbitrary and based on empirical observations.
      // TODO(kostyak): make the lower limit a runtime option
      Region->CanRelease = (I != SizeClassMap::BatchClassId) &&
                           (getSizeByClassId(I) >= (PageSize / 32));
      if (Region->CanRelease)
        Region->ReleaseInfo.LastReleaseAtNs = Time;
    }
    setOption(Option::ReleaseInterval, static_cast<sptr>(ReleaseToOsInterval));

    if (SupportsMemoryTagging && systemSupportsMemoryTagging())
      Options.set(OptionBit::UseMemoryTagging);
  }

  void init(s32 ReleaseToOsInterval) {
    memset(this, 0, sizeof(*this));
    initLinkerInitialized(ReleaseToOsInterval);
  }

  void unmapTestOnly() {
    unmap(reinterpret_cast<void *>(PrimaryBase), PrimarySize, UNMAP_ALL, &Data);
  }

  TransferBatch *popBatch(CacheT *C, uptr ClassId) {
    DCHECK_LT(ClassId, NumClasses);
    RegionInfo *Region = getRegionInfo(ClassId);
    ScopedLock L(Region->Mutex);
    TransferBatch *B = Region->FreeList.front();
    if (B) {
      Region->FreeList.pop_front();
    } else {
      B = populateFreeList(C, ClassId, Region);
      if (UNLIKELY(!B))
        return nullptr;
    }
    DCHECK_GT(B->getCount(), 0);
    Region->Stats.PoppedBlocks += B->getCount();
    return B;
  }

  void pushBatch(uptr ClassId, TransferBatch *B) {
    DCHECK_GT(B->getCount(), 0);
    RegionInfo *Region = getRegionInfo(ClassId);
    ScopedLock L(Region->Mutex);
    Region->FreeList.push_front(B);
    Region->Stats.PushedBlocks += B->getCount();
    if (Region->CanRelease)
      releaseToOSMaybe(Region, ClassId);
  }

  void disable() {
    // The BatchClassId must be locked last since other classes can use it.
    for (sptr I = static_cast<sptr>(NumClasses) - 1; I >= 0; I--) {
      if (static_cast<uptr>(I) == SizeClassMap::BatchClassId)
        continue;
      getRegionInfo(static_cast<uptr>(I))->Mutex.lock();
    }
    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.lock();
  }

  void enable() {
    getRegionInfo(SizeClassMap::BatchClassId)->Mutex.unlock();
    for (uptr I = 0; I < NumClasses; I++) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      getRegionInfo(I)->Mutex.unlock();
    }
  }

  template <typename F> void iterateOverBlocks(F Callback) {
    for (uptr I = 0; I < NumClasses; I++) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      const RegionInfo *Region = getRegionInfo(I);
      const uptr BlockSize = getSizeByClassId(I);
      const uptr From = Region->RegionBeg;
      const uptr To = From + Region->AllocatedUser;
      for (uptr Block = From; Block < To; Block += BlockSize)
        Callback(Block);
    }
  }

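  // For example (an illustrative sketch, typically run with the allocator
  // disabled so the regions are stable), iterateOverBlocks() above can count
  // every block carved out of the Primary so far, in use or free:
  //
  //   uptr Count = 0;
  //   Allocator.iterateOverBlocks([&Count](uptr Block) { ++Count; });
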
  void getStats(ScopedString *Str) {
    // TODO(kostyak): get the RSS per region.
    uptr TotalMapped = 0;
    uptr PoppedBlocks = 0;
    uptr PushedBlocks = 0;
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      if (Region->MappedUser)
        TotalMapped += Region->MappedUser;
      PoppedBlocks += Region->Stats.PoppedBlocks;
      PushedBlocks += Region->Stats.PushedBlocks;
    }
    Str->append("Stats: SizeClassAllocator64: %zuM mapped (%zuM rss) in %zu "
                "allocations; remains %zu\n",
                TotalMapped >> 20, static_cast<uptr>(0), PoppedBlocks,
                PoppedBlocks - PushedBlocks);

    for (uptr I = 0; I < NumClasses; I++)
      getStats(Str, I, 0);
  }

  bool setOption(Option O, sptr Value) {
    if (O == Option::ReleaseInterval) {
      const s32 Interval =
          Max(Min(static_cast<s32>(Value), MaxReleaseToOsIntervalMs),
              MinReleaseToOsIntervalMs);
      atomic_store_relaxed(&ReleaseToOsIntervalMs, Interval);
      return true;
    }
    // Not supported by the Primary, but not an error either.
    return true;
  }

  uptr releaseToOS() {
    uptr TotalReleasedBytes = 0;
    for (uptr I = 0; I < NumClasses; I++) {
      RegionInfo *Region = getRegionInfo(I);
      ScopedLock L(Region->Mutex);
      TotalReleasedBytes += releaseToOSMaybe(Region, I, /*Force=*/true);
    }
    return TotalReleasedBytes;
  }

  static bool useMemoryTagging(Options Options) {
    return SupportsMemoryTagging && Options.get(OptionBit::UseMemoryTagging);
  }
  void disableMemoryTagging() { Options.clear(OptionBit::UseMemoryTagging); }

  const char *getRegionInfoArrayAddress() const {
    return reinterpret_cast<const char *>(RegionInfoArray);
  }

  static uptr getRegionInfoArraySize() { return sizeof(RegionInfoArray); }

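  // findNearestBlock() below maps a fault address back to the block that most
  // likely contains it. A worked example of its rounding (illustrative
  // numbers, assuming a 48-byte size class): with RegionBegin == 0x1000 and
  // Ptr == 0x10a0, (Ptr - RegionBegin) / BlockSize == 160 / 48 == 3, so
  // BlockBegin == 0x1000 + 3 * 48 == 0x1090.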
  static BlockInfo findNearestBlock(const char *RegionInfoData, uptr Ptr) {
    const RegionInfo *RegionInfoArray =
        reinterpret_cast<const RegionInfo *>(RegionInfoData);
    uptr ClassId;
    uptr MinDistance = -1UL;
    for (uptr I = 0; I != NumClasses; ++I) {
      if (I == SizeClassMap::BatchClassId)
        continue;
      uptr Begin = RegionInfoArray[I].RegionBeg;
      uptr End = Begin + RegionInfoArray[I].AllocatedUser;
      if (Begin > End || End - Begin < SizeClassMap::getSizeByClassId(I))
        continue;
      uptr RegionDistance;
      if (Begin <= Ptr) {
        if (Ptr < End)
          RegionDistance = 0;
        else
          RegionDistance = Ptr - End;
      } else {
        RegionDistance = Begin - Ptr;
      }

      if (RegionDistance < MinDistance) {
        MinDistance = RegionDistance;
        ClassId = I;
      }
    }

    BlockInfo B = {};
    if (MinDistance <= 8192) {
      B.RegionBegin = RegionInfoArray[ClassId].RegionBeg;
      B.RegionEnd = B.RegionBegin + RegionInfoArray[ClassId].AllocatedUser;
      B.BlockSize = SizeClassMap::getSizeByClassId(ClassId);
      B.BlockBegin =
          B.RegionBegin + uptr(sptr(Ptr - B.RegionBegin) / sptr(B.BlockSize) *
                               sptr(B.BlockSize));
      while (B.BlockBegin < B.RegionBegin)
        B.BlockBegin += B.BlockSize;
      while (B.RegionEnd < B.BlockBegin + B.BlockSize)
        B.BlockBegin -= B.BlockSize;
    }
    return B;
  }

  AtomicOptions Options;

private:
  static const uptr RegionSize = 1UL << RegionSizeLog;
  static const uptr NumClasses = SizeClassMap::NumClasses;
  static const uptr PrimarySize = RegionSize * NumClasses;

  // Call map for user memory with at least this size.
  static const uptr MapSizeIncrement = 1UL << 18;
  // Fill at most this number of batches from the newly mapped memory.
  static const u32 MaxNumBatches = SCUDO_ANDROID ? 4U : 8U;

  struct RegionStats {
    uptr PoppedBlocks;
    uptr PushedBlocks;
  };

  struct ReleaseToOsInfo {
    uptr PushedBlocksAtLastRelease;
    uptr RangesReleased;
    uptr LastReleasedBytes;
    u64 LastReleaseAtNs;
  };

  struct UnpaddedRegionInfo {
    HybridMutex Mutex;
    SinglyLinkedList<TransferBatch> FreeList;
    RegionStats Stats;
    bool CanRelease;
    bool Exhausted;
    u32 RandState;
    uptr RegionBeg;
    uptr MappedUser;    // Bytes mapped for user memory.
    uptr AllocatedUser; // Bytes allocated for user memory.
    MapPlatformData Data;
    ReleaseToOsInfo ReleaseInfo;
  };
  // Pad each RegionInfo to a cache line so that concurrent accesses to
  // different regions do not false-share.
  struct RegionInfo : UnpaddedRegionInfo {
    char Padding[SCUDO_CACHE_LINE_SIZE -
                 (sizeof(UnpaddedRegionInfo) % SCUDO_CACHE_LINE_SIZE)];
  };
  static_assert(sizeof(RegionInfo) % SCUDO_CACHE_LINE_SIZE == 0, "");

  uptr PrimaryBase;
  MapPlatformData Data;
  atomic_s32 ReleaseToOsIntervalMs;
  alignas(SCUDO_CACHE_LINE_SIZE) RegionInfo RegionInfoArray[NumClasses];

  RegionInfo *getRegionInfo(uptr ClassId) {
    DCHECK_LT(ClassId, NumClasses);
    return &RegionInfoArray[ClassId];
  }

  uptr getRegionBaseByClassId(uptr ClassId) const {
    return PrimaryBase + (ClassId << RegionSizeLog);
  }

  NOINLINE TransferBatch *populateFreeList(CacheT *C, uptr ClassId,
                                           RegionInfo *Region) {
    const uptr Size = getSizeByClassId(ClassId);
    const u32 MaxCount = TransferBatch::getMaxCached(Size);

    const uptr RegionBeg = Region->RegionBeg;
    const uptr MappedUser = Region->MappedUser;
    const uptr TotalUserBytes = Region->AllocatedUser + MaxCount * Size;
    // Map more space for blocks, if necessary.
    if (UNLIKELY(TotalUserBytes > MappedUser)) {
      // Do the mmap for the user memory.
      const uptr UserMapSize =
          roundUpTo(TotalUserBytes - MappedUser, MapSizeIncrement);
      const uptr RegionBase = RegionBeg - getRegionBaseByClassId(ClassId);
      if (RegionBase + MappedUser + UserMapSize > RegionSize) {
        if (!Region->Exhausted) {
          Region->Exhausted = true;
          ScopedString Str(1024);
          getStats(&Str);
          Str.append(
              "Scudo OOM: The process has exhausted %zuM for size class %zu.\n",
              RegionSize >> 20, Size);
          Str.output();
        }
        return nullptr;
      }
      if (MappedUser == 0)
        Region->Data = Data;
      if (!map(reinterpret_cast<void *>(RegionBeg + MappedUser), UserMapSize,
               "scudo:primary",
               MAP_ALLOWNOMEM | MAP_RESIZABLE |
                   (useMemoryTagging(Options.load()) ? MAP_MEMTAG : 0),
               &Region->Data))
        return nullptr;
      Region->MappedUser += UserMapSize;
      C->getStats().add(StatMapped, UserMapSize);
    }

    const u32 NumberOfBlocks = Min(
        MaxNumBatches * MaxCount,
        static_cast<u32>((Region->MappedUser - Region->AllocatedUser) / Size));
    DCHECK_GT(NumberOfBlocks, 0);

    constexpr u32 ShuffleArraySize =
        MaxNumBatches * TransferBatch::MaxNumCached;
    void *ShuffleArray[ShuffleArraySize];
    DCHECK_LE(NumberOfBlocks, ShuffleArraySize);

    uptr P = RegionBeg + Region->AllocatedUser;
    for (u32 I = 0; I < NumberOfBlocks; I++, P += Size)
      ShuffleArray[I] = reinterpret_cast<void *>(P);
    // No need to shuffle the blocks of the batch size class.
    if (ClassId != SizeClassMap::BatchClassId)
      shuffle(ShuffleArray, NumberOfBlocks, &Region->RandState);
    for (u32 I = 0; I < NumberOfBlocks;) {
      TransferBatch *B = C->createBatch(ClassId, ShuffleArray[I]);
      if (UNLIKELY(!B))
        return nullptr;
      const u32 N = Min(MaxCount, NumberOfBlocks - I);
      B->setFromArray(&ShuffleArray[I], N);
      Region->FreeList.push_back(B);
      I += N;
    }
    TransferBatch *B = Region->FreeList.front();
    Region->FreeList.pop_front();
    DCHECK(B);
    DCHECK_GT(B->getCount(), 0);

    const uptr AllocatedUser = Size * NumberOfBlocks;
    C->getStats().add(StatFree, AllocatedUser);
    Region->AllocatedUser += AllocatedUser;

    return B;
  }

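  // A worked example of the carving above (illustrative numbers only: assume
  // a 64-byte class whose MaxCount is 13, on a platform where MaxNumBatches
  // is 8). A fresh 256K mapping holds 262144 / 64 == 4096 blocks, so
  // NumberOfBlocks is Min(8 * 13, 4096) == 104: the call shuffles 104 block
  // addresses and packs them into 8 TransferBatches of 13 pointers each.
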
  void getStats(ScopedString *Str, uptr ClassId, uptr Rss) {
    RegionInfo *Region = getRegionInfo(ClassId);
    if (Region->MappedUser == 0)
      return;
    const uptr InUse = Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks;
    const uptr TotalChunks = Region->AllocatedUser / getSizeByClassId(ClassId);
    Str->append("%s %02zu (%6zu): mapped: %6zuK popped: %7zu pushed: %7zu "
                "inuse: %6zu total: %6zu rss: %6zuK releases: %6zu last "
                "released: %6zuK region: 0x%zx (0x%zx)\n",
                Region->Exhausted ? "F" : " ", ClassId,
                getSizeByClassId(ClassId), Region->MappedUser >> 10,
                Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks, InUse,
                TotalChunks, Rss >> 10, Region->ReleaseInfo.RangesReleased,
                Region->ReleaseInfo.LastReleasedBytes >> 10, Region->RegionBeg,
                getRegionBaseByClassId(ClassId));
  }

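  // The heuristics in releaseToOSMaybe() below gate the (expensive) release
  // pass. To make the small-block threshold concrete (illustrative
  // arithmetic, assuming a 4096-byte page): for a 128-byte class, release is
  // only attempted once the freelist holds at least 100 - 1 - 128 / 16 == 91
  // percent of AllocatedUser; for a 16-byte class the bar is 98 percent.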
  NOINLINE uptr releaseToOSMaybe(RegionInfo *Region, uptr ClassId,
                                 bool Force = false) {
    const uptr BlockSize = getSizeByClassId(ClassId);
    const uptr PageSize = getPageSizeCached();

    CHECK_GE(Region->Stats.PoppedBlocks, Region->Stats.PushedBlocks);
    const uptr BytesInFreeList =
        Region->AllocatedUser -
        (Region->Stats.PoppedBlocks - Region->Stats.PushedBlocks) * BlockSize;
    if (BytesInFreeList < PageSize)
      return 0; // No chance to release anything.
    const uptr BytesPushed = (Region->Stats.PushedBlocks -
                              Region->ReleaseInfo.PushedBlocksAtLastRelease) *
                             BlockSize;
    if (BytesPushed < PageSize)
      return 0; // Nothing new to release.

    // Releasing smaller blocks is expensive, so we want to make sure that a
    // significant amount of bytes are free, and that there has been a good
    // amount of batches pushed to the freelist before attempting to release.
    if (BlockSize < PageSize / 16U) {
      if (!Force && BytesPushed < Region->AllocatedUser / 16U)
        return 0;
      // We want 8x% to 9x% free bytes (the larger the block, the lower the %).
      if ((BytesInFreeList * 100U) / Region->AllocatedUser <
          (100U - 1U - BlockSize / 16U))
        return 0;
    }

    if (!Force) {
      const s32 IntervalMs = atomic_load_relaxed(&ReleaseToOsIntervalMs);
      if (IntervalMs < 0)
        return 0;
      if (Region->ReleaseInfo.LastReleaseAtNs +
              static_cast<u64>(IntervalMs) * 1000000 >
          getMonotonicTime()) {
        return 0; // Memory was returned recently.
      }
    }

    auto SkipRegion = [](UNUSED uptr RegionIndex) { return false; };
    ReleaseRecorder Recorder(Region->RegionBeg, &Region->Data);
    releaseFreeMemoryToOS(Region->FreeList, Region->RegionBeg,
                          Region->AllocatedUser, 1U, BlockSize, &Recorder,
                          SkipRegion);

    if (Recorder.getReleasedRangesCount() > 0) {
      Region->ReleaseInfo.PushedBlocksAtLastRelease =
          Region->Stats.PushedBlocks;
      Region->ReleaseInfo.RangesReleased += Recorder.getReleasedRangesCount();
      Region->ReleaseInfo.LastReleasedBytes = Recorder.getReleasedBytes();
    }
    Region->ReleaseInfo.LastReleaseAtNs = getMonotonicTime();
    return Recorder.getReleasedBytes();
  }
};

} // namespace scudo

#endif // SCUDO_PRIMARY64_H_