1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of SanitizerBinaryMetadata.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CaptureTracking.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/IR/Constant.h"
22 #include "llvm/IR/DerivedTypes.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/ProfileData/InstrProf.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/SpecialCaseList.h"
40 #include "llvm/Support/StringSaver.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/TargetParser/Triple.h"
43 #include "llvm/Transforms/Utils/ModuleUtils.h"
44
45 #include <array>
46 #include <cstdint>
47 #include <memory>
48
49 using namespace llvm;
50
51 #define DEBUG_TYPE "sanmd"
52
53 namespace {
54
55 //===--- Constants --------------------------------------------------------===//
56
// Metadata format version. The base version occupies the lower 16 bits; flags
// describing the encoding are OR'ed in above it.
constexpr uint32_t kVersionBase = 2;
// Version flag: offsets are pointer-sized.
constexpr uint32_t kVersionPtrSizeRel = 1u << 16;
// Priority passed when registering the module constructors and destructors.
constexpr int kCtorDtorPriority = 2;
60
61 // Pairs of names of initialization callback functions and which section
62 // contains the relevant metadata.
63 class MetadataInfo {
64 public:
65 const StringRef FunctionPrefix;
66 const StringRef SectionSuffix;
67
68 static const MetadataInfo Covered;
69 static const MetadataInfo Atomics;
70
71 private:
72 // Forbid construction elsewhere.
MetadataInfo(StringRef FunctionPrefix,StringRef SectionSuffix)73 explicit constexpr MetadataInfo(StringRef FunctionPrefix,
74 StringRef SectionSuffix)
75 : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
76 };
77 const MetadataInfo MetadataInfo::Covered{
78 "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
79 const MetadataInfo MetadataInfo::Atomics{
80 "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
81
82 // The only instances of MetadataInfo are the constants above, so a set of
83 // them may simply store pointers to them. To deterministically generate code,
84 // we need to use a set with stable iteration order, such as SetVector.
85 using MetadataInfoSet = SetVector<const MetadataInfo *>;
86
87 //===--- Command-line options ---------------------------------------------===//
88
89 cl::opt<bool> ClWeakCallbacks(
90 "sanitizer-metadata-weak-callbacks",
91 cl::desc("Declare callbacks extern weak, and only call if non-null."),
92 cl::Hidden, cl::init(true));
93 cl::opt<bool>
94 ClNoSanitize("sanitizer-metadata-nosanitize-attr",
95 cl::desc("Mark some metadata features uncovered in functions "
96 "with associated no_sanitize attributes."),
97 cl::Hidden, cl::init(true));
98
99 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
100 cl::desc("Emit PCs for covered functions."),
101 cl::Hidden, cl::init(false));
102 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
103 cl::desc("Emit PCs for atomic operations."),
104 cl::Hidden, cl::init(false));
105 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
106 cl::desc("Emit PCs for start of functions that are "
107 "subject for use-after-return checking"),
108 cl::Hidden, cl::init(false));
109
110 //===--- Statistics -------------------------------------------------------===//
111
112 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
113 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
114 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
115
116 //===----------------------------------------------------------------------===//
117
118 // Apply opt overrides.
119 SanitizerBinaryMetadataOptions &&
transformOptionsFromCl(SanitizerBinaryMetadataOptions && Opts)120 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
121 Opts.Covered |= ClEmitCovered;
122 Opts.Atomics |= ClEmitAtomics;
123 Opts.UAR |= ClEmitUAR;
124 return std::move(Opts);
125 }
126
127 class SanitizerBinaryMetadata {
128 public:
SanitizerBinaryMetadata(Module & M,SanitizerBinaryMetadataOptions Opts,std::unique_ptr<SpecialCaseList> Ignorelist)129 SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
130 std::unique_ptr<SpecialCaseList> Ignorelist)
131 : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
132 Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
133 IRB(M.getContext()) {
134 // FIXME: Make it work with other formats.
135 assert(TargetTriple.isOSBinFormatELF() && "ELF only");
136 assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
137 "Device targets are not supported");
138 }
139
140 bool run();
141
142 private:
getVersion() const143 uint32_t getVersion() const {
144 uint32_t Version = kVersionBase;
145 const auto CM = Mod.getCodeModel();
146 if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
147 Version |= kVersionPtrSizeRel;
148 return Version;
149 }
150
151 void runOn(Function &F, MetadataInfoSet &MIS);
152
153 // Determines which set of metadata to collect for this instruction.
154 //
155 // Returns true if covered metadata is required to unambiguously interpret
156 // other metadata. For example, if we are interested in atomics metadata, any
157 // function with memory operations (atomic or not) requires covered metadata
158 // to determine if a memory operation is atomic or not in modules compiled
159 // with SanitizerBinaryMetadata.
160 bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
161 uint64_t &FeatureMask);
162
163 // Get start/end section marker pointer.
164 GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
165
166 // Returns the target-dependent section name.
167 StringRef getSectionName(StringRef SectionSuffix);
168
169 // Returns the section start marker name.
170 Twine getSectionStart(StringRef SectionSuffix);
171
172 // Returns the section end marker name.
173 Twine getSectionEnd(StringRef SectionSuffix);
174
175 // Returns true if the access to the address should be considered "atomic".
176 bool pretendAtomicAccess(const Value *Addr);
177
178 Module &Mod;
179 const SanitizerBinaryMetadataOptions Options;
180 std::unique_ptr<SpecialCaseList> Ignorelist;
181 const Triple TargetTriple;
182 IRBuilder<> IRB;
183 BumpPtrAllocator Alloc;
184 UniqueStringSaver StringPool{Alloc};
185 };
186
run()187 bool SanitizerBinaryMetadata::run() {
188 MetadataInfoSet MIS;
189
190 for (Function &F : Mod)
191 runOn(F, MIS);
192
193 if (MIS.empty())
194 return false;
195
196 //
197 // Setup constructors and call all initialization functions for requested
198 // metadata features.
199 //
200
201 auto *PtrTy = IRB.getPtrTy();
202 auto *Int32Ty = IRB.getInt32Ty();
203 const std::array<Type *, 3> InitTypes = {Int32Ty, PtrTy, PtrTy};
204 auto *Version = ConstantInt::get(Int32Ty, getVersion());
205
206 for (const MetadataInfo *MI : MIS) {
207 const std::array<Value *, InitTypes.size()> InitArgs = {
208 Version,
209 getSectionMarker(getSectionStart(MI->SectionSuffix), PtrTy),
210 getSectionMarker(getSectionEnd(MI->SectionSuffix), PtrTy),
211 };
212 // We declare the _add and _del functions as weak, and only call them if
213 // there is a valid symbol linked. This allows building binaries with
214 // semantic metadata, but without having callbacks. When a tool that wants
215 // the metadata is linked which provides the callbacks, they will be called.
216 Function *Ctor =
217 createSanitizerCtorAndInitFunctions(
218 Mod, (MI->FunctionPrefix + ".module_ctor").str(),
219 (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
220 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
221 .first;
222 Function *Dtor =
223 createSanitizerCtorAndInitFunctions(
224 Mod, (MI->FunctionPrefix + ".module_dtor").str(),
225 (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
226 /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
227 .first;
228 Constant *CtorComdatKey = nullptr;
229 Constant *DtorComdatKey = nullptr;
230 if (TargetTriple.supportsCOMDAT()) {
231 // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
232 // key needs to be a non-local linkage.
233 Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
234 Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
235 Ctor->setLinkage(GlobalValue::ExternalLinkage);
236 Dtor->setLinkage(GlobalValue::ExternalLinkage);
237 // DSOs should _not_ call another constructor/destructor!
238 Ctor->setVisibility(GlobalValue::HiddenVisibility);
239 Dtor->setVisibility(GlobalValue::HiddenVisibility);
240 CtorComdatKey = Ctor;
241 DtorComdatKey = Dtor;
242 }
243 appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
244 appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
245 }
246
247 return true;
248 }
249
runOn(Function & F,MetadataInfoSet & MIS)250 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
251 if (F.empty())
252 return;
253 if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
254 return;
255 if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
256 return;
257 // Don't touch available_externally functions, their actual body is elsewhere.
258 if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
259 return;
260
261 MDBuilder MDB(F.getContext());
262
263 // The metadata features enabled for this function, stored along covered
264 // metadata (if enabled).
265 uint64_t FeatureMask = 0;
266 // Don't emit unnecessary covered metadata for all functions to save space.
267 bool RequiresCovered = false;
268
269 if (Options.Atomics || Options.UAR) {
270 for (BasicBlock &BB : F)
271 for (Instruction &I : BB)
272 RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
273 }
274
275 if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
276 FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
277 if (F.isVarArg())
278 FeatureMask &= ~kSanitizerBinaryMetadataUAR;
279 if (FeatureMask & kSanitizerBinaryMetadataUAR) {
280 RequiresCovered = true;
281 NumMetadataUAR++;
282 }
283
284 // Covered metadata is always emitted if explicitly requested, otherwise only
285 // if some other metadata requires it to unambiguously interpret it for
286 // modules compiled with SanitizerBinaryMetadata.
287 if (Options.Covered || (FeatureMask && RequiresCovered)) {
288 NumMetadataCovered++;
289 const auto *MI = &MetadataInfo::Covered;
290 MIS.insert(MI);
291 const StringRef Section = getSectionName(MI->SectionSuffix);
292 // The feature mask will be placed after the function size.
293 Constant *CFM = IRB.getInt64(FeatureMask);
294 F.setMetadata(LLVMContext::MD_pcsections,
295 MDB.createPCSections({{Section, {CFM}}}));
296 }
297 }
298
isUARSafeCall(CallInst * CI)299 bool isUARSafeCall(CallInst *CI) {
300 auto *F = CI->getCalledFunction();
301 // There are no intrinsic functions that leak arguments.
302 // If the called function does not return, the current function
303 // does not return as well, so no possibility of use-after-return.
304 // Sanitizer function also don't leak or don't return.
305 // It's safe to both pass pointers to local variables to them
306 // and to tail-call them.
307 return F && (F->isIntrinsic() || F->doesNotReturn() ||
308 F->getName().starts_with("__asan_") ||
309 F->getName().starts_with("__hwsan_") ||
310 F->getName().starts_with("__ubsan_") ||
311 F->getName().starts_with("__msan_") ||
312 F->getName().starts_with("__tsan_"));
313 }
314
hasUseAfterReturnUnsafeUses(Value & V)315 bool hasUseAfterReturnUnsafeUses(Value &V) {
316 for (User *U : V.users()) {
317 if (auto *I = dyn_cast<Instruction>(U)) {
318 if (I->isLifetimeStartOrEnd() || I->isDroppable())
319 continue;
320 if (auto *CI = dyn_cast<CallInst>(U)) {
321 if (isUARSafeCall(CI))
322 continue;
323 }
324 if (isa<LoadInst>(U))
325 continue;
326 if (auto *SI = dyn_cast<StoreInst>(U)) {
327 // If storing TO the alloca, then the address isn't taken.
328 if (SI->getOperand(1) == &V)
329 continue;
330 }
331 if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
332 if (!hasUseAfterReturnUnsafeUses(*GEPI))
333 continue;
334 } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
335 if (!hasUseAfterReturnUnsafeUses(*BCI))
336 continue;
337 }
338 }
339 return true;
340 }
341 return false;
342 }
343
useAfterReturnUnsafe(Instruction & I)344 bool useAfterReturnUnsafe(Instruction &I) {
345 if (isa<AllocaInst>(I))
346 return hasUseAfterReturnUnsafeUses(I);
347 // Tail-called functions are not necessary intercepted
348 // at runtime because there is no call instruction.
349 // So conservatively mark the caller as requiring checking.
350 else if (auto *CI = dyn_cast<CallInst>(&I))
351 return CI->isTailCall() && !isUARSafeCall(CI);
352 return false;
353 }
354
pretendAtomicAccess(const Value * Addr)355 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
356 if (!Addr)
357 return false;
358
359 Addr = Addr->stripInBoundsOffsets();
360 auto *GV = dyn_cast<GlobalVariable>(Addr);
361 if (!GV)
362 return false;
363
364 // Some compiler-generated accesses are known racy, to avoid false positives
365 // in data-race analysis pretend they're atomic.
366 if (GV->hasSection()) {
367 const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
368 const auto ProfSec =
369 getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
370 if (GV->getSection().ends_with(ProfSec))
371 return true;
372 }
373 if (GV->getName().starts_with("__llvm_gcov") ||
374 GV->getName().starts_with("__llvm_gcda"))
375 return true;
376
377 return false;
378 }
379
380 // Returns true if the memory at `Addr` may be shared with other threads.
maybeSharedMutable(const Value * Addr)381 bool maybeSharedMutable(const Value *Addr) {
382 // By default assume memory may be shared.
383 if (!Addr)
384 return true;
385
386 if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
387 !PointerMayBeCaptured(Addr, true, true))
388 return false; // Object is on stack but does not escape.
389
390 Addr = Addr->stripInBoundsOffsets();
391 if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
392 if (GV->isConstant())
393 return false; // Shared, but not mutable.
394 }
395
396 return true;
397 }
398
runOn(Instruction & I,MetadataInfoSet & MIS,MDBuilder & MDB,uint64_t & FeatureMask)399 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
400 MDBuilder &MDB, uint64_t &FeatureMask) {
401 SmallVector<const MetadataInfo *, 1> InstMetadata;
402 bool RequiresCovered = false;
403
404 // Only call if at least 1 type of metadata is requested.
405 assert(Options.UAR || Options.Atomics);
406
407 if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
408 if (useAfterReturnUnsafe(I))
409 FeatureMask |= kSanitizerBinaryMetadataUAR;
410 }
411
412 if (Options.Atomics) {
413 const Value *Addr = nullptr;
414 if (auto *SI = dyn_cast<StoreInst>(&I))
415 Addr = SI->getPointerOperand();
416 else if (auto *LI = dyn_cast<LoadInst>(&I))
417 Addr = LI->getPointerOperand();
418
419 if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
420 auto SSID = getAtomicSyncScopeID(&I);
421 if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
422 pretendAtomicAccess(Addr)) {
423 NumMetadataAtomics++;
424 InstMetadata.push_back(&MetadataInfo::Atomics);
425 }
426 FeatureMask |= kSanitizerBinaryMetadataAtomics;
427 RequiresCovered = true;
428 }
429 }
430
431 // Attach MD_pcsections to instruction.
432 if (!InstMetadata.empty()) {
433 MIS.insert(InstMetadata.begin(), InstMetadata.end());
434 SmallVector<MDBuilder::PCSection, 1> Sections;
435 for (const auto &MI : InstMetadata)
436 Sections.push_back({getSectionName(MI->SectionSuffix), {}});
437 I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
438 }
439
440 return RequiresCovered;
441 }
442
443 GlobalVariable *
getSectionMarker(const Twine & MarkerName,Type * Ty)444 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
445 // Use ExternalWeak so that if all sections are discarded due to section
446 // garbage collection, the linker will not report undefined symbol errors.
447 auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
448 GlobalVariable::ExternalWeakLinkage,
449 /*Initializer=*/nullptr, MarkerName);
450 Marker->setVisibility(GlobalValue::HiddenVisibility);
451 return Marker;
452 }
453
getSectionName(StringRef SectionSuffix)454 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
455 // FIXME: Other TargetTriples.
456 // Request ULEB128 encoding for all integer constants.
457 return StringPool.save(SectionSuffix + "!C");
458 }
459
getSectionStart(StringRef SectionSuffix)460 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
461 return "__start_" + SectionSuffix;
462 }
463
getSectionEnd(StringRef SectionSuffix)464 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
465 return "__stop_" + SectionSuffix;
466 }
467
468 } // namespace
469
SanitizerBinaryMetadataPass(SanitizerBinaryMetadataOptions Opts,ArrayRef<std::string> IgnorelistFiles)470 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
471 SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
472 : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
473
474 PreservedAnalyses
run(Module & M,AnalysisManager<Module> & AM)475 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
476 std::unique_ptr<SpecialCaseList> Ignorelist;
477 if (!IgnorelistFiles.empty()) {
478 Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
479 *vfs::getRealFileSystem());
480 if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
481 return PreservedAnalyses::all();
482 }
483
484 SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
485 if (Pass.run())
486 return PreservedAnalyses::none();
487 return PreservedAnalyses::all();
488 }
489