1 //===- SanitizerBinaryMetadata.cpp - binary analysis sanitizers metadata --===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file is a part of SanitizerBinaryMetadata.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Transforms/Instrumentation/SanitizerBinaryMetadata.h"
14 #include "llvm/ADT/SetVector.h"
15 #include "llvm/ADT/SmallVector.h"
16 #include "llvm/ADT/Statistic.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Twine.h"
19 #include "llvm/Analysis/CaptureTracking.h"
20 #include "llvm/Analysis/ValueTracking.h"
21 #include "llvm/IR/Constant.h"
22 #include "llvm/IR/DerivedTypes.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/GlobalValue.h"
25 #include "llvm/IR/GlobalVariable.h"
26 #include "llvm/IR/IRBuilder.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/LLVMContext.h"
30 #include "llvm/IR/MDBuilder.h"
31 #include "llvm/IR/Metadata.h"
32 #include "llvm/IR/Module.h"
33 #include "llvm/IR/Type.h"
34 #include "llvm/IR/Value.h"
35 #include "llvm/ProfileData/InstrProf.h"
36 #include "llvm/Support/Allocator.h"
37 #include "llvm/Support/CommandLine.h"
38 #include "llvm/Support/Debug.h"
39 #include "llvm/Support/SpecialCaseList.h"
40 #include "llvm/Support/StringSaver.h"
41 #include "llvm/Support/VirtualFileSystem.h"
42 #include "llvm/TargetParser/Triple.h"
43 #include "llvm/Transforms/Utils/ModuleUtils.h"
44 
45 #include <array>
46 #include <cstdint>
47 #include <memory>
48 
49 using namespace llvm;
50 
51 #define DEBUG_TYPE "sanmd"
52 
53 namespace {
54 
55 //===--- Constants --------------------------------------------------------===//
56 
// Base version of the emitted metadata format; it occupies the lower 16 bits
// of the version word handed to the runtime callbacks.
constexpr uint32_t kVersionBase = 2;
// Version flag (bit 16) signalling that offsets are pointer-sized. It is OR'ed
// into the version when the module uses the medium or large code model.
constexpr uint32_t kVersionPtrSizeRel = uint32_t{1} << 16;
// Priority with which the generated module constructors and destructors are
// registered in the global ctor/dtor lists.
constexpr int kCtorDtorPriority = 2;
60 
61 // Pairs of names of initialization callback functions and which section
62 // contains the relevant metadata.
63 class MetadataInfo {
64 public:
65   const StringRef FunctionPrefix;
66   const StringRef SectionSuffix;
67 
68   static const MetadataInfo Covered;
69   static const MetadataInfo Atomics;
70 
71 private:
72   // Forbid construction elsewhere.
73   explicit constexpr MetadataInfo(StringRef FunctionPrefix,
74                                   StringRef SectionSuffix)
75       : FunctionPrefix(FunctionPrefix), SectionSuffix(SectionSuffix) {}
76 };
77 const MetadataInfo MetadataInfo::Covered{
78     "__sanitizer_metadata_covered", kSanitizerBinaryMetadataCoveredSection};
79 const MetadataInfo MetadataInfo::Atomics{
80     "__sanitizer_metadata_atomics", kSanitizerBinaryMetadataAtomicsSection};
81 
82 // The only instances of MetadataInfo are the constants above, so a set of
83 // them may simply store pointers to them. To deterministically generate code,
84 // we need to use a set with stable iteration order, such as SetVector.
85 using MetadataInfoSet = SetVector<const MetadataInfo *>;
86 
87 //===--- Command-line options ---------------------------------------------===//
88 
89 cl::opt<bool> ClWeakCallbacks(
90     "sanitizer-metadata-weak-callbacks",
91     cl::desc("Declare callbacks extern weak, and only call if non-null."),
92     cl::Hidden, cl::init(true));
93 cl::opt<bool>
94     ClNoSanitize("sanitizer-metadata-nosanitize-attr",
95                  cl::desc("Mark some metadata features uncovered in functions "
96                           "with associated no_sanitize attributes."),
97                  cl::Hidden, cl::init(true));
98 
99 cl::opt<bool> ClEmitCovered("sanitizer-metadata-covered",
100                             cl::desc("Emit PCs for covered functions."),
101                             cl::Hidden, cl::init(false));
102 cl::opt<bool> ClEmitAtomics("sanitizer-metadata-atomics",
103                             cl::desc("Emit PCs for atomic operations."),
104                             cl::Hidden, cl::init(false));
105 cl::opt<bool> ClEmitUAR("sanitizer-metadata-uar",
106                         cl::desc("Emit PCs for start of functions that are "
107                                  "subject for use-after-return checking"),
108                         cl::Hidden, cl::init(false));
109 
110 //===--- Statistics -------------------------------------------------------===//
111 
112 STATISTIC(NumMetadataCovered, "Metadata attached to covered functions");
113 STATISTIC(NumMetadataAtomics, "Metadata attached to atomics");
114 STATISTIC(NumMetadataUAR, "Metadata attached to UAR functions");
115 
116 //===----------------------------------------------------------------------===//
117 
118 // Apply opt overrides.
119 SanitizerBinaryMetadataOptions &&
120 transformOptionsFromCl(SanitizerBinaryMetadataOptions &&Opts) {
121   Opts.Covered |= ClEmitCovered;
122   Opts.Atomics |= ClEmitAtomics;
123   Opts.UAR |= ClEmitUAR;
124   return std::move(Opts);
125 }
126 
127 class SanitizerBinaryMetadata {
128 public:
129   SanitizerBinaryMetadata(Module &M, SanitizerBinaryMetadataOptions Opts,
130                           std::unique_ptr<SpecialCaseList> Ignorelist)
131       : Mod(M), Options(transformOptionsFromCl(std::move(Opts))),
132         Ignorelist(std::move(Ignorelist)), TargetTriple(M.getTargetTriple()),
133         IRB(M.getContext()) {
134     // FIXME: Make it work with other formats.
135     assert(TargetTriple.isOSBinFormatELF() && "ELF only");
136     assert(!(TargetTriple.isNVPTX() || TargetTriple.isAMDGPU()) &&
137            "Device targets are not supported");
138   }
139 
140   bool run();
141 
142 private:
143   uint32_t getVersion() const {
144     uint32_t Version = kVersionBase;
145     const auto CM = Mod.getCodeModel();
146     if (CM.has_value() && (*CM == CodeModel::Medium || *CM == CodeModel::Large))
147       Version |= kVersionPtrSizeRel;
148     return Version;
149   }
150 
151   void runOn(Function &F, MetadataInfoSet &MIS);
152 
153   // Determines which set of metadata to collect for this instruction.
154   //
155   // Returns true if covered metadata is required to unambiguously interpret
156   // other metadata. For example, if we are interested in atomics metadata, any
157   // function with memory operations (atomic or not) requires covered metadata
158   // to determine if a memory operation is atomic or not in modules compiled
159   // with SanitizerBinaryMetadata.
160   bool runOn(Instruction &I, MetadataInfoSet &MIS, MDBuilder &MDB,
161              uint64_t &FeatureMask);
162 
163   // Get start/end section marker pointer.
164   GlobalVariable *getSectionMarker(const Twine &MarkerName, Type *Ty);
165 
166   // Returns the target-dependent section name.
167   StringRef getSectionName(StringRef SectionSuffix);
168 
169   // Returns the section start marker name.
170   Twine getSectionStart(StringRef SectionSuffix);
171 
172   // Returns the section end marker name.
173   Twine getSectionEnd(StringRef SectionSuffix);
174 
175   // Returns true if the access to the address should be considered "atomic".
176   bool pretendAtomicAccess(const Value *Addr);
177 
178   Module &Mod;
179   const SanitizerBinaryMetadataOptions Options;
180   std::unique_ptr<SpecialCaseList> Ignorelist;
181   const Triple TargetTriple;
182   IRBuilder<> IRB;
183   BumpPtrAllocator Alloc;
184   UniqueStringSaver StringPool{Alloc};
185 };
186 
187 bool SanitizerBinaryMetadata::run() {
188   MetadataInfoSet MIS;
189 
190   for (Function &F : Mod)
191     runOn(F, MIS);
192 
193   if (MIS.empty())
194     return false;
195 
196   //
197   // Setup constructors and call all initialization functions for requested
198   // metadata features.
199   //
200 
201   auto *Int8PtrTy = IRB.getInt8PtrTy();
202   auto *Int8PtrPtrTy = PointerType::getUnqual(Int8PtrTy);
203   auto *Int32Ty = IRB.getInt32Ty();
204   const std::array<Type *, 3> InitTypes = {Int32Ty, Int8PtrPtrTy, Int8PtrPtrTy};
205   auto *Version = ConstantInt::get(Int32Ty, getVersion());
206 
207   for (const MetadataInfo *MI : MIS) {
208     const std::array<Value *, InitTypes.size()> InitArgs = {
209         Version,
210         getSectionMarker(getSectionStart(MI->SectionSuffix), Int8PtrTy),
211         getSectionMarker(getSectionEnd(MI->SectionSuffix), Int8PtrTy),
212     };
213     // We declare the _add and _del functions as weak, and only call them if
214     // there is a valid symbol linked. This allows building binaries with
215     // semantic metadata, but without having callbacks. When a tool that wants
216     // the metadata is linked which provides the callbacks, they will be called.
217     Function *Ctor =
218         createSanitizerCtorAndInitFunctions(
219             Mod, (MI->FunctionPrefix + ".module_ctor").str(),
220             (MI->FunctionPrefix + "_add").str(), InitTypes, InitArgs,
221             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
222             .first;
223     Function *Dtor =
224         createSanitizerCtorAndInitFunctions(
225             Mod, (MI->FunctionPrefix + ".module_dtor").str(),
226             (MI->FunctionPrefix + "_del").str(), InitTypes, InitArgs,
227             /*VersionCheckName=*/StringRef(), /*Weak=*/ClWeakCallbacks)
228             .first;
229     Constant *CtorComdatKey = nullptr;
230     Constant *DtorComdatKey = nullptr;
231     if (TargetTriple.supportsCOMDAT()) {
232       // Use COMDAT to deduplicate constructor/destructor function. The COMDAT
233       // key needs to be a non-local linkage.
234       Ctor->setComdat(Mod.getOrInsertComdat(Ctor->getName()));
235       Dtor->setComdat(Mod.getOrInsertComdat(Dtor->getName()));
236       Ctor->setLinkage(GlobalValue::ExternalLinkage);
237       Dtor->setLinkage(GlobalValue::ExternalLinkage);
238       // DSOs should _not_ call another constructor/destructor!
239       Ctor->setVisibility(GlobalValue::HiddenVisibility);
240       Dtor->setVisibility(GlobalValue::HiddenVisibility);
241       CtorComdatKey = Ctor;
242       DtorComdatKey = Dtor;
243     }
244     appendToGlobalCtors(Mod, Ctor, kCtorDtorPriority, CtorComdatKey);
245     appendToGlobalDtors(Mod, Dtor, kCtorDtorPriority, DtorComdatKey);
246   }
247 
248   return true;
249 }
250 
251 void SanitizerBinaryMetadata::runOn(Function &F, MetadataInfoSet &MIS) {
252   if (F.empty())
253     return;
254   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
255     return;
256   if (Ignorelist && Ignorelist->inSection("metadata", "fun", F.getName()))
257     return;
258   // Don't touch available_externally functions, their actual body is elsewhere.
259   if (F.getLinkage() == GlobalValue::AvailableExternallyLinkage)
260     return;
261 
262   MDBuilder MDB(F.getContext());
263 
264   // The metadata features enabled for this function, stored along covered
265   // metadata (if enabled).
266   uint64_t FeatureMask = 0;
267   // Don't emit unnecessary covered metadata for all functions to save space.
268   bool RequiresCovered = false;
269 
270   if (Options.Atomics || Options.UAR) {
271     for (BasicBlock &BB : F)
272       for (Instruction &I : BB)
273         RequiresCovered |= runOn(I, MIS, MDB, FeatureMask);
274   }
275 
276   if (ClNoSanitize && F.hasFnAttribute("no_sanitize_thread"))
277     FeatureMask &= ~kSanitizerBinaryMetadataAtomics;
278   if (F.isVarArg())
279     FeatureMask &= ~kSanitizerBinaryMetadataUAR;
280   if (FeatureMask & kSanitizerBinaryMetadataUAR) {
281     RequiresCovered = true;
282     NumMetadataUAR++;
283   }
284 
285   // Covered metadata is always emitted if explicitly requested, otherwise only
286   // if some other metadata requires it to unambiguously interpret it for
287   // modules compiled with SanitizerBinaryMetadata.
288   if (Options.Covered || (FeatureMask && RequiresCovered)) {
289     NumMetadataCovered++;
290     const auto *MI = &MetadataInfo::Covered;
291     MIS.insert(MI);
292     const StringRef Section = getSectionName(MI->SectionSuffix);
293     // The feature mask will be placed after the function size.
294     Constant *CFM = IRB.getInt64(FeatureMask);
295     F.setMetadata(LLVMContext::MD_pcsections,
296                   MDB.createPCSections({{Section, {CFM}}}));
297   }
298 }
299 
300 bool isUARSafeCall(CallInst *CI) {
301   auto *F = CI->getCalledFunction();
302   // There are no intrinsic functions that leak arguments.
303   // If the called function does not return, the current function
304   // does not return as well, so no possibility of use-after-return.
305   // Sanitizer function also don't leak or don't return.
306   // It's safe to both pass pointers to local variables to them
307   // and to tail-call them.
308   return F && (F->isIntrinsic() || F->doesNotReturn() ||
309                F->getName().startswith("__asan_") ||
310                F->getName().startswith("__hwsan_") ||
311                F->getName().startswith("__ubsan_") ||
312                F->getName().startswith("__msan_") ||
313                F->getName().startswith("__tsan_"));
314 }
315 
316 bool hasUseAfterReturnUnsafeUses(Value &V) {
317   for (User *U : V.users()) {
318     if (auto *I = dyn_cast<Instruction>(U)) {
319       if (I->isLifetimeStartOrEnd() || I->isDroppable())
320         continue;
321       if (auto *CI = dyn_cast<CallInst>(U)) {
322         if (isUARSafeCall(CI))
323           continue;
324       }
325       if (isa<LoadInst>(U))
326         continue;
327       if (auto *SI = dyn_cast<StoreInst>(U)) {
328         // If storing TO the alloca, then the address isn't taken.
329         if (SI->getOperand(1) == &V)
330           continue;
331       }
332       if (auto *GEPI = dyn_cast<GetElementPtrInst>(U)) {
333         if (!hasUseAfterReturnUnsafeUses(*GEPI))
334           continue;
335       } else if (auto *BCI = dyn_cast<BitCastInst>(U)) {
336         if (!hasUseAfterReturnUnsafeUses(*BCI))
337           continue;
338       }
339     }
340     return true;
341   }
342   return false;
343 }
344 
345 bool useAfterReturnUnsafe(Instruction &I) {
346   if (isa<AllocaInst>(I))
347     return hasUseAfterReturnUnsafeUses(I);
348   // Tail-called functions are not necessary intercepted
349   // at runtime because there is no call instruction.
350   // So conservatively mark the caller as requiring checking.
351   else if (auto *CI = dyn_cast<CallInst>(&I))
352     return CI->isTailCall() && !isUARSafeCall(CI);
353   return false;
354 }
355 
356 bool SanitizerBinaryMetadata::pretendAtomicAccess(const Value *Addr) {
357   if (!Addr)
358     return false;
359 
360   Addr = Addr->stripInBoundsOffsets();
361   auto *GV = dyn_cast<GlobalVariable>(Addr);
362   if (!GV)
363     return false;
364 
365   // Some compiler-generated accesses are known racy, to avoid false positives
366   // in data-race analysis pretend they're atomic.
367   if (GV->hasSection()) {
368     const auto OF = Triple(Mod.getTargetTriple()).getObjectFormat();
369     const auto ProfSec =
370         getInstrProfSectionName(IPSK_cnts, OF, /*AddSegmentInfo=*/false);
371     if (GV->getSection().endswith(ProfSec))
372       return true;
373   }
374   if (GV->getName().startswith("__llvm_gcov") ||
375       GV->getName().startswith("__llvm_gcda"))
376     return true;
377 
378   return false;
379 }
380 
381 // Returns true if the memory at `Addr` may be shared with other threads.
382 bool maybeSharedMutable(const Value *Addr) {
383   // By default assume memory may be shared.
384   if (!Addr)
385     return true;
386 
387   if (isa<AllocaInst>(getUnderlyingObject(Addr)) &&
388       !PointerMayBeCaptured(Addr, true, true))
389     return false; // Object is on stack but does not escape.
390 
391   Addr = Addr->stripInBoundsOffsets();
392   if (auto *GV = dyn_cast<GlobalVariable>(Addr)) {
393     if (GV->isConstant())
394       return false; // Shared, but not mutable.
395   }
396 
397   return true;
398 }
399 
400 bool SanitizerBinaryMetadata::runOn(Instruction &I, MetadataInfoSet &MIS,
401                                     MDBuilder &MDB, uint64_t &FeatureMask) {
402   SmallVector<const MetadataInfo *, 1> InstMetadata;
403   bool RequiresCovered = false;
404 
405   // Only call if at least 1 type of metadata is requested.
406   assert(Options.UAR || Options.Atomics);
407 
408   if (Options.UAR && !(FeatureMask & kSanitizerBinaryMetadataUAR)) {
409     if (useAfterReturnUnsafe(I))
410       FeatureMask |= kSanitizerBinaryMetadataUAR;
411   }
412 
413   if (Options.Atomics) {
414     const Value *Addr = nullptr;
415     if (auto *SI = dyn_cast<StoreInst>(&I))
416       Addr = SI->getPointerOperand();
417     else if (auto *LI = dyn_cast<LoadInst>(&I))
418       Addr = LI->getPointerOperand();
419 
420     if (I.mayReadOrWriteMemory() && maybeSharedMutable(Addr)) {
421       auto SSID = getAtomicSyncScopeID(&I);
422       if ((SSID.has_value() && *SSID != SyncScope::SingleThread) ||
423           pretendAtomicAccess(Addr)) {
424         NumMetadataAtomics++;
425         InstMetadata.push_back(&MetadataInfo::Atomics);
426       }
427       FeatureMask |= kSanitizerBinaryMetadataAtomics;
428       RequiresCovered = true;
429     }
430   }
431 
432   // Attach MD_pcsections to instruction.
433   if (!InstMetadata.empty()) {
434     MIS.insert(InstMetadata.begin(), InstMetadata.end());
435     SmallVector<MDBuilder::PCSection, 1> Sections;
436     for (const auto &MI : InstMetadata)
437       Sections.push_back({getSectionName(MI->SectionSuffix), {}});
438     I.setMetadata(LLVMContext::MD_pcsections, MDB.createPCSections(Sections));
439   }
440 
441   return RequiresCovered;
442 }
443 
444 GlobalVariable *
445 SanitizerBinaryMetadata::getSectionMarker(const Twine &MarkerName, Type *Ty) {
446   // Use ExternalWeak so that if all sections are discarded due to section
447   // garbage collection, the linker will not report undefined symbol errors.
448   auto *Marker = new GlobalVariable(Mod, Ty, /*isConstant=*/false,
449                                     GlobalVariable::ExternalWeakLinkage,
450                                     /*Initializer=*/nullptr, MarkerName);
451   Marker->setVisibility(GlobalValue::HiddenVisibility);
452   return Marker;
453 }
454 
455 StringRef SanitizerBinaryMetadata::getSectionName(StringRef SectionSuffix) {
456   // FIXME: Other TargetTriples.
457   // Request ULEB128 encoding for all integer constants.
458   return StringPool.save(SectionSuffix + "!C");
459 }
460 
461 Twine SanitizerBinaryMetadata::getSectionStart(StringRef SectionSuffix) {
462   return "__start_" + SectionSuffix;
463 }
464 
465 Twine SanitizerBinaryMetadata::getSectionEnd(StringRef SectionSuffix) {
466   return "__stop_" + SectionSuffix;
467 }
468 
469 } // namespace
470 
471 SanitizerBinaryMetadataPass::SanitizerBinaryMetadataPass(
472     SanitizerBinaryMetadataOptions Opts, ArrayRef<std::string> IgnorelistFiles)
473     : Options(std::move(Opts)), IgnorelistFiles(std::move(IgnorelistFiles)) {}
474 
475 PreservedAnalyses
476 SanitizerBinaryMetadataPass::run(Module &M, AnalysisManager<Module> &AM) {
477   std::unique_ptr<SpecialCaseList> Ignorelist;
478   if (!IgnorelistFiles.empty()) {
479     Ignorelist = SpecialCaseList::createOrDie(IgnorelistFiles,
480                                               *vfs::getRealFileSystem());
481     if (Ignorelist->inSection("metadata", "src", M.getSourceFileName()))
482       return PreservedAnalyses::all();
483   }
484 
485   SanitizerBinaryMetadata Pass(M, Options, std::move(Ignorelist));
486   if (Pass.run())
487     return PreservedAnalyses::none();
488   return PreservedAnalyses::all();
489 }
490