1 //===- DataFlowSanitizer.cpp - dynamic data flow analysis -----------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of DataFlowSanitizer, a generalised dynamic data flow
11 /// analysis.
12 ///
13 /// Unlike other Sanitizer tools, this tool is not designed to detect a specific
14 /// class of bugs on its own.  Instead, it provides a generic dynamic data flow
15 /// analysis framework to be used by clients to help detect application-specific
16 /// issues within their own code.
17 ///
18 /// The analysis is based on automatic propagation of data flow labels (also
19 /// known as taint labels) through a program as it performs computation.
20 ///
21 /// Argument and return value labels are passed through TLS variables
22 /// __dfsan_arg_tls and __dfsan_retval_tls.
23 ///
24 /// Each byte of application memory is backed by a shadow memory byte. The
25 /// shadow byte can represent up to 8 labels. On Linux/x86_64, memory is then
26 /// laid out as follows:
27 ///
28 /// +--------------------+ 0x800000000000 (top of memory)
29 /// |    application 3   |
30 /// +--------------------+ 0x700000000000
31 /// |      invalid       |
32 /// +--------------------+ 0x610000000000
33 /// |      origin 1      |
34 /// +--------------------+ 0x600000000000
35 /// |    application 2   |
36 /// +--------------------+ 0x510000000000
37 /// |      shadow 1      |
38 /// +--------------------+ 0x500000000000
39 /// |      invalid       |
40 /// +--------------------+ 0x400000000000
41 /// |      origin 3      |
42 /// +--------------------+ 0x300000000000
43 /// |      shadow 3      |
44 /// +--------------------+ 0x200000000000
45 /// |      origin 2      |
46 /// +--------------------+ 0x110000000000
47 /// |      invalid       |
48 /// +--------------------+ 0x100000000000
49 /// |      shadow 2      |
50 /// +--------------------+ 0x010000000000
51 /// |    application 1   |
52 /// +--------------------+ 0x000000000000
53 ///
54 /// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000
55 /// SHADOW_TO_ORIGIN(shadow) = shadow + 0x100000000000
56 ///
57 /// For more information, please refer to the design document:
58 /// http://clang.llvm.org/docs/DataFlowSanitizerDesign.html
59 //
60 //===----------------------------------------------------------------------===//
61 
62 #include "llvm/Transforms/Instrumentation/DataFlowSanitizer.h"
63 #include "llvm/ADT/DenseMap.h"
64 #include "llvm/ADT/DenseSet.h"
65 #include "llvm/ADT/DepthFirstIterator.h"
66 #include "llvm/ADT/None.h"
67 #include "llvm/ADT/SmallPtrSet.h"
68 #include "llvm/ADT/SmallVector.h"
69 #include "llvm/ADT/StringRef.h"
70 #include "llvm/ADT/StringSet.h"
71 #include "llvm/ADT/Triple.h"
72 #include "llvm/ADT/iterator.h"
73 #include "llvm/Analysis/ValueTracking.h"
74 #include "llvm/IR/Argument.h"
75 #include "llvm/IR/Attributes.h"
76 #include "llvm/IR/BasicBlock.h"
77 #include "llvm/IR/Constant.h"
78 #include "llvm/IR/Constants.h"
79 #include "llvm/IR/DataLayout.h"
80 #include "llvm/IR/DerivedTypes.h"
81 #include "llvm/IR/Dominators.h"
82 #include "llvm/IR/Function.h"
83 #include "llvm/IR/GlobalAlias.h"
84 #include "llvm/IR/GlobalValue.h"
85 #include "llvm/IR/GlobalVariable.h"
86 #include "llvm/IR/IRBuilder.h"
87 #include "llvm/IR/InstVisitor.h"
88 #include "llvm/IR/InstrTypes.h"
89 #include "llvm/IR/Instruction.h"
90 #include "llvm/IR/Instructions.h"
91 #include "llvm/IR/IntrinsicInst.h"
92 #include "llvm/IR/MDBuilder.h"
93 #include "llvm/IR/Module.h"
94 #include "llvm/IR/PassManager.h"
95 #include "llvm/IR/Type.h"
96 #include "llvm/IR/User.h"
97 #include "llvm/IR/Value.h"
98 #include "llvm/InitializePasses.h"
99 #include "llvm/Pass.h"
100 #include "llvm/Support/Alignment.h"
101 #include "llvm/Support/Casting.h"
102 #include "llvm/Support/CommandLine.h"
103 #include "llvm/Support/ErrorHandling.h"
104 #include "llvm/Support/SpecialCaseList.h"
105 #include "llvm/Support/VirtualFileSystem.h"
106 #include "llvm/Transforms/Instrumentation.h"
107 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
108 #include "llvm/Transforms/Utils/Local.h"
109 #include <algorithm>
110 #include <cassert>
111 #include <cstddef>
112 #include <cstdint>
113 #include <memory>
114 #include <set>
115 #include <string>
116 #include <utility>
117 #include <vector>
118 
119 using namespace llvm;
120 
// Alignment used for per-argument / return-value shadow slots in TLS.
// This must be consistent with ShadowWidthBits.
static const Align ShadowTLSAlignment = Align(2);

// Minimum alignment of an origin slot; origins are 4-byte IDs.
static const Align MinOriginAlignment = Align(4);

// The size of TLS variables. These constants must be kept in sync with the ones
// in dfsan.cpp.
static const unsigned ArgTLSSize = 800;
static const unsigned RetvalTLSSize = 800;

// The -dfsan-preserve-alignment flag controls whether this pass assumes that
// alignment requirements provided by the input IR are correct.  For example,
// if the input IR contains a load with alignment 8, this flag will cause
// the shadow load to have alignment 16.  This flag is disabled by default as
// we have unfortunately encountered too much code (including Clang itself;
// see PR14291) which performs misaligned access.
static cl::opt<bool> ClPreserveAlignment(
    "dfsan-preserve-alignment",
    cl::desc("respect alignment requirements provided by input IR"), cl::Hidden,
    cl::init(false));

// The ABI list files control how shadow parameters are passed. The pass treats
// every function labelled "uninstrumented" in the ABI list file as conforming
// to the "native" (i.e. unsanitized) ABI.  Unless the ABI list contains
// additional annotations for those functions, a call to one of those functions
// will produce a warning message, as the labelling behaviour of the function is
// unknown. The other supported annotations for uninstrumented functions are
// "functional" and "discard", which are described below under
// DataFlowSanitizer::WrapperKind.
// Functions will often be labelled with both "uninstrumented" and one of
// "functional" or "discard". This will leave the function unchanged by this
// pass, and create a wrapper function that will call the original.
//
// Instrumented functions can also be annotated as "force_zero_labels", which
// will make all shadow and return values set zero labels.
// Functions should never be labelled with both "force_zero_labels" and
// "uninstrumented" or any of the uninstrumented wrapper kinds.
static cl::list<std::string> ClABIListFiles(
    "dfsan-abilist",
    cl::desc("File listing native ABI functions and how the pass treats them"),
    cl::Hidden);

// Controls whether the pass includes or ignores the labels of pointers in load
// instructions.
static cl::opt<bool> ClCombinePointerLabelsOnLoad(
    "dfsan-combine-pointer-labels-on-load",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "loading from memory."),
    cl::Hidden, cl::init(true));

// Controls whether the pass includes or ignores the labels of pointers in
// stores instructions.
static cl::opt<bool> ClCombinePointerLabelsOnStore(
    "dfsan-combine-pointer-labels-on-store",
    cl::desc("Combine the label of the pointer with the label of the data when "
             "storing in memory."),
    cl::Hidden, cl::init(false));

// Controls whether the pass propagates labels of offsets in GEP instructions.
static cl::opt<bool> ClCombineOffsetLabelsOnGEP(
    "dfsan-combine-offset-labels-on-gep",
    cl::desc(
        "Combine the label of the offset with the label of the pointer when "
        "doing pointer arithmetic."),
    cl::Hidden, cl::init(true));

// Opt-in list of globals (typically constant lookup tables) whose loads/GEPs
// still combine pointer/offset taint even when the two flags above are off.
static cl::list<std::string> ClCombineTaintLookupTables(
    "dfsan-combine-taint-lookup-table",
    cl::desc(
        "When dfsan-combine-offset-labels-on-gep and/or "
        "dfsan-combine-pointer-labels-on-load are false, this flag can "
        "be used to re-enable combining offset and/or pointer taint when "
        "loading specific constant global variables (i.e. lookup tables)."),
    cl::Hidden);

// Debugging aid: reports (via __dfsan_nonzero_label) whenever a nonzero label
// is observed at a parameter, load, or return.
static cl::opt<bool> ClDebugNonzeroLabels(
    "dfsan-debug-nonzero-labels",
    cl::desc("Insert calls to __dfsan_nonzero_label on observing a parameter, "
             "load or return with a nonzero label"),
    cl::Hidden);

// Experimental feature that inserts callbacks for certain data events.
// Currently callbacks are only inserted for loads, stores, memory transfers
// (i.e. memcpy and memmove), and comparisons.
//
// If this flag is set to true, the user must provide definitions for the
// following callback functions:
//   void __dfsan_load_callback(dfsan_label Label, void* addr);
//   void __dfsan_store_callback(dfsan_label Label, void* addr);
//   void __dfsan_mem_transfer_callback(dfsan_label *Start, size_t Len);
//   void __dfsan_cmp_callback(dfsan_label CombinedLabel);
static cl::opt<bool> ClEventCallbacks(
    "dfsan-event-callbacks",
    cl::desc("Insert calls to __dfsan_*_callback functions on data events."),
    cl::Hidden, cl::init(false));

// Experimental feature that inserts callbacks for conditionals, including:
// conditional branch, switch, select.
// This must be true for dfsan_set_conditional_callback() to have effect.
static cl::opt<bool> ClConditionalCallbacks(
    "dfsan-conditional-callbacks",
    cl::desc("Insert calls to callback functions on conditionals."), cl::Hidden,
    cl::init(false));

// Controls whether the pass tracks the control flow of select instructions.
static cl::opt<bool> ClTrackSelectControlFlow(
    "dfsan-track-select-control-flow",
    cl::desc("Propagate labels from condition values of select instructions "
             "to results."),
    cl::Hidden, cl::init(true));

// TODO: This default value follows MSan. DFSan may use a different value.
static cl::opt<int> ClInstrumentWithCallThreshold(
    "dfsan-instrument-with-call-threshold",
    cl::desc("If the function being instrumented requires more than "
             "this number of origin stores, use callbacks instead of "
             "inline checks (-1 means never use callbacks)."),
    cl::Hidden, cl::init(3500));

// Controls how to track origins.
// * 0: do not track origins.
// * 1: track origins at memory store operations.
// * 2: track origins at memory load and store operations.
//      TODO: track callsites.
static cl::opt<int> ClTrackOrigins("dfsan-track-origins",
                                   cl::desc("Track origins of labels"),
                                   cl::Hidden, cl::init(0));

static cl::opt<bool> ClIgnorePersonalityRoutine(
    "dfsan-ignore-personality-routine",
    cl::desc("If a personality routine is marked uninstrumented from the ABI "
             "list, do not create a wrapper for it."),
    cl::Hidden, cl::init(false));
254 
255 static StringRef getGlobalTypeString(const GlobalValue &G) {
256   // Types of GlobalVariables are always pointer types.
257   Type *GType = G.getValueType();
258   // For now we support excluding struct types only.
259   if (StructType *SGType = dyn_cast<StructType>(GType)) {
260     if (!SGType->isLiteral())
261       return SGType->getName();
262   }
263   return "<unknown type>";
264 }
265 
namespace {

// Memory map parameters used in application-to-shadow address calculation.
// Offset = (Addr & ~AndMask) ^ XorMask
// Shadow = ShadowBase + Offset
// Origin = (OriginBase + Offset) & ~3ULL
// NOTE: Field order matters — instances are aggregate-initialized in this
// order (see Linux_X86_64_MemoryMapParams below).
struct MemoryMapParams {
  uint64_t AndMask;    // Bits cleared from the application address.
  uint64_t XorMask;    // XORed into the masked address to form the offset.
  uint64_t ShadowBase; // Added to the offset to form the shadow address.
  uint64_t OriginBase; // Added to the offset (then aligned down to 4 bytes)
                       // to form the origin address.
};

} // end anonymous namespace
280 
// x86_64 Linux
// Matches the memory layout in the file header comment:
// MEM_TO_SHADOW(mem) = mem ^ 0x500000000000.
// NOLINTNEXTLINE(readability-identifier-naming)
static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
    0,              // AndMask (not used)
    0x500000000000, // XorMask
    0,              // ShadowBase (not used)
    0x100000000000, // OriginBase
};
289 
290 namespace {
291 
292 class DFSanABIList {
293   std::unique_ptr<SpecialCaseList> SCL;
294 
295 public:
296   DFSanABIList() = default;
297 
298   void set(std::unique_ptr<SpecialCaseList> List) { SCL = std::move(List); }
299 
300   /// Returns whether either this function or its source file are listed in the
301   /// given category.
302   bool isIn(const Function &F, StringRef Category) const {
303     return isIn(*F.getParent(), Category) ||
304            SCL->inSection("dataflow", "fun", F.getName(), Category);
305   }
306 
307   /// Returns whether this global alias is listed in the given category.
308   ///
309   /// If GA aliases a function, the alias's name is matched as a function name
310   /// would be.  Similarly, aliases of globals are matched like globals.
311   bool isIn(const GlobalAlias &GA, StringRef Category) const {
312     if (isIn(*GA.getParent(), Category))
313       return true;
314 
315     if (isa<FunctionType>(GA.getValueType()))
316       return SCL->inSection("dataflow", "fun", GA.getName(), Category);
317 
318     return SCL->inSection("dataflow", "global", GA.getName(), Category) ||
319            SCL->inSection("dataflow", "type", getGlobalTypeString(GA),
320                           Category);
321   }
322 
323   /// Returns whether this module is listed in the given category.
324   bool isIn(const Module &M, StringRef Category) const {
325     return SCL->inSection("dataflow", "src", M.getModuleIdentifier(), Category);
326   }
327 };
328 
329 /// TransformedFunction is used to express the result of transforming one
330 /// function type into another.  This struct is immutable.  It holds metadata
331 /// useful for updating calls of the old function to the new type.
332 struct TransformedFunction {
333   TransformedFunction(FunctionType *OriginalType, FunctionType *TransformedType,
334                       std::vector<unsigned> ArgumentIndexMapping)
335       : OriginalType(OriginalType), TransformedType(TransformedType),
336         ArgumentIndexMapping(ArgumentIndexMapping) {}
337 
338   // Disallow copies.
339   TransformedFunction(const TransformedFunction &) = delete;
340   TransformedFunction &operator=(const TransformedFunction &) = delete;
341 
342   // Allow moves.
343   TransformedFunction(TransformedFunction &&) = default;
344   TransformedFunction &operator=(TransformedFunction &&) = default;
345 
346   /// Type of the function before the transformation.
347   FunctionType *OriginalType;
348 
349   /// Type of the function after the transformation.
350   FunctionType *TransformedType;
351 
352   /// Transforming a function may change the position of arguments.  This
353   /// member records the mapping from each argument's old position to its new
354   /// position.  Argument positions are zero-indexed.  If the transformation
355   /// from F to F' made the first argument of F into the third argument of F',
356   /// then ArgumentIndexMapping[0] will equal 2.
357   std::vector<unsigned> ArgumentIndexMapping;
358 };
359 
/// Given function attributes from a call site for the original function,
/// return function attributes appropriate for a call to the transformed
/// function.
AttributeList
transformFunctionAttributes(const TransformedFunction &TransformedFunction,
                            LLVMContext &Ctx, AttributeList CallSiteAttrs) {

  // Construct a vector of AttributeSet for each function argument.
  std::vector<llvm::AttributeSet> ArgumentAttributes(
      TransformedFunction.TransformedType->getNumParams());

  // Copy attributes from the parameter of the original function to the
  // transformed version.  'ArgumentIndexMapping' holds the mapping from
  // old argument position to new.
  for (unsigned I = 0, IE = TransformedFunction.ArgumentIndexMapping.size();
       I < IE; ++I) {
    unsigned TransformedIndex = TransformedFunction.ArgumentIndexMapping[I];
    ArgumentAttributes[TransformedIndex] = CallSiteAttrs.getParamAttrs(I);
  }

  // Copy annotations on varargs arguments.
  // NOTE(review): this iterates param indices from the original param count up
  // to getNumAttrSets(); it assumes those indices address the trailing vararg
  // operands of the call site — confirm against the AttributeList indexing
  // scheme if this is ever changed.
  for (unsigned I = TransformedFunction.OriginalType->getNumParams(),
                IE = CallSiteAttrs.getNumAttrSets();
       I < IE; ++I) {
    ArgumentAttributes.push_back(CallSiteAttrs.getParamAttrs(I));
  }

  // Function-level and return attributes carry over unchanged.
  return AttributeList::get(Ctx, CallSiteAttrs.getFnAttrs(),
                            CallSiteAttrs.getRetAttrs(),
                            llvm::makeArrayRef(ArgumentAttributes));
}
391 
/// Per-module driver for the pass.  Holds module-level state (shadow/origin
/// types, runtime function declarations, TLS globals, the ABI list) and the
/// helpers used while instrumenting each function.
class DataFlowSanitizer {
  friend struct DFSanFunction;
  friend class DFSanVisitor;

  // Width of a shadow label; each application byte is backed by
  // ShadowWidthBytes of shadow.
  enum { ShadowWidthBits = 8, ShadowWidthBytes = ShadowWidthBits / 8 };

  // Width of an origin ID.
  enum { OriginWidthBits = 32, OriginWidthBytes = OriginWidthBits / 8 };

  /// How should calls to uninstrumented functions be handled?
  enum WrapperKind {
    /// This function is present in an uninstrumented form but we don't know
    /// how it should be handled.  Print a warning and call the function anyway.
    /// Don't label the return value.
    WK_Warning,

    /// This function does not write to (user-accessible) memory, and its return
    /// value is unlabelled.
    WK_Discard,

    /// This function does not write to (user-accessible) memory, and the label
    /// of its return value is the union of the label of its arguments.
    WK_Functional,

    /// Instead of calling the function, a custom wrapper __dfsw_F is called,
    /// where F is the name of the function.  This function may wrap the
    /// original function or provide its own implementation. WK_Custom uses an
    /// extra pointer argument to return the shadow.  This allows the wrapped
    /// form of the function type to be expressed in C.
    WK_Custom
  };

  // Module-level state, initialized by initializeModule.
  Module *Mod;
  LLVMContext *Ctx;
  Type *Int8Ptr;
  IntegerType *OriginTy;
  PointerType *OriginPtrTy;
  ConstantInt *ZeroOrigin;
  /// The shadow type for all primitive types and vector types.
  IntegerType *PrimitiveShadowTy;
  PointerType *PrimitiveShadowPtrTy;
  IntegerType *IntptrTy;
  ConstantInt *ZeroPrimitiveShadow;
  // TLS globals used to pass argument/return shadows and origins between
  // instrumented callers and callees (see file header comment).
  Constant *ArgTLS;
  ArrayType *ArgOriginTLSTy;
  Constant *ArgOriginTLS;
  Constant *RetvalTLS;
  Constant *RetvalOriginTLS;
  // Declared types of the DFSan runtime entry points.
  FunctionType *DFSanUnionLoadFnTy;
  FunctionType *DFSanLoadLabelAndOriginFnTy;
  FunctionType *DFSanUnimplementedFnTy;
  FunctionType *DFSanWrapperExternWeakNullFnTy;
  FunctionType *DFSanSetLabelFnTy;
  FunctionType *DFSanNonzeroLabelFnTy;
  FunctionType *DFSanVarargWrapperFnTy;
  FunctionType *DFSanConditionalCallbackFnTy;
  FunctionType *DFSanConditionalCallbackOriginFnTy;
  FunctionType *DFSanCmpCallbackFnTy;
  FunctionType *DFSanLoadStoreCallbackFnTy;
  FunctionType *DFSanMemTransferCallbackFnTy;
  FunctionType *DFSanChainOriginFnTy;
  FunctionType *DFSanChainOriginIfTaintedFnTy;
  FunctionType *DFSanMemOriginTransferFnTy;
  FunctionType *DFSanMaybeStoreOriginFnTy;
  // Callees for the runtime entry points above.
  FunctionCallee DFSanUnionLoadFn;
  FunctionCallee DFSanLoadLabelAndOriginFn;
  FunctionCallee DFSanUnimplementedFn;
  FunctionCallee DFSanWrapperExternWeakNullFn;
  FunctionCallee DFSanSetLabelFn;
  FunctionCallee DFSanNonzeroLabelFn;
  FunctionCallee DFSanVarargWrapperFn;
  FunctionCallee DFSanLoadCallbackFn;
  FunctionCallee DFSanStoreCallbackFn;
  FunctionCallee DFSanMemTransferCallbackFn;
  FunctionCallee DFSanConditionalCallbackFn;
  FunctionCallee DFSanConditionalCallbackOriginFn;
  FunctionCallee DFSanCmpCallbackFn;
  FunctionCallee DFSanChainOriginFn;
  FunctionCallee DFSanChainOriginIfTaintedFn;
  FunctionCallee DFSanMemOriginTransferFn;
  FunctionCallee DFSanMaybeStoreOriginFn;
  // Set of the runtime functions themselves, so they can be recognized and
  // skipped during instrumentation.
  SmallPtrSet<Value *, 16> DFSanRuntimeFunctions;
  MDNode *ColdCallWeights;
  MDNode *OriginStoreWeights;
  DFSanABIList ABIList;
  // Maps wrapped functions back to the original they wrap.
  DenseMap<Value *, Function *> UnwrappedFnMap;
  AttributeMask ReadOnlyNoneAttrs;
  StringSet<> CombineTaintLookupTableNames;

  /// Memory map parameters used in calculation mapping application addresses
  /// to shadow addresses and origin addresses.
  const MemoryMapParams *MapParams;

  Value *getShadowOffset(Value *Addr, IRBuilder<> &IRB);
  Value *getShadowAddress(Value *Addr, Instruction *Pos);
  Value *getShadowAddress(Value *Addr, Instruction *Pos, Value *ShadowOffset);
  std::pair<Value *, Value *>
  getShadowOriginAddress(Value *Addr, Align InstAlignment, Instruction *Pos);
  bool isInstrumented(const Function *F);
  bool isInstrumented(const GlobalAlias *GA);
  bool isForceZeroLabels(const Function *F);
  TransformedFunction getCustomFunctionType(FunctionType *T);
  WrapperKind getWrapperKind(Function *F);
  void addGlobalNameSuffix(GlobalValue *GV);
  void buildExternWeakCheckIfNeeded(IRBuilder<> &IRB, Function *F);
  Function *buildWrapperFunction(Function *F, StringRef NewFName,
                                 GlobalValue::LinkageTypes NewFLink,
                                 FunctionType *NewFT);
  void initializeCallbackFunctions(Module &M);
  void initializeRuntimeFunctions(Module &M);
  void injectMetadataGlobals(Module &M);
  bool initializeModule(Module &M);

  /// Advances \p OriginAddr to point to the next 32-bit origin and then loads
  /// from it. Returns the origin's loaded value.
  Value *loadNextOrigin(Instruction *Pos, Align OriginAlign,
                        Value **OriginAddr);

  /// Returns whether the given load byte size is amenable to inlined
  /// optimization patterns.
  bool hasLoadSizeForFastPath(uint64_t Size);

  /// Returns whether the pass tracks origins. Supports only TLS ABI mode.
  bool shouldTrackOrigins();

  /// Returns a zero constant with the shadow type of OrigTy.
  ///
  /// getZeroShadow({T1,T2,...}) = {getZeroShadow(T1),getZeroShadow(T2),...}
  /// getZeroShadow([n x T]) = [n x getZeroShadow(T)]
  /// getZeroShadow(other type) = i16(0)
  Constant *getZeroShadow(Type *OrigTy);
  /// Returns a zero constant with the shadow type of V's type.
  Constant *getZeroShadow(Value *V);

  /// Checks if V is a zero shadow.
  bool isZeroShadow(Value *V);

  /// Returns the shadow type of OrigTy.
  ///
  /// getShadowTy({T1,T2,...}) = {getShadowTy(T1),getShadowTy(T2),...}
  /// getShadowTy([n x T]) = [n x getShadowTy(T)]
  /// getShadowTy(other type) = i16
  Type *getShadowTy(Type *OrigTy);
  /// Returns the shadow type of V's type.
  Type *getShadowTy(Value *V);

  // Number of argument-origin slots available in the origin TLS array.
  const uint64_t NumOfElementsInArgOrgTLS = ArgTLSSize / OriginWidthBytes;

public:
  DataFlowSanitizer(const std::vector<std::string> &ABIListFiles);

  bool runImpl(Module &M);
};
544 
/// Per-function instrumentation state and helpers.  One instance is created
/// for each function the pass instruments.
struct DFSanFunction {
  DataFlowSanitizer &DFS; // Owning pass; shared module-level state.
  Function *F;            // The function being instrumented.
  DominatorTree DT;       // Recalculated in the constructor; used by caches.
  bool IsNativeABI;       // Set by the caller; presumably true when the
                          // function uses the native (non-TLS) ABI — confirm
                          // at construction sites.
  bool IsForceZeroLabels; // All shadows for this function are zero labels.
  AllocaInst *LabelReturnAlloca = nullptr;
  AllocaInst *OriginReturnAlloca = nullptr;
  // Value -> shadow / origin for values already instrumented.
  DenseMap<Value *, Value *> ValShadowMap;
  DenseMap<Value *, Value *> ValOriginMap;
  // Application alloca -> its shadow/origin alloca.
  DenseMap<AllocaInst *, AllocaInst *> AllocaShadowMap;
  DenseMap<AllocaInst *, AllocaInst *> AllocaOriginMap;

  // PHI nodes whose shadow/origin PHIs must have their incoming values filled
  // in after all blocks have been visited.
  struct PHIFixupElement {
    PHINode *Phi;
    PHINode *ShadowPhi;
    PHINode *OriginPhi;
  };
  std::vector<PHIFixupElement> PHIFixups;

  DenseSet<Instruction *> SkipInsts;
  // Shadow values to check when ClDebugNonzeroLabels is enabled.
  std::vector<Value *> NonZeroChecks;

  struct CachedShadow {
    BasicBlock *Block; // The block where Shadow is defined.
    Value *Shadow;
  };
  /// Maps a value to its latest shadow value in terms of domination tree.
  DenseMap<std::pair<Value *, Value *>, CachedShadow> CachedShadows;
  /// Maps a value to its latest collapsed shadow value it was converted to in
  /// terms of domination tree. When ClDebugNonzeroLabels is on, this cache is
  /// used at a post process where CFG blocks are split. So it does not cache
  /// BasicBlock like CachedShadows, but uses domination between values.
  DenseMap<Value *, Value *> CachedCollapsedShadows;
  DenseMap<Value *, std::set<Value *>> ShadowElements;

  DFSanFunction(DataFlowSanitizer &DFS, Function *F, bool IsNativeABI,
                bool IsForceZeroLabels)
      : DFS(DFS), F(F), IsNativeABI(IsNativeABI),
        IsForceZeroLabels(IsForceZeroLabels) {
    DT.recalculate(*F);
  }

  /// Computes the shadow address for a given function argument.
  ///
  /// Shadow = ArgTLS+ArgOffset.
  Value *getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB);

  /// Computes the shadow address for a return value.
  Value *getRetvalTLS(Type *T, IRBuilder<> &IRB);

  /// Computes the origin address for a given function argument.
  ///
  /// Origin = ArgOriginTLS[ArgNo].
  Value *getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB);

  /// Computes the origin address for a return value.
  Value *getRetvalOriginTLS();

  Value *getOrigin(Value *V);
  void setOrigin(Instruction *I, Value *Origin);
  /// Generates IR to compute the origin of the last operand with a taint label.
  Value *combineOperandOrigins(Instruction *Inst);
  /// Before the instruction Pos, generates IR to compute the last origin with a
  /// taint label. Labels and origins are from vectors Shadows and Origins
  /// correspondingly. The generated IR is like
  ///   Sn-1 != Zero ? On-1: ... S2 != Zero ? O2: S1 != Zero ? O1: O0
  /// When Zero is nullptr, it uses ZeroPrimitiveShadow. Otherwise it can be
  /// zeros with other bitwidths.
  Value *combineOrigins(const std::vector<Value *> &Shadows,
                        const std::vector<Value *> &Origins, Instruction *Pos,
                        ConstantInt *Zero = nullptr);

  Value *getShadow(Value *V);
  void setShadow(Instruction *I, Value *Shadow);
  /// Generates IR to compute the union of the two given shadows, inserting it
  /// before Pos. The combined value is with primitive type.
  Value *combineShadows(Value *V1, Value *V2, Instruction *Pos);
  /// Combines the shadow values of V1 and V2, then converts the combined value
  /// with primitive type into a shadow value with the original type T.
  Value *combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
                                   Instruction *Pos);
  Value *combineOperandShadows(Instruction *Inst);

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  ///
  /// When tracking loads is enabled, the returned origin is a chain at the
  /// current stack if the returned shadow is tainted.
  std::pair<Value *, Value *> loadShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Instruction *Pos);

  void storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                  Align InstAlignment, Value *PrimitiveShadow,
                                  Value *Origin, Instruction *Pos);
  /// Applies PrimitiveShadow to all primitive subtypes of T, returning
  /// the expanded shadow value.
  ///
  /// EFP({T1,T2, ...}, PS) = {EFP(T1,PS),EFP(T2,PS),...}
  /// EFP([n x T], PS) = [n x EFP(T,PS)]
  /// EFP(other types, PS) = PS
  Value *expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
                                   Instruction *Pos);
  /// Collapses Shadow into a single primitive shadow value, unioning all
  /// primitive shadow values in the process. Returns the final primitive
  /// shadow value.
  ///
  /// CTP({V1,V2, ...}) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP([V1,V2,...]) = UNION(CFP(V1,PS),CFP(V2,PS),...)
  /// CTP(other types, PS) = PS
  Value *collapseToPrimitiveShadow(Value *Shadow, Instruction *Pos);

  void storeZeroPrimitiveShadow(Value *Addr, uint64_t Size, Align ShadowAlign,
                                Instruction *Pos);

  Align getShadowAlign(Align InstAlignment);

  // If ClConditionalCallbacks is enabled, insert a callback after a given
  // branch instruction using the given conditional expression.
  void addConditionalCallbacksIfEnabled(Instruction &I, Value *Condition);

  // Returns whether P points at a constant global named by the
  // -dfsan-combine-taint-lookup-table flag.
  bool isLookupTableConstant(Value *P);

private:
  /// Collapses the shadow with aggregate type into a single primitive shadow
  /// value.
  template <class AggregateType>
  Value *collapseAggregateShadow(AggregateType *AT, Value *Shadow,
                                 IRBuilder<> &IRB);

  Value *collapseToPrimitiveShadow(Value *Shadow, IRBuilder<> &IRB);

  /// Returns the shadow value of an argument A.
  Value *getShadowForTLSArgument(Argument *A);

  /// The fast path of loading shadows.
  std::pair<Value *, Value *>
  loadShadowFast(Value *ShadowAddr, Value *OriginAddr, uint64_t Size,
                 Align ShadowAlign, Align OriginAlign, Value *FirstOrigin,
                 Instruction *Pos);

  Align getOriginAlign(Align InstAlignment);

  /// Because 4 contiguous bytes share one 4-byte origin, the most accurate load
  /// is __dfsan_load_label_and_origin. This function returns the union of all
  /// labels and the origin of the first taint label. However this is an
  /// additional call with many instructions. To ensure common cases are fast,
  /// checks if it is possible to load labels and origins without using the
  /// callback function.
  ///
  /// When enabling tracking load instructions, we always use
  /// __dfsan_load_label_and_origin to reduce code size.
  bool useCallbackLoadLabelAndOrigin(uint64_t Size, Align InstAlignment);

  /// Returns a chain at the current stack with previous origin V.
  Value *updateOrigin(Value *V, IRBuilder<> &IRB);

  /// Returns a chain at the current stack with previous origin V if Shadow is
  /// tainted.
  Value *updateOriginIfTainted(Value *Shadow, Value *Origin, IRBuilder<> &IRB);

  /// Creates an Intptr = Origin | Origin << 32 if Intptr's size is 64. Returns
  /// Origin otherwise.
  Value *originToIntptr(IRBuilder<> &IRB, Value *Origin);

  /// Stores Origin into the address range [StoreOriginAddr, StoreOriginAddr +
  /// Size).
  void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *StoreOriginAddr,
                   uint64_t StoreOriginSize, Align Alignment);

  /// Stores Origin in terms of its Shadow value.
  /// * Do not write origins for zero shadows because we do not trace origins
  ///   for untainted sinks.
  /// * Use __dfsan_maybe_store_origin if there are too many origin store
  ///   instrumentations.
  void storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size, Value *Shadow,
                   Value *Origin, Value *StoreOriginAddr, Align InstAlignment);

  /// Convert a scalar value to an i1 by comparing with 0.
  Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &Name = "");

  bool shouldInstrumentWithCall();

  /// Generates IR to load shadow and origin corresponding to bytes [\p
  /// Addr, \p Addr + \p Size), where addr has alignment \p
  /// InstAlignment, and take the union of each of those shadows. The returned
  /// shadow always has primitive type.
  std::pair<Value *, Value *>
  loadShadowOriginSansLoadTracking(Value *Addr, uint64_t Size,
                                   Align InstAlignment, Instruction *Pos);
  // Count of origin stores emitted so far; compared against
  // -dfsan-instrument-with-call-threshold.
  int NumOriginStores = 0;
};
740 
/// Instruction visitor that performs the per-instruction shadow (and, when
/// enabled, origin) propagation for one function being instrumented.  Each
/// visit method computes the shadow of the visited instruction from the
/// shadows of its operands and/or inserts the required runtime calls.
class DFSanVisitor : public InstVisitor<DFSanVisitor> {
public:
  /// Per-function instrumentation state (shadow/origin maps, caches, and a
  /// reference back to the module-level DataFlowSanitizer).
  DFSanFunction &DFSF;

  DFSanVisitor(DFSanFunction &DFSF) : DFSF(DFSF) {}

  /// Convenience accessor for the data layout of the module that owns the
  /// function being visited.
  const DataLayout &getDataLayout() const {
    return DFSF.F->getParent()->getDataLayout();
  }

  // Combines shadow values and origins for all of I's operands.
  void visitInstOperands(Instruction &I);

  // One visit override per instruction kind that needs label propagation or
  // special handling (memory accesses, calls, control flow, aggregates, ...).
  void visitUnaryOperator(UnaryOperator &UO);
  void visitBinaryOperator(BinaryOperator &BO);
  void visitBitCastInst(BitCastInst &BCI);
  void visitCastInst(CastInst &CI);
  void visitCmpInst(CmpInst &CI);
  void visitLandingPadInst(LandingPadInst &LPI);
  void visitGetElementPtrInst(GetElementPtrInst &GEPI);
  void visitLoadInst(LoadInst &LI);
  void visitStoreInst(StoreInst &SI);
  void visitAtomicRMWInst(AtomicRMWInst &I);
  void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I);
  void visitReturnInst(ReturnInst &RI);
  void visitCallBase(CallBase &CB);
  void visitPHINode(PHINode &PN);
  void visitExtractElementInst(ExtractElementInst &I);
  void visitInsertElementInst(InsertElementInst &I);
  void visitShuffleVectorInst(ShuffleVectorInst &I);
  void visitExtractValueInst(ExtractValueInst &I);
  void visitInsertValueInst(InsertValueInst &I);
  void visitAllocaInst(AllocaInst &I);
  void visitSelectInst(SelectInst &I);
  void visitMemSetInst(MemSetInst &I);
  void visitMemTransferInst(MemTransferInst &I);
  void visitBranchInst(BranchInst &BR);
  void visitSwitchInst(SwitchInst &SW);

private:
  // Shared implementation for atomic read-modify-write style instructions.
  void visitCASOrRMW(Align InstAlignment, Instruction &I);

  // Returns false when this is an invoke of a custom function.
  bool visitWrappedCallBase(Function &F, CallBase &CB);

  // Combines origins for all of I's operands.
  void visitInstOperandOrigins(Instruction &I);

  // Appends the shadow arguments for a call to a custom wrapper to Args.
  void addShadowArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);

  // Appends the origin arguments for a call to a custom wrapper to Args.
  void addOriginArguments(Function &F, CallBase &CB, std::vector<Value *> &Args,
                          IRBuilder<> &IRB);
};
795 
796 } // end anonymous namespace
797 
798 DataFlowSanitizer::DataFlowSanitizer(
799     const std::vector<std::string> &ABIListFiles) {
800   std::vector<std::string> AllABIListFiles(std::move(ABIListFiles));
801   llvm::append_range(AllABIListFiles, ClABIListFiles);
802   // FIXME: should we propagate vfs::FileSystem to this constructor?
803   ABIList.set(
804       SpecialCaseList::createOrDie(AllABIListFiles, *vfs::getRealFileSystem()));
805 
806   for (StringRef v : ClCombineTaintLookupTables)
807     CombineTaintLookupTableNames.insert(v);
808 }
809 
810 TransformedFunction DataFlowSanitizer::getCustomFunctionType(FunctionType *T) {
811   SmallVector<Type *, 4> ArgTypes;
812 
813   // Some parameters of the custom function being constructed are
814   // parameters of T.  Record the mapping from parameters of T to
815   // parameters of the custom function, so that parameter attributes
816   // at call sites can be updated.
817   std::vector<unsigned> ArgumentIndexMapping;
818   for (unsigned I = 0, E = T->getNumParams(); I != E; ++I) {
819     Type *ParamType = T->getParamType(I);
820     ArgumentIndexMapping.push_back(ArgTypes.size());
821     ArgTypes.push_back(ParamType);
822   }
823   for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
824     ArgTypes.push_back(PrimitiveShadowTy);
825   if (T->isVarArg())
826     ArgTypes.push_back(PrimitiveShadowPtrTy);
827   Type *RetType = T->getReturnType();
828   if (!RetType->isVoidTy())
829     ArgTypes.push_back(PrimitiveShadowPtrTy);
830 
831   if (shouldTrackOrigins()) {
832     for (unsigned I = 0, E = T->getNumParams(); I != E; ++I)
833       ArgTypes.push_back(OriginTy);
834     if (T->isVarArg())
835       ArgTypes.push_back(OriginPtrTy);
836     if (!RetType->isVoidTy())
837       ArgTypes.push_back(OriginPtrTy);
838   }
839 
840   return TransformedFunction(
841       T, FunctionType::get(T->getReturnType(), ArgTypes, T->isVarArg()),
842       ArgumentIndexMapping);
843 }
844 
845 bool DataFlowSanitizer::isZeroShadow(Value *V) {
846   Type *T = V->getType();
847   if (!isa<ArrayType>(T) && !isa<StructType>(T)) {
848     if (const ConstantInt *CI = dyn_cast<ConstantInt>(V))
849       return CI->isZero();
850     return false;
851   }
852 
853   return isa<ConstantAggregateZero>(V);
854 }
855 
856 bool DataFlowSanitizer::hasLoadSizeForFastPath(uint64_t Size) {
857   uint64_t ShadowSize = Size * ShadowWidthBytes;
858   return ShadowSize % 8 == 0 || ShadowSize == 4;
859 }
860 
861 bool DataFlowSanitizer::shouldTrackOrigins() {
862   static const bool ShouldTrackOrigins = ClTrackOrigins;
863   return ShouldTrackOrigins;
864 }
865 
866 Constant *DataFlowSanitizer::getZeroShadow(Type *OrigTy) {
867   if (!isa<ArrayType>(OrigTy) && !isa<StructType>(OrigTy))
868     return ZeroPrimitiveShadow;
869   Type *ShadowTy = getShadowTy(OrigTy);
870   return ConstantAggregateZero::get(ShadowTy);
871 }
872 
873 Constant *DataFlowSanitizer::getZeroShadow(Value *V) {
874   return getZeroShadow(V->getType());
875 }
876 
877 static Value *expandFromPrimitiveShadowRecursive(
878     Value *Shadow, SmallVector<unsigned, 4> &Indices, Type *SubShadowTy,
879     Value *PrimitiveShadow, IRBuilder<> &IRB) {
880   if (!isa<ArrayType>(SubShadowTy) && !isa<StructType>(SubShadowTy))
881     return IRB.CreateInsertValue(Shadow, PrimitiveShadow, Indices);
882 
883   if (ArrayType *AT = dyn_cast<ArrayType>(SubShadowTy)) {
884     for (unsigned Idx = 0; Idx < AT->getNumElements(); Idx++) {
885       Indices.push_back(Idx);
886       Shadow = expandFromPrimitiveShadowRecursive(
887           Shadow, Indices, AT->getElementType(), PrimitiveShadow, IRB);
888       Indices.pop_back();
889     }
890     return Shadow;
891   }
892 
893   if (StructType *ST = dyn_cast<StructType>(SubShadowTy)) {
894     for (unsigned Idx = 0; Idx < ST->getNumElements(); Idx++) {
895       Indices.push_back(Idx);
896       Shadow = expandFromPrimitiveShadowRecursive(
897           Shadow, Indices, ST->getElementType(Idx), PrimitiveShadow, IRB);
898       Indices.pop_back();
899     }
900     return Shadow;
901   }
902   llvm_unreachable("Unexpected shadow type");
903 }
904 
905 bool DFSanFunction::shouldInstrumentWithCall() {
906   return ClInstrumentWithCallThreshold >= 0 &&
907          NumOriginStores >= ClInstrumentWithCallThreshold;
908 }
909 
910 Value *DFSanFunction::expandFromPrimitiveShadow(Type *T, Value *PrimitiveShadow,
911                                                 Instruction *Pos) {
912   Type *ShadowTy = DFS.getShadowTy(T);
913 
914   if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
915     return PrimitiveShadow;
916 
917   if (DFS.isZeroShadow(PrimitiveShadow))
918     return DFS.getZeroShadow(ShadowTy);
919 
920   IRBuilder<> IRB(Pos);
921   SmallVector<unsigned, 4> Indices;
922   Value *Shadow = UndefValue::get(ShadowTy);
923   Shadow = expandFromPrimitiveShadowRecursive(Shadow, Indices, ShadowTy,
924                                               PrimitiveShadow, IRB);
925 
926   // Caches the primitive shadow value that built the shadow value.
927   CachedCollapsedShadows[Shadow] = PrimitiveShadow;
928   return Shadow;
929 }
930 
931 template <class AggregateType>
932 Value *DFSanFunction::collapseAggregateShadow(AggregateType *AT, Value *Shadow,
933                                               IRBuilder<> &IRB) {
934   if (!AT->getNumElements())
935     return DFS.ZeroPrimitiveShadow;
936 
937   Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
938   Value *Aggregator = collapseToPrimitiveShadow(FirstItem, IRB);
939 
940   for (unsigned Idx = 1; Idx < AT->getNumElements(); Idx++) {
941     Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
942     Value *ShadowInner = collapseToPrimitiveShadow(ShadowItem, IRB);
943     Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
944   }
945   return Aggregator;
946 }
947 
948 Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
949                                                 IRBuilder<> &IRB) {
950   Type *ShadowTy = Shadow->getType();
951   if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
952     return Shadow;
953   if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy))
954     return collapseAggregateShadow<>(AT, Shadow, IRB);
955   if (StructType *ST = dyn_cast<StructType>(ShadowTy))
956     return collapseAggregateShadow<>(ST, Shadow, IRB);
957   llvm_unreachable("Unexpected shadow type");
958 }
959 
/// Collapses Shadow (possibly an aggregate) to a primitive shadow value,
/// inserting any needed IR before Pos.  The collapsed value is cached per
/// Shadow and reused whenever the cached instruction dominates Pos, so the
/// same aggregate is not collapsed repeatedly.
Value *DFSanFunction::collapseToPrimitiveShadow(Value *Shadow,
                                                Instruction *Pos) {
  Type *ShadowTy = Shadow->getType();
  // Primitive shadows need no collapsing.
  if (!isa<ArrayType>(ShadowTy) && !isa<StructType>(ShadowTy))
    return Shadow;

  // Checks if the cached collapsed shadow value dominates Pos.
  Value *&CS = CachedCollapsedShadows[Shadow];
  if (CS && DT.dominates(CS, Pos))
    return CS;

  IRBuilder<> IRB(Pos);
  Value *PrimitiveShadow = collapseToPrimitiveShadow(Shadow, IRB);
  // Caches the converted primitive shadow value.
  CS = PrimitiveShadow;
  return PrimitiveShadow;
}
977 
978 void DFSanFunction::addConditionalCallbacksIfEnabled(Instruction &I,
979                                                      Value *Condition) {
980   if (!ClConditionalCallbacks) {
981     return;
982   }
983   IRBuilder<> IRB(&I);
984   Value *CondShadow = getShadow(Condition);
985   if (DFS.shouldTrackOrigins()) {
986     Value *CondOrigin = getOrigin(Condition);
987     IRB.CreateCall(DFS.DFSanConditionalCallbackOriginFn,
988                    {CondShadow, CondOrigin});
989   } else {
990     IRB.CreateCall(DFS.DFSanConditionalCallbackFn, {CondShadow});
991   }
992 }
993 
994 Type *DataFlowSanitizer::getShadowTy(Type *OrigTy) {
995   if (!OrigTy->isSized())
996     return PrimitiveShadowTy;
997   if (isa<IntegerType>(OrigTy))
998     return PrimitiveShadowTy;
999   if (isa<VectorType>(OrigTy))
1000     return PrimitiveShadowTy;
1001   if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy))
1002     return ArrayType::get(getShadowTy(AT->getElementType()),
1003                           AT->getNumElements());
1004   if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1005     SmallVector<Type *, 4> Elements;
1006     for (unsigned I = 0, N = ST->getNumElements(); I < N; ++I)
1007       Elements.push_back(getShadowTy(ST->getElementType(I)));
1008     return StructType::get(*Ctx, Elements);
1009   }
1010   return PrimitiveShadowTy;
1011 }
1012 
1013 Type *DataFlowSanitizer::getShadowTy(Value *V) {
1014   return getShadowTy(V->getType());
1015 }
1016 
/// Caches module-level state (target checks, commonly used types, and the
/// function types of every DFSan runtime entry point) before instrumentation
/// begins.  Must run before any other per-module work.
bool DataFlowSanitizer::initializeModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  const DataLayout &DL = M.getDataLayout();

  // DFSan's shadow memory layout is only defined for Linux/x86-64.
  if (TargetTriple.getOS() != Triple::Linux)
    report_fatal_error("unsupported operating system");
  if (TargetTriple.getArch() != Triple::x86_64)
    report_fatal_error("unsupported architecture");
  MapParams = &Linux_X86_64_MemoryMapParams;

  // Frequently used types, cached once per module.
  Mod = &M;
  Ctx = &M.getContext();
  Int8Ptr = Type::getInt8PtrTy(*Ctx);
  OriginTy = IntegerType::get(*Ctx, OriginWidthBits);
  OriginPtrTy = PointerType::getUnqual(OriginTy);
  PrimitiveShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
  PrimitiveShadowPtrTy = PointerType::getUnqual(PrimitiveShadowTy);
  IntptrTy = DL.getIntPtrType(*Ctx);
  ZeroPrimitiveShadow = ConstantInt::getSigned(PrimitiveShadowTy, 0);
  ZeroOrigin = ConstantInt::getSigned(OriginTy, 0);

  // Function types for the runtime entry points; the actual declarations are
  // created later in initializeRuntimeFunctions/initializeCallbackFunctions.
  Type *DFSanUnionLoadArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanUnionLoadFnTy = FunctionType::get(PrimitiveShadowTy, DFSanUnionLoadArgs,
                                         /*isVarArg=*/false);
  // Returns label and origin packed into a single i64.
  Type *DFSanLoadLabelAndOriginArgs[2] = {Int8Ptr, IntptrTy};
  DFSanLoadLabelAndOriginFnTy =
      FunctionType::get(IntegerType::get(*Ctx, 64), DFSanLoadLabelAndOriginArgs,
                        /*isVarArg=*/false);
  DFSanUnimplementedFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  Type *DFSanWrapperExternWeakNullArgs[2] = {Int8Ptr, Int8Ptr};
  DFSanWrapperExternWeakNullFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanWrapperExternWeakNullArgs,
                        /*isVarArg=*/false);
  Type *DFSanSetLabelArgs[4] = {PrimitiveShadowTy, OriginTy,
                                Type::getInt8PtrTy(*Ctx), IntptrTy};
  DFSanSetLabelFnTy = FunctionType::get(Type::getVoidTy(*Ctx),
                                        DFSanSetLabelArgs, /*isVarArg=*/false);
  DFSanNonzeroLabelFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), None, /*isVarArg=*/false);
  DFSanVarargWrapperFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), Type::getInt8PtrTy(*Ctx), /*isVarArg=*/false);
  DFSanConditionalCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  Type *DFSanConditionalCallbackOriginArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanConditionalCallbackOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanConditionalCallbackOriginArgs,
      /*isVarArg=*/false);
  DFSanCmpCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), PrimitiveShadowTy,
                        /*isVarArg=*/false);
  DFSanChainOriginFnTy =
      FunctionType::get(OriginTy, OriginTy, /*isVarArg=*/false);
  Type *DFSanChainOriginIfTaintedArgs[2] = {PrimitiveShadowTy, OriginTy};
  DFSanChainOriginIfTaintedFnTy = FunctionType::get(
      OriginTy, DFSanChainOriginIfTaintedArgs, /*isVarArg=*/false);
  Type *DFSanMaybeStoreOriginArgs[4] = {IntegerType::get(*Ctx, ShadowWidthBits),
                                        Int8Ptr, IntptrTy, OriginTy};
  DFSanMaybeStoreOriginFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMaybeStoreOriginArgs, /*isVarArg=*/false);
  Type *DFSanMemOriginTransferArgs[3] = {Int8Ptr, Int8Ptr, IntptrTy};
  DFSanMemOriginTransferFnTy = FunctionType::get(
      Type::getVoidTy(*Ctx), DFSanMemOriginTransferArgs, /*isVarArg=*/false);
  Type *DFSanLoadStoreCallbackArgs[2] = {PrimitiveShadowTy, Int8Ptr};
  DFSanLoadStoreCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanLoadStoreCallbackArgs,
                        /*isVarArg=*/false);
  Type *DFSanMemTransferCallbackArgs[2] = {PrimitiveShadowPtrTy, IntptrTy};
  DFSanMemTransferCallbackFnTy =
      FunctionType::get(Type::getVoidTy(*Ctx), DFSanMemTransferCallbackArgs,
                        /*isVarArg=*/false);

  // Branch-weight metadata used to mark runtime slow paths as cold.
  ColdCallWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  OriginStoreWeights = MDBuilder(*Ctx).createBranchWeights(1, 1000);
  return true;
}
1094 
1095 bool DataFlowSanitizer::isInstrumented(const Function *F) {
1096   return !ABIList.isIn(*F, "uninstrumented");
1097 }
1098 
1099 bool DataFlowSanitizer::isInstrumented(const GlobalAlias *GA) {
1100   return !ABIList.isIn(*GA, "uninstrumented");
1101 }
1102 
1103 bool DataFlowSanitizer::isForceZeroLabels(const Function *F) {
1104   return ABIList.isIn(*F, "force_zero_labels");
1105 }
1106 
1107 DataFlowSanitizer::WrapperKind DataFlowSanitizer::getWrapperKind(Function *F) {
1108   if (ABIList.isIn(*F, "functional"))
1109     return WK_Functional;
1110   if (ABIList.isIn(*F, "discard"))
1111     return WK_Discard;
1112   if (ABIList.isIn(*F, "custom"))
1113     return WK_Custom;
1114 
1115   return WK_Warning;
1116 }
1117 
1118 void DataFlowSanitizer::addGlobalNameSuffix(GlobalValue *GV) {
1119   std::string GVName = std::string(GV->getName()), Suffix = ".dfsan";
1120   GV->setName(GVName + Suffix);
1121 
1122   // Try to change the name of the function in module inline asm.  We only do
1123   // this for specific asm directives, currently only ".symver", to try to avoid
1124   // corrupting asm which happens to contain the symbol name as a substring.
1125   // Note that the substitution for .symver assumes that the versioned symbol
1126   // also has an instrumented name.
1127   std::string Asm = GV->getParent()->getModuleInlineAsm();
1128   std::string SearchStr = ".symver " + GVName + ",";
1129   size_t Pos = Asm.find(SearchStr);
1130   if (Pos != std::string::npos) {
1131     Asm.replace(Pos, SearchStr.size(), ".symver " + GVName + Suffix + ",");
1132     Pos = Asm.find("@");
1133 
1134     if (Pos == std::string::npos)
1135       report_fatal_error(Twine("unsupported .symver: ", Asm));
1136 
1137     Asm.replace(Pos, 1, Suffix + "@");
1138     GV->getParent()->setModuleInlineAsm(Asm);
1139   }
1140 }
1141 
1142 void DataFlowSanitizer::buildExternWeakCheckIfNeeded(IRBuilder<> &IRB,
1143                                                      Function *F) {
1144   // If the function we are wrapping was ExternWeak, it may be null.
1145   // The original code before calling this wrapper may have checked for null,
1146   // but replacing with a known-to-not-be-null wrapper can break this check.
1147   // When replacing uses of the extern weak function with the wrapper we try
1148   // to avoid replacing uses in conditionals, but this is not perfect.
1149   // In the case where we fail, and accidentially optimize out a null check
1150   // for a extern weak function, add a check here to help identify the issue.
1151   if (GlobalValue::isExternalWeakLinkage(F->getLinkage())) {
1152     std::vector<Value *> Args;
1153     Args.push_back(IRB.CreatePointerCast(F, IRB.getInt8PtrTy()));
1154     Args.push_back(IRB.CreateGlobalStringPtr(F->getName()));
1155     IRB.CreateCall(DFSanWrapperExternWeakNullFn, Args);
1156   }
1157 }
1158 
/// Creates a wrapper function named NewFName with linkage NewFLink and type
/// NewFT that simply forwards to F.  Variadic functions cannot be forwarded,
/// so their wrappers call __dfsan_vararg_wrapper (which reports the problem)
/// and then trap via unreachable.
Function *
DataFlowSanitizer::buildWrapperFunction(Function *F, StringRef NewFName,
                                        GlobalValue::LinkageTypes NewFLink,
                                        FunctionType *NewFT) {
  FunctionType *FT = F->getFunctionType();
  Function *NewF = Function::Create(NewFT, NewFLink, F->getAddressSpace(),
                                    NewFName, F->getParent());
  NewF->copyAttributesFrom(F);
  // Drop any return attributes that are invalid for the wrapper's return type.
  NewF->removeRetAttrs(
      AttributeFuncs::typeIncompatible(NewFT->getReturnType()));

  BasicBlock *BB = BasicBlock::Create(*Ctx, "entry", NewF);
  if (F->isVarArg()) {
    // Varargs cannot be forwarded; report at run time and trap.
    NewF->removeFnAttr("split-stack");
    CallInst::Create(DFSanVarargWrapperFn,
                     IRBuilder<>(BB).CreateGlobalStringPtr(F->getName()), "",
                     BB);
    new UnreachableInst(*Ctx, BB);
  } else {
    // Forward the leading FT->getNumParams() arguments of the wrapper to F.
    // NewFT may declare extra (shadow/origin) parameters; those are ignored.
    auto ArgIt = pointer_iterator<Argument *>(NewF->arg_begin());
    std::vector<Value *> Args(ArgIt, ArgIt + FT->getNumParams());

    CallInst *CI = CallInst::Create(F, Args, "", BB);
    if (FT->getReturnType()->isVoidTy())
      ReturnInst::Create(*Ctx, BB);
    else
      ReturnInst::Create(*Ctx, CI, BB);
  }

  return NewF;
}
1190 
1191 // Initialize DataFlowSanitizer runtime functions and declare them in the module
1192 void DataFlowSanitizer::initializeRuntimeFunctions(Module &M) {
1193   {
1194     AttributeList AL;
1195     AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1196     AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1197     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1198     DFSanUnionLoadFn =
1199         Mod->getOrInsertFunction("__dfsan_union_load", DFSanUnionLoadFnTy, AL);
1200   }
1201   {
1202     AttributeList AL;
1203     AL = AL.addFnAttribute(M.getContext(), Attribute::NoUnwind);
1204     AL = AL.addFnAttribute(M.getContext(), Attribute::ReadOnly);
1205     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1206     DFSanLoadLabelAndOriginFn = Mod->getOrInsertFunction(
1207         "__dfsan_load_label_and_origin", DFSanLoadLabelAndOriginFnTy, AL);
1208   }
1209   DFSanUnimplementedFn =
1210       Mod->getOrInsertFunction("__dfsan_unimplemented", DFSanUnimplementedFnTy);
1211   DFSanWrapperExternWeakNullFn = Mod->getOrInsertFunction(
1212       "__dfsan_wrapper_extern_weak_null", DFSanWrapperExternWeakNullFnTy);
1213   {
1214     AttributeList AL;
1215     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1216     AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1217     DFSanSetLabelFn =
1218         Mod->getOrInsertFunction("__dfsan_set_label", DFSanSetLabelFnTy, AL);
1219   }
1220   DFSanNonzeroLabelFn =
1221       Mod->getOrInsertFunction("__dfsan_nonzero_label", DFSanNonzeroLabelFnTy);
1222   DFSanVarargWrapperFn = Mod->getOrInsertFunction("__dfsan_vararg_wrapper",
1223                                                   DFSanVarargWrapperFnTy);
1224   {
1225     AttributeList AL;
1226     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1227     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1228     DFSanChainOriginFn = Mod->getOrInsertFunction("__dfsan_chain_origin",
1229                                                   DFSanChainOriginFnTy, AL);
1230   }
1231   {
1232     AttributeList AL;
1233     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1234     AL = AL.addParamAttribute(M.getContext(), 1, Attribute::ZExt);
1235     AL = AL.addRetAttribute(M.getContext(), Attribute::ZExt);
1236     DFSanChainOriginIfTaintedFn = Mod->getOrInsertFunction(
1237         "__dfsan_chain_origin_if_tainted", DFSanChainOriginIfTaintedFnTy, AL);
1238   }
1239   DFSanMemOriginTransferFn = Mod->getOrInsertFunction(
1240       "__dfsan_mem_origin_transfer", DFSanMemOriginTransferFnTy);
1241 
1242   {
1243     AttributeList AL;
1244     AL = AL.addParamAttribute(M.getContext(), 0, Attribute::ZExt);
1245     AL = AL.addParamAttribute(M.getContext(), 3, Attribute::ZExt);
1246     DFSanMaybeStoreOriginFn = Mod->getOrInsertFunction(
1247         "__dfsan_maybe_store_origin", DFSanMaybeStoreOriginFnTy, AL);
1248   }
1249 
1250   DFSanRuntimeFunctions.insert(
1251       DFSanUnionLoadFn.getCallee()->stripPointerCasts());
1252   DFSanRuntimeFunctions.insert(
1253       DFSanLoadLabelAndOriginFn.getCallee()->stripPointerCasts());
1254   DFSanRuntimeFunctions.insert(
1255       DFSanUnimplementedFn.getCallee()->stripPointerCasts());
1256   DFSanRuntimeFunctions.insert(
1257       DFSanWrapperExternWeakNullFn.getCallee()->stripPointerCasts());
1258   DFSanRuntimeFunctions.insert(
1259       DFSanSetLabelFn.getCallee()->stripPointerCasts());
1260   DFSanRuntimeFunctions.insert(
1261       DFSanNonzeroLabelFn.getCallee()->stripPointerCasts());
1262   DFSanRuntimeFunctions.insert(
1263       DFSanVarargWrapperFn.getCallee()->stripPointerCasts());
1264   DFSanRuntimeFunctions.insert(
1265       DFSanLoadCallbackFn.getCallee()->stripPointerCasts());
1266   DFSanRuntimeFunctions.insert(
1267       DFSanStoreCallbackFn.getCallee()->stripPointerCasts());
1268   DFSanRuntimeFunctions.insert(
1269       DFSanMemTransferCallbackFn.getCallee()->stripPointerCasts());
1270   DFSanRuntimeFunctions.insert(
1271       DFSanConditionalCallbackFn.getCallee()->stripPointerCasts());
1272   DFSanRuntimeFunctions.insert(
1273       DFSanConditionalCallbackOriginFn.getCallee()->stripPointerCasts());
1274   DFSanRuntimeFunctions.insert(
1275       DFSanCmpCallbackFn.getCallee()->stripPointerCasts());
1276   DFSanRuntimeFunctions.insert(
1277       DFSanChainOriginFn.getCallee()->stripPointerCasts());
1278   DFSanRuntimeFunctions.insert(
1279       DFSanChainOriginIfTaintedFn.getCallee()->stripPointerCasts());
1280   DFSanRuntimeFunctions.insert(
1281       DFSanMemOriginTransferFn.getCallee()->stripPointerCasts());
1282   DFSanRuntimeFunctions.insert(
1283       DFSanMaybeStoreOriginFn.getCallee()->stripPointerCasts());
1284 }
1285 
1286 // Initializes event callback functions and declare them in the module
1287 void DataFlowSanitizer::initializeCallbackFunctions(Module &M) {
1288   DFSanLoadCallbackFn = Mod->getOrInsertFunction("__dfsan_load_callback",
1289                                                  DFSanLoadStoreCallbackFnTy);
1290   DFSanStoreCallbackFn = Mod->getOrInsertFunction("__dfsan_store_callback",
1291                                                   DFSanLoadStoreCallbackFnTy);
1292   DFSanMemTransferCallbackFn = Mod->getOrInsertFunction(
1293       "__dfsan_mem_transfer_callback", DFSanMemTransferCallbackFnTy);
1294   DFSanCmpCallbackFn =
1295       Mod->getOrInsertFunction("__dfsan_cmp_callback", DFSanCmpCallbackFnTy);
1296 
1297   DFSanConditionalCallbackFn = Mod->getOrInsertFunction(
1298       "__dfsan_conditional_callback", DFSanConditionalCallbackFnTy);
1299   DFSanConditionalCallbackOriginFn =
1300       Mod->getOrInsertFunction("__dfsan_conditional_callback_origin",
1301                                DFSanConditionalCallbackOriginFnTy);
1302 }
1303 
1304 void DataFlowSanitizer::injectMetadataGlobals(Module &M) {
1305   // These variables can be used:
1306   // - by the runtime (to discover what the shadow width was, during
1307   //   compilation)
1308   // - in testing (to avoid hardcoding the shadow width and type but instead
1309   //   extract them by pattern matching)
1310   Type *IntTy = Type::getInt32Ty(*Ctx);
1311   (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bits", IntTy, [&] {
1312     return new GlobalVariable(
1313         M, IntTy, /*isConstant=*/true, GlobalValue::WeakODRLinkage,
1314         ConstantInt::get(IntTy, ShadowWidthBits), "__dfsan_shadow_width_bits");
1315   });
1316   (void)Mod->getOrInsertGlobal("__dfsan_shadow_width_bytes", IntTy, [&] {
1317     return new GlobalVariable(M, IntTy, /*isConstant=*/true,
1318                               GlobalValue::WeakODRLinkage,
1319                               ConstantInt::get(IntTy, ShadowWidthBytes),
1320                               "__dfsan_shadow_width_bytes");
1321   });
1322 }
1323 
1324 bool DataFlowSanitizer::runImpl(Module &M) {
1325   initializeModule(M);
1326 
1327   if (ABIList.isIn(M, "skip"))
1328     return false;
1329 
1330   const unsigned InitialGlobalSize = M.global_size();
1331   const unsigned InitialModuleSize = M.size();
1332 
1333   bool Changed = false;
1334 
1335   auto GetOrInsertGlobal = [this, &Changed](StringRef Name,
1336                                             Type *Ty) -> Constant * {
1337     Constant *C = Mod->getOrInsertGlobal(Name, Ty);
1338     if (GlobalVariable *G = dyn_cast<GlobalVariable>(C)) {
1339       Changed |= G->getThreadLocalMode() != GlobalVariable::InitialExecTLSModel;
1340       G->setThreadLocalMode(GlobalVariable::InitialExecTLSModel);
1341     }
1342     return C;
1343   };
1344 
1345   // These globals must be kept in sync with the ones in dfsan.cpp.
1346   ArgTLS =
1347       GetOrInsertGlobal("__dfsan_arg_tls",
1348                         ArrayType::get(Type::getInt64Ty(*Ctx), ArgTLSSize / 8));
1349   RetvalTLS = GetOrInsertGlobal(
1350       "__dfsan_retval_tls",
1351       ArrayType::get(Type::getInt64Ty(*Ctx), RetvalTLSSize / 8));
1352   ArgOriginTLSTy = ArrayType::get(OriginTy, NumOfElementsInArgOrgTLS);
1353   ArgOriginTLS = GetOrInsertGlobal("__dfsan_arg_origin_tls", ArgOriginTLSTy);
1354   RetvalOriginTLS = GetOrInsertGlobal("__dfsan_retval_origin_tls", OriginTy);
1355 
1356   (void)Mod->getOrInsertGlobal("__dfsan_track_origins", OriginTy, [&] {
1357     Changed = true;
1358     return new GlobalVariable(
1359         M, OriginTy, true, GlobalValue::WeakODRLinkage,
1360         ConstantInt::getSigned(OriginTy,
1361                                shouldTrackOrigins() ? ClTrackOrigins : 0),
1362         "__dfsan_track_origins");
1363   });
1364 
1365   injectMetadataGlobals(M);
1366 
1367   initializeCallbackFunctions(M);
1368   initializeRuntimeFunctions(M);
1369 
1370   std::vector<Function *> FnsToInstrument;
1371   SmallPtrSet<Function *, 2> FnsWithNativeABI;
1372   SmallPtrSet<Function *, 2> FnsWithForceZeroLabel;
1373   SmallPtrSet<Constant *, 1> PersonalityFns;
1374   for (Function &F : M)
1375     if (!F.isIntrinsic() && !DFSanRuntimeFunctions.contains(&F)) {
1376       FnsToInstrument.push_back(&F);
1377       if (F.hasPersonalityFn())
1378         PersonalityFns.insert(F.getPersonalityFn()->stripPointerCasts());
1379     }
1380 
1381   if (ClIgnorePersonalityRoutine) {
1382     for (auto *C : PersonalityFns) {
1383       assert(isa<Function>(C) && "Personality routine is not a function!");
1384       Function *F = cast<Function>(C);
1385       if (!isInstrumented(F))
1386         FnsToInstrument.erase(
1387             std::remove(FnsToInstrument.begin(), FnsToInstrument.end(), F),
1388             FnsToInstrument.end());
1389     }
1390   }
1391 
1392   // Give function aliases prefixes when necessary, and build wrappers where the
1393   // instrumentedness is inconsistent.
1394   for (GlobalAlias &GA : llvm::make_early_inc_range(M.aliases())) {
1395     // Don't stop on weak.  We assume people aren't playing games with the
1396     // instrumentedness of overridden weak aliases.
1397     auto *F = dyn_cast<Function>(GA.getAliaseeObject());
1398     if (!F)
1399       continue;
1400 
1401     bool GAInst = isInstrumented(&GA), FInst = isInstrumented(F);
1402     if (GAInst && FInst) {
1403       addGlobalNameSuffix(&GA);
1404     } else if (GAInst != FInst) {
1405       // Non-instrumented alias of an instrumented function, or vice versa.
1406       // Replace the alias with a native-ABI wrapper of the aliasee.  The pass
1407       // below will take care of instrumenting it.
1408       Function *NewF =
1409           buildWrapperFunction(F, "", GA.getLinkage(), F->getFunctionType());
1410       GA.replaceAllUsesWith(ConstantExpr::getBitCast(NewF, GA.getType()));
1411       NewF->takeName(&GA);
1412       GA.eraseFromParent();
1413       FnsToInstrument.push_back(NewF);
1414     }
1415   }
1416 
1417   ReadOnlyNoneAttrs.addAttribute(Attribute::ReadOnly)
1418       .addAttribute(Attribute::ReadNone);
1419 
1420   // First, change the ABI of every function in the module.  ABI-listed
1421   // functions keep their original ABI and get a wrapper function.
1422   for (std::vector<Function *>::iterator FI = FnsToInstrument.begin(),
1423                                          FE = FnsToInstrument.end();
1424        FI != FE; ++FI) {
1425     Function &F = **FI;
1426     FunctionType *FT = F.getFunctionType();
1427 
1428     bool IsZeroArgsVoidRet = (FT->getNumParams() == 0 && !FT->isVarArg() &&
1429                               FT->getReturnType()->isVoidTy());
1430 
1431     if (isInstrumented(&F)) {
1432       if (isForceZeroLabels(&F))
1433         FnsWithForceZeroLabel.insert(&F);
1434 
1435       // Instrumented functions get a '.dfsan' suffix.  This allows us to more
1436       // easily identify cases of mismatching ABIs. This naming scheme is
1437       // mangling-compatible (see Itanium ABI), using a vendor-specific suffix.
1438       addGlobalNameSuffix(&F);
1439     } else if (!IsZeroArgsVoidRet || getWrapperKind(&F) == WK_Custom) {
1440       // Build a wrapper function for F.  The wrapper simply calls F, and is
1441       // added to FnsToInstrument so that any instrumentation according to its
1442       // WrapperKind is done in the second pass below.
1443 
1444       // If the function being wrapped has local linkage, then preserve the
1445       // function's linkage in the wrapper function.
1446       GlobalValue::LinkageTypes WrapperLinkage =
1447           F.hasLocalLinkage() ? F.getLinkage()
1448                               : GlobalValue::LinkOnceODRLinkage;
1449 
1450       Function *NewF = buildWrapperFunction(
1451           &F,
1452           (shouldTrackOrigins() ? std::string("dfso$") : std::string("dfsw$")) +
1453               std::string(F.getName()),
1454           WrapperLinkage, FT);
1455       NewF->removeFnAttrs(ReadOnlyNoneAttrs);
1456 
1457       Value *WrappedFnCst =
1458           ConstantExpr::getBitCast(NewF, PointerType::getUnqual(FT));
1459 
1460       // Extern weak functions can sometimes be null at execution time.
1461       // Code will sometimes check if an extern weak function is null.
1462       // This could look something like:
1463       //   declare extern_weak i8 @my_func(i8)
1464       //   br i1 icmp ne (i8 (i8)* @my_func, i8 (i8)* null), label %use_my_func,
1465       //   label %avoid_my_func
1466       // The @"dfsw$my_func" wrapper is never null, so if we replace this use
1467       // in the comparision, the icmp will simplify to false and we have
1468       // accidentially optimized away a null check that is necessary.
1469       // This can lead to a crash when the null extern_weak my_func is called.
1470       //
1471       // To prevent (the most common pattern of) this problem,
1472       // do not replace uses in comparisons with the wrapper.
1473       // We definitely want to replace uses in call instructions.
1474       // Other uses (e.g. store the function address somewhere) might be
1475       // called or compared or both - this case may not be handled correctly.
1476       // We will default to replacing with wrapper in cases we are unsure.
1477       auto IsNotCmpUse = [](Use &U) -> bool {
1478         User *Usr = U.getUser();
1479         if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
1480           // This is the most common case for icmp ne null
1481           if (CE->getOpcode() == Instruction::ICmp) {
1482             return false;
1483           }
1484         }
1485         if (Instruction *I = dyn_cast<Instruction>(Usr)) {
1486           if (I->getOpcode() == Instruction::ICmp) {
1487             return false;
1488           }
1489         }
1490         return true;
1491       };
1492       F.replaceUsesWithIf(WrappedFnCst, IsNotCmpUse);
1493 
1494       UnwrappedFnMap[WrappedFnCst] = &F;
1495       *FI = NewF;
1496 
1497       if (!F.isDeclaration()) {
1498         // This function is probably defining an interposition of an
1499         // uninstrumented function and hence needs to keep the original ABI.
1500         // But any functions it may call need to use the instrumented ABI, so
1501         // we instrument it in a mode which preserves the original ABI.
1502         FnsWithNativeABI.insert(&F);
1503 
1504         // This code needs to rebuild the iterators, as they may be invalidated
1505         // by the push_back, taking care that the new range does not include
1506         // any functions added by this code.
1507         size_t N = FI - FnsToInstrument.begin(),
1508                Count = FE - FnsToInstrument.begin();
1509         FnsToInstrument.push_back(&F);
1510         FI = FnsToInstrument.begin() + N;
1511         FE = FnsToInstrument.begin() + Count;
1512       }
1513       // Hopefully, nobody will try to indirectly call a vararg
1514       // function... yet.
1515     } else if (FT->isVarArg()) {
1516       UnwrappedFnMap[&F] = &F;
1517       *FI = nullptr;
1518     }
1519   }
1520 
1521   for (Function *F : FnsToInstrument) {
1522     if (!F || F->isDeclaration())
1523       continue;
1524 
1525     removeUnreachableBlocks(*F);
1526 
1527     DFSanFunction DFSF(*this, F, FnsWithNativeABI.count(F),
1528                        FnsWithForceZeroLabel.count(F));
1529 
1530     // DFSanVisitor may create new basic blocks, which confuses df_iterator.
1531     // Build a copy of the list before iterating over it.
1532     SmallVector<BasicBlock *, 4> BBList(depth_first(&F->getEntryBlock()));
1533 
1534     for (BasicBlock *BB : BBList) {
1535       Instruction *Inst = &BB->front();
1536       while (true) {
1537         // DFSanVisitor may split the current basic block, changing the current
1538         // instruction's next pointer and moving the next instruction to the
1539         // tail block from which we should continue.
1540         Instruction *Next = Inst->getNextNode();
1541         // DFSanVisitor may delete Inst, so keep track of whether it was a
1542         // terminator.
1543         bool IsTerminator = Inst->isTerminator();
1544         if (!DFSF.SkipInsts.count(Inst))
1545           DFSanVisitor(DFSF).visit(Inst);
1546         if (IsTerminator)
1547           break;
1548         Inst = Next;
1549       }
1550     }
1551 
1552     // We will not necessarily be able to compute the shadow for every phi node
1553     // until we have visited every block.  Therefore, the code that handles phi
1554     // nodes adds them to the PHIFixups list so that they can be properly
1555     // handled here.
1556     for (DFSanFunction::PHIFixupElement &P : DFSF.PHIFixups) {
1557       for (unsigned Val = 0, N = P.Phi->getNumIncomingValues(); Val != N;
1558            ++Val) {
1559         P.ShadowPhi->setIncomingValue(
1560             Val, DFSF.getShadow(P.Phi->getIncomingValue(Val)));
1561         if (P.OriginPhi)
1562           P.OriginPhi->setIncomingValue(
1563               Val, DFSF.getOrigin(P.Phi->getIncomingValue(Val)));
1564       }
1565     }
1566 
1567     // -dfsan-debug-nonzero-labels will split the CFG in all kinds of crazy
1568     // places (i.e. instructions in basic blocks we haven't even begun visiting
1569     // yet).  To make our life easier, do this work in a pass after the main
1570     // instrumentation.
1571     if (ClDebugNonzeroLabels) {
1572       for (Value *V : DFSF.NonZeroChecks) {
1573         Instruction *Pos;
1574         if (Instruction *I = dyn_cast<Instruction>(V))
1575           Pos = I->getNextNode();
1576         else
1577           Pos = &DFSF.F->getEntryBlock().front();
1578         while (isa<PHINode>(Pos) || isa<AllocaInst>(Pos))
1579           Pos = Pos->getNextNode();
1580         IRBuilder<> IRB(Pos);
1581         Value *PrimitiveShadow = DFSF.collapseToPrimitiveShadow(V, Pos);
1582         Value *Ne =
1583             IRB.CreateICmpNE(PrimitiveShadow, DFSF.DFS.ZeroPrimitiveShadow);
1584         BranchInst *BI = cast<BranchInst>(SplitBlockAndInsertIfThen(
1585             Ne, Pos, /*Unreachable=*/false, ColdCallWeights));
1586         IRBuilder<> ThenIRB(BI);
1587         ThenIRB.CreateCall(DFSF.DFS.DFSanNonzeroLabelFn, {});
1588       }
1589     }
1590   }
1591 
1592   return Changed || !FnsToInstrument.empty() ||
1593          M.global_size() != InitialGlobalSize || M.size() != InitialModuleSize;
1594 }
1595 
1596 Value *DFSanFunction::getArgTLS(Type *T, unsigned ArgOffset, IRBuilder<> &IRB) {
1597   Value *Base = IRB.CreatePointerCast(DFS.ArgTLS, DFS.IntptrTy);
1598   if (ArgOffset)
1599     Base = IRB.CreateAdd(Base, ConstantInt::get(DFS.IntptrTy, ArgOffset));
1600   return IRB.CreateIntToPtr(Base, PointerType::get(DFS.getShadowTy(T), 0),
1601                             "_dfsarg");
1602 }
1603 
1604 Value *DFSanFunction::getRetvalTLS(Type *T, IRBuilder<> &IRB) {
1605   return IRB.CreatePointerCast(
1606       DFS.RetvalTLS, PointerType::get(DFS.getShadowTy(T), 0), "_dfsret");
1607 }
1608 
1609 Value *DFSanFunction::getRetvalOriginTLS() { return DFS.RetvalOriginTLS; }
1610 
1611 Value *DFSanFunction::getArgOriginTLS(unsigned ArgNo, IRBuilder<> &IRB) {
1612   return IRB.CreateConstGEP2_64(DFS.ArgOriginTLSTy, DFS.ArgOriginTLS, 0, ArgNo,
1613                                 "_dfsarg_o");
1614 }
1615 
1616 Value *DFSanFunction::getOrigin(Value *V) {
1617   assert(DFS.shouldTrackOrigins());
1618   if (!isa<Argument>(V) && !isa<Instruction>(V))
1619     return DFS.ZeroOrigin;
1620   Value *&Origin = ValOriginMap[V];
1621   if (!Origin) {
1622     if (Argument *A = dyn_cast<Argument>(V)) {
1623       if (IsNativeABI)
1624         return DFS.ZeroOrigin;
1625       if (A->getArgNo() < DFS.NumOfElementsInArgOrgTLS) {
1626         Instruction *ArgOriginTLSPos = &*F->getEntryBlock().begin();
1627         IRBuilder<> IRB(ArgOriginTLSPos);
1628         Value *ArgOriginPtr = getArgOriginTLS(A->getArgNo(), IRB);
1629         Origin = IRB.CreateLoad(DFS.OriginTy, ArgOriginPtr);
1630       } else {
1631         // Overflow
1632         Origin = DFS.ZeroOrigin;
1633       }
1634     } else {
1635       Origin = DFS.ZeroOrigin;
1636     }
1637   }
1638   return Origin;
1639 }
1640 
1641 void DFSanFunction::setOrigin(Instruction *I, Value *Origin) {
1642   if (!DFS.shouldTrackOrigins())
1643     return;
1644   assert(!ValOriginMap.count(I));
1645   assert(Origin->getType() == DFS.OriginTy);
1646   ValOriginMap[I] = Origin;
1647 }
1648 
1649 Value *DFSanFunction::getShadowForTLSArgument(Argument *A) {
1650   unsigned ArgOffset = 0;
1651   const DataLayout &DL = F->getParent()->getDataLayout();
1652   for (auto &FArg : F->args()) {
1653     if (!FArg.getType()->isSized()) {
1654       if (A == &FArg)
1655         break;
1656       continue;
1657     }
1658 
1659     unsigned Size = DL.getTypeAllocSize(DFS.getShadowTy(&FArg));
1660     if (A != &FArg) {
1661       ArgOffset += alignTo(Size, ShadowTLSAlignment);
1662       if (ArgOffset > ArgTLSSize)
1663         break; // ArgTLS overflows, uses a zero shadow.
1664       continue;
1665     }
1666 
1667     if (ArgOffset + Size > ArgTLSSize)
1668       break; // ArgTLS overflows, uses a zero shadow.
1669 
1670     Instruction *ArgTLSPos = &*F->getEntryBlock().begin();
1671     IRBuilder<> IRB(ArgTLSPos);
1672     Value *ArgShadowPtr = getArgTLS(FArg.getType(), ArgOffset, IRB);
1673     return IRB.CreateAlignedLoad(DFS.getShadowTy(&FArg), ArgShadowPtr,
1674                                  ShadowTLSAlignment);
1675   }
1676 
1677   return DFS.getZeroShadow(A);
1678 }
1679 
1680 Value *DFSanFunction::getShadow(Value *V) {
1681   if (!isa<Argument>(V) && !isa<Instruction>(V))
1682     return DFS.getZeroShadow(V);
1683   if (IsForceZeroLabels)
1684     return DFS.getZeroShadow(V);
1685   Value *&Shadow = ValShadowMap[V];
1686   if (!Shadow) {
1687     if (Argument *A = dyn_cast<Argument>(V)) {
1688       if (IsNativeABI)
1689         return DFS.getZeroShadow(V);
1690       Shadow = getShadowForTLSArgument(A);
1691       NonZeroChecks.push_back(Shadow);
1692     } else {
1693       Shadow = DFS.getZeroShadow(V);
1694     }
1695   }
1696   return Shadow;
1697 }
1698 
1699 void DFSanFunction::setShadow(Instruction *I, Value *Shadow) {
1700   assert(!ValShadowMap.count(I));
1701   ValShadowMap[I] = Shadow;
1702 }
1703 
1704 /// Compute the integer shadow offset that corresponds to a given
1705 /// application address.
1706 ///
1707 /// Offset = (Addr & ~AndMask) ^ XorMask
1708 Value *DataFlowSanitizer::getShadowOffset(Value *Addr, IRBuilder<> &IRB) {
1709   assert(Addr != RetvalTLS && "Reinstrumenting?");
1710   Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1711 
1712   uint64_t AndMask = MapParams->AndMask;
1713   if (AndMask)
1714     OffsetLong =
1715         IRB.CreateAnd(OffsetLong, ConstantInt::get(IntptrTy, ~AndMask));
1716 
1717   uint64_t XorMask = MapParams->XorMask;
1718   if (XorMask)
1719     OffsetLong = IRB.CreateXor(OffsetLong, ConstantInt::get(IntptrTy, XorMask));
1720   return OffsetLong;
1721 }
1722 
1723 std::pair<Value *, Value *>
1724 DataFlowSanitizer::getShadowOriginAddress(Value *Addr, Align InstAlignment,
1725                                           Instruction *Pos) {
1726   // Returns ((Addr & shadow_mask) + origin_base - shadow_base) & ~4UL
1727   IRBuilder<> IRB(Pos);
1728   Value *ShadowOffset = getShadowOffset(Addr, IRB);
1729   Value *ShadowLong = ShadowOffset;
1730   uint64_t ShadowBase = MapParams->ShadowBase;
1731   if (ShadowBase != 0) {
1732     ShadowLong =
1733         IRB.CreateAdd(ShadowLong, ConstantInt::get(IntptrTy, ShadowBase));
1734   }
1735   IntegerType *ShadowTy = IntegerType::get(*Ctx, ShadowWidthBits);
1736   Value *ShadowPtr =
1737       IRB.CreateIntToPtr(ShadowLong, PointerType::get(ShadowTy, 0));
1738   Value *OriginPtr = nullptr;
1739   if (shouldTrackOrigins()) {
1740     Value *OriginLong = ShadowOffset;
1741     uint64_t OriginBase = MapParams->OriginBase;
1742     if (OriginBase != 0)
1743       OriginLong =
1744           IRB.CreateAdd(OriginLong, ConstantInt::get(IntptrTy, OriginBase));
1745     const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1746     // When alignment is >= 4, Addr must be aligned to 4, otherwise it is UB.
1747     // So Mask is unnecessary.
1748     if (Alignment < MinOriginAlignment) {
1749       uint64_t Mask = MinOriginAlignment.value() - 1;
1750       OriginLong = IRB.CreateAnd(OriginLong, ConstantInt::get(IntptrTy, ~Mask));
1751     }
1752     OriginPtr = IRB.CreateIntToPtr(OriginLong, OriginPtrTy);
1753   }
1754   return std::make_pair(ShadowPtr, OriginPtr);
1755 }
1756 
1757 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos,
1758                                            Value *ShadowOffset) {
1759   IRBuilder<> IRB(Pos);
1760   return IRB.CreateIntToPtr(ShadowOffset, PrimitiveShadowPtrTy);
1761 }
1762 
1763 Value *DataFlowSanitizer::getShadowAddress(Value *Addr, Instruction *Pos) {
1764   IRBuilder<> IRB(Pos);
1765   Value *ShadowOffset = getShadowOffset(Addr, IRB);
1766   return getShadowAddress(Addr, Pos, ShadowOffset);
1767 }
1768 
1769 Value *DFSanFunction::combineShadowsThenConvert(Type *T, Value *V1, Value *V2,
1770                                                 Instruction *Pos) {
1771   Value *PrimitiveValue = combineShadows(V1, V2, Pos);
1772   return expandFromPrimitiveShadow(T, PrimitiveValue, Pos);
1773 }
1774 
// Generates IR to compute the union of the two given shadows, inserting it
// before Pos. The combined value is with primitive type.
Value *DFSanFunction::combineShadows(Value *V1, Value *V2, Instruction *Pos) {
  // Fast paths: a union with a zero shadow, or of a shadow with itself,
  // needs no new IR beyond collapsing to the primitive shadow type.
  if (DFS.isZeroShadow(V1))
    return collapseToPrimitiveShadow(V2, Pos);
  if (DFS.isZeroShadow(V2))
    return collapseToPrimitiveShadow(V1, Pos);
  if (V1 == V2)
    return collapseToPrimitiveShadow(V1, Pos);

  // If one operand's recorded element set subsumes the other's, the union is
  // just the larger operand and no OR instruction is needed.
  auto V1Elems = ShadowElements.find(V1);
  auto V2Elems = ShadowElements.find(V2);
  if (V1Elems != ShadowElements.end() && V2Elems != ShadowElements.end()) {
    if (std::includes(V1Elems->second.begin(), V1Elems->second.end(),
                      V2Elems->second.begin(), V2Elems->second.end())) {
      return collapseToPrimitiveShadow(V1, Pos);
    }
    if (std::includes(V2Elems->second.begin(), V2Elems->second.end(),
                      V1Elems->second.begin(), V1Elems->second.end())) {
      return collapseToPrimitiveShadow(V2, Pos);
    }
  } else if (V1Elems != ShadowElements.end()) {
    if (V1Elems->second.count(V2))
      return collapseToPrimitiveShadow(V1, Pos);
  } else if (V2Elems != ShadowElements.end()) {
    if (V2Elems->second.count(V1))
      return collapseToPrimitiveShadow(V2, Pos);
  }

  // Canonicalize the operand order so {V1,V2} and {V2,V1} share one cache
  // entry; reuse a previously computed union if its block dominates Pos.
  auto Key = std::make_pair(V1, V2);
  if (V1 > V2)
    std::swap(Key.first, Key.second);
  CachedShadow &CCS = CachedShadows[Key];
  if (CCS.Block && DT.dominates(CCS.Block, Pos->getParent()))
    return CCS.Shadow;

  // Converts inputs shadows to shadows with primitive types.
  Value *PV1 = collapseToPrimitiveShadow(V1, Pos);
  Value *PV2 = collapseToPrimitiveShadow(V2, Pos);

  IRBuilder<> IRB(Pos);
  CCS.Block = Pos->getParent();
  CCS.Shadow = IRB.CreateOr(PV1, PV2);

  // Record the element set of the new union so later combines involving it
  // can take the subsumption fast paths above.
  std::set<Value *> UnionElems;
  if (V1Elems != ShadowElements.end()) {
    UnionElems = V1Elems->second;
  } else {
    UnionElems.insert(V1);
  }
  if (V2Elems != ShadowElements.end()) {
    UnionElems.insert(V2Elems->second.begin(), V2Elems->second.end());
  } else {
    UnionElems.insert(V2);
  }
  ShadowElements[CCS.Shadow] = std::move(UnionElems);

  return CCS.Shadow;
}
1834 
1835 // A convenience function which folds the shadows of each of the operands
1836 // of the provided instruction Inst, inserting the IR before Inst.  Returns
1837 // the computed union Value.
1838 Value *DFSanFunction::combineOperandShadows(Instruction *Inst) {
1839   if (Inst->getNumOperands() == 0)
1840     return DFS.getZeroShadow(Inst);
1841 
1842   Value *Shadow = getShadow(Inst->getOperand(0));
1843   for (unsigned I = 1, N = Inst->getNumOperands(); I < N; ++I)
1844     Shadow = combineShadows(Shadow, getShadow(Inst->getOperand(I)), Inst);
1845 
1846   return expandFromPrimitiveShadow(Inst->getType(), Shadow, Inst);
1847 }
1848 
1849 void DFSanVisitor::visitInstOperands(Instruction &I) {
1850   Value *CombinedShadow = DFSF.combineOperandShadows(&I);
1851   DFSF.setShadow(&I, CombinedShadow);
1852   visitInstOperandOrigins(I);
1853 }
1854 
1855 Value *DFSanFunction::combineOrigins(const std::vector<Value *> &Shadows,
1856                                      const std::vector<Value *> &Origins,
1857                                      Instruction *Pos, ConstantInt *Zero) {
1858   assert(Shadows.size() == Origins.size());
1859   size_t Size = Origins.size();
1860   if (Size == 0)
1861     return DFS.ZeroOrigin;
1862   Value *Origin = nullptr;
1863   if (!Zero)
1864     Zero = DFS.ZeroPrimitiveShadow;
1865   for (size_t I = 0; I != Size; ++I) {
1866     Value *OpOrigin = Origins[I];
1867     Constant *ConstOpOrigin = dyn_cast<Constant>(OpOrigin);
1868     if (ConstOpOrigin && ConstOpOrigin->isNullValue())
1869       continue;
1870     if (!Origin) {
1871       Origin = OpOrigin;
1872       continue;
1873     }
1874     Value *OpShadow = Shadows[I];
1875     Value *PrimitiveShadow = collapseToPrimitiveShadow(OpShadow, Pos);
1876     IRBuilder<> IRB(Pos);
1877     Value *Cond = IRB.CreateICmpNE(PrimitiveShadow, Zero);
1878     Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
1879   }
1880   return Origin ? Origin : DFS.ZeroOrigin;
1881 }
1882 
1883 Value *DFSanFunction::combineOperandOrigins(Instruction *Inst) {
1884   size_t Size = Inst->getNumOperands();
1885   std::vector<Value *> Shadows(Size);
1886   std::vector<Value *> Origins(Size);
1887   for (unsigned I = 0; I != Size; ++I) {
1888     Shadows[I] = getShadow(Inst->getOperand(I));
1889     Origins[I] = getOrigin(Inst->getOperand(I));
1890   }
1891   return combineOrigins(Shadows, Origins, Inst);
1892 }
1893 
1894 void DFSanVisitor::visitInstOperandOrigins(Instruction &I) {
1895   if (!DFSF.DFS.shouldTrackOrigins())
1896     return;
1897   Value *CombinedOrigin = DFSF.combineOperandOrigins(&I);
1898   DFSF.setOrigin(&I, CombinedOrigin);
1899 }
1900 
1901 Align DFSanFunction::getShadowAlign(Align InstAlignment) {
1902   const Align Alignment = ClPreserveAlignment ? InstAlignment : Align(1);
1903   return Align(Alignment.value() * DFS.ShadowWidthBytes);
1904 }
1905 
1906 Align DFSanFunction::getOriginAlign(Align InstAlignment) {
1907   const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1908   return Align(std::max(MinOriginAlignment, Alignment));
1909 }
1910 
1911 bool DFSanFunction::isLookupTableConstant(Value *P) {
1912   if (GlobalVariable *GV = dyn_cast<GlobalVariable>(P->stripPointerCasts()))
1913     if (GV->isConstant() && GV->hasName())
1914       return DFS.CombineTaintLookupTableNames.count(GV->getName());
1915 
1916   return false;
1917 }
1918 
1919 bool DFSanFunction::useCallbackLoadLabelAndOrigin(uint64_t Size,
1920                                                   Align InstAlignment) {
1921   // When enabling tracking load instructions, we always use
1922   // __dfsan_load_label_and_origin to reduce code size.
1923   if (ClTrackOrigins == 2)
1924     return true;
1925 
1926   assert(Size != 0);
1927   // * if Size == 1, it is sufficient to load its origin aligned at 4.
1928   // * if Size == 2, we assume most cases Addr % 2 == 0, so it is sufficient to
1929   //   load its origin aligned at 4. If not, although origins may be lost, it
1930   //   should not happen very often.
1931   // * if align >= 4, Addr must be aligned to 4, otherwise it is UB. When
1932   //   Size % 4 == 0, it is more efficient to load origins without callbacks.
1933   // * Otherwise we use __dfsan_load_label_and_origin.
1934   // This should ensure that common cases run efficiently.
1935   if (Size <= 2)
1936     return false;
1937 
1938   const Align Alignment = llvm::assumeAligned(InstAlignment.value());
1939   return Alignment < MinOriginAlignment || !DFS.hasLoadSizeForFastPath(Size);
1940 }
1941 
1942 Value *DataFlowSanitizer::loadNextOrigin(Instruction *Pos, Align OriginAlign,
1943                                          Value **OriginAddr) {
1944   IRBuilder<> IRB(Pos);
1945   *OriginAddr =
1946       IRB.CreateGEP(OriginTy, *OriginAddr, ConstantInt::get(IntptrTy, 1));
1947   return IRB.CreateAlignedLoad(OriginTy, *OriginAddr, OriginAlign);
1948 }
1949 
/// Loads the shadow (and, when tracking origins, the origins) for a
/// Size-byte region starting at ShadowAddr using wide 32/64-bit shadow
/// loads, ORing the chunks together and then folding the combined wide
/// shadow down to one primitive shadow.  Requires Size >= 4 (asserted) and
/// a size accepted by hasLoadSizeForFastPath (see the comment below).
std::pair<Value *, Value *> DFSanFunction::loadShadowFast(
    Value *ShadowAddr, Value *OriginAddr, uint64_t Size, Align ShadowAlign,
    Align OriginAlign, Value *FirstOrigin, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();
  const uint64_t ShadowSize = Size * DFS.ShadowWidthBytes;

  assert(Size >= 4 && "Not large enough load size for fast path!");

  // Used for origin tracking.
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  // Load instructions in LLVM can have arbitrary byte sizes (e.g., 3, 12, 20)
  // but this function is only used in a subset of cases that make it possible
  // to optimize the instrumentation.
  //
  // Specifically, when the shadow size in bytes (i.e., loaded bytes x shadow
  // per byte) is either:
  // - a multiple of 8  (common)
  // - equal to 4       (only for load32)
  //
  // For the second case, we can fit the wide shadow in a 32-bit integer. In all
  // other cases, we use a 64-bit integer to hold the wide shadow.
  Type *WideShadowTy =
      ShadowSize == 4 ? Type::getInt32Ty(*DFS.Ctx) : Type::getInt64Ty(*DFS.Ctx);

  IRBuilder<> IRB(Pos);
  Value *WideAddr = IRB.CreateBitCast(ShadowAddr, WideShadowTy->getPointerTo());
  Value *CombinedWideShadow =
      IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);

  unsigned WideShadowBitWidth = WideShadowTy->getIntegerBitWidth();
  const uint64_t BytesPerWideShadow = WideShadowBitWidth / DFS.ShadowWidthBits;

  // Records the (shadow, origin) pairs that combineOrigins() will reduce at
  // the end.  Note: advances OriginAddr via loadNextOrigin on the 64-bit
  // path, so call order matters.
  auto AppendWideShadowAndOrigin = [&](Value *WideShadow, Value *Origin) {
    if (BytesPerWideShadow > 4) {
      assert(BytesPerWideShadow == 8);
      // The wide shadow relates to two origin pointers: one for the first four
      // application bytes, and one for the latest four. We use a left shift to
      // get just the shadow bytes that correspond to the first origin pointer,
      // and then the entire shadow for the second origin pointer (which will be
      // chosen by combineOrigins() iff the least-significant half of the wide
      // shadow was empty but the other half was not).
      Value *WideShadowLo = IRB.CreateShl(
          WideShadow, ConstantInt::get(WideShadowTy, WideShadowBitWidth / 2));
      Shadows.push_back(WideShadow);
      Origins.push_back(DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr));

      Shadows.push_back(WideShadowLo);
      Origins.push_back(Origin);
    } else {
      Shadows.push_back(WideShadow);
      Origins.push_back(Origin);
    }
  };

  if (ShouldTrackOrigins)
    AppendWideShadowAndOrigin(CombinedWideShadow, FirstOrigin);

  // First OR all the WideShadows (i.e., 64bit or 32bit shadow chunks) linearly;
  // then OR individual shadows within the combined WideShadow by binary ORing.
  // This is fewer instructions than ORing shadows individually, since it
  // needs logN shift/or instructions (N being the bytes of the combined wide
  // shadow).
  for (uint64_t ByteOfs = BytesPerWideShadow; ByteOfs < Size;
       ByteOfs += BytesPerWideShadow) {
    WideAddr = IRB.CreateGEP(WideShadowTy, WideAddr,
                             ConstantInt::get(DFS.IntptrTy, 1));
    Value *NextWideShadow =
        IRB.CreateAlignedLoad(WideShadowTy, WideAddr, ShadowAlign);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, NextWideShadow);
    if (ShouldTrackOrigins) {
      Value *NextOrigin = DFS.loadNextOrigin(Pos, OriginAlign, &OriginAddr);
      AppendWideShadowAndOrigin(NextWideShadow, NextOrigin);
    }
  }
  // Fold the combined wide shadow down to a single primitive shadow by ORing
  // successively smaller high halves into the low bits.
  for (unsigned Width = WideShadowBitWidth / 2; Width >= DFS.ShadowWidthBits;
       Width >>= 1) {
    Value *ShrShadow = IRB.CreateLShr(CombinedWideShadow, Width);
    CombinedWideShadow = IRB.CreateOr(CombinedWideShadow, ShrShadow);
  }
  return {IRB.CreateTrunc(CombinedWideShadow, DFS.PrimitiveShadowTy),
          ShouldTrackOrigins
              ? combineOrigins(Shadows, Origins, Pos,
                               ConstantInt::getSigned(IRB.getInt64Ty(), 0))
              : DFS.ZeroOrigin};
}
2037 
/// Computes the shadow (and origin, when tracking) of a Size-byte load from
/// Addr, without the ClTrackOrigins == 2 origin update that loadShadowOrigin
/// adds on top.  Tries progressively cheaper strategies: non-escaped alloca
/// map lookup, statically-zero shadow for constant memory, the
/// __dfsan_load_label_and_origin callback, direct 1/2-byte shadow loads, the
/// wide-load fast path, and finally the __dfsan_union_load fallback.
std::pair<Value *, Value *> DFSanFunction::loadShadowOriginSansLoadTracking(
    Value *Addr, uint64_t Size, Align InstAlignment, Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins();

  // Non-escaped loads.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      // The alloca's shadow is kept in a dedicated shadow alloca; load it
      // directly instead of going through shadow memory.
      IRBuilder<> IRB(Pos);
      Value *ShadowLI = IRB.CreateLoad(DFS.PrimitiveShadowTy, SI->second);
      const auto OI = AllocaOriginMap.find(AI);
      assert(!ShouldTrackOrigins || OI != AllocaOriginMap.end());
      return {ShadowLI, ShouldTrackOrigins
                            ? IRB.CreateLoad(DFS.OriginTy, OI->second)
                            : nullptr};
    }
  }

  // Load from constant addresses.
  SmallVector<const Value *, 2> Objs;
  getUnderlyingObjects(Addr, Objs);
  bool AllConstants = true;
  for (const Value *Obj : Objs) {
    // Functions, block addresses and constant globals cannot carry taint.
    if (isa<Function>(Obj) || isa<BlockAddress>(Obj))
      continue;
    if (isa<GlobalVariable>(Obj) && cast<GlobalVariable>(Obj)->isConstant())
      continue;

    AllConstants = false;
    break;
  }
  if (AllConstants)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // A zero-byte load has a zero shadow by definition.
  if (Size == 0)
    return {DFS.ZeroPrimitiveShadow,
            ShouldTrackOrigins ? DFS.ZeroOrigin : nullptr};

  // Use callback to load if this is not an optimizable case for origin
  // tracking.
  if (ShouldTrackOrigins &&
      useCallbackLoadLabelAndOrigin(Size, InstAlignment)) {
    IRBuilder<> IRB(Pos);
    CallInst *Call =
        IRB.CreateCall(DFS.DFSanLoadLabelAndOriginFn,
                       {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                        ConstantInt::get(DFS.IntptrTy, Size)});
    Call->addRetAttr(Attribute::ZExt);
    // The callback packs the label in the high bits and the origin in the
    // low OriginWidthBits; unpack both halves here.
    return {IRB.CreateTrunc(IRB.CreateLShr(Call, DFS.OriginWidthBits),
                            DFS.PrimitiveShadowTy),
            IRB.CreateTrunc(Call, DFS.OriginTy)};
  }

  // Other cases that support loading shadows or origins in a fast way.
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  const Align OriginAlign = getOriginAlign(InstAlignment);
  Value *Origin = nullptr;
  if (ShouldTrackOrigins) {
    IRBuilder<> IRB(Pos);
    Origin = IRB.CreateAlignedLoad(DFS.OriginTy, OriginAddr, OriginAlign);
  }

  // When the byte size is small enough, we can load the shadow directly with
  // just a few instructions.
  switch (Size) {
  case 1: {
    LoadInst *LI = new LoadInst(DFS.PrimitiveShadowTy, ShadowAddr, "", Pos);
    LI->setAlignment(ShadowAlign);
    return {LI, Origin};
  }
  case 2: {
    // Two adjacent one-byte shadow loads, combined with combineShadows().
    IRBuilder<> IRB(Pos);
    Value *ShadowAddr1 = IRB.CreateGEP(DFS.PrimitiveShadowTy, ShadowAddr,
                                       ConstantInt::get(DFS.IntptrTy, 1));
    Value *Load =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr, ShadowAlign);
    Value *Load1 =
        IRB.CreateAlignedLoad(DFS.PrimitiveShadowTy, ShadowAddr1, ShadowAlign);
    return {combineShadows(Load, Load1, Pos), Origin};
  }
  }
  bool HasSizeForFastPath = DFS.hasLoadSizeForFastPath(Size);

  if (HasSizeForFastPath)
    return loadShadowFast(ShadowAddr, OriginAddr, Size, ShadowAlign,
                          OriginAlign, Origin, Pos);

  // Fallback: call the runtime to union the whole shadow range.
  IRBuilder<> IRB(Pos);
  CallInst *FallbackCall = IRB.CreateCall(
      DFS.DFSanUnionLoadFn, {ShadowAddr, ConstantInt::get(DFS.IntptrTy, Size)});
  FallbackCall->addRetAttr(Attribute::ZExt);
  return {FallbackCall, Origin};
}
2136 
2137 std::pair<Value *, Value *> DFSanFunction::loadShadowOrigin(Value *Addr,
2138                                                             uint64_t Size,
2139                                                             Align InstAlignment,
2140                                                             Instruction *Pos) {
2141   Value *PrimitiveShadow, *Origin;
2142   std::tie(PrimitiveShadow, Origin) =
2143       loadShadowOriginSansLoadTracking(Addr, Size, InstAlignment, Pos);
2144   if (DFS.shouldTrackOrigins()) {
2145     if (ClTrackOrigins == 2) {
2146       IRBuilder<> IRB(Pos);
2147       auto *ConstantShadow = dyn_cast<Constant>(PrimitiveShadow);
2148       if (!ConstantShadow || !ConstantShadow->isZeroValue())
2149         Origin = updateOriginIfTainted(PrimitiveShadow, Origin, IRB);
2150     }
2151   }
2152   return {PrimitiveShadow, Origin};
2153 }
2154 
// Strengthens AO with acquire semantics: any atomic ordering is raised to at
// least Acquire (Release becomes AcquireRelease), while NotAtomic and
// SequentiallyConsistent are left as-is. Used so a shadow load inserted after
// an atomic application load is ordered correctly relative to it.
static AtomicOrdering addAcquireOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Acquire:
    return AtomicOrdering::Acquire;
  case AtomicOrdering::Release:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}
2171 
2172 Value *StripPointerGEPsAndCasts(Value *V) {
2173   if (!V->getType()->isPointerTy())
2174     return V;
2175 
2176   // DFSan pass should be running on valid IR, but we'll
2177   // keep a seen set to ensure there are no issues.
2178   SmallPtrSet<const Value *, 4> Visited;
2179   Visited.insert(V);
2180   do {
2181     if (auto *GEP = dyn_cast<GEPOperator>(V)) {
2182       V = GEP->getPointerOperand();
2183     } else if (Operator::getOpcode(V) == Instruction::BitCast) {
2184       V = cast<Operator>(V)->getOperand(0);
2185       if (!V->getType()->isPointerTy())
2186         return V;
2187     } else if (isa<GlobalAlias>(V)) {
2188       V = cast<GlobalAlias>(V)->getAliasee();
2189     }
2190   } while (Visited.insert(V).second);
2191 
2192   return V;
2193 }
2194 
void DFSanVisitor::visitLoadInst(LoadInst &LI) {
  auto &DL = LI.getModule()->getDataLayout();
  uint64_t Size = DL.getTypeStoreSize(LI.getType());
  // Zero-sized loads carry no data, so the result is trivially untainted.
  if (Size == 0) {
    DFSF.setShadow(&LI, DFSF.DFS.getZeroShadow(&LI));
    DFSF.setOrigin(&LI, DFSF.DFS.ZeroOrigin);
    return;
  }

  // When an application load is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensure shadow loads return either labels of the
  // initial application data or zeros.
  if (LI.isAtomic())
    LI.setOrdering(addAcquireOrdering(LI.getOrdering()));

  // For atomic loads the shadow must be read after the application load, so
  // instrumentation is inserted after LI instead of before it.
  Instruction *Pos = LI.isAtomic() ? LI.getNextNode() : &LI;
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *PrimitiveShadow, *Origin;
  std::tie(PrimitiveShadow, Origin) =
      DFSF.loadShadowOrigin(LI.getPointerOperand(), Size, LI.getAlign(), Pos);
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  if (ShouldTrackOrigins) {
    Shadows.push_back(PrimitiveShadow);
    Origins.push_back(Origin);
  }
  // Optionally fold the pointer operand's label into the loaded label; this
  // always happens for loads out of constant lookup tables.
  if (ClCombinePointerLabelsOnLoad ||
      DFSF.isLookupTableConstant(
          StripPointerGEPsAndCasts(LI.getPointerOperand()))) {
    Value *PtrShadow = DFSF.getShadow(LI.getPointerOperand());
    PrimitiveShadow = DFSF.combineShadows(PrimitiveShadow, PtrShadow, Pos);
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(LI.getPointerOperand()));
    }
  }
  if (!DFSF.DFS.isZeroShadow(PrimitiveShadow))
    DFSF.NonZeroChecks.push_back(PrimitiveShadow);

  // Expand the primitive (collapsed) shadow back to the shape of LI's type.
  Value *Shadow =
      DFSF.expandFromPrimitiveShadow(LI.getType(), PrimitiveShadow, Pos);
  DFSF.setShadow(&LI, Shadow);

  if (ShouldTrackOrigins) {
    DFSF.setOrigin(&LI, DFSF.combineOrigins(Shadows, Origins, Pos));
  }

  // Report the load (shadow + address) to the event callback if requested.
  if (ClEventCallbacks) {
    IRBuilder<> IRB(Pos);
    Value *Addr8 = IRB.CreateBitCast(LI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanLoadCallbackFn, {PrimitiveShadow, Addr8});
  }
}
2250 
2251 Value *DFSanFunction::updateOriginIfTainted(Value *Shadow, Value *Origin,
2252                                             IRBuilder<> &IRB) {
2253   assert(DFS.shouldTrackOrigins());
2254   return IRB.CreateCall(DFS.DFSanChainOriginIfTaintedFn, {Shadow, Origin});
2255 }
2256 
2257 Value *DFSanFunction::updateOrigin(Value *V, IRBuilder<> &IRB) {
2258   if (!DFS.shouldTrackOrigins())
2259     return V;
2260   return IRB.CreateCall(DFS.DFSanChainOriginFn, V);
2261 }
2262 
2263 Value *DFSanFunction::originToIntptr(IRBuilder<> &IRB, Value *Origin) {
2264   const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
2265   const DataLayout &DL = F->getParent()->getDataLayout();
2266   unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
2267   if (IntptrSize == OriginSize)
2268     return Origin;
2269   assert(IntptrSize == OriginSize * 2);
2270   Origin = IRB.CreateIntCast(Origin, DFS.IntptrTy, /* isSigned */ false);
2271   return IRB.CreateOr(Origin, IRB.CreateShl(Origin, OriginSize * 8));
2272 }
2273 
// Writes Origin into the origin shadow covering StoreOriginSize application
// bytes at StoreOriginAddr. When alignment allows, the origin is replicated
// to intptr width (see originToIntptr) so two slots are filled per store.
void DFSanFunction::paintOrigin(IRBuilder<> &IRB, Value *Origin,
                                Value *StoreOriginAddr,
                                uint64_t StoreOriginSize, Align Alignment) {
  const unsigned OriginSize = DataFlowSanitizer::OriginWidthBytes;
  const DataLayout &DL = F->getParent()->getDataLayout();
  const Align IntptrAlignment = DL.getABITypeAlign(DFS.IntptrTy);
  unsigned IntptrSize = DL.getTypeStoreSize(DFS.IntptrTy);
  assert(IntptrAlignment >= MinOriginAlignment);
  assert(IntptrSize >= OriginSize);

  // Ofs counts origin-sized slots already covered by the wide-store loop.
  unsigned Ofs = 0;
  Align CurrentAlignment = Alignment;
  if (Alignment >= IntptrAlignment && IntptrSize > OriginSize) {
    // Fast path: store the replicated origin one intptr at a time.
    Value *IntptrOrigin = originToIntptr(IRB, Origin);
    Value *IntptrStoreOriginPtr = IRB.CreatePointerCast(
        StoreOriginAddr, PointerType::get(DFS.IntptrTy, 0));
    for (unsigned I = 0; I < StoreOriginSize / IntptrSize; ++I) {
      Value *Ptr =
          I ? IRB.CreateConstGEP1_32(DFS.IntptrTy, IntptrStoreOriginPtr, I)
            : IntptrStoreOriginPtr;
      IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
      Ofs += IntptrSize / OriginSize;
      // After the first (aligned) store, subsequent ones are intptr-aligned.
      CurrentAlignment = IntptrAlignment;
    }
  }

  // Store any remaining slots (the tail, or everything when the fast path
  // was not taken) one origin at a time, rounding the slot count up.
  for (unsigned I = Ofs; I < (StoreOriginSize + OriginSize - 1) / OriginSize;
       ++I) {
    Value *GEP = I ? IRB.CreateConstGEP1_32(DFS.OriginTy, StoreOriginAddr, I)
                   : StoreOriginAddr;
    IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
    CurrentAlignment = MinOriginAlignment;
  }
}
2308 
2309 Value *DFSanFunction::convertToBool(Value *V, IRBuilder<> &IRB,
2310                                     const Twine &Name) {
2311   Type *VTy = V->getType();
2312   assert(VTy->isIntegerTy());
2313   if (VTy->getIntegerBitWidth() == 1)
2314     // Just converting a bool to a bool, so do nothing.
2315     return V;
2316   return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), Name);
2317 }
2318 
// Writes Origin to the origin shadow at StoreOriginAddr for Size bytes, but
// only when the (collapsed) Shadow is nonzero.
void DFSanFunction::storeOrigin(Instruction *Pos, Value *Addr, uint64_t Size,
                                Value *Shadow, Value *Origin,
                                Value *StoreOriginAddr, Align InstAlignment) {
  // Do not write origins for zero shadows because we do not trace origins for
  // untainted sinks.
  const Align OriginAlignment = getOriginAlign(InstAlignment);
  Value *CollapsedShadow = collapseToPrimitiveShadow(Shadow, Pos);
  IRBuilder<> IRB(Pos);
  // If the shadow is a compile-time constant, decide statically: paint the
  // origin when it is known nonzero, emit nothing when it is known zero.
  if (auto *ConstantShadow = dyn_cast<Constant>(CollapsedShadow)) {
    if (!ConstantShadow->isZeroValue())
      paintOrigin(IRB, updateOrigin(Origin, IRB), StoreOriginAddr, Size,
                  OriginAlignment);
    return;
  }

  if (shouldInstrumentWithCall()) {
    // Delegate the runtime tainted-check plus origin store to the runtime.
    IRB.CreateCall(DFS.DFSanMaybeStoreOriginFn,
                   {CollapsedShadow,
                    IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(DFS.IntptrTy, Size), Origin});
  } else {
    // Inline the check: branch on shadow != 0 (weighted by
    // OriginStoreWeights) and paint origins only on the tainted path.
    Value *Cmp = convertToBool(CollapsedShadow, IRB, "_dfscmp");
    Instruction *CheckTerm = SplitBlockAndInsertIfThen(
        Cmp, &*IRB.GetInsertPoint(), false, DFS.OriginStoreWeights, &DT);
    IRBuilder<> IRBNew(CheckTerm);
    paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), StoreOriginAddr, Size,
                OriginAlignment);
    ++NumOriginStores;
  }
}
2349 
2350 void DFSanFunction::storeZeroPrimitiveShadow(Value *Addr, uint64_t Size,
2351                                              Align ShadowAlign,
2352                                              Instruction *Pos) {
2353   IRBuilder<> IRB(Pos);
2354   IntegerType *ShadowTy =
2355       IntegerType::get(*DFS.Ctx, Size * DFS.ShadowWidthBits);
2356   Value *ExtZeroShadow = ConstantInt::get(ShadowTy, 0);
2357   Value *ShadowAddr = DFS.getShadowAddress(Addr, Pos);
2358   Value *ExtShadowAddr =
2359       IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowTy));
2360   IRB.CreateAlignedStore(ExtZeroShadow, ExtShadowAddr, ShadowAlign);
2361   // Do not write origins for 0 shadows because we do not trace origins for
2362   // untainted sinks.
2363 }
2364 
// Writes PrimitiveShadow (and, when tracking, Origin) to the shadow/origin
// memory backing Size application bytes at Addr.
void DFSanFunction::storePrimitiveShadowOrigin(Value *Addr, uint64_t Size,
                                               Align InstAlignment,
                                               Value *PrimitiveShadow,
                                               Value *Origin,
                                               Instruction *Pos) {
  const bool ShouldTrackOrigins = DFS.shouldTrackOrigins() && Origin;

  // Fast path: stores to an alloca whose shadow was promoted to a dedicated
  // shadow alloca (see AllocaShadowMap) bypass shadow memory entirely.
  if (AllocaInst *AI = dyn_cast<AllocaInst>(Addr)) {
    const auto SI = AllocaShadowMap.find(AI);
    if (SI != AllocaShadowMap.end()) {
      IRBuilder<> IRB(Pos);
      IRB.CreateStore(PrimitiveShadow, SI->second);

      // Do not write origins for 0 shadows because we do not trace origins for
      // untainted sinks.
      if (ShouldTrackOrigins && !DFS.isZeroShadow(PrimitiveShadow)) {
        const auto OI = AllocaOriginMap.find(AI);
        assert(OI != AllocaOriginMap.end() && Origin);
        IRB.CreateStore(Origin, OI->second);
      }
      return;
    }
  }

  const Align ShadowAlign = getShadowAlign(InstAlignment);
  // A zero shadow can be written with one wide store and needs no origin.
  if (DFS.isZeroShadow(PrimitiveShadow)) {
    storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, Pos);
    return;
  }

  IRBuilder<> IRB(Pos);
  Value *ShadowAddr, *OriginAddr;
  std::tie(ShadowAddr, OriginAddr) =
      DFS.getShadowOriginAddress(Addr, InstAlignment, Pos);

  const unsigned ShadowVecSize = 8;
  assert(ShadowVecSize * DFS.ShadowWidthBits <= 128 &&
         "Shadow vector is too large!");

  // While at least ShadowVecSize elements remain, store the shadow a vector
  // (8 copies of PrimitiveShadow) at a time...
  uint64_t Offset = 0;
  uint64_t LeftSize = Size;
  if (LeftSize >= ShadowVecSize) {
    auto *ShadowVecTy =
        FixedVectorType::get(DFS.PrimitiveShadowTy, ShadowVecSize);
    Value *ShadowVec = UndefValue::get(ShadowVecTy);
    for (unsigned I = 0; I != ShadowVecSize; ++I) {
      ShadowVec = IRB.CreateInsertElement(
          ShadowVec, PrimitiveShadow,
          ConstantInt::get(Type::getInt32Ty(*DFS.Ctx), I));
    }
    Value *ShadowVecAddr =
        IRB.CreateBitCast(ShadowAddr, PointerType::getUnqual(ShadowVecTy));
    do {
      Value *CurShadowVecAddr =
          IRB.CreateConstGEP1_32(ShadowVecTy, ShadowVecAddr, Offset);
      IRB.CreateAlignedStore(ShadowVec, CurShadowVecAddr, ShadowAlign);
      LeftSize -= ShadowVecSize;
      ++Offset;
    } while (LeftSize >= ShadowVecSize);
    // Convert the vector-element offset back to a scalar-element offset for
    // the tail loop below.
    Offset *= ShadowVecSize;
  }
  // ...then store any remaining elements one at a time.
  while (LeftSize > 0) {
    Value *CurShadowAddr =
        IRB.CreateConstGEP1_32(DFS.PrimitiveShadowTy, ShadowAddr, Offset);
    IRB.CreateAlignedStore(PrimitiveShadow, CurShadowAddr, ShadowAlign);
    --LeftSize;
    ++Offset;
  }

  if (ShouldTrackOrigins) {
    storeOrigin(Pos, Addr, Size, PrimitiveShadow, Origin, OriginAddr,
                InstAlignment);
  }
}
2439 
// Strengthens AO with release semantics: any atomic ordering is raised to at
// least Release (Acquire becomes AcquireRelease), while NotAtomic and
// SequentiallyConsistent are left as-is. Mirror of addAcquireOrdering, used
// so a zero-shadow store inserted before an atomic application store is
// ordered correctly relative to it.
static AtomicOrdering addReleaseOrdering(AtomicOrdering AO) {
  switch (AO) {
  case AtomicOrdering::NotAtomic:
    return AtomicOrdering::NotAtomic;
  case AtomicOrdering::Unordered:
  case AtomicOrdering::Monotonic:
  case AtomicOrdering::Release:
    return AtomicOrdering::Release;
  case AtomicOrdering::Acquire:
  case AtomicOrdering::AcquireRelease:
    return AtomicOrdering::AcquireRelease;
  case AtomicOrdering::SequentiallyConsistent:
    return AtomicOrdering::SequentiallyConsistent;
  }
  llvm_unreachable("Unknown ordering");
}
2456 
void DFSanVisitor::visitStoreInst(StoreInst &SI) {
  auto &DL = SI.getModule()->getDataLayout();
  Value *Val = SI.getValueOperand();
  uint64_t Size = DL.getTypeStoreSize(Val->getType());
  // Zero-sized stores move no data; nothing to instrument.
  if (Size == 0)
    return;

  // When an application store is atomic, increase atomic ordering between
  // atomic application loads and stores to ensure happen-before order; load
  // shadow data after application data; store zero shadow data before
  // application data. This ensure shadow loads return either labels of the
  // initial application data or zeros.
  if (SI.isAtomic())
    SI.setOrdering(addReleaseOrdering(SI.getOrdering()));

  // Atomic stores write a zero shadow (see below), so there is no origin to
  // track for them.
  const bool ShouldTrackOrigins =
      DFSF.DFS.shouldTrackOrigins() && !SI.isAtomic();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;

  Value *Shadow =
      SI.isAtomic() ? DFSF.DFS.getZeroShadow(Val) : DFSF.getShadow(Val);

  if (ShouldTrackOrigins) {
    Shadows.push_back(Shadow);
    Origins.push_back(DFSF.getOrigin(Val));
  }

  // Optionally fold the pointer operand's label into the stored label.
  Value *PrimitiveShadow;
  if (ClCombinePointerLabelsOnStore) {
    Value *PtrShadow = DFSF.getShadow(SI.getPointerOperand());
    if (ShouldTrackOrigins) {
      Shadows.push_back(PtrShadow);
      Origins.push_back(DFSF.getOrigin(SI.getPointerOperand()));
    }
    PrimitiveShadow = DFSF.combineShadows(Shadow, PtrShadow, &SI);
  } else {
    PrimitiveShadow = DFSF.collapseToPrimitiveShadow(Shadow, &SI);
  }
  Value *Origin = nullptr;
  if (ShouldTrackOrigins)
    Origin = DFSF.combineOrigins(Shadows, Origins, &SI);
  DFSF.storePrimitiveShadowOrigin(SI.getPointerOperand(), Size, SI.getAlign(),
                                  PrimitiveShadow, Origin, &SI);
  // Report the store (shadow + address) to the event callback if requested.
  if (ClEventCallbacks) {
    IRBuilder<> IRB(&SI);
    Value *Addr8 = IRB.CreateBitCast(SI.getPointerOperand(), DFSF.DFS.Int8Ptr);
    IRB.CreateCall(DFSF.DFS.DFSanStoreCallbackFn, {PrimitiveShadow, Addr8});
  }
}
2507 
2508 void DFSanVisitor::visitCASOrRMW(Align InstAlignment, Instruction &I) {
2509   assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2510 
2511   Value *Val = I.getOperand(1);
2512   const auto &DL = I.getModule()->getDataLayout();
2513   uint64_t Size = DL.getTypeStoreSize(Val->getType());
2514   if (Size == 0)
2515     return;
2516 
2517   // Conservatively set data at stored addresses and return with zero shadow to
2518   // prevent shadow data races.
2519   IRBuilder<> IRB(&I);
2520   Value *Addr = I.getOperand(0);
2521   const Align ShadowAlign = DFSF.getShadowAlign(InstAlignment);
2522   DFSF.storeZeroPrimitiveShadow(Addr, Size, ShadowAlign, &I);
2523   DFSF.setShadow(&I, DFSF.DFS.getZeroShadow(&I));
2524   DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2525 }
2526 
void DFSanVisitor::visitAtomicRMWInst(AtomicRMWInst &I) {
  // Conservative handling: stored location gets zero shadow, result is
  // untainted (see visitCASOrRMW).
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setOrdering(addReleaseOrdering(I.getOrdering()));
}
2533 
void DFSanVisitor::visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
  // Conservative handling: stored location gets zero shadow, result is
  // untainted (see visitCASOrRMW).
  visitCASOrRMW(I.getAlign(), I);
  // TODO: The ordering change follows MSan. It is possible not to change
  // ordering because we always set and use 0 shadows.
  I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
}
2540 
// Propagate the operand's label to the result via visitInstOperands.
void DFSanVisitor::visitUnaryOperator(UnaryOperator &UO) {
  visitInstOperands(UO);
}
2544 
// Propagate the operands' labels to the result via visitInstOperands.
void DFSanVisitor::visitBinaryOperator(BinaryOperator &BO) {
  visitInstOperands(BO);
}
2548 
2549 void DFSanVisitor::visitBitCastInst(BitCastInst &BCI) {
2550   // Special case: if this is the bitcast (there is exactly 1 allowed) between
2551   // a musttail call and a ret, don't instrument. New instructions are not
2552   // allowed after a musttail call.
2553   if (auto *CI = dyn_cast<CallInst>(BCI.getOperand(0)))
2554     if (CI->isMustTailCall())
2555       return;
2556   visitInstOperands(BCI);
2557 }
2558 
// Propagate the operand's label through the cast via visitInstOperands.
void DFSanVisitor::visitCastInst(CastInst &CI) { visitInstOperands(CI); }
2560 
void DFSanVisitor::visitCmpInst(CmpInst &CI) {
  visitInstOperands(CI);
  if (ClEventCallbacks) {
    // Report the comparison's combined shadow to the event callback.
    IRBuilder<> IRB(&CI);
    Value *CombinedShadow = DFSF.getShadow(&CI);
    IRB.CreateCall(DFSF.DFS.DFSanCmpCallbackFn, CombinedShadow);
  }
}
2569 
void DFSanVisitor::visitLandingPadInst(LandingPadInst &LPI) {
  // We do not need to track data through LandingPadInst.
  //
  // For the C++ exceptions, if a value is thrown, this value will be stored
  // in a memory location provided by __cxa_allocate_exception(...) (on the
  // throw side) or  __cxa_begin_catch(...) (on the catch side).
  // This memory will have a shadow, so with the loads and stores we will be
  // able to propagate labels on data thrown through exceptions, without any
  // special handling of the LandingPadInst.
  //
  // The second element in the pair result of the LandingPadInst is a
  // register value, but it is for a type ID and should never be tainted.
  //
  // Hence the landing pad's own result gets zero shadow and zero origin.
  DFSF.setShadow(&LPI, DFSF.DFS.getZeroShadow(&LPI));
  DFSF.setOrigin(&LPI, DFSF.DFS.ZeroOrigin);
}
2585 
2586 void DFSanVisitor::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
2587   if (ClCombineOffsetLabelsOnGEP ||
2588       DFSF.isLookupTableConstant(
2589           StripPointerGEPsAndCasts(GEPI.getPointerOperand()))) {
2590     visitInstOperands(GEPI);
2591     return;
2592   }
2593 
2594   // Only propagate shadow/origin of base pointer value but ignore those of
2595   // offset operands.
2596   Value *BasePointer = GEPI.getPointerOperand();
2597   DFSF.setShadow(&GEPI, DFSF.getShadow(BasePointer));
2598   if (DFSF.DFS.shouldTrackOrigins())
2599     DFSF.setOrigin(&GEPI, DFSF.getOrigin(BasePointer));
2600 }
2601 
// Propagate the operands' labels to the result via visitInstOperands.
void DFSanVisitor::visitExtractElementInst(ExtractElementInst &I) {
  visitInstOperands(I);
}
2605 
// Propagate the operands' labels to the result via visitInstOperands.
void DFSanVisitor::visitInsertElementInst(InsertElementInst &I) {
  visitInstOperands(I);
}
2609 
// Propagate the operands' labels to the result via visitInstOperands.
void DFSanVisitor::visitShuffleVectorInst(ShuffleVectorInst &I) {
  visitInstOperands(I);
}
2613 
2614 void DFSanVisitor::visitExtractValueInst(ExtractValueInst &I) {
2615   IRBuilder<> IRB(&I);
2616   Value *Agg = I.getAggregateOperand();
2617   Value *AggShadow = DFSF.getShadow(Agg);
2618   Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
2619   DFSF.setShadow(&I, ResShadow);
2620   visitInstOperandOrigins(I);
2621 }
2622 
2623 void DFSanVisitor::visitInsertValueInst(InsertValueInst &I) {
2624   IRBuilder<> IRB(&I);
2625   Value *AggShadow = DFSF.getShadow(I.getAggregateOperand());
2626   Value *InsShadow = DFSF.getShadow(I.getInsertedValueOperand());
2627   Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
2628   DFSF.setShadow(&I, Res);
2629   visitInstOperandOrigins(I);
2630 }
2631 
2632 void DFSanVisitor::visitAllocaInst(AllocaInst &I) {
2633   bool AllLoadsStores = true;
2634   for (User *U : I.users()) {
2635     if (isa<LoadInst>(U))
2636       continue;
2637 
2638     if (StoreInst *SI = dyn_cast<StoreInst>(U)) {
2639       if (SI->getPointerOperand() == &I)
2640         continue;
2641     }
2642 
2643     AllLoadsStores = false;
2644     break;
2645   }
2646   if (AllLoadsStores) {
2647     IRBuilder<> IRB(&I);
2648     DFSF.AllocaShadowMap[&I] = IRB.CreateAlloca(DFSF.DFS.PrimitiveShadowTy);
2649     if (DFSF.DFS.shouldTrackOrigins()) {
2650       DFSF.AllocaOriginMap[&I] =
2651           IRB.CreateAlloca(DFSF.DFS.OriginTy, nullptr, "_dfsa");
2652     }
2653   }
2654   DFSF.setShadow(&I, DFSF.DFS.ZeroPrimitiveShadow);
2655   DFSF.setOrigin(&I, DFSF.DFS.ZeroOrigin);
2656 }
2657 
void DFSanVisitor::visitSelectInst(SelectInst &I) {
  Value *CondShadow = DFSF.getShadow(I.getCondition());
  Value *TrueShadow = DFSF.getShadow(I.getTrueValue());
  Value *FalseShadow = DFSF.getShadow(I.getFalseValue());
  Value *ShadowSel = nullptr;
  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  std::vector<Value *> Shadows;
  std::vector<Value *> Origins;
  Value *TrueOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getTrueValue()) : nullptr;
  Value *FalseOrigin =
      ShouldTrackOrigins ? DFSF.getOrigin(I.getFalseValue()) : nullptr;

  DFSF.addConditionalCallbacksIfEnabled(I, I.getCondition());

  // Vector selects choose per-lane, so conservatively merge both arms'
  // shadows (and, when tracking, keep both arms' origins).
  if (isa<VectorType>(I.getCondition()->getType())) {
    ShadowSel = DFSF.combineShadowsThenConvert(I.getType(), TrueShadow,
                                               FalseShadow, &I);
    if (ShouldTrackOrigins) {
      Shadows.push_back(TrueShadow);
      Shadows.push_back(FalseShadow);
      Origins.push_back(TrueOrigin);
      Origins.push_back(FalseOrigin);
    }
  } else {
    if (TrueShadow == FalseShadow) {
      // Both arms carry the same shadow value; no select is needed.
      ShadowSel = TrueShadow;
      if (ShouldTrackOrigins) {
        Shadows.push_back(TrueShadow);
        Origins.push_back(TrueOrigin);
      }
    } else {
      // Select the shadow (and origin) under the same condition as the data.
      ShadowSel =
          SelectInst::Create(I.getCondition(), TrueShadow, FalseShadow, "", &I);
      if (ShouldTrackOrigins) {
        Shadows.push_back(ShadowSel);
        Origins.push_back(SelectInst::Create(I.getCondition(), TrueOrigin,
                                             FalseOrigin, "", &I));
      }
    }
  }
  // Optionally taint the result with the condition's label as well.
  DFSF.setShadow(&I, ClTrackSelectControlFlow
                         ? DFSF.combineShadowsThenConvert(
                               I.getType(), CondShadow, ShadowSel, &I)
                         : ShadowSel);
  if (ShouldTrackOrigins) {
    if (ClTrackSelectControlFlow) {
      Shadows.push_back(CondShadow);
      Origins.push_back(DFSF.getOrigin(I.getCondition()));
    }
    DFSF.setOrigin(&I, DFSF.combineOrigins(Shadows, Origins, &I));
  }
}
2711 
2712 void DFSanVisitor::visitMemSetInst(MemSetInst &I) {
2713   IRBuilder<> IRB(&I);
2714   Value *ValShadow = DFSF.getShadow(I.getValue());
2715   Value *ValOrigin = DFSF.DFS.shouldTrackOrigins()
2716                          ? DFSF.getOrigin(I.getValue())
2717                          : DFSF.DFS.ZeroOrigin;
2718   IRB.CreateCall(
2719       DFSF.DFS.DFSanSetLabelFn,
2720       {ValShadow, ValOrigin,
2721        IRB.CreateBitCast(I.getDest(), Type::getInt8PtrTy(*DFSF.DFS.Ctx)),
2722        IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
2723 }
2724 
void DFSanVisitor::visitMemTransferInst(MemTransferInst &I) {
  IRBuilder<> IRB(&I);

  // CopyOrMoveOrigin transfers origins by referring to their shadows. So we
  // need to move origins before moving shadows.
  if (DFSF.DFS.shouldTrackOrigins()) {
    IRB.CreateCall(
        DFSF.DFS.DFSanMemOriginTransferFn,
        {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(I.getArgOperand(2), DFSF.DFS.IntptrTy, false)});
  }

  // Mirror the memcpy/memmove on the shadow: call the same intrinsic on the
  // shadow addresses with the length scaled by the shadow width.
  Value *RawDestShadow = DFSF.DFS.getShadowAddress(I.getDest(), &I);
  Value *SrcShadow = DFSF.DFS.getShadowAddress(I.getSource(), &I);
  Value *LenShadow =
      IRB.CreateMul(I.getLength(), ConstantInt::get(I.getLength()->getType(),
                                                    DFSF.DFS.ShadowWidthBytes));
  Type *Int8Ptr = Type::getInt8PtrTy(*DFSF.DFS.Ctx);
  Value *DestShadow = IRB.CreateBitCast(RawDestShadow, Int8Ptr);
  SrcShadow = IRB.CreateBitCast(SrcShadow, Int8Ptr);
  auto *MTI = cast<MemTransferInst>(
      IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
                     {DestShadow, SrcShadow, LenShadow, I.getVolatileCst()}));
  // Shadow alignments are derived from the application alignments.
  MTI->setDestAlignment(DFSF.getShadowAlign(I.getDestAlign().valueOrOne()));
  MTI->setSourceAlignment(DFSF.getShadowAlign(I.getSourceAlign().valueOrOne()));
  if (ClEventCallbacks) {
    IRB.CreateCall(DFSF.DFS.DFSanMemTransferCallbackFn,
                   {RawDestShadow,
                    IRB.CreateZExtOrTrunc(I.getLength(), DFSF.DFS.IntptrTy)});
  }
}
2757 
2758 void DFSanVisitor::visitBranchInst(BranchInst &BR) {
2759   if (!BR.isConditional())
2760     return;
2761 
2762   DFSF.addConditionalCallbacksIfEnabled(BR, BR.getCondition());
2763 }
2764 
// Report the switch condition's label via the conditional callback hook.
void DFSanVisitor::visitSwitchInst(SwitchInst &SW) {
  DFSF.addConditionalCallbacksIfEnabled(SW, SW.getCondition());
}
2768 
2769 static bool isAMustTailRetVal(Value *RetVal) {
2770   // Tail call may have a bitcast between return.
2771   if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
2772     RetVal = I->getOperand(0);
2773   }
2774   if (auto *I = dyn_cast<CallInst>(RetVal)) {
2775     return I->isMustTailCall();
2776   }
2777   return false;
2778 }
2779 
// Passes the return value's shadow (and origin) back to the caller through
// the __dfsan_retval_tls slots.
void DFSanVisitor::visitReturnInst(ReturnInst &RI) {
  if (!DFSF.IsNativeABI && RI.getReturnValue()) {
    // Don't emit the instrumentation for musttail call returns.
    if (isAMustTailRetVal(RI.getReturnValue()))
      return;

    Value *S = DFSF.getShadow(RI.getReturnValue());
    IRBuilder<> IRB(&RI);
    Type *RT = DFSF.F->getFunctionType()->getReturnType();
    unsigned Size = getDataLayout().getTypeAllocSize(DFSF.DFS.getShadowTy(RT));
    if (Size <= RetvalTLSSize) {
      // If the size overflows, stores nothing. At callsite, oversized return
      // shadows are set to zero.
      IRB.CreateAlignedStore(S, DFSF.getRetvalTLS(RT, IRB), ShadowTLSAlignment);
    }
    if (DFSF.DFS.shouldTrackOrigins()) {
      // The return origin goes through its own dedicated TLS slot.
      Value *O = DFSF.getOrigin(RI.getReturnValue());
      IRB.CreateStore(O, DFSF.getRetvalOriginTLS());
    }
  }
}
2801 
// Appends the shadow arguments for a call to a custom (__dfsw_/__dfso_)
// wrapper of F: one primitive shadow per fixed argument, a pointer to an
// array holding the variadic arguments' shadows (if F is variadic), and a
// pointer through which the callee writes the return value's shadow.
void DFSanVisitor::addShadowArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Adds non-variable argument shadows.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB));

  // Adds variable argument shadows.
  if (FT->isVarArg()) {
    auto *LabelVATy = ArrayType::get(DFSF.DFS.PrimitiveShadowTy,
                                     CB.arg_size() - FT->getNumParams());
    // The array lives in the caller's entry block so its address is valid
    // for the whole call.
    auto *LabelVAAlloca =
        new AllocaInst(LabelVATy, getDataLayout().getAllocaAddrSpace(),
                       "labelva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *LabelVAPtr = IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, N);
      IRB.CreateStore(DFSF.collapseToPrimitiveShadow(DFSF.getShadow(*I), &CB),
                      LabelVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(LabelVATy, LabelVAAlloca, 0));
  }

  // Adds the return value shadow (an out-parameter slot, created once per
  // function and reused across call sites).
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.LabelReturnAlloca) {
      DFSF.LabelReturnAlloca = new AllocaInst(
          DFSF.DFS.PrimitiveShadowTy, getDataLayout().getAllocaAddrSpace(),
          "labelreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.LabelReturnAlloca);
  }
}
2840 
// Appends the origin arguments for a call to a custom (__dfso_) wrapper of
// F, mirroring addShadowArguments: one origin per fixed argument, a pointer
// to an array of variadic-argument origins (if F is variadic), and a pointer
// through which the callee writes the return value's origin.
void DFSanVisitor::addOriginArguments(Function &F, CallBase &CB,
                                      std::vector<Value *> &Args,
                                      IRBuilder<> &IRB) {
  FunctionType *FT = F.getFunctionType();

  auto *I = CB.arg_begin();

  // Add non-variable argument origins.
  for (unsigned N = FT->getNumParams(); N != 0; ++I, --N)
    Args.push_back(DFSF.getOrigin(*I));

  // Add variable argument origins.
  if (FT->isVarArg()) {
    auto *OriginVATy =
        ArrayType::get(DFSF.DFS.OriginTy, CB.arg_size() - FT->getNumParams());
    // The array lives in the caller's entry block so its address is valid
    // for the whole call.
    auto *OriginVAAlloca =
        new AllocaInst(OriginVATy, getDataLayout().getAllocaAddrSpace(),
                       "originva", &DFSF.F->getEntryBlock().front());

    for (unsigned N = 0; I != CB.arg_end(); ++I, ++N) {
      auto *OriginVAPtr = IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, N);
      IRB.CreateStore(DFSF.getOrigin(*I), OriginVAPtr);
    }

    Args.push_back(IRB.CreateStructGEP(OriginVATy, OriginVAAlloca, 0));
  }

  // Add the return value origin (an out-parameter slot, created once per
  // function and reused across call sites).
  if (!FT->getReturnType()->isVoidTy()) {
    if (!DFSF.OriginReturnAlloca) {
      DFSF.OriginReturnAlloca = new AllocaInst(
          DFSF.DFS.OriginTy, getDataLayout().getAllocaAddrSpace(),
          "originreturn", &DFSF.F->getEntryBlock().front());
    }
    Args.push_back(DFSF.OriginReturnAlloca);
  }
}
2878 
2879 bool DFSanVisitor::visitWrappedCallBase(Function &F, CallBase &CB) {
2880   IRBuilder<> IRB(&CB);
2881   switch (DFSF.DFS.getWrapperKind(&F)) {
2882   case DataFlowSanitizer::WK_Warning:
2883     CB.setCalledFunction(&F);
2884     IRB.CreateCall(DFSF.DFS.DFSanUnimplementedFn,
2885                    IRB.CreateGlobalStringPtr(F.getName()));
2886     DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
2887     DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
2888     DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
2889     return true;
2890   case DataFlowSanitizer::WK_Discard:
2891     CB.setCalledFunction(&F);
2892     DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
2893     DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
2894     DFSF.setOrigin(&CB, DFSF.DFS.ZeroOrigin);
2895     return true;
2896   case DataFlowSanitizer::WK_Functional:
2897     CB.setCalledFunction(&F);
2898     DFSF.DFS.buildExternWeakCheckIfNeeded(IRB, &F);
2899     visitInstOperands(CB);
2900     return true;
2901   case DataFlowSanitizer::WK_Custom:
2902     // Don't try to handle invokes of custom functions, it's too complicated.
2903     // Instead, invoke the dfsw$ wrapper, which will in turn call the __dfsw_
2904     // wrapper.
2905     CallInst *CI = dyn_cast<CallInst>(&CB);
2906     if (!CI)
2907       return false;
2908 
2909     const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
2910     FunctionType *FT = F.getFunctionType();
2911     TransformedFunction CustomFn = DFSF.DFS.getCustomFunctionType(FT);
2912     std::string CustomFName = ShouldTrackOrigins ? "__dfso_" : "__dfsw_";
2913     CustomFName += F.getName();
2914     FunctionCallee CustomF = DFSF.DFS.Mod->getOrInsertFunction(
2915         CustomFName, CustomFn.TransformedType);
2916     if (Function *CustomFn = dyn_cast<Function>(CustomF.getCallee())) {
2917       CustomFn->copyAttributesFrom(&F);
2918 
2919       // Custom functions returning non-void will write to the return label.
2920       if (!FT->getReturnType()->isVoidTy()) {
2921         CustomFn->removeFnAttrs(DFSF.DFS.ReadOnlyNoneAttrs);
2922       }
2923     }
2924 
2925     std::vector<Value *> Args;
2926 
2927     // Adds non-variable arguments.
2928     auto *I = CB.arg_begin();
2929     for (unsigned N = FT->getNumParams(); N != 0; ++I, --N) {
2930       Args.push_back(*I);
2931     }
2932 
2933     // Adds shadow arguments.
2934     const unsigned ShadowArgStart = Args.size();
2935     addShadowArguments(F, CB, Args, IRB);
2936 
2937     // Adds origin arguments.
2938     const unsigned OriginArgStart = Args.size();
2939     if (ShouldTrackOrigins)
2940       addOriginArguments(F, CB, Args, IRB);
2941 
2942     // Adds variable arguments.
2943     append_range(Args, drop_begin(CB.args(), FT->getNumParams()));
2944 
2945     CallInst *CustomCI = IRB.CreateCall(CustomF, Args);
2946     CustomCI->setCallingConv(CI->getCallingConv());
2947     CustomCI->setAttributes(transformFunctionAttributes(
2948         CustomFn, CI->getContext(), CI->getAttributes()));
2949 
2950     // Update the parameter attributes of the custom call instruction to
2951     // zero extend the shadow parameters. This is required for targets
2952     // which consider PrimitiveShadowTy an illegal type.
2953     for (unsigned N = 0; N < FT->getNumParams(); N++) {
2954       const unsigned ArgNo = ShadowArgStart + N;
2955       if (CustomCI->getArgOperand(ArgNo)->getType() ==
2956           DFSF.DFS.PrimitiveShadowTy)
2957         CustomCI->addParamAttr(ArgNo, Attribute::ZExt);
2958       if (ShouldTrackOrigins) {
2959         const unsigned OriginArgNo = OriginArgStart + N;
2960         if (CustomCI->getArgOperand(OriginArgNo)->getType() ==
2961             DFSF.DFS.OriginTy)
2962           CustomCI->addParamAttr(OriginArgNo, Attribute::ZExt);
2963       }
2964     }
2965 
2966     // Loads the return value shadow and origin.
2967     if (!FT->getReturnType()->isVoidTy()) {
2968       LoadInst *LabelLoad =
2969           IRB.CreateLoad(DFSF.DFS.PrimitiveShadowTy, DFSF.LabelReturnAlloca);
2970       DFSF.setShadow(CustomCI, DFSF.expandFromPrimitiveShadow(
2971                                    FT->getReturnType(), LabelLoad, &CB));
2972       if (ShouldTrackOrigins) {
2973         LoadInst *OriginLoad =
2974             IRB.CreateLoad(DFSF.DFS.OriginTy, DFSF.OriginReturnAlloca);
2975         DFSF.setOrigin(CustomCI, OriginLoad);
2976       }
2977     }
2978 
2979     CI->replaceAllUsesWith(CustomCI);
2980     CI->eraseFromParent();
2981     return true;
2982   }
2983   return false;
2984 }
2985 
/// Instruments a call or invoke: argument shadows (and origins, when origin
/// tracking is on) are stored into the __dfsan_arg_tls / arg-origin TLS slots
/// before the call, and the return value's shadow/origin is loaded from the
/// retval TLS slots after the call returns.
void DFSanVisitor::visitCallBase(CallBase &CB) {
  Function *F = CB.getCalledFunction();
  // Intrinsics and inline asm are not instrumented as calls; treat them like
  // ordinary instructions by combining the shadows of their operands.
  if ((F && F->isIntrinsic()) || CB.isInlineAsm()) {
    visitInstOperands(CB);
    return;
  }

  // Calls to this function are synthesized in wrappers, and we shouldn't
  // instrument them.
  if (F == DFSF.DFS.DFSanVarargWrapperFn.getCallee()->stripPointerCasts())
    return;

  // Functions on the ABI list are redirected to wrappers; if the callee has
  // an unwrapped mapping, let visitWrappedCallBase handle it (it returns true
  // when it fully processed the call).
  DenseMap<Value *, Function *>::iterator UnwrappedFnIt =
      DFSF.DFS.UnwrappedFnMap.find(CB.getCalledOperand());
  if (UnwrappedFnIt != DFSF.DFS.UnwrappedFnMap.end())
    if (visitWrappedCallBase(*UnwrappedFnIt->second, CB))
      return;

  IRBuilder<> IRB(&CB);

  const bool ShouldTrackOrigins = DFSF.DFS.shouldTrackOrigins();
  FunctionType *FT = CB.getFunctionType();
  const DataLayout &DL = getDataLayout();

  // Stores argument shadows.
  unsigned ArgOffset = 0;
  for (unsigned I = 0, N = FT->getNumParams(); I != N; ++I) {
    if (ShouldTrackOrigins) {
      // Ignore overflowed origins
      // Origins are stored only for arguments that fit in the origin TLS
      // array and actually carry a nonzero shadow.
      Value *ArgShadow = DFSF.getShadow(CB.getArgOperand(I));
      if (I < DFSF.DFS.NumOfElementsInArgOrgTLS &&
          !DFSF.DFS.isZeroShadow(ArgShadow))
        IRB.CreateStore(DFSF.getOrigin(CB.getArgOperand(I)),
                        DFSF.getArgOriginTLS(I, IRB));
    }

    unsigned Size =
        DL.getTypeAllocSize(DFSF.DFS.getShadowTy(FT->getParamType(I)));
    // Stop storing if arguments' size overflows. Inside a function, arguments
    // after overflow have zero shadow values.
    if (ArgOffset + Size > ArgTLSSize)
      break;
    IRB.CreateAlignedStore(DFSF.getShadow(CB.getArgOperand(I)),
                           DFSF.getArgTLS(FT->getParamType(I), ArgOffset, IRB),
                           ShadowTLSAlignment);
    ArgOffset += alignTo(Size, ShadowTLSAlignment);
  }

  // Find the instruction before which the return-shadow load should be
  // inserted.
  Instruction *Next = nullptr;
  if (!CB.getType()->isVoidTy()) {
    if (InvokeInst *II = dyn_cast<InvokeInst>(&CB)) {
      if (II->getNormalDest()->getSinglePredecessor()) {
        Next = &II->getNormalDest()->front();
      } else {
        // The normal destination has other predecessors, so the epilogue
        // cannot go there directly; split the edge to get a dedicated block.
        BasicBlock *NewBB =
            SplitEdge(II->getParent(), II->getNormalDest(), &DFSF.DT);
        Next = &NewBB->front();
      }
    } else {
      // A CallInst is not a terminator, so it always has a next instruction.
      assert(CB.getIterator() != CB.getParent()->end());
      Next = CB.getNextNode();
    }

    // Don't emit the epilogue for musttail call returns.
    if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
      return;

    // Loads the return value shadow.
    IRBuilder<> NextIRB(Next);
    unsigned Size = DL.getTypeAllocSize(DFSF.DFS.getShadowTy(&CB));
    if (Size > RetvalTLSSize) {
      // Set overflowed return shadow to be zero.
      DFSF.setShadow(&CB, DFSF.DFS.getZeroShadow(&CB));
    } else {
      LoadInst *LI = NextIRB.CreateAlignedLoad(
          DFSF.DFS.getShadowTy(&CB), DFSF.getRetvalTLS(CB.getType(), NextIRB),
          ShadowTLSAlignment, "_dfsret");
      // Mark the load so it is not itself re-instrumented, and remember it
      // for the nonzero-label check pass.
      DFSF.SkipInsts.insert(LI);
      DFSF.setShadow(&CB, LI);
      DFSF.NonZeroChecks.push_back(LI);
    }

    // Loads the return value origin when origin tracking is enabled.
    if (ShouldTrackOrigins) {
      LoadInst *LI = NextIRB.CreateLoad(DFSF.DFS.OriginTy,
                                        DFSF.getRetvalOriginTLS(), "_dfsret_o");
      DFSF.SkipInsts.insert(LI);
      DFSF.setOrigin(&CB, LI);
    }
  }
}
3076 
3077 void DFSanVisitor::visitPHINode(PHINode &PN) {
3078   Type *ShadowTy = DFSF.DFS.getShadowTy(&PN);
3079   PHINode *ShadowPN =
3080       PHINode::Create(ShadowTy, PN.getNumIncomingValues(), "", &PN);
3081 
3082   // Give the shadow phi node valid predecessors to fool SplitEdge into working.
3083   Value *UndefShadow = UndefValue::get(ShadowTy);
3084   for (BasicBlock *BB : PN.blocks())
3085     ShadowPN->addIncoming(UndefShadow, BB);
3086 
3087   DFSF.setShadow(&PN, ShadowPN);
3088 
3089   PHINode *OriginPN = nullptr;
3090   if (DFSF.DFS.shouldTrackOrigins()) {
3091     OriginPN =
3092         PHINode::Create(DFSF.DFS.OriginTy, PN.getNumIncomingValues(), "", &PN);
3093     Value *UndefOrigin = UndefValue::get(DFSF.DFS.OriginTy);
3094     for (BasicBlock *BB : PN.blocks())
3095       OriginPN->addIncoming(UndefOrigin, BB);
3096     DFSF.setOrigin(&PN, OriginPN);
3097   }
3098 
3099   DFSF.PHIFixups.push_back({&PN, ShadowPN, OriginPN});
3100 }
3101 
3102 namespace {
3103 class DataFlowSanitizerLegacyPass : public ModulePass {
3104 private:
3105   std::vector<std::string> ABIListFiles;
3106 
3107 public:
3108   static char ID;
3109 
3110   DataFlowSanitizerLegacyPass(
3111       const std::vector<std::string> &ABIListFiles = std::vector<std::string>())
3112       : ModulePass(ID), ABIListFiles(ABIListFiles) {}
3113 
3114   bool runOnModule(Module &M) override {
3115     return DataFlowSanitizer(ABIListFiles).runImpl(M);
3116   }
3117 };
3118 } // namespace
3119 
// Out-of-line definition of the pass identifier; the legacy pass manager
// keys passes off this variable's address, so its value is irrelevant.
char DataFlowSanitizerLegacyPass::ID;

// Register the legacy pass under the "dfsan" command-line name.
INITIALIZE_PASS(DataFlowSanitizerLegacyPass, "dfsan",
                "DataFlowSanitizer: dynamic data flow analysis.", false, false)
3124 
3125 ModulePass *llvm::createDataFlowSanitizerLegacyPassPass(
3126     const std::vector<std::string> &ABIListFiles) {
3127   return new DataFlowSanitizerLegacyPass(ABIListFiles);
3128 }
3129 
3130 PreservedAnalyses DataFlowSanitizerPass::run(Module &M,
3131                                              ModuleAnalysisManager &AM) {
3132   if (DataFlowSanitizer(ABIListFiles).runImpl(M)) {
3133     return PreservedAnalyses::none();
3134   }
3135   return PreservedAnalyses::all();
3136 }
3137