1 //===- MemorySanitizer.cpp - detector of uninitialized reads --------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 /// \file
10 /// This file is a part of MemorySanitizer, a detector of uninitialized
11 /// reads.
12 ///
13 /// The algorithm of the tool is similar to Memcheck
14 /// (http://goo.gl/QKbem). We associate a few shadow bits with every
15 /// byte of the application memory, poison the shadow of the malloc-ed
16 /// or alloca-ed memory, load the shadow bits on every memory read,
/// propagate the shadow bits through some of the arithmetic
/// instructions (including MOV), store the shadow bits on every memory
19 /// write, report a bug on some other instructions (e.g. JMP) if the
20 /// associated shadow is poisoned.
21 ///
22 /// But there are differences too. The first and the major one:
23 /// compiler instrumentation instead of binary instrumentation. This
24 /// gives us much better register allocation, possible compiler
/// optimizations and a fast start-up. But it also brings a major issue:
/// msan needs to see all program events, including system
27 /// calls and reads/writes in system libraries, so we either need to
28 /// compile *everything* with msan or use a binary translation
29 /// component (e.g. DynamoRIO) to instrument pre-built libraries.
30 /// Another difference from Memcheck is that we use 8 shadow bits per
31 /// byte of application memory and use a direct shadow mapping. This
32 /// greatly simplifies the instrumentation code and avoids races on
/// shadow updates (Memcheck is single-threaded, so races are not a
/// concern there; Memcheck uses 2 shadow bits per byte with slow-path
/// storage that uses 8 bits per byte).
36 ///
37 /// The default value of shadow is 0, which means "clean" (not poisoned).
38 ///
39 /// Every module initializer should call __msan_init to ensure that the
40 /// shadow memory is ready. On error, __msan_warning is called. Since
41 /// parameters and return values may be passed via registers, we have a
42 /// specialized thread-local shadow for return values
43 /// (__msan_retval_tls) and parameters (__msan_param_tls).
44 ///
45 ///                           Origin tracking.
46 ///
47 /// MemorySanitizer can track origins (allocation points) of all uninitialized
48 /// values. This behavior is controlled with a flag (msan-track-origins) and is
49 /// disabled by default.
50 ///
51 /// Origins are 4-byte values created and interpreted by the runtime library.
52 /// They are stored in a second shadow mapping, one 4-byte value for 4 bytes
53 /// of application memory. Propagation of origins is basically a bunch of
54 /// "select" instructions that pick the origin of a dirty argument, if an
55 /// instruction has one.
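/// For example, for C = A + B the instrumentation conceptually computes
///   origin(C) = (shadow(B) != 0) ? origin(B) : origin(A)
/// so that C inherits the origin of a poisoned operand.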
56 ///
/// Every aligned group of 4 consecutive bytes of application memory has one
/// origin value associated with it. If these bytes contain uninitialized data
59 /// coming from 2 different allocations, the last store wins. Because of this,
60 /// MemorySanitizer reports can show unrelated origins, but this is unlikely in
61 /// practice.
62 ///
63 /// Origins are meaningless for fully initialized values, so MemorySanitizer
/// avoids storing origins to memory when a fully initialized value is
/// stored. This way it avoids needlessly overwriting the origin of the
/// 4-byte region on a short (i.e. 1-byte) clean store, and it is also good
/// for performance.
67 ///
68 ///                            Atomic handling.
69 ///
/// Ideally, every atomic store of an application value should update the
/// corresponding shadow location in an atomic way. Unfortunately, an atomic
/// store to two disjoint locations cannot be done without a severe slowdown.
73 ///
74 /// Therefore, we implement an approximation that may err on the safe side.
75 /// In this implementation, every atomically accessed location in the program
76 /// may only change from (partially) uninitialized to fully initialized, but
77 /// not the other way around. We load the shadow _after_ the application load,
78 /// and we store the shadow _before_ the app store. Also, we always store clean
79 /// shadow (if the application store is atomic). This way, if the store-load
80 /// pair constitutes a happens-before arc, shadow store and load are correctly
81 /// ordered such that the load will get either the value that was stored, or
82 /// some later value (which is always clean).
83 ///
84 /// This does not work very well with Compare-And-Swap (CAS) and
85 /// Read-Modify-Write (RMW) operations. To follow the above logic, CAS and RMW
86 /// must store the new shadow before the app operation, and load the shadow
/// after the app operation. Computers don't work this way. The current
/// implementation ignores the load aspect of CAS/RMW and always returns a
/// clean value. It implements the store part as a simple atomic store of a
/// clean shadow.
91 ///
92 ///                      Instrumenting inline assembly.
93 ///
94 /// For inline assembly code LLVM has little idea about which memory locations
/// become initialized depending on the arguments. It may be possible to figure
/// out which arguments are meant to point to inputs and outputs, but the
/// actual semantics may only be visible at runtime. In the Linux kernel it's
/// also possible that the arguments only indicate the offset for a base taken
/// from a segment register, so it's dangerous to treat any asm() arguments as
/// pointers. We take a conservative approach, generating calls to
///   __msan_instrument_asm_store(ptr, size)
/// which defer the memory unpoisoning to the runtime library.
103 /// The latter can perform more complex address checks to figure out whether
104 /// it's safe to touch the shadow memory.
105 /// Like with atomic operations, we call __msan_instrument_asm_store() before
106 /// the assembly call, so that changes to the shadow memory will be seen by
107 /// other threads together with main memory initialization.
108 ///
109 ///                  KernelMemorySanitizer (KMSAN) implementation.
110 ///
111 /// The major differences between KMSAN and MSan instrumentation are:
112 ///  - KMSAN always tracks the origins and implies msan-keep-going=true;
113 ///  - KMSAN allocates shadow and origin memory for each page separately, so
114 ///    there are no explicit accesses to shadow and origin in the
115 ///    instrumentation.
116 ///    Shadow and origin values for a particular X-byte memory location
117 ///    (X=1,2,4,8) are accessed through pointers obtained via the
118 ///      __msan_metadata_ptr_for_load_X(ptr)
119 ///      __msan_metadata_ptr_for_store_X(ptr)
120 ///    functions. The corresponding functions check that the X-byte accesses
///    are possible and return the pointers to shadow and origin memory.
122 ///    Arbitrary sized accesses are handled with:
123 ///      __msan_metadata_ptr_for_load_n(ptr, size)
124 ///      __msan_metadata_ptr_for_store_n(ptr, size);
125 ///    Note that the sanitizer code has to deal with how shadow/origin pairs
///    returned by these functions are represented in different ABIs. In
127 ///    the X86_64 ABI they are returned in RDX:RAX, and in the SystemZ ABI they
128 ///    are written to memory pointed to by a hidden parameter.
///  - TLS variables are stored in a single per-task struct. A call to the
///    function __msan_get_context_state(), returning a pointer to that struct,
///    is inserted at the start of every instrumented function;
132 ///  - __msan_warning() takes a 32-bit origin parameter;
133 ///  - local variables are poisoned with __msan_poison_alloca() upon function
134 ///    entry and unpoisoned with __msan_unpoison_alloca() before leaving the
135 ///    function;
136 ///  - the pass doesn't declare any global variables or add global constructors
137 ///    to the translation unit.
138 ///
139 /// Also, KMSAN currently ignores uninitialized memory passed into inline asm
140 /// calls, making sure we're on the safe side wrt. possible false positives.
141 ///
142 ///  KernelMemorySanitizer only supports X86_64 and SystemZ at the moment.
143 ///
144 //
145 // FIXME: This sanitizer does not yet handle scalable vectors
146 //
147 //===----------------------------------------------------------------------===//
148 
149 #include "llvm/Transforms/Instrumentation/MemorySanitizer.h"
150 #include "llvm/ADT/APInt.h"
151 #include "llvm/ADT/ArrayRef.h"
152 #include "llvm/ADT/DenseMap.h"
153 #include "llvm/ADT/DepthFirstIterator.h"
154 #include "llvm/ADT/SetVector.h"
155 #include "llvm/ADT/SmallString.h"
156 #include "llvm/ADT/SmallVector.h"
157 #include "llvm/ADT/StringExtras.h"
158 #include "llvm/ADT/StringRef.h"
159 #include "llvm/Analysis/GlobalsModRef.h"
160 #include "llvm/Analysis/TargetLibraryInfo.h"
161 #include "llvm/Analysis/ValueTracking.h"
162 #include "llvm/IR/Argument.h"
163 #include "llvm/IR/AttributeMask.h"
164 #include "llvm/IR/Attributes.h"
165 #include "llvm/IR/BasicBlock.h"
166 #include "llvm/IR/CallingConv.h"
167 #include "llvm/IR/Constant.h"
168 #include "llvm/IR/Constants.h"
169 #include "llvm/IR/DataLayout.h"
170 #include "llvm/IR/DerivedTypes.h"
171 #include "llvm/IR/Function.h"
172 #include "llvm/IR/GlobalValue.h"
173 #include "llvm/IR/GlobalVariable.h"
174 #include "llvm/IR/IRBuilder.h"
175 #include "llvm/IR/InlineAsm.h"
176 #include "llvm/IR/InstVisitor.h"
177 #include "llvm/IR/InstrTypes.h"
178 #include "llvm/IR/Instruction.h"
179 #include "llvm/IR/Instructions.h"
180 #include "llvm/IR/IntrinsicInst.h"
181 #include "llvm/IR/Intrinsics.h"
182 #include "llvm/IR/IntrinsicsX86.h"
183 #include "llvm/IR/MDBuilder.h"
184 #include "llvm/IR/Module.h"
185 #include "llvm/IR/Type.h"
186 #include "llvm/IR/Value.h"
187 #include "llvm/IR/ValueMap.h"
188 #include "llvm/Support/Alignment.h"
189 #include "llvm/Support/AtomicOrdering.h"
190 #include "llvm/Support/Casting.h"
191 #include "llvm/Support/CommandLine.h"
192 #include "llvm/Support/Debug.h"
193 #include "llvm/Support/DebugCounter.h"
194 #include "llvm/Support/ErrorHandling.h"
195 #include "llvm/Support/MathExtras.h"
196 #include "llvm/Support/raw_ostream.h"
197 #include "llvm/TargetParser/Triple.h"
198 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
199 #include "llvm/Transforms/Utils/Local.h"
200 #include "llvm/Transforms/Utils/ModuleUtils.h"
201 #include <algorithm>
202 #include <cassert>
203 #include <cstddef>
204 #include <cstdint>
205 #include <memory>
206 #include <string>
207 #include <tuple>
208 
209 using namespace llvm;
210 
211 #define DEBUG_TYPE "msan"
212 
213 DEBUG_COUNTER(DebugInsertCheck, "msan-insert-check",
214               "Controls which checks to insert");
215 
216 static const unsigned kOriginSize = 4;
217 static const Align kMinOriginAlignment = Align(4);
218 static const Align kShadowTLSAlignment = Align(8);
219 
220 // These constants must be kept in sync with the ones in msan.h.
221 static const unsigned kParamTLSSize = 800;
222 static const unsigned kRetvalTLSSize = 800;
223 
// Access sizes are powers of two: 1, 2, 4, 8.
225 static const size_t kNumberOfAccessSizes = 4;
226 
227 /// Track origins of uninitialized values.
228 ///
229 /// Adds a section to MemorySanitizer report that points to the allocation
230 /// (stack or heap) the uninitialized bits came from originally.
231 static cl::opt<int> ClTrackOrigins(
232     "msan-track-origins",
233     cl::desc("Track origins (allocation sites) of poisoned memory"), cl::Hidden,
234     cl::init(0));
235 
236 static cl::opt<bool> ClKeepGoing("msan-keep-going",
237                                  cl::desc("keep going after reporting a UMR"),
238                                  cl::Hidden, cl::init(false));
239 
240 static cl::opt<bool>
241     ClPoisonStack("msan-poison-stack",
242                   cl::desc("poison uninitialized stack variables"), cl::Hidden,
243                   cl::init(true));
244 
245 static cl::opt<bool> ClPoisonStackWithCall(
246     "msan-poison-stack-with-call",
247     cl::desc("poison uninitialized stack variables with a call"), cl::Hidden,
248     cl::init(false));
249 
250 static cl::opt<int> ClPoisonStackPattern(
251     "msan-poison-stack-pattern",
252     cl::desc("poison uninitialized stack variables with the given pattern"),
253     cl::Hidden, cl::init(0xff));
254 
255 static cl::opt<bool>
256     ClPrintStackNames("msan-print-stack-names",
257                       cl::desc("Print name of local stack variable"),
258                       cl::Hidden, cl::init(true));
259 
260 static cl::opt<bool> ClPoisonUndef("msan-poison-undef",
261                                    cl::desc("poison undef temps"), cl::Hidden,
262                                    cl::init(true));
263 
264 static cl::opt<bool>
265     ClHandleICmp("msan-handle-icmp",
266                  cl::desc("propagate shadow through ICmpEQ and ICmpNE"),
267                  cl::Hidden, cl::init(true));
268 
269 static cl::opt<bool>
270     ClHandleICmpExact("msan-handle-icmp-exact",
271                       cl::desc("exact handling of relational integer ICmp"),
272                       cl::Hidden, cl::init(false));
273 
274 static cl::opt<bool> ClHandleLifetimeIntrinsics(
275     "msan-handle-lifetime-intrinsics",
276     cl::desc(
277         "when possible, poison scoped variables at the beginning of the scope "
278         "(slower, but more precise)"),
279     cl::Hidden, cl::init(true));
280 
281 // When compiling the Linux kernel, we sometimes see false positives related to
282 // MSan being unable to understand that inline assembly calls may initialize
283 // local variables.
// This flag makes the compiler conservatively unpoison every memory location
// passed into an assembly call. Note that this may mask real bugs (false
// negatives).
286 // Because it's impossible to figure out the array sizes, we can only unpoison
287 // the first sizeof(type) bytes for each type* pointer.
288 // The instrumentation is only enabled in KMSAN builds, and only if
289 // -msan-handle-asm-conservative is on. This is done because we may want to
290 // quickly disable assembly instrumentation when it breaks.
291 static cl::opt<bool> ClHandleAsmConservative(
292     "msan-handle-asm-conservative",
293     cl::desc("conservative handling of inline assembly"), cl::Hidden,
294     cl::init(true));
295 
// This flag controls whether we check the shadow of the address operand of a
// load or store. Such bugs are very rare, since a load from a garbage address
// typically results in SEGV, but they still happen (e.g. when only the lower
// bits of the address are garbage, or when the access happens early at program
// startup where malloc-ed memory is more likely to be zeroed). As of
// 2012-08-28 this flag adds a 20% slowdown.
302 static cl::opt<bool> ClCheckAccessAddress(
303     "msan-check-access-address",
304     cl::desc("report accesses through a pointer which has poisoned shadow"),
305     cl::Hidden, cl::init(true));
306 
307 static cl::opt<bool> ClEagerChecks(
308     "msan-eager-checks",
309     cl::desc("check arguments and return values at function call boundaries"),
310     cl::Hidden, cl::init(false));
311 
312 static cl::opt<bool> ClDumpStrictInstructions(
313     "msan-dump-strict-instructions",
314     cl::desc("print out instructions with default strict semantics"),
315     cl::Hidden, cl::init(false));
316 
317 static cl::opt<int> ClInstrumentationWithCallThreshold(
318     "msan-instrumentation-with-call-threshold",
319     cl::desc(
320         "If the function being instrumented requires more than "
321         "this number of checks and origin stores, use callbacks instead of "
322         "inline checks (-1 means never use callbacks)."),
323     cl::Hidden, cl::init(3500));
324 
325 static cl::opt<bool>
326     ClEnableKmsan("msan-kernel",
327                   cl::desc("Enable KernelMemorySanitizer instrumentation"),
328                   cl::Hidden, cl::init(false));
329 
330 static cl::opt<bool>
331     ClDisableChecks("msan-disable-checks",
332                     cl::desc("Apply no_sanitize to the whole file"), cl::Hidden,
333                     cl::init(false));
334 
335 static cl::opt<bool>
336     ClCheckConstantShadow("msan-check-constant-shadow",
337                           cl::desc("Insert checks for constant shadow values"),
338                           cl::Hidden, cl::init(true));
339 
340 // This is off by default because of a bug in gold:
341 // https://sourceware.org/bugzilla/show_bug.cgi?id=19002
342 static cl::opt<bool>
343     ClWithComdat("msan-with-comdat",
344                  cl::desc("Place MSan constructors in comdat sections"),
345                  cl::Hidden, cl::init(false));
346 
// These options allow specifying custom memory map parameters.
348 // See MemoryMapParams for details.
349 static cl::opt<uint64_t> ClAndMask("msan-and-mask",
350                                    cl::desc("Define custom MSan AndMask"),
351                                    cl::Hidden, cl::init(0));
352 
353 static cl::opt<uint64_t> ClXorMask("msan-xor-mask",
354                                    cl::desc("Define custom MSan XorMask"),
355                                    cl::Hidden, cl::init(0));
356 
357 static cl::opt<uint64_t> ClShadowBase("msan-shadow-base",
358                                       cl::desc("Define custom MSan ShadowBase"),
359                                       cl::Hidden, cl::init(0));
360 
361 static cl::opt<uint64_t> ClOriginBase("msan-origin-base",
362                                       cl::desc("Define custom MSan OriginBase"),
363                                       cl::Hidden, cl::init(0));
364 
365 static cl::opt<int>
366     ClDisambiguateWarning("msan-disambiguate-warning-threshold",
367                           cl::desc("Define threshold for number of checks per "
368                                    "debug location to force origin update."),
369                           cl::Hidden, cl::init(3));
370 
371 const char kMsanModuleCtorName[] = "msan.module_ctor";
372 const char kMsanInitName[] = "__msan_init";
373 
374 namespace {
375 
376 // Memory map parameters used in application-to-shadow address calculation.
377 // Offset = (Addr & ~AndMask) ^ XorMask
378 // Shadow = ShadowBase + Offset
379 // Origin = OriginBase + Offset
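//
// For example, with the x86_64 Linux parameters defined below (AndMask = 0,
// XorMask = 0x500000000000, ShadowBase = 0, OriginBase = 0x100000000000)
// this reduces to:
//   Shadow = Addr ^ 0x500000000000
//   Origin = Shadow + 0x100000000000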
380 struct MemoryMapParams {
381   uint64_t AndMask;
382   uint64_t XorMask;
383   uint64_t ShadowBase;
384   uint64_t OriginBase;
385 };
386 
387 struct PlatformMemoryMapParams {
388   const MemoryMapParams *bits32;
389   const MemoryMapParams *bits64;
390 };
391 
392 } // end anonymous namespace
393 
394 // i386 Linux
395 static const MemoryMapParams Linux_I386_MemoryMapParams = {
396     0x000080000000, // AndMask
397     0,              // XorMask (not used)
398     0,              // ShadowBase (not used)
399     0x000040000000, // OriginBase
400 };
401 
402 // x86_64 Linux
403 static const MemoryMapParams Linux_X86_64_MemoryMapParams = {
404     0,              // AndMask (not used)
405     0x500000000000, // XorMask
406     0,              // ShadowBase (not used)
407     0x100000000000, // OriginBase
408 };
409 
410 // mips64 Linux
411 static const MemoryMapParams Linux_MIPS64_MemoryMapParams = {
412     0,              // AndMask (not used)
413     0x008000000000, // XorMask
414     0,              // ShadowBase (not used)
415     0x002000000000, // OriginBase
416 };
417 
418 // ppc64 Linux
419 static const MemoryMapParams Linux_PowerPC64_MemoryMapParams = {
420     0xE00000000000, // AndMask
421     0x100000000000, // XorMask
422     0x080000000000, // ShadowBase
423     0x1C0000000000, // OriginBase
424 };
425 
426 // s390x Linux
427 static const MemoryMapParams Linux_S390X_MemoryMapParams = {
428     0xC00000000000, // AndMask
429     0,              // XorMask (not used)
430     0x080000000000, // ShadowBase
431     0x1C0000000000, // OriginBase
432 };
433 
434 // aarch64 Linux
435 static const MemoryMapParams Linux_AArch64_MemoryMapParams = {
436     0,               // AndMask (not used)
437     0x0B00000000000, // XorMask
438     0,               // ShadowBase (not used)
439     0x0200000000000, // OriginBase
440 };
441 
442 // loongarch64 Linux
443 static const MemoryMapParams Linux_LoongArch64_MemoryMapParams = {
444     0,              // AndMask (not used)
445     0x500000000000, // XorMask
446     0,              // ShadowBase (not used)
447     0x100000000000, // OriginBase
448 };
449 
450 // aarch64 FreeBSD
451 static const MemoryMapParams FreeBSD_AArch64_MemoryMapParams = {
452     0x1800000000000, // AndMask
453     0x0400000000000, // XorMask
454     0x0200000000000, // ShadowBase
455     0x0700000000000, // OriginBase
456 };
457 
458 // i386 FreeBSD
459 static const MemoryMapParams FreeBSD_I386_MemoryMapParams = {
460     0x000180000000, // AndMask
461     0x000040000000, // XorMask
462     0x000020000000, // ShadowBase
463     0x000700000000, // OriginBase
464 };
465 
466 // x86_64 FreeBSD
467 static const MemoryMapParams FreeBSD_X86_64_MemoryMapParams = {
468     0xc00000000000, // AndMask
469     0x200000000000, // XorMask
470     0x100000000000, // ShadowBase
471     0x380000000000, // OriginBase
472 };
473 
474 // x86_64 NetBSD
475 static const MemoryMapParams NetBSD_X86_64_MemoryMapParams = {
476     0,              // AndMask
477     0x500000000000, // XorMask
478     0,              // ShadowBase
479     0x100000000000, // OriginBase
480 };
481 
482 static const PlatformMemoryMapParams Linux_X86_MemoryMapParams = {
483     &Linux_I386_MemoryMapParams,
484     &Linux_X86_64_MemoryMapParams,
485 };
486 
487 static const PlatformMemoryMapParams Linux_MIPS_MemoryMapParams = {
488     nullptr,
489     &Linux_MIPS64_MemoryMapParams,
490 };
491 
492 static const PlatformMemoryMapParams Linux_PowerPC_MemoryMapParams = {
493     nullptr,
494     &Linux_PowerPC64_MemoryMapParams,
495 };
496 
497 static const PlatformMemoryMapParams Linux_S390_MemoryMapParams = {
498     nullptr,
499     &Linux_S390X_MemoryMapParams,
500 };
501 
502 static const PlatformMemoryMapParams Linux_ARM_MemoryMapParams = {
503     nullptr,
504     &Linux_AArch64_MemoryMapParams,
505 };
506 
507 static const PlatformMemoryMapParams Linux_LoongArch_MemoryMapParams = {
508     nullptr,
509     &Linux_LoongArch64_MemoryMapParams,
510 };
511 
512 static const PlatformMemoryMapParams FreeBSD_ARM_MemoryMapParams = {
513     nullptr,
514     &FreeBSD_AArch64_MemoryMapParams,
515 };
516 
517 static const PlatformMemoryMapParams FreeBSD_X86_MemoryMapParams = {
518     &FreeBSD_I386_MemoryMapParams,
519     &FreeBSD_X86_64_MemoryMapParams,
520 };
521 
522 static const PlatformMemoryMapParams NetBSD_X86_MemoryMapParams = {
523     nullptr,
524     &NetBSD_X86_64_MemoryMapParams,
525 };
526 
527 namespace {
528 
529 /// Instrument functions of a module to detect uninitialized reads.
530 ///
531 /// Instantiating MemorySanitizer inserts the msan runtime library API function
532 /// declarations into the module if they don't exist already. Instantiating
533 /// ensures the __msan_init function is in the list of global constructors for
534 /// the module.
535 class MemorySanitizer {
536 public:
537   MemorySanitizer(Module &M, MemorySanitizerOptions Options)
538       : CompileKernel(Options.Kernel), TrackOrigins(Options.TrackOrigins),
539         Recover(Options.Recover), EagerChecks(Options.EagerChecks) {
540     initializeModule(M);
541   }
542 
543   // MSan cannot be moved or copied because of MapParams.
544   MemorySanitizer(MemorySanitizer &&) = delete;
545   MemorySanitizer &operator=(MemorySanitizer &&) = delete;
546   MemorySanitizer(const MemorySanitizer &) = delete;
547   MemorySanitizer &operator=(const MemorySanitizer &) = delete;
548 
549   bool sanitizeFunction(Function &F, TargetLibraryInfo &TLI);
550 
551 private:
552   friend struct MemorySanitizerVisitor;
553   friend struct VarArgAMD64Helper;
554   friend struct VarArgMIPS64Helper;
555   friend struct VarArgAArch64Helper;
556   friend struct VarArgPowerPC64Helper;
557   friend struct VarArgSystemZHelper;
558 
559   void initializeModule(Module &M);
560   void initializeCallbacks(Module &M, const TargetLibraryInfo &TLI);
561   void createKernelApi(Module &M, const TargetLibraryInfo &TLI);
562   void createUserspaceApi(Module &M, const TargetLibraryInfo &TLI);
563 
564   template <typename... ArgsTy>
565   FunctionCallee getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
566                                                  ArgsTy... Args);
567 
568   /// True if we're compiling the Linux kernel.
569   bool CompileKernel;
570   /// Track origins (allocation points) of uninitialized values.
571   int TrackOrigins;
572   bool Recover;
573   bool EagerChecks;
574 
575   Triple TargetTriple;
576   LLVMContext *C;
577   Type *IntptrTy;
578   Type *OriginTy;
579 
580   // XxxTLS variables represent the per-thread state in MSan and per-task state
581   // in KMSAN.
582   // For the userspace these point to thread-local globals. In the kernel land
583   // they point to the members of a per-task struct obtained via a call to
584   // __msan_get_context_state().
585 
586   /// Thread-local shadow storage for function parameters.
587   Value *ParamTLS;
588 
589   /// Thread-local origin storage for function parameters.
590   Value *ParamOriginTLS;
591 
592   /// Thread-local shadow storage for function return value.
593   Value *RetvalTLS;
594 
595   /// Thread-local origin storage for function return value.
596   Value *RetvalOriginTLS;
597 
598   /// Thread-local shadow storage for in-register va_arg function
599   /// parameters (x86_64-specific).
600   Value *VAArgTLS;
601 
  /// Thread-local origin storage for in-register va_arg function
  /// parameters (x86_64-specific).
604   Value *VAArgOriginTLS;
605 
  /// Thread-local storage for the size of the va_arg overflow area
  /// (x86_64-specific).
608   Value *VAArgOverflowSizeTLS;
609 
610   /// Are the instrumentation callbacks set up?
611   bool CallbacksInitialized = false;
612 
613   /// The run-time callback to print a warning.
614   FunctionCallee WarningFn;
615 
616   // These arrays are indexed by log2(AccessSize).
617   FunctionCallee MaybeWarningFn[kNumberOfAccessSizes];
618   FunctionCallee MaybeStoreOriginFn[kNumberOfAccessSizes];
619 
620   /// Run-time helper that generates a new origin value for a stack
621   /// allocation.
622   FunctionCallee MsanSetAllocaOriginWithDescriptionFn;
623   // No description version
624   FunctionCallee MsanSetAllocaOriginNoDescriptionFn;
625 
626   /// Run-time helper that poisons stack on function entry.
627   FunctionCallee MsanPoisonStackFn;
628 
629   /// Run-time helper that records a store (or any event) of an
630   /// uninitialized value and returns an updated origin id encoding this info.
631   FunctionCallee MsanChainOriginFn;
632 
633   /// Run-time helper that paints an origin over a region.
634   FunctionCallee MsanSetOriginFn;
635 
636   /// MSan runtime replacements for memmove, memcpy and memset.
637   FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
638 
639   /// KMSAN callback for task-local function argument shadow.
640   StructType *MsanContextStateTy;
641   FunctionCallee MsanGetContextStateFn;
642 
643   /// Functions for poisoning/unpoisoning local variables
644   FunctionCallee MsanPoisonAllocaFn, MsanUnpoisonAllocaFn;
645 
646   /// Pair of shadow/origin pointers.
647   Type *MsanMetadata;
648 
649   /// Each of the MsanMetadataPtrXxx functions returns a MsanMetadata.
650   FunctionCallee MsanMetadataPtrForLoadN, MsanMetadataPtrForStoreN;
651   FunctionCallee MsanMetadataPtrForLoad_1_8[4];
652   FunctionCallee MsanMetadataPtrForStore_1_8[4];
653   FunctionCallee MsanInstrumentAsmStoreFn;
654 
655   /// Storage for return values of the MsanMetadataPtrXxx functions.
656   Value *MsanMetadataAlloca;
657 
658   /// Helper to choose between different MsanMetadataPtrXxx().
659   FunctionCallee getKmsanShadowOriginAccessFn(bool isStore, int size);
660 
661   /// Memory map parameters used in application-to-shadow calculation.
662   const MemoryMapParams *MapParams;
663 
  /// Custom memory map parameters used when -msan-shadow-base or
  /// -msan-origin-base is provided.
666   MemoryMapParams CustomMapParams;
667 
668   MDNode *ColdCallWeights;
669 
670   /// Branch weights for origin store.
671   MDNode *OriginStoreWeights;
672 };
673 
674 void insertModuleCtor(Module &M) {
675   getOrCreateSanitizerCtorAndInitFunctions(
676       M, kMsanModuleCtorName, kMsanInitName,
677       /*InitArgTypes=*/{},
678       /*InitArgs=*/{},
679       // This callback is invoked when the functions are created the first
680       // time. Hook them into the global ctors list in that case:
681       [&](Function *Ctor, FunctionCallee) {
682         if (!ClWithComdat) {
683           appendToGlobalCtors(M, Ctor, 0);
684           return;
685         }
686         Comdat *MsanCtorComdat = M.getOrInsertComdat(kMsanModuleCtorName);
687         Ctor->setComdat(MsanCtorComdat);
688         appendToGlobalCtors(M, Ctor, 0, Ctor);
689       });
690 }
691 
692 template <class T> T getOptOrDefault(const cl::opt<T> &Opt, T Default) {
693   return (Opt.getNumOccurrences() > 0) ? Opt : Default;
694 }
695 
696 } // end anonymous namespace
697 
698 MemorySanitizerOptions::MemorySanitizerOptions(int TO, bool R, bool K,
699                                                bool EagerChecks)
700     : Kernel(getOptOrDefault(ClEnableKmsan, K)),
701       TrackOrigins(getOptOrDefault(ClTrackOrigins, Kernel ? 2 : TO)),
702       Recover(getOptOrDefault(ClKeepGoing, Kernel || R)),
703       EagerChecks(getOptOrDefault(ClEagerChecks, EagerChecks)) {}
704 
705 PreservedAnalyses MemorySanitizerPass::run(Module &M,
706                                            ModuleAnalysisManager &AM) {
707   bool Modified = false;
708   if (!Options.Kernel) {
709     insertModuleCtor(M);
710     Modified = true;
711   }
712 
713   auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
714   for (Function &F : M) {
715     if (F.empty())
716       continue;
717     MemorySanitizer Msan(*F.getParent(), Options);
718     Modified |=
719         Msan.sanitizeFunction(F, FAM.getResult<TargetLibraryAnalysis>(F));
720   }
721 
722   if (!Modified)
723     return PreservedAnalyses::all();
724 
725   PreservedAnalyses PA = PreservedAnalyses::none();
726   // GlobalsAA is considered stateless and does not get invalidated unless
727   // explicitly invalidated; PreservedAnalyses::none() is not enough. Sanitizers
728   // make changes that require GlobalsAA to be invalidated.
729   PA.abandon<GlobalsAA>();
730   return PA;
731 }
732 
733 void MemorySanitizerPass::printPipeline(
734     raw_ostream &OS, function_ref<StringRef(StringRef)> MapClassName2PassName) {
735   static_cast<PassInfoMixin<MemorySanitizerPass> *>(this)->printPipeline(
736       OS, MapClassName2PassName);
737   OS << '<';
738   if (Options.Recover)
739     OS << "recover;";
740   if (Options.Kernel)
741     OS << "kernel;";
742   if (Options.EagerChecks)
743     OS << "eager-checks;";
744   OS << "track-origins=" << Options.TrackOrigins;
745   OS << '>';
746 }
747 
748 /// Create a non-const global initialized with the given string.
749 ///
750 /// Creates a writable global for Str so that we can pass it to the
751 /// run-time lib. Runtime uses first 4 bytes of the string to store the
752 /// frame ID, so the string needs to be mutable.
753 static GlobalVariable *createPrivateConstGlobalForString(Module &M,
754                                                          StringRef Str) {
755   Constant *StrConst = ConstantDataArray::getString(M.getContext(), Str);
756   return new GlobalVariable(M, StrConst->getType(), /*isConstant=*/true,
757                             GlobalValue::PrivateLinkage, StrConst, "");
758 }
759 
760 template <typename... ArgsTy>
761 FunctionCallee
762 MemorySanitizer::getOrInsertMsanMetadataFunction(Module &M, StringRef Name,
763                                                  ArgsTy... Args) {
764   if (TargetTriple.getArch() == Triple::systemz) {
765     // SystemZ ABI: shadow/origin pair is returned via a hidden parameter.
766     return M.getOrInsertFunction(Name, Type::getVoidTy(*C),
767                                  PointerType::get(MsanMetadata, 0),
768                                  std::forward<ArgsTy>(Args)...);
769   }
770 
771   return M.getOrInsertFunction(Name, MsanMetadata,
772                                std::forward<ArgsTy>(Args)...);
773 }
774 
775 /// Create KMSAN API callbacks.
776 void MemorySanitizer::createKernelApi(Module &M, const TargetLibraryInfo &TLI) {
777   IRBuilder<> IRB(*C);
778 
779   // These will be initialized in insertKmsanPrologue().
780   RetvalTLS = nullptr;
781   RetvalOriginTLS = nullptr;
782   ParamTLS = nullptr;
783   ParamOriginTLS = nullptr;
784   VAArgTLS = nullptr;
785   VAArgOriginTLS = nullptr;
786   VAArgOverflowSizeTLS = nullptr;
787 
788   WarningFn = M.getOrInsertFunction("__msan_warning",
789                                     TLI.getAttrList(C, {0}, /*Signed=*/false),
790                                     IRB.getVoidTy(), IRB.getInt32Ty());
791 
792   // Requests the per-task context state (kmsan_context_state*) from the
793   // runtime library.
794   MsanContextStateTy = StructType::get(
795       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
796       ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8),
797       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8),
798       ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8), /* va_arg_origin */
799       IRB.getInt64Ty(), ArrayType::get(OriginTy, kParamTLSSize / 4), OriginTy,
800       OriginTy);
801   MsanGetContextStateFn = M.getOrInsertFunction(
802       "__msan_get_context_state", PointerType::get(MsanContextStateTy, 0));
803 
804   MsanMetadata = StructType::get(PointerType::get(IRB.getInt8Ty(), 0),
805                                  PointerType::get(IRB.getInt32Ty(), 0));
806 
807   for (int ind = 0, size = 1; ind < 4; ind++, size <<= 1) {
808     std::string name_load =
809         "__msan_metadata_ptr_for_load_" + std::to_string(size);
810     std::string name_store =
811         "__msan_metadata_ptr_for_store_" + std::to_string(size);
812     MsanMetadataPtrForLoad_1_8[ind] = getOrInsertMsanMetadataFunction(
813         M, name_load, PointerType::get(IRB.getInt8Ty(), 0));
814     MsanMetadataPtrForStore_1_8[ind] = getOrInsertMsanMetadataFunction(
815         M, name_store, PointerType::get(IRB.getInt8Ty(), 0));
816   }
817 
818   MsanMetadataPtrForLoadN = getOrInsertMsanMetadataFunction(
819       M, "__msan_metadata_ptr_for_load_n", PointerType::get(IRB.getInt8Ty(), 0),
820       IRB.getInt64Ty());
821   MsanMetadataPtrForStoreN = getOrInsertMsanMetadataFunction(
822       M, "__msan_metadata_ptr_for_store_n",
823       PointerType::get(IRB.getInt8Ty(), 0), IRB.getInt64Ty());
824 
825   // Functions for poisoning and unpoisoning memory.
826   MsanPoisonAllocaFn =
827       M.getOrInsertFunction("__msan_poison_alloca", IRB.getVoidTy(),
828                             IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy());
829   MsanUnpoisonAllocaFn = M.getOrInsertFunction(
830       "__msan_unpoison_alloca", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
831 }
832 
833 static Constant *getOrInsertGlobal(Module &M, StringRef Name, Type *Ty) {
834   return M.getOrInsertGlobal(Name, Ty, [&] {
835     return new GlobalVariable(M, Ty, false, GlobalVariable::ExternalLinkage,
836                               nullptr, Name, nullptr,
837                               GlobalVariable::InitialExecTLSModel);
838   });
839 }
840 
841 /// Insert declarations for userspace-specific functions and globals.
842 void MemorySanitizer::createUserspaceApi(Module &M, const TargetLibraryInfo &TLI) {
843   IRBuilder<> IRB(*C);
844 
845   // Create the callback.
846   // FIXME: this function should have "Cold" calling conv,
847   // which is not yet implemented.
848   if (TrackOrigins) {
849     StringRef WarningFnName = Recover ? "__msan_warning_with_origin"
850                                       : "__msan_warning_with_origin_noreturn";
851     WarningFn = M.getOrInsertFunction(WarningFnName,
852                                       TLI.getAttrList(C, {0}, /*Signed=*/false),
853                                       IRB.getVoidTy(), IRB.getInt32Ty());
854   } else {
855     StringRef WarningFnName =
856         Recover ? "__msan_warning" : "__msan_warning_noreturn";
857     WarningFn = M.getOrInsertFunction(WarningFnName, IRB.getVoidTy());
858   }
859 
860   // Create the global TLS variables.
861   RetvalTLS =
862       getOrInsertGlobal(M, "__msan_retval_tls",
863                         ArrayType::get(IRB.getInt64Ty(), kRetvalTLSSize / 8));
864 
865   RetvalOriginTLS = getOrInsertGlobal(M, "__msan_retval_origin_tls", OriginTy);
866 
867   ParamTLS =
868       getOrInsertGlobal(M, "__msan_param_tls",
869                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
870 
871   ParamOriginTLS =
872       getOrInsertGlobal(M, "__msan_param_origin_tls",
873                         ArrayType::get(OriginTy, kParamTLSSize / 4));
874 
875   VAArgTLS =
876       getOrInsertGlobal(M, "__msan_va_arg_tls",
877                         ArrayType::get(IRB.getInt64Ty(), kParamTLSSize / 8));
878 
879   VAArgOriginTLS =
880       getOrInsertGlobal(M, "__msan_va_arg_origin_tls",
881                         ArrayType::get(OriginTy, kParamTLSSize / 4));
882 
883   VAArgOverflowSizeTLS =
884       getOrInsertGlobal(M, "__msan_va_arg_overflow_size_tls", IRB.getInt64Ty());
885 
886   for (size_t AccessSizeIndex = 0; AccessSizeIndex < kNumberOfAccessSizes;
887        AccessSizeIndex++) {
888     unsigned AccessSize = 1 << AccessSizeIndex;
889     std::string FunctionName = "__msan_maybe_warning_" + itostr(AccessSize);
890     MaybeWarningFn[AccessSizeIndex] = M.getOrInsertFunction(
891         FunctionName, TLI.getAttrList(C, {0, 1}, /*Signed=*/false),
892         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt32Ty());
893 
894     FunctionName = "__msan_maybe_store_origin_" + itostr(AccessSize);
895     MaybeStoreOriginFn[AccessSizeIndex] = M.getOrInsertFunction(
896         FunctionName, TLI.getAttrList(C, {0, 2}, /*Signed=*/false),
897         IRB.getVoidTy(), IRB.getIntNTy(AccessSize * 8), IRB.getInt8PtrTy(),
898         IRB.getInt32Ty());
899   }
900 
901   MsanSetAllocaOriginWithDescriptionFn = M.getOrInsertFunction(
902       "__msan_set_alloca_origin_with_descr", IRB.getVoidTy(),
903       IRB.getInt8PtrTy(), IntptrTy, IRB.getInt8PtrTy(), IRB.getInt8PtrTy());
904   MsanSetAllocaOriginNoDescriptionFn = M.getOrInsertFunction(
905       "__msan_set_alloca_origin_no_descr", IRB.getVoidTy(), IRB.getInt8PtrTy(),
906       IntptrTy, IRB.getInt8PtrTy());
907   MsanPoisonStackFn = M.getOrInsertFunction(
908       "__msan_poison_stack", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
909 }
910 
911 /// Insert extern declaration of runtime-provided functions and globals.
912 void MemorySanitizer::initializeCallbacks(Module &M, const TargetLibraryInfo &TLI) {
913   // Only do this once.
914   if (CallbacksInitialized)
915     return;
916 
917   IRBuilder<> IRB(*C);
918   // Initialize callbacks that are common for kernel and userspace
919   // instrumentation.
920   MsanChainOriginFn = M.getOrInsertFunction(
921       "__msan_chain_origin",
922       TLI.getAttrList(C, {0}, /*Signed=*/false, /*Ret=*/true), IRB.getInt32Ty(),
923       IRB.getInt32Ty());
924   MsanSetOriginFn = M.getOrInsertFunction(
925       "__msan_set_origin", TLI.getAttrList(C, {2}, /*Signed=*/false),
926       IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy, IRB.getInt32Ty());
927   MemmoveFn =
928       M.getOrInsertFunction("__msan_memmove", IRB.getInt8PtrTy(),
929                             IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
930   MemcpyFn =
931       M.getOrInsertFunction("__msan_memcpy", IRB.getInt8PtrTy(),
932                             IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IntptrTy);
933   MemsetFn = M.getOrInsertFunction(
934       "__msan_memset", TLI.getAttrList(C, {1}, /*Signed=*/true),
935       IRB.getInt8PtrTy(), IRB.getInt8PtrTy(), IRB.getInt32Ty(), IntptrTy);
936 
937   MsanInstrumentAsmStoreFn =
938       M.getOrInsertFunction("__msan_instrument_asm_store", IRB.getVoidTy(),
939                             PointerType::get(IRB.getInt8Ty(), 0), IntptrTy);
940 
941   if (CompileKernel) {
942     createKernelApi(M, TLI);
943   } else {
944     createUserspaceApi(M, TLI);
945   }
946   CallbacksInitialized = true;
947 }
948 
949 FunctionCallee MemorySanitizer::getKmsanShadowOriginAccessFn(bool isStore,
950                                                              int size) {
951   FunctionCallee *Fns =
952       isStore ? MsanMetadataPtrForStore_1_8 : MsanMetadataPtrForLoad_1_8;
953   switch (size) {
954   case 1:
955     return Fns[0];
956   case 2:
957     return Fns[1];
958   case 4:
959     return Fns[2];
960   case 8:
961     return Fns[3];
962   default:
963     return nullptr;
964   }
965 }
966 
967 /// Module-level initialization.
968 ///
/// Inserts a call to __msan_init into the module's constructor list.
970 void MemorySanitizer::initializeModule(Module &M) {
971   auto &DL = M.getDataLayout();
972 
973   TargetTriple = Triple(M.getTargetTriple());
974 
975   bool ShadowPassed = ClShadowBase.getNumOccurrences() > 0;
976   bool OriginPassed = ClOriginBase.getNumOccurrences() > 0;
977   // Check the overrides first
978   if (ShadowPassed || OriginPassed) {
979     CustomMapParams.AndMask = ClAndMask;
980     CustomMapParams.XorMask = ClXorMask;
981     CustomMapParams.ShadowBase = ClShadowBase;
982     CustomMapParams.OriginBase = ClOriginBase;
983     MapParams = &CustomMapParams;
984   } else {
985     switch (TargetTriple.getOS()) {
986     case Triple::FreeBSD:
987       switch (TargetTriple.getArch()) {
988       case Triple::aarch64:
989         MapParams = FreeBSD_ARM_MemoryMapParams.bits64;
990         break;
991       case Triple::x86_64:
992         MapParams = FreeBSD_X86_MemoryMapParams.bits64;
993         break;
994       case Triple::x86:
995         MapParams = FreeBSD_X86_MemoryMapParams.bits32;
996         break;
997       default:
998         report_fatal_error("unsupported architecture");
999       }
1000       break;
1001     case Triple::NetBSD:
1002       switch (TargetTriple.getArch()) {
1003       case Triple::x86_64:
1004         MapParams = NetBSD_X86_MemoryMapParams.bits64;
1005         break;
1006       default:
1007         report_fatal_error("unsupported architecture");
1008       }
1009       break;
1010     case Triple::Linux:
1011       switch (TargetTriple.getArch()) {
1012       case Triple::x86_64:
1013         MapParams = Linux_X86_MemoryMapParams.bits64;
1014         break;
1015       case Triple::x86:
1016         MapParams = Linux_X86_MemoryMapParams.bits32;
1017         break;
1018       case Triple::mips64:
1019       case Triple::mips64el:
1020         MapParams = Linux_MIPS_MemoryMapParams.bits64;
1021         break;
1022       case Triple::ppc64:
1023       case Triple::ppc64le:
1024         MapParams = Linux_PowerPC_MemoryMapParams.bits64;
1025         break;
1026       case Triple::systemz:
1027         MapParams = Linux_S390_MemoryMapParams.bits64;
1028         break;
1029       case Triple::aarch64:
1030       case Triple::aarch64_be:
1031         MapParams = Linux_ARM_MemoryMapParams.bits64;
1032         break;
1033       case Triple::loongarch64:
1034         MapParams = Linux_LoongArch_MemoryMapParams.bits64;
1035         break;
1036       default:
1037         report_fatal_error("unsupported architecture");
1038       }
1039       break;
1040     default:
1041       report_fatal_error("unsupported operating system");
1042     }
1043   }
1044 
1045   C = &(M.getContext());
1046   IRBuilder<> IRB(*C);
1047   IntptrTy = IRB.getIntPtrTy(DL);
1048   OriginTy = IRB.getInt32Ty();
1049 
1050   ColdCallWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1051   OriginStoreWeights = MDBuilder(*C).createBranchWeights(1, 1000);
1052 
1053   if (!CompileKernel) {
1054     if (TrackOrigins)
1055       M.getOrInsertGlobal("__msan_track_origins", IRB.getInt32Ty(), [&] {
1056         return new GlobalVariable(
1057             M, IRB.getInt32Ty(), true, GlobalValue::WeakODRLinkage,
1058             IRB.getInt32(TrackOrigins), "__msan_track_origins");
1059       });
1060 
1061     if (Recover)
1062       M.getOrInsertGlobal("__msan_keep_going", IRB.getInt32Ty(), [&] {
1063         return new GlobalVariable(M, IRB.getInt32Ty(), true,
1064                                   GlobalValue::WeakODRLinkage,
1065                                   IRB.getInt32(Recover), "__msan_keep_going");
1066       });
1067   }
1068 }
1069 
1070 namespace {
1071 
1072 /// A helper class that handles instrumentation of VarArg
1073 /// functions on a particular platform.
1074 ///
1075 /// Implementations are expected to insert the instrumentation
1076 /// necessary to propagate argument shadow through VarArg function
1077 /// calls. Visit* methods are called during an InstVisitor pass over
1078 /// the function, and should avoid creating new basic blocks. A new
1079 /// instance of this class is created for each instrumented function.
1080 struct VarArgHelper {
1081   virtual ~VarArgHelper() = default;
1082 
1083   /// Visit a CallBase.
1084   virtual void visitCallBase(CallBase &CB, IRBuilder<> &IRB) = 0;
1085 
1086   /// Visit a va_start call.
1087   virtual void visitVAStartInst(VAStartInst &I) = 0;
1088 
1089   /// Visit a va_copy call.
1090   virtual void visitVACopyInst(VACopyInst &I) = 0;
1091 
1092   /// Finalize function instrumentation.
1093   ///
1094   /// This method is called after visiting all interesting (see above)
1095   /// instructions in a function.
1096   virtual void finalizeInstrumentation() = 0;
1097 };
1098 
1099 struct MemorySanitizerVisitor;
1100 
1101 } // end anonymous namespace
1102 
1103 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
1104                                         MemorySanitizerVisitor &Visitor);
1105 
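// Maps the size of a memory access onto an index into the callback arrays
// (MaybeWarningFn, MaybeStoreOriginFn): fixed sizes of up to 8 bits map to
// index 0, larger fixed sizes to the ceiling log2 of their size in bytes
// (e.g. a 32-bit access maps to index 2). Scalable sizes map to
// kNumberOfAccessSizes, which callers treat as "no matching callback".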
1106 static unsigned TypeSizeToSizeIndex(TypeSize TS) {
1107   if (TS.isScalable())
1108     // Scalable types unconditionally take slowpaths.
1109     return kNumberOfAccessSizes;
1110   unsigned TypeSizeFixed = TS.getFixedValue();
1111   if (TypeSizeFixed <= 8)
1112     return 0;
1113   return Log2_32_Ceil((TypeSizeFixed + 7) / 8);
1114 }
1115 
1116 namespace {
1117 
/// Helper class to attach debug information from the given instruction onto
/// new instructions inserted after it.
1120 class NextNodeIRBuilder : public IRBuilder<> {
1121 public:
1122   explicit NextNodeIRBuilder(Instruction *IP) : IRBuilder<>(IP->getNextNode()) {
1123     SetCurrentDebugLocation(IP->getDebugLoc());
1124   }
1125 };
1126 
1127 /// This class does all the work for a given function. Store and Load
1128 /// instructions store and load corresponding shadow and origin
1129 /// values. Most instructions propagate shadow from arguments to their
1130 /// return values. Certain instructions (most importantly, BranchInst)
1131 /// test their argument shadow and print reports (with a runtime call) if it's
1132 /// non-zero.
1133 struct MemorySanitizerVisitor : public InstVisitor<MemorySanitizerVisitor> {
1134   Function &F;
1135   MemorySanitizer &MS;
1136   SmallVector<PHINode *, 16> ShadowPHINodes, OriginPHINodes;
1137   ValueMap<Value *, Value *> ShadowMap, OriginMap;
1138   std::unique_ptr<VarArgHelper> VAHelper;
1139   const TargetLibraryInfo *TLI;
1140   Instruction *FnPrologueEnd;
1141 
1142   // The following flags disable parts of MSan instrumentation based on
1143   // exclusion list contents and command-line options.
1144   bool InsertChecks;
1145   bool PropagateShadow;
1146   bool PoisonStack;
1147   bool PoisonUndef;
1148 
1149   struct ShadowOriginAndInsertPoint {
1150     Value *Shadow;
1151     Value *Origin;
1152     Instruction *OrigIns;
1153 
1154     ShadowOriginAndInsertPoint(Value *S, Value *O, Instruction *I)
1155         : Shadow(S), Origin(O), OrigIns(I) {}
1156   };
1157   SmallVector<ShadowOriginAndInsertPoint, 16> InstrumentationList;
1158   DenseMap<const DILocation *, int> LazyWarningDebugLocationCount;
1159   bool InstrumentLifetimeStart = ClHandleLifetimeIntrinsics;
1160   SmallSetVector<AllocaInst *, 16> AllocaSet;
1161   SmallVector<std::pair<IntrinsicInst *, AllocaInst *>, 16> LifetimeStartList;
1162   SmallVector<StoreInst *, 16> StoreList;
1163   int64_t SplittableBlocksCount = 0;
1164 
1165   MemorySanitizerVisitor(Function &F, MemorySanitizer &MS,
1166                          const TargetLibraryInfo &TLI)
1167       : F(F), MS(MS), VAHelper(CreateVarArgHelper(F, MS, *this)), TLI(&TLI) {
1168     bool SanitizeFunction =
1169         F.hasFnAttribute(Attribute::SanitizeMemory) && !ClDisableChecks;
1170     InsertChecks = SanitizeFunction;
1171     PropagateShadow = SanitizeFunction;
1172     PoisonStack = SanitizeFunction && ClPoisonStack;
1173     PoisonUndef = SanitizeFunction && ClPoisonUndef;
1174 
1175     // In the presence of unreachable blocks, we may see Phi nodes with
1176     // incoming nodes from such blocks. Since InstVisitor skips unreachable
1177     // blocks, such nodes will not have any shadow value associated with them.
1178     // It's easier to remove unreachable blocks than deal with missing shadow.
1179     removeUnreachableBlocks(F);
1180 
1181     MS.initializeCallbacks(*F.getParent(), TLI);
1182     FnPrologueEnd = IRBuilder<>(F.getEntryBlock().getFirstNonPHI())
1183                         .CreateIntrinsic(Intrinsic::donothing, {}, {});
1184 
1185     if (MS.CompileKernel) {
1186       IRBuilder<> IRB(FnPrologueEnd);
1187       insertKmsanPrologue(IRB);
1188     }
1189 
1190     LLVM_DEBUG(if (!InsertChecks) dbgs()
1191                << "MemorySanitizer is not inserting checks into '"
1192                << F.getName() << "'\n");
1193   }
1194 
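  // Decides whether a check on V should go through a runtime callback
  // (__msan_maybe_warning_N / __msan_maybe_store_origin_N) instead of inline
  // IR. Every non-constant shadow value counts towards
  // -msan-instrumentation-with-call-threshold; once the running count for
  // this function exceeds the threshold, subsequent checks use callbacks.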
1195   bool instrumentWithCalls(Value *V) {
1196     // Constants likely will be eliminated by follow-up passes.
1197     if (isa<Constant>(V))
1198       return false;
1199 
1200     ++SplittableBlocksCount;
1201     return ClInstrumentationWithCallThreshold >= 0 &&
1202            SplittableBlocksCount > ClInstrumentationWithCallThreshold;
1203   }
1204 
1205   bool isInPrologue(Instruction &I) {
1206     return I.getParent() == FnPrologueEnd->getParent() &&
1207            (&I == FnPrologueEnd || I.comesBefore(FnPrologueEnd));
1208   }
1209 
  // Creates a new origin and records the stack trace. In general we can call
  // this function for any origin manipulation we like. However, it costs
  // runtime resources, so use it sparingly, only where it can provide
  // additional information helpful to the user.
1214   Value *updateOrigin(Value *V, IRBuilder<> &IRB) {
1215     if (MS.TrackOrigins <= 1)
1216       return V;
1217     return IRB.CreateCall(MS.MsanChainOriginFn, V);
1218   }
1219 
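  // Widens the 32-bit origin to a pointer-sized integer for bulk origin
  // stores: on 64-bit targets the origin O becomes (O << 32) | O, so one
  // intptr-sized store paints two adjacent origin slots at once.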
1220   Value *originToIntptr(IRBuilder<> &IRB, Value *Origin) {
1221     const DataLayout &DL = F.getParent()->getDataLayout();
1222     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1223     if (IntptrSize == kOriginSize)
1224       return Origin;
1225     assert(IntptrSize == kOriginSize * 2);
1226     Origin = IRB.CreateIntCast(Origin, MS.IntptrTy, /* isSigned */ false);
1227     return IRB.CreateOr(Origin, IRB.CreateShl(Origin, kOriginSize * 8));
1228   }
1229 
1230   /// Fill memory range with the given origin value.
1231   void paintOrigin(IRBuilder<> &IRB, Value *Origin, Value *OriginPtr,
1232                    TypeSize TS, Align Alignment) {
1233     const DataLayout &DL = F.getParent()->getDataLayout();
1234     const Align IntptrAlignment = DL.getABITypeAlign(MS.IntptrTy);
1235     unsigned IntptrSize = DL.getTypeStoreSize(MS.IntptrTy);
1236     assert(IntptrAlignment >= kMinOriginAlignment);
1237     assert(IntptrSize >= kOriginSize);
1238 
    // Note: The loop-based formulation works for fixed-length vectors too;
    // however, we prefer to unroll and specialize the alignment below.
1241     if (TS.isScalable()) {
1242       Value *Size = IRB.CreateTypeSize(IRB.getInt32Ty(), TS);
1243       Value *RoundUp = IRB.CreateAdd(Size, IRB.getInt32(kOriginSize - 1));
1244       Value *End = IRB.CreateUDiv(RoundUp, IRB.getInt32(kOriginSize));
1245       auto [InsertPt, Index] =
1246         SplitBlockAndInsertSimpleForLoop(End, &*IRB.GetInsertPoint());
1247       IRB.SetInsertPoint(InsertPt);
1248 
1249       Value *GEP = IRB.CreateGEP(MS.OriginTy, OriginPtr, Index);
1250       IRB.CreateAlignedStore(Origin, GEP, kMinOriginAlignment);
1251       return;
1252     }
1253 
1254     unsigned Size = TS.getFixedValue();
1255 
1256     unsigned Ofs = 0;
1257     Align CurrentAlignment = Alignment;
1258     if (Alignment >= IntptrAlignment && IntptrSize > kOriginSize) {
1259       Value *IntptrOrigin = originToIntptr(IRB, Origin);
1260       Value *IntptrOriginPtr =
1261           IRB.CreatePointerCast(OriginPtr, PointerType::get(MS.IntptrTy, 0));
1262       for (unsigned i = 0; i < Size / IntptrSize; ++i) {
1263         Value *Ptr = i ? IRB.CreateConstGEP1_32(MS.IntptrTy, IntptrOriginPtr, i)
1264                        : IntptrOriginPtr;
1265         IRB.CreateAlignedStore(IntptrOrigin, Ptr, CurrentAlignment);
1266         Ofs += IntptrSize / kOriginSize;
1267         CurrentAlignment = IntptrAlignment;
1268       }
1269     }
1270 
1271     for (unsigned i = Ofs; i < (Size + kOriginSize - 1) / kOriginSize; ++i) {
1272       Value *GEP =
1273           i ? IRB.CreateConstGEP1_32(MS.OriginTy, OriginPtr, i) : OriginPtr;
1274       IRB.CreateAlignedStore(Origin, GEP, CurrentAlignment);
1275       CurrentAlignment = kMinOriginAlignment;
1276     }
1277   }
1278 
1279   void storeOrigin(IRBuilder<> &IRB, Value *Addr, Value *Shadow, Value *Origin,
1280                    Value *OriginPtr, Align Alignment) {
1281     const DataLayout &DL = F.getParent()->getDataLayout();
1282     const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1283     TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
1284     Value *ConvertedShadow = convertShadowToScalar(Shadow, IRB);
1285     if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1286       if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1287         // Origin is not needed: value is initialized or const shadow is
1288         // ignored.
1289         return;
1290       }
1291       if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1292         // Copy origin as the value is definitely uninitialized.
1293         paintOrigin(IRB, updateOrigin(Origin, IRB), OriginPtr, StoreSize,
1294                     OriginAlignment);
1295         return;
1296       }
1297       // Fallback to runtime check, which still can be optimized out later.
1298     }
1299 
1300     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1301     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1302     if (instrumentWithCalls(ConvertedShadow) &&
1303         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1304       FunctionCallee Fn = MS.MaybeStoreOriginFn[SizeIndex];
1305       Value *ConvertedShadow2 =
1306           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1307       CallBase *CB = IRB.CreateCall(
1308           Fn, {ConvertedShadow2,
1309                IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()), Origin});
1310       CB->addParamAttr(0, Attribute::ZExt);
1311       CB->addParamAttr(2, Attribute::ZExt);
1312     } else {
1313       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1314       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1315           Cmp, &*IRB.GetInsertPoint(), false, MS.OriginStoreWeights);
1316       IRBuilder<> IRBNew(CheckTerm);
1317       paintOrigin(IRBNew, updateOrigin(Origin, IRBNew), OriginPtr, StoreSize,
1318                   OriginAlignment);
1319     }
1320   }
1321 
1322   void materializeStores() {
1323     for (StoreInst *SI : StoreList) {
1324       IRBuilder<> IRB(SI);
1325       Value *Val = SI->getValueOperand();
1326       Value *Addr = SI->getPointerOperand();
1327       Value *Shadow = SI->isAtomic() ? getCleanShadow(Val) : getShadow(Val);
1328       Value *ShadowPtr, *OriginPtr;
1329       Type *ShadowTy = Shadow->getType();
1330       const Align Alignment = SI->getAlign();
1331       const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
1332       std::tie(ShadowPtr, OriginPtr) =
1333           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ true);
1334 
1335       StoreInst *NewSI = IRB.CreateAlignedStore(Shadow, ShadowPtr, Alignment);
1336       LLVM_DEBUG(dbgs() << "  STORE: " << *NewSI << "\n");
1337       (void)NewSI;
1338 
1339       if (SI->isAtomic())
1340         SI->setOrdering(addReleaseOrdering(SI->getOrdering()));
1341 
1342       if (MS.TrackOrigins && !SI->isAtomic())
1343         storeOrigin(IRB, Addr, Shadow, getOrigin(Val), OriginPtr,
1344                     OriginAlignment);
1345     }
1346   }
1347 
1348   // Returns true if the Debug Location corresponds to multiple warnings.
1349   bool shouldDisambiguateWarningLocation(const DebugLoc &DebugLoc) {
1350     if (MS.TrackOrigins < 2)
1351       return false;
1352 
1353     if (LazyWarningDebugLocationCount.empty())
1354       for (const auto &I : InstrumentationList)
1355         ++LazyWarningDebugLocationCount[I.OrigIns->getDebugLoc()];
1356 
1357     return LazyWarningDebugLocationCount[DebugLoc] >= ClDisambiguateWarning;
1358   }
1359 
1360   /// Helper function to insert a warning at IRB's current insert point.
1361   void insertWarningFn(IRBuilder<> &IRB, Value *Origin) {
1362     if (!Origin)
1363       Origin = (Value *)IRB.getInt32(0);
1364     assert(Origin->getType()->isIntegerTy());
1365 
1366     if (shouldDisambiguateWarningLocation(IRB.getCurrentDebugLocation())) {
1367       // Try to create additional origin with debug info of the last origin
1368       // instruction. It may provide additional information to the user.
1369       if (Instruction *OI = dyn_cast_or_null<Instruction>(Origin)) {
1370         assert(MS.TrackOrigins);
1371         auto NewDebugLoc = OI->getDebugLoc();
1372         // Origin update with missing or the same debug location provides no
1373         // additional value.
1374         if (NewDebugLoc && NewDebugLoc != IRB.getCurrentDebugLocation()) {
1375           // Insert the update just before the check, so the runtime is
1376           // called only right before the report.
1377           IRBuilder<> IRBOrigin(&*IRB.GetInsertPoint());
1378           IRBOrigin.SetCurrentDebugLocation(NewDebugLoc);
1379           Origin = updateOrigin(Origin, IRBOrigin);
1380         }
1381       }
1382     }
1383 
1384     if (MS.CompileKernel || MS.TrackOrigins)
1385       IRB.CreateCall(MS.WarningFn, Origin)->setCannotMerge();
1386     else
1387       IRB.CreateCall(MS.WarningFn)->setCannotMerge();
1388     // FIXME: Insert UnreachableInst if !MS.Recover?
1389     // This may invalidate some of the following checks and needs to be done
1390     // at the very end.
1391   }
1392 
1393   void materializeOneCheck(IRBuilder<> &IRB, Value *ConvertedShadow,
1394                            Value *Origin) {
1395     const DataLayout &DL = F.getParent()->getDataLayout();
1396     TypeSize TypeSizeInBits = DL.getTypeSizeInBits(ConvertedShadow->getType());
1397     unsigned SizeIndex = TypeSizeToSizeIndex(TypeSizeInBits);
1398     if (instrumentWithCalls(ConvertedShadow) &&
1399         SizeIndex < kNumberOfAccessSizes && !MS.CompileKernel) {
1400       FunctionCallee Fn = MS.MaybeWarningFn[SizeIndex];
1401       Value *ConvertedShadow2 =
1402           IRB.CreateZExt(ConvertedShadow, IRB.getIntNTy(8 * (1 << SizeIndex)));
1403       CallBase *CB = IRB.CreateCall(
1404           Fn, {ConvertedShadow2,
1405                MS.TrackOrigins && Origin ? Origin : (Value *)IRB.getInt32(0)});
1406       CB->addParamAttr(0, Attribute::ZExt);
1407       CB->addParamAttr(1, Attribute::ZExt);
1408     } else {
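      // Otherwise emit an explicit branch guarding the warning call; roughly:
      //   %_mscmp = icmp ne <shadow>, 0
      //   br i1 %_mscmp, label %warn, label %cont   ; branch weighted as cold
      // warn:
      //   call void @__msan_warning...()   ; exact callee depends on the
      //                                    ; origins/recover configuration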
1409       Value *Cmp = convertToBool(ConvertedShadow, IRB, "_mscmp");
1410       Instruction *CheckTerm = SplitBlockAndInsertIfThen(
1411           Cmp, &*IRB.GetInsertPoint(),
1412           /* Unreachable */ !MS.Recover, MS.ColdCallWeights);
1413 
1414       IRB.SetInsertPoint(CheckTerm);
1415       insertWarningFn(IRB, Origin);
1416       LLVM_DEBUG(dbgs() << "  CHECK: " << *Cmp << "\n");
1417     }
1418   }
1419 
1420   void materializeInstructionChecks(
1421       ArrayRef<ShadowOriginAndInsertPoint> InstructionChecks) {
1422     const DataLayout &DL = F.getParent()->getDataLayout();
1423     // Disable combining when origin tracking is enabled: each shadow has to be
1424     // checked individually so the report can pick the correct origin.
1425     bool Combine = !MS.TrackOrigins;
1426     Instruction *Instruction = InstructionChecks.front().OrigIns;
1427     Value *Shadow = nullptr;
1428     for (const auto &ShadowData : InstructionChecks) {
1429       assert(ShadowData.OrigIns == Instruction);
1430       IRBuilder<> IRB(Instruction);
1431 
1432       Value *ConvertedShadow = ShadowData.Shadow;
1433 
1434       if (auto *ConstantShadow = dyn_cast<Constant>(ConvertedShadow)) {
1435         if (!ClCheckConstantShadow || ConstantShadow->isZeroValue()) {
1436           // Skip, value is initialized or const shadow is ignored.
1437           continue;
1438         }
1439         if (llvm::isKnownNonZero(ConvertedShadow, DL)) {
1440           // Report as the value is definitely uninitialized.
1441           insertWarningFn(IRB, ShadowData.Origin);
1442           if (!MS.Recover)
1443             return; // Always fail and stop here, no need to check the rest.
1444           // The warning was already emitted; continue with the next check.
1445           continue;
1446         }
1447         // Fall back to a runtime check, which can still be optimized out later.
1448       }
1449 
1450       if (!Combine) {
1451         materializeOneCheck(IRB, ConvertedShadow, ShadowData.Origin);
1452         continue;
1453       }
1454 
1455       if (!Shadow) {
1456         Shadow = ConvertedShadow;
1457         continue;
1458       }
1459 
1460       Shadow = convertToBool(Shadow, IRB, "_mscmp");
1461       ConvertedShadow = convertToBool(ConvertedShadow, IRB, "_mscmp");
1462       Shadow = IRB.CreateOr(Shadow, ConvertedShadow, "_msor");
1463     }
1464 
1465     if (Shadow) {
1466       assert(Combine);
1467       IRBuilder<> IRB(Instruction);
1468       materializeOneCheck(IRB, Shadow, nullptr);
1469     }
1470   }
1471 
1472   void materializeChecks() {
1473     llvm::stable_sort(InstrumentationList,
1474                       [](const ShadowOriginAndInsertPoint &L,
1475                          const ShadowOriginAndInsertPoint &R) {
1476                         return L.OrigIns < R.OrigIns;
1477                       });
1478 
1479     for (auto I = InstrumentationList.begin();
1480          I != InstrumentationList.end();) {
1481       auto J =
1482           std::find_if(I + 1, InstrumentationList.end(),
1483                        [L = I->OrigIns](const ShadowOriginAndInsertPoint &R) {
1484                          return L != R.OrigIns;
1485                        });
1486       // Process all checks of the instruction at once.
1487       materializeInstructionChecks(ArrayRef<ShadowOriginAndInsertPoint>(I, J));
1488       I = J;
1489     }
1490 
1491     LLVM_DEBUG(dbgs() << "DONE:\n" << F);
1492   }
1493 
1494   // Sets up pointers into the KMSAN context state in the function prologue.
1495   void insertKmsanPrologue(IRBuilder<> &IRB) {
1496     Value *ContextState = IRB.CreateCall(MS.MsanGetContextStateFn, {});
1497     Constant *Zero = IRB.getInt32(0);
1498     MS.ParamTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1499                                 {Zero, IRB.getInt32(0)}, "param_shadow");
1500     MS.RetvalTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1501                                  {Zero, IRB.getInt32(1)}, "retval_shadow");
1502     MS.VAArgTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1503                                 {Zero, IRB.getInt32(2)}, "va_arg_shadow");
1504     MS.VAArgOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1505                                       {Zero, IRB.getInt32(3)}, "va_arg_origin");
1506     MS.VAArgOverflowSizeTLS =
1507         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1508                       {Zero, IRB.getInt32(4)}, "va_arg_overflow_size");
1509     MS.ParamOriginTLS = IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1510                                       {Zero, IRB.getInt32(5)}, "param_origin");
1511     MS.RetvalOriginTLS =
1512         IRB.CreateGEP(MS.MsanContextStateTy, ContextState,
1513                       {Zero, IRB.getInt32(6)}, "retval_origin");
1514     if (MS.TargetTriple.getArch() == Triple::systemz)
1515       MS.MsanMetadataAlloca = IRB.CreateAlloca(MS.MsanMetadata, 0u);
1516   }
1517 
1518   /// Add MemorySanitizer instrumentation to a function.
1519   bool runOnFunction() {
1520     // Iterate all BBs in depth-first order and create shadow instructions
1521     // for all instructions (where applicable).
1522     // For PHI nodes we create dummy shadow PHIs which will be finalized later.
1523     for (BasicBlock *BB : depth_first(FnPrologueEnd->getParent()))
1524       visit(*BB);
1525 
1526     // Finalize PHI nodes.
1527     for (PHINode *PN : ShadowPHINodes) {
1528       PHINode *PNS = cast<PHINode>(getShadow(PN));
1529       PHINode *PNO = MS.TrackOrigins ? cast<PHINode>(getOrigin(PN)) : nullptr;
1530       size_t NumValues = PN->getNumIncomingValues();
1531       for (size_t v = 0; v < NumValues; v++) {
1532         PNS->addIncoming(getShadow(PN, v), PN->getIncomingBlock(v));
1533         if (PNO)
1534           PNO->addIncoming(getOrigin(PN, v), PN->getIncomingBlock(v));
1535       }
1536     }
1537 
1538     VAHelper->finalizeInstrumentation();
1539 
1540     // Poison allocas at their llvm.lifetime.start intrinsics, unless we have
1541     // fallen back to instrumenting only the allocas themselves.
1542     if (InstrumentLifetimeStart) {
1543       for (auto Item : LifetimeStartList) {
1544         instrumentAlloca(*Item.second, Item.first);
1545         AllocaSet.remove(Item.second);
1546       }
1547     }
1548     // Poison the allocas for which we didn't instrument the corresponding
1549     // lifetime intrinsics.
1550     for (AllocaInst *AI : AllocaSet)
1551       instrumentAlloca(*AI);
1552 
1553     // Insert shadow value checks.
1554     materializeChecks();
1555 
1556     // Delayed instrumentation of StoreInst.
1557     // This must not add any new address checks.
1558     materializeStores();
1559 
1560     return true;
1561   }
1562 
1563   /// Compute the shadow type that corresponds to a given Value.
1564   Type *getShadowTy(Value *V) { return getShadowTy(V->getType()); }
1565 
1566   /// Compute the shadow type that corresponds to a given Type.
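  ///
  /// For example, <4 x float> maps to <4 x i32>, { i32, double } maps to
  /// { i32, i64 }, and a pointer maps to an integer of pointer width.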
1567   Type *getShadowTy(Type *OrigTy) {
1568     if (!OrigTy->isSized()) {
1569       return nullptr;
1570     }
1571     // For integer type, shadow is the same as the original type.
1572     // This may return weird-sized types like i1.
1573     if (IntegerType *IT = dyn_cast<IntegerType>(OrigTy))
1574       return IT;
1575     const DataLayout &DL = F.getParent()->getDataLayout();
1576     if (VectorType *VT = dyn_cast<VectorType>(OrigTy)) {
1577       uint32_t EltSize = DL.getTypeSizeInBits(VT->getElementType());
1578       return VectorType::get(IntegerType::get(*MS.C, EltSize),
1579                              VT->getElementCount());
1580     }
1581     if (ArrayType *AT = dyn_cast<ArrayType>(OrigTy)) {
1582       return ArrayType::get(getShadowTy(AT->getElementType()),
1583                             AT->getNumElements());
1584     }
1585     if (StructType *ST = dyn_cast<StructType>(OrigTy)) {
1586       SmallVector<Type *, 4> Elements;
1587       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1588         Elements.push_back(getShadowTy(ST->getElementType(i)));
1589       StructType *Res = StructType::get(*MS.C, Elements, ST->isPacked());
1590       LLVM_DEBUG(dbgs() << "getShadowTy: " << *ST << " ===> " << *Res << "\n");
1591       return Res;
1592     }
1593     uint32_t TypeSize = DL.getTypeSizeInBits(OrigTy);
1594     return IntegerType::get(*MS.C, TypeSize);
1595   }
1596 
1597   /// Extract combined shadow of struct elements as a bool
1598   Value *collapseStructShadow(StructType *Struct, Value *Shadow,
1599                               IRBuilder<> &IRB) {
1600     Value *FalseVal = IRB.getIntN(/* width */ 1, /* value */ 0);
1601     Value *Aggregator = FalseVal;
1602 
1603     for (unsigned Idx = 0; Idx < Struct->getNumElements(); Idx++) {
1604       // Combine by ORing together each element's bool shadow
1605       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1606       Value *ShadowBool = convertToBool(ShadowItem, IRB);
1607 
1608       if (Aggregator != FalseVal)
1609         Aggregator = IRB.CreateOr(Aggregator, ShadowBool);
1610       else
1611         Aggregator = ShadowBool;
1612     }
1613 
1614     return Aggregator;
1615   }
1616 
1617   // Extract combined shadow of array elements
1618   Value *collapseArrayShadow(ArrayType *Array, Value *Shadow,
1619                              IRBuilder<> &IRB) {
1620     if (!Array->getNumElements())
1621       return IRB.getIntN(/* width */ 1, /* value */ 0);
1622 
1623     Value *FirstItem = IRB.CreateExtractValue(Shadow, 0);
1624     Value *Aggregator = convertShadowToScalar(FirstItem, IRB);
1625 
1626     for (unsigned Idx = 1; Idx < Array->getNumElements(); Idx++) {
1627       Value *ShadowItem = IRB.CreateExtractValue(Shadow, Idx);
1628       Value *ShadowInner = convertShadowToScalar(ShadowItem, IRB);
1629       Aggregator = IRB.CreateOr(Aggregator, ShadowInner);
1630     }
1631     return Aggregator;
1632   }
1633 
1634   /// Convert a shadow value to its flattened variant. The resulting
1635   /// shadow may not necessarily have the same bit width as the input
1636   /// value, but it will always be comparable to zero.
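  ///
  /// For example, a <4 x i32> shadow is bitcast to a single i128, while a
  /// struct shadow collapses (by OR-ing its elements) to an i1.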
1637   Value *convertShadowToScalar(Value *V, IRBuilder<> &IRB) {
1638     if (StructType *Struct = dyn_cast<StructType>(V->getType()))
1639       return collapseStructShadow(Struct, V, IRB);
1640     if (ArrayType *Array = dyn_cast<ArrayType>(V->getType()))
1641       return collapseArrayShadow(Array, V, IRB);
1642     if (isa<VectorType>(V->getType())) {
1643       if (isa<ScalableVectorType>(V->getType()))
1644         return convertShadowToScalar(IRB.CreateOrReduce(V), IRB);
1645       unsigned BitWidth =
1646         V->getType()->getPrimitiveSizeInBits().getFixedValue();
1647       return IRB.CreateBitCast(V, IntegerType::get(*MS.C, BitWidth));
1648     }
1649     return V;
1650   }
1651 
1652   // Convert a scalar value to an i1 by comparing with 0
1653   Value *convertToBool(Value *V, IRBuilder<> &IRB, const Twine &name = "") {
1654     Type *VTy = V->getType();
1655     if (!VTy->isIntegerTy())
1656       return convertToBool(convertShadowToScalar(V, IRB), IRB, name);
1657     if (VTy->getIntegerBitWidth() == 1)
1658       // Just converting a bool to a bool, so do nothing.
1659       return V;
1660     return IRB.CreateICmpNE(V, ConstantInt::get(VTy, 0), name);
1661   }
1662 
1663   Type *ptrToIntPtrType(Type *PtrTy) const {
1664     if (VectorType *VectTy = dyn_cast<VectorType>(PtrTy)) {
1665       return VectorType::get(ptrToIntPtrType(VectTy->getElementType()),
1666                              VectTy->getElementCount());
1667     }
1668     assert(PtrTy->isIntOrPtrTy());
1669     return MS.IntptrTy;
1670   }
1671 
1672   Type *getPtrToShadowPtrType(Type *IntPtrTy, Type *ShadowTy) const {
1673     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1674       return VectorType::get(
1675           getPtrToShadowPtrType(VectTy->getElementType(), ShadowTy),
1676           VectTy->getElementCount());
1677     }
1678     assert(IntPtrTy == MS.IntptrTy);
1679     return ShadowTy->getPointerTo();
1680   }
1681 
1682   Constant *constToIntPtr(Type *IntPtrTy, uint64_t C) const {
1683     if (VectorType *VectTy = dyn_cast<VectorType>(IntPtrTy)) {
1684       return ConstantVector::getSplat(
1685           VectTy->getElementCount(), constToIntPtr(VectTy->getElementType(), C));
1686     }
1687     assert(IntPtrTy == MS.IntptrTy);
1688     return ConstantInt::get(MS.IntptrTy, C);
1689   }
1690 
1691   /// Compute the integer shadow offset that corresponds to a given
1692   /// application address.
1693   ///
1694   /// Offset = (Addr & ~AndMask) ^ XorMask
1695   /// Addr can be a ptr or <N x ptr>; the result is an integer (or a vector of
1696   /// integers) of pointer width. It is the common offset used to derive both
1697   /// the shadow and the origin pointers.
1698   Value *getShadowPtrOffset(Value *Addr, IRBuilder<> &IRB) {
1699     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1700     Value *OffsetLong = IRB.CreatePointerCast(Addr, IntptrTy);
1701 
1702     if (uint64_t AndMask = MS.MapParams->AndMask)
1703       OffsetLong = IRB.CreateAnd(OffsetLong, constToIntPtr(IntptrTy, ~AndMask));
1704 
1705     if (uint64_t XorMask = MS.MapParams->XorMask)
1706       OffsetLong = IRB.CreateXor(OffsetLong, constToIntPtr(IntptrTy, XorMask));
1707     return OffsetLong;
1708   }
1709 
1710   /// Compute the shadow and origin addresses corresponding to a given
1711   /// application address.
1712   ///
1713   /// Shadow = ShadowBase + Offset
1714   /// Origin = (OriginBase + Offset) & ~3ULL
1715   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow type
1716   /// of a single pointee.
1717   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
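  ///
  /// E.g. with the typical Linux/x86_64 parameters (AndMask = 0,
  /// XorMask = 0x500000000000, ShadowBase = 0, OriginBase = 0x100000000000)
  /// this computes Shadow = Addr ^ 0x500000000000 and
  /// Origin = ((Addr ^ 0x500000000000) + 0x100000000000) & ~3ULL.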
1718   std::pair<Value *, Value *>
1719   getShadowOriginPtrUserspace(Value *Addr, IRBuilder<> &IRB, Type *ShadowTy,
1720                               MaybeAlign Alignment) {
1721     Type *IntptrTy = ptrToIntPtrType(Addr->getType());
1722     Value *ShadowOffset = getShadowPtrOffset(Addr, IRB);
1723     Value *ShadowLong = ShadowOffset;
1724     if (uint64_t ShadowBase = MS.MapParams->ShadowBase) {
1725       ShadowLong =
1726           IRB.CreateAdd(ShadowLong, constToIntPtr(IntptrTy, ShadowBase));
1727     }
1728     Value *ShadowPtr = IRB.CreateIntToPtr(
1729         ShadowLong, getPtrToShadowPtrType(IntptrTy, ShadowTy));
1730 
1731     Value *OriginPtr = nullptr;
1732     if (MS.TrackOrigins) {
1733       Value *OriginLong = ShadowOffset;
1734       uint64_t OriginBase = MS.MapParams->OriginBase;
1735       if (OriginBase != 0)
1736         OriginLong =
1737             IRB.CreateAdd(OriginLong, constToIntPtr(IntptrTy, OriginBase));
1738       if (!Alignment || *Alignment < kMinOriginAlignment) {
1739         uint64_t Mask = kMinOriginAlignment.value() - 1;
1740         OriginLong = IRB.CreateAnd(OriginLong, constToIntPtr(IntptrTy, ~Mask));
1741       }
1742       OriginPtr = IRB.CreateIntToPtr(
1743           OriginLong, getPtrToShadowPtrType(IntptrTy, MS.OriginTy));
1744     }
1745     return std::make_pair(ShadowPtr, OriginPtr);
1746   }
1747 
1748   template <typename... ArgsTy>
1749   Value *createMetadataCall(IRBuilder<> &IRB, FunctionCallee Callee,
1750                             ArgsTy... Args) {
1751     if (MS.TargetTriple.getArch() == Triple::systemz) {
1752       IRB.CreateCall(Callee,
1753                      {MS.MsanMetadataAlloca, std::forward<ArgsTy>(Args)...});
1754       return IRB.CreateLoad(MS.MsanMetadata, MS.MsanMetadataAlloca);
1755     }
1756 
1757     return IRB.CreateCall(Callee, {std::forward<ArgsTy>(Args)...});
1758   }
1759 
1760   std::pair<Value *, Value *> getShadowOriginPtrKernelNoVec(Value *Addr,
1761                                                             IRBuilder<> &IRB,
1762                                                             Type *ShadowTy,
1763                                                             bool isStore) {
1764     Value *ShadowOriginPtrs;
1765     const DataLayout &DL = F.getParent()->getDataLayout();
1766     TypeSize Size = DL.getTypeStoreSize(ShadowTy);
1767 
1768     FunctionCallee Getter = MS.getKmsanShadowOriginAccessFn(isStore, Size);
1769     Value *AddrCast =
1770         IRB.CreatePointerCast(Addr, PointerType::get(IRB.getInt8Ty(), 0));
1771     if (Getter) {
1772       ShadowOriginPtrs = createMetadataCall(IRB, Getter, AddrCast);
1773     } else {
1774       Value *SizeVal = ConstantInt::get(MS.IntptrTy, Size);
1775       ShadowOriginPtrs = createMetadataCall(
1776           IRB,
1777           isStore ? MS.MsanMetadataPtrForStoreN : MS.MsanMetadataPtrForLoadN,
1778           AddrCast, SizeVal);
1779     }
1780     Value *ShadowPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 0);
1781     ShadowPtr = IRB.CreatePointerCast(ShadowPtr, PointerType::get(ShadowTy, 0));
1782     Value *OriginPtr = IRB.CreateExtractValue(ShadowOriginPtrs, 1);
1783 
1784     return std::make_pair(ShadowPtr, OriginPtr);
1785   }
1786 
1787   /// Addr can be a ptr or <N x ptr>. In both cases ShadowTy is the shadow type
1788   /// of a single pointee.
1789   /// Returns <shadow_ptr, origin_ptr> or <<N x shadow_ptr>, <N x origin_ptr>>.
1790   std::pair<Value *, Value *> getShadowOriginPtrKernel(Value *Addr,
1791                                                        IRBuilder<> &IRB,
1792                                                        Type *ShadowTy,
1793                                                        bool isStore) {
1794     VectorType *VectTy = dyn_cast<VectorType>(Addr->getType());
1795     if (!VectTy) {
1796       assert(Addr->getType()->isPointerTy());
1797       return getShadowOriginPtrKernelNoVec(Addr, IRB, ShadowTy, isStore);
1798     }
1799 
1800     // TODO: Support callbacks with vectors of addresses.
1801     unsigned NumElements = cast<FixedVectorType>(VectTy)->getNumElements();
1802     Value *ShadowPtrs = ConstantInt::getNullValue(
1803         FixedVectorType::get(ShadowTy->getPointerTo(), NumElements));
1804     Value *OriginPtrs = nullptr;
1805     if (MS.TrackOrigins)
1806       OriginPtrs = ConstantInt::getNullValue(
1807           FixedVectorType::get(MS.OriginTy->getPointerTo(), NumElements));
1808     for (unsigned i = 0; i < NumElements; ++i) {
1809       Value *OneAddr =
1810           IRB.CreateExtractElement(Addr, ConstantInt::get(IRB.getInt32Ty(), i));
1811       auto [ShadowPtr, OriginPtr] =
1812           getShadowOriginPtrKernelNoVec(OneAddr, IRB, ShadowTy, isStore);
1813 
1814       ShadowPtrs = IRB.CreateInsertElement(
1815           ShadowPtrs, ShadowPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1816       if (MS.TrackOrigins)
1817         OriginPtrs = IRB.CreateInsertElement(
1818             OriginPtrs, OriginPtr, ConstantInt::get(IRB.getInt32Ty(), i));
1819     }
1820     return {ShadowPtrs, OriginPtrs};
1821   }
1822 
1823   std::pair<Value *, Value *> getShadowOriginPtr(Value *Addr, IRBuilder<> &IRB,
1824                                                  Type *ShadowTy,
1825                                                  MaybeAlign Alignment,
1826                                                  bool isStore) {
1827     if (MS.CompileKernel)
1828       return getShadowOriginPtrKernel(Addr, IRB, ShadowTy, isStore);
1829     return getShadowOriginPtrUserspace(Addr, IRB, ShadowTy, Alignment);
1830   }
1831 
1832   /// Compute the shadow address for a given function argument.
1833   ///
1834   /// Shadow = ParamTLS+ArgOffset.
1835   Value *getShadowPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
1836     Value *Base = IRB.CreatePointerCast(MS.ParamTLS, MS.IntptrTy);
1837     if (ArgOffset)
1838       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1839     return IRB.CreateIntToPtr(Base, PointerType::get(getShadowTy(A), 0),
1840                               "_msarg");
1841   }
1842 
1843   /// Compute the origin address for a given function argument.
1844   Value *getOriginPtrForArgument(Value *A, IRBuilder<> &IRB, int ArgOffset) {
1845     if (!MS.TrackOrigins)
1846       return nullptr;
1847     Value *Base = IRB.CreatePointerCast(MS.ParamOriginTLS, MS.IntptrTy);
1848     if (ArgOffset)
1849       Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
1850     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
1851                               "_msarg_o");
1852   }
1853 
1854   /// Compute the shadow address for a retval.
1855   Value *getShadowPtrForRetval(Value *A, IRBuilder<> &IRB) {
1856     return IRB.CreatePointerCast(MS.RetvalTLS,
1857                                  PointerType::get(getShadowTy(A), 0), "_msret");
1858   }
1859 
1860   /// Compute the origin address for a retval.
1861   Value *getOriginPtrForRetval(IRBuilder<> &IRB) {
1862     // We keep a single origin for the entire retval. Might be too optimistic.
1863     return MS.RetvalOriginTLS;
1864   }
1865 
1866   /// Set SV to be the shadow value for V.
1867   void setShadow(Value *V, Value *SV) {
1868     assert(!ShadowMap.count(V) && "Values may only have one shadow");
1869     ShadowMap[V] = PropagateShadow ? SV : getCleanShadow(V);
1870   }
1871 
1872   /// Set Origin to be the origin value for V.
1873   void setOrigin(Value *V, Value *Origin) {
1874     if (!MS.TrackOrigins)
1875       return;
1876     assert(!OriginMap.count(V) && "Values may only have one origin");
1877     LLVM_DEBUG(dbgs() << "ORIGIN: " << *V << "  ==> " << *Origin << "\n");
1878     OriginMap[V] = Origin;
1879   }
1880 
1881   Constant *getCleanShadow(Type *OrigTy) {
1882     Type *ShadowTy = getShadowTy(OrigTy);
1883     if (!ShadowTy)
1884       return nullptr;
1885     return Constant::getNullValue(ShadowTy);
1886   }
1887 
1888   /// Create a clean shadow value for a given value.
1889   ///
1890   /// Clean shadow (all zeroes) means all bits of the value are defined
1891   /// (initialized).
1892   Constant *getCleanShadow(Value *V) { return getCleanShadow(V->getType()); }
1893 
1894   /// Create a dirty shadow of a given shadow type.
1895   Constant *getPoisonedShadow(Type *ShadowTy) {
1896     assert(ShadowTy);
1897     if (isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy))
1898       return Constant::getAllOnesValue(ShadowTy);
1899     if (ArrayType *AT = dyn_cast<ArrayType>(ShadowTy)) {
1900       SmallVector<Constant *, 4> Vals(AT->getNumElements(),
1901                                       getPoisonedShadow(AT->getElementType()));
1902       return ConstantArray::get(AT, Vals);
1903     }
1904     if (StructType *ST = dyn_cast<StructType>(ShadowTy)) {
1905       SmallVector<Constant *, 4> Vals;
1906       for (unsigned i = 0, n = ST->getNumElements(); i < n; i++)
1907         Vals.push_back(getPoisonedShadow(ST->getElementType(i)));
1908       return ConstantStruct::get(ST, Vals);
1909     }
1910     llvm_unreachable("Unexpected shadow type");
1911   }
1912 
1913   /// Create a dirty shadow for a given value.
1914   Constant *getPoisonedShadow(Value *V) {
1915     Type *ShadowTy = getShadowTy(V);
1916     if (!ShadowTy)
1917       return nullptr;
1918     return getPoisonedShadow(ShadowTy);
1919   }
1920 
1921   /// Create a clean (zero) origin.
1922   Value *getCleanOrigin() { return Constant::getNullValue(MS.OriginTy); }
1923 
1924   /// Get the shadow value for a given Value.
1925   ///
1926   /// This function either returns the value set earlier with setShadow,
1927   /// or extracts it from ParamTLS (for function arguments).
1928   Value *getShadow(Value *V) {
1929     if (Instruction *I = dyn_cast<Instruction>(V)) {
1930       if (!PropagateShadow || I->getMetadata(LLVMContext::MD_nosanitize))
1931         return getCleanShadow(V);
1932       // For instructions the shadow is already stored in the map.
1933       Value *Shadow = ShadowMap[V];
1934       if (!Shadow) {
1935         LLVM_DEBUG(dbgs() << "No shadow: " << *V << "\n" << *(I->getParent()));
1936         (void)I;
1937         assert(Shadow && "No shadow for a value");
1938       }
1939       return Shadow;
1940     }
1941     if (UndefValue *U = dyn_cast<UndefValue>(V)) {
1942       Value *AllOnes = (PropagateShadow && PoisonUndef) ? getPoisonedShadow(V)
1943                                                         : getCleanShadow(V);
1944       LLVM_DEBUG(dbgs() << "Undef: " << *U << " ==> " << *AllOnes << "\n");
1945       (void)U;
1946       return AllOnes;
1947     }
1948     if (Argument *A = dyn_cast<Argument>(V)) {
1949       // For arguments we compute the shadow on demand and store it in the map.
1950       Value *&ShadowPtr = ShadowMap[V];
1951       if (ShadowPtr)
1952         return ShadowPtr;
1953       Function *F = A->getParent();
1954       IRBuilder<> EntryIRB(FnPrologueEnd);
1955       unsigned ArgOffset = 0;
1956       const DataLayout &DL = F->getParent()->getDataLayout();
1957       for (auto &FArg : F->args()) {
1958         if (!FArg.getType()->isSized()) {
1959           LLVM_DEBUG(dbgs() << "Arg is not sized\n");
1960           continue;
1961         }
1962 
1963         unsigned Size = FArg.hasByValAttr()
1964                             ? DL.getTypeAllocSize(FArg.getParamByValType())
1965                             : DL.getTypeAllocSize(FArg.getType());
1966 
1967         if (A == &FArg) {
1968           bool Overflow = ArgOffset + Size > kParamTLSSize;
1969           if (FArg.hasByValAttr()) {
1970             // ByVal pointer itself has clean shadow. We copy the actual
1971             // argument shadow to the underlying memory.
1972             // Figure out maximal valid memcpy alignment.
1973             const Align ArgAlign = DL.getValueOrABITypeAlignment(
1974                 FArg.getParamAlign(), FArg.getParamByValType());
1975             Value *CpShadowPtr, *CpOriginPtr;
1976             std::tie(CpShadowPtr, CpOriginPtr) =
1977                 getShadowOriginPtr(V, EntryIRB, EntryIRB.getInt8Ty(), ArgAlign,
1978                                    /*isStore*/ true);
1979             if (!PropagateShadow || Overflow) {
1980               // ParamTLS overflow.
1981               EntryIRB.CreateMemSet(
1982                   CpShadowPtr, Constant::getNullValue(EntryIRB.getInt8Ty()),
1983                   Size, ArgAlign);
1984             } else {
1985               Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
1986               const Align CopyAlign = std::min(ArgAlign, kShadowTLSAlignment);
1987               Value *Cpy = EntryIRB.CreateMemCpy(CpShadowPtr, CopyAlign, Base,
1988                                                  CopyAlign, Size);
1989               LLVM_DEBUG(dbgs() << "  ByValCpy: " << *Cpy << "\n");
1990               (void)Cpy;
1991 
1992               if (MS.TrackOrigins) {
1993                 Value *OriginPtr =
1994                     getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
1995                 // FIXME: OriginSize should be:
1996                 // alignTo(V % kMinOriginAlignment + Size, kMinOriginAlignment)
1997                 unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
1998                 EntryIRB.CreateMemCpy(
1999                     CpOriginPtr,
2000                     /* by getShadowOriginPtr */ kMinOriginAlignment, OriginPtr,
2001                     /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
2002                     OriginSize);
2003               }
2004             }
2005           }
2006 
2007           if (!PropagateShadow || Overflow || FArg.hasByValAttr() ||
2008               (MS.EagerChecks && FArg.hasAttribute(Attribute::NoUndef))) {
2009             ShadowPtr = getCleanShadow(V);
2010             setOrigin(A, getCleanOrigin());
2011           } else {
2012             // Shadow over TLS
2013             Value *Base = getShadowPtrForArgument(&FArg, EntryIRB, ArgOffset);
2014             ShadowPtr = EntryIRB.CreateAlignedLoad(getShadowTy(&FArg), Base,
2015                                                    kShadowTLSAlignment);
2016             if (MS.TrackOrigins) {
2017               Value *OriginPtr =
2018                   getOriginPtrForArgument(&FArg, EntryIRB, ArgOffset);
2019               setOrigin(A, EntryIRB.CreateLoad(MS.OriginTy, OriginPtr));
2020             }
2021           }
2022           LLVM_DEBUG(dbgs()
2023                      << "  ARG:    " << FArg << " ==> " << *ShadowPtr << "\n");
2024           break;
2025         }
2026 
2027         ArgOffset += alignTo(Size, kShadowTLSAlignment);
2028       }
2029       assert(ShadowPtr && "Could not find shadow for an argument");
2030       return ShadowPtr;
2031     }
2032     // For everything else the shadow is zero.
2033     return getCleanShadow(V);
2034   }
2035 
2036   /// Get the shadow for i-th argument of the instruction I.
2037   Value *getShadow(Instruction *I, int i) {
2038     return getShadow(I->getOperand(i));
2039   }
2040 
2041   /// Get the origin for a value.
2042   Value *getOrigin(Value *V) {
2043     if (!MS.TrackOrigins)
2044       return nullptr;
2045     if (!PropagateShadow || isa<Constant>(V) || isa<InlineAsm>(V))
2046       return getCleanOrigin();
2047     assert((isa<Instruction>(V) || isa<Argument>(V)) &&
2048            "Unexpected value type in getOrigin()");
2049     if (Instruction *I = dyn_cast<Instruction>(V)) {
2050       if (I->getMetadata(LLVMContext::MD_nosanitize))
2051         return getCleanOrigin();
2052     }
2053     Value *Origin = OriginMap[V];
2054     assert(Origin && "Missing origin");
2055     return Origin;
2056   }
2057 
2058   /// Get the origin for i-th argument of the instruction I.
2059   Value *getOrigin(Instruction *I, int i) {
2060     return getOrigin(I->getOperand(i));
2061   }
2062 
2063   /// Remember the place where a shadow check should be inserted.
2064   ///
2065   /// This location will be later instrumented with a check that will print a
2066   /// UMR warning in runtime if the shadow value is not 0.
2067   void insertShadowCheck(Value *Shadow, Value *Origin, Instruction *OrigIns) {
2068     assert(Shadow);
2069     if (!InsertChecks)
2070       return;
2071 
2072     if (!DebugCounter::shouldExecute(DebugInsertCheck)) {
2073       LLVM_DEBUG(dbgs() << "Skipping check of " << *Shadow << " before "
2074                         << *OrigIns << "\n");
2075       return;
2076     }
2077 #ifndef NDEBUG
2078     Type *ShadowTy = Shadow->getType();
2079     assert((isa<IntegerType>(ShadowTy) || isa<VectorType>(ShadowTy) ||
2080             isa<StructType>(ShadowTy) || isa<ArrayType>(ShadowTy)) &&
2081            "Can only insert checks for integer, vector, and aggregate shadow "
2082            "types");
2083 #endif
2084     InstrumentationList.push_back(
2085         ShadowOriginAndInsertPoint(Shadow, Origin, OrigIns));
2086   }
2087 
2088   /// Remember the place where a shadow check should be inserted.
2089   ///
2090   /// This location will be later instrumented with a check that will print a
2091   /// UMR warning in runtime if the value is not fully defined.
2092   void insertShadowCheck(Value *Val, Instruction *OrigIns) {
2093     assert(Val);
2094     Value *Shadow, *Origin;
2095     if (ClCheckConstantShadow) {
2096       Shadow = getShadow(Val);
2097       if (!Shadow)
2098         return;
2099       Origin = getOrigin(Val);
2100     } else {
2101       Shadow = dyn_cast_or_null<Instruction>(getShadow(Val));
2102       if (!Shadow)
2103         return;
2104       Origin = dyn_cast_or_null<Instruction>(getOrigin(Val));
2105     }
2106     insertShadowCheck(Shadow, Origin, OrigIns);
2107   }
2108 
2109   AtomicOrdering addReleaseOrdering(AtomicOrdering a) {
2110     switch (a) {
2111     case AtomicOrdering::NotAtomic:
2112       return AtomicOrdering::NotAtomic;
2113     case AtomicOrdering::Unordered:
2114     case AtomicOrdering::Monotonic:
2115     case AtomicOrdering::Release:
2116       return AtomicOrdering::Release;
2117     case AtomicOrdering::Acquire:
2118     case AtomicOrdering::AcquireRelease:
2119       return AtomicOrdering::AcquireRelease;
2120     case AtomicOrdering::SequentiallyConsistent:
2121       return AtomicOrdering::SequentiallyConsistent;
2122     }
2123     llvm_unreachable("Unknown ordering");
2124   }
2125 
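  // Builds a constant table, indexed by the C ABI memory ordering, that maps
  // each ordering to one strengthened to at least release. It is used to adjust
  // the ordering argument of atomic libcalls (e.g. __atomic_store).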
2126   Value *makeAddReleaseOrderingTable(IRBuilder<> &IRB) {
2127     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2128     uint32_t OrderingTable[NumOrderings] = {};
2129 
2130     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2131         OrderingTable[(int)AtomicOrderingCABI::release] =
2132             (int)AtomicOrderingCABI::release;
2133     OrderingTable[(int)AtomicOrderingCABI::consume] =
2134         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2135             OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2136                 (int)AtomicOrderingCABI::acq_rel;
2137     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2138         (int)AtomicOrderingCABI::seq_cst;
2139 
2140     return ConstantDataVector::get(IRB.getContext(),
2141                                    ArrayRef(OrderingTable, NumOrderings));
2142   }
2143 
2144   AtomicOrdering addAcquireOrdering(AtomicOrdering a) {
2145     switch (a) {
2146     case AtomicOrdering::NotAtomic:
2147       return AtomicOrdering::NotAtomic;
2148     case AtomicOrdering::Unordered:
2149     case AtomicOrdering::Monotonic:
2150     case AtomicOrdering::Acquire:
2151       return AtomicOrdering::Acquire;
2152     case AtomicOrdering::Release:
2153     case AtomicOrdering::AcquireRelease:
2154       return AtomicOrdering::AcquireRelease;
2155     case AtomicOrdering::SequentiallyConsistent:
2156       return AtomicOrdering::SequentiallyConsistent;
2157     }
2158     llvm_unreachable("Unknown ordering");
2159   }
2160 
2161   Value *makeAddAcquireOrderingTable(IRBuilder<> &IRB) {
2162     constexpr int NumOrderings = (int)AtomicOrderingCABI::seq_cst + 1;
2163     uint32_t OrderingTable[NumOrderings] = {};
2164 
2165     OrderingTable[(int)AtomicOrderingCABI::relaxed] =
2166         OrderingTable[(int)AtomicOrderingCABI::acquire] =
2167             OrderingTable[(int)AtomicOrderingCABI::consume] =
2168                 (int)AtomicOrderingCABI::acquire;
2169     OrderingTable[(int)AtomicOrderingCABI::release] =
2170         OrderingTable[(int)AtomicOrderingCABI::acq_rel] =
2171             (int)AtomicOrderingCABI::acq_rel;
2172     OrderingTable[(int)AtomicOrderingCABI::seq_cst] =
2173         (int)AtomicOrderingCABI::seq_cst;
2174 
2175     return ConstantDataVector::get(IRB.getContext(),
2176                                    ArrayRef(OrderingTable, NumOrderings));
2177   }
2178 
2179   // ------------------- Visitors.
2180   using InstVisitor<MemorySanitizerVisitor>::visit;
2181   void visit(Instruction &I) {
2182     if (I.getMetadata(LLVMContext::MD_nosanitize))
2183       return;
2184     // Don't want to visit if we're in the prologue
2185     if (isInPrologue(I))
2186       return;
2187     InstVisitor<MemorySanitizerVisitor>::visit(I);
2188   }
2189 
2190   /// Instrument LoadInst
2191   ///
2192   /// Loads the corresponding shadow and (optionally) origin.
2193   /// Optionally, checks that the load address is fully defined.
2194   void visitLoadInst(LoadInst &I) {
2195     assert(I.getType()->isSized() && "Load type must have size");
2196     assert(!I.getMetadata(LLVMContext::MD_nosanitize));
2197     NextNodeIRBuilder IRB(&I);
2198     Type *ShadowTy = getShadowTy(&I);
2199     Value *Addr = I.getPointerOperand();
2200     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2201     const Align Alignment = I.getAlign();
2202     if (PropagateShadow) {
2203       std::tie(ShadowPtr, OriginPtr) =
2204           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2205       setShadow(&I,
2206                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2207     } else {
2208       setShadow(&I, getCleanShadow(&I));
2209     }
2210 
2211     if (ClCheckAccessAddress)
2212       insertShadowCheck(I.getPointerOperand(), &I);
2213 
2214     if (I.isAtomic())
2215       I.setOrdering(addAcquireOrdering(I.getOrdering()));
2216 
2217     if (MS.TrackOrigins) {
2218       if (PropagateShadow) {
2219         const Align OriginAlignment = std::max(kMinOriginAlignment, Alignment);
2220         setOrigin(
2221             &I, IRB.CreateAlignedLoad(MS.OriginTy, OriginPtr, OriginAlignment));
2222       } else {
2223         setOrigin(&I, getCleanOrigin());
2224       }
2225     }
2226   }
2227 
2228   /// Instrument StoreInst
2229   ///
2230   /// Stores the corresponding shadow and (optionally) origin.
2231   /// Optionally, checks that the store address is fully defined.
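  ///
  /// The store is only recorded here; the actual shadow/origin stores are
  /// emitted later by materializeStores().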
2232   void visitStoreInst(StoreInst &I) {
2233     StoreList.push_back(&I);
2234     if (ClCheckAccessAddress)
2235       insertShadowCheck(I.getPointerOperand(), &I);
2236   }
2237 
2238   void handleCASOrRMW(Instruction &I) {
2239     assert(isa<AtomicRMWInst>(I) || isa<AtomicCmpXchgInst>(I));
2240 
2241     IRBuilder<> IRB(&I);
2242     Value *Addr = I.getOperand(0);
2243     Value *Val = I.getOperand(1);
2244     Value *ShadowPtr = getShadowOriginPtr(Addr, IRB, getShadowTy(Val), Align(1),
2245                                           /*isStore*/ true)
2246                            .first;
2247 
2248     if (ClCheckAccessAddress)
2249       insertShadowCheck(Addr, &I);
2250 
2251     // Only test the compare operand of the cmpxchg instruction.
2252     // The other operand can potentially be uninitialized, but we cannot
2253     // detect this situation reliably without possible false positives.
2254     if (isa<AtomicCmpXchgInst>(I))
2255       insertShadowCheck(Val, &I);
2256 
2257     IRB.CreateStore(getCleanShadow(Val), ShadowPtr);
2258 
2259     setShadow(&I, getCleanShadow(&I));
2260     setOrigin(&I, getCleanOrigin());
2261   }
2262 
2263   void visitAtomicRMWInst(AtomicRMWInst &I) {
2264     handleCASOrRMW(I);
2265     I.setOrdering(addReleaseOrdering(I.getOrdering()));
2266   }
2267 
2268   void visitAtomicCmpXchgInst(AtomicCmpXchgInst &I) {
2269     handleCASOrRMW(I);
2270     I.setSuccessOrdering(addReleaseOrdering(I.getSuccessOrdering()));
2271   }
2272 
2273   // Vector manipulation.
2274   void visitExtractElementInst(ExtractElementInst &I) {
2275     insertShadowCheck(I.getOperand(1), &I);
2276     IRBuilder<> IRB(&I);
2277     setShadow(&I, IRB.CreateExtractElement(getShadow(&I, 0), I.getOperand(1),
2278                                            "_msprop"));
2279     setOrigin(&I, getOrigin(&I, 0));
2280   }
2281 
2282   void visitInsertElementInst(InsertElementInst &I) {
2283     insertShadowCheck(I.getOperand(2), &I);
2284     IRBuilder<> IRB(&I);
2285     auto *Shadow0 = getShadow(&I, 0);
2286     auto *Shadow1 = getShadow(&I, 1);
2287     setShadow(&I, IRB.CreateInsertElement(Shadow0, Shadow1, I.getOperand(2),
2288                                           "_msprop"));
2289     setOriginForNaryOp(I);
2290   }
2291 
2292   void visitShuffleVectorInst(ShuffleVectorInst &I) {
2293     IRBuilder<> IRB(&I);
2294     auto *Shadow0 = getShadow(&I, 0);
2295     auto *Shadow1 = getShadow(&I, 1);
2296     setShadow(&I, IRB.CreateShuffleVector(Shadow0, Shadow1, I.getShuffleMask(),
2297                                           "_msprop"));
2298     setOriginForNaryOp(I);
2299   }
2300 
2301   // Casts.
2302   void visitSExtInst(SExtInst &I) {
2303     IRBuilder<> IRB(&I);
2304     setShadow(&I, IRB.CreateSExt(getShadow(&I, 0), I.getType(), "_msprop"));
2305     setOrigin(&I, getOrigin(&I, 0));
2306   }
2307 
2308   void visitZExtInst(ZExtInst &I) {
2309     IRBuilder<> IRB(&I);
2310     setShadow(&I, IRB.CreateZExt(getShadow(&I, 0), I.getType(), "_msprop"));
2311     setOrigin(&I, getOrigin(&I, 0));
2312   }
2313 
2314   void visitTruncInst(TruncInst &I) {
2315     IRBuilder<> IRB(&I);
2316     setShadow(&I, IRB.CreateTrunc(getShadow(&I, 0), I.getType(), "_msprop"));
2317     setOrigin(&I, getOrigin(&I, 0));
2318   }
2319 
2320   void visitBitCastInst(BitCastInst &I) {
2321     // Special case: if this is the bitcast (there is exactly 1 allowed) between
2322     // a musttail call and a ret, don't instrument. New instructions are not
2323     // allowed after a musttail call.
2324     if (auto *CI = dyn_cast<CallInst>(I.getOperand(0)))
2325       if (CI->isMustTailCall())
2326         return;
2327     IRBuilder<> IRB(&I);
2328     setShadow(&I, IRB.CreateBitCast(getShadow(&I, 0), getShadowTy(&I)));
2329     setOrigin(&I, getOrigin(&I, 0));
2330   }
2331 
2332   void visitPtrToIntInst(PtrToIntInst &I) {
2333     IRBuilder<> IRB(&I);
2334     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2335                                     "_msprop_ptrtoint"));
2336     setOrigin(&I, getOrigin(&I, 0));
2337   }
2338 
2339   void visitIntToPtrInst(IntToPtrInst &I) {
2340     IRBuilder<> IRB(&I);
2341     setShadow(&I, IRB.CreateIntCast(getShadow(&I, 0), getShadowTy(&I), false,
2342                                     "_msprop_inttoptr"));
2343     setOrigin(&I, getOrigin(&I, 0));
2344   }
2345 
2346   void visitFPToSIInst(CastInst &I) { handleShadowOr(I); }
2347   void visitFPToUIInst(CastInst &I) { handleShadowOr(I); }
2348   void visitSIToFPInst(CastInst &I) { handleShadowOr(I); }
2349   void visitUIToFPInst(CastInst &I) { handleShadowOr(I); }
2350   void visitFPExtInst(CastInst &I) { handleShadowOr(I); }
2351   void visitFPTruncInst(CastInst &I) { handleShadowOr(I); }
2352 
2353   /// Propagate shadow for bitwise AND.
2354   ///
2355   /// This code is exact, i.e. if, for example, a bit in the left argument
2356   /// is defined and equal to 0, then neither the value nor the definedness of
2357   /// the corresponding bit in B affects the resulting shadow.
2358   void visitAnd(BinaryOperator &I) {
2359     IRBuilder<> IRB(&I);
2360     //  "And" of 0 and a poisoned value results in unpoisoned value.
2361     //  1&1 => 1;     0&1 => 0;     p&1 => p;
2362     //  1&0 => 0;     0&0 => 0;     p&0 => 0;
2363     //  1&p => p;     0&p => 0;     p&p => p;
2364     //  S = (S1 & S2) | (V1 & S2) | (S1 & V2)
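    //  Example: if V1 = 0b0100 is fully defined (S1 = 0) and operand 2 is fully
    //  undefined (S2 = 0b1111), then S = V1 & S2 = 0b0100: only the bit where
    //  V1 is 1 can still be affected by the second operand.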
2365     Value *S1 = getShadow(&I, 0);
2366     Value *S2 = getShadow(&I, 1);
2367     Value *V1 = I.getOperand(0);
2368     Value *V2 = I.getOperand(1);
2369     if (V1->getType() != S1->getType()) {
2370       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2371       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2372     }
2373     Value *S1S2 = IRB.CreateAnd(S1, S2);
2374     Value *V1S2 = IRB.CreateAnd(V1, S2);
2375     Value *S1V2 = IRB.CreateAnd(S1, V2);
2376     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2377     setOriginForNaryOp(I);
2378   }
2379 
2380   void visitOr(BinaryOperator &I) {
2381     IRBuilder<> IRB(&I);
2382     //  "Or" of 1 and a poisoned value results in unpoisoned value.
2383     //  1|1 => 1;     0|1 => 1;     p|1 => 1;
2384     //  1|0 => 1;     0|0 => 0;     p|0 => p;
2385     //  1|p => 1;     0|p => p;     p|p => p;
2386     //  S = (S1 & S2) | (~V1 & S2) | (S1 & ~V2)
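    //  Example: if a bit of V1 is a defined 1, the result bit is 1 no matter
    //  what operand 2 holds, so that result bit stays defined even if the
    //  corresponding bit of S2 is poisoned.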
2387     Value *S1 = getShadow(&I, 0);
2388     Value *S2 = getShadow(&I, 1);
2389     Value *V1 = IRB.CreateNot(I.getOperand(0));
2390     Value *V2 = IRB.CreateNot(I.getOperand(1));
2391     if (V1->getType() != S1->getType()) {
2392       V1 = IRB.CreateIntCast(V1, S1->getType(), false);
2393       V2 = IRB.CreateIntCast(V2, S2->getType(), false);
2394     }
2395     Value *S1S2 = IRB.CreateAnd(S1, S2);
2396     Value *V1S2 = IRB.CreateAnd(V1, S2);
2397     Value *S1V2 = IRB.CreateAnd(S1, V2);
2398     setShadow(&I, IRB.CreateOr({S1S2, V1S2, S1V2}));
2399     setOriginForNaryOp(I);
2400   }
2401 
2402   /// Default propagation of shadow and/or origin.
2403   ///
2404   /// This class implements the general case of shadow propagation, used in all
2405   /// cases where we don't know and/or don't care about what the operation
2406   /// actually does. It converts all input shadow values to a common type
2407   /// (extending or truncating as necessary), and bitwise OR's them.
2408   ///
2409   /// This is much cheaper than inserting checks (i.e. requiring inputs to be
2410   /// fully initialized), and less prone to false positives.
2411   ///
2412   /// This class also implements the general case of origin propagation. For a
2413   /// Nary operation, result origin is set to the origin of an argument that is
2414   /// not entirely initialized. If there is more than one such argument, the
2415   /// rightmost of them is picked. It does not matter which one is picked if all
2416   /// arguments are initialized.
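  ///
  /// For example, for c = add i32 %a, %b the combined shadow is Sc = Sa | Sb,
  /// and the resulting origin is chosen at run time: Ob if Sb is non-zero,
  /// otherwise Oa.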
2417   template <bool CombineShadow> class Combiner {
2418     Value *Shadow = nullptr;
2419     Value *Origin = nullptr;
2420     IRBuilder<> &IRB;
2421     MemorySanitizerVisitor *MSV;
2422 
2423   public:
2424     Combiner(MemorySanitizerVisitor *MSV, IRBuilder<> &IRB)
2425         : IRB(IRB), MSV(MSV) {}
2426 
2427     /// Add a pair of shadow and origin values to the mix.
2428     Combiner &Add(Value *OpShadow, Value *OpOrigin) {
2429       if (CombineShadow) {
2430         assert(OpShadow);
2431         if (!Shadow)
2432           Shadow = OpShadow;
2433         else {
2434           OpShadow = MSV->CreateShadowCast(IRB, OpShadow, Shadow->getType());
2435           Shadow = IRB.CreateOr(Shadow, OpShadow, "_msprop");
2436         }
2437       }
2438 
2439       if (MSV->MS.TrackOrigins) {
2440         assert(OpOrigin);
2441         if (!Origin) {
2442           Origin = OpOrigin;
2443         } else {
2444           Constant *ConstOrigin = dyn_cast<Constant>(OpOrigin);
2445           // No point in adding something that might result in 0 origin value.
2446           if (!ConstOrigin || !ConstOrigin->isNullValue()) {
2447             Value *Cond = MSV->convertToBool(OpShadow, IRB);
2448             Origin = IRB.CreateSelect(Cond, OpOrigin, Origin);
2449           }
2450         }
2451       }
2452       return *this;
2453     }
2454 
2455     /// Add an application value to the mix.
2456     Combiner &Add(Value *V) {
2457       Value *OpShadow = MSV->getShadow(V);
2458       Value *OpOrigin = MSV->MS.TrackOrigins ? MSV->getOrigin(V) : nullptr;
2459       return Add(OpShadow, OpOrigin);
2460     }
2461 
2462     /// Set the current combined values as the given instruction's shadow
2463     /// and origin.
2464     void Done(Instruction *I) {
2465       if (CombineShadow) {
2466         assert(Shadow);
2467         Shadow = MSV->CreateShadowCast(IRB, Shadow, MSV->getShadowTy(I));
2468         MSV->setShadow(I, Shadow);
2469       }
2470       if (MSV->MS.TrackOrigins) {
2471         assert(Origin);
2472         MSV->setOrigin(I, Origin);
2473       }
2474     }
2475   };
2476 
2477   using ShadowAndOriginCombiner = Combiner<true>;
2478   using OriginCombiner = Combiner<false>;
2479 
2480   /// Propagate origin for arbitrary operation.
2481   void setOriginForNaryOp(Instruction &I) {
2482     if (!MS.TrackOrigins)
2483       return;
2484     IRBuilder<> IRB(&I);
2485     OriginCombiner OC(this, IRB);
2486     for (Use &Op : I.operands())
2487       OC.Add(Op.get());
2488     OC.Done(&I);
2489   }
2490 
2491   size_t VectorOrPrimitiveTypeSizeInBits(Type *Ty) {
2492     assert(!(Ty->isVectorTy() && Ty->getScalarType()->isPointerTy()) &&
2493            "Vector of pointers is not a valid shadow type");
2494     return Ty->isVectorTy() ? cast<FixedVectorType>(Ty)->getNumElements() *
2495                                   Ty->getScalarSizeInBits()
2496                             : Ty->getPrimitiveSizeInBits();
2497   }
2498 
2499   /// Cast between two shadow types, extending or truncating as
2500   /// necessary.
2501   Value *CreateShadowCast(IRBuilder<> &IRB, Value *V, Type *dstTy,
2502                           bool Signed = false) {
2503     Type *srcTy = V->getType();
2504     size_t srcSizeInBits = VectorOrPrimitiveTypeSizeInBits(srcTy);
2505     size_t dstSizeInBits = VectorOrPrimitiveTypeSizeInBits(dstTy);
2506     if (srcSizeInBits > 1 && dstSizeInBits == 1)
2507       return IRB.CreateICmpNE(V, getCleanShadow(V));
2508 
2509     if (dstTy->isIntegerTy() && srcTy->isIntegerTy())
2510       return IRB.CreateIntCast(V, dstTy, Signed);
2511     if (dstTy->isVectorTy() && srcTy->isVectorTy() &&
2512         cast<VectorType>(dstTy)->getElementCount() ==
2513             cast<VectorType>(srcTy)->getElementCount())
2514       return IRB.CreateIntCast(V, dstTy, Signed);
2515     Value *V1 = IRB.CreateBitCast(V, Type::getIntNTy(*MS.C, srcSizeInBits));
2516     Value *V2 =
2517         IRB.CreateIntCast(V1, Type::getIntNTy(*MS.C, dstSizeInBits), Signed);
2518     return IRB.CreateBitCast(V2, dstTy);
2519     // TODO: handle struct types.
2520   }
2521 
2522   /// Cast an application value to the type of its own shadow.
2523   Value *CreateAppToShadowCast(IRBuilder<> &IRB, Value *V) {
2524     Type *ShadowTy = getShadowTy(V);
2525     if (V->getType() == ShadowTy)
2526       return V;
2527     if (V->getType()->isPtrOrPtrVectorTy())
2528       return IRB.CreatePtrToInt(V, ShadowTy);
2529     else
2530       return IRB.CreateBitCast(V, ShadowTy);
2531   }
2532 
2533   /// Propagate shadow for arbitrary operation.
2534   void handleShadowOr(Instruction &I) {
2535     IRBuilder<> IRB(&I);
2536     ShadowAndOriginCombiner SC(this, IRB);
2537     for (Use &Op : I.operands())
2538       SC.Add(Op.get());
2539     SC.Done(&I);
2540   }
2541 
2542   void visitFNeg(UnaryOperator &I) { handleShadowOr(I); }
2543 
2544   // Handle multiplication by constant.
2545   //
2546   // Handle a special case of multiplication by constant that may have one or
2547   // more zeros in the lower bits. This makes the corresponding number of lower
2548   // bits of the result zero as well. We model it by shifting the other operand
2549   // shadow left by the required number of bits. Effectively, we transform
2550   // (X * (A * 2**B)) to ((X << B) * A) and instrument (X << B) as (Sx << B).
2551   // We use multiplication by 2**N instead of shift to cover the case of
2552   // multiplication by 0, which may occur in some elements of a vector operand.
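  // For example, for X * 24 = X * (3 * 2**3) the three lowest bits of the
  // result are always zero, so the shadow becomes Sx * 8, i.e. Sx << 3.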
2553   void handleMulByConstant(BinaryOperator &I, Constant *ConstArg,
2554                            Value *OtherArg) {
2555     Constant *ShadowMul;
2556     Type *Ty = ConstArg->getType();
2557     if (auto *VTy = dyn_cast<VectorType>(Ty)) {
2558       unsigned NumElements = cast<FixedVectorType>(VTy)->getNumElements();
2559       Type *EltTy = VTy->getElementType();
2560       SmallVector<Constant *, 16> Elements;
2561       for (unsigned Idx = 0; Idx < NumElements; ++Idx) {
2562         if (ConstantInt *Elt =
2563                 dyn_cast<ConstantInt>(ConstArg->getAggregateElement(Idx))) {
2564           const APInt &V = Elt->getValue();
2565           APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2566           Elements.push_back(ConstantInt::get(EltTy, V2));
2567         } else {
2568           Elements.push_back(ConstantInt::get(EltTy, 1));
2569         }
2570       }
2571       ShadowMul = ConstantVector::get(Elements);
2572     } else {
2573       if (ConstantInt *Elt = dyn_cast<ConstantInt>(ConstArg)) {
2574         const APInt &V = Elt->getValue();
2575         APInt V2 = APInt(V.getBitWidth(), 1) << V.countr_zero();
2576         ShadowMul = ConstantInt::get(Ty, V2);
2577       } else {
2578         ShadowMul = ConstantInt::get(Ty, 1);
2579       }
2580     }
2581 
2582     IRBuilder<> IRB(&I);
2583     setShadow(&I,
2584               IRB.CreateMul(getShadow(OtherArg), ShadowMul, "msprop_mul_cst"));
2585     setOrigin(&I, getOrigin(OtherArg));
2586   }
2587 
2588   void visitMul(BinaryOperator &I) {
2589     Constant *constOp0 = dyn_cast<Constant>(I.getOperand(0));
2590     Constant *constOp1 = dyn_cast<Constant>(I.getOperand(1));
2591     if (constOp0 && !constOp1)
2592       handleMulByConstant(I, constOp0, I.getOperand(1));
2593     else if (constOp1 && !constOp0)
2594       handleMulByConstant(I, constOp1, I.getOperand(0));
2595     else
2596       handleShadowOr(I);
2597   }
2598 
2599   void visitFAdd(BinaryOperator &I) { handleShadowOr(I); }
2600   void visitFSub(BinaryOperator &I) { handleShadowOr(I); }
2601   void visitFMul(BinaryOperator &I) { handleShadowOr(I); }
2602   void visitAdd(BinaryOperator &I) { handleShadowOr(I); }
2603   void visitSub(BinaryOperator &I) { handleShadowOr(I); }
2604   void visitXor(BinaryOperator &I) { handleShadowOr(I); }
2605 
2606   void handleIntegerDiv(Instruction &I) {
2607     IRBuilder<> IRB(&I);
2608     // Strict on the second argument.
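    // An uninitialized divisor can cause a trap (e.g. division by zero), so it
    // is checked eagerly; the dividend's shadow simply propagates to the result.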
2609     insertShadowCheck(I.getOperand(1), &I);
2610     setShadow(&I, getShadow(&I, 0));
2611     setOrigin(&I, getOrigin(&I, 0));
2612   }
2613 
2614   void visitUDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2615   void visitSDiv(BinaryOperator &I) { handleIntegerDiv(I); }
2616   void visitURem(BinaryOperator &I) { handleIntegerDiv(I); }
2617   void visitSRem(BinaryOperator &I) { handleIntegerDiv(I); }
2618 
2619   // Floating point division is side-effect free. We cannot require the divisor
2620   // to be fully initialized and must propagate shadow instead. See PR37523.
2621   void visitFDiv(BinaryOperator &I) { handleShadowOr(I); }
2622   void visitFRem(BinaryOperator &I) { handleShadowOr(I); }
2623 
2624   /// Instrument == and != comparisons.
2625   ///
2626   /// Sometimes the comparison result is known even if some of the bits of the
2627   /// arguments are not.
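  ///
  /// For example, if A = 0b10?? (where '?' marks uninitialized bits) and
  /// B = 0b0000, then A ^ B has a defined 1 bit, so A != B holds regardless
  /// of the uninitialized bits and the comparison result is fully defined.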
2628   void handleEqualityComparison(ICmpInst &I) {
2629     IRBuilder<> IRB(&I);
2630     Value *A = I.getOperand(0);
2631     Value *B = I.getOperand(1);
2632     Value *Sa = getShadow(A);
2633     Value *Sb = getShadow(B);
2634 
2635     // Get rid of pointers and vectors of pointers.
2636     // For ints (and vectors of ints), types of A and Sa match,
2637     // and this is a no-op.
2638     A = IRB.CreatePointerCast(A, Sa->getType());
2639     B = IRB.CreatePointerCast(B, Sb->getType());
2640 
2641     // A == B  <==>  (C = A^B) == 0
2642     // A != B  <==>  (C = A^B) != 0
2643     // Sc = Sa | Sb
2644     Value *C = IRB.CreateXor(A, B);
2645     Value *Sc = IRB.CreateOr(Sa, Sb);
    // Now dealing with i = (C == 0) comparison (or C != 0; it makes no
    // difference here). The result is defined if at least one of the
    // following is true:
    // * there is a defined 1 bit in C, or
    // * C is fully defined.
    // Equivalently, the result is poisoned iff C has some poisoned bits
    // (Sc != 0) and no bit of C is known to be 1 ((C & ~Sc) == 0):
    // Si = !(C & ~Sc) && Sc
2651     Value *Zero = Constant::getNullValue(Sc->getType());
2652     Value *MinusOne = Constant::getAllOnesValue(Sc->getType());
2653     Value *LHS = IRB.CreateICmpNE(Sc, Zero);
2654     Value *RHS =
2655         IRB.CreateICmpEQ(IRB.CreateAnd(IRB.CreateXor(Sc, MinusOne), C), Zero);
2656     Value *Si = IRB.CreateAnd(LHS, RHS);
2657     Si->setName("_msprop_icmp");
2658     setShadow(&I, Si);
2659     setOriginForNaryOp(I);
2660   }
2661 
  /// Build the lowest possible value of A, taking into account A's
  /// uninitialized bits (given by its shadow Sa).
2664   Value *getLowestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2665                                 bool isSigned) {
2666     if (isSigned) {
2667       // Split shadow into sign bit and other bits.
2668       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2669       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
      // Set the sign bit where it is undefined (most negative value),
      // clear all other undefined bits.
2671       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaOtherBits)),
2672                           SaSignBit);
2673     } else {
2674       // Minimize undefined bits.
2675       return IRB.CreateAnd(A, IRB.CreateNot(Sa));
2676     }
2677   }
2678 
  /// Build the highest possible value of A, taking into account A's
  /// uninitialized bits (given by its shadow Sa).
2681   Value *getHighestPossibleValue(IRBuilder<> &IRB, Value *A, Value *Sa,
2682                                  bool isSigned) {
2683     if (isSigned) {
2684       // Split shadow into sign bit and other bits.
2685       Value *SaOtherBits = IRB.CreateLShr(IRB.CreateShl(Sa, 1), 1);
2686       Value *SaSignBit = IRB.CreateXor(Sa, SaOtherBits);
      // Clear the sign bit where it is undefined (most positive value),
      // set all other undefined bits.
2688       return IRB.CreateOr(IRB.CreateAnd(A, IRB.CreateNot(SaSignBit)),
2689                           SaOtherBits);
2690     } else {
2691       // Maximize undefined bits.
2692       return IRB.CreateOr(A, Sa);
2693     }
2694   }
2695 
2696   /// Instrument relational comparisons.
2697   ///
2698   /// This function does exact shadow propagation for all relational
2699   /// comparisons of integers, pointers and vectors of those.
2700   /// FIXME: output seems suboptimal when one of the operands is a constant
2701   void handleRelationalComparisonExact(ICmpInst &I) {
2702     IRBuilder<> IRB(&I);
2703     Value *A = I.getOperand(0);
2704     Value *B = I.getOperand(1);
2705     Value *Sa = getShadow(A);
2706     Value *Sb = getShadow(B);
2707 
2708     // Get rid of pointers and vectors of pointers.
2709     // For ints (and vectors of ints), types of A and Sa match,
2710     // and this is a no-op.
2711     A = IRB.CreatePointerCast(A, Sa->getType());
2712     B = IRB.CreatePointerCast(B, Sb->getType());
2713 
2714     // Let [a0, a1] be the interval of possible values of A, taking into account
2715     // its undefined bits. Let [b0, b1] be the interval of possible values of B.
2716     // Then (A cmp B) is defined iff (a0 cmp b1) == (a1 cmp b0).
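    // For example, if A = 0b01?? (possible unsigned values 4..7 because of its
    // poisoned bits) and B is the constant 10, then both 4 < 10 and 7 < 10
    // hold, so (A < B) is defined (and true) despite the poison in A.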
2717     bool IsSigned = I.isSigned();
2718     Value *S1 = IRB.CreateICmp(I.getPredicate(),
2719                                getLowestPossibleValue(IRB, A, Sa, IsSigned),
2720                                getHighestPossibleValue(IRB, B, Sb, IsSigned));
2721     Value *S2 = IRB.CreateICmp(I.getPredicate(),
2722                                getHighestPossibleValue(IRB, A, Sa, IsSigned),
2723                                getLowestPossibleValue(IRB, B, Sb, IsSigned));
2724     Value *Si = IRB.CreateXor(S1, S2);
2725     setShadow(&I, Si);
2726     setOriginForNaryOp(I);
2727   }
2728 
2729   /// Instrument signed relational comparisons.
2730   ///
2731   /// Handle sign bit tests: x<0, x>=0, x<=-1, x>-1 by propagating the highest
2732   /// bit of the shadow. Everything else is delegated to handleShadowOr().
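  /// A sign bit test such as x<0 depends only on the sign bit of x, so its
  /// result is poisoned exactly when the sign bit of x's shadow is set;
  /// (Shadow s< 0) below computes precisely that.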
2733   void handleSignedRelationalComparison(ICmpInst &I) {
2734     Constant *constOp;
2735     Value *op = nullptr;
2736     CmpInst::Predicate pre;
2737     if ((constOp = dyn_cast<Constant>(I.getOperand(1)))) {
2738       op = I.getOperand(0);
2739       pre = I.getPredicate();
2740     } else if ((constOp = dyn_cast<Constant>(I.getOperand(0)))) {
2741       op = I.getOperand(1);
2742       pre = I.getSwappedPredicate();
2743     } else {
2744       handleShadowOr(I);
2745       return;
2746     }
2747 
2748     if ((constOp->isNullValue() &&
2749          (pre == CmpInst::ICMP_SLT || pre == CmpInst::ICMP_SGE)) ||
2750         (constOp->isAllOnesValue() &&
2751          (pre == CmpInst::ICMP_SGT || pre == CmpInst::ICMP_SLE))) {
2752       IRBuilder<> IRB(&I);
2753       Value *Shadow = IRB.CreateICmpSLT(getShadow(op), getCleanShadow(op),
2754                                         "_msprop_icmp_s");
2755       setShadow(&I, Shadow);
2756       setOrigin(&I, getOrigin(op));
2757     } else {
2758       handleShadowOr(I);
2759     }
2760   }
2761 
2762   void visitICmpInst(ICmpInst &I) {
2763     if (!ClHandleICmp) {
2764       handleShadowOr(I);
2765       return;
2766     }
2767     if (I.isEquality()) {
2768       handleEqualityComparison(I);
2769       return;
2770     }
2771 
2772     assert(I.isRelational());
2773     if (ClHandleICmpExact) {
2774       handleRelationalComparisonExact(I);
2775       return;
2776     }
2777     if (I.isSigned()) {
2778       handleSignedRelationalComparison(I);
2779       return;
2780     }
2781 
2782     assert(I.isUnsigned());
2783     if ((isa<Constant>(I.getOperand(0)) || isa<Constant>(I.getOperand(1)))) {
2784       handleRelationalComparisonExact(I);
2785       return;
2786     }
2787 
2788     handleShadowOr(I);
2789   }
2790 
2791   void visitFCmpInst(FCmpInst &I) { handleShadowOr(I); }
2792 
2793   void handleShift(BinaryOperator &I) {
2794     IRBuilder<> IRB(&I);
2795     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2796     // Otherwise perform the same shift on S1.
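    // E.g. for %r = shl i32 %x, %n (with shadows %Sx and %Sn) the shadow is
    //   (%Sx shl %n) | sext(%Sn != 0)
    // so any poisoned bit in the shift amount poisons the whole result.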
2797     Value *S1 = getShadow(&I, 0);
2798     Value *S2 = getShadow(&I, 1);
2799     Value *S2Conv =
2800         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2801     Value *V2 = I.getOperand(1);
2802     Value *Shift = IRB.CreateBinOp(I.getOpcode(), S1, V2);
2803     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2804     setOriginForNaryOp(I);
2805   }
2806 
2807   void visitShl(BinaryOperator &I) { handleShift(I); }
2808   void visitAShr(BinaryOperator &I) { handleShift(I); }
2809   void visitLShr(BinaryOperator &I) { handleShift(I); }
2810 
2811   void handleFunnelShift(IntrinsicInst &I) {
2812     IRBuilder<> IRB(&I);
2813     // If any of the S2 bits are poisoned, the whole thing is poisoned.
2814     // Otherwise perform the same shift on S0 and S1.
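    // E.g. for %r = fshl(%a, %b, %n) the shadow is
    //   fshl(%Sa, %Sb, %n) | sext(%Sn != 0)
    // where %Sa, %Sb and %Sn denote the operand shadows.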
2815     Value *S0 = getShadow(&I, 0);
2816     Value *S1 = getShadow(&I, 1);
2817     Value *S2 = getShadow(&I, 2);
2818     Value *S2Conv =
2819         IRB.CreateSExt(IRB.CreateICmpNE(S2, getCleanShadow(S2)), S2->getType());
2820     Value *V2 = I.getOperand(2);
2821     Function *Intrin = Intrinsic::getDeclaration(
2822         I.getModule(), I.getIntrinsicID(), S2Conv->getType());
2823     Value *Shift = IRB.CreateCall(Intrin, {S0, S1, V2});
2824     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
2825     setOriginForNaryOp(I);
2826   }
2827 
2828   /// Instrument llvm.memmove
2829   ///
2830   /// At this point we don't know if llvm.memmove will be inlined or not.
2831   /// If we don't instrument it and it gets inlined,
2832   /// our interceptor will not kick in and we will lose the memmove.
2833   /// If we instrument the call here, but it does not get inlined,
  /// we will memmove the shadow twice, which is bad in the case
  /// of overlapping regions. So, we simply lower the intrinsic to a call.
2836   ///
2837   /// Similar situation exists for memcpy and memset.
2838   void visitMemMoveInst(MemMoveInst &I) {
2839     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2840     IRBuilder<> IRB(&I);
2841     IRB.CreateCall(
2842         MS.MemmoveFn,
2843         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2844          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2845          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2846     I.eraseFromParent();
2847   }
2848 
2849   /// Instrument memcpy
2850   ///
2851   /// Similar to memmove: avoid copying shadow twice. This is somewhat
  /// unfortunate as it may slow down small constant memcpys.
2853   /// FIXME: consider doing manual inline for small constant sizes and proper
2854   /// alignment.
2855   ///
2856   /// Note: This also handles memcpy.inline, which promises no calls to external
2857   /// functions as an optimization. However, with instrumentation enabled this
2858   /// is difficult to promise; additionally, we know that the MSan runtime
2859   /// exists and provides __msan_memcpy(). Therefore, we assume that with
2860   /// instrumentation it's safe to turn memcpy.inline into a call to
2861   /// __msan_memcpy(). Should this be wrong, such as when implementing memcpy()
2862   /// itself, instrumentation should be disabled with the no_sanitize attribute.
2863   void visitMemCpyInst(MemCpyInst &I) {
2864     getShadow(I.getArgOperand(1)); // Ensure shadow initialized
2865     IRBuilder<> IRB(&I);
2866     IRB.CreateCall(
2867         MS.MemcpyFn,
2868         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2869          IRB.CreatePointerCast(I.getArgOperand(1), IRB.getInt8PtrTy()),
2870          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2871     I.eraseFromParent();
2872   }
2873 
2874   // Same as memcpy.
2875   void visitMemSetInst(MemSetInst &I) {
2876     IRBuilder<> IRB(&I);
2877     IRB.CreateCall(
2878         MS.MemsetFn,
2879         {IRB.CreatePointerCast(I.getArgOperand(0), IRB.getInt8PtrTy()),
2880          IRB.CreateIntCast(I.getArgOperand(1), IRB.getInt32Ty(), false),
2881          IRB.CreateIntCast(I.getArgOperand(2), MS.IntptrTy, false)});
2882     I.eraseFromParent();
2883   }
2884 
2885   void visitVAStartInst(VAStartInst &I) { VAHelper->visitVAStartInst(I); }
2886 
2887   void visitVACopyInst(VACopyInst &I) { VAHelper->visitVACopyInst(I); }
2888 
2889   /// Handle vector store-like intrinsics.
2890   ///
2891   /// Instrument intrinsics that look like a simple SIMD store: writes memory,
2892   /// has 1 pointer argument and 1 vector argument, returns void.
2893   bool handleVectorStoreIntrinsic(IntrinsicInst &I) {
2894     IRBuilder<> IRB(&I);
2895     Value *Addr = I.getArgOperand(0);
2896     Value *Shadow = getShadow(&I, 1);
2897     Value *ShadowPtr, *OriginPtr;
2898 
2899     // We don't know the pointer alignment (could be unaligned SSE store!).
    // Have to assume the worst case.
2901     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
2902         Addr, IRB, Shadow->getType(), Align(1), /*isStore*/ true);
2903     IRB.CreateAlignedStore(Shadow, ShadowPtr, Align(1));
2904 
2905     if (ClCheckAccessAddress)
2906       insertShadowCheck(Addr, &I);
2907 
2908     // FIXME: factor out common code from materializeStores
2909     if (MS.TrackOrigins)
2910       IRB.CreateStore(getOrigin(&I, 1), OriginPtr);
2911     return true;
2912   }
2913 
2914   /// Handle vector load-like intrinsics.
2915   ///
2916   /// Instrument intrinsics that look like a simple SIMD load: reads memory,
2917   /// has 1 pointer argument, returns a vector.
2918   bool handleVectorLoadIntrinsic(IntrinsicInst &I) {
2919     IRBuilder<> IRB(&I);
2920     Value *Addr = I.getArgOperand(0);
2921 
2922     Type *ShadowTy = getShadowTy(&I);
2923     Value *ShadowPtr = nullptr, *OriginPtr = nullptr;
2924     if (PropagateShadow) {
2925       // We don't know the pointer alignment (could be unaligned SSE load!).
      // Have to assume the worst case.
2927       const Align Alignment = Align(1);
2928       std::tie(ShadowPtr, OriginPtr) =
2929           getShadowOriginPtr(Addr, IRB, ShadowTy, Alignment, /*isStore*/ false);
2930       setShadow(&I,
2931                 IRB.CreateAlignedLoad(ShadowTy, ShadowPtr, Alignment, "_msld"));
2932     } else {
2933       setShadow(&I, getCleanShadow(&I));
2934     }
2935 
2936     if (ClCheckAccessAddress)
2937       insertShadowCheck(Addr, &I);
2938 
2939     if (MS.TrackOrigins) {
2940       if (PropagateShadow)
2941         setOrigin(&I, IRB.CreateLoad(MS.OriginTy, OriginPtr));
2942       else
2943         setOrigin(&I, getCleanOrigin());
2944     }
2945     return true;
2946   }
2947 
2948   /// Handle (SIMD arithmetic)-like intrinsics.
2949   ///
2950   /// Instrument intrinsics with any number of arguments of the same type,
2951   /// equal to the return type. The type should be simple (no aggregates or
2952   /// pointers; vectors are fine).
2953   /// Caller guarantees that this intrinsic does not access memory.
2954   bool maybeHandleSimpleNomemIntrinsic(IntrinsicInst &I) {
2955     Type *RetTy = I.getType();
2956     if (!(RetTy->isIntOrIntVectorTy() || RetTy->isFPOrFPVectorTy() ||
2957           RetTy->isX86_MMXTy()))
2958       return false;
2959 
2960     unsigned NumArgOperands = I.arg_size();
2961     for (unsigned i = 0; i < NumArgOperands; ++i) {
2962       Type *Ty = I.getArgOperand(i)->getType();
2963       if (Ty != RetTy)
2964         return false;
2965     }
2966 
2967     IRBuilder<> IRB(&I);
2968     ShadowAndOriginCombiner SC(this, IRB);
2969     for (unsigned i = 0; i < NumArgOperands; ++i)
2970       SC.Add(I.getArgOperand(i));
2971     SC.Done(&I);
2972 
2973     return true;
2974   }
2975 
2976   /// Heuristically instrument unknown intrinsics.
2977   ///
2978   /// The main purpose of this code is to do something reasonable with all
2979   /// random intrinsics we might encounter, most importantly - SIMD intrinsics.
2980   /// We recognize several classes of intrinsics by their argument types and
  /// ModRefBehavior and apply special instrumentation when we are reasonably
2982   /// sure that we know what the intrinsic does.
2983   ///
2984   /// We special-case intrinsics where this approach fails. See llvm.bswap
2985   /// handling as an example of that.
2986   bool handleUnknownIntrinsic(IntrinsicInst &I) {
2987     unsigned NumArgOperands = I.arg_size();
2988     if (NumArgOperands == 0)
2989       return false;
2990 
2991     if (NumArgOperands == 2 && I.getArgOperand(0)->getType()->isPointerTy() &&
2992         I.getArgOperand(1)->getType()->isVectorTy() &&
2993         I.getType()->isVoidTy() && !I.onlyReadsMemory()) {
2994       // This looks like a vector store.
2995       return handleVectorStoreIntrinsic(I);
2996     }
2997 
2998     if (NumArgOperands == 1 && I.getArgOperand(0)->getType()->isPointerTy() &&
2999         I.getType()->isVectorTy() && I.onlyReadsMemory()) {
3000       // This looks like a vector load.
3001       return handleVectorLoadIntrinsic(I);
3002     }
3003 
3004     if (I.doesNotAccessMemory())
3005       if (maybeHandleSimpleNomemIntrinsic(I))
3006         return true;
3007 
3008     // FIXME: detect and handle SSE maskstore/maskload
3009     return false;
3010   }
3011 
3012   void handleInvariantGroup(IntrinsicInst &I) {
3013     setShadow(&I, getShadow(&I, 0));
3014     setOrigin(&I, getOrigin(&I, 0));
3015   }
3016 
3017   void handleLifetimeStart(IntrinsicInst &I) {
3018     if (!PoisonStack)
3019       return;
3020     AllocaInst *AI = llvm::findAllocaForValue(I.getArgOperand(1));
3021     if (!AI)
3022       InstrumentLifetimeStart = false;
3023     LifetimeStartList.push_back(std::make_pair(&I, AI));
3024   }
3025 
3026   void handleBswap(IntrinsicInst &I) {
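    // Byte swapping permutes bytes without mixing bits, so the shadow of the
    // result is simply the byte-swapped shadow of the operand.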
3027     IRBuilder<> IRB(&I);
3028     Value *Op = I.getArgOperand(0);
3029     Type *OpType = Op->getType();
3030     Function *BswapFunc = Intrinsic::getDeclaration(
3031         F.getParent(), Intrinsic::bswap, ArrayRef(&OpType, 1));
3032     setShadow(&I, IRB.CreateCall(BswapFunc, getShadow(Op)));
3033     setOrigin(&I, getOrigin(Op));
3034   }
3035 
3036   void handleCountZeroes(IntrinsicInst &I) {
3037     IRBuilder<> IRB(&I);
3038     Value *Src = I.getArgOperand(0);
3039 
    // Set the output shadow based on the input shadow.
3041     Value *BoolShadow = IRB.CreateIsNotNull(getShadow(Src), "_mscz_bs");
3042 
    // If zero poison is requested, mix it into the shadow.
3044     Constant *IsZeroPoison = cast<Constant>(I.getOperand(1));
3045     if (!IsZeroPoison->isZeroValue()) {
3046       Value *BoolZeroPoison = IRB.CreateIsNull(Src, "_mscz_bzp");
3047       BoolShadow = IRB.CreateOr(BoolShadow, BoolZeroPoison, "_mscz_bs");
3048     }
3049 
3050     Value *OutputShadow =
3051         IRB.CreateSExt(BoolShadow, getShadowTy(Src), "_mscz_os");
3052 
3053     setShadow(&I, OutputShadow);
3054     setOriginForNaryOp(I);
3055   }
3056 
3057   // Instrument vector convert intrinsic.
3058   //
3059   // This function instruments intrinsics like cvtsi2ss:
3060   // %Out = int_xxx_cvtyyy(%ConvertOp)
3061   // or
3062   // %Out = int_xxx_cvtyyy(%CopyOp, %ConvertOp)
  // The intrinsic converts \p NumUsedElements elements of \p ConvertOp to the
  // same number of \p Out elements, and (if it has 2 arguments) copies the
  // rest of the elements from \p CopyOp.
  // In most cases the conversion involves a floating-point value, which may
  // trigger a hardware exception when not fully initialized. For this reason
  // we require \p ConvertOp[0:NumUsedElements] to be fully initialized and
  // trap otherwise.
3069   // We copy the shadow of \p CopyOp[NumUsedElements:] to \p
3070   // Out[NumUsedElements:]. This means that intrinsics without \p CopyOp always
3071   // return a fully initialized value.
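  // For example, x86_sse2_cvtsd2ss(%CopyOp, %ConvertOp) converts element 0 of
  // %ConvertOp and takes the remaining elements from %CopyOp, so it is handled
  // with NumUsedElements == 1: element 0 of %ConvertOp is checked, element 0
  // of the result shadow is cleared, and the other result elements keep
  // %CopyOp's shadow.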
3072   void handleVectorConvertIntrinsic(IntrinsicInst &I, int NumUsedElements,
3073                                     bool HasRoundingMode = false) {
3074     IRBuilder<> IRB(&I);
3075     Value *CopyOp, *ConvertOp;
3076 
3077     assert((!HasRoundingMode ||
3078             isa<ConstantInt>(I.getArgOperand(I.arg_size() - 1))) &&
3079            "Invalid rounding mode");
3080 
3081     switch (I.arg_size() - HasRoundingMode) {
3082     case 2:
3083       CopyOp = I.getArgOperand(0);
3084       ConvertOp = I.getArgOperand(1);
3085       break;
3086     case 1:
3087       ConvertOp = I.getArgOperand(0);
3088       CopyOp = nullptr;
3089       break;
3090     default:
3091       llvm_unreachable("Cvt intrinsic with unsupported number of arguments.");
3092     }
3093 
3094     // The first *NumUsedElements* elements of ConvertOp are converted to the
3095     // same number of output elements. The rest of the output is copied from
3096     // CopyOp, or (if not available) filled with zeroes.
3097     // Combine shadow for elements of ConvertOp that are used in this operation,
3098     // and insert a check.
3099     // FIXME: consider propagating shadow of ConvertOp, at least in the case of
3100     // int->any conversion.
3101     Value *ConvertShadow = getShadow(ConvertOp);
3102     Value *AggShadow = nullptr;
3103     if (ConvertOp->getType()->isVectorTy()) {
3104       AggShadow = IRB.CreateExtractElement(
3105           ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), 0));
3106       for (int i = 1; i < NumUsedElements; ++i) {
3107         Value *MoreShadow = IRB.CreateExtractElement(
3108             ConvertShadow, ConstantInt::get(IRB.getInt32Ty(), i));
3109         AggShadow = IRB.CreateOr(AggShadow, MoreShadow);
3110       }
3111     } else {
3112       AggShadow = ConvertShadow;
3113     }
3114     assert(AggShadow->getType()->isIntegerTy());
3115     insertShadowCheck(AggShadow, getOrigin(ConvertOp), &I);
3116 
3117     // Build result shadow by zero-filling parts of CopyOp shadow that come from
3118     // ConvertOp.
3119     if (CopyOp) {
3120       assert(CopyOp->getType() == I.getType());
3121       assert(CopyOp->getType()->isVectorTy());
3122       Value *ResultShadow = getShadow(CopyOp);
3123       Type *EltTy = cast<VectorType>(ResultShadow->getType())->getElementType();
3124       for (int i = 0; i < NumUsedElements; ++i) {
3125         ResultShadow = IRB.CreateInsertElement(
3126             ResultShadow, ConstantInt::getNullValue(EltTy),
3127             ConstantInt::get(IRB.getInt32Ty(), i));
3128       }
3129       setShadow(&I, ResultShadow);
3130       setOrigin(&I, getOrigin(CopyOp));
3131     } else {
3132       setShadow(&I, getCleanShadow(&I));
3133       setOrigin(&I, getCleanOrigin());
3134     }
3135   }
3136 
  // Given a scalar or vector, extract the lower 64 bits (or fewer), and return
  // all zeroes if that value is zero, and all ones otherwise.
3139   Value *Lower64ShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3140     if (S->getType()->isVectorTy())
3141       S = CreateShadowCast(IRB, S, IRB.getInt64Ty(), /* Signed */ true);
3142     assert(S->getType()->getPrimitiveSizeInBits() <= 64);
3143     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3144     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3145   }
3146 
3147   // Given a vector, extract its first element, and return all
3148   // zeroes if it is zero, and all ones otherwise.
3149   Value *LowerElementShadowExtend(IRBuilder<> &IRB, Value *S, Type *T) {
3150     Value *S1 = IRB.CreateExtractElement(S, (uint64_t)0);
3151     Value *S2 = IRB.CreateICmpNE(S1, getCleanShadow(S1));
3152     return CreateShadowCast(IRB, S2, T, /* Signed */ true);
3153   }
3154 
3155   Value *VariableShadowExtend(IRBuilder<> &IRB, Value *S) {
3156     Type *T = S->getType();
3157     assert(T->isVectorTy());
3158     Value *S2 = IRB.CreateICmpNE(S, getCleanShadow(S));
3159     return IRB.CreateSExt(S2, T);
3160   }
3161 
3162   // Instrument vector shift intrinsic.
3163   //
3164   // This function instruments intrinsics like int_x86_avx2_psll_w.
3165   // Intrinsic shifts %In by %ShiftSize bits.
3166   // %ShiftSize may be a vector. In that case the lower 64 bits determine shift
3167   // size, and the rest is ignored. Behavior is defined even if shift size is
3168   // greater than register (or field) width.
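  // E.g. int_x86_sse2_psll_w shifts each 16-bit element of %In left by the
  // count taken from the low 64 bits of %ShiftSize; counts larger than 15
  // simply zero the result rather than being undefined.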
3169   void handleVectorShiftIntrinsic(IntrinsicInst &I, bool Variable) {
3170     assert(I.arg_size() == 2);
3171     IRBuilder<> IRB(&I);
3172     // If any of the S2 bits are poisoned, the whole thing is poisoned.
3173     // Otherwise perform the same shift on S1.
3174     Value *S1 = getShadow(&I, 0);
3175     Value *S2 = getShadow(&I, 1);
3176     Value *S2Conv = Variable ? VariableShadowExtend(IRB, S2)
3177                              : Lower64ShadowExtend(IRB, S2, getShadowTy(&I));
3178     Value *V1 = I.getOperand(0);
3179     Value *V2 = I.getOperand(1);
3180     Value *Shift = IRB.CreateCall(I.getFunctionType(), I.getCalledOperand(),
3181                                   {IRB.CreateBitCast(S1, V1->getType()), V2});
3182     Shift = IRB.CreateBitCast(Shift, getShadowTy(&I));
3183     setShadow(&I, IRB.CreateOr(Shift, S2Conv));
3184     setOriginForNaryOp(I);
3185   }
3186 
3187   // Get an X86_MMX-sized vector type.
3188   Type *getMMXVectorTy(unsigned EltSizeInBits) {
3189     const unsigned X86_MMXSizeInBits = 64;
3190     assert(EltSizeInBits != 0 && (X86_MMXSizeInBits % EltSizeInBits) == 0 &&
3191            "Illegal MMX vector element size");
3192     return FixedVectorType::get(IntegerType::get(*MS.C, EltSizeInBits),
3193                                 X86_MMXSizeInBits / EltSizeInBits);
3194   }
3195 
3196   // Returns a signed counterpart for an (un)signed-saturate-and-pack
3197   // intrinsic.
3198   Intrinsic::ID getSignedPackIntrinsic(Intrinsic::ID id) {
3199     switch (id) {
3200     case Intrinsic::x86_sse2_packsswb_128:
3201     case Intrinsic::x86_sse2_packuswb_128:
3202       return Intrinsic::x86_sse2_packsswb_128;
3203 
3204     case Intrinsic::x86_sse2_packssdw_128:
3205     case Intrinsic::x86_sse41_packusdw:
3206       return Intrinsic::x86_sse2_packssdw_128;
3207 
3208     case Intrinsic::x86_avx2_packsswb:
3209     case Intrinsic::x86_avx2_packuswb:
3210       return Intrinsic::x86_avx2_packsswb;
3211 
3212     case Intrinsic::x86_avx2_packssdw:
3213     case Intrinsic::x86_avx2_packusdw:
3214       return Intrinsic::x86_avx2_packssdw;
3215 
3216     case Intrinsic::x86_mmx_packsswb:
3217     case Intrinsic::x86_mmx_packuswb:
3218       return Intrinsic::x86_mmx_packsswb;
3219 
3220     case Intrinsic::x86_mmx_packssdw:
3221       return Intrinsic::x86_mmx_packssdw;
3222     default:
3223       llvm_unreachable("unexpected intrinsic id");
3224     }
3225   }
3226 
3227   // Instrument vector pack intrinsic.
3228   //
  // This function instruments intrinsics like x86_mmx_packsswb, which pack
  // elements of 2 input vectors into half as many bits with saturation.
3231   // Shadow is propagated with the signed variant of the same intrinsic applied
3232   // to sext(Sa != zeroinitializer), sext(Sb != zeroinitializer).
3233   // EltSizeInBits is used only for x86mmx arguments.
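  // The signed variant is used because a poisoned (all-ones, i.e. -1) shadow
  // element lies inside the signed saturation range and therefore packs to an
  // all-ones narrow element, whereas unsigned saturation would clamp it to 0
  // and incorrectly clean the shadow.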
3234   void handleVectorPackIntrinsic(IntrinsicInst &I, unsigned EltSizeInBits = 0) {
3235     assert(I.arg_size() == 2);
3236     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3237     IRBuilder<> IRB(&I);
3238     Value *S1 = getShadow(&I, 0);
3239     Value *S2 = getShadow(&I, 1);
3240     assert(isX86_MMX || S1->getType()->isVectorTy());
3241 
3242     // SExt and ICmpNE below must apply to individual elements of input vectors.
3243     // In case of x86mmx arguments, cast them to appropriate vector types and
3244     // back.
3245     Type *T = isX86_MMX ? getMMXVectorTy(EltSizeInBits) : S1->getType();
3246     if (isX86_MMX) {
3247       S1 = IRB.CreateBitCast(S1, T);
3248       S2 = IRB.CreateBitCast(S2, T);
3249     }
3250     Value *S1_ext =
3251         IRB.CreateSExt(IRB.CreateICmpNE(S1, Constant::getNullValue(T)), T);
3252     Value *S2_ext =
3253         IRB.CreateSExt(IRB.CreateICmpNE(S2, Constant::getNullValue(T)), T);
3254     if (isX86_MMX) {
3255       Type *X86_MMXTy = Type::getX86_MMXTy(*MS.C);
3256       S1_ext = IRB.CreateBitCast(S1_ext, X86_MMXTy);
3257       S2_ext = IRB.CreateBitCast(S2_ext, X86_MMXTy);
3258     }
3259 
3260     Function *ShadowFn = Intrinsic::getDeclaration(
3261         F.getParent(), getSignedPackIntrinsic(I.getIntrinsicID()));
3262 
3263     Value *S =
3264         IRB.CreateCall(ShadowFn, {S1_ext, S2_ext}, "_msprop_vector_pack");
3265     if (isX86_MMX)
3266       S = IRB.CreateBitCast(S, getShadowTy(&I));
3267     setShadow(&I, S);
3268     setOriginForNaryOp(I);
3269   }
3270 
3271   // Instrument sum-of-absolute-differences intrinsic.
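  // psad.bw sums absolute byte differences into a 16-bit value per 64-bit
  // result element, so only the low 16 bits of each element can be nonzero;
  // any poisoned bit in the corresponding input elements poisons those 16
  // bits, while the upper bits of the shadow are cleared by the final lshr.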
3272   void handleVectorSadIntrinsic(IntrinsicInst &I) {
3273     const unsigned SignificantBitsPerResultElement = 16;
3274     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3275     Type *ResTy = isX86_MMX ? IntegerType::get(*MS.C, 64) : I.getType();
3276     unsigned ZeroBitsPerResultElement =
3277         ResTy->getScalarSizeInBits() - SignificantBitsPerResultElement;
3278 
3279     IRBuilder<> IRB(&I);
3280     auto *Shadow0 = getShadow(&I, 0);
3281     auto *Shadow1 = getShadow(&I, 1);
3282     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3283     S = IRB.CreateBitCast(S, ResTy);
3284     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3285                        ResTy);
3286     S = IRB.CreateLShr(S, ZeroBitsPerResultElement);
3287     S = IRB.CreateBitCast(S, getShadowTy(&I));
3288     setShadow(&I, S);
3289     setOriginForNaryOp(I);
3290   }
3291 
3292   // Instrument multiply-add intrinsic.
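  // Each double-width result element is a sum of products of a pair of
  // adjacent input elements, so it is poisoned iff any bit of that pair (in
  // either operand) is poisoned.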
3293   void handleVectorPmaddIntrinsic(IntrinsicInst &I,
3294                                   unsigned EltSizeInBits = 0) {
3295     bool isX86_MMX = I.getOperand(0)->getType()->isX86_MMXTy();
3296     Type *ResTy = isX86_MMX ? getMMXVectorTy(EltSizeInBits * 2) : I.getType();
3297     IRBuilder<> IRB(&I);
3298     auto *Shadow0 = getShadow(&I, 0);
3299     auto *Shadow1 = getShadow(&I, 1);
3300     Value *S = IRB.CreateOr(Shadow0, Shadow1);
3301     S = IRB.CreateBitCast(S, ResTy);
3302     S = IRB.CreateSExt(IRB.CreateICmpNE(S, Constant::getNullValue(ResTy)),
3303                        ResTy);
3304     S = IRB.CreateBitCast(S, getShadowTy(&I));
3305     setShadow(&I, S);
3306     setOriginForNaryOp(I);
3307   }
3308 
3309   // Instrument compare-packed intrinsic.
  // Basically, an or followed by sext(icmp ne 0) to end up with an all-zeros
  // or all-ones shadow.
3312   void handleVectorComparePackedIntrinsic(IntrinsicInst &I) {
3313     IRBuilder<> IRB(&I);
3314     Type *ResTy = getShadowTy(&I);
3315     auto *Shadow0 = getShadow(&I, 0);
3316     auto *Shadow1 = getShadow(&I, 1);
3317     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3318     Value *S = IRB.CreateSExt(
3319         IRB.CreateICmpNE(S0, Constant::getNullValue(ResTy)), ResTy);
3320     setShadow(&I, S);
3321     setOriginForNaryOp(I);
3322   }
3323 
3324   // Instrument compare-scalar intrinsic.
3325   // This handles both cmp* intrinsics which return the result in the first
3326   // element of a vector, and comi* which return the result as i32.
3327   void handleVectorCompareScalarIntrinsic(IntrinsicInst &I) {
3328     IRBuilder<> IRB(&I);
3329     auto *Shadow0 = getShadow(&I, 0);
3330     auto *Shadow1 = getShadow(&I, 1);
3331     Value *S0 = IRB.CreateOr(Shadow0, Shadow1);
3332     Value *S = LowerElementShadowExtend(IRB, S0, getShadowTy(&I));
3333     setShadow(&I, S);
3334     setOriginForNaryOp(I);
3335   }
3336 
3337   // Instrument generic vector reduction intrinsics
3338   // by ORing together all their fields.
3339   void handleVectorReduceIntrinsic(IntrinsicInst &I) {
3340     IRBuilder<> IRB(&I);
3341     Value *S = IRB.CreateOrReduce(getShadow(&I, 0));
3342     setShadow(&I, S);
3343     setOrigin(&I, getOrigin(&I, 0));
3344   }
3345 
3346   // Instrument vector.reduce.or intrinsic.
3347   // Valid (non-poisoned) set bits in the operand pull low the
3348   // corresponding shadow bits.
3349   void handleVectorReduceOrIntrinsic(IntrinsicInst &I) {
3350     IRBuilder<> IRB(&I);
3351     Value *OperandShadow = getShadow(&I, 0);
3352     Value *OperandUnsetBits = IRB.CreateNot(I.getOperand(0));
3353     Value *OperandUnsetOrPoison = IRB.CreateOr(OperandUnsetBits, OperandShadow);
    // Bit N is clean if any element's bit N is 1 and unpoisoned.
3355     Value *OutShadowMask = IRB.CreateAndReduce(OperandUnsetOrPoison);
    // Otherwise, it is clean if every element's bit N is unpoisoned.
3357     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3358     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3359 
3360     setShadow(&I, S);
3361     setOrigin(&I, getOrigin(&I, 0));
3362   }
3363 
3364   // Instrument vector.reduce.and intrinsic.
3365   // Valid (non-poisoned) unset bits in the operand pull down the
3366   // corresponding shadow bits.
3367   void handleVectorReduceAndIntrinsic(IntrinsicInst &I) {
3368     IRBuilder<> IRB(&I);
3369     Value *OperandShadow = getShadow(&I, 0);
3370     Value *OperandSetOrPoison = IRB.CreateOr(I.getOperand(0), OperandShadow);
    // Bit N is clean if any element's bit N is 0 and unpoisoned.
3372     Value *OutShadowMask = IRB.CreateAndReduce(OperandSetOrPoison);
    // Otherwise, it is clean if every element's bit N is unpoisoned.
3374     Value *OrShadow = IRB.CreateOrReduce(OperandShadow);
3375     Value *S = IRB.CreateAnd(OutShadowMask, OrShadow);
3376 
3377     setShadow(&I, S);
3378     setOrigin(&I, getOrigin(&I, 0));
3379   }
3380 
3381   void handleStmxcsr(IntrinsicInst &I) {
3382     IRBuilder<> IRB(&I);
3383     Value *Addr = I.getArgOperand(0);
3384     Type *Ty = IRB.getInt32Ty();
3385     Value *ShadowPtr =
3386         getShadowOriginPtr(Addr, IRB, Ty, Align(1), /*isStore*/ true).first;
3387 
3388     IRB.CreateStore(getCleanShadow(Ty),
3389                     IRB.CreatePointerCast(ShadowPtr, Ty->getPointerTo()));
3390 
3391     if (ClCheckAccessAddress)
3392       insertShadowCheck(Addr, &I);
3393   }
3394 
3395   void handleLdmxcsr(IntrinsicInst &I) {
3396     if (!InsertChecks)
3397       return;
3398 
3399     IRBuilder<> IRB(&I);
3400     Value *Addr = I.getArgOperand(0);
3401     Type *Ty = IRB.getInt32Ty();
3402     const Align Alignment = Align(1);
3403     Value *ShadowPtr, *OriginPtr;
3404     std::tie(ShadowPtr, OriginPtr) =
3405         getShadowOriginPtr(Addr, IRB, Ty, Alignment, /*isStore*/ false);
3406 
3407     if (ClCheckAccessAddress)
3408       insertShadowCheck(Addr, &I);
3409 
3410     Value *Shadow = IRB.CreateAlignedLoad(Ty, ShadowPtr, Alignment, "_ldmxcsr");
3411     Value *Origin = MS.TrackOrigins ? IRB.CreateLoad(MS.OriginTy, OriginPtr)
3412                                     : getCleanOrigin();
3413     insertShadowCheck(Shadow, Origin, &I);
3414   }
3415 
3416   void handleMaskedExpandLoad(IntrinsicInst &I) {
3417     IRBuilder<> IRB(&I);
3418     Value *Ptr = I.getArgOperand(0);
3419     Value *Mask = I.getArgOperand(1);
3420     Value *PassThru = I.getArgOperand(2);
3421 
3422     if (ClCheckAccessAddress) {
3423       insertShadowCheck(Ptr, &I);
3424       insertShadowCheck(Mask, &I);
3425     }
3426 
3427     if (!PropagateShadow) {
3428       setShadow(&I, getCleanShadow(&I));
3429       setOrigin(&I, getCleanOrigin());
3430       return;
3431     }
3432 
3433     Type *ShadowTy = getShadowTy(&I);
3434     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3435     auto [ShadowPtr, OriginPtr] =
3436         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ false);
3437 
3438     Value *Shadow = IRB.CreateMaskedExpandLoad(
3439         ShadowTy, ShadowPtr, Mask, getShadow(PassThru), "_msmaskedexpload");
3440 
3441     setShadow(&I, Shadow);
3442 
3443     // TODO: Store origins.
3444     setOrigin(&I, getCleanOrigin());
3445   }
3446 
3447   void handleMaskedCompressStore(IntrinsicInst &I) {
3448     IRBuilder<> IRB(&I);
3449     Value *Values = I.getArgOperand(0);
3450     Value *Ptr = I.getArgOperand(1);
3451     Value *Mask = I.getArgOperand(2);
3452 
3453     if (ClCheckAccessAddress) {
3454       insertShadowCheck(Ptr, &I);
3455       insertShadowCheck(Mask, &I);
3456     }
3457 
3458     Value *Shadow = getShadow(Values);
3459     Type *ElementShadowTy =
3460         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3461     auto [ShadowPtr, OriginPtrs] =
3462         getShadowOriginPtr(Ptr, IRB, ElementShadowTy, {}, /*isStore*/ true);
3463 
3464     IRB.CreateMaskedCompressStore(Shadow, ShadowPtr, Mask);
3465 
3466     // TODO: Store origins.
3467   }
3468 
3469   void handleMaskedGather(IntrinsicInst &I) {
3470     IRBuilder<> IRB(&I);
3471     Value *Ptrs = I.getArgOperand(0);
3472     const Align Alignment(
3473         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3474     Value *Mask = I.getArgOperand(2);
3475     Value *PassThru = I.getArgOperand(3);
3476 
3477     Type *PtrsShadowTy = getShadowTy(Ptrs);
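    // Only pointers in enabled (mask != 0) lanes are dereferenced, so only
    // their shadow needs to be checked; disabled lanes are replaced with a
    // clean (zero) shadow before the check.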
3478     if (ClCheckAccessAddress) {
3479       insertShadowCheck(Mask, &I);
3480       Value *MaskedPtrShadow = IRB.CreateSelect(
3481           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3482           "_msmaskedptrs");
3483       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3484     }
3485 
3486     if (!PropagateShadow) {
3487       setShadow(&I, getCleanShadow(&I));
3488       setOrigin(&I, getCleanOrigin());
3489       return;
3490     }
3491 
3492     Type *ShadowTy = getShadowTy(&I);
3493     Type *ElementShadowTy = cast<VectorType>(ShadowTy)->getElementType();
3494     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3495         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ false);
3496 
3497     Value *Shadow =
3498         IRB.CreateMaskedGather(ShadowTy, ShadowPtrs, Alignment, Mask,
3499                                getShadow(PassThru), "_msmaskedgather");
3500 
3501     setShadow(&I, Shadow);
3502 
3503     // TODO: Store origins.
3504     setOrigin(&I, getCleanOrigin());
3505   }
3506 
3507   void handleMaskedScatter(IntrinsicInst &I) {
3508     IRBuilder<> IRB(&I);
3509     Value *Values = I.getArgOperand(0);
3510     Value *Ptrs = I.getArgOperand(1);
3511     const Align Alignment(
3512         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3513     Value *Mask = I.getArgOperand(3);
3514 
3515     Type *PtrsShadowTy = getShadowTy(Ptrs);
3516     if (ClCheckAccessAddress) {
3517       insertShadowCheck(Mask, &I);
3518       Value *MaskedPtrShadow = IRB.CreateSelect(
3519           Mask, getShadow(Ptrs), Constant::getNullValue((PtrsShadowTy)),
3520           "_msmaskedptrs");
3521       insertShadowCheck(MaskedPtrShadow, getOrigin(Ptrs), &I);
3522     }
3523 
3524     Value *Shadow = getShadow(Values);
3525     Type *ElementShadowTy =
3526         getShadowTy(cast<VectorType>(Values->getType())->getElementType());
3527     auto [ShadowPtrs, OriginPtrs] = getShadowOriginPtr(
3528         Ptrs, IRB, ElementShadowTy, Alignment, /*isStore*/ true);
3529 
3530     IRB.CreateMaskedScatter(Shadow, ShadowPtrs, Alignment, Mask);
3531 
3532     // TODO: Store origin.
3533   }
3534 
3535   void handleMaskedStore(IntrinsicInst &I) {
3536     IRBuilder<> IRB(&I);
3537     Value *V = I.getArgOperand(0);
3538     Value *Ptr = I.getArgOperand(1);
3539     const Align Alignment(
3540         cast<ConstantInt>(I.getArgOperand(2))->getZExtValue());
3541     Value *Mask = I.getArgOperand(3);
3542     Value *Shadow = getShadow(V);
3543 
3544     if (ClCheckAccessAddress) {
3545       insertShadowCheck(Ptr, &I);
3546       insertShadowCheck(Mask, &I);
3547     }
3548 
3549     Value *ShadowPtr;
3550     Value *OriginPtr;
3551     std::tie(ShadowPtr, OriginPtr) = getShadowOriginPtr(
3552         Ptr, IRB, Shadow->getType(), Alignment, /*isStore*/ true);
3553 
3554     IRB.CreateMaskedStore(Shadow, ShadowPtr, Alignment, Mask);
3555 
3556     if (!MS.TrackOrigins)
3557       return;
3558 
3559     auto &DL = F.getParent()->getDataLayout();
3560     paintOrigin(IRB, getOrigin(V), OriginPtr,
3561                 DL.getTypeStoreSize(Shadow->getType()),
3562                 std::max(Alignment, kMinOriginAlignment));
3563   }
3564 
3565   void handleMaskedLoad(IntrinsicInst &I) {
3566     IRBuilder<> IRB(&I);
3567     Value *Ptr = I.getArgOperand(0);
3568     const Align Alignment(
3569         cast<ConstantInt>(I.getArgOperand(1))->getZExtValue());
3570     Value *Mask = I.getArgOperand(2);
3571     Value *PassThru = I.getArgOperand(3);
3572 
3573     if (ClCheckAccessAddress) {
3574       insertShadowCheck(Ptr, &I);
3575       insertShadowCheck(Mask, &I);
3576     }
3577 
3578     if (!PropagateShadow) {
3579       setShadow(&I, getCleanShadow(&I));
3580       setOrigin(&I, getCleanOrigin());
3581       return;
3582     }
3583 
3584     Type *ShadowTy = getShadowTy(&I);
3585     Value *ShadowPtr, *OriginPtr;
3586     std::tie(ShadowPtr, OriginPtr) =
3587         getShadowOriginPtr(Ptr, IRB, ShadowTy, Alignment, /*isStore*/ false);
3588     setShadow(&I, IRB.CreateMaskedLoad(ShadowTy, ShadowPtr, Alignment, Mask,
3589                                        getShadow(PassThru), "_msmaskedld"));
3590 
3591     if (!MS.TrackOrigins)
3592       return;
3593 
3594     // Choose between PassThru's and the loaded value's origins.
3595     Value *MaskedPassThruShadow = IRB.CreateAnd(
3596         getShadow(PassThru), IRB.CreateSExt(IRB.CreateNeg(Mask), ShadowTy));
3597 
3598     Value *NotNull = convertToBool(MaskedPassThruShadow, IRB, "_mscmp");
3599 
3600     Value *PtrOrigin = IRB.CreateLoad(MS.OriginTy, OriginPtr);
3601     Value *Origin = IRB.CreateSelect(NotNull, getOrigin(PassThru), PtrOrigin);
3602 
3603     setOrigin(&I, Origin);
3604   }
3605 
3606   // Instrument BMI / BMI2 intrinsics.
3607   // All of these intrinsics are Z = I(X, Y)
3608   // where the types of all operands and the result match, and are either i32 or
3609   // i64. The following instrumentation happens to work for all of them:
3610   //   Sz = I(Sx, Y) | (sext (Sy != 0))
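  // E.g. for pdep(X, Mask), any poisoned bit in Mask poisons the whole result,
  // while the shadow of X is deposited into the same bit positions as X's own
  // bits.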
3611   void handleBmiIntrinsic(IntrinsicInst &I) {
3612     IRBuilder<> IRB(&I);
3613     Type *ShadowTy = getShadowTy(&I);
3614 
3615     // If any bit of the mask operand is poisoned, then the whole thing is.
3616     Value *SMask = getShadow(&I, 1);
3617     SMask = IRB.CreateSExt(IRB.CreateICmpNE(SMask, getCleanShadow(ShadowTy)),
3618                            ShadowTy);
3619     // Apply the same intrinsic to the shadow of the first operand.
3620     Value *S = IRB.CreateCall(I.getCalledFunction(),
3621                               {getShadow(&I, 0), I.getOperand(1)});
3622     S = IRB.CreateOr(SMask, S);
3623     setShadow(&I, S);
3624     setOriginForNaryOp(I);
3625   }
3626 
3627   SmallVector<int, 8> getPclmulMask(unsigned Width, bool OddElements) {
3628     SmallVector<int, 8> Mask;
3629     for (unsigned X = OddElements ? 1 : 0; X < Width; X += 2) {
3630       Mask.append(2, X);
3631     }
3632     return Mask;
3633   }
3634 
3635   // Instrument pclmul intrinsics.
3636   // These intrinsics operate either on odd or on even elements of the input
3637   // vectors, depending on the constant in the 3rd argument, ignoring the rest.
3638   // Replace the unused elements with copies of the used ones, ex:
3639   //   (0, 1, 2, 3) -> (0, 0, 2, 2) (even case)
3640   // or
3641   //   (0, 1, 2, 3) -> (1, 1, 3, 3) (odd case)
3642   // and then apply the usual shadow combining logic.
3643   void handlePclmulIntrinsic(IntrinsicInst &I) {
3644     IRBuilder<> IRB(&I);
3645     unsigned Width =
3646         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3647     assert(isa<ConstantInt>(I.getArgOperand(2)) &&
3648            "pclmul 3rd operand must be a constant");
3649     unsigned Imm = cast<ConstantInt>(I.getArgOperand(2))->getZExtValue();
3650     Value *Shuf0 = IRB.CreateShuffleVector(getShadow(&I, 0),
3651                                            getPclmulMask(Width, Imm & 0x01));
3652     Value *Shuf1 = IRB.CreateShuffleVector(getShadow(&I, 1),
3653                                            getPclmulMask(Width, Imm & 0x10));
3654     ShadowAndOriginCombiner SOC(this, IRB);
3655     SOC.Add(Shuf0, getOrigin(&I, 0));
3656     SOC.Add(Shuf1, getOrigin(&I, 1));
3657     SOC.Done(&I);
3658   }
3659 
3660   // Instrument _mm_*_sd|ss intrinsics
3661   void handleUnarySdSsIntrinsic(IntrinsicInst &I) {
3662     IRBuilder<> IRB(&I);
3663     unsigned Width =
3664         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3665     Value *First = getShadow(&I, 0);
3666     Value *Second = getShadow(&I, 1);
3667     // First element of second operand, remaining elements of first operand
3668     SmallVector<int, 16> Mask;
3669     Mask.push_back(Width);
3670     for (unsigned i = 1; i < Width; i++)
3671       Mask.push_back(i);
3672     Value *Shadow = IRB.CreateShuffleVector(First, Second, Mask);
3673 
3674     setShadow(&I, Shadow);
3675     setOriginForNaryOp(I);
3676   }
3677 
3678   void handleVtestIntrinsic(IntrinsicInst &I) {
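    // The returned flag depends on bits of both operands, so conservatively
    // poison the result if any bit of either operand's shadow is set.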
3679     IRBuilder<> IRB(&I);
3680     Value *Shadow0 = getShadow(&I, 0);
3681     Value *Shadow1 = getShadow(&I, 1);
3682     Value *Or = IRB.CreateOr(Shadow0, Shadow1);
3683     Value *NZ = IRB.CreateICmpNE(Or, Constant::getNullValue(Or->getType()));
3684     Value *Scalar = convertShadowToScalar(NZ, IRB);
3685     Value *Shadow = IRB.CreateZExt(Scalar, getShadowTy(&I));
3686 
3687     setShadow(&I, Shadow);
3688     setOriginForNaryOp(I);
3689   }
3690 
3691   void handleBinarySdSsIntrinsic(IntrinsicInst &I) {
3692     IRBuilder<> IRB(&I);
3693     unsigned Width =
3694         cast<FixedVectorType>(I.getArgOperand(0)->getType())->getNumElements();
3695     Value *First = getShadow(&I, 0);
3696     Value *Second = getShadow(&I, 1);
3697     Value *OrShadow = IRB.CreateOr(First, Second);
3698     // First element of both OR'd together, remaining elements of first operand
3699     SmallVector<int, 16> Mask;
3700     Mask.push_back(Width);
3701     for (unsigned i = 1; i < Width; i++)
3702       Mask.push_back(i);
3703     Value *Shadow = IRB.CreateShuffleVector(First, OrShadow, Mask);
3704 
3705     setShadow(&I, Shadow);
3706     setOriginForNaryOp(I);
3707   }
3708 
3709   // Instrument abs intrinsic.
3710   // handleUnknownIntrinsic can't handle it because of the last
3711   // is_int_min_poison argument which does not match the result type.
3712   void handleAbsIntrinsic(IntrinsicInst &I) {
3713     assert(I.getType()->isIntOrIntVectorTy());
3714     assert(I.getArgOperand(0)->getType() == I.getType());
3715 
3716     // FIXME: Handle is_int_min_poison.
3717     IRBuilder<> IRB(&I);
3718     setShadow(&I, getShadow(&I, 0));
3719     setOrigin(&I, getOrigin(&I, 0));
3720   }
3721 
3722   void handleIsFpClass(IntrinsicInst &I) {
3723     IRBuilder<> IRB(&I);
3724     Value *Shadow = getShadow(&I, 0);
3725     setShadow(&I, IRB.CreateICmpNE(Shadow, getCleanShadow(Shadow)));
3726     setOrigin(&I, getOrigin(&I, 0));
3727   }
3728 
3729   void visitIntrinsicInst(IntrinsicInst &I) {
3730     switch (I.getIntrinsicID()) {
3731     case Intrinsic::abs:
3732       handleAbsIntrinsic(I);
3733       break;
3734     case Intrinsic::is_fpclass:
3735       handleIsFpClass(I);
3736       break;
3737     case Intrinsic::lifetime_start:
3738       handleLifetimeStart(I);
3739       break;
3740     case Intrinsic::launder_invariant_group:
3741     case Intrinsic::strip_invariant_group:
3742       handleInvariantGroup(I);
3743       break;
3744     case Intrinsic::bswap:
3745       handleBswap(I);
3746       break;
3747     case Intrinsic::ctlz:
3748     case Intrinsic::cttz:
3749       handleCountZeroes(I);
3750       break;
3751     case Intrinsic::masked_compressstore:
3752       handleMaskedCompressStore(I);
3753       break;
3754     case Intrinsic::masked_expandload:
3755       handleMaskedExpandLoad(I);
3756       break;
3757     case Intrinsic::masked_gather:
3758       handleMaskedGather(I);
3759       break;
3760     case Intrinsic::masked_scatter:
3761       handleMaskedScatter(I);
3762       break;
3763     case Intrinsic::masked_store:
3764       handleMaskedStore(I);
3765       break;
3766     case Intrinsic::masked_load:
3767       handleMaskedLoad(I);
3768       break;
3769     case Intrinsic::vector_reduce_and:
3770       handleVectorReduceAndIntrinsic(I);
3771       break;
3772     case Intrinsic::vector_reduce_or:
3773       handleVectorReduceOrIntrinsic(I);
3774       break;
3775     case Intrinsic::vector_reduce_add:
3776     case Intrinsic::vector_reduce_xor:
3777     case Intrinsic::vector_reduce_mul:
3778       handleVectorReduceIntrinsic(I);
3779       break;
3780     case Intrinsic::x86_sse_stmxcsr:
3781       handleStmxcsr(I);
3782       break;
3783     case Intrinsic::x86_sse_ldmxcsr:
3784       handleLdmxcsr(I);
3785       break;
3786     case Intrinsic::x86_avx512_vcvtsd2usi64:
3787     case Intrinsic::x86_avx512_vcvtsd2usi32:
3788     case Intrinsic::x86_avx512_vcvtss2usi64:
3789     case Intrinsic::x86_avx512_vcvtss2usi32:
3790     case Intrinsic::x86_avx512_cvttss2usi64:
3791     case Intrinsic::x86_avx512_cvttss2usi:
3792     case Intrinsic::x86_avx512_cvttsd2usi64:
3793     case Intrinsic::x86_avx512_cvttsd2usi:
3794     case Intrinsic::x86_avx512_cvtusi2ss:
3795     case Intrinsic::x86_avx512_cvtusi642sd:
3796     case Intrinsic::x86_avx512_cvtusi642ss:
3797       handleVectorConvertIntrinsic(I, 1, true);
3798       break;
3799     case Intrinsic::x86_sse2_cvtsd2si64:
3800     case Intrinsic::x86_sse2_cvtsd2si:
3801     case Intrinsic::x86_sse2_cvtsd2ss:
3802     case Intrinsic::x86_sse2_cvttsd2si64:
3803     case Intrinsic::x86_sse2_cvttsd2si:
3804     case Intrinsic::x86_sse_cvtss2si64:
3805     case Intrinsic::x86_sse_cvtss2si:
3806     case Intrinsic::x86_sse_cvttss2si64:
3807     case Intrinsic::x86_sse_cvttss2si:
3808       handleVectorConvertIntrinsic(I, 1);
3809       break;
3810     case Intrinsic::x86_sse_cvtps2pi:
3811     case Intrinsic::x86_sse_cvttps2pi:
3812       handleVectorConvertIntrinsic(I, 2);
3813       break;
3814 
3815     case Intrinsic::x86_avx512_psll_w_512:
3816     case Intrinsic::x86_avx512_psll_d_512:
3817     case Intrinsic::x86_avx512_psll_q_512:
3818     case Intrinsic::x86_avx512_pslli_w_512:
3819     case Intrinsic::x86_avx512_pslli_d_512:
3820     case Intrinsic::x86_avx512_pslli_q_512:
3821     case Intrinsic::x86_avx512_psrl_w_512:
3822     case Intrinsic::x86_avx512_psrl_d_512:
3823     case Intrinsic::x86_avx512_psrl_q_512:
3824     case Intrinsic::x86_avx512_psra_w_512:
3825     case Intrinsic::x86_avx512_psra_d_512:
3826     case Intrinsic::x86_avx512_psra_q_512:
3827     case Intrinsic::x86_avx512_psrli_w_512:
3828     case Intrinsic::x86_avx512_psrli_d_512:
3829     case Intrinsic::x86_avx512_psrli_q_512:
3830     case Intrinsic::x86_avx512_psrai_w_512:
3831     case Intrinsic::x86_avx512_psrai_d_512:
3832     case Intrinsic::x86_avx512_psrai_q_512:
3833     case Intrinsic::x86_avx512_psra_q_256:
3834     case Intrinsic::x86_avx512_psra_q_128:
3835     case Intrinsic::x86_avx512_psrai_q_256:
3836     case Intrinsic::x86_avx512_psrai_q_128:
3837     case Intrinsic::x86_avx2_psll_w:
3838     case Intrinsic::x86_avx2_psll_d:
3839     case Intrinsic::x86_avx2_psll_q:
3840     case Intrinsic::x86_avx2_pslli_w:
3841     case Intrinsic::x86_avx2_pslli_d:
3842     case Intrinsic::x86_avx2_pslli_q:
3843     case Intrinsic::x86_avx2_psrl_w:
3844     case Intrinsic::x86_avx2_psrl_d:
3845     case Intrinsic::x86_avx2_psrl_q:
3846     case Intrinsic::x86_avx2_psra_w:
3847     case Intrinsic::x86_avx2_psra_d:
3848     case Intrinsic::x86_avx2_psrli_w:
3849     case Intrinsic::x86_avx2_psrli_d:
3850     case Intrinsic::x86_avx2_psrli_q:
3851     case Intrinsic::x86_avx2_psrai_w:
3852     case Intrinsic::x86_avx2_psrai_d:
3853     case Intrinsic::x86_sse2_psll_w:
3854     case Intrinsic::x86_sse2_psll_d:
3855     case Intrinsic::x86_sse2_psll_q:
3856     case Intrinsic::x86_sse2_pslli_w:
3857     case Intrinsic::x86_sse2_pslli_d:
3858     case Intrinsic::x86_sse2_pslli_q:
3859     case Intrinsic::x86_sse2_psrl_w:
3860     case Intrinsic::x86_sse2_psrl_d:
3861     case Intrinsic::x86_sse2_psrl_q:
3862     case Intrinsic::x86_sse2_psra_w:
3863     case Intrinsic::x86_sse2_psra_d:
3864     case Intrinsic::x86_sse2_psrli_w:
3865     case Intrinsic::x86_sse2_psrli_d:
3866     case Intrinsic::x86_sse2_psrli_q:
3867     case Intrinsic::x86_sse2_psrai_w:
3868     case Intrinsic::x86_sse2_psrai_d:
3869     case Intrinsic::x86_mmx_psll_w:
3870     case Intrinsic::x86_mmx_psll_d:
3871     case Intrinsic::x86_mmx_psll_q:
3872     case Intrinsic::x86_mmx_pslli_w:
3873     case Intrinsic::x86_mmx_pslli_d:
3874     case Intrinsic::x86_mmx_pslli_q:
3875     case Intrinsic::x86_mmx_psrl_w:
3876     case Intrinsic::x86_mmx_psrl_d:
3877     case Intrinsic::x86_mmx_psrl_q:
3878     case Intrinsic::x86_mmx_psra_w:
3879     case Intrinsic::x86_mmx_psra_d:
3880     case Intrinsic::x86_mmx_psrli_w:
3881     case Intrinsic::x86_mmx_psrli_d:
3882     case Intrinsic::x86_mmx_psrli_q:
3883     case Intrinsic::x86_mmx_psrai_w:
3884     case Intrinsic::x86_mmx_psrai_d:
3885       handleVectorShiftIntrinsic(I, /* Variable */ false);
3886       break;
3887     case Intrinsic::x86_avx2_psllv_d:
3888     case Intrinsic::x86_avx2_psllv_d_256:
3889     case Intrinsic::x86_avx512_psllv_d_512:
3890     case Intrinsic::x86_avx2_psllv_q:
3891     case Intrinsic::x86_avx2_psllv_q_256:
3892     case Intrinsic::x86_avx512_psllv_q_512:
3893     case Intrinsic::x86_avx2_psrlv_d:
3894     case Intrinsic::x86_avx2_psrlv_d_256:
3895     case Intrinsic::x86_avx512_psrlv_d_512:
3896     case Intrinsic::x86_avx2_psrlv_q:
3897     case Intrinsic::x86_avx2_psrlv_q_256:
3898     case Intrinsic::x86_avx512_psrlv_q_512:
3899     case Intrinsic::x86_avx2_psrav_d:
3900     case Intrinsic::x86_avx2_psrav_d_256:
3901     case Intrinsic::x86_avx512_psrav_d_512:
3902     case Intrinsic::x86_avx512_psrav_q_128:
3903     case Intrinsic::x86_avx512_psrav_q_256:
3904     case Intrinsic::x86_avx512_psrav_q_512:
3905       handleVectorShiftIntrinsic(I, /* Variable */ true);
3906       break;
3907 
3908     case Intrinsic::x86_sse2_packsswb_128:
3909     case Intrinsic::x86_sse2_packssdw_128:
3910     case Intrinsic::x86_sse2_packuswb_128:
3911     case Intrinsic::x86_sse41_packusdw:
3912     case Intrinsic::x86_avx2_packsswb:
3913     case Intrinsic::x86_avx2_packssdw:
3914     case Intrinsic::x86_avx2_packuswb:
3915     case Intrinsic::x86_avx2_packusdw:
3916       handleVectorPackIntrinsic(I);
3917       break;
3918 
3919     case Intrinsic::x86_mmx_packsswb:
3920     case Intrinsic::x86_mmx_packuswb:
3921       handleVectorPackIntrinsic(I, 16);
3922       break;
3923 
3924     case Intrinsic::x86_mmx_packssdw:
3925       handleVectorPackIntrinsic(I, 32);
3926       break;
3927 
3928     case Intrinsic::x86_mmx_psad_bw:
3929     case Intrinsic::x86_sse2_psad_bw:
3930     case Intrinsic::x86_avx2_psad_bw:
3931       handleVectorSadIntrinsic(I);
3932       break;
3933 
3934     case Intrinsic::x86_sse2_pmadd_wd:
3935     case Intrinsic::x86_avx2_pmadd_wd:
3936     case Intrinsic::x86_ssse3_pmadd_ub_sw_128:
3937     case Intrinsic::x86_avx2_pmadd_ub_sw:
3938       handleVectorPmaddIntrinsic(I);
3939       break;
3940 
3941     case Intrinsic::x86_ssse3_pmadd_ub_sw:
3942       handleVectorPmaddIntrinsic(I, 8);
3943       break;
3944 
3945     case Intrinsic::x86_mmx_pmadd_wd:
3946       handleVectorPmaddIntrinsic(I, 16);
3947       break;
3948 
3949     case Intrinsic::x86_sse_cmp_ss:
3950     case Intrinsic::x86_sse2_cmp_sd:
3951     case Intrinsic::x86_sse_comieq_ss:
3952     case Intrinsic::x86_sse_comilt_ss:
3953     case Intrinsic::x86_sse_comile_ss:
3954     case Intrinsic::x86_sse_comigt_ss:
3955     case Intrinsic::x86_sse_comige_ss:
3956     case Intrinsic::x86_sse_comineq_ss:
3957     case Intrinsic::x86_sse_ucomieq_ss:
3958     case Intrinsic::x86_sse_ucomilt_ss:
3959     case Intrinsic::x86_sse_ucomile_ss:
3960     case Intrinsic::x86_sse_ucomigt_ss:
3961     case Intrinsic::x86_sse_ucomige_ss:
3962     case Intrinsic::x86_sse_ucomineq_ss:
3963     case Intrinsic::x86_sse2_comieq_sd:
3964     case Intrinsic::x86_sse2_comilt_sd:
3965     case Intrinsic::x86_sse2_comile_sd:
3966     case Intrinsic::x86_sse2_comigt_sd:
3967     case Intrinsic::x86_sse2_comige_sd:
3968     case Intrinsic::x86_sse2_comineq_sd:
3969     case Intrinsic::x86_sse2_ucomieq_sd:
3970     case Intrinsic::x86_sse2_ucomilt_sd:
3971     case Intrinsic::x86_sse2_ucomile_sd:
3972     case Intrinsic::x86_sse2_ucomigt_sd:
3973     case Intrinsic::x86_sse2_ucomige_sd:
3974     case Intrinsic::x86_sse2_ucomineq_sd:
3975       handleVectorCompareScalarIntrinsic(I);
3976       break;
3977 
3978     case Intrinsic::x86_avx_cmp_pd_256:
3979     case Intrinsic::x86_avx_cmp_ps_256:
3980     case Intrinsic::x86_sse2_cmp_pd:
3981     case Intrinsic::x86_sse_cmp_ps:
3982       handleVectorComparePackedIntrinsic(I);
3983       break;
3984 
3985     case Intrinsic::x86_bmi_bextr_32:
3986     case Intrinsic::x86_bmi_bextr_64:
3987     case Intrinsic::x86_bmi_bzhi_32:
3988     case Intrinsic::x86_bmi_bzhi_64:
3989     case Intrinsic::x86_bmi_pdep_32:
3990     case Intrinsic::x86_bmi_pdep_64:
3991     case Intrinsic::x86_bmi_pext_32:
3992     case Intrinsic::x86_bmi_pext_64:
3993       handleBmiIntrinsic(I);
3994       break;
3995 
3996     case Intrinsic::x86_pclmulqdq:
3997     case Intrinsic::x86_pclmulqdq_256:
3998     case Intrinsic::x86_pclmulqdq_512:
3999       handlePclmulIntrinsic(I);
4000       break;
4001 
4002     case Intrinsic::x86_sse41_round_sd:
4003     case Intrinsic::x86_sse41_round_ss:
4004       handleUnarySdSsIntrinsic(I);
4005       break;
4006     case Intrinsic::x86_sse2_max_sd:
4007     case Intrinsic::x86_sse_max_ss:
4008     case Intrinsic::x86_sse2_min_sd:
4009     case Intrinsic::x86_sse_min_ss:
4010       handleBinarySdSsIntrinsic(I);
4011       break;
4012 
4013     case Intrinsic::x86_avx_vtestc_pd:
4014     case Intrinsic::x86_avx_vtestc_pd_256:
4015     case Intrinsic::x86_avx_vtestc_ps:
4016     case Intrinsic::x86_avx_vtestc_ps_256:
4017     case Intrinsic::x86_avx_vtestnzc_pd:
4018     case Intrinsic::x86_avx_vtestnzc_pd_256:
4019     case Intrinsic::x86_avx_vtestnzc_ps:
4020     case Intrinsic::x86_avx_vtestnzc_ps_256:
4021     case Intrinsic::x86_avx_vtestz_pd:
4022     case Intrinsic::x86_avx_vtestz_pd_256:
4023     case Intrinsic::x86_avx_vtestz_ps:
4024     case Intrinsic::x86_avx_vtestz_ps_256:
4025     case Intrinsic::x86_avx_ptestc_256:
4026     case Intrinsic::x86_avx_ptestnzc_256:
4027     case Intrinsic::x86_avx_ptestz_256:
4028     case Intrinsic::x86_sse41_ptestc:
4029     case Intrinsic::x86_sse41_ptestnzc:
4030     case Intrinsic::x86_sse41_ptestz:
4031       handleVtestIntrinsic(I);
4032       break;
4033 
4034     case Intrinsic::fshl:
4035     case Intrinsic::fshr:
4036       handleFunnelShift(I);
4037       break;
4038 
4039     case Intrinsic::is_constant:
4040       // The result of llvm.is.constant() is always defined.
4041       setShadow(&I, getCleanShadow(&I));
4042       setOrigin(&I, getCleanOrigin());
4043       break;
4044 
4045     default:
4046       if (!handleUnknownIntrinsic(I))
4047         visitInstruction(I);
4048       break;
4049     }
4050   }
4051 
4052   void visitLibAtomicLoad(CallBase &CB) {
4053     // Since we use getNextNode here, we can't have CB terminate the BB.
4054     assert(isa<CallInst>(CB));
4055 
4056     IRBuilder<> IRB(&CB);
4057     Value *Size = CB.getArgOperand(0);
4058     Value *SrcPtr = CB.getArgOperand(1);
4059     Value *DstPtr = CB.getArgOperand(2);
4060     Value *Ordering = CB.getArgOperand(3);
4061     // Convert the call to have at least Acquire ordering to make sure
4062     // the shadow operations aren't reordered before it.
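    // For reference (a sketch, not generated code): libatomic's generic load
    // is roughly
    //   void __atomic_load(size_t size, void *src, void *dst, int ordering);
    // which is why operands 0..3 are size, source, destination and ordering.
    // E.g. a relaxed load is conservatively upgraded here; see
    // makeAddAcquireOrderingTable for the exact mapping.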
4063     Value *NewOrdering =
4064         IRB.CreateExtractElement(makeAddAcquireOrderingTable(IRB), Ordering);
4065     CB.setArgOperand(3, NewOrdering);
4066 
4067     NextNodeIRBuilder NextIRB(&CB);
4068     Value *SrcShadowPtr, *SrcOriginPtr;
4069     std::tie(SrcShadowPtr, SrcOriginPtr) =
4070         getShadowOriginPtr(SrcPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4071                            /*isStore*/ false);
4072     Value *DstShadowPtr =
4073         getShadowOriginPtr(DstPtr, NextIRB, NextIRB.getInt8Ty(), Align(1),
4074                            /*isStore*/ true)
4075             .first;
4076 
4077     NextIRB.CreateMemCpy(DstShadowPtr, Align(1), SrcShadowPtr, Align(1), Size);
4078     if (MS.TrackOrigins) {
4079       Value *SrcOrigin = NextIRB.CreateAlignedLoad(MS.OriginTy, SrcOriginPtr,
4080                                                    kMinOriginAlignment);
4081       Value *NewOrigin = updateOrigin(SrcOrigin, NextIRB);
4082       NextIRB.CreateCall(MS.MsanSetOriginFn, {DstPtr, Size, NewOrigin});
4083     }
4084   }
4085 
4086   void visitLibAtomicStore(CallBase &CB) {
4087     IRBuilder<> IRB(&CB);
4088     Value *Size = CB.getArgOperand(0);
4089     Value *DstPtr = CB.getArgOperand(2);
4090     Value *Ordering = CB.getArgOperand(3);
4091     // Convert the call to have at least Release ordering to make sure
4092     // the shadow operations aren't reordered after it.
4093     Value *NewOrdering =
4094         IRB.CreateExtractElement(makeAddReleaseOrderingTable(IRB), Ordering);
4095     CB.setArgOperand(3, NewOrdering);
4096 
4097     Value *DstShadowPtr =
4098         getShadowOriginPtr(DstPtr, IRB, IRB.getInt8Ty(), Align(1),
4099                            /*isStore*/ true)
4100             .first;
4101 
4102     // Atomic store always paints clean shadow/origin. See file header.
4103     IRB.CreateMemSet(DstShadowPtr, getCleanShadow(IRB.getInt8Ty()), Size,
4104                      Align(1));
4105   }
4106 
4107   void visitCallBase(CallBase &CB) {
4108     assert(!CB.getMetadata(LLVMContext::MD_nosanitize));
4109     if (CB.isInlineAsm()) {
4110       // For inline asm (either a call to asm function, or callbr instruction),
4111       // do the usual thing: check argument shadow and mark all outputs as
4112       // clean. Note that any side effects of the inline asm that are not
4113       // immediately visible in its constraints are not handled.
4114       if (ClHandleAsmConservative && MS.CompileKernel)
4115         visitAsmInstruction(CB);
4116       else
4117         visitInstruction(CB);
4118       return;
4119     }
4120     LibFunc LF;
4121     if (TLI->getLibFunc(CB, LF)) {
4122       // libatomic.a functions need to have special handling because there isn't
4123       // a good way to intercept them or compile the library with
4124       // instrumentation.
4125       switch (LF) {
4126       case LibFunc_atomic_load:
4127         if (!isa<CallInst>(CB)) {
4128           llvm::errs() << "MSAN -- cannot instrument invoke of libatomic load. "
4129                           "Ignoring!\n";
4130           break;
4131         }
4132         visitLibAtomicLoad(CB);
4133         return;
4134       case LibFunc_atomic_store:
4135         visitLibAtomicStore(CB);
4136         return;
4137       default:
4138         break;
4139       }
4140     }
4141 
4142     if (auto *Call = dyn_cast<CallInst>(&CB)) {
4143       assert(!isa<IntrinsicInst>(Call) && "intrinsics are handled elsewhere");
4144 
4145       // We are going to insert code that relies on the fact that the callee
4146       // will become a non-readonly function after it is instrumented by us. To
4147       // prevent this code from being optimized out, mark that function
4148       // non-readonly in advance.
4149       // TODO: We can likely do better than dropping memory() completely here.
4150       AttributeMask B;
4151       B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
4152 
4153       Call->removeFnAttrs(B);
4154       if (Function *Func = Call->getCalledFunction()) {
4155         Func->removeFnAttrs(B);
4156       }
4157 
4158       maybeMarkSanitizerLibraryCallNoBuiltin(Call, TLI);
4159     }
4160     IRBuilder<> IRB(&CB);
4161     bool MayCheckCall = MS.EagerChecks;
4162     if (Function *Func = CB.getCalledFunction()) {
4163       // __sanitizer_unaligned_{load,store} functions may be called by users
4164       // and always expect shadows in the TLS. So don't check them.
4165       MayCheckCall &= !Func->getName().startswith("__sanitizer_unaligned_");
4166     }
4167 
4168     unsigned ArgOffset = 0;
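    // Illustrative sketch (argument names invented): for a call f(i32 %x,
    // double %y), %x's shadow is stored at __msan_param_tls offset 0 and %y's
    // at offset 8, since every slot below is rounded up to
    // kShadowTLSAlignment.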
4169     LLVM_DEBUG(dbgs() << "  CallSite: " << CB << "\n");
4170     for (const auto &[i, A] : llvm::enumerate(CB.args())) {
4171       if (!A->getType()->isSized()) {
4172         LLVM_DEBUG(dbgs() << "Arg " << i << " is not sized: " << CB << "\n");
4173         continue;
4174       }
4175       unsigned Size = 0;
4176       const DataLayout &DL = F.getParent()->getDataLayout();
4177 
4178       bool ByVal = CB.paramHasAttr(i, Attribute::ByVal);
4179       bool NoUndef = CB.paramHasAttr(i, Attribute::NoUndef);
4180       bool EagerCheck = MayCheckCall && !ByVal && NoUndef;
4181 
4182       if (EagerCheck) {
4183         insertShadowCheck(A, &CB);
4184         Size = DL.getTypeAllocSize(A->getType());
4185       } else {
4186         Value *Store = nullptr;
4187         // Compute the Shadow for arg even if it is ByVal, because
4188         // in that case getShadow() will copy the actual arg shadow to
4189         // __msan_param_tls.
4190         Value *ArgShadow = getShadow(A);
4191         Value *ArgShadowBase = getShadowPtrForArgument(A, IRB, ArgOffset);
4192         LLVM_DEBUG(dbgs() << "  Arg#" << i << ": " << *A
4193                           << " Shadow: " << *ArgShadow << "\n");
4194         if (ByVal) {
4195           // ByVal requires some special handling as it's too big for a single
4196           // load
4197           assert(A->getType()->isPointerTy() &&
4198                  "ByVal argument is not a pointer!");
4199           Size = DL.getTypeAllocSize(CB.getParamByValType(i));
4200           if (ArgOffset + Size > kParamTLSSize)
4201             break;
4202           const MaybeAlign ParamAlignment(CB.getParamAlign(i));
4203           MaybeAlign Alignment = std::nullopt;
4204           if (ParamAlignment)
4205             Alignment = std::min(*ParamAlignment, kShadowTLSAlignment);
4206           Value *AShadowPtr, *AOriginPtr;
4207           std::tie(AShadowPtr, AOriginPtr) =
4208               getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), Alignment,
4209                                  /*isStore*/ false);
4210           if (!PropagateShadow) {
4211             Store = IRB.CreateMemSet(ArgShadowBase,
4212                                      Constant::getNullValue(IRB.getInt8Ty()),
4213                                      Size, Alignment);
4214           } else {
4215             Store = IRB.CreateMemCpy(ArgShadowBase, Alignment, AShadowPtr,
4216                                      Alignment, Size);
4217             if (MS.TrackOrigins) {
4218               Value *ArgOriginBase = getOriginPtrForArgument(A, IRB, ArgOffset);
4219               // FIXME: OriginSize should be:
4220               // alignTo(A % kMinOriginAlignment + Size, kMinOriginAlignment)
4221               unsigned OriginSize = alignTo(Size, kMinOriginAlignment);
4222               IRB.CreateMemCpy(
4223                   ArgOriginBase,
4224                   /* by origin_tls[ArgOffset] */ kMinOriginAlignment,
4225                   AOriginPtr,
4226                   /* by getShadowOriginPtr */ kMinOriginAlignment, OriginSize);
4227             }
4228           }
4229         } else {
4230           // Any other parameters mean we need bit-grained tracking of uninit
4231           // data
4232           Size = DL.getTypeAllocSize(A->getType());
4233           if (ArgOffset + Size > kParamTLSSize)
4234             break;
4235           Store = IRB.CreateAlignedStore(ArgShadow, ArgShadowBase,
4236                                          kShadowTLSAlignment);
4237           Constant *Cst = dyn_cast<Constant>(ArgShadow);
4238           if (MS.TrackOrigins && !(Cst && Cst->isNullValue())) {
4239             IRB.CreateStore(getOrigin(A),
4240                             getOriginPtrForArgument(A, IRB, ArgOffset));
4241           }
4242         }
4243         (void)Store;
4244         assert(Store != nullptr);
4245         LLVM_DEBUG(dbgs() << "  Param:" << *Store << "\n");
4246       }
4247       assert(Size != 0);
4248       ArgOffset += alignTo(Size, kShadowTLSAlignment);
4249     }
4250     LLVM_DEBUG(dbgs() << "  done with call args\n");
4251 
4252     FunctionType *FT = CB.getFunctionType();
4253     if (FT->isVarArg()) {
4254       VAHelper->visitCallBase(CB, IRB);
4255     }
4256 
4257     // Now, get the shadow for the RetVal.
4258     if (!CB.getType()->isSized())
4259       return;
4260     // Don't emit the epilogue for musttail call returns.
4261     if (isa<CallInst>(CB) && cast<CallInst>(CB).isMustTailCall())
4262       return;
4263 
4264     if (MayCheckCall && CB.hasRetAttr(Attribute::NoUndef)) {
4265       setShadow(&CB, getCleanShadow(&CB));
4266       setOrigin(&CB, getCleanOrigin());
4267       return;
4268     }
4269 
4270     IRBuilder<> IRBBefore(&CB);
4271     // Until we have full dynamic coverage, make sure the retval shadow is 0.
4272     Value *Base = getShadowPtrForRetval(&CB, IRBBefore);
4273     IRBBefore.CreateAlignedStore(getCleanShadow(&CB), Base,
4274                                  kShadowTLSAlignment);
4275     BasicBlock::iterator NextInsn;
4276     if (isa<CallInst>(CB)) {
4277       NextInsn = ++CB.getIterator();
4278       assert(NextInsn != CB.getParent()->end());
4279     } else {
4280       BasicBlock *NormalDest = cast<InvokeInst>(CB).getNormalDest();
4281       if (!NormalDest->getSinglePredecessor()) {
4282         // FIXME: this case is tricky, so we are just conservative here.
4283         // Perhaps we need to split the edge between this BB and NormalDest,
4284         // but a naive attempt to use SplitEdge leads to a crash.
4285         setShadow(&CB, getCleanShadow(&CB));
4286         setOrigin(&CB, getCleanOrigin());
4287         return;
4288       }
4289       // FIXME: NextInsn is likely in a basic block that has not been visited
4290       // yet. Anything inserted there will be instrumented by MSan later!
4291       NextInsn = NormalDest->getFirstInsertionPt();
4292       assert(NextInsn != NormalDest->end() &&
4293              "Could not find insertion point for retval shadow load");
4294     }
4295     IRBuilder<> IRBAfter(&*NextInsn);
4296     Value *RetvalShadow = IRBAfter.CreateAlignedLoad(
4297         getShadowTy(&CB), getShadowPtrForRetval(&CB, IRBAfter),
4298         kShadowTLSAlignment, "_msret");
4299     setShadow(&CB, RetvalShadow);
4300     if (MS.TrackOrigins)
4301       setOrigin(&CB, IRBAfter.CreateLoad(MS.OriginTy,
4302                                          getOriginPtrForRetval(IRBAfter)));
4303   }
4304 
4305   bool isAMustTailRetVal(Value *RetVal) {
4306     if (auto *I = dyn_cast<BitCastInst>(RetVal)) {
4307       RetVal = I->getOperand(0);
4308     }
4309     if (auto *I = dyn_cast<CallInst>(RetVal)) {
4310       return I->isMustTailCall();
4311     }
4312     return false;
4313   }
4314 
4315   void visitReturnInst(ReturnInst &I) {
4316     IRBuilder<> IRB(&I);
4317     Value *RetVal = I.getReturnValue();
4318     if (!RetVal)
4319       return;
4320     // Don't emit the epilogue for musttail call returns.
4321     if (isAMustTailRetVal(RetVal))
4322       return;
4323     Value *ShadowPtr = getShadowPtrForRetval(RetVal, IRB);
4324     bool HasNoUndef = F.hasRetAttribute(Attribute::NoUndef);
4325     bool StoreShadow = !(MS.EagerChecks && HasNoUndef);
4326     // FIXME: Consider using SpecialCaseList to specify a list of functions that
4327     // must always return fully initialized values. For now, we hardcode "main".
4328     bool EagerCheck = (MS.EagerChecks && HasNoUndef) || (F.getName() == "main");
4329 
4330     Value *Shadow = getShadow(RetVal);
4331     bool StoreOrigin = true;
4332     if (EagerCheck) {
4333       insertShadowCheck(RetVal, &I);
4334       Shadow = getCleanShadow(RetVal);
4335       StoreOrigin = false;
4336     }
4337 
4338     // The caller may still expect information passed over TLS if we pass our
4339     // check
4340     if (StoreShadow) {
4341       IRB.CreateAlignedStore(Shadow, ShadowPtr, kShadowTLSAlignment);
4342       if (MS.TrackOrigins && StoreOrigin)
4343         IRB.CreateStore(getOrigin(RetVal), getOriginPtrForRetval(IRB));
4344     }
4345   }
4346 
4347   void visitPHINode(PHINode &I) {
4348     IRBuilder<> IRB(&I);
4349     if (!PropagateShadow) {
4350       setShadow(&I, getCleanShadow(&I));
4351       setOrigin(&I, getCleanOrigin());
4352       return;
4353     }
4354 
4355     ShadowPHINodes.push_back(&I);
4356     setShadow(&I, IRB.CreatePHI(getShadowTy(&I), I.getNumIncomingValues(),
4357                                 "_msphi_s"));
4358     if (MS.TrackOrigins)
4359       setOrigin(
4360           &I, IRB.CreatePHI(MS.OriginTy, I.getNumIncomingValues(), "_msphi_o"));
4361   }
4362 
4363   Value *getLocalVarIdptr(AllocaInst &I) {
4364     ConstantInt *IntConst =
4365         ConstantInt::get(Type::getInt32Ty((*F.getParent()).getContext()), 0);
4366     return new GlobalVariable(*F.getParent(), IntConst->getType(),
4367                               /*isConstant=*/false, GlobalValue::PrivateLinkage,
4368                               IntConst);
4369   }
4370 
4371   Value *getLocalVarDescription(AllocaInst &I) {
4372     return createPrivateConstGlobalForString(*F.getParent(), I.getName());
4373   }
4374 
4375   void poisonAllocaUserspace(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4376     if (PoisonStack && ClPoisonStackWithCall) {
4377       IRB.CreateCall(MS.MsanPoisonStackFn,
4378                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
4379     } else {
4380       Value *ShadowBase, *OriginBase;
4381       std::tie(ShadowBase, OriginBase) = getShadowOriginPtr(
4382           &I, IRB, IRB.getInt8Ty(), Align(1), /*isStore*/ true);
4383 
4384       Value *PoisonValue = IRB.getInt8(PoisonStack ? ClPoisonStackPattern : 0);
4385       IRB.CreateMemSet(ShadowBase, PoisonValue, Len, I.getAlign());
4386     }
4387 
4388     if (PoisonStack && MS.TrackOrigins) {
4389       Value *Idptr = getLocalVarIdptr(I);
4390       if (ClPrintStackNames) {
4391         Value *Descr = getLocalVarDescription(I);
4392         IRB.CreateCall(MS.MsanSetAllocaOriginWithDescriptionFn,
4393                        {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4394                         IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy()),
4395                         IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
4396       } else {
4397         IRB.CreateCall(MS.MsanSetAllocaOriginNoDescriptionFn,
4398                        {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4399                         IRB.CreatePointerCast(Idptr, IRB.getInt8PtrTy())});
4400       }
4401     }
4402   }
4403 
4404   void poisonAllocaKmsan(AllocaInst &I, IRBuilder<> &IRB, Value *Len) {
4405     Value *Descr = getLocalVarDescription(I);
4406     if (PoisonStack) {
4407       IRB.CreateCall(MS.MsanPoisonAllocaFn,
4408                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len,
4409                       IRB.CreatePointerCast(Descr, IRB.getInt8PtrTy())});
4410     } else {
4411       IRB.CreateCall(MS.MsanUnpoisonAllocaFn,
4412                      {IRB.CreatePointerCast(&I, IRB.getInt8PtrTy()), Len});
4413     }
4414   }
4415 
4416   void instrumentAlloca(AllocaInst &I, Instruction *InsPoint = nullptr) {
4417     if (!InsPoint)
4418       InsPoint = &I;
4419     NextNodeIRBuilder IRB(InsPoint);
4420     const DataLayout &DL = F.getParent()->getDataLayout();
4421     uint64_t TypeSize = DL.getTypeAllocSize(I.getAllocatedType());
4422     Value *Len = ConstantInt::get(MS.IntptrTy, TypeSize);
4423     if (I.isArrayAllocation())
4424       Len = IRB.CreateMul(Len,
4425                           IRB.CreateZExtOrTrunc(I.getArraySize(), MS.IntptrTy));
4426 
4427     if (MS.CompileKernel)
4428       poisonAllocaKmsan(I, IRB, Len);
4429     else
4430       poisonAllocaUserspace(I, IRB, Len);
4431   }
4432 
4433   void visitAllocaInst(AllocaInst &I) {
4434     setShadow(&I, getCleanShadow(&I));
4435     setOrigin(&I, getCleanOrigin());
4436     // We'll get to this alloca later unless it's poisoned at the corresponding
4437     // llvm.lifetime.start.
4438     AllocaSet.insert(&I);
4439   }
4440 
4441   void visitSelectInst(SelectInst &I) {
4442     IRBuilder<> IRB(&I);
4443     // a = select b, c, d
4444     Value *B = I.getCondition();
4445     Value *C = I.getTrueValue();
4446     Value *D = I.getFalseValue();
4447     Value *Sb = getShadow(B);
4448     Value *Sc = getShadow(C);
4449     Value *Sd = getShadow(D);
4450 
4451     // Result shadow if condition shadow is 0.
4452     Value *Sa0 = IRB.CreateSelect(B, Sc, Sd);
4453     Value *Sa1;
4454     if (I.getType()->isAggregateType()) {
4455       // To avoid "sign extending" i1 to an arbitrary aggregate type, we just do
4456       // an extra "select". This results in much more compact IR.
4457       // Sa = select Sb, poisoned, (select b, Sc, Sd)
4458       Sa1 = getPoisonedShadow(getShadowTy(I.getType()));
4459     } else {
4460       // Sa = select Sb, [ (c^d) | Sc | Sd ], [ b ? Sc : Sd ]
4461       // If Sb (condition is poisoned), look for bits in c and d that are equal
4462       // and both unpoisoned.
4463       // If !Sb (condition is unpoisoned), simply pick one of Sc and Sd.
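      // Worked example with illustrative values: c = 0b1100, d = 0b1010, both
      // fully initialized (Sc = Sd = 0), condition poisoned (Sb = 1). Then
      // Sa1 = (c ^ d) | Sc | Sd = 0b0110: the bits where c and d agree stay
      // clean, the bits where they differ are poisoned.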
4464 
4465       // Cast arguments to shadow-compatible type.
4466       C = CreateAppToShadowCast(IRB, C);
4467       D = CreateAppToShadowCast(IRB, D);
4468 
4469       // Result shadow if condition shadow is 1.
4470       Sa1 = IRB.CreateOr({IRB.CreateXor(C, D), Sc, Sd});
4471     }
4472     Value *Sa = IRB.CreateSelect(Sb, Sa1, Sa0, "_msprop_select");
4473     setShadow(&I, Sa);
4474     if (MS.TrackOrigins) {
4475       // Origins are always i32, so any vector conditions must be flattened.
4476       // FIXME: consider tracking vector origins for app vectors?
4477       if (B->getType()->isVectorTy()) {
4478         B = convertToBool(B, IRB);
4479         Sb = convertToBool(Sb, IRB);
4480       }
4481       // a = select b, c, d
4482       // Oa = Sb ? Ob : (b ? Oc : Od)
4483       setOrigin(
4484           &I, IRB.CreateSelect(Sb, getOrigin(I.getCondition()),
4485                                IRB.CreateSelect(B, getOrigin(I.getTrueValue()),
4486                                                 getOrigin(I.getFalseValue()))));
4487     }
4488   }
4489 
4490   void visitLandingPadInst(LandingPadInst &I) {
4491     // Do nothing.
4492     // See https://github.com/google/sanitizers/issues/504
4493     setShadow(&I, getCleanShadow(&I));
4494     setOrigin(&I, getCleanOrigin());
4495   }
4496 
4497   void visitCatchSwitchInst(CatchSwitchInst &I) {
4498     setShadow(&I, getCleanShadow(&I));
4499     setOrigin(&I, getCleanOrigin());
4500   }
4501 
4502   void visitFuncletPadInst(FuncletPadInst &I) {
4503     setShadow(&I, getCleanShadow(&I));
4504     setOrigin(&I, getCleanOrigin());
4505   }
4506 
4507   void visitGetElementPtrInst(GetElementPtrInst &I) { handleShadowOr(I); }
4508 
4509   void visitExtractValueInst(ExtractValueInst &I) {
4510     IRBuilder<> IRB(&I);
4511     Value *Agg = I.getAggregateOperand();
4512     LLVM_DEBUG(dbgs() << "ExtractValue:  " << I << "\n");
4513     Value *AggShadow = getShadow(Agg);
4514     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4515     Value *ResShadow = IRB.CreateExtractValue(AggShadow, I.getIndices());
4516     LLVM_DEBUG(dbgs() << "   ResShadow:  " << *ResShadow << "\n");
4517     setShadow(&I, ResShadow);
4518     setOriginForNaryOp(I);
4519   }
4520 
4521   void visitInsertValueInst(InsertValueInst &I) {
4522     IRBuilder<> IRB(&I);
4523     LLVM_DEBUG(dbgs() << "InsertValue:  " << I << "\n");
4524     Value *AggShadow = getShadow(I.getAggregateOperand());
4525     Value *InsShadow = getShadow(I.getInsertedValueOperand());
4526     LLVM_DEBUG(dbgs() << "   AggShadow:  " << *AggShadow << "\n");
4527     LLVM_DEBUG(dbgs() << "   InsShadow:  " << *InsShadow << "\n");
4528     Value *Res = IRB.CreateInsertValue(AggShadow, InsShadow, I.getIndices());
4529     LLVM_DEBUG(dbgs() << "   Res:        " << *Res << "\n");
4530     setShadow(&I, Res);
4531     setOriginForNaryOp(I);
4532   }
4533 
4534   void dumpInst(Instruction &I) {
4535     if (CallInst *CI = dyn_cast<CallInst>(&I)) {
4536       errs() << "ZZZ call " << CI->getCalledFunction()->getName() << "\n";
4537     } else {
4538       errs() << "ZZZ " << I.getOpcodeName() << "\n";
4539     }
4540     errs() << "QQQ " << I << "\n";
4541   }
4542 
4543   void visitResumeInst(ResumeInst &I) {
4544     LLVM_DEBUG(dbgs() << "Resume: " << I << "\n");
4545     // Nothing to do here.
4546   }
4547 
4548   void visitCleanupReturnInst(CleanupReturnInst &CRI) {
4549     LLVM_DEBUG(dbgs() << "CleanupReturn: " << CRI << "\n");
4550     // Nothing to do here.
4551   }
4552 
4553   void visitCatchReturnInst(CatchReturnInst &CRI) {
4554     LLVM_DEBUG(dbgs() << "CatchReturn: " << CRI << "\n");
4555     // Nothing to do here.
4556   }
4557 
4558   void instrumentAsmArgument(Value *Operand, Type *ElemTy, Instruction &I,
4559                              IRBuilder<> &IRB, const DataLayout &DL,
4560                              bool isOutput) {
4561     // For each assembly argument, we check its value for being initialized.
4562     // If the argument is a pointer, we assume it points to a single element
4563     // of the corresponding type (or to an 8-byte word, if the type is unsized).
4564     // Each such pointer is instrumented with a call to the runtime library.
4565     Type *OpType = Operand->getType();
4566     // Check the operand value itself.
4567     insertShadowCheck(Operand, &I);
4568     if (!OpType->isPointerTy() || !isOutput) {
4569       assert(!isOutput);
4570       return;
4571     }
4572     if (!ElemTy->isSized())
4573       return;
4574     Value *Ptr = IRB.CreatePointerCast(Operand, IRB.getInt8PtrTy());
4575     Value *SizeVal =
4576       IRB.CreateTypeSize(MS.IntptrTy, DL.getTypeStoreSize(ElemTy));
4577     IRB.CreateCall(MS.MsanInstrumentAsmStoreFn, {Ptr, SizeVal});
4578   }
4579 
4580   /// Get the number of output arguments returned by pointers.
4581   int getNumOutputArgs(InlineAsm *IA, CallBase *CB) {
4582     int NumRetOutputs = 0;
4583     int NumOutputs = 0;
4584     Type *RetTy = cast<Value>(CB)->getType();
4585     if (!RetTy->isVoidTy()) {
4586       // Register outputs are returned via the CallInst return value.
4587       auto *ST = dyn_cast<StructType>(RetTy);
4588       if (ST)
4589         NumRetOutputs = ST->getNumElements();
4590       else
4591         NumRetOutputs = 1;
4592     }
4593     InlineAsm::ConstraintInfoVector Constraints = IA->ParseConstraints();
4594     for (const InlineAsm::ConstraintInfo &Info : Constraints) {
4595       switch (Info.Type) {
4596       case InlineAsm::isOutput:
4597         NumOutputs++;
4598         break;
4599       default:
4600         break;
4601       }
4602     }
4603     return NumOutputs - NumRetOutputs;
4604   }
4605 
4606   void visitAsmInstruction(Instruction &I) {
4607     // Conservative inline assembly handling: check for poisoned shadow of
4608     // asm() arguments, then unpoison the result and all the memory locations
4609     // pointed to by those arguments.
4610     // An inline asm() statement in C++ contains lists of input and output
4611     // arguments used by the assembly code. These are mapped to operands of the
4612     // CallInst as follows:
4613     //  - nR register outputs ("=r") are returned by value in a single structure
4614     //  (SSA value of the CallInst);
4615     //  - nO other outputs ("=m" and others) are returned by pointer as first
4616     // nO operands of the CallInst;
4617     //  - nI inputs ("r", "m" and others) are passed to CallInst as the
4618     // remaining nI operands.
4619     // The total number of asm() arguments in the source is nR+nO+nI, and the
4620     // corresponding CallInst has nO+nI+1 operands (the last operand is the
4621     // function to be called).
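    // Hypothetical example for illustration:
    //   asm("..." : "=r"(a), "=m"(b) : "r"(c), "m"(d));
    // Here nR = 1 (a comes back as the call's return value), nO = 1 (&b is
    // operand 0) and nI = 2 (c and &d follow), so the CallInst has
    // nO + nI + 1 = 4 operands, the last one being the inline asm callee.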
4622     const DataLayout &DL = F.getParent()->getDataLayout();
4623     CallBase *CB = cast<CallBase>(&I);
4624     IRBuilder<> IRB(&I);
4625     InlineAsm *IA = cast<InlineAsm>(CB->getCalledOperand());
4626     int OutputArgs = getNumOutputArgs(IA, CB);
4627     // The last operand of a CallInst is the function itself.
4628     int NumOperands = CB->getNumOperands() - 1;
4629 
4630     // Check input arguments before unpoisoning output arguments, so that we
4631     // don't overwrite uninit values before checking them.
4632     for (int i = OutputArgs; i < NumOperands; i++) {
4633       Value *Operand = CB->getOperand(i);
4634       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4635                             /*isOutput*/ false);
4636     }
4637     // Unpoison output arguments. This must happen before the actual InlineAsm
4638     // call, so that the shadow for memory published in the asm() statement
4639     // remains valid.
4640     for (int i = 0; i < OutputArgs; i++) {
4641       Value *Operand = CB->getOperand(i);
4642       instrumentAsmArgument(Operand, CB->getParamElementType(i), I, IRB, DL,
4643                             /*isOutput*/ true);
4644     }
4645 
4646     setShadow(&I, getCleanShadow(&I));
4647     setOrigin(&I, getCleanOrigin());
4648   }
4649 
4650   void visitFreezeInst(FreezeInst &I) {
4651     // Freeze always returns a fully defined value.
4652     setShadow(&I, getCleanShadow(&I));
4653     setOrigin(&I, getCleanOrigin());
4654   }
4655 
4656   void visitInstruction(Instruction &I) {
4657     // Everything else: stop propagating and check for poisoned shadow.
4658     if (ClDumpStrictInstructions)
4659       dumpInst(I);
4660     LLVM_DEBUG(dbgs() << "DEFAULT: " << I << "\n");
4661     for (size_t i = 0, n = I.getNumOperands(); i < n; i++) {
4662       Value *Operand = I.getOperand(i);
4663       if (Operand->getType()->isSized())
4664         insertShadowCheck(Operand, &I);
4665     }
4666     setShadow(&I, getCleanShadow(&I));
4667     setOrigin(&I, getCleanOrigin());
4668   }
4669 };
4670 
4671 /// AMD64-specific implementation of VarArgHelper.
4672 struct VarArgAMD64Helper : public VarArgHelper {
4673   // An unfortunate workaround for asymmetric lowering of va_arg stuff.
4674   // See a comment in visitCallBase for more details.
4675   static const unsigned AMD64GpEndOffset = 48; // AMD64 ABI Draft 0.99.6 p3.5.7
4676   static const unsigned AMD64FpEndOffsetSSE = 176;
4677   // If SSE is disabled, fp_offset in va_list is zero.
4678   static const unsigned AMD64FpEndOffsetNoSSE = AMD64GpEndOffset;
4679 
4680   unsigned AMD64FpEndOffset;
4681   Function &F;
4682   MemorySanitizer &MS;
4683   MemorySanitizerVisitor &MSV;
4684   AllocaInst *VAArgTLSCopy = nullptr;
4685   AllocaInst *VAArgTLSOriginCopy = nullptr;
4686   Value *VAArgOverflowSize = nullptr;
4687 
4688   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4689 
4690   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
4691 
4692   VarArgAMD64Helper(Function &F, MemorySanitizer &MS,
4693                     MemorySanitizerVisitor &MSV)
4694       : F(F), MS(MS), MSV(MSV) {
4695     AMD64FpEndOffset = AMD64FpEndOffsetSSE;
4696     for (const auto &Attr : F.getAttributes().getFnAttrs()) {
4697       if (Attr.isStringAttribute() &&
4698           (Attr.getKindAsString() == "target-features")) {
4699         if (Attr.getValueAsString().contains("-sse"))
4700           AMD64FpEndOffset = AMD64FpEndOffsetNoSSE;
4701         break;
4702       }
4703     }
4704   }
4705 
4706   ArgKind classifyArgument(Value *arg) {
4707     // A very rough approximation of X86_64 argument classification rules.
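    // For example, double or <4 x float> classify as AK_FloatingPoint, i32 or
    // a pointer as AK_GeneralPurpose, and anything else (e.g. a large struct
    // passed by value) falls back to AK_Memory.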
4708     Type *T = arg->getType();
4709     if (T->isFPOrFPVectorTy() || T->isX86_MMXTy())
4710       return AK_FloatingPoint;
4711     if (T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64)
4712       return AK_GeneralPurpose;
4713     if (T->isPointerTy())
4714       return AK_GeneralPurpose;
4715     return AK_Memory;
4716   }
4717 
4718   // For VarArg functions, store the argument shadow in an ABI-specific format
4719   // that corresponds to va_list layout.
4720   // We do this because Clang lowers va_arg in the frontend, and this pass
4721   // only sees the low level code that deals with va_list internals.
4722   // A much easier alternative (provided that Clang emits va_arg instructions)
4723   // would have been to associate each live instance of va_list with a copy of
4724   // MSanParamTLS, and extract shadow on va_arg() call in the argument list
4725   // order.
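  // Rough sketch of the resulting __msan_va_arg_tls layout (assuming SSE is
  // enabled): bytes [0, 48) hold the shadow of arguments passed in the six
  // general-purpose argument registers, bytes [48, 176) the shadow of
  // xmm0-xmm7 (16 bytes each), and bytes [176, ...) the shadow of arguments
  // passed on the stack, mirroring the reg_save_area / overflow_arg_area
  // split handled in finalizeInstrumentation().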
4726   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4727     unsigned GpOffset = 0;
4728     unsigned FpOffset = AMD64GpEndOffset;
4729     unsigned OverflowOffset = AMD64FpEndOffset;
4730     const DataLayout &DL = F.getParent()->getDataLayout();
4731     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
4732       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
4733       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
4734       if (IsByVal) {
4735         // ByVal arguments always go to the overflow area.
4736         // Fixed arguments passed through the overflow area will be stepped
4737         // over by va_start, so don't count them towards the offset.
4738         if (IsFixed)
4739           continue;
4740         assert(A->getType()->isPointerTy());
4741         Type *RealTy = CB.getParamByValType(ArgNo);
4742         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
4743         Value *ShadowBase = getShadowPtrForVAArgument(
4744             RealTy, IRB, OverflowOffset, alignTo(ArgSize, 8));
4745         Value *OriginBase = nullptr;
4746         if (MS.TrackOrigins)
4747           OriginBase = getOriginPtrForVAArgument(RealTy, IRB, OverflowOffset);
4748         OverflowOffset += alignTo(ArgSize, 8);
4749         if (!ShadowBase)
4750           continue;
4751         Value *ShadowPtr, *OriginPtr;
4752         std::tie(ShadowPtr, OriginPtr) =
4753             MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(), kShadowTLSAlignment,
4754                                    /*isStore*/ false);
4755 
4756         IRB.CreateMemCpy(ShadowBase, kShadowTLSAlignment, ShadowPtr,
4757                          kShadowTLSAlignment, ArgSize);
4758         if (MS.TrackOrigins)
4759           IRB.CreateMemCpy(OriginBase, kShadowTLSAlignment, OriginPtr,
4760                            kShadowTLSAlignment, ArgSize);
4761       } else {
4762         ArgKind AK = classifyArgument(A);
4763         if (AK == AK_GeneralPurpose && GpOffset >= AMD64GpEndOffset)
4764           AK = AK_Memory;
4765         if (AK == AK_FloatingPoint && FpOffset >= AMD64FpEndOffset)
4766           AK = AK_Memory;
4767         Value *ShadowBase, *OriginBase = nullptr;
4768         switch (AK) {
4769         case AK_GeneralPurpose:
4770           ShadowBase =
4771               getShadowPtrForVAArgument(A->getType(), IRB, GpOffset, 8);
4772           if (MS.TrackOrigins)
4773             OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, GpOffset);
4774           GpOffset += 8;
4775           break;
4776         case AK_FloatingPoint:
4777           ShadowBase =
4778               getShadowPtrForVAArgument(A->getType(), IRB, FpOffset, 16);
4779           if (MS.TrackOrigins)
4780             OriginBase = getOriginPtrForVAArgument(A->getType(), IRB, FpOffset);
4781           FpOffset += 16;
4782           break;
4783         case AK_Memory:
4784           if (IsFixed)
4785             continue;
4786           uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4787           ShadowBase =
4788               getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset, 8);
4789           if (MS.TrackOrigins)
4790             OriginBase =
4791                 getOriginPtrForVAArgument(A->getType(), IRB, OverflowOffset);
4792           OverflowOffset += alignTo(ArgSize, 8);
4793         }
4794         // Take fixed arguments into account for GpOffset and FpOffset,
4795         // but don't actually store shadows for them.
4796         // TODO(glider): don't call get*PtrForVAArgument() for them.
4797         if (IsFixed)
4798           continue;
4799         if (!ShadowBase)
4800           continue;
4801         Value *Shadow = MSV.getShadow(A);
4802         IRB.CreateAlignedStore(Shadow, ShadowBase, kShadowTLSAlignment);
4803         if (MS.TrackOrigins) {
4804           Value *Origin = MSV.getOrigin(A);
4805           TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
4806           MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
4807                           std::max(kShadowTLSAlignment, kMinOriginAlignment));
4808         }
4809       }
4810     }
4811     Constant *OverflowSize =
4812         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AMD64FpEndOffset);
4813     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
4814   }
4815 
4816   /// Compute the shadow address for a given va_arg.
4817   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4818                                    unsigned ArgOffset, unsigned ArgSize) {
4819     // Make sure we don't overflow __msan_va_arg_tls.
4820     if (ArgOffset + ArgSize > kParamTLSSize)
4821       return nullptr;
4822     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4823     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4824     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4825                               "_msarg_va_s");
4826   }
4827 
4828   /// Compute the origin address for a given va_arg.
4829   Value *getOriginPtrForVAArgument(Type *Ty, IRBuilder<> &IRB, int ArgOffset) {
4830     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
4831     // getOriginPtrForVAArgument() is always called after
4832     // getShadowPtrForVAArgument(), so __msan_va_arg_origin_tls can never
4833     // overflow.
4834     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4835     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
4836                               "_msarg_va_o");
4837   }
4838 
4839   void unpoisonVAListTagForInst(IntrinsicInst &I) {
4840     IRBuilder<> IRB(&I);
4841     Value *VAListTag = I.getArgOperand(0);
4842     Value *ShadowPtr, *OriginPtr;
4843     const Align Alignment = Align(8);
4844     std::tie(ShadowPtr, OriginPtr) =
4845         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
4846                                /*isStore*/ true);
4847 
4848     // Unpoison the whole __va_list_tag.
4849     // FIXME: magic ABI constants.
4850     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
4851                      /* size */ 24, Alignment, false);
4852     // We shouldn't need to zero out the origins, as they're only checked for
4853     // nonzero shadow.
4854   }
4855 
4856   void visitVAStartInst(VAStartInst &I) override {
4857     if (F.getCallingConv() == CallingConv::Win64)
4858       return;
4859     VAStartInstrumentationList.push_back(&I);
4860     unpoisonVAListTagForInst(I);
4861   }
4862 
4863   void visitVACopyInst(VACopyInst &I) override {
4864     if (F.getCallingConv() == CallingConv::Win64)
4865       return;
4866     unpoisonVAListTagForInst(I);
4867   }
4868 
4869   void finalizeInstrumentation() override {
4870     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
4871            "finalizeInstrumentation called twice");
4872     if (!VAStartInstrumentationList.empty()) {
4873       // If there is a va_start in this function, make a backup copy of
4874       // va_arg_tls somewhere in the function entry block.
4875       IRBuilder<> IRB(MSV.FnPrologueEnd);
4876       VAArgOverflowSize =
4877           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
4878       Value *CopySize = IRB.CreateAdd(
4879           ConstantInt::get(MS.IntptrTy, AMD64FpEndOffset), VAArgOverflowSize);
4880       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4881       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
4882       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
4883                        CopySize, kShadowTLSAlignment, false);
4884 
4885       Value *SrcSize = IRB.CreateBinaryIntrinsic(
4886           Intrinsic::umin, CopySize,
4887           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
4888       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
4889                        kShadowTLSAlignment, SrcSize);
4890       if (MS.TrackOrigins) {
4891         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
4892         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
4893         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
4894                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
4895       }
4896     }
4897 
4898     // Instrument va_start.
4899     // Copy va_list shadow from the backup copy of the TLS contents.
4900     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
4901       CallInst *OrigInst = VAStartInstrumentationList[i];
4902       NextNodeIRBuilder IRB(OrigInst);
4903       Value *VAListTag = OrigInst->getArgOperand(0);
4904 
4905       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4906       Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
4907           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4908                         ConstantInt::get(MS.IntptrTy, 16)),
4909           PointerType::get(RegSaveAreaPtrTy, 0));
4910       Value *RegSaveAreaPtr =
4911           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
4912       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
4913       const Align Alignment = Align(16);
4914       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
4915           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
4916                                  Alignment, /*isStore*/ true);
4917       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
4918                        AMD64FpEndOffset);
4919       if (MS.TrackOrigins)
4920         IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
4921                          Alignment, AMD64FpEndOffset);
4922       Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
4923       Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
4924           IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
4925                         ConstantInt::get(MS.IntptrTy, 8)),
4926           PointerType::get(OverflowArgAreaPtrTy, 0));
4927       Value *OverflowArgAreaPtr =
4928           IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
4929       Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
4930       std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
4931           MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
4932                                  Alignment, /*isStore*/ true);
4933       Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
4934                                              AMD64FpEndOffset);
4935       IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
4936                        VAArgOverflowSize);
4937       if (MS.TrackOrigins) {
4938         SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
4939                                         AMD64FpEndOffset);
4940         IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
4941                          VAArgOverflowSize);
4942       }
4943     }
4944   }
4945 };
4946 
4947 /// MIPS64-specific implementation of VarArgHelper.
4948 struct VarArgMIPS64Helper : public VarArgHelper {
4949   Function &F;
4950   MemorySanitizer &MS;
4951   MemorySanitizerVisitor &MSV;
4952   AllocaInst *VAArgTLSCopy = nullptr;
4953   Value *VAArgSize = nullptr;
4954 
4955   SmallVector<CallInst *, 16> VAStartInstrumentationList;
4956 
4957   VarArgMIPS64Helper(Function &F, MemorySanitizer &MS,
4958                      MemorySanitizerVisitor &MSV)
4959       : F(F), MS(MS), MSV(MSV) {}
4960 
4961   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
4962     unsigned VAArgOffset = 0;
4963     const DataLayout &DL = F.getParent()->getDataLayout();
4964     for (Value *A :
4965          llvm::drop_begin(CB.args(), CB.getFunctionType()->getNumParams())) {
4966       Triple TargetTriple(F.getParent()->getTargetTriple());
4967       Value *Base;
4968       uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
4969       if (TargetTriple.getArch() == Triple::mips64) {
4970         // Adjust the shadow for arguments with size < 8 to match the
4971         // placement of bits in a big-endian system.
4972         if (ArgSize < 8)
4973           VAArgOffset += (8 - ArgSize);
4974       }
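      // E.g. a 4-byte argument occupies the high half of its 8-byte slot on
      // big-endian mips64, so the adjustment above makes its shadow land at
      // slot offset + 4 (illustrative).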
4975       Base = getShadowPtrForVAArgument(A->getType(), IRB, VAArgOffset, ArgSize);
4976       VAArgOffset += ArgSize;
4977       VAArgOffset = alignTo(VAArgOffset, 8);
4978       if (!Base)
4979         continue;
4980       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
4981     }
4982 
4983     Constant *TotalVAArgSize = ConstantInt::get(IRB.getInt64Ty(), VAArgOffset);
4984     // Here we reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating a
4985     // new class member, i.e. it holds the total size of all VarArgs.
4986     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
4987   }
4988 
4989   /// Compute the shadow address for a given va_arg.
4990   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
4991                                    unsigned ArgOffset, unsigned ArgSize) {
4992     // Make sure we don't overflow __msan_va_arg_tls.
4993     if (ArgOffset + ArgSize > kParamTLSSize)
4994       return nullptr;
4995     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
4996     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
4997     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
4998                               "_msarg");
4999   }
5000 
5001   void visitVAStartInst(VAStartInst &I) override {
5002     IRBuilder<> IRB(&I);
5003     VAStartInstrumentationList.push_back(&I);
5004     Value *VAListTag = I.getArgOperand(0);
5005     Value *ShadowPtr, *OriginPtr;
5006     const Align Alignment = Align(8);
5007     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5008         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5009     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5010                      /* size */ 8, Alignment, false);
5011   }
5012 
5013   void visitVACopyInst(VACopyInst &I) override {
5014     IRBuilder<> IRB(&I);
5015     VAStartInstrumentationList.push_back(&I);
5016     Value *VAListTag = I.getArgOperand(0);
5017     Value *ShadowPtr, *OriginPtr;
5018     const Align Alignment = Align(8);
5019     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5020         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5021     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5022                      /* size */ 8, Alignment, false);
5023   }
5024 
5025   void finalizeInstrumentation() override {
5026     assert(!VAArgSize && !VAArgTLSCopy &&
5027            "finalizeInstrumentation called twice");
5028     IRBuilder<> IRB(MSV.FnPrologueEnd);
5029     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5030     Value *CopySize =
5031         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
5032 
5033     if (!VAStartInstrumentationList.empty()) {
5034       // If there is a va_start in this function, make a backup copy of
5035       // va_arg_tls somewhere in the function entry block.
5036       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5037       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5038       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5039                        CopySize, kShadowTLSAlignment, false);
5040 
5041       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5042           Intrinsic::umin, CopySize,
5043           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5044       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5045                        kShadowTLSAlignment, SrcSize);
5046     }
5047 
5048     // Instrument va_start.
5049     // Copy va_list shadow from the backup copy of the TLS contents.
5050     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5051       CallInst *OrigInst = VAStartInstrumentationList[i];
5052       NextNodeIRBuilder IRB(OrigInst);
5053       Value *VAListTag = OrigInst->getArgOperand(0);
5054       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5055       Value *RegSaveAreaPtrPtr =
5056           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5057                              PointerType::get(RegSaveAreaPtrTy, 0));
5058       Value *RegSaveAreaPtr =
5059           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5060       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5061       const Align Alignment = Align(8);
5062       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5063           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5064                                  Alignment, /*isStore*/ true);
5065       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5066                        CopySize);
5067     }
5068   }
5069 };
5070 
5071 /// AArch64-specific implementation of VarArgHelper.
5072 struct VarArgAArch64Helper : public VarArgHelper {
5073   static const unsigned kAArch64GrArgSize = 64;
5074   static const unsigned kAArch64VrArgSize = 128;
5075 
5076   static const unsigned AArch64GrBegOffset = 0;
5077   static const unsigned AArch64GrEndOffset = kAArch64GrArgSize;
5078   // Make VR space aligned to 16 bytes.
5079   static const unsigned AArch64VrBegOffset = AArch64GrEndOffset;
5080   static const unsigned AArch64VrEndOffset =
5081       AArch64VrBegOffset + kAArch64VrArgSize;
5082   static const unsigned AArch64VAEndOffset = AArch64VrEndOffset;
5083 
5084   Function &F;
5085   MemorySanitizer &MS;
5086   MemorySanitizerVisitor &MSV;
5087   AllocaInst *VAArgTLSCopy = nullptr;
5088   Value *VAArgOverflowSize = nullptr;
5089 
5090   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5091 
5092   enum ArgKind { AK_GeneralPurpose, AK_FloatingPoint, AK_Memory };
5093 
5094   VarArgAArch64Helper(Function &F, MemorySanitizer &MS,
5095                       MemorySanitizerVisitor &MSV)
5096       : F(F), MS(MS), MSV(MSV) {}
5097 
5098   ArgKind classifyArgument(Value *arg) {
5099     Type *T = arg->getType();
5100     if (T->isFPOrFPVectorTy())
5101       return AK_FloatingPoint;
5102     if ((T->isIntegerTy() && T->getPrimitiveSizeInBits() <= 64) ||
5103         (T->isPointerTy()))
5104       return AK_GeneralPurpose;
5105     return AK_Memory;
5106   }
5107 
5108   // The instrumentation stores the argument shadow in a non-ABI-specific
5109   // format because it does not know which arguments are named (as in the
5110   // x86_64 case, Clang lowers va_arg in the frontend and this pass only
5111   // sees the low-level code that deals with va_list internals).
5112   // The first eight GR registers are saved in the first 64 bytes of the
5113   // va_arg TLS array, followed by the first 8 FP/SIMD registers, and then
5114   // the remaining arguments.
5115   // Using constant offset within the va_arg TLS array allows fast copy
5116   // in the finalize instrumentation.
5117   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5118     unsigned GrOffset = AArch64GrBegOffset;
5119     unsigned VrOffset = AArch64VrBegOffset;
5120     unsigned OverflowOffset = AArch64VAEndOffset;
5121 
5122     const DataLayout &DL = F.getParent()->getDataLayout();
5123     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5124       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5125       ArgKind AK = classifyArgument(A);
5126       if (AK == AK_GeneralPurpose && GrOffset >= AArch64GrEndOffset)
5127         AK = AK_Memory;
5128       if (AK == AK_FloatingPoint && VrOffset >= AArch64VrEndOffset)
5129         AK = AK_Memory;
5130       Value *Base;
5131       switch (AK) {
5132       case AK_GeneralPurpose:
5133         Base = getShadowPtrForVAArgument(A->getType(), IRB, GrOffset, 8);
5134         GrOffset += 8;
5135         break;
5136       case AK_FloatingPoint:
5137         Base = getShadowPtrForVAArgument(A->getType(), IRB, VrOffset, 8);
5138         VrOffset += 16;
5139         break;
5140       case AK_Memory:
5141         // Don't count fixed arguments in the overflow area - va_start will
5142         // skip right over them.
5143         if (IsFixed)
5144           continue;
5145         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5146         Base = getShadowPtrForVAArgument(A->getType(), IRB, OverflowOffset,
5147                                          alignTo(ArgSize, 8));
5148         OverflowOffset += alignTo(ArgSize, 8);
5149         break;
5150       }
5151       // Count Gp/Vr fixed arguments to their respective offsets, but don't
5152       // bother to actually store a shadow.
5153       if (IsFixed)
5154         continue;
5155       if (!Base)
5156         continue;
5157       IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5158     }
5159     Constant *OverflowSize =
5160         ConstantInt::get(IRB.getInt64Ty(), OverflowOffset - AArch64VAEndOffset);
5161     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5162   }
5163 
5164   /// Compute the shadow address for a given va_arg.
5165   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
5166                                    unsigned ArgOffset, unsigned ArgSize) {
5167     // Make sure we don't overflow __msan_va_arg_tls.
5168     if (ArgOffset + ArgSize > kParamTLSSize)
5169       return nullptr;
5170     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5171     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5172     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
5173                               "_msarg");
5174   }
5175 
5176   void visitVAStartInst(VAStartInst &I) override {
5177     IRBuilder<> IRB(&I);
5178     VAStartInstrumentationList.push_back(&I);
5179     Value *VAListTag = I.getArgOperand(0);
5180     Value *ShadowPtr, *OriginPtr;
5181     const Align Alignment = Align(8);
5182     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5183         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5184     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5185                      /* size */ 32, Alignment, false);
5186   }
5187 
5188   void visitVACopyInst(VACopyInst &I) override {
5189     IRBuilder<> IRB(&I);
5190     VAStartInstrumentationList.push_back(&I);
5191     Value *VAListTag = I.getArgOperand(0);
5192     Value *ShadowPtr, *OriginPtr;
5193     const Align Alignment = Align(8);
5194     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5195         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5196     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5197                      /* size */ 32, Alignment, false);
5198   }
5199 
5200   // Retrieve a va_list field of 'void*' size.
5201   Value *getVAField64(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5202     Value *SaveAreaPtrPtr = IRB.CreateIntToPtr(
5203         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5204                       ConstantInt::get(MS.IntptrTy, offset)),
5205         Type::getInt64PtrTy(*MS.C));
5206     return IRB.CreateLoad(Type::getInt64Ty(*MS.C), SaveAreaPtrPtr);
5207   }
5208 
5209   // Retrieve a va_list field of 'int' size.
5210   Value *getVAField32(IRBuilder<> &IRB, Value *VAListTag, int offset) {
5211     Value *SaveAreaPtr = IRB.CreateIntToPtr(
5212         IRB.CreateAdd(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5213                       ConstantInt::get(MS.IntptrTy, offset)),
5214         Type::getInt32PtrTy(*MS.C));
5215     Value *SaveArea32 = IRB.CreateLoad(IRB.getInt32Ty(), SaveAreaPtr);
5216     return IRB.CreateSExt(SaveArea32, MS.IntptrTy);
5217   }
5218 
5219   void finalizeInstrumentation() override {
5220     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5221            "finalizeInstrumentation called twice");
5222     if (!VAStartInstrumentationList.empty()) {
5223       // If there is a va_start in this function, make a backup copy of
5224       // va_arg_tls somewhere in the function entry block.
5225       IRBuilder<> IRB(MSV.FnPrologueEnd);
5226       VAArgOverflowSize =
5227           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5228       Value *CopySize = IRB.CreateAdd(
5229           ConstantInt::get(MS.IntptrTy, AArch64VAEndOffset), VAArgOverflowSize);
5230       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5231       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5232       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5233                        CopySize, kShadowTLSAlignment, false);
5234 
5235       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5236           Intrinsic::umin, CopySize,
5237           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5238       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5239                        kShadowTLSAlignment, SrcSize);
5240     }
5241 
5242     Value *GrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64GrArgSize);
5243     Value *VrArgSize = ConstantInt::get(MS.IntptrTy, kAArch64VrArgSize);
5244 
5245     // Instrument va_start, copy va_list shadow from the backup copy of
5246     // the TLS contents.
5247     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5248       CallInst *OrigInst = VAStartInstrumentationList[i];
5249       NextNodeIRBuilder IRB(OrigInst);
5250 
5251       Value *VAListTag = OrigInst->getArgOperand(0);
5252 
      // The AArch64 variadic ABI creates two areas in which the incoming
      // argument registers are saved: one for the 64-bit general-purpose
      // registers xn-x7 and another for the 128-bit FP/SIMD registers vn-v7.
      // We therefore have to propagate shadow into both regions,
      // 'va::__gr_top + va::__gr_offs' and 'va::__vr_top + va::__vr_offs',
      // while the remaining arguments get their shadow via 'va::stack'.
      // One caveat: only the unnamed (variadic) arguments need to be
      // propagated, but the call-site instrumentation saves shadow for *all*
      // arguments. So, when copying shadow values from the va_arg TLS array,
      // we adjust the offsets of both the GR and VR regions by the
      // corresponding __{gr,vr}_offs value (which reflects the number of
      // named arguments passed in registers).
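      //
      // For reference, the field offsets used below follow the AAPCS64
      // va_list layout (a sketch; these names are the ABI's, not identifiers
      // in this file):
      //
      //   struct va_list {
      //     void *__stack;   // offset  0: next overflow (stack) argument
      //     void *__gr_top;  // offset  8: end of the GP register save area
      //     void *__vr_top;  // offset 16: end of the FP/SIMD save area
      //     int   __gr_offs; // offset 24: negative offset from __gr_top
      //     int   __vr_offs; // offset 28: negative offset from __vr_top
      //   };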
5265 
5266       // Read the stack pointer from the va_list.
5267       Value *StackSaveAreaPtr = getVAField64(IRB, VAListTag, 0);
5268 
      // Read __gr_top and __gr_offs and add them up.
5270       Value *GrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 8);
5271       Value *GrOffSaveArea = getVAField32(IRB, VAListTag, 24);
5272 
5273       Value *GrRegSaveAreaPtr = IRB.CreateAdd(GrTopSaveAreaPtr, GrOffSaveArea);
5274 
      // Read __vr_top and __vr_offs and add them up.
5276       Value *VrTopSaveAreaPtr = getVAField64(IRB, VAListTag, 16);
5277       Value *VrOffSaveArea = getVAField32(IRB, VAListTag, 28);
5278 
5279       Value *VrRegSaveAreaPtr = IRB.CreateAdd(VrTopSaveAreaPtr, VrOffSaveArea);
5280 
      // We do not know how many named arguments were passed, and at the call
      // site shadow was saved for *all* arguments.  Since __gr_offs is
      // defined as '0 - ((8 - named_gr) * 8)', adding it to the size of the
      // GR region skips the shadow of the named arguments and propagates only
      // the variadic part.
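      // For example (an illustrative sketch, not code from this file): with
      // two named GP arguments, __gr_offs == -48 at va_start, so the first
      // variadic GP argument's shadow lives at byte
      // kAArch64GrArgSize + (-48) == 16 of the GR region in VAArgTLSCopy and
      // 64 - 16 == 48 bytes remain to be copied below.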
5285       Value *GrRegSaveAreaShadowPtrOff =
5286           IRB.CreateAdd(GrArgSize, GrOffSaveArea);
5287 
5288       Value *GrRegSaveAreaShadowPtr =
5289           MSV.getShadowOriginPtr(GrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5290                                  Align(8), /*isStore*/ true)
5291               .first;
5292 
5293       Value *GrSrcPtr = IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
5294                                               GrRegSaveAreaShadowPtrOff);
5295       Value *GrCopySize = IRB.CreateSub(GrArgSize, GrRegSaveAreaShadowPtrOff);
5296 
5297       IRB.CreateMemCpy(GrRegSaveAreaShadowPtr, Align(8), GrSrcPtr, Align(8),
5298                        GrCopySize);
5299 
5300       // Again, but for FP/SIMD values.
5301       Value *VrRegSaveAreaShadowPtrOff =
5302           IRB.CreateAdd(VrArgSize, VrOffSaveArea);
5303 
5304       Value *VrRegSaveAreaShadowPtr =
5305           MSV.getShadowOriginPtr(VrRegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5306                                  Align(8), /*isStore*/ true)
5307               .first;
5308 
5309       Value *VrSrcPtr = IRB.CreateInBoundsGEP(
5310           IRB.getInt8Ty(),
5311           IRB.CreateInBoundsGEP(IRB.getInt8Ty(), VAArgTLSCopy,
5312                                 IRB.getInt32(AArch64VrBegOffset)),
5313           VrRegSaveAreaShadowPtrOff);
5314       Value *VrCopySize = IRB.CreateSub(VrArgSize, VrRegSaveAreaShadowPtrOff);
5315 
5316       IRB.CreateMemCpy(VrRegSaveAreaShadowPtr, Align(8), VrSrcPtr, Align(8),
5317                        VrCopySize);
5318 
5319       // And finally for remaining arguments.
5320       Value *StackSaveAreaShadowPtr =
5321           MSV.getShadowOriginPtr(StackSaveAreaPtr, IRB, IRB.getInt8Ty(),
5322                                  Align(16), /*isStore*/ true)
5323               .first;
5324 
5325       Value *StackSrcPtr = IRB.CreateInBoundsGEP(
5326           IRB.getInt8Ty(), VAArgTLSCopy, IRB.getInt32(AArch64VAEndOffset));
5327 
5328       IRB.CreateMemCpy(StackSaveAreaShadowPtr, Align(16), StackSrcPtr,
5329                        Align(16), VAArgOverflowSize);
5330     }
5331   }
5332 };
5333 
5334 /// PowerPC64-specific implementation of VarArgHelper.
5335 struct VarArgPowerPC64Helper : public VarArgHelper {
5336   Function &F;
5337   MemorySanitizer &MS;
5338   MemorySanitizerVisitor &MSV;
5339   AllocaInst *VAArgTLSCopy = nullptr;
5340   Value *VAArgSize = nullptr;
5341 
5342   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5343 
5344   VarArgPowerPC64Helper(Function &F, MemorySanitizer &MS,
5345                         MemorySanitizerVisitor &MSV)
5346       : F(F), MS(MS), MSV(MSV) {}
5347 
5348   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
    // For PowerPC, we need to deal with the alignment of stack arguments -
    // they are mostly aligned to 8 bytes, but vectors and i128 arrays are
    // aligned to 16 bytes, and byvals can be aligned to 8 or 16 bytes.
    // For that reason, we compute the current offset from the stack pointer
    // (which is always properly aligned) and the offset of the first vararg,
    // then subtract them.
5355     unsigned VAArgBase;
5356     Triple TargetTriple(F.getParent()->getTargetTriple());
    // The parameter save area starts 48 bytes from the frame pointer for
    // ABIv1 and 32 bytes for ABIv2.  Which ABI is in use is normally
    // determined by target endianness, but in theory it could be overridden
    // by a function attribute.
5360     if (TargetTriple.getArch() == Triple::ppc64)
5361       VAArgBase = 48;
5362     else
5363       VAArgBase = 32;
5364     unsigned VAArgOffset = VAArgBase;
5365     const DataLayout &DL = F.getParent()->getDataLayout();
5366     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5367       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5368       bool IsByVal = CB.paramHasAttr(ArgNo, Attribute::ByVal);
5369       if (IsByVal) {
5370         assert(A->getType()->isPointerTy());
5371         Type *RealTy = CB.getParamByValType(ArgNo);
5372         uint64_t ArgSize = DL.getTypeAllocSize(RealTy);
5373         Align ArgAlign = CB.getParamAlign(ArgNo).value_or(Align(8));
5374         if (ArgAlign < 8)
5375           ArgAlign = Align(8);
5376         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5377         if (!IsFixed) {
5378           Value *Base = getShadowPtrForVAArgument(
5379               RealTy, IRB, VAArgOffset - VAArgBase, ArgSize);
5380           if (Base) {
5381             Value *AShadowPtr, *AOriginPtr;
5382             std::tie(AShadowPtr, AOriginPtr) =
5383                 MSV.getShadowOriginPtr(A, IRB, IRB.getInt8Ty(),
5384                                        kShadowTLSAlignment, /*isStore*/ false);
5385 
5386             IRB.CreateMemCpy(Base, kShadowTLSAlignment, AShadowPtr,
5387                              kShadowTLSAlignment, ArgSize);
5388           }
5389         }
5390         VAArgOffset += alignTo(ArgSize, Align(8));
5391       } else {
5392         Value *Base;
5393         uint64_t ArgSize = DL.getTypeAllocSize(A->getType());
5394         Align ArgAlign = Align(8);
5395         if (A->getType()->isArrayTy()) {
5396           // Arrays are aligned to element size, except for long double
5397           // arrays, which are aligned to 8 bytes.
5398           Type *ElementTy = A->getType()->getArrayElementType();
5399           if (!ElementTy->isPPC_FP128Ty())
5400             ArgAlign = Align(DL.getTypeAllocSize(ElementTy));
5401         } else if (A->getType()->isVectorTy()) {
5402           // Vectors are naturally aligned.
5403           ArgAlign = Align(ArgSize);
5404         }
5405         if (ArgAlign < 8)
5406           ArgAlign = Align(8);
5407         VAArgOffset = alignTo(VAArgOffset, ArgAlign);
5408         if (DL.isBigEndian()) {
          // Adjust the shadow offset for arguments smaller than 8 bytes to
          // match the placement of the value within its slot on a big-endian
          // system.
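          // For example, an i32 vararg occupies bytes 4..7 of its 8-byte
          // slot on big-endian PowerPC64, so its shadow starts 4 bytes into
          // the corresponding __msan_va_arg_tls slot.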
5411           if (ArgSize < 8)
5412             VAArgOffset += (8 - ArgSize);
5413         }
5414         if (!IsFixed) {
5415           Base = getShadowPtrForVAArgument(A->getType(), IRB,
5416                                            VAArgOffset - VAArgBase, ArgSize);
5417           if (Base)
5418             IRB.CreateAlignedStore(MSV.getShadow(A), Base, kShadowTLSAlignment);
5419         }
5420         VAArgOffset += ArgSize;
5421         VAArgOffset = alignTo(VAArgOffset, Align(8));
5422       }
5423       if (IsFixed)
5424         VAArgBase = VAArgOffset;
5425     }
5426 
5427     Constant *TotalVAArgSize =
5428         ConstantInt::get(IRB.getInt64Ty(), VAArgOffset - VAArgBase);
    // We reuse VAArgOverflowSizeTLS as VAArgSizeTLS to avoid creating yet
    // another class member; here it holds the total size of all varargs.
5431     IRB.CreateStore(TotalVAArgSize, MS.VAArgOverflowSizeTLS);
5432   }
5433 
5434   /// Compute the shadow address for a given va_arg.
5435   Value *getShadowPtrForVAArgument(Type *Ty, IRBuilder<> &IRB,
5436                                    unsigned ArgOffset, unsigned ArgSize) {
5437     // Make sure we don't overflow __msan_va_arg_tls.
5438     if (ArgOffset + ArgSize > kParamTLSSize)
5439       return nullptr;
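    // (Callers skip the copy when this returns null, so the shadow of
    // arguments that do not fit into __msan_va_arg_tls is simply dropped.)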
5440     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5441     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5442     return IRB.CreateIntToPtr(Base, PointerType::get(MSV.getShadowTy(Ty), 0),
5443                               "_msarg");
5444   }
5445 
5446   void visitVAStartInst(VAStartInst &I) override {
5447     IRBuilder<> IRB(&I);
5448     VAStartInstrumentationList.push_back(&I);
5449     Value *VAListTag = I.getArgOperand(0);
5450     Value *ShadowPtr, *OriginPtr;
5451     const Align Alignment = Align(8);
5452     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5453         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5454     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5455                      /* size */ 8, Alignment, false);
5456   }
5457 
5458   void visitVACopyInst(VACopyInst &I) override {
5459     IRBuilder<> IRB(&I);
5460     Value *VAListTag = I.getArgOperand(0);
5461     Value *ShadowPtr, *OriginPtr;
5462     const Align Alignment = Align(8);
5463     std::tie(ShadowPtr, OriginPtr) = MSV.getShadowOriginPtr(
5464         VAListTag, IRB, IRB.getInt8Ty(), Alignment, /*isStore*/ true);
5465     // Unpoison the whole __va_list_tag.
5466     // FIXME: magic ABI constants.
5467     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5468                      /* size */ 8, Alignment, false);
5469   }
5470 
5471   void finalizeInstrumentation() override {
5472     assert(!VAArgSize && !VAArgTLSCopy &&
5473            "finalizeInstrumentation called twice");
5474     IRBuilder<> IRB(MSV.FnPrologueEnd);
5475     VAArgSize = IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5476     Value *CopySize =
5477         IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, 0), VAArgSize);
5478 
5479     if (!VAStartInstrumentationList.empty()) {
5480       // If there is a va_start in this function, make a backup copy of
5481       // va_arg_tls somewhere in the function entry block.
5482 
5483       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5484       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5485       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5486                        CopySize, kShadowTLSAlignment, false);
5487 
5488       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5489           Intrinsic::umin, CopySize,
5490           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5491       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5492                        kShadowTLSAlignment, SrcSize);
5493     }
5494 
5495     // Instrument va_start.
5496     // Copy va_list shadow from the backup copy of the TLS contents.
5497     for (size_t i = 0, n = VAStartInstrumentationList.size(); i < n; i++) {
5498       CallInst *OrigInst = VAStartInstrumentationList[i];
5499       NextNodeIRBuilder IRB(OrigInst);
5500       Value *VAListTag = OrigInst->getArgOperand(0);
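      // On PowerPC64, va_list is essentially a pointer into the parameter
      // save area, so the save-area address is loaded directly from offset 0
      // of the va_list object.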
5501       Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5502       Value *RegSaveAreaPtrPtr =
5503           IRB.CreateIntToPtr(IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5504                              PointerType::get(RegSaveAreaPtrTy, 0));
5505       Value *RegSaveAreaPtr =
5506           IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5507       Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5508       const Align Alignment = Align(8);
5509       std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5510           MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(),
5511                                  Alignment, /*isStore*/ true);
5512       IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5513                        CopySize);
5514     }
5515   }
5516 };
5517 
5518 /// SystemZ-specific implementation of VarArgHelper.
5519 struct VarArgSystemZHelper : public VarArgHelper {
5520   static const unsigned SystemZGpOffset = 16;
5521   static const unsigned SystemZGpEndOffset = 56;
5522   static const unsigned SystemZFpOffset = 128;
5523   static const unsigned SystemZFpEndOffset = 160;
5524   static const unsigned SystemZMaxVrArgs = 8;
5525   static const unsigned SystemZRegSaveAreaSize = 160;
5526   static const unsigned SystemZOverflowOffset = 160;
5527   static const unsigned SystemZVAListTagSize = 32;
5528   static const unsigned SystemZOverflowArgAreaPtrOffset = 16;
5529   static const unsigned SystemZRegSaveAreaPtrOffset = 24;
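  // For reference (a sketch of the s390x ELF ABI, not definitions from this
  // file): the 160-byte register save area holds the incoming r2-r6 at bytes
  // 16..56 and f0/f2/f4/f6 at bytes 128..160, and __va_list_tag is laid out
  // as
  //   struct { long __gpr; long __fpr;
  //            void *__overflow_arg_area;   // offset 16
  //            void *__reg_save_area; };    // offset 24; 32 bytes in total
  // which is where the offsets and sizes above come from.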
5530 
5531   Function &F;
5532   MemorySanitizer &MS;
5533   MemorySanitizerVisitor &MSV;
5534   bool IsSoftFloatABI;
5535   AllocaInst *VAArgTLSCopy = nullptr;
5536   AllocaInst *VAArgTLSOriginCopy = nullptr;
5537   Value *VAArgOverflowSize = nullptr;
5538 
5539   SmallVector<CallInst *, 16> VAStartInstrumentationList;
5540 
5541   enum class ArgKind {
5542     GeneralPurpose,
5543     FloatingPoint,
5544     Vector,
5545     Memory,
5546     Indirect,
5547   };
5548 
5549   enum class ShadowExtension { None, Zero, Sign };
5550 
5551   VarArgSystemZHelper(Function &F, MemorySanitizer &MS,
5552                       MemorySanitizerVisitor &MSV)
5553       : F(F), MS(MS), MSV(MSV),
5554         IsSoftFloatABI(F.getFnAttribute("use-soft-float").getValueAsBool()) {}
5555 
5556   ArgKind classifyArgument(Type *T) {
5557     // T is a SystemZABIInfo::classifyArgumentType() output, and there are
5558     // only a few possibilities of what it can be. In particular, enums, single
5559     // element structs and large types have already been taken care of.
5560 
5561     // Some i128 and fp128 arguments are converted to pointers only in the
5562     // back end.
5563     if (T->isIntegerTy(128) || T->isFP128Ty())
5564       return ArgKind::Indirect;
5565     if (T->isFloatingPointTy())
5566       return IsSoftFloatABI ? ArgKind::GeneralPurpose : ArgKind::FloatingPoint;
5567     if (T->isIntegerTy() || T->isPointerTy())
5568       return ArgKind::GeneralPurpose;
5569     if (T->isVectorTy())
5570       return ArgKind::Vector;
5571     return ArgKind::Memory;
5572   }
5573 
5574   ShadowExtension getShadowExtension(const CallBase &CB, unsigned ArgNo) {
5575     // ABI says: "One of the simple integer types no more than 64 bits wide.
5576     // ... If such an argument is shorter than 64 bits, replace it by a full
5577     // 64-bit integer representing the same number, using sign or zero
5578     // extension". Shadow for an integer argument has the same type as the
5579     // argument itself, so it can be sign or zero extended as well.
5580     bool ZExt = CB.paramHasAttr(ArgNo, Attribute::ZExt);
5581     bool SExt = CB.paramHasAttr(ArgNo, Attribute::SExt);
5582     if (ZExt) {
5583       assert(!SExt);
5584       return ShadowExtension::Zero;
5585     }
5586     if (SExt) {
5587       assert(!ZExt);
5588       return ShadowExtension::Sign;
5589     }
5590     return ShadowExtension::None;
5591   }
5592 
5593   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {
5594     unsigned GpOffset = SystemZGpOffset;
5595     unsigned FpOffset = SystemZFpOffset;
5596     unsigned VrIndex = 0;
5597     unsigned OverflowOffset = SystemZOverflowOffset;
5598     const DataLayout &DL = F.getParent()->getDataLayout();
5599     for (const auto &[ArgNo, A] : llvm::enumerate(CB.args())) {
5600       bool IsFixed = ArgNo < CB.getFunctionType()->getNumParams();
5601       // SystemZABIInfo does not produce ByVal parameters.
5602       assert(!CB.paramHasAttr(ArgNo, Attribute::ByVal));
5603       Type *T = A->getType();
5604       ArgKind AK = classifyArgument(T);
5605       if (AK == ArgKind::Indirect) {
5606         T = PointerType::get(T, 0);
5607         AK = ArgKind::GeneralPurpose;
5608       }
5609       if (AK == ArgKind::GeneralPurpose && GpOffset >= SystemZGpEndOffset)
5610         AK = ArgKind::Memory;
5611       if (AK == ArgKind::FloatingPoint && FpOffset >= SystemZFpEndOffset)
5612         AK = ArgKind::Memory;
5613       if (AK == ArgKind::Vector && (VrIndex >= SystemZMaxVrArgs || !IsFixed))
5614         AK = ArgKind::Memory;
5615       Value *ShadowBase = nullptr;
5616       Value *OriginBase = nullptr;
5617       ShadowExtension SE = ShadowExtension::None;
5618       switch (AK) {
5619       case ArgKind::GeneralPurpose: {
5620         // Always keep track of GpOffset, but store shadow only for varargs.
5621         uint64_t ArgSize = 8;
5622         if (GpOffset + ArgSize <= kParamTLSSize) {
5623           if (!IsFixed) {
5624             SE = getShadowExtension(CB, ArgNo);
5625             uint64_t GapSize = 0;
5626             if (SE == ShadowExtension::None) {
5627               uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5628               assert(ArgAllocSize <= ArgSize);
5629               GapSize = ArgSize - ArgAllocSize;
5630             }
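            // (For example, an i32 with neither zeroext nor signext gets a
            // 4-byte gap: on big-endian s390x the value sits in bytes 4..7 of
            // its 8-byte slot.)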
5631             ShadowBase = getShadowAddrForVAArgument(IRB, GpOffset + GapSize);
5632             if (MS.TrackOrigins)
5633               OriginBase = getOriginPtrForVAArgument(IRB, GpOffset + GapSize);
5634           }
5635           GpOffset += ArgSize;
5636         } else {
5637           GpOffset = kParamTLSSize;
5638         }
5639         break;
5640       }
5641       case ArgKind::FloatingPoint: {
5642         // Always keep track of FpOffset, but store shadow only for varargs.
5643         uint64_t ArgSize = 8;
5644         if (FpOffset + ArgSize <= kParamTLSSize) {
5645           if (!IsFixed) {
5646             // PoP says: "A short floating-point datum requires only the
5647             // left-most 32 bit positions of a floating-point register".
            // Therefore, in contrast to ArgKind::GeneralPurpose and
            // ArgKind::Memory,
5649             // don't extend shadow and don't mind the gap.
5650             ShadowBase = getShadowAddrForVAArgument(IRB, FpOffset);
5651             if (MS.TrackOrigins)
5652               OriginBase = getOriginPtrForVAArgument(IRB, FpOffset);
5653           }
5654           FpOffset += ArgSize;
5655         } else {
5656           FpOffset = kParamTLSSize;
5657         }
5658         break;
5659       }
5660       case ArgKind::Vector: {
5661         // Keep track of VrIndex. No need to store shadow, since vector varargs
        // go through ArgKind::Memory.
5663         assert(IsFixed);
5664         VrIndex++;
5665         break;
5666       }
5667       case ArgKind::Memory: {
5668         // Keep track of OverflowOffset and store shadow only for varargs.
5669         // Ignore fixed args, since we need to copy only the vararg portion of
5670         // the overflow area shadow.
5671         if (!IsFixed) {
5672           uint64_t ArgAllocSize = DL.getTypeAllocSize(T);
5673           uint64_t ArgSize = alignTo(ArgAllocSize, 8);
5674           if (OverflowOffset + ArgSize <= kParamTLSSize) {
5675             SE = getShadowExtension(CB, ArgNo);
5676             uint64_t GapSize =
5677                 SE == ShadowExtension::None ? ArgSize - ArgAllocSize : 0;
5678             ShadowBase =
5679                 getShadowAddrForVAArgument(IRB, OverflowOffset + GapSize);
5680             if (MS.TrackOrigins)
5681               OriginBase =
5682                   getOriginPtrForVAArgument(IRB, OverflowOffset + GapSize);
5683             OverflowOffset += ArgSize;
5684           } else {
5685             OverflowOffset = kParamTLSSize;
5686           }
5687         }
5688         break;
5689       }
5690       case ArgKind::Indirect:
5691         llvm_unreachable("Indirect must be converted to GeneralPurpose");
5692       }
5693       if (ShadowBase == nullptr)
5694         continue;
5695       Value *Shadow = MSV.getShadow(A);
5696       if (SE != ShadowExtension::None)
5697         Shadow = MSV.CreateShadowCast(IRB, Shadow, IRB.getInt64Ty(),
5698                                       /*Signed*/ SE == ShadowExtension::Sign);
5699       ShadowBase = IRB.CreateIntToPtr(
5700           ShadowBase, PointerType::get(Shadow->getType(), 0), "_msarg_va_s");
5701       IRB.CreateStore(Shadow, ShadowBase);
5702       if (MS.TrackOrigins) {
5703         Value *Origin = MSV.getOrigin(A);
5704         TypeSize StoreSize = DL.getTypeStoreSize(Shadow->getType());
5705         MSV.paintOrigin(IRB, Origin, OriginBase, StoreSize,
5706                         kMinOriginAlignment);
5707       }
5708     }
5709     Constant *OverflowSize = ConstantInt::get(
5710         IRB.getInt64Ty(), OverflowOffset - SystemZOverflowOffset);
5711     IRB.CreateStore(OverflowSize, MS.VAArgOverflowSizeTLS);
5712   }
5713 
5714   Value *getShadowAddrForVAArgument(IRBuilder<> &IRB, unsigned ArgOffset) {
5715     Value *Base = IRB.CreatePointerCast(MS.VAArgTLS, MS.IntptrTy);
5716     return IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5717   }
5718 
5719   Value *getOriginPtrForVAArgument(IRBuilder<> &IRB, int ArgOffset) {
5720     Value *Base = IRB.CreatePointerCast(MS.VAArgOriginTLS, MS.IntptrTy);
5721     Base = IRB.CreateAdd(Base, ConstantInt::get(MS.IntptrTy, ArgOffset));
5722     return IRB.CreateIntToPtr(Base, PointerType::get(MS.OriginTy, 0),
5723                               "_msarg_va_o");
5724   }
5725 
5726   void unpoisonVAListTagForInst(IntrinsicInst &I) {
5727     IRBuilder<> IRB(&I);
5728     Value *VAListTag = I.getArgOperand(0);
5729     Value *ShadowPtr, *OriginPtr;
5730     const Align Alignment = Align(8);
5731     std::tie(ShadowPtr, OriginPtr) =
5732         MSV.getShadowOriginPtr(VAListTag, IRB, IRB.getInt8Ty(), Alignment,
5733                                /*isStore*/ true);
5734     IRB.CreateMemSet(ShadowPtr, Constant::getNullValue(IRB.getInt8Ty()),
5735                      SystemZVAListTagSize, Alignment, false);
5736   }
5737 
5738   void visitVAStartInst(VAStartInst &I) override {
5739     VAStartInstrumentationList.push_back(&I);
5740     unpoisonVAListTagForInst(I);
5741   }
5742 
5743   void visitVACopyInst(VACopyInst &I) override { unpoisonVAListTagForInst(I); }
5744 
5745   void copyRegSaveArea(IRBuilder<> &IRB, Value *VAListTag) {
5746     Type *RegSaveAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5747     Value *RegSaveAreaPtrPtr = IRB.CreateIntToPtr(
5748         IRB.CreateAdd(
5749             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5750             ConstantInt::get(MS.IntptrTy, SystemZRegSaveAreaPtrOffset)),
5751         PointerType::get(RegSaveAreaPtrTy, 0));
5752     Value *RegSaveAreaPtr = IRB.CreateLoad(RegSaveAreaPtrTy, RegSaveAreaPtrPtr);
5753     Value *RegSaveAreaShadowPtr, *RegSaveAreaOriginPtr;
5754     const Align Alignment = Align(8);
5755     std::tie(RegSaveAreaShadowPtr, RegSaveAreaOriginPtr) =
5756         MSV.getShadowOriginPtr(RegSaveAreaPtr, IRB, IRB.getInt8Ty(), Alignment,
5757                                /*isStore*/ true);
5758     // TODO(iii): copy only fragments filled by visitCallBase()
5759     // TODO(iii): support packed-stack && !use-soft-float
5760     // For use-soft-float functions, it is enough to copy just the GPRs.
5761     unsigned RegSaveAreaSize =
5762         IsSoftFloatABI ? SystemZGpEndOffset : SystemZRegSaveAreaSize;
5763     IRB.CreateMemCpy(RegSaveAreaShadowPtr, Alignment, VAArgTLSCopy, Alignment,
5764                      RegSaveAreaSize);
5765     if (MS.TrackOrigins)
5766       IRB.CreateMemCpy(RegSaveAreaOriginPtr, Alignment, VAArgTLSOriginCopy,
5767                        Alignment, RegSaveAreaSize);
5768   }
5769 
5770   void copyOverflowArea(IRBuilder<> &IRB, Value *VAListTag) {
5771     Type *OverflowArgAreaPtrTy = Type::getInt64PtrTy(*MS.C);
5772     Value *OverflowArgAreaPtrPtr = IRB.CreateIntToPtr(
5773         IRB.CreateAdd(
5774             IRB.CreatePtrToInt(VAListTag, MS.IntptrTy),
5775             ConstantInt::get(MS.IntptrTy, SystemZOverflowArgAreaPtrOffset)),
5776         PointerType::get(OverflowArgAreaPtrTy, 0));
5777     Value *OverflowArgAreaPtr =
5778         IRB.CreateLoad(OverflowArgAreaPtrTy, OverflowArgAreaPtrPtr);
5779     Value *OverflowArgAreaShadowPtr, *OverflowArgAreaOriginPtr;
5780     const Align Alignment = Align(8);
5781     std::tie(OverflowArgAreaShadowPtr, OverflowArgAreaOriginPtr) =
5782         MSV.getShadowOriginPtr(OverflowArgAreaPtr, IRB, IRB.getInt8Ty(),
5783                                Alignment, /*isStore*/ true);
5784     Value *SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSCopy,
5785                                            SystemZOverflowOffset);
5786     IRB.CreateMemCpy(OverflowArgAreaShadowPtr, Alignment, SrcPtr, Alignment,
5787                      VAArgOverflowSize);
5788     if (MS.TrackOrigins) {
5789       SrcPtr = IRB.CreateConstGEP1_32(IRB.getInt8Ty(), VAArgTLSOriginCopy,
5790                                       SystemZOverflowOffset);
5791       IRB.CreateMemCpy(OverflowArgAreaOriginPtr, Alignment, SrcPtr, Alignment,
5792                        VAArgOverflowSize);
5793     }
5794   }
5795 
5796   void finalizeInstrumentation() override {
5797     assert(!VAArgOverflowSize && !VAArgTLSCopy &&
5798            "finalizeInstrumentation called twice");
5799     if (!VAStartInstrumentationList.empty()) {
5800       // If there is a va_start in this function, make a backup copy of
5801       // va_arg_tls somewhere in the function entry block.
5802       IRBuilder<> IRB(MSV.FnPrologueEnd);
5803       VAArgOverflowSize =
5804           IRB.CreateLoad(IRB.getInt64Ty(), MS.VAArgOverflowSizeTLS);
5805       Value *CopySize =
5806           IRB.CreateAdd(ConstantInt::get(MS.IntptrTy, SystemZOverflowOffset),
5807                         VAArgOverflowSize);
5808       VAArgTLSCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5809       VAArgTLSCopy->setAlignment(kShadowTLSAlignment);
5810       IRB.CreateMemSet(VAArgTLSCopy, Constant::getNullValue(IRB.getInt8Ty()),
5811                        CopySize, kShadowTLSAlignment, false);
5812 
5813       Value *SrcSize = IRB.CreateBinaryIntrinsic(
5814           Intrinsic::umin, CopySize,
5815           ConstantInt::get(MS.IntptrTy, kParamTLSSize));
5816       IRB.CreateMemCpy(VAArgTLSCopy, kShadowTLSAlignment, MS.VAArgTLS,
5817                        kShadowTLSAlignment, SrcSize);
5818       if (MS.TrackOrigins) {
5819         VAArgTLSOriginCopy = IRB.CreateAlloca(Type::getInt8Ty(*MS.C), CopySize);
5820         VAArgTLSOriginCopy->setAlignment(kShadowTLSAlignment);
5821         IRB.CreateMemCpy(VAArgTLSOriginCopy, kShadowTLSAlignment,
5822                          MS.VAArgOriginTLS, kShadowTLSAlignment, SrcSize);
5823       }
5824     }
5825 
5826     // Instrument va_start.
5827     // Copy va_list shadow from the backup copy of the TLS contents.
5828     for (size_t VaStartNo = 0, VaStartNum = VAStartInstrumentationList.size();
5829          VaStartNo < VaStartNum; VaStartNo++) {
5830       CallInst *OrigInst = VAStartInstrumentationList[VaStartNo];
5831       NextNodeIRBuilder IRB(OrigInst);
5832       Value *VAListTag = OrigInst->getArgOperand(0);
5833       copyRegSaveArea(IRB, VAListTag);
5834       copyOverflowArea(IRB, VAListTag);
5835     }
5836   }
5837 };
5838 
5839 /// A no-op implementation of VarArgHelper.
5840 struct VarArgNoOpHelper : public VarArgHelper {
5841   VarArgNoOpHelper(Function &F, MemorySanitizer &MS,
5842                    MemorySanitizerVisitor &MSV) {}
5843 
5844   void visitCallBase(CallBase &CB, IRBuilder<> &IRB) override {}
5845 
5846   void visitVAStartInst(VAStartInst &I) override {}
5847 
5848   void visitVACopyInst(VACopyInst &I) override {}
5849 
5850   void finalizeInstrumentation() override {}
5851 };
5852 
5853 } // end anonymous namespace
5854 
5855 static VarArgHelper *CreateVarArgHelper(Function &Func, MemorySanitizer &Msan,
5856                                         MemorySanitizerVisitor &Visitor) {
  // VarArg handling is only implemented on AMD64, MIPS64, AArch64, PowerPC64
  // and SystemZ. False positives are possible on other platforms.
5859   Triple TargetTriple(Func.getParent()->getTargetTriple());
5860   if (TargetTriple.getArch() == Triple::x86_64)
5861     return new VarArgAMD64Helper(Func, Msan, Visitor);
5862   else if (TargetTriple.isMIPS64())
5863     return new VarArgMIPS64Helper(Func, Msan, Visitor);
5864   else if (TargetTriple.getArch() == Triple::aarch64)
5865     return new VarArgAArch64Helper(Func, Msan, Visitor);
5866   else if (TargetTriple.getArch() == Triple::ppc64 ||
5867            TargetTriple.getArch() == Triple::ppc64le)
5868     return new VarArgPowerPC64Helper(Func, Msan, Visitor);
5869   else if (TargetTriple.getArch() == Triple::systemz)
5870     return new VarArgSystemZHelper(Func, Msan, Visitor);
5871   else
5872     return new VarArgNoOpHelper(Func, Msan, Visitor);
5873 }
5874 
5875 bool MemorySanitizer::sanitizeFunction(Function &F, TargetLibraryInfo &TLI) {
5876   if (!CompileKernel && F.getName() == kMsanModuleCtorName)
5877     return false;
5878 
5879   if (F.hasFnAttribute(Attribute::DisableSanitizerInstrumentation))
5880     return false;
5881 
5882   MemorySanitizerVisitor Visitor(F, *this, TLI);
5883 
5884   // Clear out memory attributes.
5885   AttributeMask B;
5886   B.addAttribute(Attribute::Memory).addAttribute(Attribute::Speculatable);
5887   F.removeFnAttrs(B);
5888 
5889   return Visitor.runOnFunction();
5890 }
5891