109467b48Spatrick //===- PoisonChecking.cpp - -----------------------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // Implements a transform pass which instruments IR such that poison semantics
1009467b48Spatrick // are made explicit.  That is, it provides a (possibly partial) executable
1109467b48Spatrick // semantics for every instruction w.r.t. poison as specified in the LLVM
1209467b48Spatrick // LangRef.  There are obvious parallels to the sanitizer tools, but this pass
1309467b48Spatrick // is focused purely on the semantics of LLVM IR, not any particular source
1409467b48Spatrick // language.   If you're looking for something to see if your C/C++ contains
1509467b48Spatrick // UB, this is not it.
1609467b48Spatrick //
1709467b48Spatrick // The rewritten semantics of each instruction will include the following
1809467b48Spatrick // components:
1909467b48Spatrick //
2009467b48Spatrick // 1) The original instruction, unmodified.
2109467b48Spatrick // 2) A propagation rule which translates dynamic information about the poison
2209467b48Spatrick //    state of each input to whether the dynamic output of the instruction
2309467b48Spatrick //    produces poison.
24097a140dSpatrick // 3) A creation rule which validates any poison producing flags on the
2509467b48Spatrick //    instruction itself (e.g. checks for overflow on nsw).
2609467b48Spatrick // 4) A check rule which traps (to a handler function) if this instruction must
2709467b48Spatrick //    execute undefined behavior given the poison state of it's inputs.
2809467b48Spatrick //
29097a140dSpatrick // This is a must analysis based transform; that is, the resulting code may
30097a140dSpatrick // produce a false negative result (not report UB when actually exists
31097a140dSpatrick // according to the LangRef spec), but should never produce a false positive
32097a140dSpatrick // (report UB where it doesn't exist).
3309467b48Spatrick //
3409467b48Spatrick // Use cases for this pass include:
3509467b48Spatrick // - Understanding (and testing!) the implications of the definition of poison
3609467b48Spatrick //   from the LangRef.
3709467b48Spatrick // - Validating the output of a IR fuzzer to ensure that all programs produced
3809467b48Spatrick //   are well defined on the specific input used.
3909467b48Spatrick // - Finding/confirming poison specific miscompiles by checking the poison
4009467b48Spatrick //   status of an input/IR pair is the same before and after an optimization
4109467b48Spatrick //   transform.
4209467b48Spatrick // - Checking that a bugpoint reduction does not introduce UB which didn't
4309467b48Spatrick //   exist in the original program being reduced.
4409467b48Spatrick //
4509467b48Spatrick // The major sources of inaccuracy are currently:
4609467b48Spatrick // - Most validation rules not yet implemented for instructions with poison
4709467b48Spatrick //   relavant flags.  At the moment, only nsw/nuw on add/sub are supported.
4809467b48Spatrick // - UB which is control dependent on a branch on poison is not yet
4909467b48Spatrick //   reported. Currently, only data flow dependence is modeled.
5009467b48Spatrick // - Poison which is propagated through memory is not modeled.  As such,
5109467b48Spatrick //   storing poison to memory and then reloading it will cause a false negative
5209467b48Spatrick //   as we consider the reloaded value to not be poisoned.
5309467b48Spatrick // - Poison propagation across function boundaries is not modeled.  At the
5409467b48Spatrick //   moment, all arguments and return values are assumed not to be poison.
5509467b48Spatrick // - Undef is not modeled.  In particular, the optimizer's freedom to pick
5609467b48Spatrick //   concrete values for undef bits so as to maximize potential for producing
5709467b48Spatrick //   poison is not modeled.
5809467b48Spatrick //
5909467b48Spatrick //===----------------------------------------------------------------------===//
6009467b48Spatrick 
6109467b48Spatrick #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
6209467b48Spatrick #include "llvm/ADT/DenseMap.h"
6309467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
6409467b48Spatrick #include "llvm/IR/IRBuilder.h"
6509467b48Spatrick #include "llvm/Support/CommandLine.h"
6609467b48Spatrick 
6709467b48Spatrick using namespace llvm;
6809467b48Spatrick 
6909467b48Spatrick #define DEBUG_TYPE "poison-checking"
7009467b48Spatrick 
7109467b48Spatrick static cl::opt<bool>
7209467b48Spatrick LocalCheck("poison-checking-function-local",
7309467b48Spatrick            cl::init(false),
7409467b48Spatrick            cl::desc("Check that returns are non-poison (for testing)"));
7509467b48Spatrick 
7609467b48Spatrick 
isConstantFalse(Value * V)7709467b48Spatrick static bool isConstantFalse(Value* V) {
7809467b48Spatrick   assert(V->getType()->isIntegerTy(1));
7909467b48Spatrick   if (auto *CI = dyn_cast<ConstantInt>(V))
8009467b48Spatrick     return CI->isZero();
8109467b48Spatrick   return false;
8209467b48Spatrick }
8309467b48Spatrick 
buildOrChain(IRBuilder<> & B,ArrayRef<Value * > Ops)8409467b48Spatrick static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
8509467b48Spatrick   if (Ops.size() == 0)
8609467b48Spatrick     return B.getFalse();
8709467b48Spatrick   unsigned i = 0;
8809467b48Spatrick   for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
8909467b48Spatrick   if (i == Ops.size())
9009467b48Spatrick     return B.getFalse();
9109467b48Spatrick   Value *Accum = Ops[i++];
92*d415bd75Srobert   for (Value *Op : llvm::drop_begin(Ops, i))
93*d415bd75Srobert     if (!isConstantFalse(Op))
94*d415bd75Srobert       Accum = B.CreateOr(Accum, Op);
9509467b48Spatrick   return Accum;
9609467b48Spatrick }
9709467b48Spatrick 
generateCreationChecksForBinOp(Instruction & I,SmallVectorImpl<Value * > & Checks)98097a140dSpatrick static void generateCreationChecksForBinOp(Instruction &I,
99097a140dSpatrick                                            SmallVectorImpl<Value*> &Checks) {
10009467b48Spatrick   assert(isa<BinaryOperator>(I));
10109467b48Spatrick 
10209467b48Spatrick   IRBuilder<> B(&I);
10309467b48Spatrick   Value *LHS = I.getOperand(0);
10409467b48Spatrick   Value *RHS = I.getOperand(1);
10509467b48Spatrick   switch (I.getOpcode()) {
10609467b48Spatrick   default:
10709467b48Spatrick     return;
10809467b48Spatrick   case Instruction::Add: {
10909467b48Spatrick     if (I.hasNoSignedWrap()) {
11009467b48Spatrick       auto *OverflowOp =
11109467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
11209467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
11309467b48Spatrick     }
11409467b48Spatrick     if (I.hasNoUnsignedWrap()) {
11509467b48Spatrick       auto *OverflowOp =
11609467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
11709467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
11809467b48Spatrick     }
11909467b48Spatrick     break;
12009467b48Spatrick   }
12109467b48Spatrick   case Instruction::Sub: {
12209467b48Spatrick     if (I.hasNoSignedWrap()) {
12309467b48Spatrick       auto *OverflowOp =
12409467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
12509467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
12609467b48Spatrick     }
12709467b48Spatrick     if (I.hasNoUnsignedWrap()) {
12809467b48Spatrick       auto *OverflowOp =
12909467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
13009467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
13109467b48Spatrick     }
13209467b48Spatrick     break;
13309467b48Spatrick   }
13409467b48Spatrick   case Instruction::Mul: {
13509467b48Spatrick     if (I.hasNoSignedWrap()) {
13609467b48Spatrick       auto *OverflowOp =
13709467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
13809467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
13909467b48Spatrick     }
14009467b48Spatrick     if (I.hasNoUnsignedWrap()) {
14109467b48Spatrick       auto *OverflowOp =
14209467b48Spatrick         B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
14309467b48Spatrick       Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
14409467b48Spatrick     }
14509467b48Spatrick     break;
14609467b48Spatrick   }
14709467b48Spatrick   case Instruction::UDiv: {
14809467b48Spatrick     if (I.isExact()) {
14909467b48Spatrick       auto *Check =
15009467b48Spatrick         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
15109467b48Spatrick                      ConstantInt::get(LHS->getType(), 0));
15209467b48Spatrick       Checks.push_back(Check);
15309467b48Spatrick     }
15409467b48Spatrick     break;
15509467b48Spatrick   }
15609467b48Spatrick   case Instruction::SDiv: {
15709467b48Spatrick     if (I.isExact()) {
15809467b48Spatrick       auto *Check =
15909467b48Spatrick         B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
16009467b48Spatrick                      ConstantInt::get(LHS->getType(), 0));
16109467b48Spatrick       Checks.push_back(Check);
16209467b48Spatrick     }
16309467b48Spatrick     break;
16409467b48Spatrick   }
16509467b48Spatrick   case Instruction::AShr:
16609467b48Spatrick   case Instruction::LShr:
16709467b48Spatrick   case Instruction::Shl: {
16809467b48Spatrick     Value *ShiftCheck =
16909467b48Spatrick       B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
17009467b48Spatrick                    ConstantInt::get(RHS->getType(),
17109467b48Spatrick                                     LHS->getType()->getScalarSizeInBits()));
17209467b48Spatrick     Checks.push_back(ShiftCheck);
17309467b48Spatrick     break;
17409467b48Spatrick   }
17509467b48Spatrick   };
17609467b48Spatrick }
17709467b48Spatrick 
178097a140dSpatrick /// Given an instruction which can produce poison on non-poison inputs
179097a140dSpatrick /// (i.e. canCreatePoison returns true), generate runtime checks to produce
180097a140dSpatrick /// boolean indicators of when poison would result.
generateCreationChecks(Instruction & I,SmallVectorImpl<Value * > & Checks)181097a140dSpatrick static void generateCreationChecks(Instruction &I,
182097a140dSpatrick                                    SmallVectorImpl<Value*> &Checks) {
18309467b48Spatrick   IRBuilder<> B(&I);
18409467b48Spatrick   if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
185097a140dSpatrick     generateCreationChecksForBinOp(I, Checks);
18609467b48Spatrick 
187097a140dSpatrick   // Handle non-binops separately
18809467b48Spatrick   switch (I.getOpcode()) {
18909467b48Spatrick   default:
190097a140dSpatrick     // Note there are a couple of missing cases here, once implemented, this
191097a140dSpatrick     // should become an llvm_unreachable.
19209467b48Spatrick     break;
19309467b48Spatrick   case Instruction::ExtractElement: {
19409467b48Spatrick     Value *Vec = I.getOperand(0);
195097a140dSpatrick     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
196097a140dSpatrick     if (!VecVTy)
19709467b48Spatrick       break;
19809467b48Spatrick     Value *Idx = I.getOperand(1);
199097a140dSpatrick     unsigned NumElts = VecVTy->getNumElements();
20009467b48Spatrick     Value *Check =
20109467b48Spatrick       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
20209467b48Spatrick                    ConstantInt::get(Idx->getType(), NumElts));
20309467b48Spatrick     Checks.push_back(Check);
20409467b48Spatrick     break;
20509467b48Spatrick   }
20609467b48Spatrick   case Instruction::InsertElement: {
20709467b48Spatrick     Value *Vec = I.getOperand(0);
208097a140dSpatrick     auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
209097a140dSpatrick     if (!VecVTy)
21009467b48Spatrick       break;
21109467b48Spatrick     Value *Idx = I.getOperand(2);
212097a140dSpatrick     unsigned NumElts = VecVTy->getNumElements();
21309467b48Spatrick     Value *Check =
21409467b48Spatrick       B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
21509467b48Spatrick                    ConstantInt::get(Idx->getType(), NumElts));
21609467b48Spatrick     Checks.push_back(Check);
21709467b48Spatrick     break;
21809467b48Spatrick   }
21909467b48Spatrick   };
22009467b48Spatrick }
22109467b48Spatrick 
getPoisonFor(DenseMap<Value *,Value * > & ValToPoison,Value * V)22209467b48Spatrick static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
22309467b48Spatrick   auto Itr = ValToPoison.find(V);
22409467b48Spatrick   if (Itr != ValToPoison.end())
22509467b48Spatrick     return Itr->second;
22609467b48Spatrick   if (isa<Constant>(V)) {
22709467b48Spatrick     return ConstantInt::getFalse(V->getContext());
22809467b48Spatrick   }
22909467b48Spatrick   // Return false for unknwon values - this implements a non-strict mode where
23009467b48Spatrick   // unhandled IR constructs are simply considered to never produce poison.  At
23109467b48Spatrick   // some point in the future, we probably want a "strict mode" for testing if
23209467b48Spatrick   // nothing else.
23309467b48Spatrick   return ConstantInt::getFalse(V->getContext());
23409467b48Spatrick }
23509467b48Spatrick 
CreateAssert(IRBuilder<> & B,Value * Cond)23609467b48Spatrick static void CreateAssert(IRBuilder<> &B, Value *Cond) {
23709467b48Spatrick   assert(Cond->getType()->isIntegerTy(1));
23809467b48Spatrick   if (auto *CI = dyn_cast<ConstantInt>(Cond))
23909467b48Spatrick     if (CI->isAllOnesValue())
24009467b48Spatrick       return;
24109467b48Spatrick 
24209467b48Spatrick   Module *M = B.GetInsertBlock()->getModule();
24309467b48Spatrick   M->getOrInsertFunction("__poison_checker_assert",
24409467b48Spatrick                          Type::getVoidTy(M->getContext()),
24509467b48Spatrick                          Type::getInt1Ty(M->getContext()));
24609467b48Spatrick   Function *TrapFunc = M->getFunction("__poison_checker_assert");
24709467b48Spatrick   B.CreateCall(TrapFunc, Cond);
24809467b48Spatrick }
24909467b48Spatrick 
CreateAssertNot(IRBuilder<> & B,Value * Cond)25009467b48Spatrick static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
25109467b48Spatrick   assert(Cond->getType()->isIntegerTy(1));
25209467b48Spatrick   CreateAssert(B, B.CreateNot(Cond));
25309467b48Spatrick }
25409467b48Spatrick 
rewrite(Function & F)25509467b48Spatrick static bool rewrite(Function &F) {
25609467b48Spatrick   auto * const Int1Ty = Type::getInt1Ty(F.getContext());
25709467b48Spatrick 
25809467b48Spatrick   DenseMap<Value *, Value *> ValToPoison;
25909467b48Spatrick 
26009467b48Spatrick   for (BasicBlock &BB : F)
26109467b48Spatrick     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
26209467b48Spatrick       auto *OldPHI = cast<PHINode>(&*I);
263097a140dSpatrick       auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
26409467b48Spatrick       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
26509467b48Spatrick         NewPHI->addIncoming(UndefValue::get(Int1Ty),
26609467b48Spatrick                             OldPHI->getIncomingBlock(i));
26709467b48Spatrick       NewPHI->insertBefore(OldPHI);
26809467b48Spatrick       ValToPoison[OldPHI] = NewPHI;
26909467b48Spatrick     }
27009467b48Spatrick 
27109467b48Spatrick   for (BasicBlock &BB : F)
27209467b48Spatrick     for (Instruction &I : BB) {
27309467b48Spatrick       if (isa<PHINode>(I)) continue;
27409467b48Spatrick 
27509467b48Spatrick       IRBuilder<> B(cast<Instruction>(&I));
27609467b48Spatrick 
27709467b48Spatrick       // Note: There are many more sources of documented UB, but this pass only
27809467b48Spatrick       // attempts to find UB triggered by propagation of poison.
279*d415bd75Srobert       SmallVector<const Value *, 4> NonPoisonOps;
280*d415bd75Srobert       SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
28173471bf0Spatrick       getGuaranteedNonPoisonOps(&I, NonPoisonOps);
28273471bf0Spatrick       for (const Value *Op : NonPoisonOps)
283*d415bd75Srobert         if (SeenNonPoisonOps.insert(Op).second)
284*d415bd75Srobert           CreateAssertNot(B,
285*d415bd75Srobert                           getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
28609467b48Spatrick 
28709467b48Spatrick       if (LocalCheck)
28809467b48Spatrick         if (auto *RI = dyn_cast<ReturnInst>(&I))
28909467b48Spatrick           if (RI->getNumOperands() != 0) {
29009467b48Spatrick             Value *Op = RI->getOperand(0);
29109467b48Spatrick             CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
29209467b48Spatrick           }
29309467b48Spatrick 
29409467b48Spatrick       SmallVector<Value*, 4> Checks;
295*d415bd75Srobert       for (const Use &U : I.operands()) {
296*d415bd75Srobert         if (ValToPoison.count(U) && propagatesPoison(U))
297*d415bd75Srobert           Checks.push_back(getPoisonFor(ValToPoison, U));
298*d415bd75Srobert       }
29909467b48Spatrick 
30073471bf0Spatrick       if (canCreatePoison(cast<Operator>(&I)))
301097a140dSpatrick         generateCreationChecks(I, Checks);
30209467b48Spatrick       ValToPoison[&I] = buildOrChain(B, Checks);
30309467b48Spatrick     }
30409467b48Spatrick 
30509467b48Spatrick   for (BasicBlock &BB : F)
30609467b48Spatrick     for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
30709467b48Spatrick       auto *OldPHI = cast<PHINode>(&*I);
30809467b48Spatrick       if (!ValToPoison.count(OldPHI))
30909467b48Spatrick         continue; // skip the newly inserted phis
31009467b48Spatrick       auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
31109467b48Spatrick       for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
31209467b48Spatrick         auto *OldVal = OldPHI->getIncomingValue(i);
31309467b48Spatrick         NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
31409467b48Spatrick       }
31509467b48Spatrick     }
31609467b48Spatrick   return true;
31709467b48Spatrick }
31809467b48Spatrick 
31909467b48Spatrick 
run(Module & M,ModuleAnalysisManager & AM)32009467b48Spatrick PreservedAnalyses PoisonCheckingPass::run(Module &M,
32109467b48Spatrick                                           ModuleAnalysisManager &AM) {
32209467b48Spatrick   bool Changed = false;
32309467b48Spatrick   for (auto &F : M)
32409467b48Spatrick     Changed |= rewrite(F);
32509467b48Spatrick 
32609467b48Spatrick   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
32709467b48Spatrick }
32809467b48Spatrick 
run(Function & F,FunctionAnalysisManager & AM)32909467b48Spatrick PreservedAnalyses PoisonCheckingPass::run(Function &F,
33009467b48Spatrick                                           FunctionAnalysisManager &AM) {
33109467b48Spatrick   return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
33209467b48Spatrick }
33309467b48Spatrick 
33409467b48Spatrick /* Major TODO Items:
33509467b48Spatrick    - Control dependent poison UB
33609467b48Spatrick    - Strict mode - (i.e. must analyze every operand)
33709467b48Spatrick      - Poison through memory
33809467b48Spatrick      - Function ABIs
33909467b48Spatrick      - Full coverage of intrinsics, etc.. (ouch)
34009467b48Spatrick 
34109467b48Spatrick    Instructions w/Unclear Semantics:
34209467b48Spatrick    - shufflevector - It would seem reasonable for an out of bounds mask element
34309467b48Spatrick      to produce poison, but the LangRef does not state.
34409467b48Spatrick    - all binary ops w/vector operands - The likely interpretation would be that
34509467b48Spatrick      any element overflowing should produce poison for the entire result, but
34609467b48Spatrick      the LangRef does not state.
34709467b48Spatrick    - Floating point binary ops w/fmf flags other than (nnan, noinfs).  It seems
34809467b48Spatrick      strange that only certian flags should be documented as producing poison.
34909467b48Spatrick 
35009467b48Spatrick    Cases of clear poison semantics not yet implemented:
35109467b48Spatrick    - Exact flags on ashr/lshr produce poison
35209467b48Spatrick    - NSW/NUW flags on shl produce poison
35309467b48Spatrick    - Inbounds flag on getelementptr produce poison
35409467b48Spatrick    - fptosi/fptoui (out of bounds input) produce poison
35509467b48Spatrick    - Scalable vector types for insertelement/extractelement
35609467b48Spatrick    - Floating point binary ops w/fmf nnan/noinfs flags produce poison
35709467b48Spatrick  */
358