109467b48Spatrick //===- PoisonChecking.cpp - -----------------------------------------------===//
209467b48Spatrick //
309467b48Spatrick // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
409467b48Spatrick // See https://llvm.org/LICENSE.txt for license information.
509467b48Spatrick // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
609467b48Spatrick //
709467b48Spatrick //===----------------------------------------------------------------------===//
809467b48Spatrick //
909467b48Spatrick // Implements a transform pass which instruments IR such that poison semantics
1009467b48Spatrick // are made explicit. That is, it provides a (possibly partial) executable
1109467b48Spatrick // semantics for every instruction w.r.t. poison as specified in the LLVM
1209467b48Spatrick // LangRef. There are obvious parallels to the sanitizer tools, but this pass
1309467b48Spatrick // is focused purely on the semantics of LLVM IR, not any particular source
1409467b48Spatrick // language. If you're looking for something to see if your C/C++ contains
1509467b48Spatrick // UB, this is not it.
1609467b48Spatrick //
1709467b48Spatrick // The rewritten semantics of each instruction will include the following
1809467b48Spatrick // components:
1909467b48Spatrick //
2009467b48Spatrick // 1) The original instruction, unmodified.
2109467b48Spatrick // 2) A propagation rule which translates dynamic information about the poison
2209467b48Spatrick // state of each input to whether the dynamic output of the instruction
2309467b48Spatrick // produces poison.
24097a140dSpatrick // 3) A creation rule which validates any poison producing flags on the
2509467b48Spatrick // instruction itself (e.g. checks for overflow on nsw).
2609467b48Spatrick // 4) A check rule which traps (to a handler function) if this instruction must
2709467b48Spatrick // execute undefined behavior given the poison state of it's inputs.
2809467b48Spatrick //
29097a140dSpatrick // This is a must analysis based transform; that is, the resulting code may
30097a140dSpatrick // produce a false negative result (not report UB when actually exists
31097a140dSpatrick // according to the LangRef spec), but should never produce a false positive
32097a140dSpatrick // (report UB where it doesn't exist).
3309467b48Spatrick //
3409467b48Spatrick // Use cases for this pass include:
3509467b48Spatrick // - Understanding (and testing!) the implications of the definition of poison
3609467b48Spatrick // from the LangRef.
3709467b48Spatrick // - Validating the output of a IR fuzzer to ensure that all programs produced
3809467b48Spatrick // are well defined on the specific input used.
3909467b48Spatrick // - Finding/confirming poison specific miscompiles by checking the poison
4009467b48Spatrick // status of an input/IR pair is the same before and after an optimization
4109467b48Spatrick // transform.
4209467b48Spatrick // - Checking that a bugpoint reduction does not introduce UB which didn't
4309467b48Spatrick // exist in the original program being reduced.
4409467b48Spatrick //
4509467b48Spatrick // The major sources of inaccuracy are currently:
4609467b48Spatrick // - Most validation rules not yet implemented for instructions with poison
4709467b48Spatrick // relavant flags. At the moment, only nsw/nuw on add/sub are supported.
4809467b48Spatrick // - UB which is control dependent on a branch on poison is not yet
4909467b48Spatrick // reported. Currently, only data flow dependence is modeled.
5009467b48Spatrick // - Poison which is propagated through memory is not modeled. As such,
5109467b48Spatrick // storing poison to memory and then reloading it will cause a false negative
5209467b48Spatrick // as we consider the reloaded value to not be poisoned.
5309467b48Spatrick // - Poison propagation across function boundaries is not modeled. At the
5409467b48Spatrick // moment, all arguments and return values are assumed not to be poison.
5509467b48Spatrick // - Undef is not modeled. In particular, the optimizer's freedom to pick
5609467b48Spatrick // concrete values for undef bits so as to maximize potential for producing
5709467b48Spatrick // poison is not modeled.
5809467b48Spatrick //
5909467b48Spatrick //===----------------------------------------------------------------------===//
6009467b48Spatrick
6109467b48Spatrick #include "llvm/Transforms/Instrumentation/PoisonChecking.h"
6209467b48Spatrick #include "llvm/ADT/DenseMap.h"
6309467b48Spatrick #include "llvm/Analysis/ValueTracking.h"
6409467b48Spatrick #include "llvm/IR/IRBuilder.h"
6509467b48Spatrick #include "llvm/Support/CommandLine.h"
6609467b48Spatrick
6709467b48Spatrick using namespace llvm;
6809467b48Spatrick
6909467b48Spatrick #define DEBUG_TYPE "poison-checking"
7009467b48Spatrick
7109467b48Spatrick static cl::opt<bool>
7209467b48Spatrick LocalCheck("poison-checking-function-local",
7309467b48Spatrick cl::init(false),
7409467b48Spatrick cl::desc("Check that returns are non-poison (for testing)"));
7509467b48Spatrick
7609467b48Spatrick
isConstantFalse(Value * V)7709467b48Spatrick static bool isConstantFalse(Value* V) {
7809467b48Spatrick assert(V->getType()->isIntegerTy(1));
7909467b48Spatrick if (auto *CI = dyn_cast<ConstantInt>(V))
8009467b48Spatrick return CI->isZero();
8109467b48Spatrick return false;
8209467b48Spatrick }
8309467b48Spatrick
buildOrChain(IRBuilder<> & B,ArrayRef<Value * > Ops)8409467b48Spatrick static Value *buildOrChain(IRBuilder<> &B, ArrayRef<Value*> Ops) {
8509467b48Spatrick if (Ops.size() == 0)
8609467b48Spatrick return B.getFalse();
8709467b48Spatrick unsigned i = 0;
8809467b48Spatrick for (; i < Ops.size() && isConstantFalse(Ops[i]); i++) {}
8909467b48Spatrick if (i == Ops.size())
9009467b48Spatrick return B.getFalse();
9109467b48Spatrick Value *Accum = Ops[i++];
92*d415bd75Srobert for (Value *Op : llvm::drop_begin(Ops, i))
93*d415bd75Srobert if (!isConstantFalse(Op))
94*d415bd75Srobert Accum = B.CreateOr(Accum, Op);
9509467b48Spatrick return Accum;
9609467b48Spatrick }
9709467b48Spatrick
generateCreationChecksForBinOp(Instruction & I,SmallVectorImpl<Value * > & Checks)98097a140dSpatrick static void generateCreationChecksForBinOp(Instruction &I,
99097a140dSpatrick SmallVectorImpl<Value*> &Checks) {
10009467b48Spatrick assert(isa<BinaryOperator>(I));
10109467b48Spatrick
10209467b48Spatrick IRBuilder<> B(&I);
10309467b48Spatrick Value *LHS = I.getOperand(0);
10409467b48Spatrick Value *RHS = I.getOperand(1);
10509467b48Spatrick switch (I.getOpcode()) {
10609467b48Spatrick default:
10709467b48Spatrick return;
10809467b48Spatrick case Instruction::Add: {
10909467b48Spatrick if (I.hasNoSignedWrap()) {
11009467b48Spatrick auto *OverflowOp =
11109467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::sadd_with_overflow, LHS, RHS);
11209467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
11309467b48Spatrick }
11409467b48Spatrick if (I.hasNoUnsignedWrap()) {
11509467b48Spatrick auto *OverflowOp =
11609467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::uadd_with_overflow, LHS, RHS);
11709467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
11809467b48Spatrick }
11909467b48Spatrick break;
12009467b48Spatrick }
12109467b48Spatrick case Instruction::Sub: {
12209467b48Spatrick if (I.hasNoSignedWrap()) {
12309467b48Spatrick auto *OverflowOp =
12409467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::ssub_with_overflow, LHS, RHS);
12509467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
12609467b48Spatrick }
12709467b48Spatrick if (I.hasNoUnsignedWrap()) {
12809467b48Spatrick auto *OverflowOp =
12909467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::usub_with_overflow, LHS, RHS);
13009467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
13109467b48Spatrick }
13209467b48Spatrick break;
13309467b48Spatrick }
13409467b48Spatrick case Instruction::Mul: {
13509467b48Spatrick if (I.hasNoSignedWrap()) {
13609467b48Spatrick auto *OverflowOp =
13709467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::smul_with_overflow, LHS, RHS);
13809467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
13909467b48Spatrick }
14009467b48Spatrick if (I.hasNoUnsignedWrap()) {
14109467b48Spatrick auto *OverflowOp =
14209467b48Spatrick B.CreateBinaryIntrinsic(Intrinsic::umul_with_overflow, LHS, RHS);
14309467b48Spatrick Checks.push_back(B.CreateExtractValue(OverflowOp, 1));
14409467b48Spatrick }
14509467b48Spatrick break;
14609467b48Spatrick }
14709467b48Spatrick case Instruction::UDiv: {
14809467b48Spatrick if (I.isExact()) {
14909467b48Spatrick auto *Check =
15009467b48Spatrick B.CreateICmp(ICmpInst::ICMP_NE, B.CreateURem(LHS, RHS),
15109467b48Spatrick ConstantInt::get(LHS->getType(), 0));
15209467b48Spatrick Checks.push_back(Check);
15309467b48Spatrick }
15409467b48Spatrick break;
15509467b48Spatrick }
15609467b48Spatrick case Instruction::SDiv: {
15709467b48Spatrick if (I.isExact()) {
15809467b48Spatrick auto *Check =
15909467b48Spatrick B.CreateICmp(ICmpInst::ICMP_NE, B.CreateSRem(LHS, RHS),
16009467b48Spatrick ConstantInt::get(LHS->getType(), 0));
16109467b48Spatrick Checks.push_back(Check);
16209467b48Spatrick }
16309467b48Spatrick break;
16409467b48Spatrick }
16509467b48Spatrick case Instruction::AShr:
16609467b48Spatrick case Instruction::LShr:
16709467b48Spatrick case Instruction::Shl: {
16809467b48Spatrick Value *ShiftCheck =
16909467b48Spatrick B.CreateICmp(ICmpInst::ICMP_UGE, RHS,
17009467b48Spatrick ConstantInt::get(RHS->getType(),
17109467b48Spatrick LHS->getType()->getScalarSizeInBits()));
17209467b48Spatrick Checks.push_back(ShiftCheck);
17309467b48Spatrick break;
17409467b48Spatrick }
17509467b48Spatrick };
17609467b48Spatrick }
17709467b48Spatrick
178097a140dSpatrick /// Given an instruction which can produce poison on non-poison inputs
179097a140dSpatrick /// (i.e. canCreatePoison returns true), generate runtime checks to produce
180097a140dSpatrick /// boolean indicators of when poison would result.
generateCreationChecks(Instruction & I,SmallVectorImpl<Value * > & Checks)181097a140dSpatrick static void generateCreationChecks(Instruction &I,
182097a140dSpatrick SmallVectorImpl<Value*> &Checks) {
18309467b48Spatrick IRBuilder<> B(&I);
18409467b48Spatrick if (isa<BinaryOperator>(I) && !I.getType()->isVectorTy())
185097a140dSpatrick generateCreationChecksForBinOp(I, Checks);
18609467b48Spatrick
187097a140dSpatrick // Handle non-binops separately
18809467b48Spatrick switch (I.getOpcode()) {
18909467b48Spatrick default:
190097a140dSpatrick // Note there are a couple of missing cases here, once implemented, this
191097a140dSpatrick // should become an llvm_unreachable.
19209467b48Spatrick break;
19309467b48Spatrick case Instruction::ExtractElement: {
19409467b48Spatrick Value *Vec = I.getOperand(0);
195097a140dSpatrick auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
196097a140dSpatrick if (!VecVTy)
19709467b48Spatrick break;
19809467b48Spatrick Value *Idx = I.getOperand(1);
199097a140dSpatrick unsigned NumElts = VecVTy->getNumElements();
20009467b48Spatrick Value *Check =
20109467b48Spatrick B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
20209467b48Spatrick ConstantInt::get(Idx->getType(), NumElts));
20309467b48Spatrick Checks.push_back(Check);
20409467b48Spatrick break;
20509467b48Spatrick }
20609467b48Spatrick case Instruction::InsertElement: {
20709467b48Spatrick Value *Vec = I.getOperand(0);
208097a140dSpatrick auto *VecVTy = dyn_cast<FixedVectorType>(Vec->getType());
209097a140dSpatrick if (!VecVTy)
21009467b48Spatrick break;
21109467b48Spatrick Value *Idx = I.getOperand(2);
212097a140dSpatrick unsigned NumElts = VecVTy->getNumElements();
21309467b48Spatrick Value *Check =
21409467b48Spatrick B.CreateICmp(ICmpInst::ICMP_UGE, Idx,
21509467b48Spatrick ConstantInt::get(Idx->getType(), NumElts));
21609467b48Spatrick Checks.push_back(Check);
21709467b48Spatrick break;
21809467b48Spatrick }
21909467b48Spatrick };
22009467b48Spatrick }
22109467b48Spatrick
getPoisonFor(DenseMap<Value *,Value * > & ValToPoison,Value * V)22209467b48Spatrick static Value *getPoisonFor(DenseMap<Value *, Value *> &ValToPoison, Value *V) {
22309467b48Spatrick auto Itr = ValToPoison.find(V);
22409467b48Spatrick if (Itr != ValToPoison.end())
22509467b48Spatrick return Itr->second;
22609467b48Spatrick if (isa<Constant>(V)) {
22709467b48Spatrick return ConstantInt::getFalse(V->getContext());
22809467b48Spatrick }
22909467b48Spatrick // Return false for unknwon values - this implements a non-strict mode where
23009467b48Spatrick // unhandled IR constructs are simply considered to never produce poison. At
23109467b48Spatrick // some point in the future, we probably want a "strict mode" for testing if
23209467b48Spatrick // nothing else.
23309467b48Spatrick return ConstantInt::getFalse(V->getContext());
23409467b48Spatrick }
23509467b48Spatrick
CreateAssert(IRBuilder<> & B,Value * Cond)23609467b48Spatrick static void CreateAssert(IRBuilder<> &B, Value *Cond) {
23709467b48Spatrick assert(Cond->getType()->isIntegerTy(1));
23809467b48Spatrick if (auto *CI = dyn_cast<ConstantInt>(Cond))
23909467b48Spatrick if (CI->isAllOnesValue())
24009467b48Spatrick return;
24109467b48Spatrick
24209467b48Spatrick Module *M = B.GetInsertBlock()->getModule();
24309467b48Spatrick M->getOrInsertFunction("__poison_checker_assert",
24409467b48Spatrick Type::getVoidTy(M->getContext()),
24509467b48Spatrick Type::getInt1Ty(M->getContext()));
24609467b48Spatrick Function *TrapFunc = M->getFunction("__poison_checker_assert");
24709467b48Spatrick B.CreateCall(TrapFunc, Cond);
24809467b48Spatrick }
24909467b48Spatrick
CreateAssertNot(IRBuilder<> & B,Value * Cond)25009467b48Spatrick static void CreateAssertNot(IRBuilder<> &B, Value *Cond) {
25109467b48Spatrick assert(Cond->getType()->isIntegerTy(1));
25209467b48Spatrick CreateAssert(B, B.CreateNot(Cond));
25309467b48Spatrick }
25409467b48Spatrick
rewrite(Function & F)25509467b48Spatrick static bool rewrite(Function &F) {
25609467b48Spatrick auto * const Int1Ty = Type::getInt1Ty(F.getContext());
25709467b48Spatrick
25809467b48Spatrick DenseMap<Value *, Value *> ValToPoison;
25909467b48Spatrick
26009467b48Spatrick for (BasicBlock &BB : F)
26109467b48Spatrick for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
26209467b48Spatrick auto *OldPHI = cast<PHINode>(&*I);
263097a140dSpatrick auto *NewPHI = PHINode::Create(Int1Ty, OldPHI->getNumIncomingValues());
26409467b48Spatrick for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++)
26509467b48Spatrick NewPHI->addIncoming(UndefValue::get(Int1Ty),
26609467b48Spatrick OldPHI->getIncomingBlock(i));
26709467b48Spatrick NewPHI->insertBefore(OldPHI);
26809467b48Spatrick ValToPoison[OldPHI] = NewPHI;
26909467b48Spatrick }
27009467b48Spatrick
27109467b48Spatrick for (BasicBlock &BB : F)
27209467b48Spatrick for (Instruction &I : BB) {
27309467b48Spatrick if (isa<PHINode>(I)) continue;
27409467b48Spatrick
27509467b48Spatrick IRBuilder<> B(cast<Instruction>(&I));
27609467b48Spatrick
27709467b48Spatrick // Note: There are many more sources of documented UB, but this pass only
27809467b48Spatrick // attempts to find UB triggered by propagation of poison.
279*d415bd75Srobert SmallVector<const Value *, 4> NonPoisonOps;
280*d415bd75Srobert SmallPtrSet<const Value *, 4> SeenNonPoisonOps;
28173471bf0Spatrick getGuaranteedNonPoisonOps(&I, NonPoisonOps);
28273471bf0Spatrick for (const Value *Op : NonPoisonOps)
283*d415bd75Srobert if (SeenNonPoisonOps.insert(Op).second)
284*d415bd75Srobert CreateAssertNot(B,
285*d415bd75Srobert getPoisonFor(ValToPoison, const_cast<Value *>(Op)));
28609467b48Spatrick
28709467b48Spatrick if (LocalCheck)
28809467b48Spatrick if (auto *RI = dyn_cast<ReturnInst>(&I))
28909467b48Spatrick if (RI->getNumOperands() != 0) {
29009467b48Spatrick Value *Op = RI->getOperand(0);
29109467b48Spatrick CreateAssertNot(B, getPoisonFor(ValToPoison, Op));
29209467b48Spatrick }
29309467b48Spatrick
29409467b48Spatrick SmallVector<Value*, 4> Checks;
295*d415bd75Srobert for (const Use &U : I.operands()) {
296*d415bd75Srobert if (ValToPoison.count(U) && propagatesPoison(U))
297*d415bd75Srobert Checks.push_back(getPoisonFor(ValToPoison, U));
298*d415bd75Srobert }
29909467b48Spatrick
30073471bf0Spatrick if (canCreatePoison(cast<Operator>(&I)))
301097a140dSpatrick generateCreationChecks(I, Checks);
30209467b48Spatrick ValToPoison[&I] = buildOrChain(B, Checks);
30309467b48Spatrick }
30409467b48Spatrick
30509467b48Spatrick for (BasicBlock &BB : F)
30609467b48Spatrick for (auto I = BB.begin(); isa<PHINode>(&*I); I++) {
30709467b48Spatrick auto *OldPHI = cast<PHINode>(&*I);
30809467b48Spatrick if (!ValToPoison.count(OldPHI))
30909467b48Spatrick continue; // skip the newly inserted phis
31009467b48Spatrick auto *NewPHI = cast<PHINode>(ValToPoison[OldPHI]);
31109467b48Spatrick for (unsigned i = 0; i < OldPHI->getNumIncomingValues(); i++) {
31209467b48Spatrick auto *OldVal = OldPHI->getIncomingValue(i);
31309467b48Spatrick NewPHI->setIncomingValue(i, getPoisonFor(ValToPoison, OldVal));
31409467b48Spatrick }
31509467b48Spatrick }
31609467b48Spatrick return true;
31709467b48Spatrick }
31809467b48Spatrick
31909467b48Spatrick
run(Module & M,ModuleAnalysisManager & AM)32009467b48Spatrick PreservedAnalyses PoisonCheckingPass::run(Module &M,
32109467b48Spatrick ModuleAnalysisManager &AM) {
32209467b48Spatrick bool Changed = false;
32309467b48Spatrick for (auto &F : M)
32409467b48Spatrick Changed |= rewrite(F);
32509467b48Spatrick
32609467b48Spatrick return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
32709467b48Spatrick }
32809467b48Spatrick
run(Function & F,FunctionAnalysisManager & AM)32909467b48Spatrick PreservedAnalyses PoisonCheckingPass::run(Function &F,
33009467b48Spatrick FunctionAnalysisManager &AM) {
33109467b48Spatrick return rewrite(F) ? PreservedAnalyses::none() : PreservedAnalyses::all();
33209467b48Spatrick }
33309467b48Spatrick
33409467b48Spatrick /* Major TODO Items:
33509467b48Spatrick - Control dependent poison UB
33609467b48Spatrick - Strict mode - (i.e. must analyze every operand)
33709467b48Spatrick - Poison through memory
33809467b48Spatrick - Function ABIs
33909467b48Spatrick - Full coverage of intrinsics, etc.. (ouch)
34009467b48Spatrick
34109467b48Spatrick Instructions w/Unclear Semantics:
34209467b48Spatrick - shufflevector - It would seem reasonable for an out of bounds mask element
34309467b48Spatrick to produce poison, but the LangRef does not state.
34409467b48Spatrick - all binary ops w/vector operands - The likely interpretation would be that
34509467b48Spatrick any element overflowing should produce poison for the entire result, but
34609467b48Spatrick the LangRef does not state.
34709467b48Spatrick - Floating point binary ops w/fmf flags other than (nnan, noinfs). It seems
34809467b48Spatrick strange that only certian flags should be documented as producing poison.
34909467b48Spatrick
35009467b48Spatrick Cases of clear poison semantics not yet implemented:
35109467b48Spatrick - Exact flags on ashr/lshr produce poison
35209467b48Spatrick - NSW/NUW flags on shl produce poison
35309467b48Spatrick - Inbounds flag on getelementptr produce poison
35409467b48Spatrick - fptosi/fptoui (out of bounds input) produce poison
35509467b48Spatrick - Scalable vector types for insertelement/extractelement
35609467b48Spatrick - Floating point binary ops w/fmf nnan/noinfs flags produce poison
35709467b48Spatrick */
358