1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2 //
3 // The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/AnalysisManager.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/SubEngine.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20
21 using namespace clang;
22 using namespace ento;
23
24 namespace {
25 class SimpleSValBuilder : public SValBuilder {
26 protected:
27 SVal dispatchCast(SVal val, QualType castTy) override;
28 SVal evalCastFromNonLoc(NonLoc val, QualType castTy) override;
29 SVal evalCastFromLoc(Loc val, QualType castTy) override;
30
31 public:
SimpleSValBuilder(llvm::BumpPtrAllocator & alloc,ASTContext & context,ProgramStateManager & stateMgr)32 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
33 ProgramStateManager &stateMgr)
34 : SValBuilder(alloc, context, stateMgr) {}
~SimpleSValBuilder()35 ~SimpleSValBuilder() override {}
36
37 SVal evalMinus(NonLoc val) override;
38 SVal evalComplement(NonLoc val) override;
39 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
40 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
41 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
42 Loc lhs, Loc rhs, QualType resultTy) override;
43 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
44 Loc lhs, NonLoc rhs, QualType resultTy) override;
45
46 /// getKnownValue - evaluates a given SVal. If the SVal has only one possible
47 /// (integer) value, that value is returned. Otherwise, returns NULL.
48 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
49
50 /// Recursively descends into symbolic expressions and replaces symbols
51 /// with their known values (in the sense of the getKnownValue() method).
52 SVal simplifySVal(ProgramStateRef State, SVal V) override;
53
54 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
55 const llvm::APSInt &RHS, QualType resultTy);
56 };
57 } // end anonymous namespace
58
createSimpleSValBuilder(llvm::BumpPtrAllocator & alloc,ASTContext & context,ProgramStateManager & stateMgr)59 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
60 ASTContext &context,
61 ProgramStateManager &stateMgr) {
62 return new SimpleSValBuilder(alloc, context, stateMgr);
63 }
64
65 //===----------------------------------------------------------------------===//
66 // Transfer function for Casts.
67 //===----------------------------------------------------------------------===//
68
dispatchCast(SVal Val,QualType CastTy)69 SVal SimpleSValBuilder::dispatchCast(SVal Val, QualType CastTy) {
70 assert(Val.getAs<Loc>() || Val.getAs<NonLoc>());
71 return Val.getAs<Loc>() ? evalCastFromLoc(Val.castAs<Loc>(), CastTy)
72 : evalCastFromNonLoc(Val.castAs<NonLoc>(), CastTy);
73 }
74
evalCastFromNonLoc(NonLoc val,QualType castTy)75 SVal SimpleSValBuilder::evalCastFromNonLoc(NonLoc val, QualType castTy) {
76 bool isLocType = Loc::isLocType(castTy);
77 if (val.getAs<nonloc::PointerToMember>())
78 return val;
79
80 if (Optional<nonloc::LocAsInteger> LI = val.getAs<nonloc::LocAsInteger>()) {
81 if (isLocType)
82 return LI->getLoc();
83 // FIXME: Correctly support promotions/truncations.
84 unsigned castSize = Context.getIntWidth(castTy);
85 if (castSize == LI->getNumBits())
86 return val;
87 return makeLocAsInteger(LI->getLoc(), castSize);
88 }
89
90 if (const SymExpr *se = val.getAsSymbolicExpression()) {
91 QualType T = Context.getCanonicalType(se->getType());
92 // If types are the same or both are integers, ignore the cast.
93 // FIXME: Remove this hack when we support symbolic truncation/extension.
94 // HACK: If both castTy and T are integers, ignore the cast. This is
95 // not a permanent solution. Eventually we want to precisely handle
96 // extension/truncation of symbolic integers. This prevents us from losing
97 // precision when we assign 'x = y' and 'y' is symbolic and x and y are
98 // different integer types.
99 if (haveSameType(T, castTy))
100 return val;
101
102 if (!isLocType)
103 return makeNonLoc(se, T, castTy);
104 return UnknownVal();
105 }
106
107 // If value is a non-integer constant, produce unknown.
108 if (!val.getAs<nonloc::ConcreteInt>())
109 return UnknownVal();
110
111 // Handle casts to a boolean type.
112 if (castTy->isBooleanType()) {
113 bool b = val.castAs<nonloc::ConcreteInt>().getValue().getBoolValue();
114 return makeTruthVal(b, castTy);
115 }
116
117 // Only handle casts from integers to integers - if val is an integer constant
118 // being cast to a non-integer type, produce unknown.
119 if (!isLocType && !castTy->isIntegralOrEnumerationType())
120 return UnknownVal();
121
122 llvm::APSInt i = val.castAs<nonloc::ConcreteInt>().getValue();
123 BasicVals.getAPSIntType(castTy).apply(i);
124
125 if (isLocType)
126 return makeIntLocVal(i);
127 else
128 return makeIntVal(i);
129 }
130
evalCastFromLoc(Loc val,QualType castTy)131 SVal SimpleSValBuilder::evalCastFromLoc(Loc val, QualType castTy) {
132
133 // Casts from pointers -> pointers, just return the lval.
134 //
135 // Casts from pointers -> references, just return the lval. These
136 // can be introduced by the frontend for corner cases, e.g
137 // casting from va_list* to __builtin_va_list&.
138 //
139 if (Loc::isLocType(castTy) || castTy->isReferenceType())
140 return val;
141
142 // FIXME: Handle transparent unions where a value can be "transparently"
143 // lifted into a union type.
144 if (castTy->isUnionType())
145 return UnknownVal();
146
147 // Casting a Loc to a bool will almost always be true,
148 // unless this is a weak function or a symbolic region.
149 if (castTy->isBooleanType()) {
150 switch (val.getSubKind()) {
151 case loc::MemRegionValKind: {
152 const MemRegion *R = val.castAs<loc::MemRegionVal>().getRegion();
153 if (const FunctionCodeRegion *FTR = dyn_cast<FunctionCodeRegion>(R))
154 if (const FunctionDecl *FD = dyn_cast<FunctionDecl>(FTR->getDecl()))
155 if (FD->isWeak())
156 // FIXME: Currently we are using an extent symbol here,
157 // because there are no generic region address metadata
158 // symbols to use, only content metadata.
159 return nonloc::SymbolVal(SymMgr.getExtentSymbol(FTR));
160
161 if (const SymbolicRegion *SymR = R->getSymbolicBase())
162 return makeNonLoc(SymR->getSymbol(), BO_NE,
163 BasicVals.getZeroWithPtrWidth(), castTy);
164
165 // FALL-THROUGH
166 LLVM_FALLTHROUGH;
167 }
168
169 case loc::GotoLabelKind:
170 // Labels and non-symbolic memory regions are always true.
171 return makeTruthVal(true, castTy);
172 }
173 }
174
175 if (castTy->isIntegralOrEnumerationType()) {
176 unsigned BitWidth = Context.getIntWidth(castTy);
177
178 if (!val.getAs<loc::ConcreteInt>())
179 return makeLocAsInteger(val, BitWidth);
180
181 llvm::APSInt i = val.castAs<loc::ConcreteInt>().getValue();
182 BasicVals.getAPSIntType(castTy).apply(i);
183 return makeIntVal(i);
184 }
185
186 // All other cases: return 'UnknownVal'. This includes casting pointers
187 // to floats, which is probably badness it itself, but this is a good
188 // intermediate solution until we do something better.
189 return UnknownVal();
190 }
191
192 //===----------------------------------------------------------------------===//
193 // Transfer function for unary operators.
194 //===----------------------------------------------------------------------===//
195
evalMinus(NonLoc val)196 SVal SimpleSValBuilder::evalMinus(NonLoc val) {
197 switch (val.getSubKind()) {
198 case nonloc::ConcreteIntKind:
199 return val.castAs<nonloc::ConcreteInt>().evalMinus(*this);
200 default:
201 return UnknownVal();
202 }
203 }
204
evalComplement(NonLoc X)205 SVal SimpleSValBuilder::evalComplement(NonLoc X) {
206 switch (X.getSubKind()) {
207 case nonloc::ConcreteIntKind:
208 return X.castAs<nonloc::ConcreteInt>().evalComplement(*this);
209 default:
210 return UnknownVal();
211 }
212 }
213
214 //===----------------------------------------------------------------------===//
215 // Transfer function for binary operators.
216 //===----------------------------------------------------------------------===//
217
MakeSymIntVal(const SymExpr * LHS,BinaryOperator::Opcode op,const llvm::APSInt & RHS,QualType resultTy)218 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
219 BinaryOperator::Opcode op,
220 const llvm::APSInt &RHS,
221 QualType resultTy) {
222 bool isIdempotent = false;
223
224 // Check for a few special cases with known reductions first.
225 switch (op) {
226 default:
227 // We can't reduce this case; just treat it normally.
228 break;
229 case BO_Mul:
230 // a*0 and a*1
231 if (RHS == 0)
232 return makeIntVal(0, resultTy);
233 else if (RHS == 1)
234 isIdempotent = true;
235 break;
236 case BO_Div:
237 // a/0 and a/1
238 if (RHS == 0)
239 // This is also handled elsewhere.
240 return UndefinedVal();
241 else if (RHS == 1)
242 isIdempotent = true;
243 break;
244 case BO_Rem:
245 // a%0 and a%1
246 if (RHS == 0)
247 // This is also handled elsewhere.
248 return UndefinedVal();
249 else if (RHS == 1)
250 return makeIntVal(0, resultTy);
251 break;
252 case BO_Add:
253 case BO_Sub:
254 case BO_Shl:
255 case BO_Shr:
256 case BO_Xor:
257 // a+0, a-0, a<<0, a>>0, a^0
258 if (RHS == 0)
259 isIdempotent = true;
260 break;
261 case BO_And:
262 // a&0 and a&(~0)
263 if (RHS == 0)
264 return makeIntVal(0, resultTy);
265 else if (RHS.isAllOnesValue())
266 isIdempotent = true;
267 break;
268 case BO_Or:
269 // a|0 and a|(~0)
270 if (RHS == 0)
271 isIdempotent = true;
272 else if (RHS.isAllOnesValue()) {
273 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
274 return nonloc::ConcreteInt(Result);
275 }
276 break;
277 }
278
279 // Idempotent ops (like a*1) can still change the type of an expression.
280 // Wrap the LHS up in a NonLoc again and let evalCastFromNonLoc do the
281 // dirty work.
282 if (isIdempotent)
283 return evalCastFromNonLoc(nonloc::SymbolVal(LHS), resultTy);
284
285 // If we reach this point, the expression cannot be simplified.
286 // Make a SymbolVal for the entire expression, after converting the RHS.
287 const llvm::APSInt *ConvertedRHS = &RHS;
288 if (BinaryOperator::isComparisonOp(op)) {
289 // We're looking for a type big enough to compare the symbolic value
290 // with the given constant.
291 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
292 ASTContext &Ctx = getContext();
293 QualType SymbolType = LHS->getType();
294 uint64_t ValWidth = RHS.getBitWidth();
295 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
296
297 if (ValWidth < TypeWidth) {
298 // If the value is too small, extend it.
299 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
300 } else if (ValWidth == TypeWidth) {
301 // If the value is signed but the symbol is unsigned, do the comparison
302 // in unsigned space. [C99 6.3.1.8]
303 // (For the opposite case, the value is already unsigned.)
304 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
305 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
306 }
307 } else
308 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
309
310 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
311 }
312
313 // See if Sym is known to be a relation Rel with Bound.
isInRelation(BinaryOperator::Opcode Rel,SymbolRef Sym,llvm::APSInt Bound,ProgramStateRef State)314 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
315 llvm::APSInt Bound, ProgramStateRef State) {
316 SValBuilder &SVB = State->getStateManager().getSValBuilder();
317 SVal Result =
318 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
319 nonloc::ConcreteInt(Bound), SVB.getConditionType());
320 if (auto DV = Result.getAs<DefinedSVal>()) {
321 return !State->assume(*DV, false);
322 }
323 return false;
324 }
325
326 // See if Sym is known to be within [min/4, max/4], where min and max
327 // are the bounds of the symbol's integral type. With such symbols,
328 // some manipulations can be performed without the risk of overflow.
329 // assume() doesn't cause infinite recursion because we should be dealing
330 // with simpler symbols on every recursive call.
isWithinConstantOverflowBounds(SymbolRef Sym,ProgramStateRef State)331 static bool isWithinConstantOverflowBounds(SymbolRef Sym,
332 ProgramStateRef State) {
333 SValBuilder &SVB = State->getStateManager().getSValBuilder();
334 BasicValueFactory &BV = SVB.getBasicValueFactory();
335
336 QualType T = Sym->getType();
337 assert(T->isSignedIntegerOrEnumerationType() &&
338 "This only works with signed integers!");
339 APSIntType AT = BV.getAPSIntType(T);
340
341 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
342 return isInRelation(BO_LE, Sym, Max, State) &&
343 isInRelation(BO_GE, Sym, Min, State);
344 }
345
346 // Same for the concrete integers: see if I is within [min/4, max/4].
isWithinConstantOverflowBounds(llvm::APSInt I)347 static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
348 APSIntType AT(I);
349 assert(!AT.isUnsigned() &&
350 "This only works with signed integers!");
351
352 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
353 return (I <= Max) && (I >= -Max);
354 }
355
356 static std::pair<SymbolRef, llvm::APSInt>
decomposeSymbol(SymbolRef Sym,BasicValueFactory & BV)357 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
358 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
359 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
360 return std::make_pair(SymInt->getLHS(),
361 (SymInt->getOpcode() == BO_Add) ?
362 (SymInt->getRHS()) :
363 (-SymInt->getRHS()));
364
365 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
366 return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
367 }
368
369 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
370 // same signed integral type and no overflows occur (which should be checked
371 // by the caller).
doRearrangeUnchecked(ProgramStateRef State,BinaryOperator::Opcode Op,SymbolRef LSym,llvm::APSInt LInt,SymbolRef RSym,llvm::APSInt RInt)372 static NonLoc doRearrangeUnchecked(ProgramStateRef State,
373 BinaryOperator::Opcode Op,
374 SymbolRef LSym, llvm::APSInt LInt,
375 SymbolRef RSym, llvm::APSInt RInt) {
376 SValBuilder &SVB = State->getStateManager().getSValBuilder();
377 BasicValueFactory &BV = SVB.getBasicValueFactory();
378 SymbolManager &SymMgr = SVB.getSymbolManager();
379
380 QualType SymTy = LSym->getType();
381 assert(SymTy == RSym->getType() &&
382 "Symbols are not of the same type!");
383 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
384 "Integers are not of the same type as symbols!");
385 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
386 "Integers are not of the same type as symbols!");
387
388 QualType ResultTy;
389 if (BinaryOperator::isComparisonOp(Op))
390 ResultTy = SVB.getConditionType();
391 else if (BinaryOperator::isAdditiveOp(Op))
392 ResultTy = SymTy;
393 else
394 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
395
396 // FIXME: Can we use assume() without getting into an infinite recursion?
397 if (LSym == RSym)
398 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
399 nonloc::ConcreteInt(RInt), ResultTy)
400 .castAs<NonLoc>();
401
402 SymbolRef ResultSym = nullptr;
403 BinaryOperator::Opcode ResultOp;
404 llvm::APSInt ResultInt;
405 if (BinaryOperator::isComparisonOp(Op)) {
406 // Prefer comparing to a non-negative number.
407 // FIXME: Maybe it'd be better to have consistency in
408 // "$x - $y" vs. "$y - $x" because those are solver's keys.
409 if (LInt > RInt) {
410 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
411 ResultOp = BinaryOperator::reverseComparisonOp(Op);
412 ResultInt = LInt - RInt; // Opposite order!
413 } else {
414 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
415 ResultOp = Op;
416 ResultInt = RInt - LInt; // Opposite order!
417 }
418 } else {
419 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
420 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
421 ResultOp = BO_Add;
422 // Bring back the cosmetic difference.
423 if (ResultInt < 0) {
424 ResultInt = -ResultInt;
425 ResultOp = BO_Sub;
426 } else if (ResultInt == 0) {
427 // Shortcut: Simplify "$x + 0" to "$x".
428 return nonloc::SymbolVal(ResultSym);
429 }
430 }
431 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
432 return nonloc::SymbolVal(
433 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
434 }
435
436 // Rearrange if symbol type matches the result type and if the operator is a
437 // comparison operator, both symbol and constant must be within constant
438 // overflow bounds.
shouldRearrange(ProgramStateRef State,BinaryOperator::Opcode Op,SymbolRef Sym,llvm::APSInt Int,QualType Ty)439 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
440 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
441 return Sym->getType() == Ty &&
442 (!BinaryOperator::isComparisonOp(Op) ||
443 (isWithinConstantOverflowBounds(Sym, State) &&
444 isWithinConstantOverflowBounds(Int)));
445 }
446
tryRearrange(ProgramStateRef State,BinaryOperator::Opcode Op,NonLoc Lhs,NonLoc Rhs,QualType ResultTy)447 static Optional<NonLoc> tryRearrange(ProgramStateRef State,
448 BinaryOperator::Opcode Op, NonLoc Lhs,
449 NonLoc Rhs, QualType ResultTy) {
450 ProgramStateManager &StateMgr = State->getStateManager();
451 SValBuilder &SVB = StateMgr.getSValBuilder();
452
453 // We expect everything to be of the same type - this type.
454 QualType SingleTy;
455
456 auto &Opts =
457 StateMgr.getOwningEngine().getAnalysisManager().getAnalyzerOptions();
458
459 // FIXME: After putting complexity threshold to the symbols we can always
460 // rearrange additive operations but rearrange comparisons only if
461 // option is set.
462 if(!Opts.ShouldAggressivelySimplifyBinaryOperation)
463 return None;
464
465 SymbolRef LSym = Lhs.getAsSymbol();
466 if (!LSym)
467 return None;
468
469 if (BinaryOperator::isComparisonOp(Op)) {
470 SingleTy = LSym->getType();
471 if (ResultTy != SVB.getConditionType())
472 return None;
473 // Initialize SingleTy later with a symbol's type.
474 } else if (BinaryOperator::isAdditiveOp(Op)) {
475 SingleTy = ResultTy;
476 if (LSym->getType() != SingleTy)
477 return None;
478 } else {
479 // Don't rearrange other operations.
480 return None;
481 }
482
483 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
484
485 // Rearrange signed symbolic expressions only
486 if (!SingleTy->isSignedIntegerOrEnumerationType())
487 return None;
488
489 SymbolRef RSym = Rhs.getAsSymbol();
490 if (!RSym || RSym->getType() != SingleTy)
491 return None;
492
493 BasicValueFactory &BV = State->getBasicVals();
494 llvm::APSInt LInt, RInt;
495 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
496 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
497 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
498 !shouldRearrange(State, Op, RSym, RInt, SingleTy))
499 return None;
500
501 // We know that no overflows can occur anymore.
502 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
503 }
504
evalBinOpNN(ProgramStateRef state,BinaryOperator::Opcode op,NonLoc lhs,NonLoc rhs,QualType resultTy)505 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
506 BinaryOperator::Opcode op,
507 NonLoc lhs, NonLoc rhs,
508 QualType resultTy) {
509 NonLoc InputLHS = lhs;
510 NonLoc InputRHS = rhs;
511
512 // Handle trivial case where left-side and right-side are the same.
513 if (lhs == rhs)
514 switch (op) {
515 default:
516 break;
517 case BO_EQ:
518 case BO_LE:
519 case BO_GE:
520 return makeTruthVal(true, resultTy);
521 case BO_LT:
522 case BO_GT:
523 case BO_NE:
524 return makeTruthVal(false, resultTy);
525 case BO_Xor:
526 case BO_Sub:
527 if (resultTy->isIntegralOrEnumerationType())
528 return makeIntVal(0, resultTy);
529 return evalCastFromNonLoc(makeIntVal(0, /*Unsigned=*/false), resultTy);
530 case BO_Or:
531 case BO_And:
532 return evalCastFromNonLoc(lhs, resultTy);
533 }
534
535 while (1) {
536 switch (lhs.getSubKind()) {
537 default:
538 return makeSymExprValNN(op, lhs, rhs, resultTy);
539 case nonloc::PointerToMemberKind: {
540 assert(rhs.getSubKind() == nonloc::PointerToMemberKind &&
541 "Both SVals should have pointer-to-member-type");
542 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
543 RPTM = rhs.castAs<nonloc::PointerToMember>();
544 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
545 switch (op) {
546 case BO_EQ:
547 return makeTruthVal(LPTMD == RPTMD, resultTy);
548 case BO_NE:
549 return makeTruthVal(LPTMD != RPTMD, resultTy);
550 default:
551 return UnknownVal();
552 }
553 }
554 case nonloc::LocAsIntegerKind: {
555 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
556 switch (rhs.getSubKind()) {
557 case nonloc::LocAsIntegerKind:
558 // FIXME: at the moment the implementation
559 // of modeling "pointers as integers" is not complete.
560 if (!BinaryOperator::isComparisonOp(op))
561 return UnknownVal();
562 return evalBinOpLL(state, op, lhsL,
563 rhs.castAs<nonloc::LocAsInteger>().getLoc(),
564 resultTy);
565 case nonloc::ConcreteIntKind: {
566 // FIXME: at the moment the implementation
567 // of modeling "pointers as integers" is not complete.
568 if (!BinaryOperator::isComparisonOp(op))
569 return UnknownVal();
570 // Transform the integer into a location and compare.
571 // FIXME: This only makes sense for comparisons. If we want to, say,
572 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
573 // then pack it back into a LocAsInteger.
574 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
575 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
576 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
577 }
578 default:
579 switch (op) {
580 case BO_EQ:
581 return makeTruthVal(false, resultTy);
582 case BO_NE:
583 return makeTruthVal(true, resultTy);
584 default:
585 // This case also handles pointer arithmetic.
586 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
587 }
588 }
589 }
590 case nonloc::ConcreteIntKind: {
591 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
592
593 // If we're dealing with two known constants, just perform the operation.
594 if (const llvm::APSInt *KnownRHSValue = getKnownValue(state, rhs)) {
595 llvm::APSInt RHSValue = *KnownRHSValue;
596 if (BinaryOperator::isComparisonOp(op)) {
597 // We're looking for a type big enough to compare the two values.
598 // FIXME: This is not correct. char + short will result in a promotion
599 // to int. Unfortunately we have lost types by this point.
600 APSIntType CompareType = std::max(APSIntType(LHSValue),
601 APSIntType(RHSValue));
602 CompareType.apply(LHSValue);
603 CompareType.apply(RHSValue);
604 } else if (!BinaryOperator::isShiftOp(op)) {
605 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
606 IntType.apply(LHSValue);
607 IntType.apply(RHSValue);
608 }
609
610 const llvm::APSInt *Result =
611 BasicVals.evalAPSInt(op, LHSValue, RHSValue);
612 if (!Result)
613 return UndefinedVal();
614
615 return nonloc::ConcreteInt(*Result);
616 }
617
618 // Swap the left and right sides and flip the operator if doing so
619 // allows us to better reason about the expression (this is a form
620 // of expression canonicalization).
621 // While we're at it, catch some special cases for non-commutative ops.
622 switch (op) {
623 case BO_LT:
624 case BO_GT:
625 case BO_LE:
626 case BO_GE:
627 op = BinaryOperator::reverseComparisonOp(op);
628 LLVM_FALLTHROUGH;
629 case BO_EQ:
630 case BO_NE:
631 case BO_Add:
632 case BO_Mul:
633 case BO_And:
634 case BO_Xor:
635 case BO_Or:
636 std::swap(lhs, rhs);
637 continue;
638 case BO_Shr:
639 // (~0)>>a
640 if (LHSValue.isAllOnesValue() && LHSValue.isSigned())
641 return evalCastFromNonLoc(lhs, resultTy);
642 LLVM_FALLTHROUGH;
643 case BO_Shl:
644 // 0<<a and 0>>a
645 if (LHSValue == 0)
646 return evalCastFromNonLoc(lhs, resultTy);
647 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
648 default:
649 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
650 }
651 }
652 case nonloc::SymbolValKind: {
653 // We only handle LHS as simple symbols or SymIntExprs.
654 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
655
656 // LHS is a symbolic expression.
657 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
658
659 // Is this a logical not? (!x is represented as x == 0.)
660 if (op == BO_EQ && rhs.isZeroConstant()) {
661 // We know how to negate certain expressions. Simplify them here.
662
663 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
664 switch (opc) {
665 default:
666 // We don't know how to negate this operation.
667 // Just handle it as if it were a normal comparison to 0.
668 break;
669 case BO_LAnd:
670 case BO_LOr:
671 llvm_unreachable("Logical operators handled by branching logic.");
672 case BO_Assign:
673 case BO_MulAssign:
674 case BO_DivAssign:
675 case BO_RemAssign:
676 case BO_AddAssign:
677 case BO_SubAssign:
678 case BO_ShlAssign:
679 case BO_ShrAssign:
680 case BO_AndAssign:
681 case BO_XorAssign:
682 case BO_OrAssign:
683 case BO_Comma:
684 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
685 case BO_PtrMemD:
686 case BO_PtrMemI:
687 llvm_unreachable("Pointer arithmetic not handled here.");
688 case BO_LT:
689 case BO_GT:
690 case BO_LE:
691 case BO_GE:
692 case BO_EQ:
693 case BO_NE:
694 assert(resultTy->isBooleanType() ||
695 resultTy == getConditionType());
696 assert(symIntExpr->getType()->isBooleanType() ||
697 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
698 getConditionType()));
699 // Negate the comparison and make a value.
700 opc = BinaryOperator::negateComparisonOp(opc);
701 return makeNonLoc(symIntExpr->getLHS(), opc,
702 symIntExpr->getRHS(), resultTy);
703 }
704 }
705
706 // For now, only handle expressions whose RHS is a constant.
707 if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs)) {
708 // If both the LHS and the current expression are additive,
709 // fold their constants and try again.
710 if (BinaryOperator::isAdditiveOp(op)) {
711 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
712 if (BinaryOperator::isAdditiveOp(lop)) {
713 // Convert the two constants to a common type, then combine them.
714
715 // resultTy may not be the best type to convert to, but it's
716 // probably the best choice in expressions with mixed type
717 // (such as x+1U+2LL). The rules for implicit conversions should
718 // choose a reasonable type to preserve the expression, and will
719 // at least match how the value is going to be used.
720 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
721 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
722 const llvm::APSInt &second = IntType.convert(*RHSValue);
723
724 const llvm::APSInt *newRHS;
725 if (lop == op)
726 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
727 else
728 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
729
730 assert(newRHS && "Invalid operation despite common type!");
731 rhs = nonloc::ConcreteInt(*newRHS);
732 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
733 op = lop;
734 continue;
735 }
736 }
737
738 // Otherwise, make a SymIntExpr out of the expression.
739 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
740 }
741 }
742
743 // Does the symbolic expression simplify to a constant?
744 // If so, "fold" the constant by setting 'lhs' to a ConcreteInt
745 // and try again.
746 SVal simplifiedLhs = simplifySVal(state, lhs);
747 if (simplifiedLhs != lhs)
748 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) {
749 lhs = *simplifiedLhsAsNonLoc;
750 continue;
751 }
752
753 // Is the RHS a constant?
754 if (const llvm::APSInt *RHSValue = getKnownValue(state, rhs))
755 return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
756
757 if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
758 return *V;
759
760 // Give up -- this is not a symbolic expression we can handle.
761 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
762 }
763 }
764 }
765 }
766
evalBinOpFieldRegionFieldRegion(const FieldRegion * LeftFR,const FieldRegion * RightFR,BinaryOperator::Opcode op,QualType resultTy,SimpleSValBuilder & SVB)767 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
768 const FieldRegion *RightFR,
769 BinaryOperator::Opcode op,
770 QualType resultTy,
771 SimpleSValBuilder &SVB) {
772 // Only comparisons are meaningful here!
773 if (!BinaryOperator::isComparisonOp(op))
774 return UnknownVal();
775
776 // Next, see if the two FRs have the same super-region.
777 // FIXME: This doesn't handle casts yet, and simply stripping the casts
778 // doesn't help.
779 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
780 return UnknownVal();
781
782 const FieldDecl *LeftFD = LeftFR->getDecl();
783 const FieldDecl *RightFD = RightFR->getDecl();
784 const RecordDecl *RD = LeftFD->getParent();
785
786 // Make sure the two FRs are from the same kind of record. Just in case!
787 // FIXME: This is probably where inheritance would be a problem.
788 if (RD != RightFD->getParent())
789 return UnknownVal();
790
791 // We know for sure that the two fields are not the same, since that
792 // would have given us the same SVal.
793 if (op == BO_EQ)
794 return SVB.makeTruthVal(false, resultTy);
795 if (op == BO_NE)
796 return SVB.makeTruthVal(true, resultTy);
797
798 // Iterate through the fields and see which one comes first.
799 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
800 // members and the units in which bit-fields reside have addresses that
801 // increase in the order in which they are declared."
802 bool leftFirst = (op == BO_LT || op == BO_LE);
803 for (const auto *I : RD->fields()) {
804 if (I == LeftFD)
805 return SVB.makeTruthVal(leftFirst, resultTy);
806 if (I == RightFD)
807 return SVB.makeTruthVal(!leftFirst, resultTy);
808 }
809
810 llvm_unreachable("Fields not found in parent record's definition");
811 }
812
813 // FIXME: all this logic will change if/when we have MemRegion::getLocation().
evalBinOpLL(ProgramStateRef state,BinaryOperator::Opcode op,Loc lhs,Loc rhs,QualType resultTy)814 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
815 BinaryOperator::Opcode op,
816 Loc lhs, Loc rhs,
817 QualType resultTy) {
818 // Only comparisons and subtractions are valid operations on two pointers.
819 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
820 // However, if a pointer is casted to an integer, evalBinOpNN may end up
821 // calling this function with another operation (PR7527). We don't attempt to
822 // model this for now, but it could be useful, particularly when the
823 // "location" is actually an integer value that's been passed through a void*.
824 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
825 return UnknownVal();
826
827 // Special cases for when both sides are identical.
828 if (lhs == rhs) {
829 switch (op) {
830 default:
831 llvm_unreachable("Unimplemented operation for two identical values");
832 case BO_Sub:
833 return makeZeroVal(resultTy);
834 case BO_EQ:
835 case BO_LE:
836 case BO_GE:
837 return makeTruthVal(true, resultTy);
838 case BO_NE:
839 case BO_LT:
840 case BO_GT:
841 return makeTruthVal(false, resultTy);
842 }
843 }
844
845 switch (lhs.getSubKind()) {
846 default:
847 llvm_unreachable("Ordering not implemented for this Loc.");
848
849 case loc::GotoLabelKind:
850 // The only thing we know about labels is that they're non-null.
851 if (rhs.isZeroConstant()) {
852 switch (op) {
853 default:
854 break;
855 case BO_Sub:
856 return evalCastFromLoc(lhs, resultTy);
857 case BO_EQ:
858 case BO_LE:
859 case BO_LT:
860 return makeTruthVal(false, resultTy);
861 case BO_NE:
862 case BO_GT:
863 case BO_GE:
864 return makeTruthVal(true, resultTy);
865 }
866 }
867 // There may be two labels for the same location, and a function region may
868 // have the same address as a label at the start of the function (depending
869 // on the ABI).
870 // FIXME: we can probably do a comparison against other MemRegions, though.
871 // FIXME: is there a way to tell if two labels refer to the same location?
872 return UnknownVal();
873
874 case loc::ConcreteIntKind: {
875 // If one of the operands is a symbol and the other is a constant,
876 // build an expression for use by the constraint manager.
877 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
878 // We can only build expressions with symbols on the left,
879 // so we need a reversible operator.
880 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
881 return UnknownVal();
882
883 const llvm::APSInt &lVal = lhs.castAs<loc::ConcreteInt>().getValue();
884 op = BinaryOperator::reverseComparisonOp(op);
885 return makeNonLoc(rSym, op, lVal, resultTy);
886 }
887
888 // If both operands are constants, just perform the operation.
889 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
890 SVal ResultVal =
891 lhs.castAs<loc::ConcreteInt>().evalBinOp(BasicVals, op, *rInt);
892 if (Optional<NonLoc> Result = ResultVal.getAs<NonLoc>())
893 return evalCastFromNonLoc(*Result, resultTy);
894
895 assert(!ResultVal.getAs<Loc>() && "Loc-Loc ops should not produce Locs");
896 return UnknownVal();
897 }
898
899 // Special case comparisons against NULL.
900 // This must come after the test if the RHS is a symbol, which is used to
901 // build constraints. The address of any non-symbolic region is guaranteed
902 // to be non-NULL, as is any label.
903 assert(rhs.getAs<loc::MemRegionVal>() || rhs.getAs<loc::GotoLabel>());
904 if (lhs.isZeroConstant()) {
905 switch (op) {
906 default:
907 break;
908 case BO_EQ:
909 case BO_GT:
910 case BO_GE:
911 return makeTruthVal(false, resultTy);
912 case BO_NE:
913 case BO_LT:
914 case BO_LE:
915 return makeTruthVal(true, resultTy);
916 }
917 }
918
919 // Comparing an arbitrary integer to a region or label address is
920 // completely unknowable.
921 return UnknownVal();
922 }
923 case loc::MemRegionValKind: {
924 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
925 // If one of the operands is a symbol and the other is a constant,
926 // build an expression for use by the constraint manager.
927 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
928 if (BinaryOperator::isComparisonOp(op))
929 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
930 return UnknownVal();
931 }
932 // Special case comparisons to NULL.
933 // This must come after the test if the LHS is a symbol, which is used to
934 // build constraints. The address of any non-symbolic region is guaranteed
935 // to be non-NULL.
936 if (rInt->isZeroConstant()) {
937 if (op == BO_Sub)
938 return evalCastFromLoc(lhs, resultTy);
939
940 if (BinaryOperator::isComparisonOp(op)) {
941 QualType boolType = getContext().BoolTy;
942 NonLoc l = evalCastFromLoc(lhs, boolType).castAs<NonLoc>();
943 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
944 return evalBinOpNN(state, op, l, r, resultTy);
945 }
946 }
947
948 // Comparing a region to an arbitrary integer is completely unknowable.
949 return UnknownVal();
950 }
951
952 // Get both values as regions, if possible.
953 const MemRegion *LeftMR = lhs.getAsRegion();
954 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
955
956 const MemRegion *RightMR = rhs.getAsRegion();
957 if (!RightMR)
958 // The RHS is probably a label, which in theory could address a region.
959 // FIXME: we can probably make a more useful statement about non-code
960 // regions, though.
961 return UnknownVal();
962
963 const MemRegion *LeftBase = LeftMR->getBaseRegion();
964 const MemRegion *RightBase = RightMR->getBaseRegion();
965 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
966 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
967 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
968
969 // If the two regions are from different known memory spaces they cannot be
970 // equal. Also, assume that no symbolic region (whose memory space is
971 // unknown) is on the stack.
972 if (LeftMS != RightMS &&
973 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
974 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
975 switch (op) {
976 default:
977 return UnknownVal();
978 case BO_EQ:
979 return makeTruthVal(false, resultTy);
980 case BO_NE:
981 return makeTruthVal(true, resultTy);
982 }
983 }
984
985 // If both values wrap regions, see if they're from different base regions.
986 // Note, heap base symbolic regions are assumed to not alias with
987 // each other; for example, we assume that malloc returns different address
988 // on each invocation.
989 // FIXME: ObjC object pointers always reside on the heap, but currently
990 // we treat their memory space as unknown, because symbolic pointers
991 // to ObjC objects may alias. There should be a way to construct
992 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
993 // guesses memory space for ObjC object pointers manually instead of
994 // relying on us.
995 if (LeftBase != RightBase &&
996 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
997 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
998 switch (op) {
999 default:
1000 return UnknownVal();
1001 case BO_EQ:
1002 return makeTruthVal(false, resultTy);
1003 case BO_NE:
1004 return makeTruthVal(true, resultTy);
1005 }
1006 }
1007
1008 // Handle special cases for when both regions are element regions.
1009 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
1010 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
1011 if (RightER && LeftER) {
1012 // Next, see if the two ERs have the same super-region and matching types.
1013 // FIXME: This should do something useful even if the types don't match,
1014 // though if both indexes are constant the RegionRawOffset path will
1015 // give the correct answer.
1016 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
1017 LeftER->getElementType() == RightER->getElementType()) {
1018 // Get the left index and cast it to the correct type.
1019 // If the index is unknown or undefined, bail out here.
1020 SVal LeftIndexVal = LeftER->getIndex();
1021 Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
1022 if (!LeftIndex)
1023 return UnknownVal();
1024 LeftIndexVal = evalCastFromNonLoc(*LeftIndex, ArrayIndexTy);
1025 LeftIndex = LeftIndexVal.getAs<NonLoc>();
1026 if (!LeftIndex)
1027 return UnknownVal();
1028
1029 // Do the same for the right index.
1030 SVal RightIndexVal = RightER->getIndex();
1031 Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
1032 if (!RightIndex)
1033 return UnknownVal();
1034 RightIndexVal = evalCastFromNonLoc(*RightIndex, ArrayIndexTy);
1035 RightIndex = RightIndexVal.getAs<NonLoc>();
1036 if (!RightIndex)
1037 return UnknownVal();
1038
1039 // Actually perform the operation.
1040 // evalBinOpNN expects the two indexes to already be the right type.
1041 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
1042 }
1043 }
1044
1045 // Special handling of the FieldRegions, even with symbolic offsets.
1046 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1047 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1048 if (RightFR && LeftFR) {
1049 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1050 *this);
1051 if (!R.isUnknown())
1052 return R;
1053 }
1054
1055 // Compare the regions using the raw offsets.
1056 RegionOffset LeftOffset = LeftMR->getAsOffset();
1057 RegionOffset RightOffset = RightMR->getAsOffset();
1058
1059 if (LeftOffset.getRegion() != nullptr &&
1060 LeftOffset.getRegion() == RightOffset.getRegion() &&
1061 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1062 int64_t left = LeftOffset.getOffset();
1063 int64_t right = RightOffset.getOffset();
1064
1065 switch (op) {
1066 default:
1067 return UnknownVal();
1068 case BO_LT:
1069 return makeTruthVal(left < right, resultTy);
1070 case BO_GT:
1071 return makeTruthVal(left > right, resultTy);
1072 case BO_LE:
1073 return makeTruthVal(left <= right, resultTy);
1074 case BO_GE:
1075 return makeTruthVal(left >= right, resultTy);
1076 case BO_EQ:
1077 return makeTruthVal(left == right, resultTy);
1078 case BO_NE:
1079 return makeTruthVal(left != right, resultTy);
1080 }
1081 }
1082
1083 // At this point we're not going to get a good answer, but we can try
1084 // conjuring an expression instead.
1085 SymbolRef LHSSym = lhs.getAsLocSymbol();
1086 SymbolRef RHSSym = rhs.getAsLocSymbol();
1087 if (LHSSym && RHSSym)
1088 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1089
1090 // If we get here, we have no way of comparing the regions.
1091 return UnknownVal();
1092 }
1093 }
1094 }
1095
evalBinOpLN(ProgramStateRef state,BinaryOperator::Opcode op,Loc lhs,NonLoc rhs,QualType resultTy)1096 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1097 BinaryOperator::Opcode op,
1098 Loc lhs, NonLoc rhs, QualType resultTy) {
1099 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1100 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1101 if (PTMSV->isNullMemberPointer())
1102 return UndefinedVal();
1103 if (const FieldDecl *FD = PTMSV->getDeclAs<FieldDecl>()) {
1104 SVal Result = lhs;
1105
1106 for (const auto &I : *PTMSV)
1107 Result = StateMgr.getStoreManager().evalDerivedToBase(
1108 Result, I->getType(),I->isVirtual());
1109 return state->getLValue(FD, Result);
1110 }
1111 }
1112
1113 return rhs;
1114 }
1115
1116 assert(!BinaryOperator::isComparisonOp(op) &&
1117 "arguments to comparison ops must be of the same type");
1118
1119 // Special case: rhs is a zero constant.
1120 if (rhs.isZeroConstant())
1121 return lhs;
1122
1123 // Perserve the null pointer so that it can be found by the DerefChecker.
1124 if (lhs.isZeroConstant())
1125 return lhs;
1126
1127 // We are dealing with pointer arithmetic.
1128
1129 // Handle pointer arithmetic on constant values.
1130 if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) {
1131 if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) {
1132 const llvm::APSInt &leftI = lhsInt->getValue();
1133 assert(leftI.isUnsigned());
1134 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1135
1136 // Convert the bitwidth of rightI. This should deal with overflow
1137 // since we are dealing with concrete values.
1138 rightI = rightI.extOrTrunc(leftI.getBitWidth());
1139
1140 // Offset the increment by the pointer size.
1141 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1142 QualType pointeeType = resultTy->getPointeeType();
1143 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1144 rightI *= Multiplicand;
1145
1146 // Compute the adjusted pointer.
1147 switch (op) {
1148 case BO_Add:
1149 rightI = leftI + rightI;
1150 break;
1151 case BO_Sub:
1152 rightI = leftI - rightI;
1153 break;
1154 default:
1155 llvm_unreachable("Invalid pointer arithmetic operation");
1156 }
1157 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1158 }
1159 }
1160
1161 // Handle cases where 'lhs' is a region.
1162 if (const MemRegion *region = lhs.getAsRegion()) {
1163 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1164 SVal index = UnknownVal();
1165 const SubRegion *superR = nullptr;
1166 // We need to know the type of the pointer in order to add an integer to it.
1167 // Depending on the type, different amount of bytes is added.
1168 QualType elementType;
1169
1170 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1171 assert(op == BO_Add || op == BO_Sub);
1172 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1173 getArrayIndexType());
1174 superR = cast<SubRegion>(elemReg->getSuperRegion());
1175 elementType = elemReg->getElementType();
1176 }
1177 else if (isa<SubRegion>(region)) {
1178 assert(op == BO_Add || op == BO_Sub);
1179 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1180 superR = cast<SubRegion>(region);
1181 // TODO: Is this actually reliable? Maybe improving our MemRegion
1182 // hierarchy to provide typed regions for all non-void pointers would be
1183 // better. For instance, we cannot extend this towards LocAsInteger
1184 // operations, where result type of the expression is integer.
1185 if (resultTy->isAnyPointerType())
1186 elementType = resultTy->getPointeeType();
1187 }
1188
1189 // Represent arithmetic on void pointers as arithmetic on char pointers.
1190 // It is fine when a TypedValueRegion of char value type represents
1191 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1192 if (elementType->isVoidType())
1193 elementType = getContext().CharTy;
1194
1195 if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1196 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1197 superR, getContext()));
1198 }
1199 }
1200 return UnknownVal();
1201 }
1202
getKnownValue(ProgramStateRef state,SVal V)1203 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1204 SVal V) {
1205 V = simplifySVal(state, V);
1206 if (V.isUnknownOrUndef())
1207 return nullptr;
1208
1209 if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1210 return &X->getValue();
1211
1212 if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1213 return &X->getValue();
1214
1215 if (SymbolRef Sym = V.getAsSymbol())
1216 return state->getConstraintManager().getSymVal(state, Sym);
1217
1218 // FIXME: Add support for SymExprs.
1219 return nullptr;
1220 }
1221
simplifySVal(ProgramStateRef State,SVal V)1222 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1223 // For now, this function tries to constant-fold symbols inside a
1224 // nonloc::SymbolVal, and does nothing else. More simplifications should
1225 // be possible, such as constant-folding an index in an ElementRegion.
1226
1227 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1228 ProgramStateRef State;
1229 SValBuilder &SVB;
1230
1231 // Cache results for the lifetime of the Simplifier. Results change every
1232 // time new constraints are added to the program state, which is the whole
1233 // point of simplifying, and for that very reason it's pointless to maintain
1234 // the same cache for the duration of the whole analysis.
1235 llvm::DenseMap<SymbolRef, SVal> Cached;
1236
1237 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1238 return Sym == Val.getAsSymbol();
1239 }
1240
1241 SVal cache(SymbolRef Sym, SVal V) {
1242 Cached[Sym] = V;
1243 return V;
1244 }
1245
1246 SVal skip(SymbolRef Sym) {
1247 return cache(Sym, SVB.makeSymbolVal(Sym));
1248 }
1249
1250 public:
1251 Simplifier(ProgramStateRef State)
1252 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1253
1254 SVal VisitSymbolData(const SymbolData *S) {
1255 // No cache here.
1256 if (const llvm::APSInt *I =
1257 SVB.getKnownValue(State, SVB.makeSymbolVal(S)))
1258 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1259 : (SVal)SVB.makeIntVal(*I);
1260 return SVB.makeSymbolVal(S);
1261 }
1262
1263 // TODO: Support SymbolCast. Support IntSymExpr when/if we actually
1264 // start producing them.
1265
1266 SVal VisitSymIntExpr(const SymIntExpr *S) {
1267 auto I = Cached.find(S);
1268 if (I != Cached.end())
1269 return I->second;
1270
1271 SVal LHS = Visit(S->getLHS());
1272 if (isUnchanged(S->getLHS(), LHS))
1273 return skip(S);
1274
1275 SVal RHS;
1276 // By looking at the APSInt in the right-hand side of S, we cannot
1277 // figure out if it should be treated as a Loc or as a NonLoc.
1278 // So make our guess by recalling that we cannot multiply pointers
1279 // or compare a pointer to an integer.
1280 if (Loc::isLocType(S->getLHS()->getType()) &&
1281 BinaryOperator::isComparisonOp(S->getOpcode())) {
1282 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1283 // the same meaning for Loc-type symbols, but the latter form
1284 // is preferred in SVal computations for being Loc itself.
1285 if (SymbolRef Sym = LHS.getAsSymbol()) {
1286 assert(Loc::isLocType(Sym->getType()));
1287 LHS = SVB.makeLoc(Sym);
1288 }
1289 RHS = SVB.makeIntLocVal(S->getRHS());
1290 } else {
1291 RHS = SVB.makeIntVal(S->getRHS());
1292 }
1293
1294 return cache(
1295 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1296 }
1297
1298 SVal VisitSymSymExpr(const SymSymExpr *S) {
1299 auto I = Cached.find(S);
1300 if (I != Cached.end())
1301 return I->second;
1302
1303 // For now don't try to simplify mixed Loc/NonLoc expressions
1304 // because they often appear from LocAsInteger operations
1305 // and we don't know how to combine a LocAsInteger
1306 // with a concrete value.
1307 if (Loc::isLocType(S->getLHS()->getType()) !=
1308 Loc::isLocType(S->getRHS()->getType()))
1309 return skip(S);
1310
1311 SVal LHS = Visit(S->getLHS());
1312 SVal RHS = Visit(S->getRHS());
1313 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1314 return skip(S);
1315
1316 return cache(
1317 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1318 }
1319
1320 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1321
1322 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1323
1324 SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) {
1325 // Simplification is much more costly than computing complexity.
1326 // For high complexity, it may be not worth it.
1327 return Visit(V.getSymbol());
1328 }
1329
1330 SVal VisitSVal(SVal V) { return V; }
1331 };
1332
1333 // A crude way of preventing this function from calling itself from evalBinOp.
1334 static bool isReentering = false;
1335 if (isReentering)
1336 return V;
1337
1338 isReentering = true;
1339 SVal SimplifiedV = Simplifier(State).Visit(V);
1340 isReentering = false;
1341
1342 return SimplifiedV;
1343 }
1344