1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*-
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines SimpleSValBuilder, a basic implementation of SValBuilder.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h"
14 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
18 #include <optional>
19
20 using namespace clang;
21 using namespace ento;
22
23 namespace {
24 class SimpleSValBuilder : public SValBuilder {
25
26 // Query the constraint manager whether the SVal has only one possible
27 // (integer) value. If that is the case, the value is returned. Otherwise,
28 // returns NULL.
29 // This is an implementation detail. Checkers should use `getKnownValue()`
30 // instead.
31 const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V);
32
33 // With one `simplifySValOnce` call, a compound symbols might collapse to
34 // simpler symbol tree that is still possible to further simplify. Thus, we
35 // do the simplification on a new symbol tree until we reach the simplest
36 // form, i.e. the fixpoint.
37 // Consider the following symbol `(b * b) * b * b` which has this tree:
38 // *
39 // / \
40 // * b
41 // / \
42 // / b
43 // (b * b)
44 // Now, if the `b * b == 1` new constraint is added then during the first
45 // iteration we have the following transformations:
46 // * *
47 // / \ / \
48 // * b --> b b
49 // / \
50 // / b
51 // 1
52 // We need another iteration to reach the final result `1`.
53 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val);
54
55 // Recursively descends into symbolic expressions and replaces symbols
56 // with their known values (in the sense of the getConstValue() method).
57 // We traverse the symbol tree and query the constraint values for the
58 // sub-trees and if a value is a constant we do the constant folding.
59 SVal simplifySValOnce(ProgramStateRef State, SVal V);
60
61 public:
SimpleSValBuilder(llvm::BumpPtrAllocator & alloc,ASTContext & context,ProgramStateManager & stateMgr)62 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context,
63 ProgramStateManager &stateMgr)
64 : SValBuilder(alloc, context, stateMgr) {}
~SimpleSValBuilder()65 ~SimpleSValBuilder() override {}
66
67 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op,
68 NonLoc lhs, NonLoc rhs, QualType resultTy) override;
69 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op,
70 Loc lhs, Loc rhs, QualType resultTy) override;
71 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op,
72 Loc lhs, NonLoc rhs, QualType resultTy) override;
73
74 /// Evaluates a given SVal by recursively evaluating and
75 /// simplifying the children SVals. If the SVal has only one possible
76 /// (integer) value, that value is returned. Otherwise, returns NULL.
77 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override;
78
79 SVal simplifySVal(ProgramStateRef State, SVal V) override;
80
81 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op,
82 const llvm::APSInt &RHS, QualType resultTy);
83 };
84 } // end anonymous namespace
85
createSimpleSValBuilder(llvm::BumpPtrAllocator & alloc,ASTContext & context,ProgramStateManager & stateMgr)86 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc,
87 ASTContext &context,
88 ProgramStateManager &stateMgr) {
89 return new SimpleSValBuilder(alloc, context, stateMgr);
90 }
91
92 // Checks if the negation the value and flipping sign preserve
93 // the semantics on the operation in the resultType
isNegationValuePreserving(const llvm::APSInt & Value,APSIntType ResultType)94 static bool isNegationValuePreserving(const llvm::APSInt &Value,
95 APSIntType ResultType) {
96 const unsigned ValueBits = Value.getSignificantBits();
97 if (ValueBits == ResultType.getBitWidth()) {
98 // The value is the lowest negative value that is representable
99 // in signed integer with bitWith of result type. The
100 // negation is representable if resultType is unsigned.
101 return ResultType.isUnsigned();
102 }
103
104 // If resultType bitWith is higher that number of bits required
105 // to represent RHS, the sign flip produce same value.
106 return ValueBits < ResultType.getBitWidth();
107 }
108
109 //===----------------------------------------------------------------------===//
110 // Transfer function for binary operators.
111 //===----------------------------------------------------------------------===//
112
MakeSymIntVal(const SymExpr * LHS,BinaryOperator::Opcode op,const llvm::APSInt & RHS,QualType resultTy)113 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS,
114 BinaryOperator::Opcode op,
115 const llvm::APSInt &RHS,
116 QualType resultTy) {
117 bool isIdempotent = false;
118
119 // Check for a few special cases with known reductions first.
120 switch (op) {
121 default:
122 // We can't reduce this case; just treat it normally.
123 break;
124 case BO_Mul:
125 // a*0 and a*1
126 if (RHS == 0)
127 return makeIntVal(0, resultTy);
128 else if (RHS == 1)
129 isIdempotent = true;
130 break;
131 case BO_Div:
132 // a/0 and a/1
133 if (RHS == 0)
134 // This is also handled elsewhere.
135 return UndefinedVal();
136 else if (RHS == 1)
137 isIdempotent = true;
138 break;
139 case BO_Rem:
140 // a%0 and a%1
141 if (RHS == 0)
142 // This is also handled elsewhere.
143 return UndefinedVal();
144 else if (RHS == 1)
145 return makeIntVal(0, resultTy);
146 break;
147 case BO_Add:
148 case BO_Sub:
149 case BO_Shl:
150 case BO_Shr:
151 case BO_Xor:
152 // a+0, a-0, a<<0, a>>0, a^0
153 if (RHS == 0)
154 isIdempotent = true;
155 break;
156 case BO_And:
157 // a&0 and a&(~0)
158 if (RHS == 0)
159 return makeIntVal(0, resultTy);
160 else if (RHS.isAllOnes())
161 isIdempotent = true;
162 break;
163 case BO_Or:
164 // a|0 and a|(~0)
165 if (RHS == 0)
166 isIdempotent = true;
167 else if (RHS.isAllOnes()) {
168 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS);
169 return nonloc::ConcreteInt(Result);
170 }
171 break;
172 }
173
174 // Idempotent ops (like a*1) can still change the type of an expression.
175 // Wrap the LHS up in a NonLoc again and let evalCast do the
176 // dirty work.
177 if (isIdempotent)
178 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{});
179
180 // If we reach this point, the expression cannot be simplified.
181 // Make a SymbolVal for the entire expression, after converting the RHS.
182 const llvm::APSInt *ConvertedRHS = &RHS;
183 if (BinaryOperator::isComparisonOp(op)) {
184 // We're looking for a type big enough to compare the symbolic value
185 // with the given constant.
186 // FIXME: This is an approximation of Sema::UsualArithmeticConversions.
187 ASTContext &Ctx = getContext();
188 QualType SymbolType = LHS->getType();
189 uint64_t ValWidth = RHS.getBitWidth();
190 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType);
191
192 if (ValWidth < TypeWidth) {
193 // If the value is too small, extend it.
194 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
195 } else if (ValWidth == TypeWidth) {
196 // If the value is signed but the symbol is unsigned, do the comparison
197 // in unsigned space. [C99 6.3.1.8]
198 // (For the opposite case, the value is already unsigned.)
199 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType())
200 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS);
201 }
202 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) {
203 // Change a+(-N) into a-N, and a-(-N) into a+N
204 // Adjust addition/subtraction of negative value, to
205 // subtraction/addition of the negated value.
206 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy);
207 if (isNegationValuePreserving(RHS, resultIntTy)) {
208 ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS));
209 op = (op == BO_Add) ? BO_Sub : BO_Add;
210 } else {
211 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
212 }
213 } else
214 ConvertedRHS = &BasicVals.Convert(resultTy, RHS);
215
216 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy);
217 }
218
219 // See if Sym is known to be a relation Rel with Bound.
isInRelation(BinaryOperator::Opcode Rel,SymbolRef Sym,llvm::APSInt Bound,ProgramStateRef State)220 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym,
221 llvm::APSInt Bound, ProgramStateRef State) {
222 SValBuilder &SVB = State->getStateManager().getSValBuilder();
223 SVal Result =
224 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym),
225 nonloc::ConcreteInt(Bound), SVB.getConditionType());
226 if (auto DV = Result.getAs<DefinedSVal>()) {
227 return !State->assume(*DV, false);
228 }
229 return false;
230 }
231
232 // See if Sym is known to be within [min/4, max/4], where min and max
233 // are the bounds of the symbol's integral type. With such symbols,
234 // some manipulations can be performed without the risk of overflow.
235 // assume() doesn't cause infinite recursion because we should be dealing
236 // with simpler symbols on every recursive call.
isWithinConstantOverflowBounds(SymbolRef Sym,ProgramStateRef State)237 static bool isWithinConstantOverflowBounds(SymbolRef Sym,
238 ProgramStateRef State) {
239 SValBuilder &SVB = State->getStateManager().getSValBuilder();
240 BasicValueFactory &BV = SVB.getBasicValueFactory();
241
242 QualType T = Sym->getType();
243 assert(T->isSignedIntegerOrEnumerationType() &&
244 "This only works with signed integers!");
245 APSIntType AT = BV.getAPSIntType(T);
246
247 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
248 return isInRelation(BO_LE, Sym, Max, State) &&
249 isInRelation(BO_GE, Sym, Min, State);
250 }
251
252 // Same for the concrete integers: see if I is within [min/4, max/4].
isWithinConstantOverflowBounds(llvm::APSInt I)253 static bool isWithinConstantOverflowBounds(llvm::APSInt I) {
254 APSIntType AT(I);
255 assert(!AT.isUnsigned() &&
256 "This only works with signed integers!");
257
258 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max;
259 return (I <= Max) && (I >= -Max);
260 }
261
262 static std::pair<SymbolRef, llvm::APSInt>
decomposeSymbol(SymbolRef Sym,BasicValueFactory & BV)263 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) {
264 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym))
265 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode()))
266 return std::make_pair(SymInt->getLHS(),
267 (SymInt->getOpcode() == BO_Add) ?
268 (SymInt->getRHS()) :
269 (-SymInt->getRHS()));
270
271 // Fail to decompose: "reduce" the problem to the "$x + 0" case.
272 return std::make_pair(Sym, BV.getValue(0, Sym->getType()));
273 }
274
275 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the
276 // same signed integral type and no overflows occur (which should be checked
277 // by the caller).
doRearrangeUnchecked(ProgramStateRef State,BinaryOperator::Opcode Op,SymbolRef LSym,llvm::APSInt LInt,SymbolRef RSym,llvm::APSInt RInt)278 static NonLoc doRearrangeUnchecked(ProgramStateRef State,
279 BinaryOperator::Opcode Op,
280 SymbolRef LSym, llvm::APSInt LInt,
281 SymbolRef RSym, llvm::APSInt RInt) {
282 SValBuilder &SVB = State->getStateManager().getSValBuilder();
283 BasicValueFactory &BV = SVB.getBasicValueFactory();
284 SymbolManager &SymMgr = SVB.getSymbolManager();
285
286 QualType SymTy = LSym->getType();
287 assert(SymTy == RSym->getType() &&
288 "Symbols are not of the same type!");
289 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) &&
290 "Integers are not of the same type as symbols!");
291 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) &&
292 "Integers are not of the same type as symbols!");
293
294 QualType ResultTy;
295 if (BinaryOperator::isComparisonOp(Op))
296 ResultTy = SVB.getConditionType();
297 else if (BinaryOperator::isAdditiveOp(Op))
298 ResultTy = SymTy;
299 else
300 llvm_unreachable("Operation not suitable for unchecked rearrangement!");
301
302 if (LSym == RSym)
303 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt),
304 nonloc::ConcreteInt(RInt), ResultTy)
305 .castAs<NonLoc>();
306
307 SymbolRef ResultSym = nullptr;
308 BinaryOperator::Opcode ResultOp;
309 llvm::APSInt ResultInt;
310 if (BinaryOperator::isComparisonOp(Op)) {
311 // Prefer comparing to a non-negative number.
312 // FIXME: Maybe it'd be better to have consistency in
313 // "$x - $y" vs. "$y - $x" because those are solver's keys.
314 if (LInt > RInt) {
315 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy);
316 ResultOp = BinaryOperator::reverseComparisonOp(Op);
317 ResultInt = LInt - RInt; // Opposite order!
318 } else {
319 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy);
320 ResultOp = Op;
321 ResultInt = RInt - LInt; // Opposite order!
322 }
323 } else {
324 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy);
325 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt);
326 ResultOp = BO_Add;
327 // Bring back the cosmetic difference.
328 if (ResultInt < 0) {
329 ResultInt = -ResultInt;
330 ResultOp = BO_Sub;
331 } else if (ResultInt == 0) {
332 // Shortcut: Simplify "$x + 0" to "$x".
333 return nonloc::SymbolVal(ResultSym);
334 }
335 }
336 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt);
337 return nonloc::SymbolVal(
338 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy));
339 }
340
341 // Rearrange if symbol type matches the result type and if the operator is a
342 // comparison operator, both symbol and constant must be within constant
343 // overflow bounds.
shouldRearrange(ProgramStateRef State,BinaryOperator::Opcode Op,SymbolRef Sym,llvm::APSInt Int,QualType Ty)344 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op,
345 SymbolRef Sym, llvm::APSInt Int, QualType Ty) {
346 return Sym->getType() == Ty &&
347 (!BinaryOperator::isComparisonOp(Op) ||
348 (isWithinConstantOverflowBounds(Sym, State) &&
349 isWithinConstantOverflowBounds(Int)));
350 }
351
tryRearrange(ProgramStateRef State,BinaryOperator::Opcode Op,NonLoc Lhs,NonLoc Rhs,QualType ResultTy)352 static std::optional<NonLoc> tryRearrange(ProgramStateRef State,
353 BinaryOperator::Opcode Op, NonLoc Lhs,
354 NonLoc Rhs, QualType ResultTy) {
355 ProgramStateManager &StateMgr = State->getStateManager();
356 SValBuilder &SVB = StateMgr.getSValBuilder();
357
358 // We expect everything to be of the same type - this type.
359 QualType SingleTy;
360
361 // FIXME: After putting complexity threshold to the symbols we can always
362 // rearrange additive operations but rearrange comparisons only if
363 // option is set.
364 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation)
365 return std::nullopt;
366
367 SymbolRef LSym = Lhs.getAsSymbol();
368 if (!LSym)
369 return std::nullopt;
370
371 if (BinaryOperator::isComparisonOp(Op)) {
372 SingleTy = LSym->getType();
373 if (ResultTy != SVB.getConditionType())
374 return std::nullopt;
375 // Initialize SingleTy later with a symbol's type.
376 } else if (BinaryOperator::isAdditiveOp(Op)) {
377 SingleTy = ResultTy;
378 if (LSym->getType() != SingleTy)
379 return std::nullopt;
380 } else {
381 // Don't rearrange other operations.
382 return std::nullopt;
383 }
384
385 assert(!SingleTy.isNull() && "We should have figured out the type by now!");
386
387 // Rearrange signed symbolic expressions only
388 if (!SingleTy->isSignedIntegerOrEnumerationType())
389 return std::nullopt;
390
391 SymbolRef RSym = Rhs.getAsSymbol();
392 if (!RSym || RSym->getType() != SingleTy)
393 return std::nullopt;
394
395 BasicValueFactory &BV = State->getBasicVals();
396 llvm::APSInt LInt, RInt;
397 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV);
398 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV);
399 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) ||
400 !shouldRearrange(State, Op, RSym, RInt, SingleTy))
401 return std::nullopt;
402
403 // We know that no overflows can occur anymore.
404 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt);
405 }
406
evalBinOpNN(ProgramStateRef state,BinaryOperator::Opcode op,NonLoc lhs,NonLoc rhs,QualType resultTy)407 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state,
408 BinaryOperator::Opcode op,
409 NonLoc lhs, NonLoc rhs,
410 QualType resultTy) {
411 NonLoc InputLHS = lhs;
412 NonLoc InputRHS = rhs;
413
414 // Constraints may have changed since the creation of a bound SVal. Check if
415 // the values can be simplified based on those new constraints.
416 SVal simplifiedLhs = simplifySVal(state, lhs);
417 SVal simplifiedRhs = simplifySVal(state, rhs);
418 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>())
419 lhs = *simplifiedLhsAsNonLoc;
420 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>())
421 rhs = *simplifiedRhsAsNonLoc;
422
423 // Handle trivial case where left-side and right-side are the same.
424 if (lhs == rhs)
425 switch (op) {
426 default:
427 break;
428 case BO_EQ:
429 case BO_LE:
430 case BO_GE:
431 return makeTruthVal(true, resultTy);
432 case BO_LT:
433 case BO_GT:
434 case BO_NE:
435 return makeTruthVal(false, resultTy);
436 case BO_Xor:
437 case BO_Sub:
438 if (resultTy->isIntegralOrEnumerationType())
439 return makeIntVal(0, resultTy);
440 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy,
441 QualType{});
442 case BO_Or:
443 case BO_And:
444 return evalCast(lhs, resultTy, QualType{});
445 }
446
447 while (true) {
448 switch (lhs.getSubKind()) {
449 default:
450 return makeSymExprValNN(op, lhs, rhs, resultTy);
451 case nonloc::PointerToMemberKind: {
452 assert(rhs.getSubKind() == nonloc::PointerToMemberKind &&
453 "Both SVals should have pointer-to-member-type");
454 auto LPTM = lhs.castAs<nonloc::PointerToMember>(),
455 RPTM = rhs.castAs<nonloc::PointerToMember>();
456 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData();
457 switch (op) {
458 case BO_EQ:
459 return makeTruthVal(LPTMD == RPTMD, resultTy);
460 case BO_NE:
461 return makeTruthVal(LPTMD != RPTMD, resultTy);
462 default:
463 return UnknownVal();
464 }
465 }
466 case nonloc::LocAsIntegerKind: {
467 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc();
468 switch (rhs.getSubKind()) {
469 case nonloc::LocAsIntegerKind:
470 // FIXME: at the moment the implementation
471 // of modeling "pointers as integers" is not complete.
472 if (!BinaryOperator::isComparisonOp(op))
473 return UnknownVal();
474 return evalBinOpLL(state, op, lhsL,
475 rhs.castAs<nonloc::LocAsInteger>().getLoc(),
476 resultTy);
477 case nonloc::ConcreteIntKind: {
478 // FIXME: at the moment the implementation
479 // of modeling "pointers as integers" is not complete.
480 if (!BinaryOperator::isComparisonOp(op))
481 return UnknownVal();
482 // Transform the integer into a location and compare.
483 // FIXME: This only makes sense for comparisons. If we want to, say,
484 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it,
485 // then pack it back into a LocAsInteger.
486 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue();
487 // If the region has a symbolic base, pay attention to the type; it
488 // might be coming from a non-default address space. For non-symbolic
489 // regions it doesn't matter that much because such comparisons would
490 // most likely evaluate to concrete false anyway. FIXME: We might
491 // still need to handle the non-comparison case.
492 if (SymbolRef lSym = lhs.getAsLocSymbol(true))
493 BasicVals.getAPSIntType(lSym->getType()).apply(i);
494 else
495 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i);
496 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy);
497 }
498 default:
499 switch (op) {
500 case BO_EQ:
501 return makeTruthVal(false, resultTy);
502 case BO_NE:
503 return makeTruthVal(true, resultTy);
504 default:
505 // This case also handles pointer arithmetic.
506 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
507 }
508 }
509 }
510 case nonloc::ConcreteIntKind: {
511 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue();
512
513 // If we're dealing with two known constants, just perform the operation.
514 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) {
515 llvm::APSInt RHSValue = *KnownRHSValue;
516 if (BinaryOperator::isComparisonOp(op)) {
517 // We're looking for a type big enough to compare the two values.
518 // FIXME: This is not correct. char + short will result in a promotion
519 // to int. Unfortunately we have lost types by this point.
520 APSIntType CompareType = std::max(APSIntType(LHSValue),
521 APSIntType(RHSValue));
522 CompareType.apply(LHSValue);
523 CompareType.apply(RHSValue);
524 } else if (!BinaryOperator::isShiftOp(op)) {
525 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
526 IntType.apply(LHSValue);
527 IntType.apply(RHSValue);
528 }
529
530 const llvm::APSInt *Result =
531 BasicVals.evalAPSInt(op, LHSValue, RHSValue);
532 if (!Result)
533 return UndefinedVal();
534
535 return nonloc::ConcreteInt(*Result);
536 }
537
538 // Swap the left and right sides and flip the operator if doing so
539 // allows us to better reason about the expression (this is a form
540 // of expression canonicalization).
541 // While we're at it, catch some special cases for non-commutative ops.
542 switch (op) {
543 case BO_LT:
544 case BO_GT:
545 case BO_LE:
546 case BO_GE:
547 op = BinaryOperator::reverseComparisonOp(op);
548 [[fallthrough]];
549 case BO_EQ:
550 case BO_NE:
551 case BO_Add:
552 case BO_Mul:
553 case BO_And:
554 case BO_Xor:
555 case BO_Or:
556 std::swap(lhs, rhs);
557 continue;
558 case BO_Shr:
559 // (~0)>>a
560 if (LHSValue.isAllOnes() && LHSValue.isSigned())
561 return evalCast(lhs, resultTy, QualType{});
562 [[fallthrough]];
563 case BO_Shl:
564 // 0<<a and 0>>a
565 if (LHSValue == 0)
566 return evalCast(lhs, resultTy, QualType{});
567 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
568 case BO_Div:
569 // 0 / x == 0
570 case BO_Rem:
571 // 0 % x == 0
572 if (LHSValue == 0)
573 return makeZeroVal(resultTy);
574 [[fallthrough]];
575 default:
576 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
577 }
578 }
579 case nonloc::SymbolValKind: {
580 // We only handle LHS as simple symbols or SymIntExprs.
581 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol();
582
583 // LHS is a symbolic expression.
584 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) {
585
586 // Is this a logical not? (!x is represented as x == 0.)
587 if (op == BO_EQ && rhs.isZeroConstant()) {
588 // We know how to negate certain expressions. Simplify them here.
589
590 BinaryOperator::Opcode opc = symIntExpr->getOpcode();
591 switch (opc) {
592 default:
593 // We don't know how to negate this operation.
594 // Just handle it as if it were a normal comparison to 0.
595 break;
596 case BO_LAnd:
597 case BO_LOr:
598 llvm_unreachable("Logical operators handled by branching logic.");
599 case BO_Assign:
600 case BO_MulAssign:
601 case BO_DivAssign:
602 case BO_RemAssign:
603 case BO_AddAssign:
604 case BO_SubAssign:
605 case BO_ShlAssign:
606 case BO_ShrAssign:
607 case BO_AndAssign:
608 case BO_XorAssign:
609 case BO_OrAssign:
610 case BO_Comma:
611 llvm_unreachable("'=' and ',' operators handled by ExprEngine.");
612 case BO_PtrMemD:
613 case BO_PtrMemI:
614 llvm_unreachable("Pointer arithmetic not handled here.");
615 case BO_LT:
616 case BO_GT:
617 case BO_LE:
618 case BO_GE:
619 case BO_EQ:
620 case BO_NE:
621 assert(resultTy->isBooleanType() ||
622 resultTy == getConditionType());
623 assert(symIntExpr->getType()->isBooleanType() ||
624 getContext().hasSameUnqualifiedType(symIntExpr->getType(),
625 getConditionType()));
626 // Negate the comparison and make a value.
627 opc = BinaryOperator::negateComparisonOp(opc);
628 return makeNonLoc(symIntExpr->getLHS(), opc,
629 symIntExpr->getRHS(), resultTy);
630 }
631 }
632
633 // For now, only handle expressions whose RHS is a constant.
634 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) {
635 // If both the LHS and the current expression are additive,
636 // fold their constants and try again.
637 if (BinaryOperator::isAdditiveOp(op)) {
638 BinaryOperator::Opcode lop = symIntExpr->getOpcode();
639 if (BinaryOperator::isAdditiveOp(lop)) {
640 // Convert the two constants to a common type, then combine them.
641
642 // resultTy may not be the best type to convert to, but it's
643 // probably the best choice in expressions with mixed type
644 // (such as x+1U+2LL). The rules for implicit conversions should
645 // choose a reasonable type to preserve the expression, and will
646 // at least match how the value is going to be used.
647 APSIntType IntType = BasicVals.getAPSIntType(resultTy);
648 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS());
649 const llvm::APSInt &second = IntType.convert(*RHSValue);
650
651 // If the op and lop agrees, then we just need to
652 // sum the constants. Otherwise, we change to operation
653 // type if substraction would produce negative value
654 // (and cause overflow for unsigned integers),
655 // as consequence x+1U-10 produces x-9U, instead
656 // of x+4294967287U, that would be produced without this
657 // additional check.
658 const llvm::APSInt *newRHS;
659 if (lop == op) {
660 newRHS = BasicVals.evalAPSInt(BO_Add, first, second);
661 } else if (first >= second) {
662 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second);
663 op = lop;
664 } else {
665 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first);
666 }
667
668 assert(newRHS && "Invalid operation despite common type!");
669 rhs = nonloc::ConcreteInt(*newRHS);
670 lhs = nonloc::SymbolVal(symIntExpr->getLHS());
671 continue;
672 }
673 }
674
675 // Otherwise, make a SymIntExpr out of the expression.
676 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy);
677 }
678 }
679
680 // Is the RHS a constant?
681 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs))
682 return MakeSymIntVal(Sym, op, *RHSValue, resultTy);
683
684 if (std::optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy))
685 return *V;
686
687 // Give up -- this is not a symbolic expression we can handle.
688 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy);
689 }
690 }
691 }
692 }
693
evalBinOpFieldRegionFieldRegion(const FieldRegion * LeftFR,const FieldRegion * RightFR,BinaryOperator::Opcode op,QualType resultTy,SimpleSValBuilder & SVB)694 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR,
695 const FieldRegion *RightFR,
696 BinaryOperator::Opcode op,
697 QualType resultTy,
698 SimpleSValBuilder &SVB) {
699 // Only comparisons are meaningful here!
700 if (!BinaryOperator::isComparisonOp(op))
701 return UnknownVal();
702
703 // Next, see if the two FRs have the same super-region.
704 // FIXME: This doesn't handle casts yet, and simply stripping the casts
705 // doesn't help.
706 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion())
707 return UnknownVal();
708
709 const FieldDecl *LeftFD = LeftFR->getDecl();
710 const FieldDecl *RightFD = RightFR->getDecl();
711 const RecordDecl *RD = LeftFD->getParent();
712
713 // Make sure the two FRs are from the same kind of record. Just in case!
714 // FIXME: This is probably where inheritance would be a problem.
715 if (RD != RightFD->getParent())
716 return UnknownVal();
717
718 // We know for sure that the two fields are not the same, since that
719 // would have given us the same SVal.
720 if (op == BO_EQ)
721 return SVB.makeTruthVal(false, resultTy);
722 if (op == BO_NE)
723 return SVB.makeTruthVal(true, resultTy);
724
725 // Iterate through the fields and see which one comes first.
726 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field
727 // members and the units in which bit-fields reside have addresses that
728 // increase in the order in which they are declared."
729 bool leftFirst = (op == BO_LT || op == BO_LE);
730 for (const auto *I : RD->fields()) {
731 if (I == LeftFD)
732 return SVB.makeTruthVal(leftFirst, resultTy);
733 if (I == RightFD)
734 return SVB.makeTruthVal(!leftFirst, resultTy);
735 }
736
737 llvm_unreachable("Fields not found in parent record's definition");
738 }
739
740 // This is used in debug builds only for now because some downstream users
741 // may hit this assert in their subsequent merges.
742 // There are still places in the analyzer where equal bitwidth Locs
743 // are compared, and need to be found and corrected. Recent previous fixes have
744 // addressed the known problems of making NULLs with specific bitwidths
745 // for Loc comparisons along with deprecation of APIs for the same purpose.
746 //
assertEqualBitWidths(ProgramStateRef State,Loc RhsLoc,Loc LhsLoc)747 static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc,
748 Loc LhsLoc) {
749 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths
750 ASTContext &Ctx = State->getStateManager().getContext();
751 uint64_t RhsBitwidth =
752 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx));
753 uint64_t LhsBitwidth =
754 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx));
755 if (RhsBitwidth && LhsBitwidth &&
756 (LhsLoc.getSubKind() == RhsLoc.getSubKind())) {
757 assert(RhsBitwidth == LhsBitwidth &&
758 "RhsLoc and LhsLoc bitwidth must be same!");
759 }
760 }
761
762 // FIXME: all this logic will change if/when we have MemRegion::getLocation().
evalBinOpLL(ProgramStateRef state,BinaryOperator::Opcode op,Loc lhs,Loc rhs,QualType resultTy)763 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state,
764 BinaryOperator::Opcode op,
765 Loc lhs, Loc rhs,
766 QualType resultTy) {
767
768 // Assert that bitwidth of lhs and rhs are the same.
769 // This can happen if two different address spaces are used,
770 // and the bitwidths of the address spaces are different.
771 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c
772 // FIXME: See comment above in the function assertEqualBitWidths
773 assertEqualBitWidths(state, rhs, lhs);
774
775 // Only comparisons and subtractions are valid operations on two pointers.
776 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15].
777 // However, if a pointer is casted to an integer, evalBinOpNN may end up
778 // calling this function with another operation (PR7527). We don't attempt to
779 // model this for now, but it could be useful, particularly when the
780 // "location" is actually an integer value that's been passed through a void*.
781 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub))
782 return UnknownVal();
783
784 // Special cases for when both sides are identical.
785 if (lhs == rhs) {
786 switch (op) {
787 default:
788 llvm_unreachable("Unimplemented operation for two identical values");
789 case BO_Sub:
790 return makeZeroVal(resultTy);
791 case BO_EQ:
792 case BO_LE:
793 case BO_GE:
794 return makeTruthVal(true, resultTy);
795 case BO_NE:
796 case BO_LT:
797 case BO_GT:
798 return makeTruthVal(false, resultTy);
799 }
800 }
801
802 switch (lhs.getSubKind()) {
803 default:
804 llvm_unreachable("Ordering not implemented for this Loc.");
805
806 case loc::GotoLabelKind:
807 // The only thing we know about labels is that they're non-null.
808 if (rhs.isZeroConstant()) {
809 switch (op) {
810 default:
811 break;
812 case BO_Sub:
813 return evalCast(lhs, resultTy, QualType{});
814 case BO_EQ:
815 case BO_LE:
816 case BO_LT:
817 return makeTruthVal(false, resultTy);
818 case BO_NE:
819 case BO_GT:
820 case BO_GE:
821 return makeTruthVal(true, resultTy);
822 }
823 }
824 // There may be two labels for the same location, and a function region may
825 // have the same address as a label at the start of the function (depending
826 // on the ABI).
827 // FIXME: we can probably do a comparison against other MemRegions, though.
828 // FIXME: is there a way to tell if two labels refer to the same location?
829 return UnknownVal();
830
831 case loc::ConcreteIntKind: {
832 auto L = lhs.castAs<loc::ConcreteInt>();
833
834 // If one of the operands is a symbol and the other is a constant,
835 // build an expression for use by the constraint manager.
836 if (SymbolRef rSym = rhs.getAsLocSymbol()) {
837 // We can only build expressions with symbols on the left,
838 // so we need a reversible operator.
839 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp)
840 return UnknownVal();
841
842 op = BinaryOperator::reverseComparisonOp(op);
843 return makeNonLoc(rSym, op, L.getValue(), resultTy);
844 }
845
846 // If both operands are constants, just perform the operation.
847 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
848 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub);
849
850 if (const auto *ResultInt =
851 BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue()))
852 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{});
853 return UnknownVal();
854 }
855
856 // Special case comparisons against NULL.
857 // This must come after the test if the RHS is a symbol, which is used to
858 // build constraints. The address of any non-symbolic region is guaranteed
859 // to be non-NULL, as is any label.
860 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs)));
861 if (lhs.isZeroConstant()) {
862 switch (op) {
863 default:
864 break;
865 case BO_EQ:
866 case BO_GT:
867 case BO_GE:
868 return makeTruthVal(false, resultTy);
869 case BO_NE:
870 case BO_LT:
871 case BO_LE:
872 return makeTruthVal(true, resultTy);
873 }
874 }
875
876 // Comparing an arbitrary integer to a region or label address is
877 // completely unknowable.
878 return UnknownVal();
879 }
880 case loc::MemRegionValKind: {
881 if (std::optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) {
882 // If one of the operands is a symbol and the other is a constant,
883 // build an expression for use by the constraint manager.
884 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) {
885 if (BinaryOperator::isComparisonOp(op))
886 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy);
887 return UnknownVal();
888 }
889 // Special case comparisons to NULL.
890 // This must come after the test if the LHS is a symbol, which is used to
891 // build constraints. The address of any non-symbolic region is guaranteed
892 // to be non-NULL.
893 if (rInt->isZeroConstant()) {
894 if (op == BO_Sub)
895 return evalCast(lhs, resultTy, QualType{});
896
897 if (BinaryOperator::isComparisonOp(op)) {
898 QualType boolType = getContext().BoolTy;
899 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>();
900 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>();
901 return evalBinOpNN(state, op, l, r, resultTy);
902 }
903 }
904
905 // Comparing a region to an arbitrary integer is completely unknowable.
906 return UnknownVal();
907 }
908
909 // Get both values as regions, if possible.
910 const MemRegion *LeftMR = lhs.getAsRegion();
911 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!");
912
913 const MemRegion *RightMR = rhs.getAsRegion();
914 if (!RightMR)
915 // The RHS is probably a label, which in theory could address a region.
916 // FIXME: we can probably make a more useful statement about non-code
917 // regions, though.
918 return UnknownVal();
919
920 const MemRegion *LeftBase = LeftMR->getBaseRegion();
921 const MemRegion *RightBase = RightMR->getBaseRegion();
922 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace();
923 const MemSpaceRegion *RightMS = RightBase->getMemorySpace();
924 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion();
925
926 // If the two regions are from different known memory spaces they cannot be
927 // equal. Also, assume that no symbolic region (whose memory space is
928 // unknown) is on the stack.
929 if (LeftMS != RightMS &&
930 ((LeftMS != UnknownMS && RightMS != UnknownMS) ||
931 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) {
932 switch (op) {
933 default:
934 return UnknownVal();
935 case BO_EQ:
936 return makeTruthVal(false, resultTy);
937 case BO_NE:
938 return makeTruthVal(true, resultTy);
939 }
940 }
941
942 // If both values wrap regions, see if they're from different base regions.
943 // Note, heap base symbolic regions are assumed to not alias with
944 // each other; for example, we assume that malloc returns different address
945 // on each invocation.
946 // FIXME: ObjC object pointers always reside on the heap, but currently
947 // we treat their memory space as unknown, because symbolic pointers
948 // to ObjC objects may alias. There should be a way to construct
949 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker
950 // guesses memory space for ObjC object pointers manually instead of
951 // relying on us.
952 if (LeftBase != RightBase &&
953 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) ||
954 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){
955 switch (op) {
956 default:
957 return UnknownVal();
958 case BO_EQ:
959 return makeTruthVal(false, resultTy);
960 case BO_NE:
961 return makeTruthVal(true, resultTy);
962 }
963 }
964
965 // Handle special cases for when both regions are element regions.
966 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR);
967 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR);
968 if (RightER && LeftER) {
969 // Next, see if the two ERs have the same super-region and matching types.
970 // FIXME: This should do something useful even if the types don't match,
971 // though if both indexes are constant the RegionRawOffset path will
972 // give the correct answer.
973 if (LeftER->getSuperRegion() == RightER->getSuperRegion() &&
974 LeftER->getElementType() == RightER->getElementType()) {
975 // Get the left index and cast it to the correct type.
976 // If the index is unknown or undefined, bail out here.
977 SVal LeftIndexVal = LeftER->getIndex();
978 std::optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>();
979 if (!LeftIndex)
980 return UnknownVal();
981 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{});
982 LeftIndex = LeftIndexVal.getAs<NonLoc>();
983 if (!LeftIndex)
984 return UnknownVal();
985
986 // Do the same for the right index.
987 SVal RightIndexVal = RightER->getIndex();
988 std::optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>();
989 if (!RightIndex)
990 return UnknownVal();
991 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{});
992 RightIndex = RightIndexVal.getAs<NonLoc>();
993 if (!RightIndex)
994 return UnknownVal();
995
996 // Actually perform the operation.
997 // evalBinOpNN expects the two indexes to already be the right type.
998 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy);
999 }
1000 }
1001
1002 // Special handling of the FieldRegions, even with symbolic offsets.
1003 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR);
1004 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR);
1005 if (RightFR && LeftFR) {
1006 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy,
1007 *this);
1008 if (!R.isUnknown())
1009 return R;
1010 }
1011
1012 // Compare the regions using the raw offsets.
1013 RegionOffset LeftOffset = LeftMR->getAsOffset();
1014 RegionOffset RightOffset = RightMR->getAsOffset();
1015
1016 if (LeftOffset.getRegion() != nullptr &&
1017 LeftOffset.getRegion() == RightOffset.getRegion() &&
1018 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) {
1019 int64_t left = LeftOffset.getOffset();
1020 int64_t right = RightOffset.getOffset();
1021
1022 switch (op) {
1023 default:
1024 return UnknownVal();
1025 case BO_LT:
1026 return makeTruthVal(left < right, resultTy);
1027 case BO_GT:
1028 return makeTruthVal(left > right, resultTy);
1029 case BO_LE:
1030 return makeTruthVal(left <= right, resultTy);
1031 case BO_GE:
1032 return makeTruthVal(left >= right, resultTy);
1033 case BO_EQ:
1034 return makeTruthVal(left == right, resultTy);
1035 case BO_NE:
1036 return makeTruthVal(left != right, resultTy);
1037 }
1038 }
1039
1040 // At this point we're not going to get a good answer, but we can try
1041 // conjuring an expression instead.
1042 SymbolRef LHSSym = lhs.getAsLocSymbol();
1043 SymbolRef RHSSym = rhs.getAsLocSymbol();
1044 if (LHSSym && RHSSym)
1045 return makeNonLoc(LHSSym, op, RHSSym, resultTy);
1046
1047 // If we get here, we have no way of comparing the regions.
1048 return UnknownVal();
1049 }
1050 }
1051 }
1052
evalBinOpLN(ProgramStateRef state,BinaryOperator::Opcode op,Loc lhs,NonLoc rhs,QualType resultTy)1053 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state,
1054 BinaryOperator::Opcode op, Loc lhs,
1055 NonLoc rhs, QualType resultTy) {
1056 if (op >= BO_PtrMemD && op <= BO_PtrMemI) {
1057 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) {
1058 if (PTMSV->isNullMemberPointer())
1059 return UndefinedVal();
1060
1061 auto getFieldLValue = [&](const auto *FD) -> SVal {
1062 SVal Result = lhs;
1063
1064 for (const auto &I : *PTMSV)
1065 Result = StateMgr.getStoreManager().evalDerivedToBase(
1066 Result, I->getType(), I->isVirtual());
1067
1068 return state->getLValue(FD, Result);
1069 };
1070
1071 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) {
1072 return getFieldLValue(FD);
1073 }
1074 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) {
1075 return getFieldLValue(FD);
1076 }
1077 }
1078
1079 return rhs;
1080 }
1081
1082 assert(!BinaryOperator::isComparisonOp(op) &&
1083 "arguments to comparison ops must be of the same type");
1084
1085 // Special case: rhs is a zero constant.
1086 if (rhs.isZeroConstant())
1087 return lhs;
1088
1089 // Perserve the null pointer so that it can be found by the DerefChecker.
1090 if (lhs.isZeroConstant())
1091 return lhs;
1092
1093 // We are dealing with pointer arithmetic.
1094
1095 // Handle pointer arithmetic on constant values.
1096 if (std::optional<nonloc::ConcreteInt> rhsInt =
1097 rhs.getAs<nonloc::ConcreteInt>()) {
1098 if (std::optional<loc::ConcreteInt> lhsInt =
1099 lhs.getAs<loc::ConcreteInt>()) {
1100 const llvm::APSInt &leftI = lhsInt->getValue();
1101 assert(leftI.isUnsigned());
1102 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true);
1103
1104 // Convert the bitwidth of rightI. This should deal with overflow
1105 // since we are dealing with concrete values.
1106 rightI = rightI.extOrTrunc(leftI.getBitWidth());
1107
1108 // Offset the increment by the pointer size.
1109 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true);
1110 QualType pointeeType = resultTy->getPointeeType();
1111 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity();
1112 rightI *= Multiplicand;
1113
1114 // Compute the adjusted pointer.
1115 switch (op) {
1116 case BO_Add:
1117 rightI = leftI + rightI;
1118 break;
1119 case BO_Sub:
1120 rightI = leftI - rightI;
1121 break;
1122 default:
1123 llvm_unreachable("Invalid pointer arithmetic operation");
1124 }
1125 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI));
1126 }
1127 }
1128
1129 // Handle cases where 'lhs' is a region.
1130 if (const MemRegion *region = lhs.getAsRegion()) {
1131 rhs = convertToArrayIndex(rhs).castAs<NonLoc>();
1132 SVal index = UnknownVal();
1133 const SubRegion *superR = nullptr;
1134 // We need to know the type of the pointer in order to add an integer to it.
1135 // Depending on the type, different amount of bytes is added.
1136 QualType elementType;
1137
1138 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) {
1139 assert(op == BO_Add || op == BO_Sub);
1140 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs,
1141 getArrayIndexType());
1142 superR = cast<SubRegion>(elemReg->getSuperRegion());
1143 elementType = elemReg->getElementType();
1144 }
1145 else if (isa<SubRegion>(region)) {
1146 assert(op == BO_Add || op == BO_Sub);
1147 index = (op == BO_Add) ? rhs : evalMinus(rhs);
1148 superR = cast<SubRegion>(region);
1149 // TODO: Is this actually reliable? Maybe improving our MemRegion
1150 // hierarchy to provide typed regions for all non-void pointers would be
1151 // better. For instance, we cannot extend this towards LocAsInteger
1152 // operations, where result type of the expression is integer.
1153 if (resultTy->isAnyPointerType())
1154 elementType = resultTy->getPointeeType();
1155 }
1156
1157 // Represent arithmetic on void pointers as arithmetic on char pointers.
1158 // It is fine when a TypedValueRegion of char value type represents
1159 // a void pointer. Note that arithmetic on void pointers is a GCC extension.
1160 if (elementType->isVoidType())
1161 elementType = getContext().CharTy;
1162
1163 if (std::optional<NonLoc> indexV = index.getAs<NonLoc>()) {
1164 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV,
1165 superR, getContext()));
1166 }
1167 }
1168 return UnknownVal();
1169 }
1170
getConstValue(ProgramStateRef state,SVal V)1171 const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state,
1172 SVal V) {
1173 if (V.isUnknownOrUndef())
1174 return nullptr;
1175
1176 if (std::optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>())
1177 return &X->getValue();
1178
1179 if (std::optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>())
1180 return &X->getValue();
1181
1182 if (SymbolRef Sym = V.getAsSymbol())
1183 return state->getConstraintManager().getSymVal(state, Sym);
1184
1185 return nullptr;
1186 }
1187
getKnownValue(ProgramStateRef state,SVal V)1188 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state,
1189 SVal V) {
1190 return getConstValue(state, simplifySVal(state, V));
1191 }
1192
simplifyUntilFixpoint(ProgramStateRef State,SVal Val)1193 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) {
1194 SVal SimplifiedVal = simplifySValOnce(State, Val);
1195 while (SimplifiedVal != Val) {
1196 Val = SimplifiedVal;
1197 SimplifiedVal = simplifySValOnce(State, Val);
1198 }
1199 return SimplifiedVal;
1200 }
1201
simplifySVal(ProgramStateRef State,SVal V)1202 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) {
1203 return simplifyUntilFixpoint(State, V);
1204 }
1205
simplifySValOnce(ProgramStateRef State,SVal V)1206 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) {
1207 // For now, this function tries to constant-fold symbols inside a
1208 // nonloc::SymbolVal, and does nothing else. More simplifications should
1209 // be possible, such as constant-folding an index in an ElementRegion.
1210
1211 class Simplifier : public FullSValVisitor<Simplifier, SVal> {
1212 ProgramStateRef State;
1213 SValBuilder &SVB;
1214
1215 // Cache results for the lifetime of the Simplifier. Results change every
1216 // time new constraints are added to the program state, which is the whole
1217 // point of simplifying, and for that very reason it's pointless to maintain
1218 // the same cache for the duration of the whole analysis.
1219 llvm::DenseMap<SymbolRef, SVal> Cached;
1220
1221 static bool isUnchanged(SymbolRef Sym, SVal Val) {
1222 return Sym == Val.getAsSymbol();
1223 }
1224
1225 SVal cache(SymbolRef Sym, SVal V) {
1226 Cached[Sym] = V;
1227 return V;
1228 }
1229
1230 SVal skip(SymbolRef Sym) {
1231 return cache(Sym, SVB.makeSymbolVal(Sym));
1232 }
1233
1234 // Return the known const value for the Sym if available, or return Undef
1235 // otherwise.
1236 SVal getConst(SymbolRef Sym) {
1237 const llvm::APSInt *Const =
1238 State->getConstraintManager().getSymVal(State, Sym);
1239 if (Const)
1240 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const)
1241 : (SVal)SVB.makeIntVal(*Const);
1242 return UndefinedVal();
1243 }
1244
1245 SVal getConstOrVisit(SymbolRef Sym) {
1246 const SVal Ret = getConst(Sym);
1247 if (Ret.isUndef())
1248 return Visit(Sym);
1249 return Ret;
1250 }
1251
1252 public:
1253 Simplifier(ProgramStateRef State)
1254 : State(State), SVB(State->getStateManager().getSValBuilder()) {}
1255
1256 SVal VisitSymbolData(const SymbolData *S) {
1257 // No cache here.
1258 if (const llvm::APSInt *I =
1259 State->getConstraintManager().getSymVal(State, S))
1260 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I)
1261 : (SVal)SVB.makeIntVal(*I);
1262 return SVB.makeSymbolVal(S);
1263 }
1264
1265 SVal VisitSymIntExpr(const SymIntExpr *S) {
1266 auto I = Cached.find(S);
1267 if (I != Cached.end())
1268 return I->second;
1269
1270 SVal LHS = getConstOrVisit(S->getLHS());
1271 if (isUnchanged(S->getLHS(), LHS))
1272 return skip(S);
1273
1274 SVal RHS;
1275 // By looking at the APSInt in the right-hand side of S, we cannot
1276 // figure out if it should be treated as a Loc or as a NonLoc.
1277 // So make our guess by recalling that we cannot multiply pointers
1278 // or compare a pointer to an integer.
1279 if (Loc::isLocType(S->getLHS()->getType()) &&
1280 BinaryOperator::isComparisonOp(S->getOpcode())) {
1281 // The usual conversion of $sym to &SymRegion{$sym}, as they have
1282 // the same meaning for Loc-type symbols, but the latter form
1283 // is preferred in SVal computations for being Loc itself.
1284 if (SymbolRef Sym = LHS.getAsSymbol()) {
1285 assert(Loc::isLocType(Sym->getType()));
1286 LHS = SVB.makeLoc(Sym);
1287 }
1288 RHS = SVB.makeIntLocVal(S->getRHS());
1289 } else {
1290 RHS = SVB.makeIntVal(S->getRHS());
1291 }
1292
1293 return cache(
1294 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1295 }
1296
1297 SVal VisitIntSymExpr(const IntSymExpr *S) {
1298 auto I = Cached.find(S);
1299 if (I != Cached.end())
1300 return I->second;
1301
1302 SVal RHS = getConstOrVisit(S->getRHS());
1303 if (isUnchanged(S->getRHS(), RHS))
1304 return skip(S);
1305
1306 SVal LHS = SVB.makeIntVal(S->getLHS());
1307 return cache(
1308 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1309 }
1310
1311 SVal VisitSymSymExpr(const SymSymExpr *S) {
1312 auto I = Cached.find(S);
1313 if (I != Cached.end())
1314 return I->second;
1315
1316 // For now don't try to simplify mixed Loc/NonLoc expressions
1317 // because they often appear from LocAsInteger operations
1318 // and we don't know how to combine a LocAsInteger
1319 // with a concrete value.
1320 if (Loc::isLocType(S->getLHS()->getType()) !=
1321 Loc::isLocType(S->getRHS()->getType()))
1322 return skip(S);
1323
1324 SVal LHS = getConstOrVisit(S->getLHS());
1325 SVal RHS = getConstOrVisit(S->getRHS());
1326
1327 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS))
1328 return skip(S);
1329
1330 return cache(
1331 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType()));
1332 }
1333
1334 SVal VisitSymbolCast(const SymbolCast *S) {
1335 auto I = Cached.find(S);
1336 if (I != Cached.end())
1337 return I->second;
1338 const SymExpr *OpSym = S->getOperand();
1339 SVal OpVal = getConstOrVisit(OpSym);
1340 if (isUnchanged(OpSym, OpVal))
1341 return skip(S);
1342
1343 return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType()));
1344 }
1345
1346 SVal VisitUnarySymExpr(const UnarySymExpr *S) {
1347 auto I = Cached.find(S);
1348 if (I != Cached.end())
1349 return I->second;
1350 SVal Op = getConstOrVisit(S->getOperand());
1351 if (isUnchanged(S->getOperand(), Op))
1352 return skip(S);
1353
1354 return cache(
1355 S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType()));
1356 }
1357
1358 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); }
1359
1360 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); }
1361
1362 SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) {
1363 // Simplification is much more costly than computing complexity.
1364 // For high complexity, it may be not worth it.
1365 return Visit(V.getSymbol());
1366 }
1367
1368 SVal VisitSVal(SVal V) { return V; }
1369 };
1370
1371 SVal SimplifiedV = Simplifier(State).Visit(V);
1372 return SimplifiedV;
1373 }
1374