1 // SimpleSValBuilder.cpp - A basic SValBuilder -----------------------*- C++ -*- 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines SimpleSValBuilder, a basic implementation of SValBuilder. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "clang/StaticAnalyzer/Core/PathSensitive/SValBuilder.h" 14 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h" 15 #include "clang/StaticAnalyzer/Core/PathSensitive/ExprEngine.h" 16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h" 17 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h" 18 19 using namespace clang; 20 using namespace ento; 21 22 namespace { 23 class SimpleSValBuilder : public SValBuilder { 24 25 // Query the constraint manager whether the SVal has only one possible 26 // (integer) value. If that is the case, the value is returned. Otherwise, 27 // returns NULL. 28 // This is an implementation detail. Checkers should use `getKnownValue()` 29 // instead. 30 const llvm::APSInt *getConstValue(ProgramStateRef state, SVal V); 31 32 // With one `simplifySValOnce` call, a compound symbols might collapse to 33 // simpler symbol tree that is still possible to further simplify. Thus, we 34 // do the simplification on a new symbol tree until we reach the simplest 35 // form, i.e. the fixpoint. 36 // Consider the following symbol `(b * b) * b * b` which has this tree: 37 // * 38 // / \ 39 // * b 40 // / \ 41 // / b 42 // (b * b) 43 // Now, if the `b * b == 1` new constraint is added then during the first 44 // iteration we have the following transformations: 45 // * * 46 // / \ / \ 47 // * b --> b b 48 // / \ 49 // / b 50 // 1 51 // We need another iteration to reach the final result `1`. 52 SVal simplifyUntilFixpoint(ProgramStateRef State, SVal Val); 53 54 // Recursively descends into symbolic expressions and replaces symbols 55 // with their known values (in the sense of the getConstValue() method). 56 // We traverse the symbol tree and query the constraint values for the 57 // sub-trees and if a value is a constant we do the constant folding. 58 SVal simplifySValOnce(ProgramStateRef State, SVal V); 59 60 public: 61 SimpleSValBuilder(llvm::BumpPtrAllocator &alloc, ASTContext &context, 62 ProgramStateManager &stateMgr) 63 : SValBuilder(alloc, context, stateMgr) {} 64 ~SimpleSValBuilder() override {} 65 66 SVal evalBinOpNN(ProgramStateRef state, BinaryOperator::Opcode op, 67 NonLoc lhs, NonLoc rhs, QualType resultTy) override; 68 SVal evalBinOpLL(ProgramStateRef state, BinaryOperator::Opcode op, 69 Loc lhs, Loc rhs, QualType resultTy) override; 70 SVal evalBinOpLN(ProgramStateRef state, BinaryOperator::Opcode op, 71 Loc lhs, NonLoc rhs, QualType resultTy) override; 72 73 /// Evaluates a given SVal by recursively evaluating and 74 /// simplifying the children SVals. If the SVal has only one possible 75 /// (integer) value, that value is returned. Otherwise, returns NULL. 76 const llvm::APSInt *getKnownValue(ProgramStateRef state, SVal V) override; 77 78 SVal simplifySVal(ProgramStateRef State, SVal V) override; 79 80 SVal MakeSymIntVal(const SymExpr *LHS, BinaryOperator::Opcode op, 81 const llvm::APSInt &RHS, QualType resultTy); 82 }; 83 } // end anonymous namespace 84 85 SValBuilder *ento::createSimpleSValBuilder(llvm::BumpPtrAllocator &alloc, 86 ASTContext &context, 87 ProgramStateManager &stateMgr) { 88 return new SimpleSValBuilder(alloc, context, stateMgr); 89 } 90 91 // Checks if the negation the value and flipping sign preserve 92 // the semantics on the operation in the resultType 93 static bool isNegationValuePreserving(const llvm::APSInt &Value, 94 APSIntType ResultType) { 95 const unsigned ValueBits = Value.getSignificantBits(); 96 if (ValueBits == ResultType.getBitWidth()) { 97 // The value is the lowest negative value that is representable 98 // in signed integer with bitWith of result type. The 99 // negation is representable if resultType is unsigned. 100 return ResultType.isUnsigned(); 101 } 102 103 // If resultType bitWith is higher that number of bits required 104 // to represent RHS, the sign flip produce same value. 105 return ValueBits < ResultType.getBitWidth(); 106 } 107 108 //===----------------------------------------------------------------------===// 109 // Transfer function for binary operators. 110 //===----------------------------------------------------------------------===// 111 112 SVal SimpleSValBuilder::MakeSymIntVal(const SymExpr *LHS, 113 BinaryOperator::Opcode op, 114 const llvm::APSInt &RHS, 115 QualType resultTy) { 116 bool isIdempotent = false; 117 118 // Check for a few special cases with known reductions first. 119 switch (op) { 120 default: 121 // We can't reduce this case; just treat it normally. 122 break; 123 case BO_Mul: 124 // a*0 and a*1 125 if (RHS == 0) 126 return makeIntVal(0, resultTy); 127 else if (RHS == 1) 128 isIdempotent = true; 129 break; 130 case BO_Div: 131 // a/0 and a/1 132 if (RHS == 0) 133 // This is also handled elsewhere. 134 return UndefinedVal(); 135 else if (RHS == 1) 136 isIdempotent = true; 137 break; 138 case BO_Rem: 139 // a%0 and a%1 140 if (RHS == 0) 141 // This is also handled elsewhere. 142 return UndefinedVal(); 143 else if (RHS == 1) 144 return makeIntVal(0, resultTy); 145 break; 146 case BO_Add: 147 case BO_Sub: 148 case BO_Shl: 149 case BO_Shr: 150 case BO_Xor: 151 // a+0, a-0, a<<0, a>>0, a^0 152 if (RHS == 0) 153 isIdempotent = true; 154 break; 155 case BO_And: 156 // a&0 and a&(~0) 157 if (RHS == 0) 158 return makeIntVal(0, resultTy); 159 else if (RHS.isAllOnes()) 160 isIdempotent = true; 161 break; 162 case BO_Or: 163 // a|0 and a|(~0) 164 if (RHS == 0) 165 isIdempotent = true; 166 else if (RHS.isAllOnes()) { 167 const llvm::APSInt &Result = BasicVals.Convert(resultTy, RHS); 168 return nonloc::ConcreteInt(Result); 169 } 170 break; 171 } 172 173 // Idempotent ops (like a*1) can still change the type of an expression. 174 // Wrap the LHS up in a NonLoc again and let evalCast do the 175 // dirty work. 176 if (isIdempotent) 177 return evalCast(nonloc::SymbolVal(LHS), resultTy, QualType{}); 178 179 // If we reach this point, the expression cannot be simplified. 180 // Make a SymbolVal for the entire expression, after converting the RHS. 181 const llvm::APSInt *ConvertedRHS = &RHS; 182 if (BinaryOperator::isComparisonOp(op)) { 183 // We're looking for a type big enough to compare the symbolic value 184 // with the given constant. 185 // FIXME: This is an approximation of Sema::UsualArithmeticConversions. 186 ASTContext &Ctx = getContext(); 187 QualType SymbolType = LHS->getType(); 188 uint64_t ValWidth = RHS.getBitWidth(); 189 uint64_t TypeWidth = Ctx.getTypeSize(SymbolType); 190 191 if (ValWidth < TypeWidth) { 192 // If the value is too small, extend it. 193 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); 194 } else if (ValWidth == TypeWidth) { 195 // If the value is signed but the symbol is unsigned, do the comparison 196 // in unsigned space. [C99 6.3.1.8] 197 // (For the opposite case, the value is already unsigned.) 198 if (RHS.isSigned() && !SymbolType->isSignedIntegerOrEnumerationType()) 199 ConvertedRHS = &BasicVals.Convert(SymbolType, RHS); 200 } 201 } else if (BinaryOperator::isAdditiveOp(op) && RHS.isNegative()) { 202 // Change a+(-N) into a-N, and a-(-N) into a+N 203 // Adjust addition/subtraction of negative value, to 204 // subtraction/addition of the negated value. 205 APSIntType resultIntTy = BasicVals.getAPSIntType(resultTy); 206 if (isNegationValuePreserving(RHS, resultIntTy)) { 207 ConvertedRHS = &BasicVals.getValue(-resultIntTy.convert(RHS)); 208 op = (op == BO_Add) ? BO_Sub : BO_Add; 209 } else { 210 ConvertedRHS = &BasicVals.Convert(resultTy, RHS); 211 } 212 } else 213 ConvertedRHS = &BasicVals.Convert(resultTy, RHS); 214 215 return makeNonLoc(LHS, op, *ConvertedRHS, resultTy); 216 } 217 218 // See if Sym is known to be a relation Rel with Bound. 219 static bool isInRelation(BinaryOperator::Opcode Rel, SymbolRef Sym, 220 llvm::APSInt Bound, ProgramStateRef State) { 221 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 222 SVal Result = 223 SVB.evalBinOpNN(State, Rel, nonloc::SymbolVal(Sym), 224 nonloc::ConcreteInt(Bound), SVB.getConditionType()); 225 if (auto DV = Result.getAs<DefinedSVal>()) { 226 return !State->assume(*DV, false); 227 } 228 return false; 229 } 230 231 // See if Sym is known to be within [min/4, max/4], where min and max 232 // are the bounds of the symbol's integral type. With such symbols, 233 // some manipulations can be performed without the risk of overflow. 234 // assume() doesn't cause infinite recursion because we should be dealing 235 // with simpler symbols on every recursive call. 236 static bool isWithinConstantOverflowBounds(SymbolRef Sym, 237 ProgramStateRef State) { 238 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 239 BasicValueFactory &BV = SVB.getBasicValueFactory(); 240 241 QualType T = Sym->getType(); 242 assert(T->isSignedIntegerOrEnumerationType() && 243 "This only works with signed integers!"); 244 APSIntType AT = BV.getAPSIntType(T); 245 246 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; 247 return isInRelation(BO_LE, Sym, Max, State) && 248 isInRelation(BO_GE, Sym, Min, State); 249 } 250 251 // Same for the concrete integers: see if I is within [min/4, max/4]. 252 static bool isWithinConstantOverflowBounds(llvm::APSInt I) { 253 APSIntType AT(I); 254 assert(!AT.isUnsigned() && 255 "This only works with signed integers!"); 256 257 llvm::APSInt Max = AT.getMaxValue() / AT.getValue(4), Min = -Max; 258 return (I <= Max) && (I >= -Max); 259 } 260 261 static std::pair<SymbolRef, llvm::APSInt> 262 decomposeSymbol(SymbolRef Sym, BasicValueFactory &BV) { 263 if (const auto *SymInt = dyn_cast<SymIntExpr>(Sym)) 264 if (BinaryOperator::isAdditiveOp(SymInt->getOpcode())) 265 return std::make_pair(SymInt->getLHS(), 266 (SymInt->getOpcode() == BO_Add) ? 267 (SymInt->getRHS()) : 268 (-SymInt->getRHS())); 269 270 // Fail to decompose: "reduce" the problem to the "$x + 0" case. 271 return std::make_pair(Sym, BV.getValue(0, Sym->getType())); 272 } 273 274 // Simplify "(LSym + LInt) Op (RSym + RInt)" assuming all values are of the 275 // same signed integral type and no overflows occur (which should be checked 276 // by the caller). 277 static NonLoc doRearrangeUnchecked(ProgramStateRef State, 278 BinaryOperator::Opcode Op, 279 SymbolRef LSym, llvm::APSInt LInt, 280 SymbolRef RSym, llvm::APSInt RInt) { 281 SValBuilder &SVB = State->getStateManager().getSValBuilder(); 282 BasicValueFactory &BV = SVB.getBasicValueFactory(); 283 SymbolManager &SymMgr = SVB.getSymbolManager(); 284 285 QualType SymTy = LSym->getType(); 286 assert(SymTy == RSym->getType() && 287 "Symbols are not of the same type!"); 288 assert(APSIntType(LInt) == BV.getAPSIntType(SymTy) && 289 "Integers are not of the same type as symbols!"); 290 assert(APSIntType(RInt) == BV.getAPSIntType(SymTy) && 291 "Integers are not of the same type as symbols!"); 292 293 QualType ResultTy; 294 if (BinaryOperator::isComparisonOp(Op)) 295 ResultTy = SVB.getConditionType(); 296 else if (BinaryOperator::isAdditiveOp(Op)) 297 ResultTy = SymTy; 298 else 299 llvm_unreachable("Operation not suitable for unchecked rearrangement!"); 300 301 if (LSym == RSym) 302 return SVB.evalBinOpNN(State, Op, nonloc::ConcreteInt(LInt), 303 nonloc::ConcreteInt(RInt), ResultTy) 304 .castAs<NonLoc>(); 305 306 SymbolRef ResultSym = nullptr; 307 BinaryOperator::Opcode ResultOp; 308 llvm::APSInt ResultInt; 309 if (BinaryOperator::isComparisonOp(Op)) { 310 // Prefer comparing to a non-negative number. 311 // FIXME: Maybe it'd be better to have consistency in 312 // "$x - $y" vs. "$y - $x" because those are solver's keys. 313 if (LInt > RInt) { 314 ResultSym = SymMgr.getSymSymExpr(RSym, BO_Sub, LSym, SymTy); 315 ResultOp = BinaryOperator::reverseComparisonOp(Op); 316 ResultInt = LInt - RInt; // Opposite order! 317 } else { 318 ResultSym = SymMgr.getSymSymExpr(LSym, BO_Sub, RSym, SymTy); 319 ResultOp = Op; 320 ResultInt = RInt - LInt; // Opposite order! 321 } 322 } else { 323 ResultSym = SymMgr.getSymSymExpr(LSym, Op, RSym, SymTy); 324 ResultInt = (Op == BO_Add) ? (LInt + RInt) : (LInt - RInt); 325 ResultOp = BO_Add; 326 // Bring back the cosmetic difference. 327 if (ResultInt < 0) { 328 ResultInt = -ResultInt; 329 ResultOp = BO_Sub; 330 } else if (ResultInt == 0) { 331 // Shortcut: Simplify "$x + 0" to "$x". 332 return nonloc::SymbolVal(ResultSym); 333 } 334 } 335 const llvm::APSInt &PersistentResultInt = BV.getValue(ResultInt); 336 return nonloc::SymbolVal( 337 SymMgr.getSymIntExpr(ResultSym, ResultOp, PersistentResultInt, ResultTy)); 338 } 339 340 // Rearrange if symbol type matches the result type and if the operator is a 341 // comparison operator, both symbol and constant must be within constant 342 // overflow bounds. 343 static bool shouldRearrange(ProgramStateRef State, BinaryOperator::Opcode Op, 344 SymbolRef Sym, llvm::APSInt Int, QualType Ty) { 345 return Sym->getType() == Ty && 346 (!BinaryOperator::isComparisonOp(Op) || 347 (isWithinConstantOverflowBounds(Sym, State) && 348 isWithinConstantOverflowBounds(Int))); 349 } 350 351 static Optional<NonLoc> tryRearrange(ProgramStateRef State, 352 BinaryOperator::Opcode Op, NonLoc Lhs, 353 NonLoc Rhs, QualType ResultTy) { 354 ProgramStateManager &StateMgr = State->getStateManager(); 355 SValBuilder &SVB = StateMgr.getSValBuilder(); 356 357 // We expect everything to be of the same type - this type. 358 QualType SingleTy; 359 360 // FIXME: After putting complexity threshold to the symbols we can always 361 // rearrange additive operations but rearrange comparisons only if 362 // option is set. 363 if (!SVB.getAnalyzerOptions().ShouldAggressivelySimplifyBinaryOperation) 364 return None; 365 366 SymbolRef LSym = Lhs.getAsSymbol(); 367 if (!LSym) 368 return None; 369 370 if (BinaryOperator::isComparisonOp(Op)) { 371 SingleTy = LSym->getType(); 372 if (ResultTy != SVB.getConditionType()) 373 return None; 374 // Initialize SingleTy later with a symbol's type. 375 } else if (BinaryOperator::isAdditiveOp(Op)) { 376 SingleTy = ResultTy; 377 if (LSym->getType() != SingleTy) 378 return None; 379 } else { 380 // Don't rearrange other operations. 381 return None; 382 } 383 384 assert(!SingleTy.isNull() && "We should have figured out the type by now!"); 385 386 // Rearrange signed symbolic expressions only 387 if (!SingleTy->isSignedIntegerOrEnumerationType()) 388 return None; 389 390 SymbolRef RSym = Rhs.getAsSymbol(); 391 if (!RSym || RSym->getType() != SingleTy) 392 return None; 393 394 BasicValueFactory &BV = State->getBasicVals(); 395 llvm::APSInt LInt, RInt; 396 std::tie(LSym, LInt) = decomposeSymbol(LSym, BV); 397 std::tie(RSym, RInt) = decomposeSymbol(RSym, BV); 398 if (!shouldRearrange(State, Op, LSym, LInt, SingleTy) || 399 !shouldRearrange(State, Op, RSym, RInt, SingleTy)) 400 return None; 401 402 // We know that no overflows can occur anymore. 403 return doRearrangeUnchecked(State, Op, LSym, LInt, RSym, RInt); 404 } 405 406 SVal SimpleSValBuilder::evalBinOpNN(ProgramStateRef state, 407 BinaryOperator::Opcode op, 408 NonLoc lhs, NonLoc rhs, 409 QualType resultTy) { 410 NonLoc InputLHS = lhs; 411 NonLoc InputRHS = rhs; 412 413 // Constraints may have changed since the creation of a bound SVal. Check if 414 // the values can be simplified based on those new constraints. 415 SVal simplifiedLhs = simplifySVal(state, lhs); 416 SVal simplifiedRhs = simplifySVal(state, rhs); 417 if (auto simplifiedLhsAsNonLoc = simplifiedLhs.getAs<NonLoc>()) 418 lhs = *simplifiedLhsAsNonLoc; 419 if (auto simplifiedRhsAsNonLoc = simplifiedRhs.getAs<NonLoc>()) 420 rhs = *simplifiedRhsAsNonLoc; 421 422 // Handle trivial case where left-side and right-side are the same. 423 if (lhs == rhs) 424 switch (op) { 425 default: 426 break; 427 case BO_EQ: 428 case BO_LE: 429 case BO_GE: 430 return makeTruthVal(true, resultTy); 431 case BO_LT: 432 case BO_GT: 433 case BO_NE: 434 return makeTruthVal(false, resultTy); 435 case BO_Xor: 436 case BO_Sub: 437 if (resultTy->isIntegralOrEnumerationType()) 438 return makeIntVal(0, resultTy); 439 return evalCast(makeIntVal(0, /*isUnsigned=*/false), resultTy, 440 QualType{}); 441 case BO_Or: 442 case BO_And: 443 return evalCast(lhs, resultTy, QualType{}); 444 } 445 446 while (true) { 447 switch (lhs.getSubKind()) { 448 default: 449 return makeSymExprValNN(op, lhs, rhs, resultTy); 450 case nonloc::PointerToMemberKind: { 451 assert(rhs.getSubKind() == nonloc::PointerToMemberKind && 452 "Both SVals should have pointer-to-member-type"); 453 auto LPTM = lhs.castAs<nonloc::PointerToMember>(), 454 RPTM = rhs.castAs<nonloc::PointerToMember>(); 455 auto LPTMD = LPTM.getPTMData(), RPTMD = RPTM.getPTMData(); 456 switch (op) { 457 case BO_EQ: 458 return makeTruthVal(LPTMD == RPTMD, resultTy); 459 case BO_NE: 460 return makeTruthVal(LPTMD != RPTMD, resultTy); 461 default: 462 return UnknownVal(); 463 } 464 } 465 case nonloc::LocAsIntegerKind: { 466 Loc lhsL = lhs.castAs<nonloc::LocAsInteger>().getLoc(); 467 switch (rhs.getSubKind()) { 468 case nonloc::LocAsIntegerKind: 469 // FIXME: at the moment the implementation 470 // of modeling "pointers as integers" is not complete. 471 if (!BinaryOperator::isComparisonOp(op)) 472 return UnknownVal(); 473 return evalBinOpLL(state, op, lhsL, 474 rhs.castAs<nonloc::LocAsInteger>().getLoc(), 475 resultTy); 476 case nonloc::ConcreteIntKind: { 477 // FIXME: at the moment the implementation 478 // of modeling "pointers as integers" is not complete. 479 if (!BinaryOperator::isComparisonOp(op)) 480 return UnknownVal(); 481 // Transform the integer into a location and compare. 482 // FIXME: This only makes sense for comparisons. If we want to, say, 483 // add 1 to a LocAsInteger, we'd better unpack the Loc and add to it, 484 // then pack it back into a LocAsInteger. 485 llvm::APSInt i = rhs.castAs<nonloc::ConcreteInt>().getValue(); 486 // If the region has a symbolic base, pay attention to the type; it 487 // might be coming from a non-default address space. For non-symbolic 488 // regions it doesn't matter that much because such comparisons would 489 // most likely evaluate to concrete false anyway. FIXME: We might 490 // still need to handle the non-comparison case. 491 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) 492 BasicVals.getAPSIntType(lSym->getType()).apply(i); 493 else 494 BasicVals.getAPSIntType(Context.VoidPtrTy).apply(i); 495 return evalBinOpLL(state, op, lhsL, makeLoc(i), resultTy); 496 } 497 default: 498 switch (op) { 499 case BO_EQ: 500 return makeTruthVal(false, resultTy); 501 case BO_NE: 502 return makeTruthVal(true, resultTy); 503 default: 504 // This case also handles pointer arithmetic. 505 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 506 } 507 } 508 } 509 case nonloc::ConcreteIntKind: { 510 llvm::APSInt LHSValue = lhs.castAs<nonloc::ConcreteInt>().getValue(); 511 512 // If we're dealing with two known constants, just perform the operation. 513 if (const llvm::APSInt *KnownRHSValue = getConstValue(state, rhs)) { 514 llvm::APSInt RHSValue = *KnownRHSValue; 515 if (BinaryOperator::isComparisonOp(op)) { 516 // We're looking for a type big enough to compare the two values. 517 // FIXME: This is not correct. char + short will result in a promotion 518 // to int. Unfortunately we have lost types by this point. 519 APSIntType CompareType = std::max(APSIntType(LHSValue), 520 APSIntType(RHSValue)); 521 CompareType.apply(LHSValue); 522 CompareType.apply(RHSValue); 523 } else if (!BinaryOperator::isShiftOp(op)) { 524 APSIntType IntType = BasicVals.getAPSIntType(resultTy); 525 IntType.apply(LHSValue); 526 IntType.apply(RHSValue); 527 } 528 529 const llvm::APSInt *Result = 530 BasicVals.evalAPSInt(op, LHSValue, RHSValue); 531 if (!Result) 532 return UndefinedVal(); 533 534 return nonloc::ConcreteInt(*Result); 535 } 536 537 // Swap the left and right sides and flip the operator if doing so 538 // allows us to better reason about the expression (this is a form 539 // of expression canonicalization). 540 // While we're at it, catch some special cases for non-commutative ops. 541 switch (op) { 542 case BO_LT: 543 case BO_GT: 544 case BO_LE: 545 case BO_GE: 546 op = BinaryOperator::reverseComparisonOp(op); 547 LLVM_FALLTHROUGH; 548 case BO_EQ: 549 case BO_NE: 550 case BO_Add: 551 case BO_Mul: 552 case BO_And: 553 case BO_Xor: 554 case BO_Or: 555 std::swap(lhs, rhs); 556 continue; 557 case BO_Shr: 558 // (~0)>>a 559 if (LHSValue.isAllOnes() && LHSValue.isSigned()) 560 return evalCast(lhs, resultTy, QualType{}); 561 LLVM_FALLTHROUGH; 562 case BO_Shl: 563 // 0<<a and 0>>a 564 if (LHSValue == 0) 565 return evalCast(lhs, resultTy, QualType{}); 566 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 567 case BO_Div: 568 // 0 / x == 0 569 case BO_Rem: 570 // 0 % x == 0 571 if (LHSValue == 0) 572 return makeZeroVal(resultTy); 573 LLVM_FALLTHROUGH; 574 default: 575 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 576 } 577 } 578 case nonloc::SymbolValKind: { 579 // We only handle LHS as simple symbols or SymIntExprs. 580 SymbolRef Sym = lhs.castAs<nonloc::SymbolVal>().getSymbol(); 581 582 // LHS is a symbolic expression. 583 if (const SymIntExpr *symIntExpr = dyn_cast<SymIntExpr>(Sym)) { 584 585 // Is this a logical not? (!x is represented as x == 0.) 586 if (op == BO_EQ && rhs.isZeroConstant()) { 587 // We know how to negate certain expressions. Simplify them here. 588 589 BinaryOperator::Opcode opc = symIntExpr->getOpcode(); 590 switch (opc) { 591 default: 592 // We don't know how to negate this operation. 593 // Just handle it as if it were a normal comparison to 0. 594 break; 595 case BO_LAnd: 596 case BO_LOr: 597 llvm_unreachable("Logical operators handled by branching logic."); 598 case BO_Assign: 599 case BO_MulAssign: 600 case BO_DivAssign: 601 case BO_RemAssign: 602 case BO_AddAssign: 603 case BO_SubAssign: 604 case BO_ShlAssign: 605 case BO_ShrAssign: 606 case BO_AndAssign: 607 case BO_XorAssign: 608 case BO_OrAssign: 609 case BO_Comma: 610 llvm_unreachable("'=' and ',' operators handled by ExprEngine."); 611 case BO_PtrMemD: 612 case BO_PtrMemI: 613 llvm_unreachable("Pointer arithmetic not handled here."); 614 case BO_LT: 615 case BO_GT: 616 case BO_LE: 617 case BO_GE: 618 case BO_EQ: 619 case BO_NE: 620 assert(resultTy->isBooleanType() || 621 resultTy == getConditionType()); 622 assert(symIntExpr->getType()->isBooleanType() || 623 getContext().hasSameUnqualifiedType(symIntExpr->getType(), 624 getConditionType())); 625 // Negate the comparison and make a value. 626 opc = BinaryOperator::negateComparisonOp(opc); 627 return makeNonLoc(symIntExpr->getLHS(), opc, 628 symIntExpr->getRHS(), resultTy); 629 } 630 } 631 632 // For now, only handle expressions whose RHS is a constant. 633 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) { 634 // If both the LHS and the current expression are additive, 635 // fold their constants and try again. 636 if (BinaryOperator::isAdditiveOp(op)) { 637 BinaryOperator::Opcode lop = symIntExpr->getOpcode(); 638 if (BinaryOperator::isAdditiveOp(lop)) { 639 // Convert the two constants to a common type, then combine them. 640 641 // resultTy may not be the best type to convert to, but it's 642 // probably the best choice in expressions with mixed type 643 // (such as x+1U+2LL). The rules for implicit conversions should 644 // choose a reasonable type to preserve the expression, and will 645 // at least match how the value is going to be used. 646 APSIntType IntType = BasicVals.getAPSIntType(resultTy); 647 const llvm::APSInt &first = IntType.convert(symIntExpr->getRHS()); 648 const llvm::APSInt &second = IntType.convert(*RHSValue); 649 650 // If the op and lop agrees, then we just need to 651 // sum the constants. Otherwise, we change to operation 652 // type if substraction would produce negative value 653 // (and cause overflow for unsigned integers), 654 // as consequence x+1U-10 produces x-9U, instead 655 // of x+4294967287U, that would be produced without this 656 // additional check. 657 const llvm::APSInt *newRHS; 658 if (lop == op) { 659 newRHS = BasicVals.evalAPSInt(BO_Add, first, second); 660 } else if (first >= second) { 661 newRHS = BasicVals.evalAPSInt(BO_Sub, first, second); 662 op = lop; 663 } else { 664 newRHS = BasicVals.evalAPSInt(BO_Sub, second, first); 665 } 666 667 assert(newRHS && "Invalid operation despite common type!"); 668 rhs = nonloc::ConcreteInt(*newRHS); 669 lhs = nonloc::SymbolVal(symIntExpr->getLHS()); 670 continue; 671 } 672 } 673 674 // Otherwise, make a SymIntExpr out of the expression. 675 return MakeSymIntVal(symIntExpr, op, *RHSValue, resultTy); 676 } 677 } 678 679 // Is the RHS a constant? 680 if (const llvm::APSInt *RHSValue = getConstValue(state, rhs)) 681 return MakeSymIntVal(Sym, op, *RHSValue, resultTy); 682 683 if (Optional<NonLoc> V = tryRearrange(state, op, lhs, rhs, resultTy)) 684 return *V; 685 686 // Give up -- this is not a symbolic expression we can handle. 687 return makeSymExprValNN(op, InputLHS, InputRHS, resultTy); 688 } 689 } 690 } 691 } 692 693 static SVal evalBinOpFieldRegionFieldRegion(const FieldRegion *LeftFR, 694 const FieldRegion *RightFR, 695 BinaryOperator::Opcode op, 696 QualType resultTy, 697 SimpleSValBuilder &SVB) { 698 // Only comparisons are meaningful here! 699 if (!BinaryOperator::isComparisonOp(op)) 700 return UnknownVal(); 701 702 // Next, see if the two FRs have the same super-region. 703 // FIXME: This doesn't handle casts yet, and simply stripping the casts 704 // doesn't help. 705 if (LeftFR->getSuperRegion() != RightFR->getSuperRegion()) 706 return UnknownVal(); 707 708 const FieldDecl *LeftFD = LeftFR->getDecl(); 709 const FieldDecl *RightFD = RightFR->getDecl(); 710 const RecordDecl *RD = LeftFD->getParent(); 711 712 // Make sure the two FRs are from the same kind of record. Just in case! 713 // FIXME: This is probably where inheritance would be a problem. 714 if (RD != RightFD->getParent()) 715 return UnknownVal(); 716 717 // We know for sure that the two fields are not the same, since that 718 // would have given us the same SVal. 719 if (op == BO_EQ) 720 return SVB.makeTruthVal(false, resultTy); 721 if (op == BO_NE) 722 return SVB.makeTruthVal(true, resultTy); 723 724 // Iterate through the fields and see which one comes first. 725 // [C99 6.7.2.1.13] "Within a structure object, the non-bit-field 726 // members and the units in which bit-fields reside have addresses that 727 // increase in the order in which they are declared." 728 bool leftFirst = (op == BO_LT || op == BO_LE); 729 for (const auto *I : RD->fields()) { 730 if (I == LeftFD) 731 return SVB.makeTruthVal(leftFirst, resultTy); 732 if (I == RightFD) 733 return SVB.makeTruthVal(!leftFirst, resultTy); 734 } 735 736 llvm_unreachable("Fields not found in parent record's definition"); 737 } 738 739 // This is used in debug builds only for now because some downstream users 740 // may hit this assert in their subsequent merges. 741 // There are still places in the analyzer where equal bitwidth Locs 742 // are compared, and need to be found and corrected. Recent previous fixes have 743 // addressed the known problems of making NULLs with specific bitwidths 744 // for Loc comparisons along with deprecation of APIs for the same purpose. 745 // 746 static void assertEqualBitWidths(ProgramStateRef State, Loc RhsLoc, 747 Loc LhsLoc) { 748 // Implements a "best effort" check for RhsLoc and LhsLoc bit widths 749 ASTContext &Ctx = State->getStateManager().getContext(); 750 uint64_t RhsBitwidth = 751 RhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(RhsLoc.getType(Ctx)); 752 uint64_t LhsBitwidth = 753 LhsLoc.getType(Ctx).isNull() ? 0 : Ctx.getTypeSize(LhsLoc.getType(Ctx)); 754 if (RhsBitwidth && LhsBitwidth && 755 (LhsLoc.getSubKind() == RhsLoc.getSubKind())) { 756 assert(RhsBitwidth == LhsBitwidth && 757 "RhsLoc and LhsLoc bitwidth must be same!"); 758 } 759 } 760 761 // FIXME: all this logic will change if/when we have MemRegion::getLocation(). 762 SVal SimpleSValBuilder::evalBinOpLL(ProgramStateRef state, 763 BinaryOperator::Opcode op, 764 Loc lhs, Loc rhs, 765 QualType resultTy) { 766 767 // Assert that bitwidth of lhs and rhs are the same. 768 // This can happen if two different address spaces are used, 769 // and the bitwidths of the address spaces are different. 770 // See LIT case clang/test/Analysis/cstring-checker-addressspace.c 771 // FIXME: See comment above in the function assertEqualBitWidths 772 assertEqualBitWidths(state, rhs, lhs); 773 774 // Only comparisons and subtractions are valid operations on two pointers. 775 // See [C99 6.5.5 through 6.5.14] or [C++0x 5.6 through 5.15]. 776 // However, if a pointer is casted to an integer, evalBinOpNN may end up 777 // calling this function with another operation (PR7527). We don't attempt to 778 // model this for now, but it could be useful, particularly when the 779 // "location" is actually an integer value that's been passed through a void*. 780 if (!(BinaryOperator::isComparisonOp(op) || op == BO_Sub)) 781 return UnknownVal(); 782 783 // Special cases for when both sides are identical. 784 if (lhs == rhs) { 785 switch (op) { 786 default: 787 llvm_unreachable("Unimplemented operation for two identical values"); 788 case BO_Sub: 789 return makeZeroVal(resultTy); 790 case BO_EQ: 791 case BO_LE: 792 case BO_GE: 793 return makeTruthVal(true, resultTy); 794 case BO_NE: 795 case BO_LT: 796 case BO_GT: 797 return makeTruthVal(false, resultTy); 798 } 799 } 800 801 switch (lhs.getSubKind()) { 802 default: 803 llvm_unreachable("Ordering not implemented for this Loc."); 804 805 case loc::GotoLabelKind: 806 // The only thing we know about labels is that they're non-null. 807 if (rhs.isZeroConstant()) { 808 switch (op) { 809 default: 810 break; 811 case BO_Sub: 812 return evalCast(lhs, resultTy, QualType{}); 813 case BO_EQ: 814 case BO_LE: 815 case BO_LT: 816 return makeTruthVal(false, resultTy); 817 case BO_NE: 818 case BO_GT: 819 case BO_GE: 820 return makeTruthVal(true, resultTy); 821 } 822 } 823 // There may be two labels for the same location, and a function region may 824 // have the same address as a label at the start of the function (depending 825 // on the ABI). 826 // FIXME: we can probably do a comparison against other MemRegions, though. 827 // FIXME: is there a way to tell if two labels refer to the same location? 828 return UnknownVal(); 829 830 case loc::ConcreteIntKind: { 831 auto L = lhs.castAs<loc::ConcreteInt>(); 832 833 // If one of the operands is a symbol and the other is a constant, 834 // build an expression for use by the constraint manager. 835 if (SymbolRef rSym = rhs.getAsLocSymbol()) { 836 // We can only build expressions with symbols on the left, 837 // so we need a reversible operator. 838 if (!BinaryOperator::isComparisonOp(op) || op == BO_Cmp) 839 return UnknownVal(); 840 841 op = BinaryOperator::reverseComparisonOp(op); 842 return makeNonLoc(rSym, op, L.getValue(), resultTy); 843 } 844 845 // If both operands are constants, just perform the operation. 846 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { 847 assert(BinaryOperator::isComparisonOp(op) || op == BO_Sub); 848 849 if (const auto *ResultInt = 850 BasicVals.evalAPSInt(op, L.getValue(), rInt->getValue())) 851 return evalCast(nonloc::ConcreteInt(*ResultInt), resultTy, QualType{}); 852 return UnknownVal(); 853 } 854 855 // Special case comparisons against NULL. 856 // This must come after the test if the RHS is a symbol, which is used to 857 // build constraints. The address of any non-symbolic region is guaranteed 858 // to be non-NULL, as is any label. 859 assert((isa<loc::MemRegionVal, loc::GotoLabel>(rhs))); 860 if (lhs.isZeroConstant()) { 861 switch (op) { 862 default: 863 break; 864 case BO_EQ: 865 case BO_GT: 866 case BO_GE: 867 return makeTruthVal(false, resultTy); 868 case BO_NE: 869 case BO_LT: 870 case BO_LE: 871 return makeTruthVal(true, resultTy); 872 } 873 } 874 875 // Comparing an arbitrary integer to a region or label address is 876 // completely unknowable. 877 return UnknownVal(); 878 } 879 case loc::MemRegionValKind: { 880 if (Optional<loc::ConcreteInt> rInt = rhs.getAs<loc::ConcreteInt>()) { 881 // If one of the operands is a symbol and the other is a constant, 882 // build an expression for use by the constraint manager. 883 if (SymbolRef lSym = lhs.getAsLocSymbol(true)) { 884 if (BinaryOperator::isComparisonOp(op)) 885 return MakeSymIntVal(lSym, op, rInt->getValue(), resultTy); 886 return UnknownVal(); 887 } 888 // Special case comparisons to NULL. 889 // This must come after the test if the LHS is a symbol, which is used to 890 // build constraints. The address of any non-symbolic region is guaranteed 891 // to be non-NULL. 892 if (rInt->isZeroConstant()) { 893 if (op == BO_Sub) 894 return evalCast(lhs, resultTy, QualType{}); 895 896 if (BinaryOperator::isComparisonOp(op)) { 897 QualType boolType = getContext().BoolTy; 898 NonLoc l = evalCast(lhs, boolType, QualType{}).castAs<NonLoc>(); 899 NonLoc r = makeTruthVal(false, boolType).castAs<NonLoc>(); 900 return evalBinOpNN(state, op, l, r, resultTy); 901 } 902 } 903 904 // Comparing a region to an arbitrary integer is completely unknowable. 905 return UnknownVal(); 906 } 907 908 // Get both values as regions, if possible. 909 const MemRegion *LeftMR = lhs.getAsRegion(); 910 assert(LeftMR && "MemRegionValKind SVal doesn't have a region!"); 911 912 const MemRegion *RightMR = rhs.getAsRegion(); 913 if (!RightMR) 914 // The RHS is probably a label, which in theory could address a region. 915 // FIXME: we can probably make a more useful statement about non-code 916 // regions, though. 917 return UnknownVal(); 918 919 const MemRegion *LeftBase = LeftMR->getBaseRegion(); 920 const MemRegion *RightBase = RightMR->getBaseRegion(); 921 const MemSpaceRegion *LeftMS = LeftBase->getMemorySpace(); 922 const MemSpaceRegion *RightMS = RightBase->getMemorySpace(); 923 const MemSpaceRegion *UnknownMS = MemMgr.getUnknownRegion(); 924 925 // If the two regions are from different known memory spaces they cannot be 926 // equal. Also, assume that no symbolic region (whose memory space is 927 // unknown) is on the stack. 928 if (LeftMS != RightMS && 929 ((LeftMS != UnknownMS && RightMS != UnknownMS) || 930 (isa<StackSpaceRegion>(LeftMS) || isa<StackSpaceRegion>(RightMS)))) { 931 switch (op) { 932 default: 933 return UnknownVal(); 934 case BO_EQ: 935 return makeTruthVal(false, resultTy); 936 case BO_NE: 937 return makeTruthVal(true, resultTy); 938 } 939 } 940 941 // If both values wrap regions, see if they're from different base regions. 942 // Note, heap base symbolic regions are assumed to not alias with 943 // each other; for example, we assume that malloc returns different address 944 // on each invocation. 945 // FIXME: ObjC object pointers always reside on the heap, but currently 946 // we treat their memory space as unknown, because symbolic pointers 947 // to ObjC objects may alias. There should be a way to construct 948 // possibly-aliasing heap-based regions. For instance, MacOSXApiChecker 949 // guesses memory space for ObjC object pointers manually instead of 950 // relying on us. 951 if (LeftBase != RightBase && 952 ((!isa<SymbolicRegion>(LeftBase) && !isa<SymbolicRegion>(RightBase)) || 953 (isa<HeapSpaceRegion>(LeftMS) || isa<HeapSpaceRegion>(RightMS))) ){ 954 switch (op) { 955 default: 956 return UnknownVal(); 957 case BO_EQ: 958 return makeTruthVal(false, resultTy); 959 case BO_NE: 960 return makeTruthVal(true, resultTy); 961 } 962 } 963 964 // Handle special cases for when both regions are element regions. 965 const ElementRegion *RightER = dyn_cast<ElementRegion>(RightMR); 966 const ElementRegion *LeftER = dyn_cast<ElementRegion>(LeftMR); 967 if (RightER && LeftER) { 968 // Next, see if the two ERs have the same super-region and matching types. 969 // FIXME: This should do something useful even if the types don't match, 970 // though if both indexes are constant the RegionRawOffset path will 971 // give the correct answer. 972 if (LeftER->getSuperRegion() == RightER->getSuperRegion() && 973 LeftER->getElementType() == RightER->getElementType()) { 974 // Get the left index and cast it to the correct type. 975 // If the index is unknown or undefined, bail out here. 976 SVal LeftIndexVal = LeftER->getIndex(); 977 Optional<NonLoc> LeftIndex = LeftIndexVal.getAs<NonLoc>(); 978 if (!LeftIndex) 979 return UnknownVal(); 980 LeftIndexVal = evalCast(*LeftIndex, ArrayIndexTy, QualType{}); 981 LeftIndex = LeftIndexVal.getAs<NonLoc>(); 982 if (!LeftIndex) 983 return UnknownVal(); 984 985 // Do the same for the right index. 986 SVal RightIndexVal = RightER->getIndex(); 987 Optional<NonLoc> RightIndex = RightIndexVal.getAs<NonLoc>(); 988 if (!RightIndex) 989 return UnknownVal(); 990 RightIndexVal = evalCast(*RightIndex, ArrayIndexTy, QualType{}); 991 RightIndex = RightIndexVal.getAs<NonLoc>(); 992 if (!RightIndex) 993 return UnknownVal(); 994 995 // Actually perform the operation. 996 // evalBinOpNN expects the two indexes to already be the right type. 997 return evalBinOpNN(state, op, *LeftIndex, *RightIndex, resultTy); 998 } 999 } 1000 1001 // Special handling of the FieldRegions, even with symbolic offsets. 1002 const FieldRegion *RightFR = dyn_cast<FieldRegion>(RightMR); 1003 const FieldRegion *LeftFR = dyn_cast<FieldRegion>(LeftMR); 1004 if (RightFR && LeftFR) { 1005 SVal R = evalBinOpFieldRegionFieldRegion(LeftFR, RightFR, op, resultTy, 1006 *this); 1007 if (!R.isUnknown()) 1008 return R; 1009 } 1010 1011 // Compare the regions using the raw offsets. 1012 RegionOffset LeftOffset = LeftMR->getAsOffset(); 1013 RegionOffset RightOffset = RightMR->getAsOffset(); 1014 1015 if (LeftOffset.getRegion() != nullptr && 1016 LeftOffset.getRegion() == RightOffset.getRegion() && 1017 !LeftOffset.hasSymbolicOffset() && !RightOffset.hasSymbolicOffset()) { 1018 int64_t left = LeftOffset.getOffset(); 1019 int64_t right = RightOffset.getOffset(); 1020 1021 switch (op) { 1022 default: 1023 return UnknownVal(); 1024 case BO_LT: 1025 return makeTruthVal(left < right, resultTy); 1026 case BO_GT: 1027 return makeTruthVal(left > right, resultTy); 1028 case BO_LE: 1029 return makeTruthVal(left <= right, resultTy); 1030 case BO_GE: 1031 return makeTruthVal(left >= right, resultTy); 1032 case BO_EQ: 1033 return makeTruthVal(left == right, resultTy); 1034 case BO_NE: 1035 return makeTruthVal(left != right, resultTy); 1036 } 1037 } 1038 1039 // At this point we're not going to get a good answer, but we can try 1040 // conjuring an expression instead. 1041 SymbolRef LHSSym = lhs.getAsLocSymbol(); 1042 SymbolRef RHSSym = rhs.getAsLocSymbol(); 1043 if (LHSSym && RHSSym) 1044 return makeNonLoc(LHSSym, op, RHSSym, resultTy); 1045 1046 // If we get here, we have no way of comparing the regions. 1047 return UnknownVal(); 1048 } 1049 } 1050 } 1051 1052 SVal SimpleSValBuilder::evalBinOpLN(ProgramStateRef state, 1053 BinaryOperator::Opcode op, Loc lhs, 1054 NonLoc rhs, QualType resultTy) { 1055 if (op >= BO_PtrMemD && op <= BO_PtrMemI) { 1056 if (auto PTMSV = rhs.getAs<nonloc::PointerToMember>()) { 1057 if (PTMSV->isNullMemberPointer()) 1058 return UndefinedVal(); 1059 1060 auto getFieldLValue = [&](const auto *FD) -> SVal { 1061 SVal Result = lhs; 1062 1063 for (const auto &I : *PTMSV) 1064 Result = StateMgr.getStoreManager().evalDerivedToBase( 1065 Result, I->getType(), I->isVirtual()); 1066 1067 return state->getLValue(FD, Result); 1068 }; 1069 1070 if (const auto *FD = PTMSV->getDeclAs<FieldDecl>()) { 1071 return getFieldLValue(FD); 1072 } 1073 if (const auto *FD = PTMSV->getDeclAs<IndirectFieldDecl>()) { 1074 return getFieldLValue(FD); 1075 } 1076 } 1077 1078 return rhs; 1079 } 1080 1081 assert(!BinaryOperator::isComparisonOp(op) && 1082 "arguments to comparison ops must be of the same type"); 1083 1084 // Special case: rhs is a zero constant. 1085 if (rhs.isZeroConstant()) 1086 return lhs; 1087 1088 // Perserve the null pointer so that it can be found by the DerefChecker. 1089 if (lhs.isZeroConstant()) 1090 return lhs; 1091 1092 // We are dealing with pointer arithmetic. 1093 1094 // Handle pointer arithmetic on constant values. 1095 if (Optional<nonloc::ConcreteInt> rhsInt = rhs.getAs<nonloc::ConcreteInt>()) { 1096 if (Optional<loc::ConcreteInt> lhsInt = lhs.getAs<loc::ConcreteInt>()) { 1097 const llvm::APSInt &leftI = lhsInt->getValue(); 1098 assert(leftI.isUnsigned()); 1099 llvm::APSInt rightI(rhsInt->getValue(), /* isUnsigned */ true); 1100 1101 // Convert the bitwidth of rightI. This should deal with overflow 1102 // since we are dealing with concrete values. 1103 rightI = rightI.extOrTrunc(leftI.getBitWidth()); 1104 1105 // Offset the increment by the pointer size. 1106 llvm::APSInt Multiplicand(rightI.getBitWidth(), /* isUnsigned */ true); 1107 QualType pointeeType = resultTy->getPointeeType(); 1108 Multiplicand = getContext().getTypeSizeInChars(pointeeType).getQuantity(); 1109 rightI *= Multiplicand; 1110 1111 // Compute the adjusted pointer. 1112 switch (op) { 1113 case BO_Add: 1114 rightI = leftI + rightI; 1115 break; 1116 case BO_Sub: 1117 rightI = leftI - rightI; 1118 break; 1119 default: 1120 llvm_unreachable("Invalid pointer arithmetic operation"); 1121 } 1122 return loc::ConcreteInt(getBasicValueFactory().getValue(rightI)); 1123 } 1124 } 1125 1126 // Handle cases where 'lhs' is a region. 1127 if (const MemRegion *region = lhs.getAsRegion()) { 1128 rhs = convertToArrayIndex(rhs).castAs<NonLoc>(); 1129 SVal index = UnknownVal(); 1130 const SubRegion *superR = nullptr; 1131 // We need to know the type of the pointer in order to add an integer to it. 1132 // Depending on the type, different amount of bytes is added. 1133 QualType elementType; 1134 1135 if (const ElementRegion *elemReg = dyn_cast<ElementRegion>(region)) { 1136 assert(op == BO_Add || op == BO_Sub); 1137 index = evalBinOpNN(state, op, elemReg->getIndex(), rhs, 1138 getArrayIndexType()); 1139 superR = cast<SubRegion>(elemReg->getSuperRegion()); 1140 elementType = elemReg->getElementType(); 1141 } 1142 else if (isa<SubRegion>(region)) { 1143 assert(op == BO_Add || op == BO_Sub); 1144 index = (op == BO_Add) ? rhs : evalMinus(rhs); 1145 superR = cast<SubRegion>(region); 1146 // TODO: Is this actually reliable? Maybe improving our MemRegion 1147 // hierarchy to provide typed regions for all non-void pointers would be 1148 // better. For instance, we cannot extend this towards LocAsInteger 1149 // operations, where result type of the expression is integer. 1150 if (resultTy->isAnyPointerType()) 1151 elementType = resultTy->getPointeeType(); 1152 } 1153 1154 // Represent arithmetic on void pointers as arithmetic on char pointers. 1155 // It is fine when a TypedValueRegion of char value type represents 1156 // a void pointer. Note that arithmetic on void pointers is a GCC extension. 1157 if (elementType->isVoidType()) 1158 elementType = getContext().CharTy; 1159 1160 if (Optional<NonLoc> indexV = index.getAs<NonLoc>()) { 1161 return loc::MemRegionVal(MemMgr.getElementRegion(elementType, *indexV, 1162 superR, getContext())); 1163 } 1164 } 1165 return UnknownVal(); 1166 } 1167 1168 const llvm::APSInt *SimpleSValBuilder::getConstValue(ProgramStateRef state, 1169 SVal V) { 1170 if (V.isUnknownOrUndef()) 1171 return nullptr; 1172 1173 if (Optional<loc::ConcreteInt> X = V.getAs<loc::ConcreteInt>()) 1174 return &X->getValue(); 1175 1176 if (Optional<nonloc::ConcreteInt> X = V.getAs<nonloc::ConcreteInt>()) 1177 return &X->getValue(); 1178 1179 if (SymbolRef Sym = V.getAsSymbol()) 1180 return state->getConstraintManager().getSymVal(state, Sym); 1181 1182 return nullptr; 1183 } 1184 1185 const llvm::APSInt *SimpleSValBuilder::getKnownValue(ProgramStateRef state, 1186 SVal V) { 1187 return getConstValue(state, simplifySVal(state, V)); 1188 } 1189 1190 SVal SimpleSValBuilder::simplifyUntilFixpoint(ProgramStateRef State, SVal Val) { 1191 SVal SimplifiedVal = simplifySValOnce(State, Val); 1192 while (SimplifiedVal != Val) { 1193 Val = SimplifiedVal; 1194 SimplifiedVal = simplifySValOnce(State, Val); 1195 } 1196 return SimplifiedVal; 1197 } 1198 1199 SVal SimpleSValBuilder::simplifySVal(ProgramStateRef State, SVal V) { 1200 return simplifyUntilFixpoint(State, V); 1201 } 1202 1203 SVal SimpleSValBuilder::simplifySValOnce(ProgramStateRef State, SVal V) { 1204 // For now, this function tries to constant-fold symbols inside a 1205 // nonloc::SymbolVal, and does nothing else. More simplifications should 1206 // be possible, such as constant-folding an index in an ElementRegion. 1207 1208 class Simplifier : public FullSValVisitor<Simplifier, SVal> { 1209 ProgramStateRef State; 1210 SValBuilder &SVB; 1211 1212 // Cache results for the lifetime of the Simplifier. Results change every 1213 // time new constraints are added to the program state, which is the whole 1214 // point of simplifying, and for that very reason it's pointless to maintain 1215 // the same cache for the duration of the whole analysis. 1216 llvm::DenseMap<SymbolRef, SVal> Cached; 1217 1218 static bool isUnchanged(SymbolRef Sym, SVal Val) { 1219 return Sym == Val.getAsSymbol(); 1220 } 1221 1222 SVal cache(SymbolRef Sym, SVal V) { 1223 Cached[Sym] = V; 1224 return V; 1225 } 1226 1227 SVal skip(SymbolRef Sym) { 1228 return cache(Sym, SVB.makeSymbolVal(Sym)); 1229 } 1230 1231 // Return the known const value for the Sym if available, or return Undef 1232 // otherwise. 1233 SVal getConst(SymbolRef Sym) { 1234 const llvm::APSInt *Const = 1235 State->getConstraintManager().getSymVal(State, Sym); 1236 if (Const) 1237 return Loc::isLocType(Sym->getType()) ? (SVal)SVB.makeIntLocVal(*Const) 1238 : (SVal)SVB.makeIntVal(*Const); 1239 return UndefinedVal(); 1240 } 1241 1242 SVal getConstOrVisit(SymbolRef Sym) { 1243 const SVal Ret = getConst(Sym); 1244 if (Ret.isUndef()) 1245 return Visit(Sym); 1246 return Ret; 1247 } 1248 1249 public: 1250 Simplifier(ProgramStateRef State) 1251 : State(State), SVB(State->getStateManager().getSValBuilder()) {} 1252 1253 SVal VisitSymbolData(const SymbolData *S) { 1254 // No cache here. 1255 if (const llvm::APSInt *I = 1256 State->getConstraintManager().getSymVal(State, S)) 1257 return Loc::isLocType(S->getType()) ? (SVal)SVB.makeIntLocVal(*I) 1258 : (SVal)SVB.makeIntVal(*I); 1259 return SVB.makeSymbolVal(S); 1260 } 1261 1262 SVal VisitSymIntExpr(const SymIntExpr *S) { 1263 auto I = Cached.find(S); 1264 if (I != Cached.end()) 1265 return I->second; 1266 1267 SVal LHS = getConstOrVisit(S->getLHS()); 1268 if (isUnchanged(S->getLHS(), LHS)) 1269 return skip(S); 1270 1271 SVal RHS; 1272 // By looking at the APSInt in the right-hand side of S, we cannot 1273 // figure out if it should be treated as a Loc or as a NonLoc. 1274 // So make our guess by recalling that we cannot multiply pointers 1275 // or compare a pointer to an integer. 1276 if (Loc::isLocType(S->getLHS()->getType()) && 1277 BinaryOperator::isComparisonOp(S->getOpcode())) { 1278 // The usual conversion of $sym to &SymRegion{$sym}, as they have 1279 // the same meaning for Loc-type symbols, but the latter form 1280 // is preferred in SVal computations for being Loc itself. 1281 if (SymbolRef Sym = LHS.getAsSymbol()) { 1282 assert(Loc::isLocType(Sym->getType())); 1283 LHS = SVB.makeLoc(Sym); 1284 } 1285 RHS = SVB.makeIntLocVal(S->getRHS()); 1286 } else { 1287 RHS = SVB.makeIntVal(S->getRHS()); 1288 } 1289 1290 return cache( 1291 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1292 } 1293 1294 SVal VisitIntSymExpr(const IntSymExpr *S) { 1295 auto I = Cached.find(S); 1296 if (I != Cached.end()) 1297 return I->second; 1298 1299 SVal RHS = getConstOrVisit(S->getRHS()); 1300 if (isUnchanged(S->getRHS(), RHS)) 1301 return skip(S); 1302 1303 SVal LHS = SVB.makeIntVal(S->getLHS()); 1304 return cache( 1305 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1306 } 1307 1308 SVal VisitSymSymExpr(const SymSymExpr *S) { 1309 auto I = Cached.find(S); 1310 if (I != Cached.end()) 1311 return I->second; 1312 1313 // For now don't try to simplify mixed Loc/NonLoc expressions 1314 // because they often appear from LocAsInteger operations 1315 // and we don't know how to combine a LocAsInteger 1316 // with a concrete value. 1317 if (Loc::isLocType(S->getLHS()->getType()) != 1318 Loc::isLocType(S->getRHS()->getType())) 1319 return skip(S); 1320 1321 SVal LHS = getConstOrVisit(S->getLHS()); 1322 SVal RHS = getConstOrVisit(S->getRHS()); 1323 1324 if (isUnchanged(S->getLHS(), LHS) && isUnchanged(S->getRHS(), RHS)) 1325 return skip(S); 1326 1327 return cache( 1328 S, SVB.evalBinOp(State, S->getOpcode(), LHS, RHS, S->getType())); 1329 } 1330 1331 SVal VisitSymbolCast(const SymbolCast *S) { 1332 auto I = Cached.find(S); 1333 if (I != Cached.end()) 1334 return I->second; 1335 const SymExpr *OpSym = S->getOperand(); 1336 SVal OpVal = getConstOrVisit(OpSym); 1337 if (isUnchanged(OpSym, OpVal)) 1338 return skip(S); 1339 1340 return cache(S, SVB.evalCast(OpVal, S->getType(), OpSym->getType())); 1341 } 1342 1343 SVal VisitUnarySymExpr(const UnarySymExpr *S) { 1344 auto I = Cached.find(S); 1345 if (I != Cached.end()) 1346 return I->second; 1347 SVal Op = getConstOrVisit(S->getOperand()); 1348 if (isUnchanged(S->getOperand(), Op)) 1349 return skip(S); 1350 1351 return cache( 1352 S, SVB.evalUnaryOp(State, S->getOpcode(), Op, S->getType())); 1353 } 1354 1355 SVal VisitSymExpr(SymbolRef S) { return nonloc::SymbolVal(S); } 1356 1357 SVal VisitMemRegion(const MemRegion *R) { return loc::MemRegionVal(R); } 1358 1359 SVal VisitNonLocSymbolVal(nonloc::SymbolVal V) { 1360 // Simplification is much more costly than computing complexity. 1361 // For high complexity, it may be not worth it. 1362 return Visit(V.getSymbol()); 1363 } 1364 1365 SVal VisitSVal(SVal V) { return V; } 1366 }; 1367 1368 SVal SimplifiedV = Simplifier(State).Visit(V); 1369 return SimplifiedV; 1370 } 1371