1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This defines CStringChecker, which is an assortment of checks on calls 10 // to functions in <string.h>. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InterCheckerAPI.h" 15 #include "clang/Basic/Builtins.h" 16 #include "clang/Basic/CharInfo.h" 17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h" 18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h" 19 #include "clang/StaticAnalyzer/Core/Checker.h" 20 #include "clang/StaticAnalyzer/Core/CheckerManager.h" 21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h" 22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h" 23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h" 24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h" 25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h" 26 #include "llvm/ADT/STLExtras.h" 27 #include "llvm/ADT/SmallString.h" 28 #include "llvm/ADT/StringExtras.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include <functional> 31 #include <optional> 32 33 using namespace clang; 34 using namespace ento; 35 using namespace std::placeholders; 36 37 namespace { 38 struct AnyArgExpr { 39 const Expr *Expression; 40 unsigned ArgumentIndex; 41 }; 42 struct SourceArgExpr : AnyArgExpr {}; 43 struct DestinationArgExpr : AnyArgExpr {}; 44 struct SizeArgExpr : AnyArgExpr {}; 45 46 using ErrorMessage = SmallString<128>; 47 enum class AccessKind { write, read }; 48 49 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription, 50 AccessKind 
Access) { 51 ErrorMessage Message; 52 llvm::raw_svector_ostream Os(Message); 53 54 // Function classification like: Memory copy function 55 Os << toUppercase(FunctionDescription.front()) 56 << &FunctionDescription.data()[1]; 57 58 if (Access == AccessKind::write) { 59 Os << " overflows the destination buffer"; 60 } else { // read access 61 Os << " accesses out-of-bound array element"; 62 } 63 64 return Message; 65 } 66 67 enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 }; 68 69 enum class CharKind { Regular = 0, Wide }; 70 constexpr CharKind CK_Regular = CharKind::Regular; 71 constexpr CharKind CK_Wide = CharKind::Wide; 72 73 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) { 74 return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy 75 : Ctx.WideCharTy); 76 } 77 78 class CStringChecker : public Checker< eval::Call, 79 check::PreStmt<DeclStmt>, 80 check::LiveSymbols, 81 check::DeadSymbols, 82 check::RegionChanges 83 > { 84 mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap, 85 BT_NotCString, BT_AdditionOverflow, BT_UninitRead; 86 87 mutable const char *CurrentFunctionDescription = nullptr; 88 89 public: 90 /// The filter is used to filter out the diagnostics which are not enabled by 91 /// the user. 
92 struct CStringChecksFilter { 93 bool CheckCStringNullArg = false; 94 bool CheckCStringOutOfBounds = false; 95 bool CheckCStringBufferOverlap = false; 96 bool CheckCStringNotNullTerm = false; 97 bool CheckCStringUninitializedRead = false; 98 99 CheckerNameRef CheckNameCStringNullArg; 100 CheckerNameRef CheckNameCStringOutOfBounds; 101 CheckerNameRef CheckNameCStringBufferOverlap; 102 CheckerNameRef CheckNameCStringNotNullTerm; 103 CheckerNameRef CheckNameCStringUninitializedRead; 104 }; 105 106 CStringChecksFilter Filter; 107 108 static void *getTag() { static int tag; return &tag; } 109 110 bool evalCall(const CallEvent &Call, CheckerContext &C) const; 111 void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const; 112 void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const; 113 void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const; 114 115 ProgramStateRef 116 checkRegionChanges(ProgramStateRef state, 117 const InvalidatedSymbols *, 118 ArrayRef<const MemRegion *> ExplicitRegions, 119 ArrayRef<const MemRegion *> Regions, 120 const LocationContext *LCtx, 121 const CallEvent *Call) const; 122 123 using FnCheck = std::function<void(const CStringChecker *, CheckerContext &, 124 const CallExpr *)>; 125 126 CallDescriptionMap<FnCheck> Callbacks = { 127 {{CDF_MaybeBuiltin, {"memcpy"}, 3}, 128 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)}, 129 {{CDF_MaybeBuiltin, {"wmemcpy"}, 3}, 130 std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)}, 131 {{CDF_MaybeBuiltin, {"mempcpy"}, 3}, 132 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)}, 133 {{CDF_None, {"wmempcpy"}, 3}, 134 std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)}, 135 {{CDF_MaybeBuiltin, {"memcmp"}, 3}, 136 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 137 {{CDF_MaybeBuiltin, {"wmemcmp"}, 3}, 138 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)}, 139 {{CDF_MaybeBuiltin, {"memmove"}, 3}, 140 
std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)}, 141 {{CDF_MaybeBuiltin, {"wmemmove"}, 3}, 142 std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)}, 143 {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset}, 144 {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset}, 145 {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy}, 146 {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy}, 147 {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy}, 148 {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy}, 149 {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat}, 150 {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat}, 151 {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat}, 152 {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength}, 153 {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength}, 154 {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength}, 155 {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength}, 156 {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp}, 157 {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp}, 158 {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp}, 159 {{CDF_MaybeBuiltin, {"strncasecmp"}, 3}, 160 &CStringChecker::evalStrncasecmp}, 161 {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep}, 162 {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy}, 163 {{CDF_MaybeBuiltin, {"bcmp"}, 3}, 164 std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)}, 165 {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero}, 166 {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero}, 167 {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf}, 168 {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf}, 169 }; 170 171 // These require a bit of 
special handling. 172 CallDescription StdCopy{{"std", "copy"}, 3}, 173 StdCopyBackward{{"std", "copy_backward"}, 3}; 174 175 FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const; 176 void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 177 void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 178 void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 179 void evalBcopy(CheckerContext &C, const CallExpr *CE) const; 180 void evalCopyCommon(CheckerContext &C, const CallExpr *CE, 181 ProgramStateRef state, SizeArgExpr Size, 182 DestinationArgExpr Dest, SourceArgExpr Source, 183 bool Restricted, bool IsMempcpy, CharKind CK) const; 184 185 void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const; 186 187 void evalstrLength(CheckerContext &C, const CallExpr *CE) const; 188 void evalstrnLength(CheckerContext &C, const CallExpr *CE) const; 189 void evalstrLengthCommon(CheckerContext &C, 190 const CallExpr *CE, 191 bool IsStrnlen = false) const; 192 193 void evalStrcpy(CheckerContext &C, const CallExpr *CE) const; 194 void evalStrncpy(CheckerContext &C, const CallExpr *CE) const; 195 void evalStpcpy(CheckerContext &C, const CallExpr *CE) const; 196 void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const; 197 void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd, 198 bool IsBounded, ConcatFnKind appendK, 199 bool returnPtr = true) const; 200 201 void evalStrcat(CheckerContext &C, const CallExpr *CE) const; 202 void evalStrncat(CheckerContext &C, const CallExpr *CE) const; 203 void evalStrlcat(CheckerContext &C, const CallExpr *CE) const; 204 205 void evalStrcmp(CheckerContext &C, const CallExpr *CE) const; 206 void evalStrncmp(CheckerContext &C, const CallExpr *CE) const; 207 void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const; 208 void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const; 209 void evalStrcmpCommon(CheckerContext 
&C, 210 const CallExpr *CE, 211 bool IsBounded = false, 212 bool IgnoreCase = false) const; 213 214 void evalStrsep(CheckerContext &C, const CallExpr *CE) const; 215 216 void evalStdCopy(CheckerContext &C, const CallExpr *CE) const; 217 void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const; 218 void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const; 219 void evalMemset(CheckerContext &C, const CallExpr *CE) const; 220 void evalBzero(CheckerContext &C, const CallExpr *CE) const; 221 222 void evalSprintf(CheckerContext &C, const CallExpr *CE) const; 223 void evalSnprintf(CheckerContext &C, const CallExpr *CE) const; 224 void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded, 225 bool IsBuiltin) const; 226 227 // Utility methods 228 std::pair<ProgramStateRef , ProgramStateRef > 229 static assumeZero(CheckerContext &C, 230 ProgramStateRef state, SVal V, QualType Ty); 231 232 static ProgramStateRef setCStringLength(ProgramStateRef state, 233 const MemRegion *MR, 234 SVal strLength); 235 static SVal getCStringLengthForRegion(CheckerContext &C, 236 ProgramStateRef &state, 237 const Expr *Ex, 238 const MemRegion *MR, 239 bool hypothetical); 240 SVal getCStringLength(CheckerContext &C, 241 ProgramStateRef &state, 242 const Expr *Ex, 243 SVal Buf, 244 bool hypothetical = false) const; 245 246 const StringLiteral *getCStringLiteral(CheckerContext &C, 247 ProgramStateRef &state, 248 const Expr *expr, 249 SVal val) const; 250 251 /// Invalidate the destination buffer determined by characters copied. 252 static ProgramStateRef 253 invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S, 254 const Expr *BufE, SVal BufV, SVal SizeV, 255 QualType SizeTy); 256 257 /// Operation never overflows, do not invalidate the super region. 
258 static ProgramStateRef invalidateDestinationBufferNeverOverflows( 259 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 260 261 /// We do not know whether the operation can overflow (e.g. size is unknown), 262 /// invalidate the super region and escape related pointers. 263 static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion( 264 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV); 265 266 /// Invalidate the source buffer for escaping pointers. 267 static ProgramStateRef invalidateSourceBuffer(CheckerContext &C, 268 ProgramStateRef S, 269 const Expr *BufE, SVal BufV); 270 271 /// @param InvalidationTraitOperations Determine how to invlidate the 272 /// MemRegion by setting the invalidation traits. Return true to cause pointer 273 /// escape, or false otherwise. 274 static ProgramStateRef invalidateBufferAux( 275 CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V, 276 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 277 const MemRegion *)> 278 InvalidationTraitOperations); 279 280 static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 281 const MemRegion *MR); 282 283 static bool memsetAux(const Expr *DstBuffer, SVal CharE, 284 const Expr *Size, CheckerContext &C, 285 ProgramStateRef &State); 286 287 // Re-usable checks 288 ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State, 289 AnyArgExpr Arg, SVal l) const; 290 ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state, 291 AnyArgExpr Buffer, SVal Element, 292 AccessKind Access, 293 CharKind CK = CharKind::Regular) const; 294 ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 295 AnyArgExpr Buffer, SizeArgExpr Size, 296 AccessKind Access, 297 CharKind CK = CharKind::Regular) const; 298 ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state, 299 SizeArgExpr Size, AnyArgExpr First, 300 AnyArgExpr Second, 301 CharKind CK = CharKind::Regular) const; 302 void 
emitOverlapBug(CheckerContext &C, 303 ProgramStateRef state, 304 const Stmt *First, 305 const Stmt *Second) const; 306 307 void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S, 308 StringRef WarningMsg) const; 309 void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State, 310 const Stmt *S, StringRef WarningMsg) const; 311 void emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 312 const Stmt *S, StringRef WarningMsg) const; 313 void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const; 314 void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State, 315 const Expr *E) const; 316 ProgramStateRef checkAdditionOverflow(CheckerContext &C, 317 ProgramStateRef state, 318 NonLoc left, 319 NonLoc right) const; 320 321 // Return true if the destination buffer of the copy function may be in bound. 322 // Expects SVal of Size to be positive and unsigned. 323 // Expects SVal of FirstBuf to be a FieldRegion. 324 static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State, 325 SVal BufVal, QualType BufTy, SVal LengthVal, 326 QualType LengthTy); 327 }; 328 329 } //end anonymous namespace 330 331 REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal) 332 333 //===----------------------------------------------------------------------===// 334 // Individual checks and utility methods. 
335 //===----------------------------------------------------------------------===// 336 337 std::pair<ProgramStateRef , ProgramStateRef > 338 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V, 339 QualType Ty) { 340 std::optional<DefinedSVal> val = V.getAs<DefinedSVal>(); 341 if (!val) 342 return std::pair<ProgramStateRef , ProgramStateRef >(state, state); 343 344 SValBuilder &svalBuilder = C.getSValBuilder(); 345 DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty); 346 return state->assume(svalBuilder.evalEQ(state, *val, zero)); 347 } 348 349 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C, 350 ProgramStateRef State, 351 AnyArgExpr Arg, SVal l) const { 352 // If a previous check has failed, propagate the failure. 353 if (!State) 354 return nullptr; 355 356 ProgramStateRef stateNull, stateNonNull; 357 std::tie(stateNull, stateNonNull) = 358 assumeZero(C, State, l, Arg.Expression->getType()); 359 360 if (stateNull && !stateNonNull) { 361 if (Filter.CheckCStringNullArg) { 362 SmallString<80> buf; 363 llvm::raw_svector_ostream OS(buf); 364 assert(CurrentFunctionDescription); 365 OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1) 366 << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to " 367 << CurrentFunctionDescription; 368 369 emitNullArgBug(C, stateNull, Arg.Expression, OS.str()); 370 } 371 return nullptr; 372 } 373 374 // From here on, assume that the value is non-null. 375 assert(stateNonNull); 376 return stateNonNull; 377 } 378 379 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor? 380 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C, 381 ProgramStateRef state, 382 AnyArgExpr Buffer, SVal Element, 383 AccessKind Access, 384 CharKind CK) const { 385 386 // If a previous check has failed, propagate the failure. 387 if (!state) 388 return nullptr; 389 390 // Check for out of bound array element access. 
391 const MemRegion *R = Element.getAsRegion(); 392 if (!R) 393 return state; 394 395 const auto *ER = dyn_cast<ElementRegion>(R); 396 if (!ER) 397 return state; 398 399 SValBuilder &svalBuilder = C.getSValBuilder(); 400 ASTContext &Ctx = svalBuilder.getContext(); 401 402 // Get the index of the accessed element. 403 NonLoc Idx = ER->getIndex(); 404 405 if (CK == CharKind::Regular) { 406 if (ER->getValueType() != Ctx.CharTy) 407 return state; 408 } else { 409 if (ER->getValueType() != Ctx.WideCharTy) 410 return state; 411 412 QualType SizeTy = Ctx.getSizeType(); 413 NonLoc WideSize = 414 svalBuilder 415 .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(), 416 SizeTy) 417 .castAs<NonLoc>(); 418 SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy); 419 if (Offset.isUnknown()) 420 return state; 421 Idx = Offset.castAs<NonLoc>(); 422 } 423 424 // Get the size of the array. 425 const auto *superReg = cast<SubRegion>(ER->getSuperRegion()); 426 DefinedOrUnknownSVal Size = 427 getDynamicExtent(state, superReg, C.getSValBuilder()); 428 429 ProgramStateRef StInBound, StOutBound; 430 std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size); 431 if (StOutBound && !StInBound) { 432 // These checks are either enabled by the CString out-of-bounds checker 433 // explicitly or implicitly by the Malloc checker. 434 // In the latter case we only do modeling but do not emit warning. 435 if (!Filter.CheckCStringOutOfBounds) 436 return nullptr; 437 438 // Emit a bug report. 439 ErrorMessage Message = 440 createOutOfBoundErrorMsg(CurrentFunctionDescription, Access); 441 emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message); 442 return nullptr; 443 } 444 445 // Ensure that we wouldn't read uninitialized value. 
446 if (Access == AccessKind::read) { 447 if (Filter.CheckCStringUninitializedRead && 448 StInBound->getSVal(ER).isUndef()) { 449 emitUninitializedReadBug(C, StInBound, Buffer.Expression); 450 return nullptr; 451 } 452 } 453 454 // Array bound check succeeded. From this point forward the array bound 455 // should always succeed. 456 return StInBound; 457 } 458 459 ProgramStateRef 460 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State, 461 AnyArgExpr Buffer, SizeArgExpr Size, 462 AccessKind Access, CharKind CK) const { 463 // If a previous check has failed, propagate the failure. 464 if (!State) 465 return nullptr; 466 467 SValBuilder &svalBuilder = C.getSValBuilder(); 468 ASTContext &Ctx = svalBuilder.getContext(); 469 470 QualType SizeTy = Size.Expression->getType(); 471 QualType PtrTy = getCharPtrType(Ctx, CK); 472 473 // Check that the first buffer is non-null. 474 SVal BufVal = C.getSVal(Buffer.Expression); 475 State = checkNonNull(C, State, Buffer, BufVal); 476 if (!State) 477 return nullptr; 478 479 // If out-of-bounds checking is turned off, skip the rest. 480 if (!Filter.CheckCStringOutOfBounds) 481 return State; 482 483 SVal BufStart = 484 svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType()); 485 486 // Check if the first byte of the buffer is accessible. 487 State = CheckLocation(C, State, Buffer, BufStart, Access, CK); 488 if (!State) 489 return nullptr; 490 491 // Get the access length and make sure it is known. 492 // FIXME: This assumes the caller has already checked that the access length 493 // is positive. And that it's unsigned. 494 SVal LengthVal = C.getSVal(Size.Expression); 495 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 496 if (!Length) 497 return State; 498 499 // Compute the offset of the last element to be accessed: size-1. 
500 NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>(); 501 SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy); 502 if (Offset.isUnknown()) 503 return nullptr; 504 NonLoc LastOffset = Offset.castAs<NonLoc>(); 505 506 // Check that the first buffer is sufficiently long. 507 if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) { 508 509 SVal BufEnd = 510 svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 511 State = CheckLocation(C, State, Buffer, BufEnd, Access, CK); 512 513 // If the buffer isn't large enough, abort. 514 if (!State) 515 return nullptr; 516 } 517 518 // Large enough or not, return this state! 519 return State; 520 } 521 522 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C, 523 ProgramStateRef state, 524 SizeArgExpr Size, AnyArgExpr First, 525 AnyArgExpr Second, 526 CharKind CK) const { 527 if (!Filter.CheckCStringBufferOverlap) 528 return state; 529 530 // Do a simple check for overlap: if the two arguments are from the same 531 // buffer, see if the end of the first is greater than the start of the second 532 // or vice versa. 533 534 // If a previous check has failed, propagate the failure. 535 if (!state) 536 return nullptr; 537 538 ProgramStateRef stateTrue, stateFalse; 539 540 // Assume different address spaces cannot overlap. 541 if (First.Expression->getType()->getPointeeType().getAddressSpace() != 542 Second.Expression->getType()->getPointeeType().getAddressSpace()) 543 return state; 544 545 // Get the buffer values and make sure they're known locations. 546 const LocationContext *LCtx = C.getLocationContext(); 547 SVal firstVal = state->getSVal(First.Expression, LCtx); 548 SVal secondVal = state->getSVal(Second.Expression, LCtx); 549 550 std::optional<Loc> firstLoc = firstVal.getAs<Loc>(); 551 if (!firstLoc) 552 return state; 553 554 std::optional<Loc> secondLoc = secondVal.getAs<Loc>(); 555 if (!secondLoc) 556 return state; 557 558 // Are the two values the same? 
559 SValBuilder &svalBuilder = C.getSValBuilder(); 560 std::tie(stateTrue, stateFalse) = 561 state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc)); 562 563 if (stateTrue && !stateFalse) { 564 // If the values are known to be equal, that's automatically an overlap. 565 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 566 return nullptr; 567 } 568 569 // assume the two expressions are not equal. 570 assert(stateFalse); 571 state = stateFalse; 572 573 // Which value comes first? 574 QualType cmpTy = svalBuilder.getConditionType(); 575 SVal reverse = 576 svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy); 577 std::optional<DefinedOrUnknownSVal> reverseTest = 578 reverse.getAs<DefinedOrUnknownSVal>(); 579 if (!reverseTest) 580 return state; 581 582 std::tie(stateTrue, stateFalse) = state->assume(*reverseTest); 583 if (stateTrue) { 584 if (stateFalse) { 585 // If we don't know which one comes first, we can't perform this test. 586 return state; 587 } else { 588 // Switch the values so that firstVal is before secondVal. 589 std::swap(firstLoc, secondLoc); 590 591 // Switch the Exprs as well, so that they still correspond. 592 std::swap(First, Second); 593 } 594 } 595 596 // Get the length, and make sure it too is known. 597 SVal LengthVal = state->getSVal(Size.Expression, LCtx); 598 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 599 if (!Length) 600 return state; 601 602 // Convert the first buffer's start address to char*. 603 // Bail out if the cast fails. 604 ASTContext &Ctx = svalBuilder.getContext(); 605 QualType CharPtrTy = getCharPtrType(Ctx, CK); 606 SVal FirstStart = 607 svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType()); 608 std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>(); 609 if (!FirstStartLoc) 610 return state; 611 612 // Compute the end of the first buffer. Bail out if THAT fails. 
613 SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc, 614 *Length, CharPtrTy); 615 std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>(); 616 if (!FirstEndLoc) 617 return state; 618 619 // Is the end of the first buffer past the start of the second buffer? 620 SVal Overlap = 621 svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy); 622 std::optional<DefinedOrUnknownSVal> OverlapTest = 623 Overlap.getAs<DefinedOrUnknownSVal>(); 624 if (!OverlapTest) 625 return state; 626 627 std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest); 628 629 if (stateTrue && !stateFalse) { 630 // Overlap! 631 emitOverlapBug(C, stateTrue, First.Expression, Second.Expression); 632 return nullptr; 633 } 634 635 // assume the two expressions don't overlap. 636 assert(stateFalse); 637 return stateFalse; 638 } 639 640 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state, 641 const Stmt *First, const Stmt *Second) const { 642 ExplodedNode *N = C.generateErrorNode(state); 643 if (!N) 644 return; 645 646 if (!BT_Overlap) 647 BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap, 648 categories::UnixAPI, "Improper arguments")); 649 650 // Generate a report for this bug. 651 auto report = std::make_unique<PathSensitiveBugReport>( 652 *BT_Overlap, "Arguments must not be overlapping buffers", N); 653 report->addRange(First->getSourceRange()); 654 report->addRange(Second->getSourceRange()); 655 656 C.emitReport(std::move(report)); 657 } 658 659 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State, 660 const Stmt *S, StringRef WarningMsg) const { 661 if (ExplodedNode *N = C.generateErrorNode(State)) { 662 if (!BT_Null) { 663 // FIXME: This call uses the string constant 'categories::UnixAPI' as the 664 // description of the bug; it should be replaced by a real description. 
665 BT_Null.reset( 666 new BugType(Filter.CheckNameCStringNullArg, categories::UnixAPI)); 667 } 668 669 auto Report = 670 std::make_unique<PathSensitiveBugReport>(*BT_Null, WarningMsg, N); 671 Report->addRange(S->getSourceRange()); 672 if (const auto *Ex = dyn_cast<Expr>(S)) 673 bugreporter::trackExpressionValue(N, Ex, *Report); 674 C.emitReport(std::move(Report)); 675 } 676 } 677 678 void CStringChecker::emitUninitializedReadBug(CheckerContext &C, 679 ProgramStateRef State, 680 const Expr *E) const { 681 if (ExplodedNode *N = C.generateErrorNode(State)) { 682 const char *Msg = 683 "Bytes string function accesses uninitialized/garbage values"; 684 if (!BT_UninitRead) 685 BT_UninitRead.reset(new BugType(Filter.CheckNameCStringUninitializedRead, 686 "Accessing unitialized/garbage values")); 687 688 auto Report = 689 std::make_unique<PathSensitiveBugReport>(*BT_UninitRead, Msg, N); 690 Report->addRange(E->getSourceRange()); 691 bugreporter::trackExpressionValue(N, E, *Report); 692 C.emitReport(std::move(Report)); 693 } 694 } 695 696 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C, 697 ProgramStateRef State, const Stmt *S, 698 StringRef WarningMsg) const { 699 if (ExplodedNode *N = C.generateErrorNode(State)) { 700 if (!BT_Bounds) 701 BT_Bounds.reset(new BugType(Filter.CheckCStringOutOfBounds 702 ? Filter.CheckNameCStringOutOfBounds 703 : Filter.CheckNameCStringNullArg, 704 "Out-of-bound array access")); 705 706 // FIXME: It would be nice to eventually make this diagnostic more clear, 707 // e.g., by referencing the original declaration or by saying *why* this 708 // reference is outside the range. 
709 auto Report = 710 std::make_unique<PathSensitiveBugReport>(*BT_Bounds, WarningMsg, N); 711 Report->addRange(S->getSourceRange()); 712 C.emitReport(std::move(Report)); 713 } 714 } 715 716 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State, 717 const Stmt *S, 718 StringRef WarningMsg) const { 719 if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) { 720 if (!BT_NotCString) { 721 // FIXME: This call uses the string constant 'categories::UnixAPI' as the 722 // description of the bug; it should be replaced by a real description. 723 BT_NotCString.reset( 724 new BugType(Filter.CheckNameCStringNotNullTerm, categories::UnixAPI)); 725 } 726 727 auto Report = 728 std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N); 729 730 Report->addRange(S->getSourceRange()); 731 C.emitReport(std::move(Report)); 732 } 733 } 734 735 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C, 736 ProgramStateRef State) const { 737 if (ExplodedNode *N = C.generateErrorNode(State)) { 738 if (!BT_AdditionOverflow) { 739 // FIXME: This call uses the word "API" as the description of the bug; 740 // it should be replaced by a better error message (if this unlikely 741 // situation continues to exist as a separate bug type). 742 BT_AdditionOverflow.reset( 743 new BugType(Filter.CheckNameCStringOutOfBounds, "API")); 744 } 745 746 // This isn't a great error message, but this should never occur in real 747 // code anyway -- you'd have to create a buffer longer than a size_t can 748 // represent, which is sort of a contradiction. 
749 const char *WarningMsg = 750 "This expression will create a string whose length is too big to " 751 "be represented as a size_t"; 752 753 auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow, 754 WarningMsg, N); 755 C.emitReport(std::move(Report)); 756 } 757 } 758 759 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C, 760 ProgramStateRef state, 761 NonLoc left, 762 NonLoc right) const { 763 // If out-of-bounds checking is turned off, skip the rest. 764 if (!Filter.CheckCStringOutOfBounds) 765 return state; 766 767 // If a previous check has failed, propagate the failure. 768 if (!state) 769 return nullptr; 770 771 SValBuilder &svalBuilder = C.getSValBuilder(); 772 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 773 774 QualType sizeTy = svalBuilder.getContext().getSizeType(); 775 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 776 NonLoc maxVal = svalBuilder.makeIntVal(maxValInt); 777 778 SVal maxMinusRight; 779 if (isa<nonloc::ConcreteInt>(right)) { 780 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right, 781 sizeTy); 782 } else { 783 // Try switching the operands. (The order of these two assignments is 784 // important!) 785 maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left, 786 sizeTy); 787 left = right; 788 } 789 790 if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) { 791 QualType cmpTy = svalBuilder.getConditionType(); 792 // If left > max - right, we have an overflow. 793 SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left, 794 *maxMinusRightNL, cmpTy); 795 796 ProgramStateRef stateOverflow, stateOkay; 797 std::tie(stateOverflow, stateOkay) = 798 state->assume(willOverflow.castAs<DefinedOrUnknownSVal>()); 799 800 if (stateOverflow && !stateOkay) { 801 // We have an overflow. Emit a bug report. 
802 emitAdditionOverflowBug(C, stateOverflow); 803 return nullptr; 804 } 805 806 // From now on, assume an overflow didn't occur. 807 assert(stateOkay); 808 state = stateOkay; 809 } 810 811 return state; 812 } 813 814 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state, 815 const MemRegion *MR, 816 SVal strLength) { 817 assert(!strLength.isUndef() && "Attempt to set an undefined string length"); 818 819 MR = MR->StripCasts(); 820 821 switch (MR->getKind()) { 822 case MemRegion::StringRegionKind: 823 // FIXME: This can happen if we strcpy() into a string region. This is 824 // undefined [C99 6.4.5p6], but we should still warn about it. 825 return state; 826 827 case MemRegion::SymbolicRegionKind: 828 case MemRegion::AllocaRegionKind: 829 case MemRegion::NonParamVarRegionKind: 830 case MemRegion::ParamVarRegionKind: 831 case MemRegion::FieldRegionKind: 832 case MemRegion::ObjCIvarRegionKind: 833 // These are the types we can currently track string lengths for. 834 break; 835 836 case MemRegion::ElementRegionKind: 837 // FIXME: Handle element regions by upper-bounding the parent region's 838 // string length. 839 return state; 840 841 default: 842 // Other regions (mostly non-data) can't have a reliable C string length. 843 // For now, just ignore the change. 844 // FIXME: These are rare but not impossible. We should output some kind of 845 // warning for things like strcpy((char[]){'a', 0}, "b"); 846 return state; 847 } 848 849 if (strLength.isUnknown()) 850 return state->remove<CStringLength>(MR); 851 852 return state->set<CStringLength>(MR, strLength); 853 } 854 855 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C, 856 ProgramStateRef &state, 857 const Expr *Ex, 858 const MemRegion *MR, 859 bool hypothetical) { 860 if (!hypothetical) { 861 // If there's a recorded length, go ahead and return it. 
862 const SVal *Recorded = state->get<CStringLength>(MR); 863 if (Recorded) 864 return *Recorded; 865 } 866 867 // Otherwise, get a new symbol and update the state. 868 SValBuilder &svalBuilder = C.getSValBuilder(); 869 QualType sizeTy = svalBuilder.getContext().getSizeType(); 870 SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(), 871 MR, Ex, sizeTy, 872 C.getLocationContext(), 873 C.blockCount()); 874 875 if (!hypothetical) { 876 if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) { 877 // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4 878 BasicValueFactory &BVF = svalBuilder.getBasicValueFactory(); 879 const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy); 880 llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4); 881 const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt, 882 fourInt); 883 NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt); 884 SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn, maxLength, 885 svalBuilder.getConditionType()); 886 state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true); 887 } 888 state = state->set<CStringLength>(MR, strLength); 889 } 890 891 return strLength; 892 } 893 894 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state, 895 const Expr *Ex, SVal Buf, 896 bool hypothetical) const { 897 const MemRegion *MR = Buf.getAsRegion(); 898 if (!MR) { 899 // If we can't get a region, see if it's something we /know/ isn't a 900 // C string. In the context of locations, the only time we can issue such 901 // a warning is for labels. 
902 if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) { 903 if (Filter.CheckCStringNotNullTerm) { 904 SmallString<120> buf; 905 llvm::raw_svector_ostream os(buf); 906 assert(CurrentFunctionDescription); 907 os << "Argument to " << CurrentFunctionDescription 908 << " is the address of the label '" << Label->getLabel()->getName() 909 << "', which is not a null-terminated string"; 910 911 emitNotCStringBug(C, state, Ex, os.str()); 912 } 913 return UndefinedVal(); 914 } 915 916 // If it's not a region and not a label, give up. 917 return UnknownVal(); 918 } 919 920 // If we have a region, strip casts from it and see if we can figure out 921 // its length. For anything we can't figure out, just return UnknownVal. 922 MR = MR->StripCasts(); 923 924 switch (MR->getKind()) { 925 case MemRegion::StringRegionKind: { 926 // Modifying the contents of string regions is undefined [C99 6.4.5p6], 927 // so we can assume that the byte length is the correct C string length. 928 SValBuilder &svalBuilder = C.getSValBuilder(); 929 QualType sizeTy = svalBuilder.getContext().getSizeType(); 930 const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral(); 931 return svalBuilder.makeIntVal(strLit->getLength(), sizeTy); 932 } 933 case MemRegion::NonParamVarRegionKind: { 934 // If we have a global constant with a string literal initializer, 935 // compute the initializer's length. 
936 const VarDecl *Decl = cast<NonParamVarRegion>(MR)->getDecl(); 937 if (Decl->getType().isConstQualified() && Decl->hasGlobalStorage()) { 938 if (const Expr *Init = Decl->getInit()) { 939 if (auto *StrLit = dyn_cast<StringLiteral>(Init)) { 940 SValBuilder &SvalBuilder = C.getSValBuilder(); 941 QualType SizeTy = SvalBuilder.getContext().getSizeType(); 942 return SvalBuilder.makeIntVal(StrLit->getLength(), SizeTy); 943 } 944 } 945 } 946 [[fallthrough]]; 947 } 948 case MemRegion::SymbolicRegionKind: 949 case MemRegion::AllocaRegionKind: 950 case MemRegion::ParamVarRegionKind: 951 case MemRegion::FieldRegionKind: 952 case MemRegion::ObjCIvarRegionKind: 953 return getCStringLengthForRegion(C, state, Ex, MR, hypothetical); 954 case MemRegion::CompoundLiteralRegionKind: 955 // FIXME: Can we track this? Is it necessary? 956 return UnknownVal(); 957 case MemRegion::ElementRegionKind: 958 // FIXME: How can we handle this? It's not good enough to subtract the 959 // offset from the base string length; consider "123\x00567" and &a[5]. 960 return UnknownVal(); 961 default: 962 // Other regions (mostly non-data) can't have a reliable C string length. 963 // In this case, an error is emitted and UndefinedVal is returned. 964 // The caller should always be prepared to handle this case. 965 if (Filter.CheckCStringNotNullTerm) { 966 SmallString<120> buf; 967 llvm::raw_svector_ostream os(buf); 968 969 assert(CurrentFunctionDescription); 970 os << "Argument to " << CurrentFunctionDescription << " is "; 971 972 if (SummarizeRegion(os, C.getASTContext(), MR)) 973 os << ", which is not a null-terminated string"; 974 else 975 os << "not a null-terminated string"; 976 977 emitNotCStringBug(C, state, Ex, os.str()); 978 } 979 return UndefinedVal(); 980 } 981 } 982 983 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C, 984 ProgramStateRef &state, const Expr *expr, SVal val) const { 985 986 // Get the memory region pointed to by the val. 
987 const MemRegion *bufRegion = val.getAsRegion(); 988 if (!bufRegion) 989 return nullptr; 990 991 // Strip casts off the memory region. 992 bufRegion = bufRegion->StripCasts(); 993 994 // Cast the memory region to a string region. 995 const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion); 996 if (!strRegion) 997 return nullptr; 998 999 // Return the actual string in the string region. 1000 return strRegion->getStringLiteral(); 1001 } 1002 1003 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State, 1004 SVal BufVal, QualType BufTy, 1005 SVal LengthVal, QualType LengthTy) { 1006 // If we do not know that the buffer is long enough we return 'true'. 1007 // Otherwise the parent region of this field region would also get 1008 // invalidated, which would lead to warnings based on an unknown state. 1009 1010 if (LengthVal.isUnknown()) 1011 return false; 1012 1013 // Originally copied from CheckBufferAccess and CheckLocation. 1014 SValBuilder &SB = C.getSValBuilder(); 1015 ASTContext &Ctx = C.getASTContext(); 1016 1017 QualType PtrTy = Ctx.getPointerType(Ctx.CharTy); 1018 1019 std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>(); 1020 if (!Length) 1021 return true; // cf top comment. 1022 1023 // Compute the offset of the last element to be accessed: size-1. 1024 NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>(); 1025 SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy); 1026 if (Offset.isUnknown()) 1027 return true; // cf top comment 1028 NonLoc LastOffset = Offset.castAs<NonLoc>(); 1029 1030 // Check that the first buffer is sufficiently long. 1031 SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy); 1032 std::optional<Loc> BufLoc = BufStart.getAs<Loc>(); 1033 if (!BufLoc) 1034 return true; // cf top comment. 1035 1036 SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy); 1037 1038 // Check for out of bound array element access. 
1039 const MemRegion *R = BufEnd.getAsRegion(); 1040 if (!R) 1041 return true; // cf top comment. 1042 1043 const ElementRegion *ER = dyn_cast<ElementRegion>(R); 1044 if (!ER) 1045 return true; // cf top comment. 1046 1047 // FIXME: Does this crash when a non-standard definition 1048 // of a library function is encountered? 1049 assert(ER->getValueType() == C.getASTContext().CharTy && 1050 "isFirstBufInBound should only be called with char* ElementRegions"); 1051 1052 // Get the size of the array. 1053 const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion()); 1054 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB); 1055 1056 // Get the index of the accessed element. 1057 DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>(); 1058 1059 ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true); 1060 1061 return static_cast<bool>(StInBound); 1062 } 1063 1064 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize( 1065 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV, 1066 SVal SizeV, QualType SizeTy) { 1067 auto InvalidationTraitOperations = 1068 [&C, S, BufTy = BufE->getType(), BufV, SizeV, 1069 SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1070 // If destination buffer is a field region and access is in bound, do 1071 // not invalidate its super region. 
1072 if (MemRegion::FieldRegionKind == R->getKind() && 1073 isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) { 1074 ITraits.setTrait( 1075 R, 1076 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 1077 } 1078 return false; 1079 }; 1080 1081 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1082 } 1083 1084 ProgramStateRef 1085 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion( 1086 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { 1087 auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &, 1088 const MemRegion *R) { 1089 return isa<FieldRegion>(R); 1090 }; 1091 1092 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1093 } 1094 1095 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows( 1096 CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) { 1097 auto InvalidationTraitOperations = 1098 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1099 if (MemRegion::FieldRegionKind == R->getKind()) 1100 ITraits.setTrait( 1101 R, 1102 RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion); 1103 return false; 1104 }; 1105 1106 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1107 } 1108 1109 ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C, 1110 ProgramStateRef S, 1111 const Expr *BufE, 1112 SVal BufV) { 1113 auto InvalidationTraitOperations = 1114 [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) { 1115 ITraits.setTrait( 1116 R->getBaseRegion(), 1117 RegionAndSymbolInvalidationTraits::TK_PreserveContents); 1118 ITraits.setTrait(R, 1119 RegionAndSymbolInvalidationTraits::TK_SuppressEscape); 1120 return true; 1121 }; 1122 1123 return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations); 1124 } 1125 1126 ProgramStateRef CStringChecker::invalidateBufferAux( 1127 CheckerContext &C, ProgramStateRef State, const 
Expr *E, SVal V, 1128 llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &, 1129 const MemRegion *)> 1130 InvalidationTraitOperations) { 1131 std::optional<Loc> L = V.getAs<Loc>(); 1132 if (!L) 1133 return State; 1134 1135 // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes 1136 // some assumptions about the value that CFRefCount can't. Even so, it should 1137 // probably be refactored. 1138 if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) { 1139 const MemRegion *R = MR->getRegion()->StripCasts(); 1140 1141 // Are we dealing with an ElementRegion? If so, we should be invalidating 1142 // the super-region. 1143 if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) { 1144 R = ER->getSuperRegion(); 1145 // FIXME: What about layers of ElementRegions? 1146 } 1147 1148 // Invalidate this region. 1149 const LocationContext *LCtx = C.getPredecessor()->getLocationContext(); 1150 RegionAndSymbolInvalidationTraits ITraits; 1151 bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R); 1152 1153 return State->invalidateRegions(R, E, C.blockCount(), LCtx, 1154 CausesPointerEscape, nullptr, nullptr, 1155 &ITraits); 1156 } 1157 1158 // If we have a non-region value by chance, just remove the binding. 1159 // FIXME: is this necessary or correct? This handles the non-Region 1160 // cases. Is it ever valid to store to these? 
1161 return State->killBinding(*L); 1162 } 1163 1164 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx, 1165 const MemRegion *MR) { 1166 switch (MR->getKind()) { 1167 case MemRegion::FunctionCodeRegionKind: { 1168 if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl()) 1169 os << "the address of the function '" << *FD << '\''; 1170 else 1171 os << "the address of a function"; 1172 return true; 1173 } 1174 case MemRegion::BlockCodeRegionKind: 1175 os << "block text"; 1176 return true; 1177 case MemRegion::BlockDataRegionKind: 1178 os << "a block"; 1179 return true; 1180 case MemRegion::CXXThisRegionKind: 1181 case MemRegion::CXXTempObjectRegionKind: 1182 os << "a C++ temp object of type " 1183 << cast<TypedValueRegion>(MR)->getValueType(); 1184 return true; 1185 case MemRegion::NonParamVarRegionKind: 1186 os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType(); 1187 return true; 1188 case MemRegion::ParamVarRegionKind: 1189 os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType(); 1190 return true; 1191 case MemRegion::FieldRegionKind: 1192 os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType(); 1193 return true; 1194 case MemRegion::ObjCIvarRegionKind: 1195 os << "an instance variable of type " 1196 << cast<TypedValueRegion>(MR)->getValueType(); 1197 return true; 1198 default: 1199 return false; 1200 } 1201 } 1202 1203 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal, 1204 const Expr *Size, CheckerContext &C, 1205 ProgramStateRef &State) { 1206 SVal MemVal = C.getSVal(DstBuffer); 1207 SVal SizeVal = C.getSVal(Size); 1208 const MemRegion *MR = MemVal.getAsRegion(); 1209 if (!MR) 1210 return false; 1211 1212 // We're about to model memset by producing a "default binding" in the Store. 1213 // Our current implementation - RegionStore - doesn't support default bindings 1214 // that don't cover the whole base region. 
So we should first get the offset 1215 // and the base region to figure out whether the offset of buffer is 0. 1216 RegionOffset Offset = MR->getAsOffset(); 1217 const MemRegion *BR = Offset.getRegion(); 1218 1219 std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>(); 1220 if (!SizeNL) 1221 return false; 1222 1223 SValBuilder &svalBuilder = C.getSValBuilder(); 1224 ASTContext &Ctx = C.getASTContext(); 1225 1226 // void *memset(void *dest, int ch, size_t count); 1227 // For now we can only handle the case of offset is 0 and concrete char value. 1228 if (Offset.isValid() && !Offset.hasSymbolicOffset() && 1229 Offset.getOffset() == 0) { 1230 // Get the base region's size. 1231 DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder); 1232 1233 ProgramStateRef StateWholeReg, StateNotWholeReg; 1234 std::tie(StateWholeReg, StateNotWholeReg) = 1235 State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL)); 1236 1237 // With the semantic of 'memset()', we should convert the CharVal to 1238 // unsigned char. 1239 CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy); 1240 1241 ProgramStateRef StateNullChar, StateNonNullChar; 1242 std::tie(StateNullChar, StateNonNullChar) = 1243 assumeZero(C, State, CharVal, Ctx.UnsignedCharTy); 1244 1245 if (StateWholeReg && !StateNotWholeReg && StateNullChar && 1246 !StateNonNullChar) { 1247 // If the 'memset()' acts on the whole region of destination buffer and 1248 // the value of the second argument of 'memset()' is zero, bind the second 1249 // argument's value to the destination buffer with 'default binding'. 1250 // FIXME: Since there is no perfect way to bind the non-zero character, we 1251 // can only deal with zero value here. In the future, we need to deal with 1252 // the binding of non-zero value in the case of whole region. 
1253 State = State->bindDefaultZero(svalBuilder.makeLoc(BR), 1254 C.getLocationContext()); 1255 } else { 1256 // If the destination buffer's extent is not equal to the value of 1257 // third argument, just invalidate buffer. 1258 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1259 SizeVal, Size->getType()); 1260 } 1261 1262 if (StateNullChar && !StateNonNullChar) { 1263 // If the value of the second argument of 'memset()' is zero, set the 1264 // string length of destination buffer to 0 directly. 1265 State = setCStringLength(State, MR, 1266 svalBuilder.makeZeroVal(Ctx.getSizeType())); 1267 } else if (!StateNullChar && StateNonNullChar) { 1268 SVal NewStrLen = svalBuilder.getMetadataSymbolVal( 1269 CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(), 1270 C.getLocationContext(), C.blockCount()); 1271 1272 // If the value of second argument is not zero, then the string length 1273 // is at least the size argument. 1274 SVal NewStrLenGESize = svalBuilder.evalBinOp( 1275 State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType()); 1276 1277 State = setCStringLength( 1278 State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true), 1279 MR, NewStrLen); 1280 } 1281 } else { 1282 // If the offset is not zero and char value is not concrete, we can do 1283 // nothing but invalidate the buffer. 1284 State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal, 1285 SizeVal, Size->getType()); 1286 } 1287 return true; 1288 } 1289 1290 //===----------------------------------------------------------------------===// 1291 // evaluation of individual function calls. 
1292 //===----------------------------------------------------------------------===// 1293 1294 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE, 1295 ProgramStateRef state, SizeArgExpr Size, 1296 DestinationArgExpr Dest, 1297 SourceArgExpr Source, bool Restricted, 1298 bool IsMempcpy, CharKind CK) const { 1299 CurrentFunctionDescription = "memory copy function"; 1300 1301 // See if the size argument is zero. 1302 const LocationContext *LCtx = C.getLocationContext(); 1303 SVal sizeVal = state->getSVal(Size.Expression, LCtx); 1304 QualType sizeTy = Size.Expression->getType(); 1305 1306 ProgramStateRef stateZeroSize, stateNonZeroSize; 1307 std::tie(stateZeroSize, stateNonZeroSize) = 1308 assumeZero(C, state, sizeVal, sizeTy); 1309 1310 // Get the value of the Dest. 1311 SVal destVal = state->getSVal(Dest.Expression, LCtx); 1312 1313 // If the size is zero, there won't be any actual memory access, so 1314 // just bind the return value to the destination buffer and return. 1315 if (stateZeroSize && !stateNonZeroSize) { 1316 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal); 1317 C.addTransition(stateZeroSize); 1318 return; 1319 } 1320 1321 // If the size can be nonzero, we have to check the other arguments. 1322 if (stateNonZeroSize) { 1323 state = stateNonZeroSize; 1324 1325 // Ensure the destination is not null. If it is NULL there will be a 1326 // NULL pointer dereference. 1327 state = checkNonNull(C, state, Dest, destVal); 1328 if (!state) 1329 return; 1330 1331 // Get the value of the Src. 1332 SVal srcVal = state->getSVal(Source.Expression, LCtx); 1333 1334 // Ensure the source is not null. If it is NULL there will be a 1335 // NULL pointer dereference. 1336 state = checkNonNull(C, state, Source, srcVal); 1337 if (!state) 1338 return; 1339 1340 // Ensure the accesses are valid and that the buffers do not overlap. 
1341 state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK); 1342 state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK); 1343 1344 if (Restricted) 1345 state = CheckOverlap(C, state, Size, Dest, Source, CK); 1346 1347 if (!state) 1348 return; 1349 1350 // If this is mempcpy, get the byte after the last byte copied and 1351 // bind the expr. 1352 if (IsMempcpy) { 1353 // Get the byte after the last byte copied. 1354 SValBuilder &SvalBuilder = C.getSValBuilder(); 1355 ASTContext &Ctx = SvalBuilder.getContext(); 1356 QualType CharPtrTy = getCharPtrType(Ctx, CK); 1357 SVal DestRegCharVal = 1358 SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType()); 1359 SVal lastElement = C.getSValBuilder().evalBinOp( 1360 state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType()); 1361 // If we don't know how much we copied, we can at least 1362 // conjure a return value for later. 1363 if (lastElement.isUnknown()) 1364 lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx, 1365 C.blockCount()); 1366 1367 // The byte after the last byte copied is the return value. 1368 state = state->BindExpr(CE, LCtx, lastElement); 1369 } else { 1370 // All other copies return the destination buffer. 1371 // (Well, bcopy() has a void return type, but this won't hurt.) 1372 state = state->BindExpr(CE, LCtx, destVal); 1373 } 1374 1375 // Invalidate the destination (regular invalidation without pointer-escaping 1376 // the address of the top-level region). 1377 // FIXME: Even if we can't perfectly model the copy, we should see if we 1378 // can use LazyCompoundVals to copy the source values into the destination. 1379 // This would probably remove any existing bindings past the end of the 1380 // copied region, but that's still an improvement over blank invalidation. 
1381 state = invalidateDestinationBufferBySize( 1382 C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal, 1383 Size.Expression->getType()); 1384 1385 // Invalidate the source (const-invalidation without const-pointer-escaping 1386 // the address of the top-level region). 1387 state = invalidateSourceBuffer(C, state, Source.Expression, 1388 C.getSVal(Source.Expression)); 1389 1390 C.addTransition(state); 1391 } 1392 } 1393 1394 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE, 1395 CharKind CK) const { 1396 // void *memcpy(void *restrict dst, const void *restrict src, size_t n); 1397 // The return value is the address of the destination buffer. 1398 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1399 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1400 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1401 1402 ProgramStateRef State = C.getState(); 1403 1404 constexpr bool IsRestricted = true; 1405 constexpr bool IsMempcpy = false; 1406 evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK); 1407 } 1408 1409 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE, 1410 CharKind CK) const { 1411 // void *mempcpy(void *restrict dst, const void *restrict src, size_t n); 1412 // The return value is a pointer to the byte following the last written byte. 1413 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1414 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1415 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1416 1417 constexpr bool IsRestricted = true; 1418 constexpr bool IsMempcpy = true; 1419 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1420 CK); 1421 } 1422 1423 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE, 1424 CharKind CK) const { 1425 // void *memmove(void *dst, const void *src, size_t n); 1426 // The return value is the address of the destination buffer. 
1427 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 1428 SourceArgExpr Src = {{CE->getArg(1), 1}}; 1429 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1430 1431 constexpr bool IsRestricted = false; 1432 constexpr bool IsMempcpy = false; 1433 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1434 CK); 1435 } 1436 1437 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const { 1438 // void bcopy(const void *src, void *dst, size_t n); 1439 SourceArgExpr Src{{CE->getArg(0), 0}}; 1440 DestinationArgExpr Dest = {{CE->getArg(1), 1}}; 1441 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1442 1443 constexpr bool IsRestricted = false; 1444 constexpr bool IsMempcpy = false; 1445 evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy, 1446 CharKind::Regular); 1447 } 1448 1449 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE, 1450 CharKind CK) const { 1451 // int memcmp(const void *s1, const void *s2, size_t n); 1452 CurrentFunctionDescription = "memory comparison function"; 1453 1454 AnyArgExpr Left = {CE->getArg(0), 0}; 1455 AnyArgExpr Right = {CE->getArg(1), 1}; 1456 SizeArgExpr Size = {{CE->getArg(2), 2}}; 1457 1458 ProgramStateRef State = C.getState(); 1459 SValBuilder &Builder = C.getSValBuilder(); 1460 const LocationContext *LCtx = C.getLocationContext(); 1461 1462 // See if the size argument is zero. 1463 SVal sizeVal = State->getSVal(Size.Expression, LCtx); 1464 QualType sizeTy = Size.Expression->getType(); 1465 1466 ProgramStateRef stateZeroSize, stateNonZeroSize; 1467 std::tie(stateZeroSize, stateNonZeroSize) = 1468 assumeZero(C, State, sizeVal, sizeTy); 1469 1470 // If the size can be zero, the result will be 0 in that case, and we don't 1471 // have to check either of the buffers. 
1472 if (stateZeroSize) { 1473 State = stateZeroSize; 1474 State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1475 C.addTransition(State); 1476 } 1477 1478 // If the size can be nonzero, we have to check the other arguments. 1479 if (stateNonZeroSize) { 1480 State = stateNonZeroSize; 1481 // If we know the two buffers are the same, we know the result is 0. 1482 // First, get the two buffers' addresses. Another checker will have already 1483 // made sure they're not undefined. 1484 DefinedOrUnknownSVal LV = 1485 State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1486 DefinedOrUnknownSVal RV = 1487 State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>(); 1488 1489 // See if they are the same. 1490 ProgramStateRef SameBuffer, NotSameBuffer; 1491 std::tie(SameBuffer, NotSameBuffer) = 1492 State->assume(Builder.evalEQ(State, LV, RV)); 1493 1494 // If the two arguments are the same buffer, we know the result is 0, 1495 // and we only need to check one size. 1496 if (SameBuffer && !NotSameBuffer) { 1497 State = SameBuffer; 1498 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read); 1499 if (State) { 1500 State = 1501 SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType())); 1502 C.addTransition(State); 1503 } 1504 return; 1505 } 1506 1507 // If the two arguments might be different buffers, we have to check 1508 // the size of both of them. 1509 assert(NotSameBuffer); 1510 State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK); 1511 State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK); 1512 if (State) { 1513 // The return value is the comparison result, which we don't know. 
1514 SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 1515 State = State->BindExpr(CE, LCtx, CmpV); 1516 C.addTransition(State); 1517 } 1518 } 1519 } 1520 1521 void CStringChecker::evalstrLength(CheckerContext &C, 1522 const CallExpr *CE) const { 1523 // size_t strlen(const char *s); 1524 evalstrLengthCommon(C, CE, /* IsStrnlen = */ false); 1525 } 1526 1527 void CStringChecker::evalstrnLength(CheckerContext &C, 1528 const CallExpr *CE) const { 1529 // size_t strnlen(const char *s, size_t maxlen); 1530 evalstrLengthCommon(C, CE, /* IsStrnlen = */ true); 1531 } 1532 1533 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE, 1534 bool IsStrnlen) const { 1535 CurrentFunctionDescription = "string length function"; 1536 ProgramStateRef state = C.getState(); 1537 const LocationContext *LCtx = C.getLocationContext(); 1538 1539 if (IsStrnlen) { 1540 const Expr *maxlenExpr = CE->getArg(1); 1541 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1542 1543 ProgramStateRef stateZeroSize, stateNonZeroSize; 1544 std::tie(stateZeroSize, stateNonZeroSize) = 1545 assumeZero(C, state, maxlenVal, maxlenExpr->getType()); 1546 1547 // If the size can be zero, the result will be 0 in that case, and we don't 1548 // have to check the string itself. 1549 if (stateZeroSize) { 1550 SVal zero = C.getSValBuilder().makeZeroVal(CE->getType()); 1551 stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero); 1552 C.addTransition(stateZeroSize); 1553 } 1554 1555 // If the size is GUARANTEED to be zero, we're done! 1556 if (!stateNonZeroSize) 1557 return; 1558 1559 // Otherwise, record the assumption that the size is nonzero. 1560 state = stateNonZeroSize; 1561 } 1562 1563 // Check that the string argument is non-null. 
1564 AnyArgExpr Arg = {CE->getArg(0), 0}; 1565 SVal ArgVal = state->getSVal(Arg.Expression, LCtx); 1566 state = checkNonNull(C, state, Arg, ArgVal); 1567 1568 if (!state) 1569 return; 1570 1571 SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal); 1572 1573 // If the argument isn't a valid C string, there's no valid state to 1574 // transition to. 1575 if (strLength.isUndef()) 1576 return; 1577 1578 DefinedOrUnknownSVal result = UnknownVal(); 1579 1580 // If the check is for strnlen() then bind the return value to no more than 1581 // the maxlen value. 1582 if (IsStrnlen) { 1583 QualType cmpTy = C.getSValBuilder().getConditionType(); 1584 1585 // It's a little unfortunate to be getting this again, 1586 // but it's not that expensive... 1587 const Expr *maxlenExpr = CE->getArg(1); 1588 SVal maxlenVal = state->getSVal(maxlenExpr, LCtx); 1589 1590 std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>(); 1591 std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>(); 1592 1593 if (strLengthNL && maxlenValNL) { 1594 ProgramStateRef stateStringTooLong, stateStringNotTooLong; 1595 1596 // Check if the strLength is greater than the maxlen. 1597 std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume( 1598 C.getSValBuilder() 1599 .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy) 1600 .castAs<DefinedOrUnknownSVal>()); 1601 1602 if (stateStringTooLong && !stateStringNotTooLong) { 1603 // If the string is longer than maxlen, return maxlen. 1604 result = *maxlenValNL; 1605 } else if (stateStringNotTooLong && !stateStringTooLong) { 1606 // If the string is shorter than maxlen, return its length. 1607 result = *strLengthNL; 1608 } 1609 } 1610 1611 if (result.isUnknown()) { 1612 // If we don't have enough information for a comparison, there's 1613 // no guarantee the full string length will actually be returned. 1614 // All we know is the return value is the min of the string length 1615 // and the limit. 
// This is better than nothing.
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
      NonLoc resultNL = result.castAs<NonLoc>();

      if (strLengthNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *strLengthNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }

      if (maxlenValNL) {
        state = state->assume(C.getSValBuilder().evalBinOpNN(
                                  state, BO_LE, resultNL, *maxlenValNL, cmpTy)
                                  .castAs<DefinedOrUnknownSVal>(), true);
      }
    }

  } else {
    // This is a plain strlen(), not strnlen().
    result = strLength.castAs<DefinedOrUnknownSVal>();

    // If we don't know the length of the string, conjure a return
    // value, so it can be used in constraints, at least.
    if (result.isUnknown()) {
      result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
                                                   C.blockCount());
    }
  }

  // Bind the return value.
  assert(!result.isUnknown() && "Should have conjured a value by now");
  state = state->BindExpr(CE, LCtx, result);
  C.addTransition(state);
}

/// Models strcpy(): an unbounded, non-appending copy. \c returnPtr is left at
/// its declared default (the declaration is outside this part of the file;
/// presumably \c true, since the size-returning variants pass \c false
/// explicitly -- TODO confirm against the class declaration).
void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strncpy(): a non-appending copy bounded by the third argument.
void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
  // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models stpcpy(): like strcpy(), but requests the "end of copy" return
/// value (\c ReturnEnd) instead of the destination pointer.
void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
  // char *stpcpy(char *restrict dst, const char *restrict src);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::none);
}

/// Models strlcpy(): a bounded, non-appending copy whose result is a size
/// rather than a pointer (\c returnPtr is false).
void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcpy(char *dest, const char *src, size_t size);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ true,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::none,
                   /* returnPtr = */ false);
}

/// Models strcat(): an unbounded append of the source to the destination.
void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
  // char *strcat(char *restrict s1, const char *restrict s2);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ false,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strncat(): an append bounded by the third argument.
void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
  // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strcat);
}

/// Models strlcat(): a bounded append whose result is a size rather than a
/// pointer (\c returnPtr is false).
void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
  // size_t strlcat(char *dst, const char *src, size_t size);
  // It will append at most size - strlen(dst) - 1 bytes,
  // NULL-terminating the result.
  evalStrcpyCommon(C, CE,
                   /* ReturnEnd = */ false,
                   /* IsBounded = */ true,
                   /* appendK = */ ConcatFnKind::strlcat,
                   /* returnPtr = */ false);
}

/// Shared model for the string copy / concatenation family (the callers in
/// this file are strcpy, strncpy, stpcpy, strlcpy, strcat, strncat, strlcat).
///
/// The function null-checks and overlap-checks the arguments, works out how
/// many characters are copied where that is decidable, checks the destination
/// for out-of-bounds writes, invalidates the destination, records the new
/// C string length and binds the call's return value. Whenever one of the
/// checks yields a null state the function returns without adding a
/// transition.
///
/// \param C          Checker context of the call being evaluated.
/// \param CE         The call; argument 0 is the destination, argument 1 the
///                   source and, if \p IsBounded, argument 2 the size.
/// \param ReturnEnd  Only consulted when \p returnPtr is true: the result is
///                   then the destination pointer advanced by the final
///                   string length (conjured if that is unknown) instead of
///                   the destination pointer itself.
/// \param IsBounded  The call carries a size argument at index 2.
/// \param appendK    ConcatFnKind::none for a plain copy; strcat or strlcat
///                   when the source is appended to the destination.
/// \param returnPtr  When false the bound result is a length value
///                   (\c strlRetVal for none/strlcat, otherwise the final
///                   string length) rather than a pointer.
void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
                                      bool ReturnEnd, bool IsBounded,
                                      ConcatFnKind appendK,
                                      bool returnPtr) const {
  // Pick the wording used by diagnostics issued from the helper checks below.
  if (appendK == ConcatFnKind::none)
    CurrentFunctionDescription = "string copy function";
  else
    CurrentFunctionDescription = "string concatenation function";

  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the destination is non-null.
  DestinationArgExpr Dst = {{CE->getArg(0), 0}};
  SVal DstVal = state->getSVal(Dst.Expression, LCtx);
  state = checkNonNull(C, state, Dst, DstVal);
  if (!state)
    return;

  // Check that the source is non-null.
  SourceArgExpr srcExpr = {{CE->getArg(1), 1}};
  SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
  state = checkNonNull(C, state, srcExpr, srcVal);
  if (!state)
    return;

  // Get the string length of the source.
  SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
  std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();

  // Get the string length of the destination buffer.
  SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
  std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();

  // If the source isn't a valid C string, give up.
  if (strLength.isUndef())
    return;

  SValBuilder &svalBuilder = C.getSValBuilder();
  QualType cmpTy = svalBuilder.getConditionType();
  QualType sizeTy = svalBuilder.getContext().getSizeType();

  // These two values allow checking two kinds of errors:
  // - actual overflows caused by a source that doesn't fit in the destination
  // - potential overflows caused by a bound that could exceed the destination
  SVal amountCopied = UnknownVal();
  SVal maxLastElementIndex = UnknownVal();
  // Only ever tested for null below: non-null means "the bound itself has
  // been checked", so the actual-copy check is skipped.
  const char *boundWarning = nullptr;

  // FIXME: Why do we choose the srcExpr if the access has no size?
  // Note that the 3rd argument of the call would be the size parameter.
  SizeArgExpr SrcExprAsSizeDummy = {
      {srcExpr.Expression, srcExpr.ArgumentIndex}};
  state = CheckOverlap(
      C, state,
      (IsBounded ? SizeArgExpr{{CE->getArg(2), 2}} : SrcExprAsSizeDummy), Dst,
      srcExpr);

  if (!state)
    return;

  // If the function is strncpy, strncat, etc... it is bounded.
  if (IsBounded) {
    // Get the max number of characters to copy.
    SizeArgExpr lenExpr = {{CE->getArg(2), 2}};
    SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);

    // Protect against misdeclared strncpy().
    lenVal =
        svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());

    std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();

    // If we know both values, we might be able to figure out how much
    // we're copying.
    if (strLengthNL && lenValNL) {
      switch (appendK) {
      case ConcatFnKind::none:
      case ConcatFnKind::strcat: {
        ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
        // Check if the max number to copy is less than the length of the src.
        // If the bound is equal to the source length, strncpy won't null-
        // terminate the result!
        std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
            svalBuilder
                .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
                .castAs<DefinedOrUnknownSVal>());

        if (stateSourceTooLong && !stateSourceNotTooLong) {
          // Max number to copy is less than the length of the src, so the
          // actual strLength copied is the max number arg.
          state = stateSourceTooLong;
          amountCopied = lenVal;

        } else if (!stateSourceTooLong && stateSourceNotTooLong) {
          // The source buffer entirely fits in the bound.
          state = stateSourceNotTooLong;
          amountCopied = strLength;
        }
        // If both outcomes are feasible, amountCopied stays UnknownVal.
        break;
      }
      case ConcatFnKind::strlcat:
        if (!dstStrLengthNL)
          return;

        // amountCopied = min (size - dstLen - 1 , srcLen)
        SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
                                                 *dstStrLengthNL, sizeTy);
        if (!isa<NonLoc>(freeSpace))
          return;
        freeSpace =
            svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
                                  svalBuilder.makeIntVal(1, sizeTy), sizeTy);
        std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();

        // While unlikely, it is possible that the subtraction is
        // too complex to compute, let's check whether it succeeded.
        if (!freeSpaceNL)
          return;
        SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
            state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);

        ProgramStateRef TrueState, FalseState;
        std::tie(TrueState, FalseState) =
            state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());

        // srcStrLength <= size - dstStrLength -1
        if (TrueState && !FalseState) {
          amountCopied = strLength;
        }

        // srcStrLength > size - dstStrLength -1
        if (!TrueState && FalseState) {
          amountCopied = freeSpace;
        }

        if (TrueState && FalseState)
          amountCopied = UnknownVal();
        break;
      }
    }
    // We still want to know if the bound is known to be too large.
    if (lenValNL) {
      switch (appendK) {
      case ConcatFnKind::strcat:
        // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)

        // Get the string length of the destination. If the destination is
        // memory that can't have a string length, we shouldn't be copying
        // into it anyway.
        if (dstStrLength.isUndef())
          return;

        if (dstStrLengthNL) {
          maxLastElementIndex = svalBuilder.evalBinOpNN(
              state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);

          boundWarning = "Size argument is greater than the free space in the "
                         "destination buffer";
        }
        break;
      case ConcatFnKind::none:
      case ConcatFnKind::strlcat:
        // For strncpy and strlcat, this is just checking
        // that lenVal <= sizeof(dst).
        // (Yes, strncpy and strncat differ in how they treat termination.
        // strncat ALWAYS terminates, but strncpy doesn't.)

        // We need a special case for when the copy size is zero, in which
        // case strncpy will do no work at all. Our bounds check uses n-1
        // as the last element accessed, so n == 0 is problematic.
        ProgramStateRef StateZeroSize, StateNonZeroSize;
        std::tie(StateZeroSize, StateNonZeroSize) =
            assumeZero(C, state, *lenValNL, sizeTy);

        // If the size is known to be zero, we're done.
        if (StateZeroSize && !StateNonZeroSize) {
          if (returnPtr) {
            StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
          } else {
            if (appendK == ConcatFnKind::none) {
              // strlcpy returns strlen(src)
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
            } else {
              // strlcat returns strlen(src) + strlen(dst)
              SVal retSize = svalBuilder.evalBinOp(
                  state, BO_Add, strLength, dstStrLength, sizeTy);
              StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
            }
          }
          C.addTransition(StateZeroSize);
          return;
        }

        // Otherwise, go ahead and figure out the last element we'll touch.
        // We don't record the non-zero assumption here because we can't
        // be sure. We won't warn on a possible zero.
        NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
        maxLastElementIndex =
            svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
        boundWarning = "Size argument is greater than the length of the "
                       "destination buffer";
        break;
      }
    }
  } else {
    // The function isn't bounded. The amount copied should match the length
    // of the source buffer.
    amountCopied = strLength;
  }

  assert(state);

  // This represents the number of characters copied into the destination
  // buffer. (It may not actually be the strlen if the destination buffer
  // is not terminated.)
  SVal finalStrLength = UnknownVal();
  // Return value used for the size-returning variants (returnPtr == false).
  SVal strlRetVal = UnknownVal();

  if (appendK == ConcatFnKind::none && !returnPtr) {
    // strlcpy returns strlen(src)
    strlRetVal = strLength;
  }

  // If this is an appending function (strcat, strncat...) then set the
  // string length to strlen(src) + strlen(dst) since the buffer will
  // ultimately contain both.
  if (appendK != ConcatFnKind::none) {
    // Get the string length of the destination. If the destination is memory
    // that can't have a string length, we shouldn't be copying into it anyway.
    if (dstStrLength.isUndef())
      return;

    if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
      strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
                                           *dstStrLengthNL, sizeTy);
    }

    std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();

    // If we know both string lengths, we might know the final string length.
    if (amountCopiedNL && dstStrLengthNL) {
      // Make sure the two lengths together don't overflow a size_t.
      state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
      if (!state)
        return;

      finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
                                               *dstStrLengthNL, sizeTy);
    }

    // If we couldn't get a single value for the final string length,
    // we can at least bound it by the individual lengths.
    if (finalStrLength.isUnknown()) {
      // Try to get a "hypothetical" string length symbol, which we can later
      // set as a real value if that turns out to be the case.
      finalStrLength = getCStringLength(C, state, CE, DstVal, true);
      assert(!finalStrLength.isUndef());

      if (std::optional<NonLoc> finalStrLengthNL =
              finalStrLength.getAs<NonLoc>()) {
        // NOTE(review): this branch is nested inside
        // `appendK != ConcatFnKind::none`, so the condition below cannot
        // hold here as written.
        if (amountCopiedNL && appendK == ConcatFnKind::none) {
          // we overwrite dst string with the src
          // finalStrLength >= srcStrLength
          SVal sourceInResult = svalBuilder.evalBinOpNN(
              state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
          state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
                                true);
          if (!state)
            return;
        }

        if (dstStrLengthNL && appendK != ConcatFnKind::none) {
          // we extend the dst string with the src
          // finalStrLength >= dstStrLength
          SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
                                                      *finalStrLengthNL,
                                                      *dstStrLengthNL,
                                                      cmpTy);
          state =
              state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
          if (!state)
            return;
        }
      }
    }

  } else {
    // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
    // the final string length will match the input string length.
    finalStrLength = amountCopied;
  }

  SVal Result;

  if (returnPtr) {
    // The final result of the function will either be a pointer past the last
    // copied element, or a pointer to the start of the destination buffer.
    Result = (ReturnEnd ? UnknownVal() : DstVal);
  } else {
    if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
      //strlcpy, strlcat
      Result = strlRetVal;
    else
      Result = finalStrLength;
  }

  assert(state);

  // If the destination is a MemRegion, try to check for a buffer overflow and
  // record the new string length.
  if (std::optional<loc::MemRegionVal> dstRegVal =
          DstVal.getAs<loc::MemRegionVal>()) {
    QualType ptrTy = Dst.Expression->getType();

    // If we have an exact value on a bounded copy, use that to check for
    // overflows, rather than our estimate about how much is actually copied.
    if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
      SVal maxLastElement =
          svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);

      // Check if the first byte of the destination is writable.
      state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
      if (!state)
        return;
      // Check if the last byte of the destination is writable.
      state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
      if (!state)
        return;
    }

    // Then, if the final length is known...
    if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
      SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
                                                 *knownStrLength, ptrTy);

      // ...and we haven't checked the bound, we'll check the actual copy.
      if (!boundWarning) {
        // Check if the first byte of the destination is writable.
        state = CheckLocation(C, state, Dst, DstVal, AccessKind::write);
        if (!state)
          return;
        // Check if the last byte of the destination is writable.
        state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
        if (!state)
          return;
      }

      // If this is a stpcpy-style copy, the last element is the return value.
      if (returnPtr && ReturnEnd)
        Result = lastElement;
    }

    // Invalidate the destination (regular invalidation without pointer-escaping
    // the address of the top-level region). This must happen before we set the
    // C string length because invalidation will clear the length.
    // FIXME: Even if we can't perfectly model the copy, we should see if we
    // can use LazyCompoundVals to copy the source values into the destination.
    // This would probably remove any existing bindings past the end of the
    // string, but that's still an improvement over blank invalidation.
    state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
                                              *dstRegVal, amountCopied,
                                              C.getASTContext().getSizeType());

    // Invalidate the source (const-invalidation without const-pointer-escaping
    // the address of the top-level region).
    state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);

    // Set the C string length of the destination, if we know it.
    if (IsBounded && (appendK == ConcatFnKind::none)) {
      // strncpy is annoying in that it doesn't guarantee to null-terminate
      // the result string. If the original string didn't fit entirely inside
      // the bound (including the null-terminator), we don't know how long the
      // result is.
      if (amountCopied != strLength)
        finalStrLength = UnknownVal();
    }
    state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
  }

  assert(state);

  if (returnPtr) {
    // If this is a stpcpy-style copy, but we were unable to check for a buffer
    // overflow, we still need a result. Conjure a return value.
    if (ReturnEnd && Result.isUnknown()) {
      Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
    }
  }
  // Set the return value.
  state = state->BindExpr(CE, LCtx, Result);
  C.addTransition(state);
}

/// Models strcmp(): unbounded, case-sensitive comparison.
void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
  //int strcmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
}

/// Models strncmp(): case-sensitive comparison bounded by the third argument.
void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
  //int strncmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
}

/// Models strcasecmp(): unbounded, case-insensitive comparison.
void CStringChecker::evalStrcasecmp(CheckerContext &C,
                                    const CallExpr *CE) const {
  //int strcasecmp(const char *s1, const char *s2);
  evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
}

/// Models strncasecmp(): case-insensitive comparison bounded by the third
/// argument.
void CStringChecker::evalStrncasecmp(CheckerContext &C,
                                     const CallExpr *CE) const {
  //int strncasecmp(const char *s1, const char *s2, size_t n);
  evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
}

/// Shared model for the string comparison functions.
///
/// Both operands are null-checked and must have a defined C string length.
/// If the two pointers may be equal, a transition with a zero result is
/// added. On the path where they differ, the result is a conjured symbol
/// unless both operands resolve to string literals and the comparison can be
/// computed; in that case the result is either bound to zero or the conjured
/// symbol is constrained to be positive or negative.
///
/// \param C           Checker context of the call being evaluated.
/// \param CE          The call; arguments 0 and 1 are the strings and, if
///                    \p IsBounded, argument 2 is the length limit.
/// \param IsBounded   The call has a length limit at argument index 2. The
///                    literal comparison is only computed when that limit has
///                    a known value.
/// \param IgnoreCase  Use StringRef::compare_insensitive instead of
///                    StringRef::compare.
void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
                                      bool IsBounded, bool IgnoreCase) const {
  CurrentFunctionDescription = "string comparison function";
  ProgramStateRef state = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the first string is non-null
  AnyArgExpr Left = {CE->getArg(0), 0};
  SVal LeftVal = state->getSVal(Left.Expression, LCtx);
  state = checkNonNull(C, state, Left, LeftVal);
  if (!state)
    return;

  // Check that the second string is non-null.
  AnyArgExpr Right = {CE->getArg(1), 1};
  SVal RightVal = state->getSVal(Right.Expression, LCtx);
  state = checkNonNull(C, state, Right, RightVal);
  if (!state)
    return;

  // Get the string length of the first string or give up.
  SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
  if (LeftLength.isUndef())
    return;

  // Get the string length of the second string or give up.
  SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
  if (RightLength.isUndef())
    return;

  // If we know the two buffers are the same, we know the result is 0.
  // First, get the two buffers' addresses. Another checker will have already
  // made sure they're not undefined.
  DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
  DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();

  // See if they are the same.
  SValBuilder &svalBuilder = C.getSValBuilder();
  DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
  ProgramStateRef StSameBuf, StNotSameBuf;
  std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);

  // If the two arguments might be the same buffer, we know the result is 0,
  // and we only need to check one size.
  if (StSameBuf) {
    StSameBuf = StSameBuf->BindExpr(CE, LCtx,
                                    svalBuilder.makeZeroVal(CE->getType()));
    C.addTransition(StSameBuf);

    // If the two arguments are GUARANTEED to be the same, we're done!
    if (!StNotSameBuf)
      return;
  }

  assert(StNotSameBuf);
  state = StNotSameBuf;

  // At this point we can go about comparing the two buffers.
  // For now, we only do this if they're both known string literals.

  // Attempt to extract string literals from both expressions.
  const StringLiteral *LeftStrLiteral =
      getCStringLiteral(C, state, Left.Expression, LeftVal);
  const StringLiteral *RightStrLiteral =
      getCStringLiteral(C, state, Right.Expression, RightVal);
  bool canComputeResult = false;
  // Default result: a fresh symbol, refined below when the literals allow it.
  SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
                                                C.blockCount());

  if (LeftStrLiteral && RightStrLiteral) {
    StringRef LeftStrRef = LeftStrLiteral->getString();
    StringRef RightStrRef = RightStrLiteral->getString();

    if (IsBounded) {
      // Get the max number of characters to compare.
      const Expr *lenExpr = CE->getArg(2);
      SVal lenVal = state->getSVal(lenExpr, LCtx);

      // If the length is known, we can get the right substrings.
      if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
        // Create substrings of each to compare the prefix.
        LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
        RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
        canComputeResult = true;
      }
    } else {
      // This is a normal, unbounded strcmp.
      canComputeResult = true;
    }

    if (canComputeResult) {
      // Real strcmp stops at null characters.
      size_t s1Term = LeftStrRef.find('\0');
      if (s1Term != StringRef::npos)
        LeftStrRef = LeftStrRef.substr(0, s1Term);

      size_t s2Term = RightStrRef.find('\0');
      if (s2Term != StringRef::npos)
        RightStrRef = RightStrRef.substr(0, s2Term);

      // Use StringRef's comparison methods to compute the actual result.
      int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
                                  : LeftStrRef.compare(RightStrRef);

      // The strcmp function returns an integer greater than, equal to, or less
      // than zero, [c11, p7.24.4.2].
      if (compareRes == 0) {
        resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
      }
      else {
        DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
        // Constrain strcmp's result range based on the result of StringRef's
        // comparison methods.
        BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
        SVal compareWithZero =
          svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
              svalBuilder.getConditionType());
        DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
        state = state->assume(compareWithZeroVal, true);
      }
    }
  }

  state = state->BindExpr(CE, LCtx, resultVal);

  // Record this as a possible path.
  C.addTransition(state);
}

/// Models strsep(). The call is ignored (no transition) unless the pointee
/// type of the first argument matches the call's return type. Both arguments
/// are null-checked. When the first argument is a location, the value it
/// currently holds becomes the return value, the searched string is
/// invalidated, and the location is rebound to a conjured symbol; otherwise
/// the return value itself is conjured.
void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
  // char *strsep(char **stringp, const char *delim);
  // Verify whether the search string parameter matches the return type.
  SourceArgExpr SearchStrPtr = {{CE->getArg(0), 0}};

  QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
  if (CharPtrTy.isNull() ||
      CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
    return;

  CurrentFunctionDescription = "strsep()";
  ProgramStateRef State = C.getState();
  const LocationContext *LCtx = C.getLocationContext();

  // Check that the search string pointer is non-null (though it may point to
  // a null string).
  SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
  State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
  if (!State)
    return;

  // Check that the delimiter string is non-null.
  AnyArgExpr DelimStr = {CE->getArg(1), 1};
  SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
  State = checkNonNull(C, State, DelimStr, DelimStrVal);
  if (!State)
    return;

  SValBuilder &SVB = C.getSValBuilder();
  SVal Result;
  if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
    // Get the current value of the search string pointer, as a char*.
    Result = State->getSVal(*SearchStrLoc, CharPtrTy);

    // Invalidate the search string, representing the change of one delimiter
    // character to NUL.
    // As the replacement never overflows, do not invalidate its super region.
    State = invalidateDestinationBufferNeverOverflows(
        C, State, SearchStrPtr.Expression, Result);

    // Overwrite the search string pointer. The new value is either an address
    // further along in the same string, or NULL if there are no more tokens.
    State = State->bindLoc(*SearchStrLoc,
                           SVB.conjureSymbolVal(getTag(),
                                                CE,
                                                LCtx,
                                                CharPtrTy,
                                                C.blockCount()),
                           LCtx);
  } else {
    assert(SearchStrVal.isUnknown());
    // Conjure a symbolic value. It's the best we can do.
    Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
  }

  // Set the return value, and finish.
  State = State->BindExpr(CE, LCtx, Result);
  C.addTransition(State);
}

// These should probably be moved into a C++ standard library checker.
2317 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const { 2318 evalStdCopyCommon(C, CE); 2319 } 2320 2321 void CStringChecker::evalStdCopyBackward(CheckerContext &C, 2322 const CallExpr *CE) const { 2323 evalStdCopyCommon(C, CE); 2324 } 2325 2326 void CStringChecker::evalStdCopyCommon(CheckerContext &C, 2327 const CallExpr *CE) const { 2328 if (!CE->getArg(2)->getType()->isPointerType()) 2329 return; 2330 2331 ProgramStateRef State = C.getState(); 2332 2333 const LocationContext *LCtx = C.getLocationContext(); 2334 2335 // template <class _InputIterator, class _OutputIterator> 2336 // _OutputIterator 2337 // copy(_InputIterator __first, _InputIterator __last, 2338 // _OutputIterator __result) 2339 2340 // Invalidate the destination buffer 2341 const Expr *Dst = CE->getArg(2); 2342 SVal DstVal = State->getSVal(Dst, LCtx); 2343 // FIXME: As we do not know how many items are copied, we also invalidate the 2344 // super region containing the target location. 2345 State = 2346 invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal); 2347 2348 SValBuilder &SVB = C.getSValBuilder(); 2349 2350 SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount()); 2351 State = State->BindExpr(CE, LCtx, ResultVal); 2352 2353 C.addTransition(State); 2354 } 2355 2356 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const { 2357 // void *memset(void *s, int c, size_t n); 2358 CurrentFunctionDescription = "memory set function"; 2359 2360 DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; 2361 AnyArgExpr CharE = {CE->getArg(1), 1}; 2362 SizeArgExpr Size = {{CE->getArg(2), 2}}; 2363 2364 ProgramStateRef State = C.getState(); 2365 2366 // See if the size argument is zero. 
2367 const LocationContext *LCtx = C.getLocationContext(); 2368 SVal SizeVal = C.getSVal(Size.Expression); 2369 QualType SizeTy = Size.Expression->getType(); 2370 2371 ProgramStateRef ZeroSize, NonZeroSize; 2372 std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy); 2373 2374 // Get the value of the memory area. 2375 SVal BufferPtrVal = C.getSVal(Buffer.Expression); 2376 2377 // If the size is zero, there won't be any actual memory access, so 2378 // just bind the return value to the buffer and return. 2379 if (ZeroSize && !NonZeroSize) { 2380 ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal); 2381 C.addTransition(ZeroSize); 2382 return; 2383 } 2384 2385 // Ensure the memory area is not null. 2386 // If it is NULL there will be a NULL pointer dereference. 2387 State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal); 2388 if (!State) 2389 return; 2390 2391 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2392 if (!State) 2393 return; 2394 2395 // According to the values of the arguments, bind the value of the second 2396 // argument to the destination buffer and set string length, or just 2397 // invalidate the destination buffer. 2398 if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression), 2399 Size.Expression, C, State)) 2400 return; 2401 2402 State = State->BindExpr(CE, LCtx, BufferPtrVal); 2403 C.addTransition(State); 2404 } 2405 2406 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const { 2407 CurrentFunctionDescription = "memory clearance function"; 2408 2409 DestinationArgExpr Buffer = {{CE->getArg(0), 0}}; 2410 SizeArgExpr Size = {{CE->getArg(1), 1}}; 2411 SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy); 2412 2413 ProgramStateRef State = C.getState(); 2414 2415 // See if the size argument is zero. 
2416 SVal SizeVal = C.getSVal(Size.Expression); 2417 QualType SizeTy = Size.Expression->getType(); 2418 2419 ProgramStateRef StateZeroSize, StateNonZeroSize; 2420 std::tie(StateZeroSize, StateNonZeroSize) = 2421 assumeZero(C, State, SizeVal, SizeTy); 2422 2423 // If the size is zero, there won't be any actual memory access, 2424 // In this case we just return. 2425 if (StateZeroSize && !StateNonZeroSize) { 2426 C.addTransition(StateZeroSize); 2427 return; 2428 } 2429 2430 // Get the value of the memory area. 2431 SVal MemVal = C.getSVal(Buffer.Expression); 2432 2433 // Ensure the memory area is not null. 2434 // If it is NULL there will be a NULL pointer dereference. 2435 State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal); 2436 if (!State) 2437 return; 2438 2439 State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write); 2440 if (!State) 2441 return; 2442 2443 if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State)) 2444 return; 2445 2446 C.addTransition(State); 2447 } 2448 2449 void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const { 2450 CurrentFunctionDescription = "'sprintf'"; 2451 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk; 2452 evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI); 2453 } 2454 2455 void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const { 2456 CurrentFunctionDescription = "'snprintf'"; 2457 bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk; 2458 evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI); 2459 } 2460 2461 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE, 2462 bool IsBounded, bool IsBuiltin) const { 2463 ProgramStateRef State = C.getState(); 2464 DestinationArgExpr Dest = {{CE->getArg(0), 0}}; 2465 2466 const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams(); 2467 assert(CE->getNumArgs() >= NumParams); 2468 2469 const auto AllArguments = 2470 
llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs()); 2471 const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams); 2472 2473 for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) { 2474 // We consider only string buffers 2475 if (const QualType type = ArgExpr->getType(); 2476 !type->isAnyPointerType() || 2477 !type->getPointeeType()->isAnyCharacterType()) 2478 continue; 2479 SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}}; 2480 2481 // Ensure the buffers do not overlap. 2482 SizeArgExpr SrcExprAsSizeDummy = { 2483 {Source.Expression, Source.ArgumentIndex}}; 2484 State = CheckOverlap( 2485 C, State, 2486 (IsBounded ? SizeArgExpr{{CE->getArg(1), 1}} : SrcExprAsSizeDummy), 2487 Dest, Source); 2488 if (!State) 2489 return; 2490 } 2491 2492 C.addTransition(State); 2493 } 2494 2495 //===----------------------------------------------------------------------===// 2496 // The driver method, and other Checker callbacks. 2497 //===----------------------------------------------------------------------===// 2498 2499 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call, 2500 CheckerContext &C) const { 2501 const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr()); 2502 if (!CE) 2503 return nullptr; 2504 2505 const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl()); 2506 if (!FD) 2507 return nullptr; 2508 2509 if (StdCopy.matches(Call)) 2510 return &CStringChecker::evalStdCopy; 2511 if (StdCopyBackward.matches(Call)) 2512 return &CStringChecker::evalStdCopyBackward; 2513 2514 // Pro-actively check that argument types are safe to do arithmetic upon. 2515 // We do not want to crash if someone accidentally passes a structure 2516 // into, say, a C++ overload of any of these functions. We could not check 2517 // that for std::copy because they may have arguments of other types. 
2518 for (auto I : CE->arguments()) { 2519 QualType T = I->getType(); 2520 if (!T->isIntegralOrEnumerationType() && !T->isPointerType()) 2521 return nullptr; 2522 } 2523 2524 const FnCheck *Callback = Callbacks.lookup(Call); 2525 if (Callback) 2526 return *Callback; 2527 2528 return nullptr; 2529 } 2530 2531 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const { 2532 FnCheck Callback = identifyCall(Call, C); 2533 2534 // If the callee isn't a string function, let another checker handle it. 2535 if (!Callback) 2536 return false; 2537 2538 // Check and evaluate the call. 2539 const auto *CE = cast<CallExpr>(Call.getOriginExpr()); 2540 Callback(this, C, CE); 2541 2542 // If the evaluate call resulted in no change, chain to the next eval call 2543 // handler. 2544 // Note, the custom CString evaluation calls assume that basic safety 2545 // properties are held. However, if the user chooses to turn off some of these 2546 // checks, we ignore the issues and leave the call evaluation to a generic 2547 // handler. 2548 return C.isDifferent(); 2549 } 2550 2551 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const { 2552 // Record string length for char a[] = "abc"; 2553 ProgramStateRef state = C.getState(); 2554 2555 for (const auto *I : DS->decls()) { 2556 const VarDecl *D = dyn_cast<VarDecl>(I); 2557 if (!D) 2558 continue; 2559 2560 // FIXME: Handle array fields of structs. 
2561 if (!D->getType()->isArrayType()) 2562 continue; 2563 2564 const Expr *Init = D->getInit(); 2565 if (!Init) 2566 continue; 2567 if (!isa<StringLiteral>(Init)) 2568 continue; 2569 2570 Loc VarLoc = state->getLValue(D, C.getLocationContext()); 2571 const MemRegion *MR = VarLoc.getAsRegion(); 2572 if (!MR) 2573 continue; 2574 2575 SVal StrVal = C.getSVal(Init); 2576 assert(StrVal.isValid() && "Initializer string is unknown or undefined"); 2577 DefinedOrUnknownSVal strLength = 2578 getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>(); 2579 2580 state = state->set<CStringLength>(MR, strLength); 2581 } 2582 2583 C.addTransition(state); 2584 } 2585 2586 ProgramStateRef 2587 CStringChecker::checkRegionChanges(ProgramStateRef state, 2588 const InvalidatedSymbols *, 2589 ArrayRef<const MemRegion *> ExplicitRegions, 2590 ArrayRef<const MemRegion *> Regions, 2591 const LocationContext *LCtx, 2592 const CallEvent *Call) const { 2593 CStringLengthTy Entries = state->get<CStringLength>(); 2594 if (Entries.isEmpty()) 2595 return state; 2596 2597 llvm::SmallPtrSet<const MemRegion *, 8> Invalidated; 2598 llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions; 2599 2600 // First build sets for the changed regions and their super-regions. 2601 for (const MemRegion *MR : Regions) { 2602 Invalidated.insert(MR); 2603 2604 SuperRegions.insert(MR); 2605 while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) { 2606 MR = SR->getSuperRegion(); 2607 SuperRegions.insert(MR); 2608 } 2609 } 2610 2611 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2612 2613 // Then loop over the entries in the current state. 2614 for (const MemRegion *MR : llvm::make_first_range(Entries)) { 2615 // Is this entry for a super-region of a changed region? 2616 if (SuperRegions.count(MR)) { 2617 Entries = F.remove(Entries, MR); 2618 continue; 2619 } 2620 2621 // Is this entry for a sub-region of a changed region? 
2622 const MemRegion *Super = MR; 2623 while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) { 2624 Super = SR->getSuperRegion(); 2625 if (Invalidated.count(Super)) { 2626 Entries = F.remove(Entries, MR); 2627 break; 2628 } 2629 } 2630 } 2631 2632 return state->set<CStringLength>(Entries); 2633 } 2634 2635 void CStringChecker::checkLiveSymbols(ProgramStateRef state, 2636 SymbolReaper &SR) const { 2637 // Mark all symbols in our string length map as valid. 2638 CStringLengthTy Entries = state->get<CStringLength>(); 2639 2640 for (SVal Len : llvm::make_second_range(Entries)) { 2641 for (SymbolRef Sym : Len.symbols()) 2642 SR.markInUse(Sym); 2643 } 2644 } 2645 2646 void CStringChecker::checkDeadSymbols(SymbolReaper &SR, 2647 CheckerContext &C) const { 2648 ProgramStateRef state = C.getState(); 2649 CStringLengthTy Entries = state->get<CStringLength>(); 2650 if (Entries.isEmpty()) 2651 return; 2652 2653 CStringLengthTy::Factory &F = state->get_context<CStringLength>(); 2654 for (auto [Reg, Len] : Entries) { 2655 if (SymbolRef Sym = Len.getAsSymbol()) { 2656 if (SR.isDead(Sym)) 2657 Entries = F.remove(Entries, Reg); 2658 } 2659 } 2660 2661 state = state->set<CStringLength>(Entries); 2662 C.addTransition(state); 2663 } 2664 2665 void ento::registerCStringModeling(CheckerManager &Mgr) { 2666 Mgr.registerChecker<CStringChecker>(); 2667 } 2668 2669 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) { 2670 return true; 2671 } 2672 2673 #define REGISTER_CHECKER(name) \ 2674 void ento::register##name(CheckerManager &mgr) { \ 2675 CStringChecker *checker = mgr.getChecker<CStringChecker>(); \ 2676 checker->Filter.Check##name = true; \ 2677 checker->Filter.CheckName##name = mgr.getCurrentCheckerName(); \ 2678 } \ 2679 \ 2680 bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; } 2681 2682 REGISTER_CHECKER(CStringNullArg) 2683 REGISTER_CHECKER(CStringOutOfBounds) 2684 REGISTER_CHECKER(CStringBufferOverlap) 2685 
REGISTER_CHECKER(CStringNotNullTerm) 2686 REGISTER_CHECKER(CStringUninitializedRead) 2687