1 //= CStringChecker.cpp - Checks calls to C string functions --------*- C++ -*-//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This defines CStringChecker, which is an assortment of checks on calls
10 // to functions in <string.h>.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "InterCheckerAPI.h"
15 #include "clang/Basic/Builtins.h"
16 #include "clang/Basic/CharInfo.h"
17 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
18 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
19 #include "clang/StaticAnalyzer/Core/Checker.h"
20 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
21 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
22 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/DynamicExtent.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
26 #include "llvm/ADT/STLExtras.h"
27 #include "llvm/ADT/SmallString.h"
28 #include "llvm/ADT/StringExtras.h"
29 #include "llvm/Support/raw_ostream.h"
30 #include <functional>
31 #include <optional>
32 
33 using namespace clang;
34 using namespace ento;
35 using namespace std::placeholders;
36 
37 namespace {
/// An argument of the call being analyzed: the argument expression together
/// with its zero-based position in the call (diagnostics print
/// ArgumentIndex + 1 as an ordinal, e.g. "1st argument").
struct AnyArgExpr {
  const Expr *Expression;
  unsigned ArgumentIndex;
};
// Tag types that add no members to AnyArgExpr; they only let signatures state
// which role (source, destination, size) an argument plays.
struct SourceArgExpr : AnyArgExpr {};
struct DestinationArgExpr : AnyArgExpr {};
struct SizeArgExpr : AnyArgExpr {};
45 
/// Buffer type used to build and return out-of-bound diagnostic messages.
using ErrorMessage = SmallString<128>;
/// Whether a checked buffer access writes to or reads from the buffer.
enum class AccessKind { write, read };
48 
49 static ErrorMessage createOutOfBoundErrorMsg(StringRef FunctionDescription,
50                                              AccessKind Access) {
51   ErrorMessage Message;
52   llvm::raw_svector_ostream Os(Message);
53 
54   // Function classification like: Memory copy function
55   Os << toUppercase(FunctionDescription.front())
56      << &FunctionDescription.data()[1];
57 
58   if (Access == AccessKind::write) {
59     Os << " overflows the destination buffer";
60   } else { // read access
61     Os << " accesses out-of-bound array element";
62   }
63 
64   return Message;
65 }
66 
/// Selects which concatenation function, if any, a shared copy/concatenate
/// routine is modelling.
enum class ConcatFnKind { none = 0, strcat = 1, strlcat = 2 };

/// Distinguishes functions operating on `char` from those operating on
/// `wchar_t`.
enum class CharKind { Regular = 0, Wide };
// Short aliases for the enumerators above, used when binding callbacks.
constexpr CharKind CK_Regular = CharKind::Regular;
constexpr CharKind CK_Wide = CharKind::Wide;
72 
73 static QualType getCharPtrType(ASTContext &Ctx, CharKind CK) {
74   return Ctx.getPointerType(CK == CharKind::Regular ? Ctx.CharTy
75                                                     : Ctx.WideCharTy);
76 }
77 
/// Checker for calls to C string functions.
///
/// It subscribes to call evaluation (eval::Call), declaration statements,
/// live/dead symbol tracking and region-change notifications. Calls are
/// matched against the Callbacks table below and dispatched to the
/// corresponding eval* member function.
class CStringChecker : public Checker< eval::Call,
                                         check::PreStmt<DeclStmt>,
                                         check::LiveSymbols,
                                         check::DeadSymbols,
                                         check::RegionChanges
                                         > {
  // Bug types, created lazily by the emit*Bug() helpers on first use.
  mutable std::unique_ptr<BugType> BT_Null, BT_Bounds, BT_Overlap,
      BT_NotCString, BT_AdditionOverflow, BT_UninitRead;

  // Human-readable classification of the function currently being modelled;
  // it is interpolated into diagnostic messages (see checkNonNull and
  // CheckLocation). NOTE(review): presumably set by each eval* routine before
  // running its checks -- confirm against the definitions.
  mutable const char *CurrentFunctionDescription = nullptr;

public:
  /// The filter is used to filter out the diagnostics which are not enabled by
  /// the user.
  struct CStringChecksFilter {
    // One enable flag per diagnostic kind.
    bool CheckCStringNullArg = false;
    bool CheckCStringOutOfBounds = false;
    bool CheckCStringBufferOverlap = false;
    bool CheckCStringNotNullTerm = false;
    bool CheckCStringUninitializedRead = false;

    // Checker names under which the corresponding bug types are reported.
    CheckerNameRef CheckNameCStringNullArg;
    CheckerNameRef CheckNameCStringOutOfBounds;
    CheckerNameRef CheckNameCStringBufferOverlap;
    CheckerNameRef CheckNameCStringNotNullTerm;
    CheckerNameRef CheckNameCStringUninitializedRead;
  };

  CStringChecksFilter Filter;

  // Returns the address of a function-local static, i.e. a unique tag value.
  static void *getTag() { static int tag; return &tag; }

  // Checker callbacks corresponding to the Checker<> template arguments.
  bool evalCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPreStmt(const DeclStmt *DS, CheckerContext &C) const;
  void checkLiveSymbols(ProgramStateRef state, SymbolReaper &SR) const;
  void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;

  ProgramStateRef
    checkRegionChanges(ProgramStateRef state,
                       const InvalidatedSymbols *,
                       ArrayRef<const MemRegion *> ExplicitRegions,
                       ArrayRef<const MemRegion *> Regions,
                       const LocationContext *LCtx,
                       const CallEvent *Call) const;

  /// Signature shared by all handlers stored in Callbacks: the checker
  /// instance, the checker context and the call expression.
  using FnCheck = std::function<void(const CStringChecker *, CheckerContext &,
                                     const CallExpr *)>;

  // Maps call descriptions (flags, function name, argument count) to the
  // member function modelling that call. Handlers that take an additional
  // CharKind parameter are adapted to FnCheck with std::bind.
  // Note that "wmempcpy" is the only entry described with CDF_None; every
  // other entry uses CDF_MaybeBuiltin.
  CallDescriptionMap<FnCheck> Callbacks = {
      {{CDF_MaybeBuiltin, {"memcpy"}, 3},
       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Regular)},
      {{CDF_MaybeBuiltin, {"wmemcpy"}, 3},
       std::bind(&CStringChecker::evalMemcpy, _1, _2, _3, CK_Wide)},
      {{CDF_MaybeBuiltin, {"mempcpy"}, 3},
       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Regular)},
      {{CDF_None, {"wmempcpy"}, 3},
       std::bind(&CStringChecker::evalMempcpy, _1, _2, _3, CK_Wide)},
      {{CDF_MaybeBuiltin, {"memcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
      {{CDF_MaybeBuiltin, {"wmemcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Wide)},
      {{CDF_MaybeBuiltin, {"memmove"}, 3},
       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Regular)},
      {{CDF_MaybeBuiltin, {"wmemmove"}, 3},
       std::bind(&CStringChecker::evalMemmove, _1, _2, _3, CK_Wide)},
      {{CDF_MaybeBuiltin, {"memset"}, 3}, &CStringChecker::evalMemset},
      {{CDF_MaybeBuiltin, {"explicit_memset"}, 3}, &CStringChecker::evalMemset},
      {{CDF_MaybeBuiltin, {"strcpy"}, 2}, &CStringChecker::evalStrcpy},
      {{CDF_MaybeBuiltin, {"strncpy"}, 3}, &CStringChecker::evalStrncpy},
      {{CDF_MaybeBuiltin, {"stpcpy"}, 2}, &CStringChecker::evalStpcpy},
      {{CDF_MaybeBuiltin, {"strlcpy"}, 3}, &CStringChecker::evalStrlcpy},
      {{CDF_MaybeBuiltin, {"strcat"}, 2}, &CStringChecker::evalStrcat},
      {{CDF_MaybeBuiltin, {"strncat"}, 3}, &CStringChecker::evalStrncat},
      {{CDF_MaybeBuiltin, {"strlcat"}, 3}, &CStringChecker::evalStrlcat},
      {{CDF_MaybeBuiltin, {"strlen"}, 1}, &CStringChecker::evalstrLength},
      {{CDF_MaybeBuiltin, {"wcslen"}, 1}, &CStringChecker::evalstrLength},
      {{CDF_MaybeBuiltin, {"strnlen"}, 2}, &CStringChecker::evalstrnLength},
      {{CDF_MaybeBuiltin, {"wcsnlen"}, 2}, &CStringChecker::evalstrnLength},
      {{CDF_MaybeBuiltin, {"strcmp"}, 2}, &CStringChecker::evalStrcmp},
      {{CDF_MaybeBuiltin, {"strncmp"}, 3}, &CStringChecker::evalStrncmp},
      {{CDF_MaybeBuiltin, {"strcasecmp"}, 2}, &CStringChecker::evalStrcasecmp},
      {{CDF_MaybeBuiltin, {"strncasecmp"}, 3},
       &CStringChecker::evalStrncasecmp},
      {{CDF_MaybeBuiltin, {"strsep"}, 2}, &CStringChecker::evalStrsep},
      {{CDF_MaybeBuiltin, {"bcopy"}, 3}, &CStringChecker::evalBcopy},
      {{CDF_MaybeBuiltin, {"bcmp"}, 3},
       std::bind(&CStringChecker::evalMemcmp, _1, _2, _3, CK_Regular)},
      {{CDF_MaybeBuiltin, {"bzero"}, 2}, &CStringChecker::evalBzero},
      {{CDF_MaybeBuiltin, {"explicit_bzero"}, 2}, &CStringChecker::evalBzero},
      {{CDF_MaybeBuiltin, {"sprintf"}, 2}, &CStringChecker::evalSprintf},
      {{CDF_MaybeBuiltin, {"snprintf"}, 2}, &CStringChecker::evalSnprintf},
  };

  // These require a bit of special handling.
  CallDescription StdCopy{{"std", "copy"}, 3},
      StdCopyBackward{{"std", "copy_backward"}, 3};

  // Call identification and the memory-copy family.
  FnCheck identifyCall(const CallEvent &Call, CheckerContext &C) const;
  void evalMemcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
  void evalMempcpy(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
  void evalMemmove(CheckerContext &C, const CallExpr *CE, CharKind CK) const;
  void evalBcopy(CheckerContext &C, const CallExpr *CE) const;
  void evalCopyCommon(CheckerContext &C, const CallExpr *CE,
                      ProgramStateRef state, SizeArgExpr Size,
                      DestinationArgExpr Dest, SourceArgExpr Source,
                      bool Restricted, bool IsMempcpy, CharKind CK) const;

  void evalMemcmp(CheckerContext &C, const CallExpr *CE, CharKind CK) const;

  // String length family.
  void evalstrLength(CheckerContext &C, const CallExpr *CE) const;
  void evalstrnLength(CheckerContext &C, const CallExpr *CE) const;
  void evalstrLengthCommon(CheckerContext &C,
                           const CallExpr *CE,
                           bool IsStrnlen = false) const;

  // String copy family.
  void evalStrcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStpcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStrlcpy(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcpyCommon(CheckerContext &C, const CallExpr *CE, bool ReturnEnd,
                        bool IsBounded, ConcatFnKind appendK,
                        bool returnPtr = true) const;

  // String concatenation family.
  void evalStrcat(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncat(CheckerContext &C, const CallExpr *CE) const;
  void evalStrlcat(CheckerContext &C, const CallExpr *CE) const;

  // String comparison family.
  void evalStrcmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcasecmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrncasecmp(CheckerContext &C, const CallExpr *CE) const;
  void evalStrcmpCommon(CheckerContext &C,
                        const CallExpr *CE,
                        bool IsBounded = false,
                        bool IgnoreCase = false) const;

  void evalStrsep(CheckerContext &C, const CallExpr *CE) const;

  // std::copy / std::copy_backward and the memory-set family.
  void evalStdCopy(CheckerContext &C, const CallExpr *CE) const;
  void evalStdCopyBackward(CheckerContext &C, const CallExpr *CE) const;
  void evalStdCopyCommon(CheckerContext &C, const CallExpr *CE) const;
  void evalMemset(CheckerContext &C, const CallExpr *CE) const;
  void evalBzero(CheckerContext &C, const CallExpr *CE) const;

  // Formatted output family.
  void evalSprintf(CheckerContext &C, const CallExpr *CE) const;
  void evalSnprintf(CheckerContext &C, const CallExpr *CE) const;
  void evalSprintfCommon(CheckerContext &C, const CallExpr *CE, bool IsBounded,
                         bool IsBuiltin) const;

  // Utility methods

  /// Splits \p state on whether \p V equals the zero value of type \p Ty.
  /// Returns (state where V is zero, state where V is non-zero); if \p V is
  /// not a defined value, both elements are the unchanged input state.
  std::pair<ProgramStateRef , ProgramStateRef >
  static assumeZero(CheckerContext &C,
                    ProgramStateRef state, SVal V, QualType Ty);

  static ProgramStateRef setCStringLength(ProgramStateRef state,
                                              const MemRegion *MR,
                                              SVal strLength);
  static SVal getCStringLengthForRegion(CheckerContext &C,
                                        ProgramStateRef &state,
                                        const Expr *Ex,
                                        const MemRegion *MR,
                                        bool hypothetical);
  SVal getCStringLength(CheckerContext &C,
                        ProgramStateRef &state,
                        const Expr *Ex,
                        SVal Buf,
                        bool hypothetical = false) const;

  const StringLiteral *getCStringLiteral(CheckerContext &C,
                                         ProgramStateRef &state,
                                         const Expr *expr,
                                         SVal val) const;

  /// Invalidate the destination buffer determined by characters copied.
  static ProgramStateRef
  invalidateDestinationBufferBySize(CheckerContext &C, ProgramStateRef S,
                                    const Expr *BufE, SVal BufV, SVal SizeV,
                                    QualType SizeTy);

  /// Operation never overflows, do not invalidate the super region.
  static ProgramStateRef invalidateDestinationBufferNeverOverflows(
      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);

  /// We do not know whether the operation can overflow (e.g. size is unknown),
  /// invalidate the super region and escape related pointers.
  static ProgramStateRef invalidateDestinationBufferAlwaysEscapeSuperRegion(
      CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV);

  /// Invalidate the source buffer for escaping pointers.
  static ProgramStateRef invalidateSourceBuffer(CheckerContext &C,
                                                ProgramStateRef S,
                                                const Expr *BufE, SVal BufV);

  /// @param InvalidationTraitOperations Determine how to invalidate the
  /// MemRegion by setting the invalidation traits. Return true to cause pointer
  /// escape, or false otherwise.
  static ProgramStateRef invalidateBufferAux(
      CheckerContext &C, ProgramStateRef State, const Expr *Ex, SVal V,
      llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
                              const MemRegion *)>
          InvalidationTraitOperations);

  static bool SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
                              const MemRegion *MR);

  static bool memsetAux(const Expr *DstBuffer, SVal CharE,
                        const Expr *Size, CheckerContext &C,
                        ProgramStateRef &State);

  // Re-usable checks
  // Each check returns the (possibly refined) state to continue with, or a
  // null state when the check failed; a null input state is propagated.

  /// Fails when \p l is known to be null; otherwise continues under the
  /// assumption that it is non-null.
  ProgramStateRef checkNonNull(CheckerContext &C, ProgramStateRef State,
                               AnyArgExpr Arg, SVal l) const;
  /// Fails when \p Element is known to lie outside its enclosing region.
  ProgramStateRef CheckLocation(CheckerContext &C, ProgramStateRef state,
                                AnyArgExpr Buffer, SVal Element,
                                AccessKind Access,
                                CharKind CK = CharKind::Regular) const;
  /// Checks that \p Buffer is non-null and, when out-of-bounds checking is
  /// enabled, that its element at offset Size - 1 is in bounds.
  ProgramStateRef CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
                                    AnyArgExpr Buffer, SizeArgExpr Size,
                                    AccessKind Access,
                                    CharKind CK = CharKind::Regular) const;
  /// Fails when the two buffers are known to overlap within \p Size elements.
  ProgramStateRef CheckOverlap(CheckerContext &C, ProgramStateRef state,
                               SizeArgExpr Size, AnyArgExpr First,
                               AnyArgExpr Second,
                               CharKind CK = CharKind::Regular) const;
  // Bug report emitters; each lazily creates its BugType on first use.
  void emitOverlapBug(CheckerContext &C,
                      ProgramStateRef state,
                      const Stmt *First,
                      const Stmt *Second) const;

  void emitNullArgBug(CheckerContext &C, ProgramStateRef State, const Stmt *S,
                      StringRef WarningMsg) const;
  void emitOutOfBoundsBug(CheckerContext &C, ProgramStateRef State,
                          const Stmt *S, StringRef WarningMsg) const;
  void emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
                         const Stmt *S, StringRef WarningMsg) const;
  void emitAdditionOverflowBug(CheckerContext &C, ProgramStateRef State) const;
  void emitUninitializedReadBug(CheckerContext &C, ProgramStateRef State,
                             const Expr *E) const;
  ProgramStateRef checkAdditionOverflow(CheckerContext &C,
                                            ProgramStateRef state,
                                            NonLoc left,
                                            NonLoc right) const;

  // Return true if the destination buffer of the copy function may be in bound.
  // Expects SVal of Size to be positive and unsigned.
  // Expects SVal of FirstBuf to be a FieldRegion.
  // NOTE(review): "Size" and "FirstBuf" above presumably refer to the
  // LengthVal and BufVal parameters -- confirm against the definition.
  static bool isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
                                SVal BufVal, QualType BufTy, SVal LengthVal,
                                QualType LengthTy);
};
328 
329 } //end anonymous namespace
330 
// Program-state map named CStringLength, keyed by memory region with an SVal
// per entry. NOTE(review): presumably the string length tracked for each
// region (cf. setCStringLength / getCStringLength) -- confirm against their
// definitions.
REGISTER_MAP_WITH_PROGRAMSTATE(CStringLength, const MemRegion *, SVal)
332 
333 //===----------------------------------------------------------------------===//
334 // Individual checks and utility methods.
335 //===----------------------------------------------------------------------===//
336 
337 std::pair<ProgramStateRef , ProgramStateRef >
338 CStringChecker::assumeZero(CheckerContext &C, ProgramStateRef state, SVal V,
339                            QualType Ty) {
340   std::optional<DefinedSVal> val = V.getAs<DefinedSVal>();
341   if (!val)
342     return std::pair<ProgramStateRef , ProgramStateRef >(state, state);
343 
344   SValBuilder &svalBuilder = C.getSValBuilder();
345   DefinedOrUnknownSVal zero = svalBuilder.makeZeroVal(Ty);
346   return state->assume(svalBuilder.evalEQ(state, *val, zero));
347 }
348 
349 ProgramStateRef CStringChecker::checkNonNull(CheckerContext &C,
350                                              ProgramStateRef State,
351                                              AnyArgExpr Arg, SVal l) const {
352   // If a previous check has failed, propagate the failure.
353   if (!State)
354     return nullptr;
355 
356   ProgramStateRef stateNull, stateNonNull;
357   std::tie(stateNull, stateNonNull) =
358       assumeZero(C, State, l, Arg.Expression->getType());
359 
360   if (stateNull && !stateNonNull) {
361     if (Filter.CheckCStringNullArg) {
362       SmallString<80> buf;
363       llvm::raw_svector_ostream OS(buf);
364       assert(CurrentFunctionDescription);
365       OS << "Null pointer passed as " << (Arg.ArgumentIndex + 1)
366          << llvm::getOrdinalSuffix(Arg.ArgumentIndex + 1) << " argument to "
367          << CurrentFunctionDescription;
368 
369       emitNullArgBug(C, stateNull, Arg.Expression, OS.str());
370     }
371     return nullptr;
372   }
373 
374   // From here on, assume that the value is non-null.
375   assert(stateNonNull);
376   return stateNonNull;
377 }
378 
379 // FIXME: This was originally copied from ArrayBoundChecker.cpp. Refactor?
380 ProgramStateRef CStringChecker::CheckLocation(CheckerContext &C,
381                                               ProgramStateRef state,
382                                               AnyArgExpr Buffer, SVal Element,
383                                               AccessKind Access,
384                                               CharKind CK) const {
385 
386   // If a previous check has failed, propagate the failure.
387   if (!state)
388     return nullptr;
389 
390   // Check for out of bound array element access.
391   const MemRegion *R = Element.getAsRegion();
392   if (!R)
393     return state;
394 
395   const auto *ER = dyn_cast<ElementRegion>(R);
396   if (!ER)
397     return state;
398 
399   SValBuilder &svalBuilder = C.getSValBuilder();
400   ASTContext &Ctx = svalBuilder.getContext();
401 
402   // Get the index of the accessed element.
403   NonLoc Idx = ER->getIndex();
404 
405   if (CK == CharKind::Regular) {
406     if (ER->getValueType() != Ctx.CharTy)
407       return state;
408   } else {
409     if (ER->getValueType() != Ctx.WideCharTy)
410       return state;
411 
412     QualType SizeTy = Ctx.getSizeType();
413     NonLoc WideSize =
414         svalBuilder
415             .makeIntVal(Ctx.getTypeSizeInChars(Ctx.WideCharTy).getQuantity(),
416                         SizeTy)
417             .castAs<NonLoc>();
418     SVal Offset = svalBuilder.evalBinOpNN(state, BO_Mul, Idx, WideSize, SizeTy);
419     if (Offset.isUnknown())
420       return state;
421     Idx = Offset.castAs<NonLoc>();
422   }
423 
424   // Get the size of the array.
425   const auto *superReg = cast<SubRegion>(ER->getSuperRegion());
426   DefinedOrUnknownSVal Size =
427       getDynamicExtent(state, superReg, C.getSValBuilder());
428 
429   ProgramStateRef StInBound, StOutBound;
430   std::tie(StInBound, StOutBound) = state->assumeInBoundDual(Idx, Size);
431   if (StOutBound && !StInBound) {
432     // These checks are either enabled by the CString out-of-bounds checker
433     // explicitly or implicitly by the Malloc checker.
434     // In the latter case we only do modeling but do not emit warning.
435     if (!Filter.CheckCStringOutOfBounds)
436       return nullptr;
437 
438     // Emit a bug report.
439     ErrorMessage Message =
440         createOutOfBoundErrorMsg(CurrentFunctionDescription, Access);
441     emitOutOfBoundsBug(C, StOutBound, Buffer.Expression, Message);
442     return nullptr;
443   }
444 
445   // Ensure that we wouldn't read uninitialized value.
446   if (Access == AccessKind::read) {
447     if (Filter.CheckCStringUninitializedRead &&
448         StInBound->getSVal(ER).isUndef()) {
449       emitUninitializedReadBug(C, StInBound, Buffer.Expression);
450       return nullptr;
451     }
452   }
453 
454   // Array bound check succeeded.  From this point forward the array bound
455   // should always succeed.
456   return StInBound;
457 }
458 
459 ProgramStateRef
460 CStringChecker::CheckBufferAccess(CheckerContext &C, ProgramStateRef State,
461                                   AnyArgExpr Buffer, SizeArgExpr Size,
462                                   AccessKind Access, CharKind CK) const {
463   // If a previous check has failed, propagate the failure.
464   if (!State)
465     return nullptr;
466 
467   SValBuilder &svalBuilder = C.getSValBuilder();
468   ASTContext &Ctx = svalBuilder.getContext();
469 
470   QualType SizeTy = Size.Expression->getType();
471   QualType PtrTy = getCharPtrType(Ctx, CK);
472 
473   // Check that the first buffer is non-null.
474   SVal BufVal = C.getSVal(Buffer.Expression);
475   State = checkNonNull(C, State, Buffer, BufVal);
476   if (!State)
477     return nullptr;
478 
479   // If out-of-bounds checking is turned off, skip the rest.
480   if (!Filter.CheckCStringOutOfBounds)
481     return State;
482 
483   // Get the access length and make sure it is known.
484   // FIXME: This assumes the caller has already checked that the access length
485   // is positive. And that it's unsigned.
486   SVal LengthVal = C.getSVal(Size.Expression);
487   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
488   if (!Length)
489     return State;
490 
491   // Compute the offset of the last element to be accessed: size-1.
492   NonLoc One = svalBuilder.makeIntVal(1, SizeTy).castAs<NonLoc>();
493   SVal Offset = svalBuilder.evalBinOpNN(State, BO_Sub, *Length, One, SizeTy);
494   if (Offset.isUnknown())
495     return nullptr;
496   NonLoc LastOffset = Offset.castAs<NonLoc>();
497 
498   // Check that the first buffer is sufficiently long.
499   SVal BufStart =
500       svalBuilder.evalCast(BufVal, PtrTy, Buffer.Expression->getType());
501   if (std::optional<Loc> BufLoc = BufStart.getAs<Loc>()) {
502 
503     SVal BufEnd =
504         svalBuilder.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
505     State = CheckLocation(C, State, Buffer, BufEnd, Access, CK);
506 
507     // If the buffer isn't large enough, abort.
508     if (!State)
509       return nullptr;
510   }
511 
512   // Large enough or not, return this state!
513   return State;
514 }
515 
516 ProgramStateRef CStringChecker::CheckOverlap(CheckerContext &C,
517                                              ProgramStateRef state,
518                                              SizeArgExpr Size, AnyArgExpr First,
519                                              AnyArgExpr Second,
520                                              CharKind CK) const {
521   if (!Filter.CheckCStringBufferOverlap)
522     return state;
523 
524   // Do a simple check for overlap: if the two arguments are from the same
525   // buffer, see if the end of the first is greater than the start of the second
526   // or vice versa.
527 
528   // If a previous check has failed, propagate the failure.
529   if (!state)
530     return nullptr;
531 
532   ProgramStateRef stateTrue, stateFalse;
533 
534   // Assume different address spaces cannot overlap.
535   if (First.Expression->getType()->getPointeeType().getAddressSpace() !=
536       Second.Expression->getType()->getPointeeType().getAddressSpace())
537     return state;
538 
539   // Get the buffer values and make sure they're known locations.
540   const LocationContext *LCtx = C.getLocationContext();
541   SVal firstVal = state->getSVal(First.Expression, LCtx);
542   SVal secondVal = state->getSVal(Second.Expression, LCtx);
543 
544   std::optional<Loc> firstLoc = firstVal.getAs<Loc>();
545   if (!firstLoc)
546     return state;
547 
548   std::optional<Loc> secondLoc = secondVal.getAs<Loc>();
549   if (!secondLoc)
550     return state;
551 
552   // Are the two values the same?
553   SValBuilder &svalBuilder = C.getSValBuilder();
554   std::tie(stateTrue, stateFalse) =
555       state->assume(svalBuilder.evalEQ(state, *firstLoc, *secondLoc));
556 
557   if (stateTrue && !stateFalse) {
558     // If the values are known to be equal, that's automatically an overlap.
559     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
560     return nullptr;
561   }
562 
563   // assume the two expressions are not equal.
564   assert(stateFalse);
565   state = stateFalse;
566 
567   // Which value comes first?
568   QualType cmpTy = svalBuilder.getConditionType();
569   SVal reverse =
570       svalBuilder.evalBinOpLL(state, BO_GT, *firstLoc, *secondLoc, cmpTy);
571   std::optional<DefinedOrUnknownSVal> reverseTest =
572       reverse.getAs<DefinedOrUnknownSVal>();
573   if (!reverseTest)
574     return state;
575 
576   std::tie(stateTrue, stateFalse) = state->assume(*reverseTest);
577   if (stateTrue) {
578     if (stateFalse) {
579       // If we don't know which one comes first, we can't perform this test.
580       return state;
581     } else {
582       // Switch the values so that firstVal is before secondVal.
583       std::swap(firstLoc, secondLoc);
584 
585       // Switch the Exprs as well, so that they still correspond.
586       std::swap(First, Second);
587     }
588   }
589 
590   // Get the length, and make sure it too is known.
591   SVal LengthVal = state->getSVal(Size.Expression, LCtx);
592   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
593   if (!Length)
594     return state;
595 
596   // Convert the first buffer's start address to char*.
597   // Bail out if the cast fails.
598   ASTContext &Ctx = svalBuilder.getContext();
599   QualType CharPtrTy = getCharPtrType(Ctx, CK);
600   SVal FirstStart =
601       svalBuilder.evalCast(*firstLoc, CharPtrTy, First.Expression->getType());
602   std::optional<Loc> FirstStartLoc = FirstStart.getAs<Loc>();
603   if (!FirstStartLoc)
604     return state;
605 
606   // Compute the end of the first buffer. Bail out if THAT fails.
607   SVal FirstEnd = svalBuilder.evalBinOpLN(state, BO_Add, *FirstStartLoc,
608                                           *Length, CharPtrTy);
609   std::optional<Loc> FirstEndLoc = FirstEnd.getAs<Loc>();
610   if (!FirstEndLoc)
611     return state;
612 
613   // Is the end of the first buffer past the start of the second buffer?
614   SVal Overlap =
615       svalBuilder.evalBinOpLL(state, BO_GT, *FirstEndLoc, *secondLoc, cmpTy);
616   std::optional<DefinedOrUnknownSVal> OverlapTest =
617       Overlap.getAs<DefinedOrUnknownSVal>();
618   if (!OverlapTest)
619     return state;
620 
621   std::tie(stateTrue, stateFalse) = state->assume(*OverlapTest);
622 
623   if (stateTrue && !stateFalse) {
624     // Overlap!
625     emitOverlapBug(C, stateTrue, First.Expression, Second.Expression);
626     return nullptr;
627   }
628 
629   // assume the two expressions don't overlap.
630   assert(stateFalse);
631   return stateFalse;
632 }
633 
634 void CStringChecker::emitOverlapBug(CheckerContext &C, ProgramStateRef state,
635                                   const Stmt *First, const Stmt *Second) const {
636   ExplodedNode *N = C.generateErrorNode(state);
637   if (!N)
638     return;
639 
640   if (!BT_Overlap)
641     BT_Overlap.reset(new BugType(Filter.CheckNameCStringBufferOverlap,
642                                  categories::UnixAPI, "Improper arguments"));
643 
644   // Generate a report for this bug.
645   auto report = std::make_unique<PathSensitiveBugReport>(
646       *BT_Overlap, "Arguments must not be overlapping buffers", N);
647   report->addRange(First->getSourceRange());
648   report->addRange(Second->getSourceRange());
649 
650   C.emitReport(std::move(report));
651 }
652 
653 void CStringChecker::emitNullArgBug(CheckerContext &C, ProgramStateRef State,
654                                     const Stmt *S, StringRef WarningMsg) const {
655   if (ExplodedNode *N = C.generateErrorNode(State)) {
656     if (!BT_Null)
657       BT_Null.reset(new BuiltinBug(
658           Filter.CheckNameCStringNullArg, categories::UnixAPI,
659           "Null pointer argument in call to byte string function"));
660 
661     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Null.get());
662     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N);
663     Report->addRange(S->getSourceRange());
664     if (const auto *Ex = dyn_cast<Expr>(S))
665       bugreporter::trackExpressionValue(N, Ex, *Report);
666     C.emitReport(std::move(Report));
667   }
668 }
669 
670 void CStringChecker::emitUninitializedReadBug(CheckerContext &C,
671                                               ProgramStateRef State,
672                                               const Expr *E) const {
673   if (ExplodedNode *N = C.generateErrorNode(State)) {
674     const char *Msg =
675         "Bytes string function accesses uninitialized/garbage values";
676     if (!BT_UninitRead)
677       BT_UninitRead.reset(
678           new BuiltinBug(Filter.CheckNameCStringUninitializedRead,
679                          "Accessing unitialized/garbage values", Msg));
680 
681     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_UninitRead.get());
682 
683     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
684     Report->addRange(E->getSourceRange());
685     bugreporter::trackExpressionValue(N, E, *Report);
686     C.emitReport(std::move(Report));
687   }
688 }
689 
690 void CStringChecker::emitOutOfBoundsBug(CheckerContext &C,
691                                         ProgramStateRef State, const Stmt *S,
692                                         StringRef WarningMsg) const {
693   if (ExplodedNode *N = C.generateErrorNode(State)) {
694     if (!BT_Bounds)
695       BT_Bounds.reset(new BuiltinBug(
696           Filter.CheckCStringOutOfBounds ? Filter.CheckNameCStringOutOfBounds
697                                          : Filter.CheckNameCStringNullArg,
698           "Out-of-bound array access",
699           "Byte string function accesses out-of-bound array element"));
700 
701     BuiltinBug *BT = static_cast<BuiltinBug *>(BT_Bounds.get());
702 
703     // FIXME: It would be nice to eventually make this diagnostic more clear,
704     // e.g., by referencing the original declaration or by saying *why* this
705     // reference is outside the range.
706     auto Report = std::make_unique<PathSensitiveBugReport>(*BT, WarningMsg, N);
707     Report->addRange(S->getSourceRange());
708     C.emitReport(std::move(Report));
709   }
710 }
711 
712 void CStringChecker::emitNotCStringBug(CheckerContext &C, ProgramStateRef State,
713                                        const Stmt *S,
714                                        StringRef WarningMsg) const {
715   if (ExplodedNode *N = C.generateNonFatalErrorNode(State)) {
716     if (!BT_NotCString)
717       BT_NotCString.reset(new BuiltinBug(
718           Filter.CheckNameCStringNotNullTerm, categories::UnixAPI,
719           "Argument is not a null-terminated string."));
720 
721     auto Report =
722         std::make_unique<PathSensitiveBugReport>(*BT_NotCString, WarningMsg, N);
723 
724     Report->addRange(S->getSourceRange());
725     C.emitReport(std::move(Report));
726   }
727 }
728 
729 void CStringChecker::emitAdditionOverflowBug(CheckerContext &C,
730                                              ProgramStateRef State) const {
731   if (ExplodedNode *N = C.generateErrorNode(State)) {
732     if (!BT_AdditionOverflow)
733       BT_AdditionOverflow.reset(
734           new BuiltinBug(Filter.CheckNameCStringOutOfBounds, "API",
735                          "Sum of expressions causes overflow."));
736 
737     // This isn't a great error message, but this should never occur in real
738     // code anyway -- you'd have to create a buffer longer than a size_t can
739     // represent, which is sort of a contradiction.
740     const char *WarningMsg =
741         "This expression will create a string whose length is too big to "
742         "be represented as a size_t";
743 
744     auto Report = std::make_unique<PathSensitiveBugReport>(*BT_AdditionOverflow,
745                                                            WarningMsg, N);
746     C.emitReport(std::move(Report));
747   }
748 }
749 
750 ProgramStateRef CStringChecker::checkAdditionOverflow(CheckerContext &C,
751                                                      ProgramStateRef state,
752                                                      NonLoc left,
753                                                      NonLoc right) const {
754   // If out-of-bounds checking is turned off, skip the rest.
755   if (!Filter.CheckCStringOutOfBounds)
756     return state;
757 
758   // If a previous check has failed, propagate the failure.
759   if (!state)
760     return nullptr;
761 
762   SValBuilder &svalBuilder = C.getSValBuilder();
763   BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
764 
765   QualType sizeTy = svalBuilder.getContext().getSizeType();
766   const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
767   NonLoc maxVal = svalBuilder.makeIntVal(maxValInt);
768 
769   SVal maxMinusRight;
770   if (isa<nonloc::ConcreteInt>(right)) {
771     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, right,
772                                                  sizeTy);
773   } else {
774     // Try switching the operands. (The order of these two assignments is
775     // important!)
776     maxMinusRight = svalBuilder.evalBinOpNN(state, BO_Sub, maxVal, left,
777                                             sizeTy);
778     left = right;
779   }
780 
781   if (std::optional<NonLoc> maxMinusRightNL = maxMinusRight.getAs<NonLoc>()) {
782     QualType cmpTy = svalBuilder.getConditionType();
783     // If left > max - right, we have an overflow.
784     SVal willOverflow = svalBuilder.evalBinOpNN(state, BO_GT, left,
785                                                 *maxMinusRightNL, cmpTy);
786 
787     ProgramStateRef stateOverflow, stateOkay;
788     std::tie(stateOverflow, stateOkay) =
789       state->assume(willOverflow.castAs<DefinedOrUnknownSVal>());
790 
791     if (stateOverflow && !stateOkay) {
792       // We have an overflow. Emit a bug report.
793       emitAdditionOverflowBug(C, stateOverflow);
794       return nullptr;
795     }
796 
797     // From now on, assume an overflow didn't occur.
798     assert(stateOkay);
799     state = stateOkay;
800   }
801 
802   return state;
803 }
804 
805 ProgramStateRef CStringChecker::setCStringLength(ProgramStateRef state,
806                                                 const MemRegion *MR,
807                                                 SVal strLength) {
808   assert(!strLength.isUndef() && "Attempt to set an undefined string length");
809 
810   MR = MR->StripCasts();
811 
812   switch (MR->getKind()) {
813   case MemRegion::StringRegionKind:
814     // FIXME: This can happen if we strcpy() into a string region. This is
815     // undefined [C99 6.4.5p6], but we should still warn about it.
816     return state;
817 
818   case MemRegion::SymbolicRegionKind:
819   case MemRegion::AllocaRegionKind:
820   case MemRegion::NonParamVarRegionKind:
821   case MemRegion::ParamVarRegionKind:
822   case MemRegion::FieldRegionKind:
823   case MemRegion::ObjCIvarRegionKind:
824     // These are the types we can currently track string lengths for.
825     break;
826 
827   case MemRegion::ElementRegionKind:
828     // FIXME: Handle element regions by upper-bounding the parent region's
829     // string length.
830     return state;
831 
832   default:
833     // Other regions (mostly non-data) can't have a reliable C string length.
834     // For now, just ignore the change.
835     // FIXME: These are rare but not impossible. We should output some kind of
836     // warning for things like strcpy((char[]){'a', 0}, "b");
837     return state;
838   }
839 
840   if (strLength.isUnknown())
841     return state->remove<CStringLength>(MR);
842 
843   return state->set<CStringLength>(MR, strLength);
844 }
845 
846 SVal CStringChecker::getCStringLengthForRegion(CheckerContext &C,
847                                                ProgramStateRef &state,
848                                                const Expr *Ex,
849                                                const MemRegion *MR,
850                                                bool hypothetical) {
851   if (!hypothetical) {
852     // If there's a recorded length, go ahead and return it.
853     const SVal *Recorded = state->get<CStringLength>(MR);
854     if (Recorded)
855       return *Recorded;
856   }
857 
858   // Otherwise, get a new symbol and update the state.
859   SValBuilder &svalBuilder = C.getSValBuilder();
860   QualType sizeTy = svalBuilder.getContext().getSizeType();
861   SVal strLength = svalBuilder.getMetadataSymbolVal(CStringChecker::getTag(),
862                                                     MR, Ex, sizeTy,
863                                                     C.getLocationContext(),
864                                                     C.blockCount());
865 
866   if (!hypothetical) {
867     if (std::optional<NonLoc> strLn = strLength.getAs<NonLoc>()) {
868       // In case of unbounded calls strlen etc bound the range to SIZE_MAX/4
869       BasicValueFactory &BVF = svalBuilder.getBasicValueFactory();
870       const llvm::APSInt &maxValInt = BVF.getMaxValue(sizeTy);
871       llvm::APSInt fourInt = APSIntType(maxValInt).getValue(4);
872       const llvm::APSInt *maxLengthInt = BVF.evalAPSInt(BO_Div, maxValInt,
873                                                         fourInt);
874       NonLoc maxLength = svalBuilder.makeIntVal(*maxLengthInt);
875       SVal evalLength = svalBuilder.evalBinOpNN(state, BO_LE, *strLn,
876                                                 maxLength, sizeTy);
877       state = state->assume(evalLength.castAs<DefinedOrUnknownSVal>(), true);
878     }
879     state = state->set<CStringLength>(MR, strLength);
880   }
881 
882   return strLength;
883 }
884 
885 SVal CStringChecker::getCStringLength(CheckerContext &C, ProgramStateRef &state,
886                                       const Expr *Ex, SVal Buf,
887                                       bool hypothetical) const {
888   const MemRegion *MR = Buf.getAsRegion();
889   if (!MR) {
890     // If we can't get a region, see if it's something we /know/ isn't a
891     // C string. In the context of locations, the only time we can issue such
892     // a warning is for labels.
893     if (std::optional<loc::GotoLabel> Label = Buf.getAs<loc::GotoLabel>()) {
894       if (Filter.CheckCStringNotNullTerm) {
895         SmallString<120> buf;
896         llvm::raw_svector_ostream os(buf);
897         assert(CurrentFunctionDescription);
898         os << "Argument to " << CurrentFunctionDescription
899            << " is the address of the label '" << Label->getLabel()->getName()
900            << "', which is not a null-terminated string";
901 
902         emitNotCStringBug(C, state, Ex, os.str());
903       }
904       return UndefinedVal();
905     }
906 
907     // If it's not a region and not a label, give up.
908     return UnknownVal();
909   }
910 
911   // If we have a region, strip casts from it and see if we can figure out
912   // its length. For anything we can't figure out, just return UnknownVal.
913   MR = MR->StripCasts();
914 
915   switch (MR->getKind()) {
916   case MemRegion::StringRegionKind: {
917     // Modifying the contents of string regions is undefined [C99 6.4.5p6],
918     // so we can assume that the byte length is the correct C string length.
919     SValBuilder &svalBuilder = C.getSValBuilder();
920     QualType sizeTy = svalBuilder.getContext().getSizeType();
921     const StringLiteral *strLit = cast<StringRegion>(MR)->getStringLiteral();
922     return svalBuilder.makeIntVal(strLit->getLength(), sizeTy);
923   }
924   case MemRegion::SymbolicRegionKind:
925   case MemRegion::AllocaRegionKind:
926   case MemRegion::NonParamVarRegionKind:
927   case MemRegion::ParamVarRegionKind:
928   case MemRegion::FieldRegionKind:
929   case MemRegion::ObjCIvarRegionKind:
930     return getCStringLengthForRegion(C, state, Ex, MR, hypothetical);
931   case MemRegion::CompoundLiteralRegionKind:
932     // FIXME: Can we track this? Is it necessary?
933     return UnknownVal();
934   case MemRegion::ElementRegionKind:
935     // FIXME: How can we handle this? It's not good enough to subtract the
936     // offset from the base string length; consider "123\x00567" and &a[5].
937     return UnknownVal();
938   default:
939     // Other regions (mostly non-data) can't have a reliable C string length.
940     // In this case, an error is emitted and UndefinedVal is returned.
941     // The caller should always be prepared to handle this case.
942     if (Filter.CheckCStringNotNullTerm) {
943       SmallString<120> buf;
944       llvm::raw_svector_ostream os(buf);
945 
946       assert(CurrentFunctionDescription);
947       os << "Argument to " << CurrentFunctionDescription << " is ";
948 
949       if (SummarizeRegion(os, C.getASTContext(), MR))
950         os << ", which is not a null-terminated string";
951       else
952         os << "not a null-terminated string";
953 
954       emitNotCStringBug(C, state, Ex, os.str());
955     }
956     return UndefinedVal();
957   }
958 }
959 
960 const StringLiteral *CStringChecker::getCStringLiteral(CheckerContext &C,
961   ProgramStateRef &state, const Expr *expr, SVal val) const {
962 
963   // Get the memory region pointed to by the val.
964   const MemRegion *bufRegion = val.getAsRegion();
965   if (!bufRegion)
966     return nullptr;
967 
968   // Strip casts off the memory region.
969   bufRegion = bufRegion->StripCasts();
970 
971   // Cast the memory region to a string region.
972   const StringRegion *strRegion= dyn_cast<StringRegion>(bufRegion);
973   if (!strRegion)
974     return nullptr;
975 
976   // Return the actual string in the string region.
977   return strRegion->getStringLiteral();
978 }
979 
980 bool CStringChecker::isFirstBufInBound(CheckerContext &C, ProgramStateRef State,
981                                        SVal BufVal, QualType BufTy,
982                                        SVal LengthVal, QualType LengthTy) {
983   // If we do not know that the buffer is long enough we return 'true'.
984   // Otherwise the parent region of this field region would also get
985   // invalidated, which would lead to warnings based on an unknown state.
986 
987   if (LengthVal.isUnknown())
988     return false;
989 
990   // Originally copied from CheckBufferAccess and CheckLocation.
991   SValBuilder &SB = C.getSValBuilder();
992   ASTContext &Ctx = C.getASTContext();
993 
994   QualType PtrTy = Ctx.getPointerType(Ctx.CharTy);
995 
996   std::optional<NonLoc> Length = LengthVal.getAs<NonLoc>();
997   if (!Length)
998     return true; // cf top comment.
999 
1000   // Compute the offset of the last element to be accessed: size-1.
1001   NonLoc One = SB.makeIntVal(1, LengthTy).castAs<NonLoc>();
1002   SVal Offset = SB.evalBinOpNN(State, BO_Sub, *Length, One, LengthTy);
1003   if (Offset.isUnknown())
1004     return true; // cf top comment
1005   NonLoc LastOffset = Offset.castAs<NonLoc>();
1006 
1007   // Check that the first buffer is sufficiently long.
1008   SVal BufStart = SB.evalCast(BufVal, PtrTy, BufTy);
1009   std::optional<Loc> BufLoc = BufStart.getAs<Loc>();
1010   if (!BufLoc)
1011     return true; // cf top comment.
1012 
1013   SVal BufEnd = SB.evalBinOpLN(State, BO_Add, *BufLoc, LastOffset, PtrTy);
1014 
1015   // Check for out of bound array element access.
1016   const MemRegion *R = BufEnd.getAsRegion();
1017   if (!R)
1018     return true; // cf top comment.
1019 
1020   const ElementRegion *ER = dyn_cast<ElementRegion>(R);
1021   if (!ER)
1022     return true; // cf top comment.
1023 
1024   // FIXME: Does this crash when a non-standard definition
1025   // of a library function is encountered?
1026   assert(ER->getValueType() == C.getASTContext().CharTy &&
1027          "isFirstBufInBound should only be called with char* ElementRegions");
1028 
1029   // Get the size of the array.
1030   const SubRegion *superReg = cast<SubRegion>(ER->getSuperRegion());
1031   DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, superReg, SB);
1032 
1033   // Get the index of the accessed element.
1034   DefinedOrUnknownSVal Idx = ER->getIndex().castAs<DefinedOrUnknownSVal>();
1035 
1036   ProgramStateRef StInBound = State->assumeInBound(Idx, SizeDV, true);
1037 
1038   return static_cast<bool>(StInBound);
1039 }
1040 
1041 ProgramStateRef CStringChecker::invalidateDestinationBufferBySize(
1042     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV,
1043     SVal SizeV, QualType SizeTy) {
1044   auto InvalidationTraitOperations =
1045       [&C, S, BufTy = BufE->getType(), BufV, SizeV,
1046        SizeTy](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1047         // If destination buffer is a field region and access is in bound, do
1048         // not invalidate its super region.
1049         if (MemRegion::FieldRegionKind == R->getKind() &&
1050             isFirstBufInBound(C, S, BufV, BufTy, SizeV, SizeTy)) {
1051           ITraits.setTrait(
1052               R,
1053               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1054         }
1055         return false;
1056       };
1057 
1058   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1059 }
1060 
1061 ProgramStateRef
1062 CStringChecker::invalidateDestinationBufferAlwaysEscapeSuperRegion(
1063     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1064   auto InvalidationTraitOperations = [](RegionAndSymbolInvalidationTraits &,
1065                                         const MemRegion *R) {
1066     return isa<FieldRegion>(R);
1067   };
1068 
1069   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1070 }
1071 
1072 ProgramStateRef CStringChecker::invalidateDestinationBufferNeverOverflows(
1073     CheckerContext &C, ProgramStateRef S, const Expr *BufE, SVal BufV) {
1074   auto InvalidationTraitOperations =
1075       [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1076         if (MemRegion::FieldRegionKind == R->getKind())
1077           ITraits.setTrait(
1078               R,
1079               RegionAndSymbolInvalidationTraits::TK_DoNotInvalidateSuperRegion);
1080         return false;
1081       };
1082 
1083   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1084 }
1085 
1086 ProgramStateRef CStringChecker::invalidateSourceBuffer(CheckerContext &C,
1087                                                        ProgramStateRef S,
1088                                                        const Expr *BufE,
1089                                                        SVal BufV) {
1090   auto InvalidationTraitOperations =
1091       [](RegionAndSymbolInvalidationTraits &ITraits, const MemRegion *R) {
1092         ITraits.setTrait(
1093             R->getBaseRegion(),
1094             RegionAndSymbolInvalidationTraits::TK_PreserveContents);
1095         ITraits.setTrait(R,
1096                          RegionAndSymbolInvalidationTraits::TK_SuppressEscape);
1097         return true;
1098       };
1099 
1100   return invalidateBufferAux(C, S, BufE, BufV, InvalidationTraitOperations);
1101 }
1102 
1103 ProgramStateRef CStringChecker::invalidateBufferAux(
1104     CheckerContext &C, ProgramStateRef State, const Expr *E, SVal V,
1105     llvm::function_ref<bool(RegionAndSymbolInvalidationTraits &,
1106                             const MemRegion *)>
1107         InvalidationTraitOperations) {
1108   std::optional<Loc> L = V.getAs<Loc>();
1109   if (!L)
1110     return State;
1111 
1112   // FIXME: This is a simplified version of what's in CFRefCount.cpp -- it makes
1113   // some assumptions about the value that CFRefCount can't. Even so, it should
1114   // probably be refactored.
1115   if (std::optional<loc::MemRegionVal> MR = L->getAs<loc::MemRegionVal>()) {
1116     const MemRegion *R = MR->getRegion()->StripCasts();
1117 
1118     // Are we dealing with an ElementRegion?  If so, we should be invalidating
1119     // the super-region.
1120     if (const ElementRegion *ER = dyn_cast<ElementRegion>(R)) {
1121       R = ER->getSuperRegion();
1122       // FIXME: What about layers of ElementRegions?
1123     }
1124 
1125     // Invalidate this region.
1126     const LocationContext *LCtx = C.getPredecessor()->getLocationContext();
1127     RegionAndSymbolInvalidationTraits ITraits;
1128     bool CausesPointerEscape = InvalidationTraitOperations(ITraits, R);
1129 
1130     return State->invalidateRegions(R, E, C.blockCount(), LCtx,
1131                                     CausesPointerEscape, nullptr, nullptr,
1132                                     &ITraits);
1133   }
1134 
1135   // If we have a non-region value by chance, just remove the binding.
1136   // FIXME: is this necessary or correct? This handles the non-Region
1137   //  cases.  Is it ever valid to store to these?
1138   return State->killBinding(*L);
1139 }
1140 
1141 bool CStringChecker::SummarizeRegion(raw_ostream &os, ASTContext &Ctx,
1142                                      const MemRegion *MR) {
1143   switch (MR->getKind()) {
1144   case MemRegion::FunctionCodeRegionKind: {
1145     if (const auto *FD = cast<FunctionCodeRegion>(MR)->getDecl())
1146       os << "the address of the function '" << *FD << '\'';
1147     else
1148       os << "the address of a function";
1149     return true;
1150   }
1151   case MemRegion::BlockCodeRegionKind:
1152     os << "block text";
1153     return true;
1154   case MemRegion::BlockDataRegionKind:
1155     os << "a block";
1156     return true;
1157   case MemRegion::CXXThisRegionKind:
1158   case MemRegion::CXXTempObjectRegionKind:
1159     os << "a C++ temp object of type "
1160        << cast<TypedValueRegion>(MR)->getValueType();
1161     return true;
1162   case MemRegion::NonParamVarRegionKind:
1163     os << "a variable of type" << cast<TypedValueRegion>(MR)->getValueType();
1164     return true;
1165   case MemRegion::ParamVarRegionKind:
1166     os << "a parameter of type" << cast<TypedValueRegion>(MR)->getValueType();
1167     return true;
1168   case MemRegion::FieldRegionKind:
1169     os << "a field of type " << cast<TypedValueRegion>(MR)->getValueType();
1170     return true;
1171   case MemRegion::ObjCIvarRegionKind:
1172     os << "an instance variable of type "
1173        << cast<TypedValueRegion>(MR)->getValueType();
1174     return true;
1175   default:
1176     return false;
1177   }
1178 }
1179 
1180 bool CStringChecker::memsetAux(const Expr *DstBuffer, SVal CharVal,
1181                                const Expr *Size, CheckerContext &C,
1182                                ProgramStateRef &State) {
1183   SVal MemVal = C.getSVal(DstBuffer);
1184   SVal SizeVal = C.getSVal(Size);
1185   const MemRegion *MR = MemVal.getAsRegion();
1186   if (!MR)
1187     return false;
1188 
1189   // We're about to model memset by producing a "default binding" in the Store.
1190   // Our current implementation - RegionStore - doesn't support default bindings
1191   // that don't cover the whole base region. So we should first get the offset
1192   // and the base region to figure out whether the offset of buffer is 0.
1193   RegionOffset Offset = MR->getAsOffset();
1194   const MemRegion *BR = Offset.getRegion();
1195 
1196   std::optional<NonLoc> SizeNL = SizeVal.getAs<NonLoc>();
1197   if (!SizeNL)
1198     return false;
1199 
1200   SValBuilder &svalBuilder = C.getSValBuilder();
1201   ASTContext &Ctx = C.getASTContext();
1202 
1203   // void *memset(void *dest, int ch, size_t count);
1204   // For now we can only handle the case of offset is 0 and concrete char value.
1205   if (Offset.isValid() && !Offset.hasSymbolicOffset() &&
1206       Offset.getOffset() == 0) {
1207     // Get the base region's size.
1208     DefinedOrUnknownSVal SizeDV = getDynamicExtent(State, BR, svalBuilder);
1209 
1210     ProgramStateRef StateWholeReg, StateNotWholeReg;
1211     std::tie(StateWholeReg, StateNotWholeReg) =
1212         State->assume(svalBuilder.evalEQ(State, SizeDV, *SizeNL));
1213 
1214     // With the semantic of 'memset()', we should convert the CharVal to
1215     // unsigned char.
1216     CharVal = svalBuilder.evalCast(CharVal, Ctx.UnsignedCharTy, Ctx.IntTy);
1217 
1218     ProgramStateRef StateNullChar, StateNonNullChar;
1219     std::tie(StateNullChar, StateNonNullChar) =
1220         assumeZero(C, State, CharVal, Ctx.UnsignedCharTy);
1221 
1222     if (StateWholeReg && !StateNotWholeReg && StateNullChar &&
1223         !StateNonNullChar) {
1224       // If the 'memset()' acts on the whole region of destination buffer and
1225       // the value of the second argument of 'memset()' is zero, bind the second
1226       // argument's value to the destination buffer with 'default binding'.
1227       // FIXME: Since there is no perfect way to bind the non-zero character, we
1228       // can only deal with zero value here. In the future, we need to deal with
1229       // the binding of non-zero value in the case of whole region.
1230       State = State->bindDefaultZero(svalBuilder.makeLoc(BR),
1231                                      C.getLocationContext());
1232     } else {
1233       // If the destination buffer's extent is not equal to the value of
1234       // third argument, just invalidate buffer.
1235       State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1236                                                 SizeVal, Size->getType());
1237     }
1238 
1239     if (StateNullChar && !StateNonNullChar) {
1240       // If the value of the second argument of 'memset()' is zero, set the
1241       // string length of destination buffer to 0 directly.
1242       State = setCStringLength(State, MR,
1243                                svalBuilder.makeZeroVal(Ctx.getSizeType()));
1244     } else if (!StateNullChar && StateNonNullChar) {
1245       SVal NewStrLen = svalBuilder.getMetadataSymbolVal(
1246           CStringChecker::getTag(), MR, DstBuffer, Ctx.getSizeType(),
1247           C.getLocationContext(), C.blockCount());
1248 
1249       // If the value of second argument is not zero, then the string length
1250       // is at least the size argument.
1251       SVal NewStrLenGESize = svalBuilder.evalBinOp(
1252           State, BO_GE, NewStrLen, SizeVal, svalBuilder.getConditionType());
1253 
1254       State = setCStringLength(
1255           State->assume(NewStrLenGESize.castAs<DefinedOrUnknownSVal>(), true),
1256           MR, NewStrLen);
1257     }
1258   } else {
1259     // If the offset is not zero and char value is not concrete, we can do
1260     // nothing but invalidate the buffer.
1261     State = invalidateDestinationBufferBySize(C, State, DstBuffer, MemVal,
1262                                               SizeVal, Size->getType());
1263   }
1264   return true;
1265 }
1266 
1267 //===----------------------------------------------------------------------===//
1268 // evaluation of individual function calls.
1269 //===----------------------------------------------------------------------===//
1270 
1271 void CStringChecker::evalCopyCommon(CheckerContext &C, const CallExpr *CE,
1272                                     ProgramStateRef state, SizeArgExpr Size,
1273                                     DestinationArgExpr Dest,
1274                                     SourceArgExpr Source, bool Restricted,
1275                                     bool IsMempcpy, CharKind CK) const {
1276   CurrentFunctionDescription = "memory copy function";
1277 
1278   // See if the size argument is zero.
1279   const LocationContext *LCtx = C.getLocationContext();
1280   SVal sizeVal = state->getSVal(Size.Expression, LCtx);
1281   QualType sizeTy = Size.Expression->getType();
1282 
1283   ProgramStateRef stateZeroSize, stateNonZeroSize;
1284   std::tie(stateZeroSize, stateNonZeroSize) =
1285       assumeZero(C, state, sizeVal, sizeTy);
1286 
1287   // Get the value of the Dest.
1288   SVal destVal = state->getSVal(Dest.Expression, LCtx);
1289 
1290   // If the size is zero, there won't be any actual memory access, so
1291   // just bind the return value to the destination buffer and return.
1292   if (stateZeroSize && !stateNonZeroSize) {
1293     stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, destVal);
1294     C.addTransition(stateZeroSize);
1295     return;
1296   }
1297 
1298   // If the size can be nonzero, we have to check the other arguments.
1299   if (stateNonZeroSize) {
1300     state = stateNonZeroSize;
1301 
1302     // Ensure the destination is not null. If it is NULL there will be a
1303     // NULL pointer dereference.
1304     state = checkNonNull(C, state, Dest, destVal);
1305     if (!state)
1306       return;
1307 
1308     // Get the value of the Src.
1309     SVal srcVal = state->getSVal(Source.Expression, LCtx);
1310 
1311     // Ensure the source is not null. If it is NULL there will be a
1312     // NULL pointer dereference.
1313     state = checkNonNull(C, state, Source, srcVal);
1314     if (!state)
1315       return;
1316 
1317     // Ensure the accesses are valid and that the buffers do not overlap.
1318     state = CheckBufferAccess(C, state, Dest, Size, AccessKind::write, CK);
1319     state = CheckBufferAccess(C, state, Source, Size, AccessKind::read, CK);
1320 
1321     if (Restricted)
1322       state = CheckOverlap(C, state, Size, Dest, Source, CK);
1323 
1324     if (!state)
1325       return;
1326 
1327     // If this is mempcpy, get the byte after the last byte copied and
1328     // bind the expr.
1329     if (IsMempcpy) {
1330       // Get the byte after the last byte copied.
1331       SValBuilder &SvalBuilder = C.getSValBuilder();
1332       ASTContext &Ctx = SvalBuilder.getContext();
1333       QualType CharPtrTy = getCharPtrType(Ctx, CK);
1334       SVal DestRegCharVal =
1335           SvalBuilder.evalCast(destVal, CharPtrTy, Dest.Expression->getType());
1336       SVal lastElement = C.getSValBuilder().evalBinOp(
1337           state, BO_Add, DestRegCharVal, sizeVal, Dest.Expression->getType());
1338       // If we don't know how much we copied, we can at least
1339       // conjure a return value for later.
1340       if (lastElement.isUnknown())
1341         lastElement = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1342                                                           C.blockCount());
1343 
1344       // The byte after the last byte copied is the return value.
1345       state = state->BindExpr(CE, LCtx, lastElement);
1346     } else {
1347       // All other copies return the destination buffer.
1348       // (Well, bcopy() has a void return type, but this won't hurt.)
1349       state = state->BindExpr(CE, LCtx, destVal);
1350     }
1351 
1352     // Invalidate the destination (regular invalidation without pointer-escaping
1353     // the address of the top-level region).
1354     // FIXME: Even if we can't perfectly model the copy, we should see if we
1355     // can use LazyCompoundVals to copy the source values into the destination.
1356     // This would probably remove any existing bindings past the end of the
1357     // copied region, but that's still an improvement over blank invalidation.
1358     state = invalidateDestinationBufferBySize(
1359         C, state, Dest.Expression, C.getSVal(Dest.Expression), sizeVal,
1360         Size.Expression->getType());
1361 
1362     // Invalidate the source (const-invalidation without const-pointer-escaping
1363     // the address of the top-level region).
1364     state = invalidateSourceBuffer(C, state, Source.Expression,
1365                                    C.getSVal(Source.Expression));
1366 
1367     C.addTransition(state);
1368   }
1369 }
1370 
1371 void CStringChecker::evalMemcpy(CheckerContext &C, const CallExpr *CE,
1372                                 CharKind CK) const {
1373   // void *memcpy(void *restrict dst, const void *restrict src, size_t n);
1374   // The return value is the address of the destination buffer.
1375   DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1376   SourceArgExpr Src = {{CE->getArg(1), 1}};
1377   SizeArgExpr Size = {{CE->getArg(2), 2}};
1378 
1379   ProgramStateRef State = C.getState();
1380 
1381   constexpr bool IsRestricted = true;
1382   constexpr bool IsMempcpy = false;
1383   evalCopyCommon(C, CE, State, Size, Dest, Src, IsRestricted, IsMempcpy, CK);
1384 }
1385 
1386 void CStringChecker::evalMempcpy(CheckerContext &C, const CallExpr *CE,
1387                                  CharKind CK) const {
1388   // void *mempcpy(void *restrict dst, const void *restrict src, size_t n);
1389   // The return value is a pointer to the byte following the last written byte.
1390   DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1391   SourceArgExpr Src = {{CE->getArg(1), 1}};
1392   SizeArgExpr Size = {{CE->getArg(2), 2}};
1393 
1394   constexpr bool IsRestricted = true;
1395   constexpr bool IsMempcpy = true;
1396   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1397                  CK);
1398 }
1399 
1400 void CStringChecker::evalMemmove(CheckerContext &C, const CallExpr *CE,
1401                                  CharKind CK) const {
1402   // void *memmove(void *dst, const void *src, size_t n);
1403   // The return value is the address of the destination buffer.
1404   DestinationArgExpr Dest = {{CE->getArg(0), 0}};
1405   SourceArgExpr Src = {{CE->getArg(1), 1}};
1406   SizeArgExpr Size = {{CE->getArg(2), 2}};
1407 
1408   constexpr bool IsRestricted = false;
1409   constexpr bool IsMempcpy = false;
1410   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1411                  CK);
1412 }
1413 
1414 void CStringChecker::evalBcopy(CheckerContext &C, const CallExpr *CE) const {
1415   // void bcopy(const void *src, void *dst, size_t n);
1416   SourceArgExpr Src{{CE->getArg(0), 0}};
1417   DestinationArgExpr Dest = {{CE->getArg(1), 1}};
1418   SizeArgExpr Size = {{CE->getArg(2), 2}};
1419 
1420   constexpr bool IsRestricted = false;
1421   constexpr bool IsMempcpy = false;
1422   evalCopyCommon(C, CE, C.getState(), Size, Dest, Src, IsRestricted, IsMempcpy,
1423                  CharKind::Regular);
1424 }
1425 
1426 void CStringChecker::evalMemcmp(CheckerContext &C, const CallExpr *CE,
1427                                 CharKind CK) const {
1428   // int memcmp(const void *s1, const void *s2, size_t n);
1429   CurrentFunctionDescription = "memory comparison function";
1430 
1431   AnyArgExpr Left = {CE->getArg(0), 0};
1432   AnyArgExpr Right = {CE->getArg(1), 1};
1433   SizeArgExpr Size = {{CE->getArg(2), 2}};
1434 
1435   ProgramStateRef State = C.getState();
1436   SValBuilder &Builder = C.getSValBuilder();
1437   const LocationContext *LCtx = C.getLocationContext();
1438 
1439   // See if the size argument is zero.
1440   SVal sizeVal = State->getSVal(Size.Expression, LCtx);
1441   QualType sizeTy = Size.Expression->getType();
1442 
1443   ProgramStateRef stateZeroSize, stateNonZeroSize;
1444   std::tie(stateZeroSize, stateNonZeroSize) =
1445       assumeZero(C, State, sizeVal, sizeTy);
1446 
1447   // If the size can be zero, the result will be 0 in that case, and we don't
1448   // have to check either of the buffers.
1449   if (stateZeroSize) {
1450     State = stateZeroSize;
1451     State = State->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1452     C.addTransition(State);
1453   }
1454 
1455   // If the size can be nonzero, we have to check the other arguments.
1456   if (stateNonZeroSize) {
1457     State = stateNonZeroSize;
1458     // If we know the two buffers are the same, we know the result is 0.
1459     // First, get the two buffers' addresses. Another checker will have already
1460     // made sure they're not undefined.
1461     DefinedOrUnknownSVal LV =
1462         State->getSVal(Left.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1463     DefinedOrUnknownSVal RV =
1464         State->getSVal(Right.Expression, LCtx).castAs<DefinedOrUnknownSVal>();
1465 
1466     // See if they are the same.
1467     ProgramStateRef SameBuffer, NotSameBuffer;
1468     std::tie(SameBuffer, NotSameBuffer) =
1469         State->assume(Builder.evalEQ(State, LV, RV));
1470 
1471     // If the two arguments are the same buffer, we know the result is 0,
1472     // and we only need to check one size.
1473     if (SameBuffer && !NotSameBuffer) {
1474       State = SameBuffer;
1475       State = CheckBufferAccess(C, State, Left, Size, AccessKind::read);
1476       if (State) {
1477         State =
1478             SameBuffer->BindExpr(CE, LCtx, Builder.makeZeroVal(CE->getType()));
1479         C.addTransition(State);
1480       }
1481       return;
1482     }
1483 
1484     // If the two arguments might be different buffers, we have to check
1485     // the size of both of them.
1486     assert(NotSameBuffer);
1487     State = CheckBufferAccess(C, State, Right, Size, AccessKind::read, CK);
1488     State = CheckBufferAccess(C, State, Left, Size, AccessKind::read, CK);
1489     if (State) {
1490       // The return value is the comparison result, which we don't know.
1491       SVal CmpV = Builder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
1492       State = State->BindExpr(CE, LCtx, CmpV);
1493       C.addTransition(State);
1494     }
1495   }
1496 }
1497 
1498 void CStringChecker::evalstrLength(CheckerContext &C,
1499                                    const CallExpr *CE) const {
1500   // size_t strlen(const char *s);
1501   evalstrLengthCommon(C, CE, /* IsStrnlen = */ false);
1502 }
1503 
1504 void CStringChecker::evalstrnLength(CheckerContext &C,
1505                                     const CallExpr *CE) const {
1506   // size_t strnlen(const char *s, size_t maxlen);
1507   evalstrLengthCommon(C, CE, /* IsStrnlen = */ true);
1508 }
1509 
1510 void CStringChecker::evalstrLengthCommon(CheckerContext &C, const CallExpr *CE,
1511                                          bool IsStrnlen) const {
1512   CurrentFunctionDescription = "string length function";
1513   ProgramStateRef state = C.getState();
1514   const LocationContext *LCtx = C.getLocationContext();
1515 
1516   if (IsStrnlen) {
1517     const Expr *maxlenExpr = CE->getArg(1);
1518     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1519 
1520     ProgramStateRef stateZeroSize, stateNonZeroSize;
1521     std::tie(stateZeroSize, stateNonZeroSize) =
1522       assumeZero(C, state, maxlenVal, maxlenExpr->getType());
1523 
1524     // If the size can be zero, the result will be 0 in that case, and we don't
1525     // have to check the string itself.
1526     if (stateZeroSize) {
1527       SVal zero = C.getSValBuilder().makeZeroVal(CE->getType());
1528       stateZeroSize = stateZeroSize->BindExpr(CE, LCtx, zero);
1529       C.addTransition(stateZeroSize);
1530     }
1531 
1532     // If the size is GUARANTEED to be zero, we're done!
1533     if (!stateNonZeroSize)
1534       return;
1535 
1536     // Otherwise, record the assumption that the size is nonzero.
1537     state = stateNonZeroSize;
1538   }
1539 
1540   // Check that the string argument is non-null.
1541   AnyArgExpr Arg = {CE->getArg(0), 0};
1542   SVal ArgVal = state->getSVal(Arg.Expression, LCtx);
1543   state = checkNonNull(C, state, Arg, ArgVal);
1544 
1545   if (!state)
1546     return;
1547 
1548   SVal strLength = getCStringLength(C, state, Arg.Expression, ArgVal);
1549 
1550   // If the argument isn't a valid C string, there's no valid state to
1551   // transition to.
1552   if (strLength.isUndef())
1553     return;
1554 
1555   DefinedOrUnknownSVal result = UnknownVal();
1556 
1557   // If the check is for strnlen() then bind the return value to no more than
1558   // the maxlen value.
1559   if (IsStrnlen) {
1560     QualType cmpTy = C.getSValBuilder().getConditionType();
1561 
1562     // It's a little unfortunate to be getting this again,
1563     // but it's not that expensive...
1564     const Expr *maxlenExpr = CE->getArg(1);
1565     SVal maxlenVal = state->getSVal(maxlenExpr, LCtx);
1566 
1567     std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1568     std::optional<NonLoc> maxlenValNL = maxlenVal.getAs<NonLoc>();
1569 
1570     if (strLengthNL && maxlenValNL) {
1571       ProgramStateRef stateStringTooLong, stateStringNotTooLong;
1572 
1573       // Check if the strLength is greater than the maxlen.
1574       std::tie(stateStringTooLong, stateStringNotTooLong) = state->assume(
1575           C.getSValBuilder()
1576               .evalBinOpNN(state, BO_GT, *strLengthNL, *maxlenValNL, cmpTy)
1577               .castAs<DefinedOrUnknownSVal>());
1578 
1579       if (stateStringTooLong && !stateStringNotTooLong) {
1580         // If the string is longer than maxlen, return maxlen.
1581         result = *maxlenValNL;
1582       } else if (stateStringNotTooLong && !stateStringTooLong) {
1583         // If the string is shorter than maxlen, return its length.
1584         result = *strLengthNL;
1585       }
1586     }
1587 
1588     if (result.isUnknown()) {
1589       // If we don't have enough information for a comparison, there's
1590       // no guarantee the full string length will actually be returned.
1591       // All we know is the return value is the min of the string length
1592       // and the limit. This is better than nothing.
1593       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1594                                                    C.blockCount());
1595       NonLoc resultNL = result.castAs<NonLoc>();
1596 
1597       if (strLengthNL) {
1598         state = state->assume(C.getSValBuilder().evalBinOpNN(
1599                                   state, BO_LE, resultNL, *strLengthNL, cmpTy)
1600                                   .castAs<DefinedOrUnknownSVal>(), true);
1601       }
1602 
1603       if (maxlenValNL) {
1604         state = state->assume(C.getSValBuilder().evalBinOpNN(
1605                                   state, BO_LE, resultNL, *maxlenValNL, cmpTy)
1606                                   .castAs<DefinedOrUnknownSVal>(), true);
1607       }
1608     }
1609 
1610   } else {
1611     // This is a plain strlen(), not strnlen().
1612     result = strLength.castAs<DefinedOrUnknownSVal>();
1613 
1614     // If we don't know the length of the string, conjure a return
1615     // value, so it can be used in constraints, at least.
1616     if (result.isUnknown()) {
1617       result = C.getSValBuilder().conjureSymbolVal(nullptr, CE, LCtx,
1618                                                    C.blockCount());
1619     }
1620   }
1621 
1622   // Bind the return value.
1623   assert(!result.isUnknown() && "Should have conjured a value by now");
1624   state = state->BindExpr(CE, LCtx, result);
1625   C.addTransition(state);
1626 }
1627 
1628 void CStringChecker::evalStrcpy(CheckerContext &C, const CallExpr *CE) const {
1629   // char *strcpy(char *restrict dst, const char *restrict src);
1630   evalStrcpyCommon(C, CE,
1631                    /* ReturnEnd = */ false,
1632                    /* IsBounded = */ false,
1633                    /* appendK = */ ConcatFnKind::none);
1634 }
1635 
1636 void CStringChecker::evalStrncpy(CheckerContext &C, const CallExpr *CE) const {
1637   // char *strncpy(char *restrict dst, const char *restrict src, size_t n);
1638   evalStrcpyCommon(C, CE,
1639                    /* ReturnEnd = */ false,
1640                    /* IsBounded = */ true,
1641                    /* appendK = */ ConcatFnKind::none);
1642 }
1643 
1644 void CStringChecker::evalStpcpy(CheckerContext &C, const CallExpr *CE) const {
1645   // char *stpcpy(char *restrict dst, const char *restrict src);
1646   evalStrcpyCommon(C, CE,
1647                    /* ReturnEnd = */ true,
1648                    /* IsBounded = */ false,
1649                    /* appendK = */ ConcatFnKind::none);
1650 }
1651 
1652 void CStringChecker::evalStrlcpy(CheckerContext &C, const CallExpr *CE) const {
1653   // size_t strlcpy(char *dest, const char *src, size_t size);
1654   evalStrcpyCommon(C, CE,
1655                    /* ReturnEnd = */ true,
1656                    /* IsBounded = */ true,
1657                    /* appendK = */ ConcatFnKind::none,
1658                    /* returnPtr = */ false);
1659 }
1660 
1661 void CStringChecker::evalStrcat(CheckerContext &C, const CallExpr *CE) const {
1662   // char *strcat(char *restrict s1, const char *restrict s2);
1663   evalStrcpyCommon(C, CE,
1664                    /* ReturnEnd = */ false,
1665                    /* IsBounded = */ false,
1666                    /* appendK = */ ConcatFnKind::strcat);
1667 }
1668 
1669 void CStringChecker::evalStrncat(CheckerContext &C, const CallExpr *CE) const {
1670   // char *strncat(char *restrict s1, const char *restrict s2, size_t n);
1671   evalStrcpyCommon(C, CE,
1672                    /* ReturnEnd = */ false,
1673                    /* IsBounded = */ true,
1674                    /* appendK = */ ConcatFnKind::strcat);
1675 }
1676 
1677 void CStringChecker::evalStrlcat(CheckerContext &C, const CallExpr *CE) const {
1678   // size_t strlcat(char *dst, const char *src, size_t size);
1679   // It will append at most size - strlen(dst) - 1 bytes,
1680   // NULL-terminating the result.
1681   evalStrcpyCommon(C, CE,
1682                    /* ReturnEnd = */ false,
1683                    /* IsBounded = */ true,
1684                    /* appendK = */ ConcatFnKind::strlcat,
1685                    /* returnPtr = */ false);
1686 }
1687 
1688 void CStringChecker::evalStrcpyCommon(CheckerContext &C, const CallExpr *CE,
1689                                       bool ReturnEnd, bool IsBounded,
1690                                       ConcatFnKind appendK,
1691                                       bool returnPtr) const {
1692   if (appendK == ConcatFnKind::none)
1693     CurrentFunctionDescription = "string copy function";
1694   else
1695     CurrentFunctionDescription = "string concatenation function";
1696 
1697   ProgramStateRef state = C.getState();
1698   const LocationContext *LCtx = C.getLocationContext();
1699 
1700   // Check that the destination is non-null.
1701   DestinationArgExpr Dst = {{CE->getArg(0), 0}};
1702   SVal DstVal = state->getSVal(Dst.Expression, LCtx);
1703   state = checkNonNull(C, state, Dst, DstVal);
1704   if (!state)
1705     return;
1706 
1707   // Check that the source is non-null.
1708   SourceArgExpr srcExpr = {{CE->getArg(1), 1}};
1709   SVal srcVal = state->getSVal(srcExpr.Expression, LCtx);
1710   state = checkNonNull(C, state, srcExpr, srcVal);
1711   if (!state)
1712     return;
1713 
1714   // Get the string length of the source.
1715   SVal strLength = getCStringLength(C, state, srcExpr.Expression, srcVal);
1716   std::optional<NonLoc> strLengthNL = strLength.getAs<NonLoc>();
1717 
1718   // Get the string length of the destination buffer.
1719   SVal dstStrLength = getCStringLength(C, state, Dst.Expression, DstVal);
1720   std::optional<NonLoc> dstStrLengthNL = dstStrLength.getAs<NonLoc>();
1721 
1722   // If the source isn't a valid C string, give up.
1723   if (strLength.isUndef())
1724     return;
1725 
1726   SValBuilder &svalBuilder = C.getSValBuilder();
1727   QualType cmpTy = svalBuilder.getConditionType();
1728   QualType sizeTy = svalBuilder.getContext().getSizeType();
1729 
1730   // These two values allow checking two kinds of errors:
1731   // - actual overflows caused by a source that doesn't fit in the destination
1732   // - potential overflows caused by a bound that could exceed the destination
1733   SVal amountCopied = UnknownVal();
1734   SVal maxLastElementIndex = UnknownVal();
1735   const char *boundWarning = nullptr;
1736 
1737   // FIXME: Why do we choose the srcExpr if the access has no size?
1738   //  Note that the 3rd argument of the call would be the size parameter.
1739   SizeArgExpr SrcExprAsSizeDummy = {
1740       {srcExpr.Expression, srcExpr.ArgumentIndex}};
1741   state = CheckOverlap(
1742       C, state,
1743       (IsBounded ? SizeArgExpr{{CE->getArg(2), 2}} : SrcExprAsSizeDummy), Dst,
1744       srcExpr);
1745 
1746   if (!state)
1747     return;
1748 
1749   // If the function is strncpy, strncat, etc... it is bounded.
1750   if (IsBounded) {
1751     // Get the max number of characters to copy.
1752     SizeArgExpr lenExpr = {{CE->getArg(2), 2}};
1753     SVal lenVal = state->getSVal(lenExpr.Expression, LCtx);
1754 
1755     // Protect against misdeclared strncpy().
1756     lenVal =
1757         svalBuilder.evalCast(lenVal, sizeTy, lenExpr.Expression->getType());
1758 
1759     std::optional<NonLoc> lenValNL = lenVal.getAs<NonLoc>();
1760 
1761     // If we know both values, we might be able to figure out how much
1762     // we're copying.
1763     if (strLengthNL && lenValNL) {
1764       switch (appendK) {
1765       case ConcatFnKind::none:
1766       case ConcatFnKind::strcat: {
1767         ProgramStateRef stateSourceTooLong, stateSourceNotTooLong;
1768         // Check if the max number to copy is less than the length of the src.
1769         // If the bound is equal to the source length, strncpy won't null-
1770         // terminate the result!
1771         std::tie(stateSourceTooLong, stateSourceNotTooLong) = state->assume(
1772             svalBuilder
1773                 .evalBinOpNN(state, BO_GE, *strLengthNL, *lenValNL, cmpTy)
1774                 .castAs<DefinedOrUnknownSVal>());
1775 
1776         if (stateSourceTooLong && !stateSourceNotTooLong) {
1777           // Max number to copy is less than the length of the src, so the
1778           // actual strLength copied is the max number arg.
1779           state = stateSourceTooLong;
1780           amountCopied = lenVal;
1781 
1782         } else if (!stateSourceTooLong && stateSourceNotTooLong) {
1783           // The source buffer entirely fits in the bound.
1784           state = stateSourceNotTooLong;
1785           amountCopied = strLength;
1786         }
1787         break;
1788       }
1789       case ConcatFnKind::strlcat:
1790         if (!dstStrLengthNL)
1791           return;
1792 
1793         // amountCopied = min (size - dstLen - 1 , srcLen)
1794         SVal freeSpace = svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL,
1795                                                  *dstStrLengthNL, sizeTy);
1796         if (!isa<NonLoc>(freeSpace))
1797           return;
1798         freeSpace =
1799             svalBuilder.evalBinOp(state, BO_Sub, freeSpace,
1800                                   svalBuilder.makeIntVal(1, sizeTy), sizeTy);
1801         std::optional<NonLoc> freeSpaceNL = freeSpace.getAs<NonLoc>();
1802 
1803         // While unlikely, it is possible that the subtraction is
1804         // too complex to compute, let's check whether it succeeded.
1805         if (!freeSpaceNL)
1806           return;
1807         SVal hasEnoughSpace = svalBuilder.evalBinOpNN(
1808             state, BO_LE, *strLengthNL, *freeSpaceNL, cmpTy);
1809 
1810         ProgramStateRef TrueState, FalseState;
1811         std::tie(TrueState, FalseState) =
1812             state->assume(hasEnoughSpace.castAs<DefinedOrUnknownSVal>());
1813 
1814         // srcStrLength <= size - dstStrLength -1
1815         if (TrueState && !FalseState) {
1816           amountCopied = strLength;
1817         }
1818 
1819         // srcStrLength > size - dstStrLength -1
1820         if (!TrueState && FalseState) {
1821           amountCopied = freeSpace;
1822         }
1823 
1824         if (TrueState && FalseState)
1825           amountCopied = UnknownVal();
1826         break;
1827       }
1828     }
1829     // We still want to know if the bound is known to be too large.
1830     if (lenValNL) {
1831       switch (appendK) {
1832       case ConcatFnKind::strcat:
1833         // For strncat, the check is strlen(dst) + lenVal < sizeof(dst)
1834 
1835         // Get the string length of the destination. If the destination is
1836         // memory that can't have a string length, we shouldn't be copying
1837         // into it anyway.
1838         if (dstStrLength.isUndef())
1839           return;
1840 
1841         if (dstStrLengthNL) {
1842           maxLastElementIndex = svalBuilder.evalBinOpNN(
1843               state, BO_Add, *lenValNL, *dstStrLengthNL, sizeTy);
1844 
1845           boundWarning = "Size argument is greater than the free space in the "
1846                          "destination buffer";
1847         }
1848         break;
1849       case ConcatFnKind::none:
1850       case ConcatFnKind::strlcat:
1851         // For strncpy and strlcat, this is just checking
1852         //  that lenVal <= sizeof(dst).
1853         // (Yes, strncpy and strncat differ in how they treat termination.
1854         // strncat ALWAYS terminates, but strncpy doesn't.)
1855 
1856         // We need a special case for when the copy size is zero, in which
1857         // case strncpy will do no work at all. Our bounds check uses n-1
1858         // as the last element accessed, so n == 0 is problematic.
1859         ProgramStateRef StateZeroSize, StateNonZeroSize;
1860         std::tie(StateZeroSize, StateNonZeroSize) =
1861             assumeZero(C, state, *lenValNL, sizeTy);
1862 
1863         // If the size is known to be zero, we're done.
1864         if (StateZeroSize && !StateNonZeroSize) {
1865           if (returnPtr) {
1866             StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, DstVal);
1867           } else {
1868             if (appendK == ConcatFnKind::none) {
1869               // strlcpy returns strlen(src)
1870               StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, strLength);
1871             } else {
1872               // strlcat returns strlen(src) + strlen(dst)
1873               SVal retSize = svalBuilder.evalBinOp(
1874                   state, BO_Add, strLength, dstStrLength, sizeTy);
1875               StateZeroSize = StateZeroSize->BindExpr(CE, LCtx, retSize);
1876             }
1877           }
1878           C.addTransition(StateZeroSize);
1879           return;
1880         }
1881 
1882         // Otherwise, go ahead and figure out the last element we'll touch.
1883         // We don't record the non-zero assumption here because we can't
1884         // be sure. We won't warn on a possible zero.
1885         NonLoc one = svalBuilder.makeIntVal(1, sizeTy).castAs<NonLoc>();
1886         maxLastElementIndex =
1887             svalBuilder.evalBinOpNN(state, BO_Sub, *lenValNL, one, sizeTy);
1888         boundWarning = "Size argument is greater than the length of the "
1889                        "destination buffer";
1890         break;
1891       }
1892     }
1893   } else {
1894     // The function isn't bounded. The amount copied should match the length
1895     // of the source buffer.
1896     amountCopied = strLength;
1897   }
1898 
1899   assert(state);
1900 
1901   // This represents the number of characters copied into the destination
1902   // buffer. (It may not actually be the strlen if the destination buffer
1903   // is not terminated.)
1904   SVal finalStrLength = UnknownVal();
1905   SVal strlRetVal = UnknownVal();
1906 
1907   if (appendK == ConcatFnKind::none && !returnPtr) {
1908     // strlcpy returns the sizeof(src)
1909     strlRetVal = strLength;
1910   }
1911 
1912   // If this is an appending function (strcat, strncat...) then set the
1913   // string length to strlen(src) + strlen(dst) since the buffer will
1914   // ultimately contain both.
1915   if (appendK != ConcatFnKind::none) {
1916     // Get the string length of the destination. If the destination is memory
1917     // that can't have a string length, we shouldn't be copying into it anyway.
1918     if (dstStrLength.isUndef())
1919       return;
1920 
1921     if (appendK == ConcatFnKind::strlcat && dstStrLengthNL && strLengthNL) {
1922       strlRetVal = svalBuilder.evalBinOpNN(state, BO_Add, *strLengthNL,
1923                                            *dstStrLengthNL, sizeTy);
1924     }
1925 
1926     std::optional<NonLoc> amountCopiedNL = amountCopied.getAs<NonLoc>();
1927 
1928     // If we know both string lengths, we might know the final string length.
1929     if (amountCopiedNL && dstStrLengthNL) {
1930       // Make sure the two lengths together don't overflow a size_t.
1931       state = checkAdditionOverflow(C, state, *amountCopiedNL, *dstStrLengthNL);
1932       if (!state)
1933         return;
1934 
1935       finalStrLength = svalBuilder.evalBinOpNN(state, BO_Add, *amountCopiedNL,
1936                                                *dstStrLengthNL, sizeTy);
1937     }
1938 
1939     // If we couldn't get a single value for the final string length,
1940     // we can at least bound it by the individual lengths.
1941     if (finalStrLength.isUnknown()) {
1942       // Try to get a "hypothetical" string length symbol, which we can later
1943       // set as a real value if that turns out to be the case.
1944       finalStrLength = getCStringLength(C, state, CE, DstVal, true);
1945       assert(!finalStrLength.isUndef());
1946 
1947       if (std::optional<NonLoc> finalStrLengthNL =
1948               finalStrLength.getAs<NonLoc>()) {
1949         if (amountCopiedNL && appendK == ConcatFnKind::none) {
1950           // we overwrite dst string with the src
1951           // finalStrLength >= srcStrLength
1952           SVal sourceInResult = svalBuilder.evalBinOpNN(
1953               state, BO_GE, *finalStrLengthNL, *amountCopiedNL, cmpTy);
1954           state = state->assume(sourceInResult.castAs<DefinedOrUnknownSVal>(),
1955                                 true);
1956           if (!state)
1957             return;
1958         }
1959 
1960         if (dstStrLengthNL && appendK != ConcatFnKind::none) {
1961           // we extend the dst string with the src
1962           // finalStrLength >= dstStrLength
1963           SVal destInResult = svalBuilder.evalBinOpNN(state, BO_GE,
1964                                                       *finalStrLengthNL,
1965                                                       *dstStrLengthNL,
1966                                                       cmpTy);
1967           state =
1968               state->assume(destInResult.castAs<DefinedOrUnknownSVal>(), true);
1969           if (!state)
1970             return;
1971         }
1972       }
1973     }
1974 
1975   } else {
1976     // Otherwise, this is a copy-over function (strcpy, strncpy, ...), and
1977     // the final string length will match the input string length.
1978     finalStrLength = amountCopied;
1979   }
1980 
1981   SVal Result;
1982 
1983   if (returnPtr) {
1984     // The final result of the function will either be a pointer past the last
1985     // copied element, or a pointer to the start of the destination buffer.
1986     Result = (ReturnEnd ? UnknownVal() : DstVal);
1987   } else {
1988     if (appendK == ConcatFnKind::strlcat || appendK == ConcatFnKind::none)
1989       //strlcpy, strlcat
1990       Result = strlRetVal;
1991     else
1992       Result = finalStrLength;
1993   }
1994 
1995   assert(state);
1996 
1997   // If the destination is a MemRegion, try to check for a buffer overflow and
1998   // record the new string length.
1999   if (std::optional<loc::MemRegionVal> dstRegVal =
2000           DstVal.getAs<loc::MemRegionVal>()) {
2001     QualType ptrTy = Dst.Expression->getType();
2002 
2003     // If we have an exact value on a bounded copy, use that to check for
2004     // overflows, rather than our estimate about how much is actually copied.
2005     if (std::optional<NonLoc> maxLastNL = maxLastElementIndex.getAs<NonLoc>()) {
2006       SVal maxLastElement =
2007           svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal, *maxLastNL, ptrTy);
2008 
2009       state = CheckLocation(C, state, Dst, maxLastElement, AccessKind::write);
2010       if (!state)
2011         return;
2012     }
2013 
2014     // Then, if the final length is known...
2015     if (std::optional<NonLoc> knownStrLength = finalStrLength.getAs<NonLoc>()) {
2016       SVal lastElement = svalBuilder.evalBinOpLN(state, BO_Add, *dstRegVal,
2017           *knownStrLength, ptrTy);
2018 
2019       // ...and we haven't checked the bound, we'll check the actual copy.
2020       if (!boundWarning) {
2021         state = CheckLocation(C, state, Dst, lastElement, AccessKind::write);
2022         if (!state)
2023           return;
2024       }
2025 
2026       // If this is a stpcpy-style copy, the last element is the return value.
2027       if (returnPtr && ReturnEnd)
2028         Result = lastElement;
2029     }
2030 
2031     // Invalidate the destination (regular invalidation without pointer-escaping
2032     // the address of the top-level region). This must happen before we set the
2033     // C string length because invalidation will clear the length.
2034     // FIXME: Even if we can't perfectly model the copy, we should see if we
2035     // can use LazyCompoundVals to copy the source values into the destination.
2036     // This would probably remove any existing bindings past the end of the
2037     // string, but that's still an improvement over blank invalidation.
2038     state = invalidateDestinationBufferBySize(C, state, Dst.Expression,
2039                                               *dstRegVal, amountCopied,
2040                                               C.getASTContext().getSizeType());
2041 
2042     // Invalidate the source (const-invalidation without const-pointer-escaping
2043     // the address of the top-level region).
2044     state = invalidateSourceBuffer(C, state, srcExpr.Expression, srcVal);
2045 
2046     // Set the C string length of the destination, if we know it.
2047     if (IsBounded && (appendK == ConcatFnKind::none)) {
2048       // strncpy is annoying in that it doesn't guarantee to null-terminate
2049       // the result string. If the original string didn't fit entirely inside
2050       // the bound (including the null-terminator), we don't know how long the
2051       // result is.
2052       if (amountCopied != strLength)
2053         finalStrLength = UnknownVal();
2054     }
2055     state = setCStringLength(state, dstRegVal->getRegion(), finalStrLength);
2056   }
2057 
2058   assert(state);
2059 
2060   if (returnPtr) {
2061     // If this is a stpcpy-style copy, but we were unable to check for a buffer
2062     // overflow, we still need a result. Conjure a return value.
2063     if (ReturnEnd && Result.isUnknown()) {
2064       Result = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2065     }
2066   }
2067   // Set the return value.
2068   state = state->BindExpr(CE, LCtx, Result);
2069   C.addTransition(state);
2070 }
2071 
2072 void CStringChecker::evalStrcmp(CheckerContext &C, const CallExpr *CE) const {
2073   //int strcmp(const char *s1, const char *s2);
2074   evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ false);
2075 }
2076 
2077 void CStringChecker::evalStrncmp(CheckerContext &C, const CallExpr *CE) const {
2078   //int strncmp(const char *s1, const char *s2, size_t n);
2079   evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ false);
2080 }
2081 
2082 void CStringChecker::evalStrcasecmp(CheckerContext &C,
2083     const CallExpr *CE) const {
2084   //int strcasecmp(const char *s1, const char *s2);
2085   evalStrcmpCommon(C, CE, /* IsBounded = */ false, /* IgnoreCase = */ true);
2086 }
2087 
2088 void CStringChecker::evalStrncasecmp(CheckerContext &C,
2089     const CallExpr *CE) const {
2090   //int strncasecmp(const char *s1, const char *s2, size_t n);
2091   evalStrcmpCommon(C, CE, /* IsBounded = */ true, /* IgnoreCase = */ true);
2092 }
2093 
2094 void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE,
2095     bool IsBounded, bool IgnoreCase) const {
2096   CurrentFunctionDescription = "string comparison function";
2097   ProgramStateRef state = C.getState();
2098   const LocationContext *LCtx = C.getLocationContext();
2099 
2100   // Check that the first string is non-null
2101   AnyArgExpr Left = {CE->getArg(0), 0};
2102   SVal LeftVal = state->getSVal(Left.Expression, LCtx);
2103   state = checkNonNull(C, state, Left, LeftVal);
2104   if (!state)
2105     return;
2106 
2107   // Check that the second string is non-null.
2108   AnyArgExpr Right = {CE->getArg(1), 1};
2109   SVal RightVal = state->getSVal(Right.Expression, LCtx);
2110   state = checkNonNull(C, state, Right, RightVal);
2111   if (!state)
2112     return;
2113 
2114   // Get the string length of the first string or give up.
2115   SVal LeftLength = getCStringLength(C, state, Left.Expression, LeftVal);
2116   if (LeftLength.isUndef())
2117     return;
2118 
2119   // Get the string length of the second string or give up.
2120   SVal RightLength = getCStringLength(C, state, Right.Expression, RightVal);
2121   if (RightLength.isUndef())
2122     return;
2123 
2124   // If we know the two buffers are the same, we know the result is 0.
2125   // First, get the two buffers' addresses. Another checker will have already
2126   // made sure they're not undefined.
2127   DefinedOrUnknownSVal LV = LeftVal.castAs<DefinedOrUnknownSVal>();
2128   DefinedOrUnknownSVal RV = RightVal.castAs<DefinedOrUnknownSVal>();
2129 
2130   // See if they are the same.
2131   SValBuilder &svalBuilder = C.getSValBuilder();
2132   DefinedOrUnknownSVal SameBuf = svalBuilder.evalEQ(state, LV, RV);
2133   ProgramStateRef StSameBuf, StNotSameBuf;
2134   std::tie(StSameBuf, StNotSameBuf) = state->assume(SameBuf);
2135 
2136   // If the two arguments might be the same buffer, we know the result is 0,
2137   // and we only need to check one size.
2138   if (StSameBuf) {
2139     StSameBuf = StSameBuf->BindExpr(CE, LCtx,
2140         svalBuilder.makeZeroVal(CE->getType()));
2141     C.addTransition(StSameBuf);
2142 
2143     // If the two arguments are GUARANTEED to be the same, we're done!
2144     if (!StNotSameBuf)
2145       return;
2146   }
2147 
2148   assert(StNotSameBuf);
2149   state = StNotSameBuf;
2150 
2151   // At this point we can go about comparing the two buffers.
2152   // For now, we only do this if they're both known string literals.
2153 
2154   // Attempt to extract string literals from both expressions.
2155   const StringLiteral *LeftStrLiteral =
2156       getCStringLiteral(C, state, Left.Expression, LeftVal);
2157   const StringLiteral *RightStrLiteral =
2158       getCStringLiteral(C, state, Right.Expression, RightVal);
2159   bool canComputeResult = false;
2160   SVal resultVal = svalBuilder.conjureSymbolVal(nullptr, CE, LCtx,
2161       C.blockCount());
2162 
2163   if (LeftStrLiteral && RightStrLiteral) {
2164     StringRef LeftStrRef = LeftStrLiteral->getString();
2165     StringRef RightStrRef = RightStrLiteral->getString();
2166 
2167     if (IsBounded) {
2168       // Get the max number of characters to compare.
2169       const Expr *lenExpr = CE->getArg(2);
2170       SVal lenVal = state->getSVal(lenExpr, LCtx);
2171 
2172       // If the length is known, we can get the right substrings.
2173       if (const llvm::APSInt *len = svalBuilder.getKnownValue(state, lenVal)) {
2174         // Create substrings of each to compare the prefix.
2175         LeftStrRef = LeftStrRef.substr(0, (size_t)len->getZExtValue());
2176         RightStrRef = RightStrRef.substr(0, (size_t)len->getZExtValue());
2177         canComputeResult = true;
2178       }
2179     } else {
2180       // This is a normal, unbounded strcmp.
2181       canComputeResult = true;
2182     }
2183 
2184     if (canComputeResult) {
2185       // Real strcmp stops at null characters.
2186       size_t s1Term = LeftStrRef.find('\0');
2187       if (s1Term != StringRef::npos)
2188         LeftStrRef = LeftStrRef.substr(0, s1Term);
2189 
2190       size_t s2Term = RightStrRef.find('\0');
2191       if (s2Term != StringRef::npos)
2192         RightStrRef = RightStrRef.substr(0, s2Term);
2193 
2194       // Use StringRef's comparison methods to compute the actual result.
2195       int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef)
2196                                   : LeftStrRef.compare(RightStrRef);
2197 
2198       // The strcmp function returns an integer greater than, equal to, or less
2199       // than zero, [c11, p7.24.4.2].
2200       if (compareRes == 0) {
2201         resultVal = svalBuilder.makeIntVal(compareRes, CE->getType());
2202       }
2203       else {
2204         DefinedSVal zeroVal = svalBuilder.makeIntVal(0, CE->getType());
2205         // Constrain strcmp's result range based on the result of StringRef's
2206         // comparison methods.
2207         BinaryOperatorKind op = (compareRes > 0) ? BO_GT : BO_LT;
2208         SVal compareWithZero =
2209           svalBuilder.evalBinOp(state, op, resultVal, zeroVal,
2210               svalBuilder.getConditionType());
2211         DefinedSVal compareWithZeroVal = compareWithZero.castAs<DefinedSVal>();
2212         state = state->assume(compareWithZeroVal, true);
2213       }
2214     }
2215   }
2216 
2217   state = state->BindExpr(CE, LCtx, resultVal);
2218 
2219   // Record this as a possible path.
2220   C.addTransition(state);
2221 }
2222 
2223 void CStringChecker::evalStrsep(CheckerContext &C, const CallExpr *CE) const {
2224   // char *strsep(char **stringp, const char *delim);
2225   // Verify whether the search string parameter matches the return type.
2226   SourceArgExpr SearchStrPtr = {{CE->getArg(0), 0}};
2227 
2228   QualType CharPtrTy = SearchStrPtr.Expression->getType()->getPointeeType();
2229   if (CharPtrTy.isNull() ||
2230       CE->getType().getUnqualifiedType() != CharPtrTy.getUnqualifiedType())
2231     return;
2232 
2233   CurrentFunctionDescription = "strsep()";
2234   ProgramStateRef State = C.getState();
2235   const LocationContext *LCtx = C.getLocationContext();
2236 
2237   // Check that the search string pointer is non-null (though it may point to
2238   // a null string).
2239   SVal SearchStrVal = State->getSVal(SearchStrPtr.Expression, LCtx);
2240   State = checkNonNull(C, State, SearchStrPtr, SearchStrVal);
2241   if (!State)
2242     return;
2243 
2244   // Check that the delimiter string is non-null.
2245   AnyArgExpr DelimStr = {CE->getArg(1), 1};
2246   SVal DelimStrVal = State->getSVal(DelimStr.Expression, LCtx);
2247   State = checkNonNull(C, State, DelimStr, DelimStrVal);
2248   if (!State)
2249     return;
2250 
2251   SValBuilder &SVB = C.getSValBuilder();
2252   SVal Result;
2253   if (std::optional<Loc> SearchStrLoc = SearchStrVal.getAs<Loc>()) {
2254     // Get the current value of the search string pointer, as a char*.
2255     Result = State->getSVal(*SearchStrLoc, CharPtrTy);
2256 
2257     // Invalidate the search string, representing the change of one delimiter
2258     // character to NUL.
2259     // As the replacement never overflows, do not invalidate its super region.
2260     State = invalidateDestinationBufferNeverOverflows(
2261         C, State, SearchStrPtr.Expression, Result);
2262 
2263     // Overwrite the search string pointer. The new value is either an address
2264     // further along in the same string, or NULL if there are no more tokens.
2265     State = State->bindLoc(*SearchStrLoc,
2266         SVB.conjureSymbolVal(getTag(),
2267           CE,
2268           LCtx,
2269           CharPtrTy,
2270           C.blockCount()),
2271         LCtx);
2272   } else {
2273     assert(SearchStrVal.isUnknown());
2274     // Conjure a symbolic value. It's the best we can do.
2275     Result = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2276   }
2277 
2278   // Set the return value, and finish.
2279   State = State->BindExpr(CE, LCtx, Result);
2280   C.addTransition(State);
2281 }
2282 
2283 // These should probably be moved into a C++ standard library checker.
2284 void CStringChecker::evalStdCopy(CheckerContext &C, const CallExpr *CE) const {
2285   evalStdCopyCommon(C, CE);
2286 }
2287 
2288 void CStringChecker::evalStdCopyBackward(CheckerContext &C,
2289     const CallExpr *CE) const {
2290   evalStdCopyCommon(C, CE);
2291 }
2292 
2293 void CStringChecker::evalStdCopyCommon(CheckerContext &C,
2294     const CallExpr *CE) const {
2295   if (!CE->getArg(2)->getType()->isPointerType())
2296     return;
2297 
2298   ProgramStateRef State = C.getState();
2299 
2300   const LocationContext *LCtx = C.getLocationContext();
2301 
2302   // template <class _InputIterator, class _OutputIterator>
2303   // _OutputIterator
2304   // copy(_InputIterator __first, _InputIterator __last,
2305   //        _OutputIterator __result)
2306 
2307   // Invalidate the destination buffer
2308   const Expr *Dst = CE->getArg(2);
2309   SVal DstVal = State->getSVal(Dst, LCtx);
2310   // FIXME: As we do not know how many items are copied, we also invalidate the
2311   // super region containing the target location.
2312   State =
2313       invalidateDestinationBufferAlwaysEscapeSuperRegion(C, State, Dst, DstVal);
2314 
2315   SValBuilder &SVB = C.getSValBuilder();
2316 
2317   SVal ResultVal = SVB.conjureSymbolVal(nullptr, CE, LCtx, C.blockCount());
2318   State = State->BindExpr(CE, LCtx, ResultVal);
2319 
2320   C.addTransition(State);
2321 }
2322 
2323 void CStringChecker::evalMemset(CheckerContext &C, const CallExpr *CE) const {
2324   // void *memset(void *s, int c, size_t n);
2325   CurrentFunctionDescription = "memory set function";
2326 
2327   DestinationArgExpr Buffer = {{CE->getArg(0), 0}};
2328   AnyArgExpr CharE = {CE->getArg(1), 1};
2329   SizeArgExpr Size = {{CE->getArg(2), 2}};
2330 
2331   ProgramStateRef State = C.getState();
2332 
2333   // See if the size argument is zero.
2334   const LocationContext *LCtx = C.getLocationContext();
2335   SVal SizeVal = C.getSVal(Size.Expression);
2336   QualType SizeTy = Size.Expression->getType();
2337 
2338   ProgramStateRef ZeroSize, NonZeroSize;
2339   std::tie(ZeroSize, NonZeroSize) = assumeZero(C, State, SizeVal, SizeTy);
2340 
2341   // Get the value of the memory area.
2342   SVal BufferPtrVal = C.getSVal(Buffer.Expression);
2343 
2344   // If the size is zero, there won't be any actual memory access, so
2345   // just bind the return value to the buffer and return.
2346   if (ZeroSize && !NonZeroSize) {
2347     ZeroSize = ZeroSize->BindExpr(CE, LCtx, BufferPtrVal);
2348     C.addTransition(ZeroSize);
2349     return;
2350   }
2351 
2352   // Ensure the memory area is not null.
2353   // If it is NULL there will be a NULL pointer dereference.
2354   State = checkNonNull(C, NonZeroSize, Buffer, BufferPtrVal);
2355   if (!State)
2356     return;
2357 
2358   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2359   if (!State)
2360     return;
2361 
2362   // According to the values of the arguments, bind the value of the second
2363   // argument to the destination buffer and set string length, or just
2364   // invalidate the destination buffer.
2365   if (!memsetAux(Buffer.Expression, C.getSVal(CharE.Expression),
2366                  Size.Expression, C, State))
2367     return;
2368 
2369   State = State->BindExpr(CE, LCtx, BufferPtrVal);
2370   C.addTransition(State);
2371 }
2372 
2373 void CStringChecker::evalBzero(CheckerContext &C, const CallExpr *CE) const {
2374   CurrentFunctionDescription = "memory clearance function";
2375 
2376   DestinationArgExpr Buffer = {{CE->getArg(0), 0}};
2377   SizeArgExpr Size = {{CE->getArg(1), 1}};
2378   SVal Zero = C.getSValBuilder().makeZeroVal(C.getASTContext().IntTy);
2379 
2380   ProgramStateRef State = C.getState();
2381 
2382   // See if the size argument is zero.
2383   SVal SizeVal = C.getSVal(Size.Expression);
2384   QualType SizeTy = Size.Expression->getType();
2385 
2386   ProgramStateRef StateZeroSize, StateNonZeroSize;
2387   std::tie(StateZeroSize, StateNonZeroSize) =
2388     assumeZero(C, State, SizeVal, SizeTy);
2389 
2390   // If the size is zero, there won't be any actual memory access,
2391   // In this case we just return.
2392   if (StateZeroSize && !StateNonZeroSize) {
2393     C.addTransition(StateZeroSize);
2394     return;
2395   }
2396 
2397   // Get the value of the memory area.
2398   SVal MemVal = C.getSVal(Buffer.Expression);
2399 
2400   // Ensure the memory area is not null.
2401   // If it is NULL there will be a NULL pointer dereference.
2402   State = checkNonNull(C, StateNonZeroSize, Buffer, MemVal);
2403   if (!State)
2404     return;
2405 
2406   State = CheckBufferAccess(C, State, Buffer, Size, AccessKind::write);
2407   if (!State)
2408     return;
2409 
2410   if (!memsetAux(Buffer.Expression, Zero, Size.Expression, C, State))
2411     return;
2412 
2413   C.addTransition(State);
2414 }
2415 
2416 void CStringChecker::evalSprintf(CheckerContext &C, const CallExpr *CE) const {
2417   CurrentFunctionDescription = "'sprintf'";
2418   bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___sprintf_chk;
2419   evalSprintfCommon(C, CE, /* IsBounded */ false, IsBI);
2420 }
2421 
2422 void CStringChecker::evalSnprintf(CheckerContext &C, const CallExpr *CE) const {
2423   CurrentFunctionDescription = "'snprintf'";
2424   bool IsBI = CE->getBuiltinCallee() == Builtin::BI__builtin___snprintf_chk;
2425   evalSprintfCommon(C, CE, /* IsBounded */ true, IsBI);
2426 }
2427 
2428 void CStringChecker::evalSprintfCommon(CheckerContext &C, const CallExpr *CE,
2429                                        bool IsBounded, bool IsBuiltin) const {
2430   ProgramStateRef State = C.getState();
2431   DestinationArgExpr Dest = {{CE->getArg(0), 0}};
2432 
2433   const auto NumParams = CE->getCalleeDecl()->getAsFunction()->getNumParams();
2434   assert(CE->getNumArgs() >= NumParams);
2435 
2436   const auto AllArguments =
2437       llvm::make_range(CE->getArgs(), CE->getArgs() + CE->getNumArgs());
2438   const auto VariadicArguments = drop_begin(enumerate(AllArguments), NumParams);
2439 
2440   for (const auto &[ArgIdx, ArgExpr] : VariadicArguments) {
2441     // We consider only string buffers
2442     if (const QualType type = ArgExpr->getType();
2443         !type->isAnyPointerType() ||
2444         !type->getPointeeType()->isAnyCharacterType())
2445       continue;
2446     SourceArgExpr Source = {{ArgExpr, unsigned(ArgIdx)}};
2447 
2448     // Ensure the buffers do not overlap.
2449     SizeArgExpr SrcExprAsSizeDummy = {
2450         {Source.Expression, Source.ArgumentIndex}};
2451     State = CheckOverlap(
2452         C, State,
2453         (IsBounded ? SizeArgExpr{{CE->getArg(1), 1}} : SrcExprAsSizeDummy),
2454         Dest, Source);
2455     if (!State)
2456       return;
2457   }
2458 
2459   C.addTransition(State);
2460 }
2461 
2462 //===----------------------------------------------------------------------===//
2463 // The driver method, and other Checker callbacks.
2464 //===----------------------------------------------------------------------===//
2465 
2466 CStringChecker::FnCheck CStringChecker::identifyCall(const CallEvent &Call,
2467                                                      CheckerContext &C) const {
2468   const auto *CE = dyn_cast_or_null<CallExpr>(Call.getOriginExpr());
2469   if (!CE)
2470     return nullptr;
2471 
2472   const FunctionDecl *FD = dyn_cast_or_null<FunctionDecl>(Call.getDecl());
2473   if (!FD)
2474     return nullptr;
2475 
2476   if (StdCopy.matches(Call))
2477     return &CStringChecker::evalStdCopy;
2478   if (StdCopyBackward.matches(Call))
2479     return &CStringChecker::evalStdCopyBackward;
2480 
2481   // Pro-actively check that argument types are safe to do arithmetic upon.
2482   // We do not want to crash if someone accidentally passes a structure
2483   // into, say, a C++ overload of any of these functions. We could not check
2484   // that for std::copy because they may have arguments of other types.
2485   for (auto I : CE->arguments()) {
2486     QualType T = I->getType();
2487     if (!T->isIntegralOrEnumerationType() && !T->isPointerType())
2488       return nullptr;
2489   }
2490 
2491   const FnCheck *Callback = Callbacks.lookup(Call);
2492   if (Callback)
2493     return *Callback;
2494 
2495   return nullptr;
2496 }
2497 
2498 bool CStringChecker::evalCall(const CallEvent &Call, CheckerContext &C) const {
2499   FnCheck Callback = identifyCall(Call, C);
2500 
2501   // If the callee isn't a string function, let another checker handle it.
2502   if (!Callback)
2503     return false;
2504 
2505   // Check and evaluate the call.
2506   const auto *CE = cast<CallExpr>(Call.getOriginExpr());
2507   Callback(this, C, CE);
2508 
2509   // If the evaluate call resulted in no change, chain to the next eval call
2510   // handler.
2511   // Note, the custom CString evaluation calls assume that basic safety
2512   // properties are held. However, if the user chooses to turn off some of these
2513   // checks, we ignore the issues and leave the call evaluation to a generic
2514   // handler.
2515   return C.isDifferent();
2516 }
2517 
2518 void CStringChecker::checkPreStmt(const DeclStmt *DS, CheckerContext &C) const {
2519   // Record string length for char a[] = "abc";
2520   ProgramStateRef state = C.getState();
2521 
2522   for (const auto *I : DS->decls()) {
2523     const VarDecl *D = dyn_cast<VarDecl>(I);
2524     if (!D)
2525       continue;
2526 
2527     // FIXME: Handle array fields of structs.
2528     if (!D->getType()->isArrayType())
2529       continue;
2530 
2531     const Expr *Init = D->getInit();
2532     if (!Init)
2533       continue;
2534     if (!isa<StringLiteral>(Init))
2535       continue;
2536 
2537     Loc VarLoc = state->getLValue(D, C.getLocationContext());
2538     const MemRegion *MR = VarLoc.getAsRegion();
2539     if (!MR)
2540       continue;
2541 
2542     SVal StrVal = C.getSVal(Init);
2543     assert(StrVal.isValid() && "Initializer string is unknown or undefined");
2544     DefinedOrUnknownSVal strLength =
2545       getCStringLength(C, state, Init, StrVal).castAs<DefinedOrUnknownSVal>();
2546 
2547     state = state->set<CStringLength>(MR, strLength);
2548   }
2549 
2550   C.addTransition(state);
2551 }
2552 
2553 ProgramStateRef
2554 CStringChecker::checkRegionChanges(ProgramStateRef state,
2555     const InvalidatedSymbols *,
2556     ArrayRef<const MemRegion *> ExplicitRegions,
2557     ArrayRef<const MemRegion *> Regions,
2558     const LocationContext *LCtx,
2559     const CallEvent *Call) const {
2560   CStringLengthTy Entries = state->get<CStringLength>();
2561   if (Entries.isEmpty())
2562     return state;
2563 
2564   llvm::SmallPtrSet<const MemRegion *, 8> Invalidated;
2565   llvm::SmallPtrSet<const MemRegion *, 32> SuperRegions;
2566 
2567   // First build sets for the changed regions and their super-regions.
2568   for (const MemRegion *MR : Regions) {
2569     Invalidated.insert(MR);
2570 
2571     SuperRegions.insert(MR);
2572     while (const SubRegion *SR = dyn_cast<SubRegion>(MR)) {
2573       MR = SR->getSuperRegion();
2574       SuperRegions.insert(MR);
2575     }
2576   }
2577 
2578   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2579 
2580   // Then loop over the entries in the current state.
2581   for (const MemRegion *MR : llvm::make_first_range(Entries)) {
2582     // Is this entry for a super-region of a changed region?
2583     if (SuperRegions.count(MR)) {
2584       Entries = F.remove(Entries, MR);
2585       continue;
2586     }
2587 
2588     // Is this entry for a sub-region of a changed region?
2589     const MemRegion *Super = MR;
2590     while (const SubRegion *SR = dyn_cast<SubRegion>(Super)) {
2591       Super = SR->getSuperRegion();
2592       if (Invalidated.count(Super)) {
2593         Entries = F.remove(Entries, MR);
2594         break;
2595       }
2596     }
2597   }
2598 
2599   return state->set<CStringLength>(Entries);
2600 }
2601 
2602 void CStringChecker::checkLiveSymbols(ProgramStateRef state,
2603     SymbolReaper &SR) const {
2604   // Mark all symbols in our string length map as valid.
2605   CStringLengthTy Entries = state->get<CStringLength>();
2606 
2607   for (SVal Len : llvm::make_second_range(Entries)) {
2608     for (SymbolRef Sym : Len.symbols())
2609       SR.markInUse(Sym);
2610   }
2611 }
2612 
2613 void CStringChecker::checkDeadSymbols(SymbolReaper &SR,
2614     CheckerContext &C) const {
2615   ProgramStateRef state = C.getState();
2616   CStringLengthTy Entries = state->get<CStringLength>();
2617   if (Entries.isEmpty())
2618     return;
2619 
2620   CStringLengthTy::Factory &F = state->get_context<CStringLength>();
2621   for (auto [Reg, Len] : Entries) {
2622     if (SymbolRef Sym = Len.getAsSymbol()) {
2623       if (SR.isDead(Sym))
2624         Entries = F.remove(Entries, Reg);
2625     }
2626   }
2627 
2628   state = state->set<CStringLength>(Entries);
2629   C.addTransition(state);
2630 }
2631 
2632 void ento::registerCStringModeling(CheckerManager &Mgr) {
2633   Mgr.registerChecker<CStringChecker>();
2634 }
2635 
2636 bool ento::shouldRegisterCStringModeling(const CheckerManager &mgr) {
2637   return true;
2638 }
2639 
2640 #define REGISTER_CHECKER(name)                                                 \
2641   void ento::register##name(CheckerManager &mgr) {                             \
2642     CStringChecker *checker = mgr.getChecker<CStringChecker>();                \
2643     checker->Filter.Check##name = true;                                        \
2644     checker->Filter.CheckName##name = mgr.getCurrentCheckerName();             \
2645   }                                                                            \
2646                                                                                \
2647   bool ento::shouldRegister##name(const CheckerManager &mgr) { return true; }
2648 
2649 REGISTER_CHECKER(CStringNullArg)
2650 REGISTER_CHECKER(CStringOutOfBounds)
2651 REGISTER_CHECKER(CStringBufferOverlap)
2652 REGISTER_CHECKER(CStringNotNullTerm)
2653 REGISTER_CHECKER(CStringUninitializedRead)
2654