1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SVal, Loc, and NonLoc, classes that represent
10 //  abstract r-values for use with path-sensitive value tracking.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
21 #include "llvm/ADT/FoldingSet.h"
22 #include "llvm/ADT/ImmutableList.h"
23 #include "llvm/ADT/PointerUnion.h"
24 #include "llvm/ADT/iterator_range.h"
25 #include "llvm/Support/Casting.h"
26 #include <cassert>
27 #include <cstdint>
28 #include <optional>
29 #include <utility>
30 
31 //==------------------------------------------------------------------------==//
32 //  Base SVal types.
33 //==------------------------------------------------------------------------==//
34 
35 namespace clang {
36 
37 class CXXBaseSpecifier;
38 class FunctionDecl;
39 class LabelDecl;
40 
41 namespace ento {
42 
43 class CompoundValData;
44 class LazyCompoundValData;
45 class MemRegion;
46 class PointerToMemberData;
47 class SValBuilder;
48 class TypedValueRegion;
49 
50 namespace nonloc {
51 
52 /// Sub-kinds for NonLoc values.
53 enum Kind {
54 #define NONLOC_SVAL(Id, Parent) Id ## Kind,
55 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
56 };
57 
58 } // namespace nonloc
59 
60 namespace loc {
61 
62 /// Sub-kinds for Loc values.
63 enum Kind {
64 #define LOC_SVAL(Id, Parent) Id ## Kind,
65 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
66 };
67 
68 } // namespace loc
69 
70 /// SVal - This represents a symbolic expression, which can be either
71 ///  an L-value or an R-value.
72 ///
73 class SVal {
74 public:
75   enum BaseKind {
76     // The enumerators must be representable using 2 bits.
77 #define BASIC_SVAL(Id, Parent) Id ## Kind,
78 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind,
79 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
80   };
81   enum { BaseBits = 2, BaseMask = 0b11 };
82 
83 protected:
84   const void *Data = nullptr;
85 
86   /// The lowest 2 bits are a BaseKind (0 -- 3).
87   ///  The higher bits are an unsigned "kind" value.
88   unsigned Kind = 0;
89 
90   explicit SVal(const void *d, bool isLoc, unsigned ValKind)
91       : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {}
92 
93   explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {}
94 
95 public:
96   explicit SVal() = default;
97 
98   /// Convert to the specified SVal type, asserting that this SVal is of
99   /// the desired type.
100   template <typename T> T castAs() const { return llvm::cast<T>(*this); }
101 
102   /// Convert to the specified SVal type, returning std::nullopt if this SVal is
103   /// not of the desired type.
104   template <typename T> std::optional<T> getAs() const {
105     return llvm::dyn_cast<T>(*this);
106   }
107 
108   unsigned getRawKind() const { return Kind; }
109   BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
110   unsigned getSubKind() const { return Kind >> BaseBits; }
111 
112   // This method is required for using SVal in a FoldingSetNode.  It
113   // extracts a unique signature for this SVal object.
114   void Profile(llvm::FoldingSetNodeID &ID) const {
115     ID.AddInteger((unsigned) getRawKind());
116     ID.AddPointer(Data);
117   }
118 
119   bool operator==(SVal R) const {
120     return getRawKind() == R.getRawKind() && Data == R.Data;
121   }
122 
123   bool operator!=(SVal R) const { return !(*this == R); }
124 
125   bool isUnknown() const {
126     return getRawKind() == UnknownValKind;
127   }
128 
129   bool isUndef() const {
130     return getRawKind() == UndefinedValKind;
131   }
132 
133   bool isUnknownOrUndef() const {
134     return getRawKind() <= UnknownValKind;
135   }
136 
137   bool isValid() const {
138     return getRawKind() > UnknownValKind;
139   }
140 
141   bool isConstant() const;
142 
143   bool isConstant(int I) const;
144 
145   bool isZeroConstant() const;
146 
147   /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a
148   /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl.
149   /// Otherwise return 0.
150   const FunctionDecl *getAsFunctionDecl() const;
151 
152   /// If this SVal is a location and wraps a symbol, return that
153   ///  SymbolRef. Otherwise return 0.
154   ///
155   /// Casts are ignored during lookup.
156   /// \param IncludeBaseRegions The boolean that controls whether the search
157   /// should continue to the base regions if the region is not symbolic.
158   SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const;
159 
160   /// Get the symbol in the SVal or its base region.
161   SymbolRef getLocSymbolInBase() const;
162 
163   /// If this SVal wraps a symbol return that SymbolRef.
164   /// Otherwise, return 0.
165   ///
166   /// Casts are ignored during lookup.
167   /// \param IncludeBaseRegions The boolean that controls whether the search
168   /// should continue to the base regions if the region is not symbolic.
169   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;
170 
171   /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt,
172   /// return a pointer to APSInt which is held in it.
173   /// Otherwise, return nullptr.
174   const llvm::APSInt *getAsInteger() const;
175 
176   const MemRegion *getAsRegion() const;
177 
178   /// printJson - Pretty-prints in JSON format.
179   void printJson(raw_ostream &Out, bool AddQuotes) const;
180 
181   void dumpToStream(raw_ostream &OS) const;
182   void dump() const;
183 
184   llvm::iterator_range<SymExpr::symbol_iterator> symbols() const {
185     if (const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true))
186       return SE->symbols();
187     SymExpr::symbol_iterator end{};
188     return llvm::make_range(end, end);
189   }
190 
191   /// Try to get a reasonable type for the given value.
192   ///
193   /// \returns The best approximation of the value type or Null.
194   /// In theory, all symbolic values should be typed, but this function
195   /// is still a WIP and might have a few blind spots.
196   ///
197   /// \note This function should not be used when the user has access to the
198   /// bound expression AST node as well, since AST always has exact types.
199   ///
200   /// \note Loc values are interpreted as pointer rvalues for the purposes of
201   /// this method.
202   QualType getType(const ASTContext &) const;
203 };
204 
205 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) {
206   V.dumpToStream(os);
207   return os;
208 }
209 
210 class UndefinedVal : public SVal {
211 public:
212   UndefinedVal() : SVal(UndefinedValKind) {}
213   static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; }
214 };
215 
216 class DefinedOrUnknownSVal : public SVal {
217 public:
218   // We want calling these methods to be a compiler error since they are
219   // tautologically false.
220   bool isUndef() const = delete;
221   bool isValid() const = delete;
222 
223   static bool classof(SVal V) { return !V.isUndef(); }
224 
225 protected:
226   explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind)
227       : SVal(d, isLoc, ValKind) {}
228   explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {}
229 };
230 
231 class UnknownVal : public DefinedOrUnknownSVal {
232 public:
233   explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {}
234 
235   static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; }
236 };
237 
238 class DefinedSVal : public DefinedOrUnknownSVal {
239 public:
240   // We want calling these methods to be a compiler error since they are
241   // tautologically true/false.
242   bool isUnknown() const = delete;
243   bool isUnknownOrUndef() const = delete;
244   bool isValid() const = delete;
245 
246   static bool classof(SVal V) { return !V.isUnknownOrUndef(); }
247 
248 protected:
249   explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind)
250       : DefinedOrUnknownSVal(d, isLoc, ValKind) {}
251 };
252 
253 /// Represents an SVal that is guaranteed to not be UnknownVal.
254 class KnownSVal : public SVal {
255 public:
256   KnownSVal(const DefinedSVal &V) : SVal(V) {}
257   KnownSVal(const UndefinedVal &V) : SVal(V) {}
258   static bool classof(SVal V) { return !V.isUnknown(); }
259 };
260 
261 class NonLoc : public DefinedSVal {
262 protected:
263   explicit NonLoc(unsigned SubKind, const void *d)
264       : DefinedSVal(d, false, SubKind) {}
265 
266 public:
267   void dumpToStream(raw_ostream &Out) const;
268 
269   static bool isCompoundType(QualType T) {
270     return T->isArrayType() || T->isRecordType() ||
271            T->isAnyComplexType() || T->isVectorType();
272   }
273 
274   static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; }
275 };
276 
277 class Loc : public DefinedSVal {
278 protected:
279   explicit Loc(unsigned SubKind, const void *D)
280       : DefinedSVal(const_cast<void *>(D), true, SubKind) {}
281 
282 public:
283   void dumpToStream(raw_ostream &Out) const;
284 
285   static bool isLocType(QualType T) {
286     return T->isAnyPointerType() || T->isBlockPointerType() ||
287            T->isReferenceType() || T->isNullPtrType();
288   }
289 
290   static bool classof(SVal V) { return V.getBaseKind() == LocKind; }
291 };
292 
293 //==------------------------------------------------------------------------==//
294 //  Subclasses of NonLoc.
295 //==------------------------------------------------------------------------==//
296 
297 namespace nonloc {
298 
299 /// Represents symbolic expression that isn't a location.
300 class SymbolVal : public NonLoc {
301 public:
302   SymbolVal() = delete;
303   SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) {
304     assert(sym);
305     assert(!Loc::isLocType(sym->getType()));
306   }
307 
308   LLVM_ATTRIBUTE_RETURNS_NONNULL
309   SymbolRef getSymbol() const {
310     return (const SymExpr *) Data;
311   }
312 
313   bool isExpression() const {
314     return !isa<SymbolData>(getSymbol());
315   }
316 
317   static bool classof(SVal V) {
318     return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind;
319   }
320 
321   static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; }
322 };
323 
324 /// Value representing integer constant.
325 class ConcreteInt : public NonLoc {
326 public:
327   explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {}
328 
329   const llvm::APSInt& getValue() const {
330     return *static_cast<const llvm::APSInt *>(Data);
331   }
332 
333   static bool classof(SVal V) {
334     return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind;
335   }
336 
337   static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; }
338 };
339 
340 class LocAsInteger : public NonLoc {
341   friend class ento::SValBuilder;
342 
343   explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data)
344       : NonLoc(LocAsIntegerKind, &data) {
345     // We do not need to represent loc::ConcreteInt as LocAsInteger,
346     // as it'd collapse into a nonloc::ConcreteInt instead.
347     assert(data.first.getBaseKind() == LocKind &&
348            (data.first.getSubKind() == loc::MemRegionValKind ||
349             data.first.getSubKind() == loc::GotoLabelKind));
350   }
351 
352 public:
353   Loc getLoc() const {
354     const std::pair<SVal, uintptr_t> *D =
355       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
356     return D->first.castAs<Loc>();
357   }
358 
359   unsigned getNumBits() const {
360     const std::pair<SVal, uintptr_t> *D =
361       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
362     return D->second;
363   }
364 
365   static bool classof(SVal V) {
366     return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind;
367   }
368 
369   static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; }
370 };
371 
372 class CompoundVal : public NonLoc {
373   friend class ento::SValBuilder;
374 
375   explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) {
376     assert(D);
377   }
378 
379 public:
380   LLVM_ATTRIBUTE_RETURNS_NONNULL
381   const CompoundValData* getValue() const {
382     return static_cast<const CompoundValData *>(Data);
383   }
384 
385   using iterator = llvm::ImmutableList<SVal>::iterator;
386 
387   iterator begin() const;
388   iterator end() const;
389 
390   static bool classof(SVal V) {
391     return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind;
392   }
393 
394   static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; }
395 };
396 
397 class LazyCompoundVal : public NonLoc {
398   friend class ento::SValBuilder;
399 
400   explicit LazyCompoundVal(const LazyCompoundValData *D)
401       : NonLoc(LazyCompoundValKind, D) {
402     assert(D);
403   }
404 
405 public:
406   LLVM_ATTRIBUTE_RETURNS_NONNULL
407   const LazyCompoundValData *getCVData() const {
408     return static_cast<const LazyCompoundValData *>(Data);
409   }
410 
411   /// It might return null.
412   const void *getStore() const;
413 
414   LLVM_ATTRIBUTE_RETURNS_NONNULL
415   const TypedValueRegion *getRegion() const;
416 
417   static bool classof(SVal V) {
418     return V.getBaseKind() == NonLocKind &&
419            V.getSubKind() == LazyCompoundValKind;
420   }
421 
422   static bool classof(NonLoc V) {
423     return V.getSubKind() == LazyCompoundValKind;
424   }
425 };
426 
427 /// Value representing pointer-to-member.
428 ///
429 /// This value is qualified as NonLoc because neither loading nor storing
430 /// operations are applied to it. Instead, the analyzer uses the L-value coming
431 /// from pointer-to-member applied to an object.
432 /// This SVal is represented by a NamedDecl which can be a member function
433 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers.
434 /// This list is required to accumulate the pointer-to-member cast history to
435 /// figure out the correct subobject field. In particular, implicit casts grow
436 /// this list and explicit casts like static_cast shrink this list.
437 class PointerToMember : public NonLoc {
438   friend class ento::SValBuilder;
439 
440 public:
441   using PTMDataType =
442       llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>;
443 
444   const PTMDataType getPTMData() const {
445     return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data));
446   }
447 
448   bool isNullMemberPointer() const;
449 
450   const NamedDecl *getDecl() const;
451 
452   template<typename AdjustedDecl>
453   const AdjustedDecl *getDeclAs() const {
454     return dyn_cast_or_null<AdjustedDecl>(getDecl());
455   }
456 
457   using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
458 
459   iterator begin() const;
460   iterator end() const;
461 
462   static bool classof(SVal V) {
463     return V.getBaseKind() == NonLocKind &&
464            V.getSubKind() == PointerToMemberKind;
465   }
466 
467   static bool classof(NonLoc V) {
468     return V.getSubKind() == PointerToMemberKind;
469   }
470 
471 private:
472   explicit PointerToMember(const PTMDataType D)
473       : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
474 };
475 
476 } // namespace nonloc
477 
478 //==------------------------------------------------------------------------==//
479 //  Subclasses of Loc.
480 //==------------------------------------------------------------------------==//
481 
482 namespace loc {
483 
484 class GotoLabel : public Loc {
485 public:
486   explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) {
487     assert(Label);
488   }
489 
490   const LabelDecl *getLabel() const {
491     return static_cast<const LabelDecl *>(Data);
492   }
493 
494   static bool classof(SVal V) {
495     return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind;
496   }
497 
498   static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; }
499 };
500 
501 class MemRegionVal : public Loc {
502 public:
503   explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {
504     assert(r);
505   }
506 
507   /// Get the underlining region.
508   const MemRegion *getRegion() const {
509     return static_cast<const MemRegion *>(Data);
510   }
511 
512   /// Get the underlining region and strip casts.
513   const MemRegion* stripCasts(bool StripBaseCasts = true) const;
514 
515   template <typename REGION>
516   const REGION* getRegionAs() const {
517     return dyn_cast<REGION>(getRegion());
518   }
519 
520   bool operator==(const MemRegionVal &R) const {
521     return getRegion() == R.getRegion();
522   }
523 
524   bool operator!=(const MemRegionVal &R) const {
525     return getRegion() != R.getRegion();
526   }
527 
528   static bool classof(SVal V) {
529     return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind;
530   }
531 
532   static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; }
533 };
534 
535 class ConcreteInt : public Loc {
536 public:
537   explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {}
538 
539   const llvm::APSInt &getValue() const {
540     return *static_cast<const llvm::APSInt *>(Data);
541   }
542 
543   static bool classof(SVal V) {
544     return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind;
545   }
546 
547   static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; }
548 };
549 
550 } // namespace loc
551 } // namespace ento
552 } // namespace clang
553 
554 namespace llvm {
555 template <typename To, typename From>
556 struct CastInfo<
557     To, From,
558     std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>
559     : public CastIsPossible<To, ::clang::ento::SVal> {
560   using Self = CastInfo<
561       To, From,
562       std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>;
563   static bool isPossible(const From &V) {
564     return To::classof(*static_cast<const ::clang::ento::SVal *>(&V));
565   }
566   static std::optional<To> castFailed() { return std::optional<To>{}; }
567   static To doCast(const From &f) {
568     return *static_cast<const To *>(cast<::clang::ento::SVal>(&f));
569   }
570   static std::optional<To> doCastIfPossible(const From &f) {
571     if (!Self::isPossible(f))
572       return Self::castFailed();
573     return doCast(f);
574   }
575 };
576 } // namespace llvm
577 
578 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
579