1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SVal, Loc, and NonLoc, classes that represent
10 //  abstract r-values for use with path-sensitive value tracking.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
21 #include "llvm/ADT/FoldingSet.h"
22 #include "llvm/ADT/ImmutableList.h"
23 #include "llvm/ADT/PointerUnion.h"
24 #include "llvm/Support/Casting.h"
25 #include <cassert>
26 #include <cstdint>
27 #include <optional>
28 #include <utility>
29 
30 //==------------------------------------------------------------------------==//
31 //  Base SVal types.
32 //==------------------------------------------------------------------------==//
33 
34 namespace clang {
35 
36 class CXXBaseSpecifier;
37 class FunctionDecl;
38 class LabelDecl;
39 
40 namespace ento {
41 
42 class CompoundValData;
43 class LazyCompoundValData;
44 class MemRegion;
45 class PointerToMemberData;
46 class SValBuilder;
47 class TypedValueRegion;
48 
49 namespace nonloc {
50 
51 /// Sub-kinds for NonLoc values.
52 enum Kind {
53 #define NONLOC_SVAL(Id, Parent) Id ## Kind,
54 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
55 };
56 
57 } // namespace nonloc
58 
59 namespace loc {
60 
61 /// Sub-kinds for Loc values.
62 enum Kind {
63 #define LOC_SVAL(Id, Parent) Id ## Kind,
64 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
65 };
66 
67 } // namespace loc
68 
69 /// SVal - This represents a symbolic expression, which can be either
70 ///  an L-value or an R-value.
71 ///
72 class SVal {
73 public:
74   enum BaseKind {
75     // The enumerators must be representable using 2 bits.
76 #define BASIC_SVAL(Id, Parent) Id ## Kind,
77 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind,
78 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
79   };
80   enum { BaseBits = 2, BaseMask = 0b11 };
81 
82 protected:
83   const void *Data = nullptr;
84 
85   /// The lowest 2 bits are a BaseKind (0 -- 3).
86   ///  The higher bits are an unsigned "kind" value.
87   unsigned Kind = 0;
88 
89   explicit SVal(const void *d, bool isLoc, unsigned ValKind)
90       : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {}
91 
92   explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {}
93 
94 public:
95   explicit SVal() = default;
96 
97   /// Convert to the specified SVal type, asserting that this SVal is of
98   /// the desired type.
99   template <typename T> T castAs() const { return llvm::cast<T>(*this); }
100 
101   /// Convert to the specified SVal type, returning std::nullopt if this SVal is
102   /// not of the desired type.
103   template <typename T> std::optional<T> getAs() const {
104     return llvm::dyn_cast<T>(*this);
105   }
106 
107   unsigned getRawKind() const { return Kind; }
108   BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
109   unsigned getSubKind() const { return Kind >> BaseBits; }
110 
111   // This method is required for using SVal in a FoldingSetNode.  It
112   // extracts a unique signature for this SVal object.
113   void Profile(llvm::FoldingSetNodeID &ID) const {
114     ID.AddInteger((unsigned) getRawKind());
115     ID.AddPointer(Data);
116   }
117 
118   bool operator==(SVal R) const {
119     return getRawKind() == R.getRawKind() && Data == R.Data;
120   }
121 
122   bool operator!=(SVal R) const { return !(*this == R); }
123 
124   bool isUnknown() const {
125     return getRawKind() == UnknownValKind;
126   }
127 
128   bool isUndef() const {
129     return getRawKind() == UndefinedValKind;
130   }
131 
132   bool isUnknownOrUndef() const {
133     return getRawKind() <= UnknownValKind;
134   }
135 
136   bool isValid() const {
137     return getRawKind() > UnknownValKind;
138   }
139 
140   bool isConstant() const;
141 
142   bool isConstant(int I) const;
143 
144   bool isZeroConstant() const;
145 
146   /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a
147   /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl.
148   /// Otherwise return 0.
149   const FunctionDecl *getAsFunctionDecl() const;
150 
151   /// If this SVal is a location and wraps a symbol, return that
152   ///  SymbolRef. Otherwise return 0.
153   ///
154   /// Casts are ignored during lookup.
155   /// \param IncludeBaseRegions The boolean that controls whether the search
156   /// should continue to the base regions if the region is not symbolic.
157   SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const;
158 
159   /// Get the symbol in the SVal or its base region.
160   SymbolRef getLocSymbolInBase() const;
161 
162   /// If this SVal wraps a symbol return that SymbolRef.
163   /// Otherwise, return 0.
164   ///
165   /// Casts are ignored during lookup.
166   /// \param IncludeBaseRegions The boolean that controls whether the search
167   /// should continue to the base regions if the region is not symbolic.
168   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;
169 
170   /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt,
171   /// return a pointer to APSInt which is held in it.
172   /// Otherwise, return nullptr.
173   const llvm::APSInt *getAsInteger() const;
174 
175   const MemRegion *getAsRegion() const;
176 
177   /// printJson - Pretty-prints in JSON format.
178   void printJson(raw_ostream &Out, bool AddQuotes) const;
179 
180   void dumpToStream(raw_ostream &OS) const;
181   void dump() const;
182 
183   SymExpr::symbol_iterator symbol_begin() const {
184     const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true);
185     if (SE)
186       return SE->symbol_begin();
187     else
188       return SymExpr::symbol_iterator();
189   }
190 
191   SymExpr::symbol_iterator symbol_end() const {
192     return SymExpr::symbol_end();
193   }
194 
195   /// Try to get a reasonable type for the given value.
196   ///
197   /// \returns The best approximation of the value type or Null.
198   /// In theory, all symbolic values should be typed, but this function
199   /// is still a WIP and might have a few blind spots.
200   ///
201   /// \note This function should not be used when the user has access to the
202   /// bound expression AST node as well, since AST always has exact types.
203   ///
204   /// \note Loc values are interpreted as pointer rvalues for the purposes of
205   /// this method.
206   QualType getType(const ASTContext &) const;
207 };
208 
209 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) {
210   V.dumpToStream(os);
211   return os;
212 }
213 
214 class UndefinedVal : public SVal {
215 public:
216   UndefinedVal() : SVal(UndefinedValKind) {}
217   static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; }
218 };
219 
220 class DefinedOrUnknownSVal : public SVal {
221 public:
222   // We want calling these methods to be a compiler error since they are
223   // tautologically false.
224   bool isUndef() const = delete;
225   bool isValid() const = delete;
226 
227   static bool classof(SVal V) { return !V.isUndef(); }
228 
229 protected:
230   explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind)
231       : SVal(d, isLoc, ValKind) {}
232   explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {}
233 };
234 
235 class UnknownVal : public DefinedOrUnknownSVal {
236 public:
237   explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {}
238 
239   static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; }
240 };
241 
242 class DefinedSVal : public DefinedOrUnknownSVal {
243 public:
244   // We want calling these methods to be a compiler error since they are
245   // tautologically true/false.
246   bool isUnknown() const = delete;
247   bool isUnknownOrUndef() const = delete;
248   bool isValid() const = delete;
249 
250   static bool classof(SVal V) { return !V.isUnknownOrUndef(); }
251 
252 protected:
253   explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind)
254       : DefinedOrUnknownSVal(d, isLoc, ValKind) {}
255 };
256 
257 /// Represents an SVal that is guaranteed to not be UnknownVal.
258 class KnownSVal : public SVal {
259 public:
260   KnownSVal(const DefinedSVal &V) : SVal(V) {}
261   KnownSVal(const UndefinedVal &V) : SVal(V) {}
262   static bool classof(SVal V) { return !V.isUnknown(); }
263 };
264 
265 class NonLoc : public DefinedSVal {
266 protected:
267   explicit NonLoc(unsigned SubKind, const void *d)
268       : DefinedSVal(d, false, SubKind) {}
269 
270 public:
271   void dumpToStream(raw_ostream &Out) const;
272 
273   static bool isCompoundType(QualType T) {
274     return T->isArrayType() || T->isRecordType() ||
275            T->isAnyComplexType() || T->isVectorType();
276   }
277 
278   static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; }
279 };
280 
281 class Loc : public DefinedSVal {
282 protected:
283   explicit Loc(unsigned SubKind, const void *D)
284       : DefinedSVal(const_cast<void *>(D), true, SubKind) {}
285 
286 public:
287   void dumpToStream(raw_ostream &Out) const;
288 
289   static bool isLocType(QualType T) {
290     return T->isAnyPointerType() || T->isBlockPointerType() ||
291            T->isReferenceType() || T->isNullPtrType();
292   }
293 
294   static bool classof(SVal V) { return V.getBaseKind() == LocKind; }
295 };
296 
297 //==------------------------------------------------------------------------==//
298 //  Subclasses of NonLoc.
299 //==------------------------------------------------------------------------==//
300 
301 namespace nonloc {
302 
303 /// Represents symbolic expression that isn't a location.
304 class SymbolVal : public NonLoc {
305 public:
306   SymbolVal() = delete;
307   SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) {
308     assert(sym);
309     assert(!Loc::isLocType(sym->getType()));
310   }
311 
312   LLVM_ATTRIBUTE_RETURNS_NONNULL
313   SymbolRef getSymbol() const {
314     return (const SymExpr *) Data;
315   }
316 
317   bool isExpression() const {
318     return !isa<SymbolData>(getSymbol());
319   }
320 
321   static bool classof(SVal V) {
322     return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind;
323   }
324 
325   static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; }
326 };
327 
328 /// Value representing integer constant.
329 class ConcreteInt : public NonLoc {
330 public:
331   explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {}
332 
333   const llvm::APSInt& getValue() const {
334     return *static_cast<const llvm::APSInt *>(Data);
335   }
336 
337   static bool classof(SVal V) {
338     return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind;
339   }
340 
341   static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; }
342 };
343 
344 class LocAsInteger : public NonLoc {
345   friend class ento::SValBuilder;
346 
347   explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data)
348       : NonLoc(LocAsIntegerKind, &data) {
349     // We do not need to represent loc::ConcreteInt as LocAsInteger,
350     // as it'd collapse into a nonloc::ConcreteInt instead.
351     assert(data.first.getBaseKind() == LocKind &&
352            (data.first.getSubKind() == loc::MemRegionValKind ||
353             data.first.getSubKind() == loc::GotoLabelKind));
354   }
355 
356 public:
357   Loc getLoc() const {
358     const std::pair<SVal, uintptr_t> *D =
359       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
360     return D->first.castAs<Loc>();
361   }
362 
363   unsigned getNumBits() const {
364     const std::pair<SVal, uintptr_t> *D =
365       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
366     return D->second;
367   }
368 
369   static bool classof(SVal V) {
370     return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind;
371   }
372 
373   static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; }
374 };
375 
376 class CompoundVal : public NonLoc {
377   friend class ento::SValBuilder;
378 
379   explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) {
380     assert(D);
381   }
382 
383 public:
384   LLVM_ATTRIBUTE_RETURNS_NONNULL
385   const CompoundValData* getValue() const {
386     return static_cast<const CompoundValData *>(Data);
387   }
388 
389   using iterator = llvm::ImmutableList<SVal>::iterator;
390 
391   iterator begin() const;
392   iterator end() const;
393 
394   static bool classof(SVal V) {
395     return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind;
396   }
397 
398   static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; }
399 };
400 
401 class LazyCompoundVal : public NonLoc {
402   friend class ento::SValBuilder;
403 
404   explicit LazyCompoundVal(const LazyCompoundValData *D)
405       : NonLoc(LazyCompoundValKind, D) {
406     assert(D);
407   }
408 
409 public:
410   LLVM_ATTRIBUTE_RETURNS_NONNULL
411   const LazyCompoundValData *getCVData() const {
412     return static_cast<const LazyCompoundValData *>(Data);
413   }
414 
415   /// It might return null.
416   const void *getStore() const;
417 
418   LLVM_ATTRIBUTE_RETURNS_NONNULL
419   const TypedValueRegion *getRegion() const;
420 
421   static bool classof(SVal V) {
422     return V.getBaseKind() == NonLocKind &&
423            V.getSubKind() == LazyCompoundValKind;
424   }
425 
426   static bool classof(NonLoc V) {
427     return V.getSubKind() == LazyCompoundValKind;
428   }
429 };
430 
431 /// Value representing pointer-to-member.
432 ///
433 /// This value is qualified as NonLoc because neither loading nor storing
434 /// operations are applied to it. Instead, the analyzer uses the L-value coming
435 /// from pointer-to-member applied to an object.
436 /// This SVal is represented by a NamedDecl which can be a member function
437 /// pointer or a member data pointer and an optional list of CXXBaseSpecifiers.
438 /// This list is required to accumulate the pointer-to-member cast history to
439 /// figure out the correct subobject field. In particular, implicit casts grow
440 /// this list and explicit casts like static_cast shrink this list.
441 class PointerToMember : public NonLoc {
442   friend class ento::SValBuilder;
443 
444 public:
445   using PTMDataType =
446       llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>;
447 
448   const PTMDataType getPTMData() const {
449     return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data));
450   }
451 
452   bool isNullMemberPointer() const;
453 
454   const NamedDecl *getDecl() const;
455 
456   template<typename AdjustedDecl>
457   const AdjustedDecl *getDeclAs() const {
458     return dyn_cast_or_null<AdjustedDecl>(getDecl());
459   }
460 
461   using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;
462 
463   iterator begin() const;
464   iterator end() const;
465 
466   static bool classof(SVal V) {
467     return V.getBaseKind() == NonLocKind &&
468            V.getSubKind() == PointerToMemberKind;
469   }
470 
471   static bool classof(NonLoc V) {
472     return V.getSubKind() == PointerToMemberKind;
473   }
474 
475 private:
476   explicit PointerToMember(const PTMDataType D)
477       : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
478 };
479 
480 } // namespace nonloc
481 
482 //==------------------------------------------------------------------------==//
483 //  Subclasses of Loc.
484 //==------------------------------------------------------------------------==//
485 
486 namespace loc {
487 
488 class GotoLabel : public Loc {
489 public:
490   explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) {
491     assert(Label);
492   }
493 
494   const LabelDecl *getLabel() const {
495     return static_cast<const LabelDecl *>(Data);
496   }
497 
498   static bool classof(SVal V) {
499     return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind;
500   }
501 
502   static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; }
503 };
504 
505 class MemRegionVal : public Loc {
506 public:
507   explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {
508     assert(r);
509   }
510 
511   /// Get the underlining region.
512   const MemRegion *getRegion() const {
513     return static_cast<const MemRegion *>(Data);
514   }
515 
516   /// Get the underlining region and strip casts.
517   const MemRegion* stripCasts(bool StripBaseCasts = true) const;
518 
519   template <typename REGION>
520   const REGION* getRegionAs() const {
521     return dyn_cast<REGION>(getRegion());
522   }
523 
524   bool operator==(const MemRegionVal &R) const {
525     return getRegion() == R.getRegion();
526   }
527 
528   bool operator!=(const MemRegionVal &R) const {
529     return getRegion() != R.getRegion();
530   }
531 
532   static bool classof(SVal V) {
533     return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind;
534   }
535 
536   static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; }
537 };
538 
539 class ConcreteInt : public Loc {
540 public:
541   explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {}
542 
543   const llvm::APSInt &getValue() const {
544     return *static_cast<const llvm::APSInt *>(Data);
545   }
546 
547   static bool classof(SVal V) {
548     return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind;
549   }
550 
551   static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; }
552 };
553 
554 } // namespace loc
555 } // namespace ento
556 } // namespace clang
557 
558 namespace llvm {
559 template <typename To, typename From>
560 struct CastInfo<
561     To, From,
562     std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>
563     : public CastIsPossible<To, ::clang::ento::SVal> {
564   using Self = CastInfo<
565       To, From,
566       std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>;
567   static bool isPossible(const From &V) {
568     return To::classof(*static_cast<const ::clang::ento::SVal *>(&V));
569   }
570   static std::optional<To> castFailed() { return std::optional<To>{}; }
571   static To doCast(const From &f) {
572     return *static_cast<const To *>(cast<::clang::ento::SVal>(&f));
573   }
574   static std::optional<To> doCastIfPossible(const From &f) {
575     if (!Self::isPossible(f))
576       return Self::castFailed();
577     return doCast(f);
578   }
579 };
580 } // namespace llvm
581 
582 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
583