1 //===- SVals.h - Abstract Values for Static Analysis ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines SVal, Loc, and NonLoc, classes that represent
10 //  abstract r-values for use with path-sensitive value tracking.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
16 
17 #include "clang/AST/Expr.h"
18 #include "clang/AST/Type.h"
19 #include "clang/Basic/LLVM.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/SymExpr.h"
21 #include "llvm/ADT/FoldingSet.h"
22 #include "llvm/ADT/ImmutableList.h"
23 #include "llvm/ADT/None.h"
24 #include "llvm/ADT/Optional.h"
25 #include "llvm/ADT/PointerUnion.h"
26 #include "llvm/Support/Casting.h"
27 #include <cassert>
28 #include <cstdint>
29 #include <utility>
30 
31 //==------------------------------------------------------------------------==//
32 //  Base SVal types.
33 //==------------------------------------------------------------------------==//
34 
35 namespace clang {
36 
37 class CXXBaseSpecifier;
38 class FunctionDecl;
39 class LabelDecl;
40 
41 namespace ento {
42 
43 class CompoundValData;
44 class LazyCompoundValData;
45 class MemRegion;
46 class PointerToMemberData;
47 class SValBuilder;
48 class TypedValueRegion;
49 
50 namespace nonloc {
51 
/// Sub-kinds for NonLoc values.
///
/// The enumerators are generated rather than spelled out: each
/// NONLOC_SVAL(Id, Parent) entry expanded from SVals.def contributes one
/// enumerator named `Id` followed by `Kind`. The Parent argument is ignored
/// here.
enum Kind {
// X-macro hook: turn every NONLOC_SVAL entry into an `<Id>Kind` enumerator.
#define NONLOC_SVAL(Id, Parent) Id ## Kind,
// NOTE(review): the macro is not #undef'd in this header; presumably SVals.def
// cleans it up after expansion -- confirm in the .def file.
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
};
57 
58 } // namespace nonloc
59 
60 namespace loc {
61 
/// Sub-kinds for Loc values.
///
/// The enumerators are generated rather than spelled out: each
/// LOC_SVAL(Id, Parent) entry expanded from SVals.def contributes one
/// enumerator named `Id` followed by `Kind`. The Parent argument is ignored
/// here.
enum Kind {
// X-macro hook: turn every LOC_SVAL entry into an `<Id>Kind` enumerator.
#define LOC_SVAL(Id, Parent) Id ## Kind,
// NOTE(review): the macro is not #undef'd in this header; presumably SVals.def
// cleans it up after expansion -- confirm in the .def file.
#include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
};
67 
68 } // namespace loc
69 
70 /// SVal - This represents a symbolic expression, which can be either
71 ///  an L-value or an R-value.
72 ///
73 class SVal {
74 public:
75   enum BaseKind {
76     // The enumerators must be representable using 2 bits.
77 #define BASIC_SVAL(Id, Parent) Id ## Kind,
78 #define ABSTRACT_SVAL_WITH_KIND(Id, Parent) Id ## Kind,
79 #include "clang/StaticAnalyzer/Core/PathSensitive/SVals.def"
80   };
81   enum { BaseBits = 2, BaseMask = 0b11 };
82 
83 protected:
84   const void *Data = nullptr;
85 
86   /// The lowest 2 bits are a BaseKind (0 -- 3).
87   ///  The higher bits are an unsigned "kind" value.
88   unsigned Kind = 0;
89 
90   explicit SVal(const void *d, bool isLoc, unsigned ValKind)
91       : Data(d), Kind((isLoc ? LocKind : NonLocKind) | (ValKind << BaseBits)) {}
92 
93   explicit SVal(BaseKind k, const void *D = nullptr) : Data(D), Kind(k) {}
94 
95 public:
96   explicit SVal() = default;
97 
98   /// Convert to the specified SVal type, asserting that this SVal is of
99   /// the desired type.
100   template <typename T> T castAs() const { return llvm::cast<T>(*this); }
101 
102   /// Convert to the specified SVal type, returning None if this SVal is
103   /// not of the desired type.
104   template <typename T> Optional<T> getAs() const {
105     return llvm::dyn_cast<T>(*this);
106   }
107 
108   unsigned getRawKind() const { return Kind; }
109   BaseKind getBaseKind() const { return (BaseKind) (Kind & BaseMask); }
110   unsigned getSubKind() const { return Kind >> BaseBits; }
111 
112   // This method is required for using SVal in a FoldingSetNode.  It
113   // extracts a unique signature for this SVal object.
114   void Profile(llvm::FoldingSetNodeID &ID) const {
115     ID.AddInteger((unsigned) getRawKind());
116     ID.AddPointer(Data);
117   }
118 
119   bool operator==(SVal R) const {
120     return getRawKind() == R.getRawKind() && Data == R.Data;
121   }
122 
123   bool operator!=(SVal R) const { return !(*this == R); }
124 
125   bool isUnknown() const {
126     return getRawKind() == UnknownValKind;
127   }
128 
129   bool isUndef() const {
130     return getRawKind() == UndefinedValKind;
131   }
132 
133   bool isUnknownOrUndef() const {
134     return getRawKind() <= UnknownValKind;
135   }
136 
137   bool isValid() const {
138     return getRawKind() > UnknownValKind;
139   }
140 
141   bool isConstant() const;
142 
143   bool isConstant(int I) const;
144 
145   bool isZeroConstant() const;
146 
147   /// getAsFunctionDecl - If this SVal is a MemRegionVal and wraps a
148   /// CodeTextRegion wrapping a FunctionDecl, return that FunctionDecl.
149   /// Otherwise return 0.
150   const FunctionDecl *getAsFunctionDecl() const;
151 
152   /// If this SVal is a location and wraps a symbol, return that
153   ///  SymbolRef. Otherwise return 0.
154   ///
155   /// Casts are ignored during lookup.
156   /// \param IncludeBaseRegions The boolean that controls whether the search
157   /// should continue to the base regions if the region is not symbolic.
158   SymbolRef getAsLocSymbol(bool IncludeBaseRegions = false) const;
159 
160   /// Get the symbol in the SVal or its base region.
161   SymbolRef getLocSymbolInBase() const;
162 
163   /// If this SVal wraps a symbol return that SymbolRef.
164   /// Otherwise, return 0.
165   ///
166   /// Casts are ignored during lookup.
167   /// \param IncludeBaseRegions The boolean that controls whether the search
168   /// should continue to the base regions if the region is not symbolic.
169   SymbolRef getAsSymbol(bool IncludeBaseRegions = false) const;
170 
171   /// If this SVal is loc::ConcreteInt or nonloc::ConcreteInt,
172   /// return a pointer to APSInt which is held in it.
173   /// Otherwise, return nullptr.
174   const llvm::APSInt *getAsInteger() const;
175 
176   const MemRegion *getAsRegion() const;
177 
178   /// printJson - Pretty-prints in JSON format.
179   void printJson(raw_ostream &Out, bool AddQuotes) const;
180 
181   void dumpToStream(raw_ostream &OS) const;
182   void dump() const;
183 
184   SymExpr::symbol_iterator symbol_begin() const {
185     const SymExpr *SE = getAsSymbol(/*IncludeBaseRegions=*/true);
186     if (SE)
187       return SE->symbol_begin();
188     else
189       return SymExpr::symbol_iterator();
190   }
191 
192   SymExpr::symbol_iterator symbol_end() const {
193     return SymExpr::symbol_end();
194   }
195 
196   /// Try to get a reasonable type for the given value.
197   ///
198   /// \returns The best approximation of the value type or Null.
199   /// In theory, all symbolic values should be typed, but this function
200   /// is still a WIP and might have a few blind spots.
201   ///
202   /// \note This function should not be used when the user has access to the
203   /// bound expression AST node as well, since AST always has exact types.
204   ///
205   /// \note Loc values are interpreted as pointer rvalues for the purposes of
206   /// this method.
207   QualType getType(const ASTContext &) const;
208 };
209 
210 inline raw_ostream &operator<<(raw_ostream &os, clang::ento::SVal V) {
211   V.dumpToStream(os);
212   return os;
213 }
214 
/// An SVal whose base kind is UndefinedValKind.
class UndefinedVal : public SVal {
public:
  // Only the base kind is passed on, so the payload pointer keeps the base
  // constructor's default of nullptr.
  UndefinedVal() : SVal(UndefinedValKind) {}
  // LLVM-style RTTI hook: accepts any SVal whose base kind is
  // UndefinedValKind.
  static bool classof(SVal V) { return V.getBaseKind() == UndefinedValKind; }
};
220 
221 class DefinedOrUnknownSVal : public SVal {
222 public:
223   // We want calling these methods to be a compiler error since they are
224   // tautologically false.
225   bool isUndef() const = delete;
226   bool isValid() const = delete;
227 
228   static bool classof(SVal V) { return !V.isUndef(); }
229 
230 protected:
231   explicit DefinedOrUnknownSVal(const void *d, bool isLoc, unsigned ValKind)
232       : SVal(d, isLoc, ValKind) {}
233   explicit DefinedOrUnknownSVal(BaseKind k, void *D = nullptr) : SVal(k, D) {}
234 };
235 
/// An SVal whose base kind is UnknownValKind.
class UnknownVal : public DefinedOrUnknownSVal {
public:
  // Only the base kind is passed on, so the payload pointer keeps its default
  // of nullptr.
  explicit UnknownVal() : DefinedOrUnknownSVal(UnknownValKind) {}

  // LLVM-style RTTI hook: accepts any SVal whose base kind is UnknownValKind.
  static bool classof(SVal V) { return V.getBaseKind() == UnknownValKind; }
};
242 
243 class DefinedSVal : public DefinedOrUnknownSVal {
244 public:
245   // We want calling these methods to be a compiler error since they are
246   // tautologically true/false.
247   bool isUnknown() const = delete;
248   bool isUnknownOrUndef() const = delete;
249   bool isValid() const = delete;
250 
251   static bool classof(SVal V) { return !V.isUnknownOrUndef(); }
252 
253 protected:
254   explicit DefinedSVal(const void *d, bool isLoc, unsigned ValKind)
255       : DefinedOrUnknownSVal(d, isLoc, ValKind) {}
256 };
257 
/// Represents an SVal that is guaranteed to not be UnknownVal.
class KnownSVal : public SVal {
public:
  // Non-explicit converting constructors: a DefinedSVal or an UndefinedVal
  // converts implicitly, copying its kind and payload into the SVal base.
  KnownSVal(const DefinedSVal &V) : SVal(V) {}
  KnownSVal(const UndefinedVal &V) : SVal(V) {}
  // LLVM-style RTTI hook: accepts everything except an unknown value.
  static bool classof(SVal V) { return !V.isUnknown(); }
};
265 
266 class NonLoc : public DefinedSVal {
267 protected:
268   explicit NonLoc(unsigned SubKind, const void *d)
269       : DefinedSVal(d, false, SubKind) {}
270 
271 public:
272   void dumpToStream(raw_ostream &Out) const;
273 
274   static bool isCompoundType(QualType T) {
275     return T->isArrayType() || T->isRecordType() ||
276            T->isAnyComplexType() || T->isVectorType();
277   }
278 
279   static bool classof(SVal V) { return V.getBaseKind() == NonLocKind; }
280 };
281 
282 class Loc : public DefinedSVal {
283 protected:
284   explicit Loc(unsigned SubKind, const void *D)
285       : DefinedSVal(const_cast<void *>(D), true, SubKind) {}
286 
287 public:
288   void dumpToStream(raw_ostream &Out) const;
289 
290   static bool isLocType(QualType T) {
291     return T->isAnyPointerType() || T->isBlockPointerType() ||
292            T->isReferenceType() || T->isNullPtrType();
293   }
294 
295   static bool classof(SVal V) { return V.getBaseKind() == LocKind; }
296 };
297 
298 //==------------------------------------------------------------------------==//
299 //  Subclasses of NonLoc.
300 //==------------------------------------------------------------------------==//
301 
302 namespace nonloc {
303 
304 /// Represents symbolic expression that isn't a location.
305 class SymbolVal : public NonLoc {
306 public:
307   SymbolVal() = delete;
308   SymbolVal(SymbolRef sym) : NonLoc(SymbolValKind, sym) {
309     assert(sym);
310     assert(!Loc::isLocType(sym->getType()));
311   }
312 
313   LLVM_ATTRIBUTE_RETURNS_NONNULL
314   SymbolRef getSymbol() const {
315     return (const SymExpr *) Data;
316   }
317 
318   bool isExpression() const {
319     return !isa<SymbolData>(getSymbol());
320   }
321 
322   static bool classof(SVal V) {
323     return V.getBaseKind() == NonLocKind && V.getSubKind() == SymbolValKind;
324   }
325 
326   static bool classof(NonLoc V) { return V.getSubKind() == SymbolValKind; }
327 };
328 
/// Value representing integer constant.
///
/// Only the address of the APSInt is stored as the payload, so the referenced
/// integer has to stay alive for as long as getValue() may be called on this
/// value or on a copy of it.
class ConcreteInt : public NonLoc {
public:
  // Stores &V; the APSInt itself is not copied.
  explicit ConcreteInt(const llvm::APSInt& V) : NonLoc(ConcreteIntKind, &V) {}

  // Reinterprets the payload pointer as the APSInt passed at construction.
  const llvm::APSInt& getValue() const {
    return *static_cast<const llvm::APSInt *>(Data);
  }

  // LLVM-style RTTI hooks. The NonLoc overload skips the base-kind test
  // because its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == NonLocKind && V.getSubKind() == ConcreteIntKind;
  }

  static bool classof(NonLoc V) { return V.getSubKind() == ConcreteIntKind; }
};
344 
345 class LocAsInteger : public NonLoc {
346   friend class ento::SValBuilder;
347 
348   explicit LocAsInteger(const std::pair<SVal, uintptr_t> &data)
349       : NonLoc(LocAsIntegerKind, &data) {
350     // We do not need to represent loc::ConcreteInt as LocAsInteger,
351     // as it'd collapse into a nonloc::ConcreteInt instead.
352     assert(data.first.getBaseKind() == LocKind &&
353            (data.first.getSubKind() == loc::MemRegionValKind ||
354             data.first.getSubKind() == loc::GotoLabelKind));
355   }
356 
357 public:
358   Loc getLoc() const {
359     const std::pair<SVal, uintptr_t> *D =
360       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
361     return D->first.castAs<Loc>();
362   }
363 
364   unsigned getNumBits() const {
365     const std::pair<SVal, uintptr_t> *D =
366       static_cast<const std::pair<SVal, uintptr_t> *>(Data);
367     return D->second;
368   }
369 
370   static bool classof(SVal V) {
371     return V.getBaseKind() == NonLocKind && V.getSubKind() == LocAsIntegerKind;
372   }
373 
374   static bool classof(NonLoc V) { return V.getSubKind() == LocAsIntegerKind; }
375 };
376 
/// A NonLoc whose payload is a pointer to a CompoundValData.
///
/// The constructor is private; SValBuilder is the only friend and therefore
/// the only code outside the class that can create one.
class CompoundVal : public NonLoc {
  friend class ento::SValBuilder;

  // D must be non-null (asserted); getValue() is annotated as never returning
  // null on that basis.
  explicit CompoundVal(const CompoundValData *D) : NonLoc(CompoundValKind, D) {
    assert(D);
  }

public:
  // The payload pointer, typed as the CompoundValData given at construction.
  LLVM_ATTRIBUTE_RETURNS_NONNULL
  const CompoundValData* getValue() const {
    return static_cast<const CompoundValData *>(Data);
  }

  using iterator = llvm::ImmutableList<SVal>::iterator;

  // Defined out of line. NOTE(review): presumably these walk the SVal list
  // held by the CompoundValData -- confirm in the implementation file.
  iterator begin() const;
  iterator end() const;

  // LLVM-style RTTI hooks. The NonLoc overload skips the base-kind test
  // because its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == NonLocKind && V.getSubKind() == CompoundValKind;
  }

  static bool classof(NonLoc V) { return V.getSubKind() == CompoundValKind; }
};
401 
/// A NonLoc whose payload is a pointer to a LazyCompoundValData.
///
/// The constructor is private; SValBuilder is the only friend and therefore
/// the only code outside the class that can create one.
class LazyCompoundVal : public NonLoc {
  friend class ento::SValBuilder;

  // D must be non-null (asserted); getCVData() is annotated as never
  // returning null on that basis.
  explicit LazyCompoundVal(const LazyCompoundValData *D)
      : NonLoc(LazyCompoundValKind, D) {
    assert(D);
  }

public:
  // The payload pointer, typed as the LazyCompoundValData given at
  // construction.
  LLVM_ATTRIBUTE_RETURNS_NONNULL
  const LazyCompoundValData *getCVData() const {
    return static_cast<const LazyCompoundValData *>(Data);
  }

  /// It might return null.
  const void *getStore() const;

  // Defined out of line; annotated as never returning null.
  LLVM_ATTRIBUTE_RETURNS_NONNULL
  const TypedValueRegion *getRegion() const;

  // LLVM-style RTTI hooks. The NonLoc overload skips the base-kind test
  // because its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == NonLocKind &&
           V.getSubKind() == LazyCompoundValKind;
  }

  static bool classof(NonLoc V) {
    return V.getSubKind() == LazyCompoundValKind;
  }
};
431 
/// Value representing pointer-to-member.
///
/// This value is qualified as NonLoc because neither loading nor storing
/// operations are applied to it. Instead, the analyzer uses the L-value coming
/// from pointer-to-member applied to an object.
/// This SVal is represented by a NamedDecl which can be a member function
/// pointer or a member data pointer and an optional list of CXXBaseSpecifiers.
/// This list is required to accumulate the pointer-to-member cast history to
/// figure out the correct subobject field. In particular, implicit casts grow
/// this list and explicit casts like static_cast shrink this list.
///
/// The constructor is private; SValBuilder is the only friend and therefore
/// the only code outside the class that can create one.
class PointerToMember : public NonLoc {
  friend class ento::SValBuilder;

public:
  /// The payload is either a bare declaration or a PointerToMemberData,
  /// packed into a single pointer-sized union.
  using PTMDataType =
      llvm::PointerUnion<const NamedDecl *, const PointerToMemberData *>;

  // Rebuilds the union from the payload pointer. Data is kept as
  // `const void *`, hence the const_cast before it is handed to
  // getFromOpaqueValue.
  const PTMDataType getPTMData() const {
    return PTMDataType::getFromOpaqueValue(const_cast<void *>(Data));
  }

  bool isNullMemberPointer() const;

  const NamedDecl *getDecl() const;

  // getDecl() narrowed to AdjustedDecl; yields nullptr when getDecl() is null
  // or is not an AdjustedDecl (dyn_cast_or_null).
  template<typename AdjustedDecl>
  const AdjustedDecl *getDeclAs() const {
    return dyn_cast_or_null<AdjustedDecl>(getDecl());
  }

  using iterator = llvm::ImmutableList<const CXXBaseSpecifier *>::iterator;

  // Defined out of line. NOTE(review): presumably these walk the
  // CXXBaseSpecifier list described in the class comment -- confirm in the
  // implementation file.
  iterator begin() const;
  iterator end() const;

  // LLVM-style RTTI hooks. The NonLoc overload skips the base-kind test
  // because its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == NonLocKind &&
           V.getSubKind() == PointerToMemberKind;
  }

  static bool classof(NonLoc V) {
    return V.getSubKind() == PointerToMemberKind;
  }

private:
  // Stores the union's opaque pointer representation as the payload.
  explicit PointerToMember(const PTMDataType D)
      : NonLoc(PointerToMemberKind, D.getOpaqueValue()) {}
};
480 
481 } // namespace nonloc
482 
483 //==------------------------------------------------------------------------==//
484 //  Subclasses of Loc.
485 //==------------------------------------------------------------------------==//
486 
487 namespace loc {
488 
/// A Loc whose payload is a pointer to a LabelDecl.
class GotoLabel : public Loc {
public:
  // Label must be non-null (asserted).
  explicit GotoLabel(const LabelDecl *Label) : Loc(GotoLabelKind, Label) {
    assert(Label);
  }

  // The payload pointer, typed as the LabelDecl given at construction.
  const LabelDecl *getLabel() const {
    return static_cast<const LabelDecl *>(Data);
  }

  // LLVM-style RTTI hooks. The Loc overload skips the base-kind test because
  // its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == LocKind && V.getSubKind() == GotoLabelKind;
  }

  static bool classof(Loc V) { return V.getSubKind() == GotoLabelKind; }
};
505 
506 class MemRegionVal : public Loc {
507 public:
508   explicit MemRegionVal(const MemRegion* r) : Loc(MemRegionValKind, r) {
509     assert(r);
510   }
511 
512   /// Get the underlining region.
513   const MemRegion *getRegion() const {
514     return static_cast<const MemRegion *>(Data);
515   }
516 
517   /// Get the underlining region and strip casts.
518   const MemRegion* stripCasts(bool StripBaseCasts = true) const;
519 
520   template <typename REGION>
521   const REGION* getRegionAs() const {
522     return dyn_cast<REGION>(getRegion());
523   }
524 
525   bool operator==(const MemRegionVal &R) const {
526     return getRegion() == R.getRegion();
527   }
528 
529   bool operator!=(const MemRegionVal &R) const {
530     return getRegion() != R.getRegion();
531   }
532 
533   static bool classof(SVal V) {
534     return V.getBaseKind() == LocKind && V.getSubKind() == MemRegionValKind;
535   }
536 
537   static bool classof(Loc V) { return V.getSubKind() == MemRegionValKind; }
538 };
539 
/// A Loc whose payload is a pointer to an APSInt.
///
/// Only the address of the APSInt is stored, so the referenced integer has to
/// stay alive for as long as getValue() may be called on this value or on a
/// copy of it.
class ConcreteInt : public Loc {
public:
  // Stores &V; the APSInt itself is not copied.
  explicit ConcreteInt(const llvm::APSInt& V) : Loc(ConcreteIntKind, &V) {}

  // Reinterprets the payload pointer as the APSInt passed at construction.
  const llvm::APSInt &getValue() const {
    return *static_cast<const llvm::APSInt *>(Data);
  }

  // LLVM-style RTTI hooks. The Loc overload skips the base-kind test because
  // its argument type already establishes it.
  static bool classof(SVal V) {
    return V.getBaseKind() == LocKind && V.getSubKind() == ConcreteIntKind;
  }

  static bool classof(Loc V) { return V.getSubKind() == ConcreteIntKind; }
};
554 
555 } // namespace loc
556 } // namespace ento
557 } // namespace clang
558 
559 namespace llvm {
/// CastInfo specialization selected whenever the source type \p From is
/// ::clang::ento::SVal or a class derived from it.
///
/// Values are handled by value rather than through pointers: a successful
/// cast yields a \p To copy, and the "if possible" form yields an
/// Optional<To> that is empty on failure.
template <typename To, typename From>
struct CastInfo<
    To, From,
    std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>
    : public CastIsPossible<To, ::clang::ento::SVal> {
  // Shorthand for this very specialization.
  using Self = CastInfo<
      To, From,
      std::enable_if_t<std::is_base_of<::clang::ento::SVal, From>::value>>;
  // Asks the target type's classof() hook, passing V viewed as a plain SVal.
  static bool isPossible(const From &V) {
    return To::classof(*static_cast<const ::clang::ento::SVal *>(&V));
  }
  // Result reported for a cast that is not possible: an empty Optional.
  static Optional<To> castFailed() { return Optional<To>{}; }
  // Views f's address as an SVal pointer, then as a To pointer, and returns a
  // copy of the pointee. No check against To::classof is made here.
  static To doCast(const From &f) {
    return *static_cast<const To *>(cast<::clang::ento::SVal>(&f));
  }
  // Checked form: empty Optional when isPossible(f) is false, otherwise the
  // result of doCast(f).
  static Optional<To> doCastIfPossible(const From &f) {
    if (!Self::isPossible(f))
      return Self::castFailed();
    return doCast(f);
  }
};
581 } // namespace llvm
582 
583 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_SVALS_H
584