1 //===- CallDescription.h - function/method call matching       --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
/// \file This file defines a generic mechanism for matching function and
/// method calls of the C, C++, and Objective-C languages. Instances of these
/// classes are frequently used together with the CallEvent classes.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
16 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
17 
18 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
19 #include "llvm/ADT/ArrayRef.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/Support/Compiler.h"
22 #include <vector>
23 
24 namespace clang {
25 class IdentifierInfo;
26 } // namespace clang
27 
28 namespace clang {
29 namespace ento {
30 
/// Flags accepted by the CallDescription constructor. Every enumerator other
/// than CDF_None occupies its own bit of the unsigned underlying type.
enum CallDescriptionFlags : unsigned {
  /// No flag is set.
  CDF_None = 0x0,

  /// The described function is a C standard function which is sometimes
  /// implemented as a macro that expands to a compiler builtin carrying some
  /// __builtin prefix. Such a builtin may additionally take a few extra
  /// arguments on top of the requested number of arguments.
  CDF_MaybeBuiltin = 0x1,
};
40 
41 /// This class represents a description of a function call using the number of
42 /// arguments and the name of the function.
43 class CallDescription {
44   friend class CallEvent;
45   using MaybeCount = Optional<unsigned>;
46 
47   mutable Optional<const IdentifierInfo *> II;
48   // The list of the qualified names used to identify the specified CallEvent,
49   // e.g. "{a, b}" represent the qualified names, like "a::b".
50   std::vector<std::string> QualifiedName;
51   MaybeCount RequiredArgs;
52   MaybeCount RequiredParams;
53   int Flags;
54 
55 public:
56   /// Constructs a CallDescription object.
57   ///
58   /// @param QualifiedName The list of the name qualifiers of the function that
59   /// will be matched. The user is allowed to skip any of the qualifiers.
60   /// For example, {"std", "basic_string", "c_str"} would match both
61   /// std::basic_string<...>::c_str() and std::__1::basic_string<...>::c_str().
62   ///
63   /// @param RequiredArgs The number of arguments that is expected to match a
64   /// call. Omit this parameter to match every occurrence of call with a given
65   /// name regardless the number of arguments.
66   CallDescription(CallDescriptionFlags Flags,
67                   ArrayRef<const char *> QualifiedName,
68                   MaybeCount RequiredArgs = None,
69                   MaybeCount RequiredParams = None);
70 
71   /// Construct a CallDescription with default flags.
72   CallDescription(ArrayRef<const char *> QualifiedName,
73                   MaybeCount RequiredArgs = None,
74                   MaybeCount RequiredParams = None);
75 
76   CallDescription(std::nullptr_t) = delete;
77 
78   /// Get the name of the function that this object matches.
79   StringRef getFunctionName() const { return QualifiedName.back(); }
80 
81   /// Get the qualified name parts in reversed order.
82   /// E.g. { "std", "vector", "data" } -> "vector", "std"
83   auto begin_qualified_name_parts() const {
84     return std::next(QualifiedName.rbegin());
85   }
86   auto end_qualified_name_parts() const { return QualifiedName.rend(); }
87 
88   /// It's false, if and only if we expect a single identifier, such as
89   /// `getenv`. It's true for `std::swap`, or `my::detail::container::data`.
90   bool hasQualifiedNameParts() const { return QualifiedName.size() > 1; }
91 
92   /// @name Matching CallDescriptions against a CallEvent
93   /// @{
94 
95   /// Returns true if the CallEvent is a call to a function that matches
96   /// the CallDescription.
97   ///
98   /// \note This function is not intended to be used to match Obj-C method
99   /// calls.
100   bool matches(const CallEvent &Call) const;
101 
102   /// Returns true whether the CallEvent matches on any of the CallDescriptions
103   /// supplied.
104   ///
105   /// \note This function is not intended to be used to match Obj-C method
106   /// calls.
107   friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1) {
108     return CD1.matches(Call);
109   }
110 
111   /// \copydoc clang::ento::CallDescription::matchesAny(const CallEvent &, const CallDescription &)
112   template <typename... Ts>
113   friend bool matchesAny(const CallEvent &Call, const CallDescription &CD1,
114                          const Ts &...CDs) {
115     return CD1.matches(Call) || matchesAny(Call, CDs...);
116   }
117   /// @}
118 
119   /// @name Matching CallDescriptions against a CallExpr
120   /// @{
121 
122   /// Returns true if the CallExpr is a call to a function that matches the
123   /// CallDescription.
124   ///
125   /// When available, always prefer matching with a CallEvent! This function
126   /// exists only when that is not available, for example, when _only_
127   /// syntactic check is done on a piece of code.
128   ///
129   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
130   /// for syntactic only matching if you are writing a new checker. This is
131   /// handy if a CallDescriptionMap is already there.
132   ///
133   /// The function is imprecise because CallEvent may know path sensitive
134   /// information, such as the precise argument count (see comments for
135   /// CallEvent::getNumArgs), the called function if it was called through a
136   /// function pointer, and other information not available syntactically.
137   bool matchesAsWritten(const CallExpr &CE) const;
138 
139   /// Returns true whether the CallExpr matches on any of the CallDescriptions
140   /// supplied.
141   ///
142   /// \note This function is not intended to be used to match Obj-C method
143   /// calls.
144   friend bool matchesAnyAsWritten(const CallExpr &CE,
145                                   const CallDescription &CD1) {
146     return CD1.matchesAsWritten(CE);
147   }
148 
149   /// \copydoc clang::ento::CallDescription::matchesAnyAsWritten(const CallExpr &, const CallDescription &)
150   template <typename... Ts>
151   friend bool matchesAnyAsWritten(const CallExpr &CE,
152                                   const CallDescription &CD1,
153                                   const Ts &...CDs) {
154     return CD1.matchesAsWritten(CE) || matchesAnyAsWritten(CE, CDs...);
155   }
156   /// @}
157 
158 private:
159   bool matchesImpl(const FunctionDecl *Callee, size_t ArgCount,
160                    size_t ParamCount) const;
161 };
162 
163 /// An immutable map from CallDescriptions to arbitrary data. Provides a unified
164 /// way for checkers to react on function calls.
165 template <typename T> class CallDescriptionMap {
166   friend class CallDescriptionSet;
167 
168   // Some call descriptions aren't easily hashable (eg., the ones with qualified
169   // names in which some sections are omitted), so let's put them
170   // in a simple vector and use linear lookup.
171   // TODO: Implement an actual map for fast lookup for "hashable" call
172   // descriptions (eg., the ones for C functions that just match the name).
173   std::vector<std::pair<CallDescription, T>> LinearMap;
174 
175 public:
176   CallDescriptionMap(
177       std::initializer_list<std::pair<CallDescription, T>> &&List)
178       : LinearMap(List) {}
179 
180   template <typename InputIt>
181   CallDescriptionMap(InputIt First, InputIt Last) : LinearMap(First, Last) {}
182 
183   ~CallDescriptionMap() = default;
184 
185   // These maps are usually stored once per checker, so let's make sure
186   // we don't do redundant copies.
187   CallDescriptionMap(const CallDescriptionMap &) = delete;
188   CallDescriptionMap &operator=(const CallDescription &) = delete;
189 
190   CallDescriptionMap(CallDescriptionMap &&) = default;
191   CallDescriptionMap &operator=(CallDescriptionMap &&) = default;
192 
193   LLVM_NODISCARD const T *lookup(const CallEvent &Call) const {
194     // Slow path: linear lookup.
195     // TODO: Implement some sort of fast path.
196     for (const std::pair<CallDescription, T> &I : LinearMap)
197       if (I.first.matches(Call))
198         return &I.second;
199 
200     return nullptr;
201   }
202 
203   /// When available, always prefer lookup with a CallEvent! This function
204   /// exists only when that is not available, for example, when _only_
205   /// syntactic check is done on a piece of code.
206   ///
207   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
208   /// for syntactic only matching if you are writing a new checker. This is
209   /// handy if a CallDescriptionMap is already there.
210   ///
211   /// The function is imprecise because CallEvent may know path sensitive
212   /// information, such as the precise argument count (see comments for
213   /// CallEvent::getNumArgs), the called function if it was called through a
214   /// function pointer, and other information not available syntactically.
215   LLVM_NODISCARD const T *lookupAsWritten(const CallExpr &Call) const {
216     // Slow path: linear lookup.
217     // TODO: Implement some sort of fast path.
218     for (const std::pair<CallDescription, T> &I : LinearMap)
219       if (I.first.matchesAsWritten(Call))
220         return &I.second;
221 
222     return nullptr;
223   }
224 };
225 
226 /// An immutable set of CallDescriptions.
227 /// Checkers can efficiently decide if a given CallEvent matches any
228 /// CallDescription in the set.
229 class CallDescriptionSet {
230   CallDescriptionMap<bool /*unused*/> Impl = {};
231 
232 public:
233   CallDescriptionSet(std::initializer_list<CallDescription> &&List);
234 
235   CallDescriptionSet(const CallDescriptionSet &) = delete;
236   CallDescriptionSet &operator=(const CallDescription &) = delete;
237 
238   LLVM_NODISCARD bool contains(const CallEvent &Call) const;
239 
240   /// When available, always prefer lookup with a CallEvent! This function
241   /// exists only when that is not available, for example, when _only_
242   /// syntactic check is done on a piece of code.
243   ///
244   /// Also, StdLibraryFunctionsChecker::Signature is likely a better candicade
245   /// for syntactic only matching if you are writing a new checker. This is
246   /// handy if a CallDescriptionMap is already there.
247   ///
248   /// The function is imprecise because CallEvent may know path sensitive
249   /// information, such as the precise argument count (see comments for
250   /// CallEvent::getNumArgs), the called function if it was called through a
251   /// function pointer, and other information not available syntactically.
252   LLVM_NODISCARD bool containsAsWritten(const CallExpr &CE) const;
253 };
254 
255 } // namespace ento
256 } // namespace clang
257 
258 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_CALLDESCRIPTION_H
259