1f4a2713aSLionel Sambuc //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2f4a2713aSLionel Sambuc //
3f4a2713aSLionel Sambuc //                     The LLVM Compiler Infrastructure
4f4a2713aSLionel Sambuc //
5f4a2713aSLionel Sambuc // This file is distributed under the University of Illinois Open Source
6f4a2713aSLionel Sambuc // License. See LICENSE.TXT for details.
7f4a2713aSLionel Sambuc //
8f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
9f4a2713aSLionel Sambuc //
10f4a2713aSLionel Sambuc // This checker defines the attack surface for generic taint propagation.
11f4a2713aSLionel Sambuc //
12f4a2713aSLionel Sambuc // The taint information produced by it might be useful to other checkers. For
13f4a2713aSLionel Sambuc // example, checkers should report errors which involve tainted data more
14f4a2713aSLionel Sambuc // aggressively, even if the involved symbols are under constrained.
15f4a2713aSLionel Sambuc //
16f4a2713aSLionel Sambuc //===----------------------------------------------------------------------===//
17f4a2713aSLionel Sambuc #include "ClangSACheckers.h"
18f4a2713aSLionel Sambuc #include "clang/AST/Attr.h"
19f4a2713aSLionel Sambuc #include "clang/Basic/Builtins.h"
20f4a2713aSLionel Sambuc #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
21f4a2713aSLionel Sambuc #include "clang/StaticAnalyzer/Core/Checker.h"
22f4a2713aSLionel Sambuc #include "clang/StaticAnalyzer/Core/CheckerManager.h"
23f4a2713aSLionel Sambuc #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
24f4a2713aSLionel Sambuc #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
25f4a2713aSLionel Sambuc #include <climits>
26f4a2713aSLionel Sambuc 
27f4a2713aSLionel Sambuc using namespace clang;
28f4a2713aSLionel Sambuc using namespace ento;
29f4a2713aSLionel Sambuc 
30f4a2713aSLionel Sambuc namespace {
31f4a2713aSLionel Sambuc class GenericTaintChecker : public Checker< check::PostStmt<CallExpr>,
32f4a2713aSLionel Sambuc                                             check::PreStmt<CallExpr> > {
33f4a2713aSLionel Sambuc public:
getTag()34f4a2713aSLionel Sambuc   static void *getTag() { static int Tag; return &Tag; }
35f4a2713aSLionel Sambuc 
36f4a2713aSLionel Sambuc   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
37f4a2713aSLionel Sambuc 
38f4a2713aSLionel Sambuc   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
39f4a2713aSLionel Sambuc 
40f4a2713aSLionel Sambuc private:
41f4a2713aSLionel Sambuc   static const unsigned InvalidArgIndex = UINT_MAX;
42f4a2713aSLionel Sambuc   /// Denotes the return vale.
43f4a2713aSLionel Sambuc   static const unsigned ReturnValueIndex = UINT_MAX - 1;
44f4a2713aSLionel Sambuc 
45*0a6a1f1dSLionel Sambuc   mutable std::unique_ptr<BugType> BT;
initBugType() const46f4a2713aSLionel Sambuc   inline void initBugType() const {
47f4a2713aSLionel Sambuc     if (!BT)
48*0a6a1f1dSLionel Sambuc       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
49f4a2713aSLionel Sambuc   }
50f4a2713aSLionel Sambuc 
51f4a2713aSLionel Sambuc   /// \brief Catch taint related bugs. Check if tainted data is passed to a
52f4a2713aSLionel Sambuc   /// system call etc.
53f4a2713aSLionel Sambuc   bool checkPre(const CallExpr *CE, CheckerContext &C) const;
54f4a2713aSLionel Sambuc 
55f4a2713aSLionel Sambuc   /// \brief Add taint sources on a pre-visit.
56f4a2713aSLionel Sambuc   void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
57f4a2713aSLionel Sambuc 
58f4a2713aSLionel Sambuc   /// \brief Propagate taint generated at pre-visit.
59f4a2713aSLionel Sambuc   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
60f4a2713aSLionel Sambuc 
61f4a2713aSLionel Sambuc   /// \brief Add taint sources on a post visit.
62f4a2713aSLionel Sambuc   void addSourcesPost(const CallExpr *CE, CheckerContext &C) const;
63f4a2713aSLionel Sambuc 
64f4a2713aSLionel Sambuc   /// Check if the region the expression evaluates to is the standard input,
65f4a2713aSLionel Sambuc   /// and thus, is tainted.
66f4a2713aSLionel Sambuc   static bool isStdin(const Expr *E, CheckerContext &C);
67f4a2713aSLionel Sambuc 
68f4a2713aSLionel Sambuc   /// \brief Given a pointer argument, get the symbol of the value it contains
69f4a2713aSLionel Sambuc   /// (points to).
70f4a2713aSLionel Sambuc   static SymbolRef getPointedToSymbol(CheckerContext &C, const Expr *Arg);
71f4a2713aSLionel Sambuc 
72f4a2713aSLionel Sambuc   /// Functions defining the attack surface.
73f4a2713aSLionel Sambuc   typedef ProgramStateRef (GenericTaintChecker::*FnCheck)(const CallExpr *,
74f4a2713aSLionel Sambuc                                                        CheckerContext &C) const;
75f4a2713aSLionel Sambuc   ProgramStateRef postScanf(const CallExpr *CE, CheckerContext &C) const;
76f4a2713aSLionel Sambuc   ProgramStateRef postSocket(const CallExpr *CE, CheckerContext &C) const;
77f4a2713aSLionel Sambuc   ProgramStateRef postRetTaint(const CallExpr *CE, CheckerContext &C) const;
78f4a2713aSLionel Sambuc 
79f4a2713aSLionel Sambuc   /// Taint the scanned input if the file is tainted.
80f4a2713aSLionel Sambuc   ProgramStateRef preFscanf(const CallExpr *CE, CheckerContext &C) const;
81f4a2713aSLionel Sambuc 
82f4a2713aSLionel Sambuc   /// Check for CWE-134: Uncontrolled Format String.
83f4a2713aSLionel Sambuc   static const char MsgUncontrolledFormatString[];
84f4a2713aSLionel Sambuc   bool checkUncontrolledFormatString(const CallExpr *CE,
85f4a2713aSLionel Sambuc                                      CheckerContext &C) const;
86f4a2713aSLionel Sambuc 
87f4a2713aSLionel Sambuc   /// Check for:
88f4a2713aSLionel Sambuc   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
89f4a2713aSLionel Sambuc   /// CWE-78, "Failure to Sanitize Data into an OS Command"
90f4a2713aSLionel Sambuc   static const char MsgSanitizeSystemArgs[];
91f4a2713aSLionel Sambuc   bool checkSystemCall(const CallExpr *CE, StringRef Name,
92f4a2713aSLionel Sambuc                        CheckerContext &C) const;
93f4a2713aSLionel Sambuc 
94f4a2713aSLionel Sambuc   /// Check if tainted data is used as a buffer size ins strn.. functions,
95f4a2713aSLionel Sambuc   /// and allocators.
96f4a2713aSLionel Sambuc   static const char MsgTaintedBufferSize[];
97f4a2713aSLionel Sambuc   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
98f4a2713aSLionel Sambuc                               CheckerContext &C) const;
99f4a2713aSLionel Sambuc 
100f4a2713aSLionel Sambuc   /// Generate a report if the expression is tainted or points to tainted data.
101f4a2713aSLionel Sambuc   bool generateReportIfTainted(const Expr *E, const char Msg[],
102f4a2713aSLionel Sambuc                                CheckerContext &C) const;
103f4a2713aSLionel Sambuc 
104f4a2713aSLionel Sambuc 
105f4a2713aSLionel Sambuc   typedef SmallVector<unsigned, 2> ArgVector;
106f4a2713aSLionel Sambuc 
107f4a2713aSLionel Sambuc   /// \brief A struct used to specify taint propagation rules for a function.
108f4a2713aSLionel Sambuc   ///
109f4a2713aSLionel Sambuc   /// If any of the possible taint source arguments is tainted, all of the
110f4a2713aSLionel Sambuc   /// destination arguments should also be tainted. Use InvalidArgIndex in the
111f4a2713aSLionel Sambuc   /// src list to specify that all of the arguments can introduce taint. Use
112f4a2713aSLionel Sambuc   /// InvalidArgIndex in the dst arguments to signify that all the non-const
113f4a2713aSLionel Sambuc   /// pointer and reference arguments might be tainted on return. If
114f4a2713aSLionel Sambuc   /// ReturnValueIndex is added to the dst list, the return value will be
115f4a2713aSLionel Sambuc   /// tainted.
116f4a2713aSLionel Sambuc   struct TaintPropagationRule {
117f4a2713aSLionel Sambuc     /// List of arguments which can be taint sources and should be checked.
118f4a2713aSLionel Sambuc     ArgVector SrcArgs;
119f4a2713aSLionel Sambuc     /// List of arguments which should be tainted on function return.
120f4a2713aSLionel Sambuc     ArgVector DstArgs;
121f4a2713aSLionel Sambuc     // TODO: Check if using other data structures would be more optimal.
122f4a2713aSLionel Sambuc 
TaintPropagationRule__anona106c46b0111::GenericTaintChecker::TaintPropagationRule123f4a2713aSLionel Sambuc     TaintPropagationRule() {}
124f4a2713aSLionel Sambuc 
TaintPropagationRule__anona106c46b0111::GenericTaintChecker::TaintPropagationRule125f4a2713aSLionel Sambuc     TaintPropagationRule(unsigned SArg,
126f4a2713aSLionel Sambuc                          unsigned DArg, bool TaintRet = false) {
127f4a2713aSLionel Sambuc       SrcArgs.push_back(SArg);
128f4a2713aSLionel Sambuc       DstArgs.push_back(DArg);
129f4a2713aSLionel Sambuc       if (TaintRet)
130f4a2713aSLionel Sambuc         DstArgs.push_back(ReturnValueIndex);
131f4a2713aSLionel Sambuc     }
132f4a2713aSLionel Sambuc 
TaintPropagationRule__anona106c46b0111::GenericTaintChecker::TaintPropagationRule133f4a2713aSLionel Sambuc     TaintPropagationRule(unsigned SArg1, unsigned SArg2,
134f4a2713aSLionel Sambuc                          unsigned DArg, bool TaintRet = false) {
135f4a2713aSLionel Sambuc       SrcArgs.push_back(SArg1);
136f4a2713aSLionel Sambuc       SrcArgs.push_back(SArg2);
137f4a2713aSLionel Sambuc       DstArgs.push_back(DArg);
138f4a2713aSLionel Sambuc       if (TaintRet)
139f4a2713aSLionel Sambuc         DstArgs.push_back(ReturnValueIndex);
140f4a2713aSLionel Sambuc     }
141f4a2713aSLionel Sambuc 
142f4a2713aSLionel Sambuc     /// Get the propagation rule for a given function.
143f4a2713aSLionel Sambuc     static TaintPropagationRule
144f4a2713aSLionel Sambuc       getTaintPropagationRule(const FunctionDecl *FDecl,
145f4a2713aSLionel Sambuc                               StringRef Name,
146f4a2713aSLionel Sambuc                               CheckerContext &C);
147f4a2713aSLionel Sambuc 
addSrcArg__anona106c46b0111::GenericTaintChecker::TaintPropagationRule148f4a2713aSLionel Sambuc     inline void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anona106c46b0111::GenericTaintChecker::TaintPropagationRule149f4a2713aSLionel Sambuc     inline void addDstArg(unsigned A)  { DstArgs.push_back(A); }
150f4a2713aSLionel Sambuc 
isNull__anona106c46b0111::GenericTaintChecker::TaintPropagationRule151f4a2713aSLionel Sambuc     inline bool isNull() const { return SrcArgs.empty(); }
152f4a2713aSLionel Sambuc 
isDestinationArgument__anona106c46b0111::GenericTaintChecker::TaintPropagationRule153f4a2713aSLionel Sambuc     inline bool isDestinationArgument(unsigned ArgNum) const {
154f4a2713aSLionel Sambuc       return (std::find(DstArgs.begin(),
155f4a2713aSLionel Sambuc                         DstArgs.end(), ArgNum) != DstArgs.end());
156f4a2713aSLionel Sambuc     }
157f4a2713aSLionel Sambuc 
isTaintedOrPointsToTainted__anona106c46b0111::GenericTaintChecker::TaintPropagationRule158f4a2713aSLionel Sambuc     static inline bool isTaintedOrPointsToTainted(const Expr *E,
159f4a2713aSLionel Sambuc                                                   ProgramStateRef State,
160f4a2713aSLionel Sambuc                                                   CheckerContext &C) {
161f4a2713aSLionel Sambuc       return (State->isTainted(E, C.getLocationContext()) || isStdin(E, C) ||
162f4a2713aSLionel Sambuc               (E->getType().getTypePtr()->isPointerType() &&
163f4a2713aSLionel Sambuc                State->isTainted(getPointedToSymbol(C, E))));
164f4a2713aSLionel Sambuc     }
165f4a2713aSLionel Sambuc 
166f4a2713aSLionel Sambuc     /// \brief Pre-process a function which propagates taint according to the
167f4a2713aSLionel Sambuc     /// taint rule.
168f4a2713aSLionel Sambuc     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
169f4a2713aSLionel Sambuc 
170f4a2713aSLionel Sambuc   };
171f4a2713aSLionel Sambuc };
172f4a2713aSLionel Sambuc 
173f4a2713aSLionel Sambuc const unsigned GenericTaintChecker::ReturnValueIndex;
174f4a2713aSLionel Sambuc const unsigned GenericTaintChecker::InvalidArgIndex;
175f4a2713aSLionel Sambuc 
176f4a2713aSLionel Sambuc const char GenericTaintChecker::MsgUncontrolledFormatString[] =
177f4a2713aSLionel Sambuc   "Untrusted data is used as a format string "
178f4a2713aSLionel Sambuc   "(CWE-134: Uncontrolled Format String)";
179f4a2713aSLionel Sambuc 
180f4a2713aSLionel Sambuc const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
181f4a2713aSLionel Sambuc   "Untrusted data is passed to a system call "
182f4a2713aSLionel Sambuc   "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
183f4a2713aSLionel Sambuc 
184f4a2713aSLionel Sambuc const char GenericTaintChecker::MsgTaintedBufferSize[] =
185f4a2713aSLionel Sambuc   "Untrusted data is used to specify the buffer size "
186f4a2713aSLionel Sambuc   "(CERT/STR31-C. Guarantee that storage for strings has sufficient space for "
187f4a2713aSLionel Sambuc   "character data and the null terminator)";
188f4a2713aSLionel Sambuc 
189f4a2713aSLionel Sambuc } // end of anonymous namespace
190f4a2713aSLionel Sambuc 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. Each value is an unsigned integer: either
/// ReturnValueIndex, or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit,unsigned)195f4a2713aSLionel Sambuc REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
196f4a2713aSLionel Sambuc 
197f4a2713aSLionel Sambuc GenericTaintChecker::TaintPropagationRule
198f4a2713aSLionel Sambuc GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
199f4a2713aSLionel Sambuc                                                      const FunctionDecl *FDecl,
200f4a2713aSLionel Sambuc                                                      StringRef Name,
201f4a2713aSLionel Sambuc                                                      CheckerContext &C) {
202f4a2713aSLionel Sambuc   // TODO: Currently, we might loose precision here: we always mark a return
203f4a2713aSLionel Sambuc   // value as tainted even if it's just a pointer, pointing to tainted data.
204f4a2713aSLionel Sambuc 
205f4a2713aSLionel Sambuc   // Check for exact name match for functions without builtin substitutes.
206f4a2713aSLionel Sambuc   TaintPropagationRule Rule = llvm::StringSwitch<TaintPropagationRule>(Name)
207f4a2713aSLionel Sambuc     .Case("atoi", TaintPropagationRule(0, ReturnValueIndex))
208f4a2713aSLionel Sambuc     .Case("atol", TaintPropagationRule(0, ReturnValueIndex))
209f4a2713aSLionel Sambuc     .Case("atoll", TaintPropagationRule(0, ReturnValueIndex))
210f4a2713aSLionel Sambuc     .Case("getc", TaintPropagationRule(0, ReturnValueIndex))
211f4a2713aSLionel Sambuc     .Case("fgetc", TaintPropagationRule(0, ReturnValueIndex))
212f4a2713aSLionel Sambuc     .Case("getc_unlocked", TaintPropagationRule(0, ReturnValueIndex))
213f4a2713aSLionel Sambuc     .Case("getw", TaintPropagationRule(0, ReturnValueIndex))
214f4a2713aSLionel Sambuc     .Case("toupper", TaintPropagationRule(0, ReturnValueIndex))
215f4a2713aSLionel Sambuc     .Case("tolower", TaintPropagationRule(0, ReturnValueIndex))
216f4a2713aSLionel Sambuc     .Case("strchr", TaintPropagationRule(0, ReturnValueIndex))
217f4a2713aSLionel Sambuc     .Case("strrchr", TaintPropagationRule(0, ReturnValueIndex))
218f4a2713aSLionel Sambuc     .Case("read", TaintPropagationRule(0, 2, 1, true))
219f4a2713aSLionel Sambuc     .Case("pread", TaintPropagationRule(InvalidArgIndex, 1, true))
220f4a2713aSLionel Sambuc     .Case("gets", TaintPropagationRule(InvalidArgIndex, 0, true))
221f4a2713aSLionel Sambuc     .Case("fgets", TaintPropagationRule(2, 0, true))
222f4a2713aSLionel Sambuc     .Case("getline", TaintPropagationRule(2, 0))
223f4a2713aSLionel Sambuc     .Case("getdelim", TaintPropagationRule(3, 0))
224f4a2713aSLionel Sambuc     .Case("fgetln", TaintPropagationRule(0, ReturnValueIndex))
225f4a2713aSLionel Sambuc     .Default(TaintPropagationRule());
226f4a2713aSLionel Sambuc 
227f4a2713aSLionel Sambuc   if (!Rule.isNull())
228f4a2713aSLionel Sambuc     return Rule;
229f4a2713aSLionel Sambuc 
230f4a2713aSLionel Sambuc   // Check if it's one of the memory setting/copying functions.
231f4a2713aSLionel Sambuc   // This check is specialized but faster then calling isCLibraryFunction.
232f4a2713aSLionel Sambuc   unsigned BId = 0;
233f4a2713aSLionel Sambuc   if ( (BId = FDecl->getMemoryFunctionKind()) )
234f4a2713aSLionel Sambuc     switch(BId) {
235f4a2713aSLionel Sambuc     case Builtin::BImemcpy:
236f4a2713aSLionel Sambuc     case Builtin::BImemmove:
237f4a2713aSLionel Sambuc     case Builtin::BIstrncpy:
238f4a2713aSLionel Sambuc     case Builtin::BIstrncat:
239f4a2713aSLionel Sambuc       return TaintPropagationRule(1, 2, 0, true);
240f4a2713aSLionel Sambuc     case Builtin::BIstrlcpy:
241f4a2713aSLionel Sambuc     case Builtin::BIstrlcat:
242f4a2713aSLionel Sambuc       return TaintPropagationRule(1, 2, 0, false);
243f4a2713aSLionel Sambuc     case Builtin::BIstrndup:
244f4a2713aSLionel Sambuc       return TaintPropagationRule(0, 1, ReturnValueIndex);
245f4a2713aSLionel Sambuc 
246f4a2713aSLionel Sambuc     default:
247f4a2713aSLionel Sambuc       break;
248f4a2713aSLionel Sambuc     };
249f4a2713aSLionel Sambuc 
250f4a2713aSLionel Sambuc   // Process all other functions which could be defined as builtins.
251f4a2713aSLionel Sambuc   if (Rule.isNull()) {
252f4a2713aSLionel Sambuc     if (C.isCLibraryFunction(FDecl, "snprintf") ||
253f4a2713aSLionel Sambuc         C.isCLibraryFunction(FDecl, "sprintf"))
254f4a2713aSLionel Sambuc       return TaintPropagationRule(InvalidArgIndex, 0, true);
255f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
256f4a2713aSLionel Sambuc              C.isCLibraryFunction(FDecl, "stpcpy") ||
257f4a2713aSLionel Sambuc              C.isCLibraryFunction(FDecl, "strcat"))
258f4a2713aSLionel Sambuc       return TaintPropagationRule(1, 0, true);
259f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "bcopy"))
260f4a2713aSLionel Sambuc       return TaintPropagationRule(0, 2, 1, false);
261f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "strdup") ||
262f4a2713aSLionel Sambuc              C.isCLibraryFunction(FDecl, "strdupa"))
263f4a2713aSLionel Sambuc       return TaintPropagationRule(0, ReturnValueIndex);
264f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
265f4a2713aSLionel Sambuc       return TaintPropagationRule(0, ReturnValueIndex);
266f4a2713aSLionel Sambuc   }
267f4a2713aSLionel Sambuc 
268f4a2713aSLionel Sambuc   // Skipping the following functions, since they might be used for cleansing
269f4a2713aSLionel Sambuc   // or smart memory copy:
270f4a2713aSLionel Sambuc   // - memccpy - copying until hitting a special character.
271f4a2713aSLionel Sambuc 
272f4a2713aSLionel Sambuc   return TaintPropagationRule();
273f4a2713aSLionel Sambuc }
274f4a2713aSLionel Sambuc 
checkPreStmt(const CallExpr * CE,CheckerContext & C) const275f4a2713aSLionel Sambuc void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
276f4a2713aSLionel Sambuc                                        CheckerContext &C) const {
277f4a2713aSLionel Sambuc   // Check for errors first.
278f4a2713aSLionel Sambuc   if (checkPre(CE, C))
279f4a2713aSLionel Sambuc     return;
280f4a2713aSLionel Sambuc 
281f4a2713aSLionel Sambuc   // Add taint second.
282f4a2713aSLionel Sambuc   addSourcesPre(CE, C);
283f4a2713aSLionel Sambuc }
284f4a2713aSLionel Sambuc 
checkPostStmt(const CallExpr * CE,CheckerContext & C) const285f4a2713aSLionel Sambuc void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
286f4a2713aSLionel Sambuc                                         CheckerContext &C) const {
287f4a2713aSLionel Sambuc   if (propagateFromPre(CE, C))
288f4a2713aSLionel Sambuc     return;
289f4a2713aSLionel Sambuc   addSourcesPost(CE, C);
290f4a2713aSLionel Sambuc }
291f4a2713aSLionel Sambuc 
addSourcesPre(const CallExpr * CE,CheckerContext & C) const292f4a2713aSLionel Sambuc void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
293f4a2713aSLionel Sambuc                                         CheckerContext &C) const {
294*0a6a1f1dSLionel Sambuc   ProgramStateRef State = nullptr;
295f4a2713aSLionel Sambuc   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
296f4a2713aSLionel Sambuc   if (!FDecl || FDecl->getKind() != Decl::Function)
297f4a2713aSLionel Sambuc     return;
298f4a2713aSLionel Sambuc 
299f4a2713aSLionel Sambuc   StringRef Name = C.getCalleeName(FDecl);
300f4a2713aSLionel Sambuc   if (Name.empty())
301f4a2713aSLionel Sambuc     return;
302f4a2713aSLionel Sambuc 
303f4a2713aSLionel Sambuc   // First, try generating a propagation rule for this function.
304f4a2713aSLionel Sambuc   TaintPropagationRule Rule =
305f4a2713aSLionel Sambuc     TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
306f4a2713aSLionel Sambuc   if (!Rule.isNull()) {
307f4a2713aSLionel Sambuc     State = Rule.process(CE, C);
308f4a2713aSLionel Sambuc     if (!State)
309f4a2713aSLionel Sambuc       return;
310f4a2713aSLionel Sambuc     C.addTransition(State);
311f4a2713aSLionel Sambuc     return;
312f4a2713aSLionel Sambuc   }
313f4a2713aSLionel Sambuc 
314f4a2713aSLionel Sambuc   // Otherwise, check if we have custom pre-processing implemented.
315f4a2713aSLionel Sambuc   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
316f4a2713aSLionel Sambuc     .Case("fscanf", &GenericTaintChecker::preFscanf)
317*0a6a1f1dSLionel Sambuc     .Default(nullptr);
318f4a2713aSLionel Sambuc   // Check and evaluate the call.
319f4a2713aSLionel Sambuc   if (evalFunction)
320f4a2713aSLionel Sambuc     State = (this->*evalFunction)(CE, C);
321f4a2713aSLionel Sambuc   if (!State)
322f4a2713aSLionel Sambuc     return;
323f4a2713aSLionel Sambuc   C.addTransition(State);
324f4a2713aSLionel Sambuc 
325f4a2713aSLionel Sambuc }
326f4a2713aSLionel Sambuc 
propagateFromPre(const CallExpr * CE,CheckerContext & C) const327f4a2713aSLionel Sambuc bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
328f4a2713aSLionel Sambuc                                            CheckerContext &C) const {
329f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
330f4a2713aSLionel Sambuc 
331f4a2713aSLionel Sambuc   // Depending on what was tainted at pre-visit, we determined a set of
332f4a2713aSLionel Sambuc   // arguments which should be tainted after the function returns. These are
333f4a2713aSLionel Sambuc   // stored in the state as TaintArgsOnPostVisit set.
334f4a2713aSLionel Sambuc   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
335f4a2713aSLionel Sambuc   if (TaintArgs.isEmpty())
336f4a2713aSLionel Sambuc     return false;
337f4a2713aSLionel Sambuc 
338f4a2713aSLionel Sambuc   for (llvm::ImmutableSet<unsigned>::iterator
339f4a2713aSLionel Sambuc          I = TaintArgs.begin(), E = TaintArgs.end(); I != E; ++I) {
340f4a2713aSLionel Sambuc     unsigned ArgNum  = *I;
341f4a2713aSLionel Sambuc 
342f4a2713aSLionel Sambuc     // Special handling for the tainted return value.
343f4a2713aSLionel Sambuc     if (ArgNum == ReturnValueIndex) {
344f4a2713aSLionel Sambuc       State = State->addTaint(CE, C.getLocationContext());
345f4a2713aSLionel Sambuc       continue;
346f4a2713aSLionel Sambuc     }
347f4a2713aSLionel Sambuc 
348f4a2713aSLionel Sambuc     // The arguments are pointer arguments. The data they are pointing at is
349f4a2713aSLionel Sambuc     // tainted after the call.
350f4a2713aSLionel Sambuc     if (CE->getNumArgs() < (ArgNum + 1))
351f4a2713aSLionel Sambuc       return false;
352f4a2713aSLionel Sambuc     const Expr* Arg = CE->getArg(ArgNum);
353f4a2713aSLionel Sambuc     SymbolRef Sym = getPointedToSymbol(C, Arg);
354f4a2713aSLionel Sambuc     if (Sym)
355f4a2713aSLionel Sambuc       State = State->addTaint(Sym);
356f4a2713aSLionel Sambuc   }
357f4a2713aSLionel Sambuc 
358f4a2713aSLionel Sambuc   // Clear up the taint info from the state.
359f4a2713aSLionel Sambuc   State = State->remove<TaintArgsOnPostVisit>();
360f4a2713aSLionel Sambuc 
361f4a2713aSLionel Sambuc   if (State != C.getState()) {
362f4a2713aSLionel Sambuc     C.addTransition(State);
363f4a2713aSLionel Sambuc     return true;
364f4a2713aSLionel Sambuc   }
365f4a2713aSLionel Sambuc   return false;
366f4a2713aSLionel Sambuc }
367f4a2713aSLionel Sambuc 
addSourcesPost(const CallExpr * CE,CheckerContext & C) const368f4a2713aSLionel Sambuc void GenericTaintChecker::addSourcesPost(const CallExpr *CE,
369f4a2713aSLionel Sambuc                                          CheckerContext &C) const {
370f4a2713aSLionel Sambuc   // Define the attack surface.
371f4a2713aSLionel Sambuc   // Set the evaluation function by switching on the callee name.
372f4a2713aSLionel Sambuc   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
373f4a2713aSLionel Sambuc   if (!FDecl || FDecl->getKind() != Decl::Function)
374f4a2713aSLionel Sambuc     return;
375f4a2713aSLionel Sambuc 
376f4a2713aSLionel Sambuc   StringRef Name = C.getCalleeName(FDecl);
377f4a2713aSLionel Sambuc   if (Name.empty())
378f4a2713aSLionel Sambuc     return;
379f4a2713aSLionel Sambuc   FnCheck evalFunction = llvm::StringSwitch<FnCheck>(Name)
380f4a2713aSLionel Sambuc     .Case("scanf", &GenericTaintChecker::postScanf)
381f4a2713aSLionel Sambuc     // TODO: Add support for vfscanf & family.
382f4a2713aSLionel Sambuc     .Case("getchar", &GenericTaintChecker::postRetTaint)
383f4a2713aSLionel Sambuc     .Case("getchar_unlocked", &GenericTaintChecker::postRetTaint)
384f4a2713aSLionel Sambuc     .Case("getenv", &GenericTaintChecker::postRetTaint)
385f4a2713aSLionel Sambuc     .Case("fopen", &GenericTaintChecker::postRetTaint)
386f4a2713aSLionel Sambuc     .Case("fdopen", &GenericTaintChecker::postRetTaint)
387f4a2713aSLionel Sambuc     .Case("freopen", &GenericTaintChecker::postRetTaint)
388f4a2713aSLionel Sambuc     .Case("getch", &GenericTaintChecker::postRetTaint)
389f4a2713aSLionel Sambuc     .Case("wgetch", &GenericTaintChecker::postRetTaint)
390f4a2713aSLionel Sambuc     .Case("socket", &GenericTaintChecker::postSocket)
391*0a6a1f1dSLionel Sambuc     .Default(nullptr);
392f4a2713aSLionel Sambuc 
393f4a2713aSLionel Sambuc   // If the callee isn't defined, it is not of security concern.
394f4a2713aSLionel Sambuc   // Check and evaluate the call.
395*0a6a1f1dSLionel Sambuc   ProgramStateRef State = nullptr;
396f4a2713aSLionel Sambuc   if (evalFunction)
397f4a2713aSLionel Sambuc     State = (this->*evalFunction)(CE, C);
398f4a2713aSLionel Sambuc   if (!State)
399f4a2713aSLionel Sambuc     return;
400f4a2713aSLionel Sambuc 
401f4a2713aSLionel Sambuc   C.addTransition(State);
402f4a2713aSLionel Sambuc }
403f4a2713aSLionel Sambuc 
checkPre(const CallExpr * CE,CheckerContext & C) const404f4a2713aSLionel Sambuc bool GenericTaintChecker::checkPre(const CallExpr *CE, CheckerContext &C) const{
405f4a2713aSLionel Sambuc 
406f4a2713aSLionel Sambuc   if (checkUncontrolledFormatString(CE, C))
407f4a2713aSLionel Sambuc     return true;
408f4a2713aSLionel Sambuc 
409f4a2713aSLionel Sambuc   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
410f4a2713aSLionel Sambuc   if (!FDecl || FDecl->getKind() != Decl::Function)
411f4a2713aSLionel Sambuc     return false;
412f4a2713aSLionel Sambuc 
413f4a2713aSLionel Sambuc   StringRef Name = C.getCalleeName(FDecl);
414f4a2713aSLionel Sambuc   if (Name.empty())
415f4a2713aSLionel Sambuc     return false;
416f4a2713aSLionel Sambuc 
417f4a2713aSLionel Sambuc   if (checkSystemCall(CE, Name, C))
418f4a2713aSLionel Sambuc     return true;
419f4a2713aSLionel Sambuc 
420f4a2713aSLionel Sambuc   if (checkTaintedBufferSize(CE, FDecl, C))
421f4a2713aSLionel Sambuc     return true;
422f4a2713aSLionel Sambuc 
423f4a2713aSLionel Sambuc   return false;
424f4a2713aSLionel Sambuc }
425f4a2713aSLionel Sambuc 
getPointedToSymbol(CheckerContext & C,const Expr * Arg)426f4a2713aSLionel Sambuc SymbolRef GenericTaintChecker::getPointedToSymbol(CheckerContext &C,
427f4a2713aSLionel Sambuc                                                   const Expr* Arg) {
428f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
429f4a2713aSLionel Sambuc   SVal AddrVal = State->getSVal(Arg->IgnoreParens(), C.getLocationContext());
430f4a2713aSLionel Sambuc   if (AddrVal.isUnknownOrUndef())
431*0a6a1f1dSLionel Sambuc     return nullptr;
432f4a2713aSLionel Sambuc 
433f4a2713aSLionel Sambuc   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
434f4a2713aSLionel Sambuc   if (!AddrLoc)
435*0a6a1f1dSLionel Sambuc     return nullptr;
436f4a2713aSLionel Sambuc 
437f4a2713aSLionel Sambuc   const PointerType *ArgTy =
438f4a2713aSLionel Sambuc     dyn_cast<PointerType>(Arg->getType().getCanonicalType().getTypePtr());
439f4a2713aSLionel Sambuc   SVal Val = State->getSVal(*AddrLoc,
440f4a2713aSLionel Sambuc                             ArgTy ? ArgTy->getPointeeType(): QualType());
441f4a2713aSLionel Sambuc   return Val.getAsSymbol();
442f4a2713aSLionel Sambuc }
443f4a2713aSLionel Sambuc 
444f4a2713aSLionel Sambuc ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const445f4a2713aSLionel Sambuc GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
446f4a2713aSLionel Sambuc                                                    CheckerContext &C) const {
447f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
448f4a2713aSLionel Sambuc 
449f4a2713aSLionel Sambuc   // Check for taint in arguments.
450f4a2713aSLionel Sambuc   bool IsTainted = false;
451f4a2713aSLionel Sambuc   for (ArgVector::const_iterator I = SrcArgs.begin(),
452f4a2713aSLionel Sambuc                                  E = SrcArgs.end(); I != E; ++I) {
453f4a2713aSLionel Sambuc     unsigned ArgNum = *I;
454f4a2713aSLionel Sambuc 
455f4a2713aSLionel Sambuc     if (ArgNum == InvalidArgIndex) {
456f4a2713aSLionel Sambuc       // Check if any of the arguments is tainted, but skip the
457f4a2713aSLionel Sambuc       // destination arguments.
458f4a2713aSLionel Sambuc       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
459f4a2713aSLionel Sambuc         if (isDestinationArgument(i))
460f4a2713aSLionel Sambuc           continue;
461f4a2713aSLionel Sambuc         if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
462f4a2713aSLionel Sambuc           break;
463f4a2713aSLionel Sambuc       }
464f4a2713aSLionel Sambuc       break;
465f4a2713aSLionel Sambuc     }
466f4a2713aSLionel Sambuc 
467f4a2713aSLionel Sambuc     if (CE->getNumArgs() < (ArgNum + 1))
468f4a2713aSLionel Sambuc       return State;
469f4a2713aSLionel Sambuc     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
470f4a2713aSLionel Sambuc       break;
471f4a2713aSLionel Sambuc   }
472f4a2713aSLionel Sambuc   if (!IsTainted)
473f4a2713aSLionel Sambuc     return State;
474f4a2713aSLionel Sambuc 
475f4a2713aSLionel Sambuc   // Mark the arguments which should be tainted after the function returns.
476f4a2713aSLionel Sambuc   for (ArgVector::const_iterator I = DstArgs.begin(),
477f4a2713aSLionel Sambuc                                  E = DstArgs.end(); I != E; ++I) {
478f4a2713aSLionel Sambuc     unsigned ArgNum = *I;
479f4a2713aSLionel Sambuc 
480f4a2713aSLionel Sambuc     // Should we mark all arguments as tainted?
481f4a2713aSLionel Sambuc     if (ArgNum == InvalidArgIndex) {
482f4a2713aSLionel Sambuc       // For all pointer and references that were passed in:
483f4a2713aSLionel Sambuc       //   If they are not pointing to const data, mark data as tainted.
484f4a2713aSLionel Sambuc       //   TODO: So far we are just going one level down; ideally we'd need to
485f4a2713aSLionel Sambuc       //         recurse here.
486f4a2713aSLionel Sambuc       for (unsigned int i = 0; i < CE->getNumArgs(); ++i) {
487f4a2713aSLionel Sambuc         const Expr *Arg = CE->getArg(i);
488f4a2713aSLionel Sambuc         // Process pointer argument.
489f4a2713aSLionel Sambuc         const Type *ArgTy = Arg->getType().getTypePtr();
490f4a2713aSLionel Sambuc         QualType PType = ArgTy->getPointeeType();
491f4a2713aSLionel Sambuc         if ((!PType.isNull() && !PType.isConstQualified())
492f4a2713aSLionel Sambuc             || (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
493f4a2713aSLionel Sambuc           State = State->add<TaintArgsOnPostVisit>(i);
494f4a2713aSLionel Sambuc       }
495f4a2713aSLionel Sambuc       continue;
496f4a2713aSLionel Sambuc     }
497f4a2713aSLionel Sambuc 
498f4a2713aSLionel Sambuc     // Should mark the return value?
499f4a2713aSLionel Sambuc     if (ArgNum == ReturnValueIndex) {
500f4a2713aSLionel Sambuc       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
501f4a2713aSLionel Sambuc       continue;
502f4a2713aSLionel Sambuc     }
503f4a2713aSLionel Sambuc 
504f4a2713aSLionel Sambuc     // Mark the given argument.
505f4a2713aSLionel Sambuc     assert(ArgNum < CE->getNumArgs());
506f4a2713aSLionel Sambuc     State = State->add<TaintArgsOnPostVisit>(ArgNum);
507f4a2713aSLionel Sambuc   }
508f4a2713aSLionel Sambuc 
509f4a2713aSLionel Sambuc   return State;
510f4a2713aSLionel Sambuc }
511f4a2713aSLionel Sambuc 
512f4a2713aSLionel Sambuc 
513f4a2713aSLionel Sambuc // If argument 0 (file descriptor) is tainted, all arguments except for arg 0
514f4a2713aSLionel Sambuc // and arg 1 should get taint.
preFscanf(const CallExpr * CE,CheckerContext & C) const515f4a2713aSLionel Sambuc ProgramStateRef GenericTaintChecker::preFscanf(const CallExpr *CE,
516f4a2713aSLionel Sambuc                                                    CheckerContext &C) const {
517f4a2713aSLionel Sambuc   assert(CE->getNumArgs() >= 2);
518f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
519f4a2713aSLionel Sambuc 
520f4a2713aSLionel Sambuc   // Check is the file descriptor is tainted.
521f4a2713aSLionel Sambuc   if (State->isTainted(CE->getArg(0), C.getLocationContext()) ||
522f4a2713aSLionel Sambuc       isStdin(CE->getArg(0), C)) {
523f4a2713aSLionel Sambuc     // All arguments except for the first two should get taint.
524f4a2713aSLionel Sambuc     for (unsigned int i = 2; i < CE->getNumArgs(); ++i)
525f4a2713aSLionel Sambuc         State = State->add<TaintArgsOnPostVisit>(i);
526f4a2713aSLionel Sambuc     return State;
527f4a2713aSLionel Sambuc   }
528f4a2713aSLionel Sambuc 
529*0a6a1f1dSLionel Sambuc   return nullptr;
530f4a2713aSLionel Sambuc }
531f4a2713aSLionel Sambuc 
532f4a2713aSLionel Sambuc 
533f4a2713aSLionel Sambuc // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(const CallExpr * CE,CheckerContext & C) const534f4a2713aSLionel Sambuc ProgramStateRef GenericTaintChecker::postSocket(const CallExpr *CE,
535f4a2713aSLionel Sambuc                                                 CheckerContext &C) const {
536f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
537f4a2713aSLionel Sambuc   if (CE->getNumArgs() < 3)
538f4a2713aSLionel Sambuc     return State;
539f4a2713aSLionel Sambuc 
540f4a2713aSLionel Sambuc   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
541f4a2713aSLionel Sambuc   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
542f4a2713aSLionel Sambuc   // White list the internal communication protocols.
543f4a2713aSLionel Sambuc   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
544f4a2713aSLionel Sambuc       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
545f4a2713aSLionel Sambuc     return State;
546f4a2713aSLionel Sambuc   State = State->addTaint(CE, C.getLocationContext());
547f4a2713aSLionel Sambuc   return State;
548f4a2713aSLionel Sambuc }
549f4a2713aSLionel Sambuc 
postScanf(const CallExpr * CE,CheckerContext & C) const550f4a2713aSLionel Sambuc ProgramStateRef GenericTaintChecker::postScanf(const CallExpr *CE,
551f4a2713aSLionel Sambuc                                                    CheckerContext &C) const {
552f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
553f4a2713aSLionel Sambuc   if (CE->getNumArgs() < 2)
554f4a2713aSLionel Sambuc     return State;
555f4a2713aSLionel Sambuc 
556f4a2713aSLionel Sambuc   // All arguments except for the very first one should get taint.
557f4a2713aSLionel Sambuc   for (unsigned int i = 1; i < CE->getNumArgs(); ++i) {
558f4a2713aSLionel Sambuc     // The arguments are pointer arguments. The data they are pointing at is
559f4a2713aSLionel Sambuc     // tainted after the call.
560f4a2713aSLionel Sambuc     const Expr* Arg = CE->getArg(i);
561f4a2713aSLionel Sambuc         SymbolRef Sym = getPointedToSymbol(C, Arg);
562f4a2713aSLionel Sambuc     if (Sym)
563f4a2713aSLionel Sambuc       State = State->addTaint(Sym);
564f4a2713aSLionel Sambuc   }
565f4a2713aSLionel Sambuc   return State;
566f4a2713aSLionel Sambuc }
567f4a2713aSLionel Sambuc 
postRetTaint(const CallExpr * CE,CheckerContext & C) const568f4a2713aSLionel Sambuc ProgramStateRef GenericTaintChecker::postRetTaint(const CallExpr *CE,
569f4a2713aSLionel Sambuc                                                   CheckerContext &C) const {
570f4a2713aSLionel Sambuc   return C.getState()->addTaint(CE, C.getLocationContext());
571f4a2713aSLionel Sambuc }
572f4a2713aSLionel Sambuc 
isStdin(const Expr * E,CheckerContext & C)573f4a2713aSLionel Sambuc bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
574f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
575f4a2713aSLionel Sambuc   SVal Val = State->getSVal(E, C.getLocationContext());
576f4a2713aSLionel Sambuc 
577f4a2713aSLionel Sambuc   // stdin is a pointer, so it would be a region.
578f4a2713aSLionel Sambuc   const MemRegion *MemReg = Val.getAsRegion();
579f4a2713aSLionel Sambuc 
580f4a2713aSLionel Sambuc   // The region should be symbolic, we do not know it's value.
581f4a2713aSLionel Sambuc   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
582f4a2713aSLionel Sambuc   if (!SymReg)
583f4a2713aSLionel Sambuc     return false;
584f4a2713aSLionel Sambuc 
585f4a2713aSLionel Sambuc   // Get it's symbol and find the declaration region it's pointing to.
586f4a2713aSLionel Sambuc   const SymbolRegionValue *Sm =dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
587f4a2713aSLionel Sambuc   if (!Sm)
588f4a2713aSLionel Sambuc     return false;
589f4a2713aSLionel Sambuc   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
590f4a2713aSLionel Sambuc   if (!DeclReg)
591f4a2713aSLionel Sambuc     return false;
592f4a2713aSLionel Sambuc 
593f4a2713aSLionel Sambuc   // This region corresponds to a declaration, find out if it's a global/extern
594f4a2713aSLionel Sambuc   // variable named stdin with the proper type.
595f4a2713aSLionel Sambuc   if (const VarDecl *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
596f4a2713aSLionel Sambuc     D = D->getCanonicalDecl();
597f4a2713aSLionel Sambuc     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC())
598f4a2713aSLionel Sambuc         if (const PointerType * PtrTy =
599f4a2713aSLionel Sambuc               dyn_cast<PointerType>(D->getType().getTypePtr()))
600f4a2713aSLionel Sambuc           if (PtrTy->getPointeeType() == C.getASTContext().getFILEType())
601f4a2713aSLionel Sambuc             return true;
602f4a2713aSLionel Sambuc   }
603f4a2713aSLionel Sambuc   return false;
604f4a2713aSLionel Sambuc }
605f4a2713aSLionel Sambuc 
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)606f4a2713aSLionel Sambuc static bool getPrintfFormatArgumentNum(const CallExpr *CE,
607f4a2713aSLionel Sambuc                                        const CheckerContext &C,
608f4a2713aSLionel Sambuc                                        unsigned int &ArgNum) {
609f4a2713aSLionel Sambuc   // Find if the function contains a format string argument.
610f4a2713aSLionel Sambuc   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
611f4a2713aSLionel Sambuc   // vsnprintf, syslog, custom annotated functions.
612f4a2713aSLionel Sambuc   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
613f4a2713aSLionel Sambuc   if (!FDecl)
614f4a2713aSLionel Sambuc     return false;
615*0a6a1f1dSLionel Sambuc   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
616f4a2713aSLionel Sambuc     ArgNum = Format->getFormatIdx() - 1;
617f4a2713aSLionel Sambuc     if ((Format->getType()->getName() == "printf") &&
618f4a2713aSLionel Sambuc          CE->getNumArgs() > ArgNum)
619f4a2713aSLionel Sambuc       return true;
620f4a2713aSLionel Sambuc   }
621f4a2713aSLionel Sambuc 
622f4a2713aSLionel Sambuc   // Or if a function is named setproctitle (this is a heuristic).
623f4a2713aSLionel Sambuc   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
624f4a2713aSLionel Sambuc     ArgNum = 0;
625f4a2713aSLionel Sambuc     return true;
626f4a2713aSLionel Sambuc   }
627f4a2713aSLionel Sambuc 
628f4a2713aSLionel Sambuc   return false;
629f4a2713aSLionel Sambuc }
630f4a2713aSLionel Sambuc 
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const631f4a2713aSLionel Sambuc bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
632f4a2713aSLionel Sambuc                                                   const char Msg[],
633f4a2713aSLionel Sambuc                                                   CheckerContext &C) const {
634f4a2713aSLionel Sambuc   assert(E);
635f4a2713aSLionel Sambuc 
636f4a2713aSLionel Sambuc   // Check for taint.
637f4a2713aSLionel Sambuc   ProgramStateRef State = C.getState();
638f4a2713aSLionel Sambuc   if (!State->isTainted(getPointedToSymbol(C, E)) &&
639f4a2713aSLionel Sambuc       !State->isTainted(E, C.getLocationContext()))
640f4a2713aSLionel Sambuc     return false;
641f4a2713aSLionel Sambuc 
642f4a2713aSLionel Sambuc   // Generate diagnostic.
643f4a2713aSLionel Sambuc   if (ExplodedNode *N = C.addTransition()) {
644f4a2713aSLionel Sambuc     initBugType();
645f4a2713aSLionel Sambuc     BugReport *report = new BugReport(*BT, Msg, N);
646f4a2713aSLionel Sambuc     report->addRange(E->getSourceRange());
647f4a2713aSLionel Sambuc     C.emitReport(report);
648f4a2713aSLionel Sambuc     return true;
649f4a2713aSLionel Sambuc   }
650f4a2713aSLionel Sambuc   return false;
651f4a2713aSLionel Sambuc }
652f4a2713aSLionel Sambuc 
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const653f4a2713aSLionel Sambuc bool GenericTaintChecker::checkUncontrolledFormatString(const CallExpr *CE,
654f4a2713aSLionel Sambuc                                                         CheckerContext &C) const{
655f4a2713aSLionel Sambuc   // Check if the function contains a format string argument.
656f4a2713aSLionel Sambuc   unsigned int ArgNum = 0;
657f4a2713aSLionel Sambuc   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
658f4a2713aSLionel Sambuc     return false;
659f4a2713aSLionel Sambuc 
660f4a2713aSLionel Sambuc   // If either the format string content or the pointer itself are tainted, warn.
661f4a2713aSLionel Sambuc   if (generateReportIfTainted(CE->getArg(ArgNum),
662f4a2713aSLionel Sambuc                               MsgUncontrolledFormatString, C))
663f4a2713aSLionel Sambuc     return true;
664f4a2713aSLionel Sambuc   return false;
665f4a2713aSLionel Sambuc }
666f4a2713aSLionel Sambuc 
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const667f4a2713aSLionel Sambuc bool GenericTaintChecker::checkSystemCall(const CallExpr *CE,
668f4a2713aSLionel Sambuc                                           StringRef Name,
669f4a2713aSLionel Sambuc                                           CheckerContext &C) const {
670f4a2713aSLionel Sambuc   // TODO: It might make sense to run this check on demand. In some cases,
671f4a2713aSLionel Sambuc   // we should check if the environment has been cleansed here. We also might
672f4a2713aSLionel Sambuc   // need to know if the user was reset before these calls(seteuid).
673f4a2713aSLionel Sambuc   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
674f4a2713aSLionel Sambuc     .Case("system", 0)
675f4a2713aSLionel Sambuc     .Case("popen", 0)
676f4a2713aSLionel Sambuc     .Case("execl", 0)
677f4a2713aSLionel Sambuc     .Case("execle", 0)
678f4a2713aSLionel Sambuc     .Case("execlp", 0)
679f4a2713aSLionel Sambuc     .Case("execv", 0)
680f4a2713aSLionel Sambuc     .Case("execvp", 0)
681f4a2713aSLionel Sambuc     .Case("execvP", 0)
682f4a2713aSLionel Sambuc     .Case("execve", 0)
683f4a2713aSLionel Sambuc     .Case("dlopen", 0)
684f4a2713aSLionel Sambuc     .Default(UINT_MAX);
685f4a2713aSLionel Sambuc 
686f4a2713aSLionel Sambuc   if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
687f4a2713aSLionel Sambuc     return false;
688f4a2713aSLionel Sambuc 
689f4a2713aSLionel Sambuc   if (generateReportIfTainted(CE->getArg(ArgNum),
690f4a2713aSLionel Sambuc                               MsgSanitizeSystemArgs, C))
691f4a2713aSLionel Sambuc     return true;
692f4a2713aSLionel Sambuc 
693f4a2713aSLionel Sambuc   return false;
694f4a2713aSLionel Sambuc }
695f4a2713aSLionel Sambuc 
696f4a2713aSLionel Sambuc // TODO: Should this check be a part of the CString checker?
697f4a2713aSLionel Sambuc // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const698f4a2713aSLionel Sambuc bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
699f4a2713aSLionel Sambuc                                                  const FunctionDecl *FDecl,
700f4a2713aSLionel Sambuc                                                  CheckerContext &C) const {
701f4a2713aSLionel Sambuc   // If the function has a buffer size argument, set ArgNum.
702f4a2713aSLionel Sambuc   unsigned ArgNum = InvalidArgIndex;
703f4a2713aSLionel Sambuc   unsigned BId = 0;
704f4a2713aSLionel Sambuc   if ( (BId = FDecl->getMemoryFunctionKind()) )
705f4a2713aSLionel Sambuc     switch(BId) {
706f4a2713aSLionel Sambuc     case Builtin::BImemcpy:
707f4a2713aSLionel Sambuc     case Builtin::BImemmove:
708f4a2713aSLionel Sambuc     case Builtin::BIstrncpy:
709f4a2713aSLionel Sambuc       ArgNum = 2;
710f4a2713aSLionel Sambuc       break;
711f4a2713aSLionel Sambuc     case Builtin::BIstrndup:
712f4a2713aSLionel Sambuc       ArgNum = 1;
713f4a2713aSLionel Sambuc       break;
714f4a2713aSLionel Sambuc     default:
715f4a2713aSLionel Sambuc       break;
716f4a2713aSLionel Sambuc     };
717f4a2713aSLionel Sambuc 
718f4a2713aSLionel Sambuc   if (ArgNum == InvalidArgIndex) {
719f4a2713aSLionel Sambuc     if (C.isCLibraryFunction(FDecl, "malloc") ||
720f4a2713aSLionel Sambuc         C.isCLibraryFunction(FDecl, "calloc") ||
721f4a2713aSLionel Sambuc         C.isCLibraryFunction(FDecl, "alloca"))
722f4a2713aSLionel Sambuc       ArgNum = 0;
723f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "memccpy"))
724f4a2713aSLionel Sambuc       ArgNum = 3;
725f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "realloc"))
726f4a2713aSLionel Sambuc       ArgNum = 1;
727f4a2713aSLionel Sambuc     else if (C.isCLibraryFunction(FDecl, "bcopy"))
728f4a2713aSLionel Sambuc       ArgNum = 2;
729f4a2713aSLionel Sambuc   }
730f4a2713aSLionel Sambuc 
731f4a2713aSLionel Sambuc   if (ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
732f4a2713aSLionel Sambuc       generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C))
733f4a2713aSLionel Sambuc     return true;
734f4a2713aSLionel Sambuc 
735f4a2713aSLionel Sambuc   return false;
736f4a2713aSLionel Sambuc }
737f4a2713aSLionel Sambuc 
registerGenericTaintChecker(CheckerManager & mgr)738f4a2713aSLionel Sambuc void ento::registerGenericTaintChecker(CheckerManager &mgr) {
739f4a2713aSLionel Sambuc   mgr.registerChecker<GenericTaintChecker>();
740f4a2713aSLionel Sambuc }
741