1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
#include "Taint.h"
#include "Yaml.h"
#include "clang/AST/Attr.h"
#include "clang/Basic/Builtins.h"
#include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
#include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
#include "clang/StaticAnalyzer/Core/Checker.h"
#include "clang/StaticAnalyzer/Core/CheckerManager.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
#include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
#include "llvm/Support/YAMLTraits.h"
#include <algorithm>
#include <limits>
#include <memory>
#include <unordered_map>
#include <utility>
32 
33 using namespace clang;
34 using namespace ento;
35 using namespace taint;
36 
37 namespace {
38 class GenericTaintChecker
39     : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
40 public:
41   static void *getTag() {
42     static int Tag;
43     return &Tag;
44   }
45 
46   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
47 
48   void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
49 
50   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
51                   const char *Sep) const override;
52 
53   using ArgVector = SmallVector<unsigned, 2>;
54   using SignedArgVector = SmallVector<int, 2>;
55 
56   enum class VariadicType { None, Src, Dst };
57 
58   /// Used to parse the configuration file.
59   struct TaintConfiguration {
60     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
61 
62     struct Propagation {
63       std::string Name;
64       std::string Scope;
65       ArgVector SrcArgs;
66       SignedArgVector DstArgs;
67       VariadicType VarType;
68       unsigned VarIndex;
69     };
70 
71     std::vector<Propagation> Propagations;
72     std::vector<NameScopeArgs> Filters;
73     std::vector<NameScopeArgs> Sinks;
74 
75     TaintConfiguration() = default;
76     TaintConfiguration(const TaintConfiguration &) = default;
77     TaintConfiguration(TaintConfiguration &&) = default;
78     TaintConfiguration &operator=(const TaintConfiguration &) = default;
79     TaintConfiguration &operator=(TaintConfiguration &&) = default;
80   };
81 
82   /// Convert SignedArgVector to ArgVector.
83   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
84                                SignedArgVector Args);
85 
86   /// Parse the config.
87   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
88                           TaintConfiguration &&Config);
89 
90   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
91   /// Denotes the return vale.
92   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
93                                          1};
94 
95 private:
96   mutable std::unique_ptr<BugType> BT;
97   void initBugType() const {
98     if (!BT)
99       BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
100   }
101 
102   struct FunctionData {
103     FunctionData() = delete;
104     FunctionData(const FunctionData &) = default;
105     FunctionData(FunctionData &&) = default;
106     FunctionData &operator=(const FunctionData &) = delete;
107     FunctionData &operator=(FunctionData &&) = delete;
108 
109     static Optional<FunctionData> create(const CallExpr *CE,
110                                          const CheckerContext &C) {
111       const FunctionDecl *FDecl = C.getCalleeDecl(CE);
112       if (!FDecl || (FDecl->getKind() != Decl::Function &&
113                      FDecl->getKind() != Decl::CXXMethod))
114         return None;
115 
116       StringRef Name = C.getCalleeName(FDecl);
117       std::string FullName = FDecl->getQualifiedNameAsString();
118       if (Name.empty() || FullName.empty())
119         return None;
120 
121       return FunctionData{FDecl, Name, FullName};
122     }
123 
124     bool isInScope(StringRef Scope) const {
125       return StringRef(FullName).startswith(Scope);
126     }
127 
128     const FunctionDecl *const FDecl;
129     const StringRef Name;
130     const std::string FullName;
131   };
132 
133   /// Catch taint related bugs. Check if tainted data is passed to a
134   /// system call etc. Returns true on matching.
135   bool checkPre(const CallExpr *CE, const FunctionData &FData,
136                 CheckerContext &C) const;
137 
138   /// Add taint sources on a pre-visit. Returns true on matching.
139   bool addSourcesPre(const CallExpr *CE, const FunctionData &FData,
140                      CheckerContext &C) const;
141 
142   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
143   /// matching.
144   bool addFiltersPre(const CallExpr *CE, const FunctionData &FData,
145                      CheckerContext &C) const;
146 
147   /// Propagate taint generated at pre-visit. Returns true on matching.
148   bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
149 
150   /// Check if the region the expression evaluates to is the standard input,
151   /// and thus, is tainted.
152   static bool isStdin(const Expr *E, CheckerContext &C);
153 
154   /// Given a pointer argument, return the value it points to.
155   static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
156 
157   /// Check for CWE-134: Uncontrolled Format String.
158   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
159       "Untrusted data is used as a format string "
160       "(CWE-134: Uncontrolled Format String)";
161   bool checkUncontrolledFormatString(const CallExpr *CE,
162                                      CheckerContext &C) const;
163 
164   /// Check for:
165   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
166   /// CWE-78, "Failure to Sanitize Data into an OS Command"
167   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
168       "Untrusted data is passed to a system call "
169       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
170   bool checkSystemCall(const CallExpr *CE, StringRef Name,
171                        CheckerContext &C) const;
172 
173   /// Check if tainted data is used as a buffer size ins strn.. functions,
174   /// and allocators.
175   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
176       "Untrusted data is used to specify the buffer size "
177       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
178       "for character data and the null terminator)";
179   bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
180                               CheckerContext &C) const;
181 
182   /// Check if tainted data is used as a custom sink's parameter.
183   static constexpr llvm::StringLiteral MsgCustomSink =
184       "Untrusted data is passed to a user-defined sink";
185   bool checkCustomSinks(const CallExpr *CE, const FunctionData &FData,
186                         CheckerContext &C) const;
187 
188   /// Generate a report if the expression is tainted or points to tainted data.
189   bool generateReportIfTainted(const Expr *E, StringRef Msg,
190                                CheckerContext &C) const;
191 
192   struct TaintPropagationRule;
193   template <typename T>
194   using ConfigDataMap =
195       std::unordered_multimap<std::string, std::pair<std::string, T>>;
196   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
197   using NameArgMap = ConfigDataMap<ArgVector>;
198 
199   /// Find a function with the given name and scope. Returns the first match
200   /// or the end of the map.
201   template <typename T>
202   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
203                                    const FunctionData &FData);
204 
205   /// A struct used to specify taint propagation rules for a function.
206   ///
207   /// If any of the possible taint source arguments is tainted, all of the
208   /// destination arguments should also be tainted. Use InvalidArgIndex in the
209   /// src list to specify that all of the arguments can introduce taint. Use
210   /// InvalidArgIndex in the dst arguments to signify that all the non-const
211   /// pointer and reference arguments might be tainted on return. If
212   /// ReturnValueIndex is added to the dst list, the return value will be
213   /// tainted.
214   struct TaintPropagationRule {
215     using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
216                                          CheckerContext &C);
217 
218     /// List of arguments which can be taint sources and should be checked.
219     ArgVector SrcArgs;
220     /// List of arguments which should be tainted on function return.
221     ArgVector DstArgs;
222     /// Index for the first variadic parameter if exist.
223     unsigned VariadicIndex;
224     /// Show when a function has variadic parameters. If it has, it marks all
225     /// of them as source or destination.
226     VariadicType VarType;
227     /// Special function for tainted source determination. If defined, it can
228     /// override the default behavior.
229     PropagationFuncType PropagationFunc;
230 
231     TaintPropagationRule()
232         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
233           PropagationFunc(nullptr) {}
234 
235     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
236                          VariadicType Var = VariadicType::None,
237                          unsigned VarIndex = InvalidArgIndex,
238                          PropagationFuncType Func = nullptr)
239         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
240           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
241 
242     /// Get the propagation rule for a given function.
243     static TaintPropagationRule
244     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
245                             const FunctionData &FData, CheckerContext &C);
246 
247     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
248     void addDstArg(unsigned A) { DstArgs.push_back(A); }
249 
250     bool isNull() const {
251       return SrcArgs.empty() && DstArgs.empty() &&
252              VariadicType::None == VarType;
253     }
254 
255     bool isDestinationArgument(unsigned ArgNum) const {
256       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
257     }
258 
259     static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
260                                            CheckerContext &C) {
261       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
262         return true;
263 
264       if (!E->getType().getTypePtr()->isPointerType())
265         return false;
266 
267       Optional<SVal> V = getPointedToSVal(C, E);
268       return (V && isTainted(State, *V));
269     }
270 
271     /// Pre-process a function which propagates taint according to the
272     /// taint rule.
273     ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
274 
275     // Functions for custom taintedness propagation.
276     static bool postSocket(bool IsTainted, const CallExpr *CE,
277                            CheckerContext &C);
278   };
279 
280   /// Defines a map between the propagation function's name, scope
281   /// and TaintPropagationRule.
282   NameRuleMap CustomPropagations;
283 
284   /// Defines a map between the filter function's name, scope and filtering
285   /// args.
286   NameArgMap CustomFilters;
287 
288   /// Defines a map between the sink function's name, scope and sinking args.
289   NameArgMap CustomSinks;
290 };
291 
292 const unsigned GenericTaintChecker::ReturnValueIndex;
293 const unsigned GenericTaintChecker::InvalidArgIndex;
294 
295 // FIXME: these lines can be removed in C++17
296 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
297 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
298 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
299 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
300 } // end of anonymous namespace
301 
// Short alias for the configuration type handled by the YAML traits below.
using TaintConfig = GenericTaintChecker::TaintConfiguration;

// Let the YAML parser treat std::vector of these element types as sequences.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
306 
307 namespace llvm {
308 namespace yaml {
309 template <> struct MappingTraits<TaintConfig> {
310   static void mapping(IO &IO, TaintConfig &Config) {
311     IO.mapOptional("Propagations", Config.Propagations);
312     IO.mapOptional("Filters", Config.Filters);
313     IO.mapOptional("Sinks", Config.Sinks);
314   }
315 };
316 
317 template <> struct MappingTraits<TaintConfig::Propagation> {
318   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
319     IO.mapRequired("Name", Propagation.Name);
320     IO.mapOptional("Scope", Propagation.Scope);
321     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
322     IO.mapOptional("DstArgs", Propagation.DstArgs);
323     IO.mapOptional("VariadicType", Propagation.VarType,
324                    GenericTaintChecker::VariadicType::None);
325     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
326                    GenericTaintChecker::InvalidArgIndex);
327   }
328 };
329 
330 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
331   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
332     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
333     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
334     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
335   }
336 };
337 
338 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
339   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
340     IO.mapRequired("Name", std::get<0>(NSA));
341     IO.mapOptional("Scope", std::get<1>(NSA));
342     IO.mapRequired("Args", std::get<2>(NSA));
343   }
344 };
345 } // namespace yaml
346 } // namespace llvm
347 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers: either ReturnValueIndex,
/// or the index of a pointer/reference argument whose pointed-to data should
/// be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
353 
354 GenericTaintChecker::ArgVector GenericTaintChecker::convertToArgVector(
355     CheckerManager &Mgr, const std::string &Option, SignedArgVector Args) {
356   ArgVector Result;
357   for (int Arg : Args) {
358     if (Arg == -1)
359       Result.push_back(ReturnValueIndex);
360     else if (Arg < -1) {
361       Result.push_back(InvalidArgIndex);
362       Mgr.reportInvalidCheckerOptionValue(
363           this, Option,
364           "an argument number for propagation rules greater or equal to -1");
365     } else
366       Result.push_back(static_cast<unsigned>(Arg));
367   }
368   return Result;
369 }
370 
371 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
372                                              const std::string &Option,
373                                              TaintConfiguration &&Config) {
374   for (auto &P : Config.Propagations) {
375     GenericTaintChecker::CustomPropagations.emplace(
376         P.Name,
377         std::make_pair(P.Scope, TaintPropagationRule{
378                                     std::move(P.SrcArgs),
379                                     convertToArgVector(Mgr, Option, P.DstArgs),
380                                     P.VarType, P.VarIndex}));
381   }
382 
383   for (auto &F : Config.Filters) {
384     GenericTaintChecker::CustomFilters.emplace(
385         std::get<0>(F),
386         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
387   }
388 
389   for (auto &S : Config.Sinks) {
390     GenericTaintChecker::CustomSinks.emplace(
391         std::get<0>(S),
392         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
393   }
394 }
395 
396 template <typename T>
397 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
398                                                const FunctionData &FData) {
399   auto Range = Map.equal_range(FData.Name);
400   auto It =
401       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
402         const auto &Value = Entry.second;
403         StringRef Scope = Value.first;
404         return Scope.empty() || FData.isInScope(Scope);
405       });
406   return It != Range.second ? It : Map.end();
407 }
408 
409 GenericTaintChecker::TaintPropagationRule
410 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
411     const NameRuleMap &CustomPropagations, const FunctionData &FData,
412     CheckerContext &C) {
413   // TODO: Currently, we might lose precision here: we always mark a return
414   // value as tainted even if it's just a pointer, pointing to tainted data.
415 
416   // Check for exact name match for functions without builtin substitutes.
417   // Use qualified name, because these are C functions without namespace.
418   TaintPropagationRule Rule =
419       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
420           // Source functions
421           // TODO: Add support for vfscanf & family.
422           .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
423           .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
424           .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
425           .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
426           .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
427           .Case("getchar_unlocked",
428                 TaintPropagationRule({}, {ReturnValueIndex}))
429           .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
430           .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
431           .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
432           .Case("socket",
433                 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
434                                      InvalidArgIndex,
435                                      &TaintPropagationRule::postSocket))
436           .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
437           // Propagating functions
438           .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
439           .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
440           .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
441           .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
442           .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
443           .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
444           .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
445           .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
446           .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
447           .Case("getdelim", TaintPropagationRule({3}, {0}))
448           .Case("getline", TaintPropagationRule({2}, {0}))
449           .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
450           .Case("pread",
451                 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
452           .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
453           .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
454           .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
455           .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
456           .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
457           .Default(TaintPropagationRule());
458 
459   if (!Rule.isNull())
460     return Rule;
461 
462   // Check if it's one of the memory setting/copying functions.
463   // This check is specialized but faster then calling isCLibraryFunction.
464   const FunctionDecl *FDecl = FData.FDecl;
465   unsigned BId = 0;
466   if ((BId = FDecl->getMemoryFunctionKind()))
467     switch (BId) {
468     case Builtin::BImemcpy:
469     case Builtin::BImemmove:
470     case Builtin::BIstrncpy:
471     case Builtin::BIstrncat:
472       return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
473     case Builtin::BIstrlcpy:
474     case Builtin::BIstrlcat:
475       return TaintPropagationRule({1, 2}, {0});
476     case Builtin::BIstrndup:
477       return TaintPropagationRule({0, 1}, {ReturnValueIndex});
478 
479     default:
480       break;
481     };
482 
483   // Process all other functions which could be defined as builtins.
484   if (Rule.isNull()) {
485     if (C.isCLibraryFunction(FDecl, "snprintf"))
486       return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
487                                   3);
488     else if (C.isCLibraryFunction(FDecl, "sprintf"))
489       return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
490                                   2);
491     else if (C.isCLibraryFunction(FDecl, "strcpy") ||
492              C.isCLibraryFunction(FDecl, "stpcpy") ||
493              C.isCLibraryFunction(FDecl, "strcat"))
494       return TaintPropagationRule({1}, {0, ReturnValueIndex});
495     else if (C.isCLibraryFunction(FDecl, "bcopy"))
496       return TaintPropagationRule({0, 2}, {1});
497     else if (C.isCLibraryFunction(FDecl, "strdup") ||
498              C.isCLibraryFunction(FDecl, "strdupa"))
499       return TaintPropagationRule({0}, {ReturnValueIndex});
500     else if (C.isCLibraryFunction(FDecl, "wcsdup"))
501       return TaintPropagationRule({0}, {ReturnValueIndex});
502   }
503 
504   // Skipping the following functions, since they might be used for cleansing
505   // or smart memory copy:
506   // - memccpy - copying until hitting a special character.
507 
508   auto It = findFunctionInConfig(CustomPropagations, FData);
509   if (It != CustomPropagations.end()) {
510     const auto &Value = It->second;
511     return Value.second;
512   }
513 
514   return TaintPropagationRule();
515 }
516 
517 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
518                                        CheckerContext &C) const {
519   Optional<FunctionData> FData = FunctionData::create(CE, C);
520   if (!FData)
521     return;
522 
523   // Check for taintedness related errors first: system call, uncontrolled
524   // format string, tainted buffer size.
525   if (checkPre(CE, *FData, C))
526     return;
527 
528   // Marks the function's arguments and/or return value tainted if it present in
529   // the list.
530   if (addSourcesPre(CE, *FData, C))
531     return;
532 
533   addFiltersPre(CE, *FData, C);
534 }
535 
536 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
537                                         CheckerContext &C) const {
538   // Set the marked values as tainted. The return value only accessible from
539   // checkPostStmt.
540   propagateFromPre(CE, C);
541 }
542 
543 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
544                                      const char *NL, const char *Sep) const {
545   printTaint(State, Out, NL, Sep);
546 }
547 
548 bool GenericTaintChecker::addSourcesPre(const CallExpr *CE,
549                                         const FunctionData &FData,
550                                         CheckerContext &C) const {
551   // First, try generating a propagation rule for this function.
552   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
553       this->CustomPropagations, FData, C);
554   if (!Rule.isNull()) {
555     ProgramStateRef State = Rule.process(CE, C);
556     if (State) {
557       C.addTransition(State);
558       return true;
559     }
560   }
561   return false;
562 }
563 
564 bool GenericTaintChecker::addFiltersPre(const CallExpr *CE,
565                                         const FunctionData &FData,
566                                         CheckerContext &C) const {
567   auto It = findFunctionInConfig(CustomFilters, FData);
568   if (It == CustomFilters.end())
569     return false;
570 
571   ProgramStateRef State = C.getState();
572   const auto &Value = It->second;
573   const ArgVector &Args = Value.second;
574   for (unsigned ArgNum : Args) {
575     if (ArgNum >= CE->getNumArgs())
576       continue;
577 
578     const Expr *Arg = CE->getArg(ArgNum);
579     Optional<SVal> V = getPointedToSVal(C, Arg);
580     if (V)
581       State = removeTaint(State, *V);
582   }
583 
584   if (State != C.getState()) {
585     C.addTransition(State);
586     return true;
587   }
588   return false;
589 }
590 
591 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
592                                            CheckerContext &C) const {
593   ProgramStateRef State = C.getState();
594 
595   // Depending on what was tainted at pre-visit, we determined a set of
596   // arguments which should be tainted after the function returns. These are
597   // stored in the state as TaintArgsOnPostVisit set.
598   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
599   if (TaintArgs.isEmpty())
600     return false;
601 
602   for (unsigned ArgNum : TaintArgs) {
603     // Special handling for the tainted return value.
604     if (ArgNum == ReturnValueIndex) {
605       State = addTaint(State, CE, C.getLocationContext());
606       continue;
607     }
608 
609     // The arguments are pointer arguments. The data they are pointing at is
610     // tainted after the call.
611     if (CE->getNumArgs() < (ArgNum + 1))
612       return false;
613     const Expr *Arg = CE->getArg(ArgNum);
614     Optional<SVal> V = getPointedToSVal(C, Arg);
615     if (V)
616       State = addTaint(State, *V);
617   }
618 
619   // Clear up the taint info from the state.
620   State = State->remove<TaintArgsOnPostVisit>();
621 
622   if (State != C.getState()) {
623     C.addTransition(State);
624     return true;
625   }
626   return false;
627 }
628 
629 bool GenericTaintChecker::checkPre(const CallExpr *CE,
630                                    const FunctionData &FData,
631                                    CheckerContext &C) const {
632 
633   if (checkUncontrolledFormatString(CE, C))
634     return true;
635 
636   if (checkSystemCall(CE, FData.Name, C))
637     return true;
638 
639   if (checkTaintedBufferSize(CE, FData.FDecl, C))
640     return true;
641 
642   if (checkCustomSinks(CE, FData, C))
643     return true;
644 
645   return false;
646 }
647 
648 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
649                                                      const Expr *Arg) {
650   ProgramStateRef State = C.getState();
651   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
652   if (AddrVal.isUnknownOrUndef())
653     return None;
654 
655   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
656   if (!AddrLoc)
657     return None;
658 
659   QualType ArgTy = Arg->getType().getCanonicalType();
660   if (!ArgTy->isPointerType())
661     return State->getSVal(*AddrLoc);
662 
663   QualType ValTy = ArgTy->getPointeeType();
664 
665   // Do not dereference void pointers. Treat them as byte pointers instead.
666   // FIXME: we might want to consider more than just the first byte.
667   if (ValTy->isVoidType())
668     ValTy = C.getASTContext().CharTy;
669 
670   return State->getSVal(*AddrLoc, ValTy);
671 }
672 
673 ProgramStateRef
674 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
675                                                    CheckerContext &C) const {
676   ProgramStateRef State = C.getState();
677 
678   // Check for taint in arguments.
679   bool IsTainted = true;
680   for (unsigned ArgNum : SrcArgs) {
681     if (ArgNum >= CE->getNumArgs())
682       continue;
683 
684     if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
685       break;
686   }
687 
688   // Check for taint in variadic arguments.
689   if (!IsTainted && VariadicType::Src == VarType) {
690     // Check if any of the arguments is tainted
691     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
692       if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
693         break;
694     }
695   }
696 
697   if (PropagationFunc)
698     IsTainted = PropagationFunc(IsTainted, CE, C);
699 
700   if (!IsTainted)
701     return State;
702 
703   // Mark the arguments which should be tainted after the function returns.
704   for (unsigned ArgNum : DstArgs) {
705     // Should mark the return value?
706     if (ArgNum == ReturnValueIndex) {
707       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
708       continue;
709     }
710 
711     if (ArgNum >= CE->getNumArgs())
712       continue;
713 
714     // Mark the given argument.
715     State = State->add<TaintArgsOnPostVisit>(ArgNum);
716   }
717 
718   // Mark all variadic arguments tainted if present.
719   if (VariadicType::Dst == VarType) {
720     // For all pointer and references that were passed in:
721     //   If they are not pointing to const data, mark data as tainted.
722     //   TODO: So far we are just going one level down; ideally we'd need to
723     //         recurse here.
724     for (unsigned i = VariadicIndex; i < CE->getNumArgs(); ++i) {
725       const Expr *Arg = CE->getArg(i);
726       // Process pointer argument.
727       const Type *ArgTy = Arg->getType().getTypePtr();
728       QualType PType = ArgTy->getPointeeType();
729       if ((!PType.isNull() && !PType.isConstQualified()) ||
730           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
731         State = State->add<TaintArgsOnPostVisit>(i);
732     }
733   }
734 
735   return State;
736 }
737 
738 // If argument 0(protocol domain) is network, the return value should get taint.
739 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
740                                                            const CallExpr *CE,
741                                                            CheckerContext &C) {
742   SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
743   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
744   // White list the internal communication protocols.
745   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
746       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
747     return false;
748 
749   return true;
750 }
751 
752 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
753   ProgramStateRef State = C.getState();
754   SVal Val = C.getSVal(E);
755 
756   // stdin is a pointer, so it would be a region.
757   const MemRegion *MemReg = Val.getAsRegion();
758 
759   // The region should be symbolic, we do not know it's value.
760   const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
761   if (!SymReg)
762     return false;
763 
764   // Get it's symbol and find the declaration region it's pointing to.
765   const SymbolRegionValue *Sm =
766       dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
767   if (!Sm)
768     return false;
769   const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
770   if (!DeclReg)
771     return false;
772 
773   // This region corresponds to a declaration, find out if it's a global/extern
774   // variable named stdin with the proper type.
775   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
776     D = D->getCanonicalDecl();
777     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
778       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
779       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
780                        C.getASTContext().getFILEType().getCanonicalType())
781         return true;
782     }
783   }
784   return false;
785 }
786 
787 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
788                                        const CheckerContext &C,
789                                        unsigned &ArgNum) {
790   // Find if the function contains a format string argument.
791   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
792   // vsnprintf, syslog, custom annotated functions.
793   const FunctionDecl *FDecl = C.getCalleeDecl(CE);
794   if (!FDecl)
795     return false;
796   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
797     ArgNum = Format->getFormatIdx() - 1;
798     if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
799       return true;
800   }
801 
802   // Or if a function is named setproctitle (this is a heuristic).
803   if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
804     ArgNum = 0;
805     return true;
806   }
807 
808   return false;
809 }
810 
811 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
812                                                   CheckerContext &C) const {
813   assert(E);
814 
815   // Check for taint.
816   ProgramStateRef State = C.getState();
817   Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
818   SVal TaintedSVal;
819   if (PointedToSVal && isTainted(State, *PointedToSVal))
820     TaintedSVal = *PointedToSVal;
821   else if (isTainted(State, E, C.getLocationContext()))
822     TaintedSVal = C.getSVal(E);
823   else
824     return false;
825 
826   // Generate diagnostic.
827   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
828     initBugType();
829     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
830     report->addRange(E->getSourceRange());
831     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
832     C.emitReport(std::move(report));
833     return true;
834   }
835   return false;
836 }
837 
838 bool GenericTaintChecker::checkUncontrolledFormatString(
839     const CallExpr *CE, CheckerContext &C) const {
840   // Check if the function contains a format string argument.
841   unsigned ArgNum = 0;
842   if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
843     return false;
844 
845   // If either the format string content or the pointer itself are tainted,
846   // warn.
847   return generateReportIfTainted(CE->getArg(ArgNum),
848                                  MsgUncontrolledFormatString, C);
849 }
850 
851 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
852                                           CheckerContext &C) const {
853   // TODO: It might make sense to run this check on demand. In some cases,
854   // we should check if the environment has been cleansed here. We also might
855   // need to know if the user was reset before these calls(seteuid).
856   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
857                         .Case("system", 0)
858                         .Case("popen", 0)
859                         .Case("execl", 0)
860                         .Case("execle", 0)
861                         .Case("execlp", 0)
862                         .Case("execv", 0)
863                         .Case("execvp", 0)
864                         .Case("execvP", 0)
865                         .Case("execve", 0)
866                         .Case("dlopen", 0)
867                         .Default(InvalidArgIndex);
868 
869   if (ArgNum == InvalidArgIndex || CE->getNumArgs() < (ArgNum + 1))
870     return false;
871 
872   return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
873 }
874 
875 // TODO: Should this check be a part of the CString checker?
876 // If yes, should taint be a global setting?
877 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
878                                                  const FunctionDecl *FDecl,
879                                                  CheckerContext &C) const {
880   // If the function has a buffer size argument, set ArgNum.
881   unsigned ArgNum = InvalidArgIndex;
882   unsigned BId = 0;
883   if ((BId = FDecl->getMemoryFunctionKind()))
884     switch (BId) {
885     case Builtin::BImemcpy:
886     case Builtin::BImemmove:
887     case Builtin::BIstrncpy:
888       ArgNum = 2;
889       break;
890     case Builtin::BIstrndup:
891       ArgNum = 1;
892       break;
893     default:
894       break;
895     };
896 
897   if (ArgNum == InvalidArgIndex) {
898     if (C.isCLibraryFunction(FDecl, "malloc") ||
899         C.isCLibraryFunction(FDecl, "calloc") ||
900         C.isCLibraryFunction(FDecl, "alloca"))
901       ArgNum = 0;
902     else if (C.isCLibraryFunction(FDecl, "memccpy"))
903       ArgNum = 3;
904     else if (C.isCLibraryFunction(FDecl, "realloc"))
905       ArgNum = 1;
906     else if (C.isCLibraryFunction(FDecl, "bcopy"))
907       ArgNum = 2;
908   }
909 
910   return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
911          generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
912 }
913 
914 bool GenericTaintChecker::checkCustomSinks(const CallExpr *CE,
915                                            const FunctionData &FData,
916                                            CheckerContext &C) const {
917   auto It = findFunctionInConfig(CustomSinks, FData);
918   if (It == CustomSinks.end())
919     return false;
920 
921   const auto &Value = It->second;
922   const GenericTaintChecker::ArgVector &Args = Value.second;
923   for (unsigned ArgNum : Args) {
924     if (ArgNum >= CE->getNumArgs())
925       continue;
926 
927     if (generateReportIfTainted(CE->getArg(ArgNum), MsgCustomSink, C))
928       return true;
929   }
930 
931   return false;
932 }
933 
934 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
935   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
936   std::string Option{"Config"};
937   StringRef ConfigFile =
938       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
939   llvm::Optional<TaintConfig> Config =
940       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
941   if (Config)
942     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
943 }
944 
945 bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
946   return true;
947 }
948