1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
// The taint information produced by it might be useful to other checkers. For
// example, checkers should report errors which involve tainted data more
// aggressively, even if the involved symbols are underconstrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
29 
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35 
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39 
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
getTag()43   static void *getTag() {
44     static int Tag;
45     return &Tag;
46   }
47 
48   void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49   void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50 
51   void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52                   const char *Sep) const override;
53 
54   using ArgVector = SmallVector<unsigned, 2>;
55   using SignedArgVector = SmallVector<int, 2>;
56 
57   enum class VariadicType { None, Src, Dst };
58 
59   /// Used to parse the configuration file.
60   struct TaintConfiguration {
61     using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62 
63     struct Propagation {
64       std::string Name;
65       std::string Scope;
66       ArgVector SrcArgs;
67       SignedArgVector DstArgs;
68       VariadicType VarType;
69       unsigned VarIndex;
70     };
71 
72     std::vector<Propagation> Propagations;
73     std::vector<NameScopeArgs> Filters;
74     std::vector<NameScopeArgs> Sinks;
75 
76     TaintConfiguration() = default;
77     TaintConfiguration(const TaintConfiguration &) = default;
78     TaintConfiguration(TaintConfiguration &&) = default;
79     TaintConfiguration &operator=(const TaintConfiguration &) = default;
80     TaintConfiguration &operator=(TaintConfiguration &&) = default;
81   };
82 
83   /// Convert SignedArgVector to ArgVector.
84   ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85                                const SignedArgVector &Args);
86 
87   /// Parse the config.
88   void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89                           TaintConfiguration &&Config);
90 
91   static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92   /// Denotes the return vale.
93   static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94                                          1};
95 
96 private:
97   mutable std::unique_ptr<BugType> BT;
initBugType() const98   void initBugType() const {
99     if (!BT)
100       BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101                                      "Untrusted Data");
102   }
103 
104   struct FunctionData {
105     FunctionData() = delete;
FunctionData__anon4b3773b90111::GenericTaintChecker::FunctionData106     FunctionData(const FunctionDecl *FDecl, StringRef Name,
107                  std::string FullName)
108         : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
109     FunctionData(const FunctionData &) = default;
110     FunctionData(FunctionData &&) = default;
111     FunctionData &operator=(const FunctionData &) = delete;
112     FunctionData &operator=(FunctionData &&) = delete;
113 
create__anon4b3773b90111::GenericTaintChecker::FunctionData114     static Optional<FunctionData> create(const CallEvent &Call,
115                                          const CheckerContext &C) {
116       if (!Call.getDecl())
117         return None;
118 
119       const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
120       if (!FDecl || (FDecl->getKind() != Decl::Function &&
121                      FDecl->getKind() != Decl::CXXMethod))
122         return None;
123 
124       StringRef Name = C.getCalleeName(FDecl);
125       std::string FullName = FDecl->getQualifiedNameAsString();
126       if (Name.empty() || FullName.empty())
127         return None;
128 
129       return FunctionData{FDecl, Name, std::move(FullName)};
130     }
131 
isInScope__anon4b3773b90111::GenericTaintChecker::FunctionData132     bool isInScope(StringRef Scope) const {
133       return StringRef(FullName).startswith(Scope);
134     }
135 
136     const FunctionDecl *const FDecl;
137     const StringRef Name;
138     const std::string FullName;
139   };
140 
141   /// Catch taint related bugs. Check if tainted data is passed to a
142   /// system call etc. Returns true on matching.
143   bool checkPre(const CallEvent &Call, const FunctionData &FData,
144                 CheckerContext &C) const;
145 
146   /// Add taint sources on a pre-visit. Returns true on matching.
147   bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
148                      CheckerContext &C) const;
149 
150   /// Mark filter's arguments not tainted on a pre-visit. Returns true on
151   /// matching.
152   bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
153                      CheckerContext &C) const;
154 
155   /// Propagate taint generated at pre-visit. Returns true on matching.
156   static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
157 
158   /// Check if the region the expression evaluates to is the standard input,
159   /// and thus, is tainted.
160   static bool isStdin(const Expr *E, CheckerContext &C);
161 
162   /// Given a pointer argument, return the value it points to.
163   static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
164 
165   /// Check for CWE-134: Uncontrolled Format String.
166   static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
167       "Untrusted data is used as a format string "
168       "(CWE-134: Uncontrolled Format String)";
169   bool checkUncontrolledFormatString(const CallEvent &Call,
170                                      CheckerContext &C) const;
171 
172   /// Check for:
173   /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
174   /// CWE-78, "Failure to Sanitize Data into an OS Command"
175   static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
176       "Untrusted data is passed to a system call "
177       "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
178   bool checkSystemCall(const CallEvent &Call, StringRef Name,
179                        CheckerContext &C) const;
180 
181   /// Check if tainted data is used as a buffer size ins strn.. functions,
182   /// and allocators.
183   static constexpr llvm::StringLiteral MsgTaintedBufferSize =
184       "Untrusted data is used to specify the buffer size "
185       "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
186       "for character data and the null terminator)";
187   bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
188 
189   /// Check if tainted data is used as a custom sink's parameter.
190   static constexpr llvm::StringLiteral MsgCustomSink =
191       "Untrusted data is passed to a user-defined sink";
192   bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
193                         CheckerContext &C) const;
194 
195   /// Generate a report if the expression is tainted or points to tainted data.
196   bool generateReportIfTainted(const Expr *E, StringRef Msg,
197                                CheckerContext &C) const;
198 
199   struct TaintPropagationRule;
200   template <typename T>
201   using ConfigDataMap =
202       std::unordered_multimap<std::string, std::pair<std::string, T>>;
203   using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
204   using NameArgMap = ConfigDataMap<ArgVector>;
205 
206   /// Find a function with the given name and scope. Returns the first match
207   /// or the end of the map.
208   template <typename T>
209   static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
210                                    const FunctionData &FData);
211 
212   /// A struct used to specify taint propagation rules for a function.
213   ///
214   /// If any of the possible taint source arguments is tainted, all of the
215   /// destination arguments should also be tainted. Use InvalidArgIndex in the
216   /// src list to specify that all of the arguments can introduce taint. Use
217   /// InvalidArgIndex in the dst arguments to signify that all the non-const
218   /// pointer and reference arguments might be tainted on return. If
219   /// ReturnValueIndex is added to the dst list, the return value will be
220   /// tainted.
221   struct TaintPropagationRule {
222     using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
223                                          CheckerContext &C);
224 
225     /// List of arguments which can be taint sources and should be checked.
226     ArgVector SrcArgs;
227     /// List of arguments which should be tainted on function return.
228     ArgVector DstArgs;
229     /// Index for the first variadic parameter if exist.
230     unsigned VariadicIndex;
231     /// Show when a function has variadic parameters. If it has, it marks all
232     /// of them as source or destination.
233     VariadicType VarType;
234     /// Special function for tainted source determination. If defined, it can
235     /// override the default behavior.
236     PropagationFuncType PropagationFunc;
237 
TaintPropagationRule__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule238     TaintPropagationRule()
239         : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
240           PropagationFunc(nullptr) {}
241 
TaintPropagationRule__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule242     TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
243                          VariadicType Var = VariadicType::None,
244                          unsigned VarIndex = InvalidArgIndex,
245                          PropagationFuncType Func = nullptr)
246         : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
247           VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
248 
249     /// Get the propagation rule for a given function.
250     static TaintPropagationRule
251     getTaintPropagationRule(const NameRuleMap &CustomPropagations,
252                             const FunctionData &FData, CheckerContext &C);
253 
addSrcArg__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule254     void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule255     void addDstArg(unsigned A) { DstArgs.push_back(A); }
256 
isNull__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule257     bool isNull() const {
258       return SrcArgs.empty() && DstArgs.empty() &&
259              VariadicType::None == VarType;
260     }
261 
isDestinationArgument__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule262     bool isDestinationArgument(unsigned ArgNum) const {
263       return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
264     }
265 
isTaintedOrPointsToTainted__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule266     static bool isTaintedOrPointsToTainted(const Expr *E,
267                                            const ProgramStateRef &State,
268                                            CheckerContext &C) {
269       if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
270         return true;
271 
272       if (!E->getType().getTypePtr()->isPointerType())
273         return false;
274 
275       Optional<SVal> V = getPointeeOf(C, E);
276       return (V && isTainted(State, *V));
277     }
278 
279     /// Pre-process a function which propagates taint according to the
280     /// taint rule.
281     ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
282 
283     // Functions for custom taintedness propagation.
284     static bool postSocket(bool IsTainted, const CallEvent &Call,
285                            CheckerContext &C);
286   };
287 
288   /// Defines a map between the propagation function's name, scope
289   /// and TaintPropagationRule.
290   NameRuleMap CustomPropagations;
291 
292   /// Defines a map between the filter function's name, scope and filtering
293   /// args.
294   NameArgMap CustomFilters;
295 
296   /// Defines a map between the sink function's name, scope and sinking args.
297   NameArgMap CustomSinks;
298 };
299 
// Namespace-scope definitions of the static data members that are declared
// and initialized inside the class.
const unsigned GenericTaintChecker::ReturnValueIndex;
const unsigned GenericTaintChecker::InvalidArgIndex;

// FIXME: these lines can be removed in C++17
// Namespace-scope definitions of the constexpr diagnostic message members.
constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
308 } // end of anonymous namespace
309 
// Short alias used by the YAML trait specializations below.
using TaintConfig = GenericTaintChecker::TaintConfiguration;

// Declare std::vector of each configuration entry type as a YAML sequence.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314 
315 namespace llvm {
316 namespace yaml {
317 template <> struct MappingTraits<TaintConfig> {
mappingllvm::yaml::MappingTraits318   static void mapping(IO &IO, TaintConfig &Config) {
319     IO.mapOptional("Propagations", Config.Propagations);
320     IO.mapOptional("Filters", Config.Filters);
321     IO.mapOptional("Sinks", Config.Sinks);
322   }
323 };
324 
325 template <> struct MappingTraits<TaintConfig::Propagation> {
mappingllvm::yaml::MappingTraits326   static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
327     IO.mapRequired("Name", Propagation.Name);
328     IO.mapOptional("Scope", Propagation.Scope);
329     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
330     IO.mapOptional("DstArgs", Propagation.DstArgs);
331     IO.mapOptional("VariadicType", Propagation.VarType,
332                    GenericTaintChecker::VariadicType::None);
333     IO.mapOptional("VariadicIndex", Propagation.VarIndex,
334                    GenericTaintChecker::InvalidArgIndex);
335   }
336 };
337 
338 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
enumerationllvm::yaml::ScalarEnumerationTraits339   static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
340     IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
341     IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
342     IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343   }
344 };
345 
346 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
mappingllvm::yaml::MappingTraits347   static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
348     IO.mapRequired("Name", std::get<0>(NSA));
349     IO.mapOptional("Scope", std::get<1>(NSA));
350     IO.mapRequired("Args", std::get<2>(NSA));
351   }
352 };
353 } // namespace yaml
354 } // namespace llvm
355 
/// A set which is used to pass information from the call pre-visit to the
/// call post-visit. The values are unsigned integers, each of which is either
/// ReturnValueIndex or the index of a pointer/reference argument whose
/// pointed-to data should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
361 
362 GenericTaintChecker::ArgVector
363 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
364                                         const std::string &Option,
365                                         const SignedArgVector &Args) {
366   ArgVector Result;
367   for (int Arg : Args) {
368     if (Arg == -1)
369       Result.push_back(ReturnValueIndex);
370     else if (Arg < -1) {
371       Result.push_back(InvalidArgIndex);
372       Mgr.reportInvalidCheckerOptionValue(
373           this, Option,
374           "an argument number for propagation rules greater or equal to -1");
375     } else
376       Result.push_back(static_cast<unsigned>(Arg));
377   }
378   return Result;
379 }
380 
parseConfiguration(CheckerManager & Mgr,const std::string & Option,TaintConfiguration && Config)381 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
382                                              const std::string &Option,
383                                              TaintConfiguration &&Config) {
384   for (auto &P : Config.Propagations) {
385     GenericTaintChecker::CustomPropagations.emplace(
386         P.Name,
387         std::make_pair(P.Scope, TaintPropagationRule{
388                                     std::move(P.SrcArgs),
389                                     convertToArgVector(Mgr, Option, P.DstArgs),
390                                     P.VarType, P.VarIndex}));
391   }
392 
393   for (auto &F : Config.Filters) {
394     GenericTaintChecker::CustomFilters.emplace(
395         std::get<0>(F),
396         std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
397   }
398 
399   for (auto &S : Config.Sinks) {
400     GenericTaintChecker::CustomSinks.emplace(
401         std::get<0>(S),
402         std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403   }
404 }
405 
406 template <typename T>
findFunctionInConfig(const ConfigDataMap<T> & Map,const FunctionData & FData)407 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
408                                                const FunctionData &FData) {
409   auto Range = Map.equal_range(std::string(FData.Name));
410   auto It =
411       std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
412         const auto &Value = Entry.second;
413         StringRef Scope = Value.first;
414         return Scope.empty() || FData.isInScope(Scope);
415       });
416   return It != Range.second ? It : Map.end();
417 }
418 
419 GenericTaintChecker::TaintPropagationRule
getTaintPropagationRule(const NameRuleMap & CustomPropagations,const FunctionData & FData,CheckerContext & C)420 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
421     const NameRuleMap &CustomPropagations, const FunctionData &FData,
422     CheckerContext &C) {
423   // TODO: Currently, we might lose precision here: we always mark a return
424   // value as tainted even if it's just a pointer, pointing to tainted data.
425 
426   // Check for exact name match for functions without builtin substitutes.
427   // Use qualified name, because these are C functions without namespace.
428   TaintPropagationRule Rule =
429       llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
430           // Source functions
431           // TODO: Add support for vfscanf & family.
432           .Case("fdopen", {{}, {ReturnValueIndex}})
433           .Case("fopen", {{}, {ReturnValueIndex}})
434           .Case("freopen", {{}, {ReturnValueIndex}})
435           .Case("getch", {{}, {ReturnValueIndex}})
436           .Case("getchar", {{}, {ReturnValueIndex}})
437           .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
438           .Case("getenv", {{}, {ReturnValueIndex}})
439           .Case("gets", {{}, {0, ReturnValueIndex}})
440           .Case("scanf", {{}, {}, VariadicType::Dst, 1})
441           .Case("socket", {{},
442                            {ReturnValueIndex},
443                            VariadicType::None,
444                            InvalidArgIndex,
445                            &TaintPropagationRule::postSocket})
446           .Case("wgetch", {{}, {ReturnValueIndex}})
447           // Propagating functions
448           .Case("atoi", {{0}, {ReturnValueIndex}})
449           .Case("atol", {{0}, {ReturnValueIndex}})
450           .Case("atoll", {{0}, {ReturnValueIndex}})
451           .Case("fgetc", {{0}, {ReturnValueIndex}})
452           .Case("fgetln", {{0}, {ReturnValueIndex}})
453           .Case("fgets", {{2}, {0, ReturnValueIndex}})
454           .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
455           .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
456           .Case("getc", {{0}, {ReturnValueIndex}})
457           .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
458           .Case("getdelim", {{3}, {0}})
459           .Case("getline", {{2}, {0}})
460           .Case("getw", {{0}, {ReturnValueIndex}})
461           .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
462           .Case("read", {{0, 2}, {1, ReturnValueIndex}})
463           .Case("strchr", {{0}, {ReturnValueIndex}})
464           .Case("strrchr", {{0}, {ReturnValueIndex}})
465           .Case("tolower", {{0}, {ReturnValueIndex}})
466           .Case("toupper", {{0}, {ReturnValueIndex}})
467           .Default({});
468 
469   if (!Rule.isNull())
470     return Rule;
471   assert(FData.FDecl);
472 
473   // Check if it's one of the memory setting/copying functions.
474   // This check is specialized but faster then calling isCLibraryFunction.
475   const FunctionDecl *FDecl = FData.FDecl;
476   unsigned BId = 0;
477   if ((BId = FDecl->getMemoryFunctionKind())) {
478     switch (BId) {
479     case Builtin::BImemcpy:
480     case Builtin::BImemmove:
481     case Builtin::BIstrncpy:
482     case Builtin::BIstrncat:
483       return {{1, 2}, {0, ReturnValueIndex}};
484     case Builtin::BIstrlcpy:
485     case Builtin::BIstrlcat:
486       return {{1, 2}, {0}};
487     case Builtin::BIstrndup:
488       return {{0, 1}, {ReturnValueIndex}};
489 
490     default:
491       break;
492     }
493   }
494 
495   // Process all other functions which could be defined as builtins.
496   if (Rule.isNull()) {
497     const auto OneOf = [FDecl](const auto &... Name) {
498       // FIXME: use fold expression in C++17
499       using unused = int[];
500       bool ret = false;
501       static_cast<void>(unused{
502           0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
503       return ret;
504     };
505     if (OneOf("snprintf"))
506       return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
507     if (OneOf("sprintf"))
508       return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
509     if (OneOf("strcpy", "stpcpy", "strcat"))
510       return {{1}, {0, ReturnValueIndex}};
511     if (OneOf("bcopy"))
512       return {{0, 2}, {1}};
513     if (OneOf("strdup", "strdupa", "wcsdup"))
514       return {{0}, {ReturnValueIndex}};
515   }
516 
517   // Skipping the following functions, since they might be used for cleansing or
518   // smart memory copy:
519   // - memccpy - copying until hitting a special character.
520 
521   auto It = findFunctionInConfig(CustomPropagations, FData);
522   if (It != CustomPropagations.end())
523     return It->second.second;
524   return {};
525 }
526 
checkPreCall(const CallEvent & Call,CheckerContext & C) const527 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
528                                        CheckerContext &C) const {
529   Optional<FunctionData> FData = FunctionData::create(Call, C);
530   if (!FData)
531     return;
532 
533   // Check for taintedness related errors first: system call, uncontrolled
534   // format string, tainted buffer size.
535   if (checkPre(Call, *FData, C))
536     return;
537 
538   // Marks the function's arguments and/or return value tainted if it present in
539   // the list.
540   if (addSourcesPre(Call, *FData, C))
541     return;
542 
543   addFiltersPre(Call, *FData, C);
544 }
545 
checkPostCall(const CallEvent & Call,CheckerContext & C) const546 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
547                                         CheckerContext &C) const {
548   // Set the marked values as tainted. The return value only accessible from
549   // checkPostStmt.
550   propagateFromPre(Call, C);
551 }
552 
printState(raw_ostream & Out,ProgramStateRef State,const char * NL,const char * Sep) const553 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
554                                      const char *NL, const char *Sep) const {
555   printTaint(State, Out, NL, Sep);
556 }
557 
addSourcesPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const558 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
559                                         const FunctionData &FData,
560                                         CheckerContext &C) const {
561   // First, try generating a propagation rule for this function.
562   TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
563       this->CustomPropagations, FData, C);
564   if (!Rule.isNull()) {
565     ProgramStateRef State = Rule.process(Call, C);
566     if (State) {
567       C.addTransition(State);
568       return true;
569     }
570   }
571   return false;
572 }
573 
addFiltersPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const574 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
575                                         const FunctionData &FData,
576                                         CheckerContext &C) const {
577   auto It = findFunctionInConfig(CustomFilters, FData);
578   if (It == CustomFilters.end())
579     return false;
580 
581   ProgramStateRef State = C.getState();
582   const auto &Value = It->second;
583   const ArgVector &Args = Value.second;
584   for (unsigned ArgNum : Args) {
585     if (ArgNum >= Call.getNumArgs())
586       continue;
587 
588     const Expr *Arg = Call.getArgExpr(ArgNum);
589     Optional<SVal> V = getPointeeOf(C, Arg);
590     if (V)
591       State = removeTaint(State, *V);
592   }
593 
594   if (State != C.getState()) {
595     C.addTransition(State);
596     return true;
597   }
598   return false;
599 }
600 
propagateFromPre(const CallEvent & Call,CheckerContext & C)601 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
602                                            CheckerContext &C) {
603   ProgramStateRef State = C.getState();
604 
605   // Depending on what was tainted at pre-visit, we determined a set of
606   // arguments which should be tainted after the function returns. These are
607   // stored in the state as TaintArgsOnPostVisit set.
608   TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
609   if (TaintArgs.isEmpty())
610     return false;
611 
612   for (unsigned ArgNum : TaintArgs) {
613     // Special handling for the tainted return value.
614     if (ArgNum == ReturnValueIndex) {
615       State = addTaint(State, Call.getReturnValue());
616       continue;
617     }
618 
619     // The arguments are pointer arguments. The data they are pointing at is
620     // tainted after the call.
621     if (Call.getNumArgs() < (ArgNum + 1))
622       return false;
623     const Expr *Arg = Call.getArgExpr(ArgNum);
624     Optional<SVal> V = getPointeeOf(C, Arg);
625     if (V)
626       State = addTaint(State, *V);
627   }
628 
629   // Clear up the taint info from the state.
630   State = State->remove<TaintArgsOnPostVisit>();
631 
632   if (State != C.getState()) {
633     C.addTransition(State);
634     return true;
635   }
636   return false;
637 }
638 
checkPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const639 bool GenericTaintChecker::checkPre(const CallEvent &Call,
640                                    const FunctionData &FData,
641                                    CheckerContext &C) const {
642   if (checkUncontrolledFormatString(Call, C))
643     return true;
644 
645   if (checkSystemCall(Call, FData.Name, C))
646     return true;
647 
648   if (checkTaintedBufferSize(Call, C))
649     return true;
650 
651   return checkCustomSinks(Call, FData, C);
652 }
653 
getPointeeOf(CheckerContext & C,const Expr * Arg)654 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
655                                                  const Expr *Arg) {
656   ProgramStateRef State = C.getState();
657   SVal AddrVal = C.getSVal(Arg->IgnoreParens());
658   if (AddrVal.isUnknownOrUndef())
659     return None;
660 
661   Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
662   if (!AddrLoc)
663     return None;
664 
665   QualType ArgTy = Arg->getType().getCanonicalType();
666   if (!ArgTy->isPointerType())
667     return State->getSVal(*AddrLoc);
668 
669   QualType ValTy = ArgTy->getPointeeType();
670 
671   // Do not dereference void pointers. Treat them as byte pointers instead.
672   // FIXME: we might want to consider more than just the first byte.
673   if (ValTy->isVoidType())
674     ValTy = C.getASTContext().CharTy;
675 
676   return State->getSVal(*AddrLoc, ValTy);
677 }
678 
679 ProgramStateRef
process(const CallEvent & Call,CheckerContext & C) const680 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
681                                                    CheckerContext &C) const {
682   ProgramStateRef State = C.getState();
683 
684   // Check for taint in arguments.
685   bool IsTainted = true;
686   for (unsigned ArgNum : SrcArgs) {
687     if (ArgNum >= Call.getNumArgs())
688       continue;
689 
690     if ((IsTainted =
691              isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
692       break;
693   }
694 
695   // Check for taint in variadic arguments.
696   if (!IsTainted && VariadicType::Src == VarType) {
697     // Check if any of the arguments is tainted
698     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
699       if ((IsTainted =
700                isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
701         break;
702     }
703   }
704 
705   if (PropagationFunc)
706     IsTainted = PropagationFunc(IsTainted, Call, C);
707 
708   if (!IsTainted)
709     return State;
710 
711   // Mark the arguments which should be tainted after the function returns.
712   for (unsigned ArgNum : DstArgs) {
713     // Should mark the return value?
714     if (ArgNum == ReturnValueIndex) {
715       State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
716       continue;
717     }
718 
719     if (ArgNum >= Call.getNumArgs())
720       continue;
721 
722     // Mark the given argument.
723     State = State->add<TaintArgsOnPostVisit>(ArgNum);
724   }
725 
726   // Mark all variadic arguments tainted if present.
727   if (VariadicType::Dst == VarType) {
728     // For all pointer and references that were passed in:
729     //   If they are not pointing to const data, mark data as tainted.
730     //   TODO: So far we are just going one level down; ideally we'd need to
731     //         recurse here.
732     for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
733       const Expr *Arg = Call.getArgExpr(i);
734       // Process pointer argument.
735       const Type *ArgTy = Arg->getType().getTypePtr();
736       QualType PType = ArgTy->getPointeeType();
737       if ((!PType.isNull() && !PType.isConstQualified()) ||
738           (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
739         State = State->add<TaintArgsOnPostVisit>(i);
740       }
741     }
742   }
743 
744   return State;
745 }
746 
747 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(bool,const CallEvent & Call,CheckerContext & C)748 bool GenericTaintChecker::TaintPropagationRule::postSocket(
749     bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
750   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
751   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
752   // White list the internal communication protocols.
753   if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
754       DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
755     return false;
756   return true;
757 }
758 
isStdin(const Expr * E,CheckerContext & C)759 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
760   ProgramStateRef State = C.getState();
761   SVal Val = C.getSVal(E);
762 
763   // stdin is a pointer, so it would be a region.
764   const MemRegion *MemReg = Val.getAsRegion();
765 
766   // The region should be symbolic, we do not know it's value.
767   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
768   if (!SymReg)
769     return false;
770 
771   // Get it's symbol and find the declaration region it's pointing to.
772   const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
773   if (!Sm)
774     return false;
775   const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
776   if (!DeclReg)
777     return false;
778 
779   // This region corresponds to a declaration, find out if it's a global/extern
780   // variable named stdin with the proper type.
781   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
782     D = D->getCanonicalDecl();
783     if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
784       const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
785       if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
786                        C.getASTContext().getFILEType().getCanonicalType())
787         return true;
788     }
789   }
790   return false;
791 }
792 
getPrintfFormatArgumentNum(const CallEvent & Call,const CheckerContext & C,unsigned & ArgNum)793 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
794                                        const CheckerContext &C,
795                                        unsigned &ArgNum) {
796   // Find if the function contains a format string argument.
797   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
798   // vsnprintf, syslog, custom annotated functions.
799   const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
800   if (!FDecl)
801     return false;
802   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
803     ArgNum = Format->getFormatIdx() - 1;
804     if ((Format->getType()->getName() == "printf") &&
805         Call.getNumArgs() > ArgNum)
806       return true;
807   }
808 
809   // Or if a function is named setproctitle (this is a heuristic).
810   if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
811     ArgNum = 0;
812     return true;
813   }
814 
815   return false;
816 }
817 
generateReportIfTainted(const Expr * E,StringRef Msg,CheckerContext & C) const818 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
819                                                   CheckerContext &C) const {
820   assert(E);
821 
822   // Check for taint.
823   ProgramStateRef State = C.getState();
824   Optional<SVal> PointedToSVal = getPointeeOf(C, E);
825   SVal TaintedSVal;
826   if (PointedToSVal && isTainted(State, *PointedToSVal))
827     TaintedSVal = *PointedToSVal;
828   else if (isTainted(State, E, C.getLocationContext()))
829     TaintedSVal = C.getSVal(E);
830   else
831     return false;
832 
833   // Generate diagnostic.
834   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
835     initBugType();
836     auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
837     report->addRange(E->getSourceRange());
838     report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
839     C.emitReport(std::move(report));
840     return true;
841   }
842   return false;
843 }
844 
checkUncontrolledFormatString(const CallEvent & Call,CheckerContext & C) const845 bool GenericTaintChecker::checkUncontrolledFormatString(
846     const CallEvent &Call, CheckerContext &C) const {
847   // Check if the function contains a format string argument.
848   unsigned ArgNum = 0;
849   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
850     return false;
851 
852   // If either the format string content or the pointer itself are tainted,
853   // warn.
854   return generateReportIfTainted(Call.getArgExpr(ArgNum),
855                                  MsgUncontrolledFormatString, C);
856 }
857 
checkSystemCall(const CallEvent & Call,StringRef Name,CheckerContext & C) const858 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
859                                           CheckerContext &C) const {
860   // TODO: It might make sense to run this check on demand. In some cases,
861   // we should check if the environment has been cleansed here. We also might
862   // need to know if the user was reset before these calls(seteuid).
863   unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
864                         .Case("system", 0)
865                         .Case("popen", 0)
866                         .Case("execl", 0)
867                         .Case("execle", 0)
868                         .Case("execlp", 0)
869                         .Case("execv", 0)
870                         .Case("execvp", 0)
871                         .Case("execvP", 0)
872                         .Case("execve", 0)
873                         .Case("dlopen", 0)
874                         .Default(InvalidArgIndex);
875 
876   if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
877     return false;
878 
879   return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
880                                  C);
881 }
882 
883 // TODO: Should this check be a part of the CString checker?
884 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallEvent & Call,CheckerContext & C) const885 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
886                                                  CheckerContext &C) const {
887   const auto *FDecl = Call.getDecl()->getAsFunction();
888   // If the function has a buffer size argument, set ArgNum.
889   unsigned ArgNum = InvalidArgIndex;
890   unsigned BId = 0;
891   if ((BId = FDecl->getMemoryFunctionKind())) {
892     switch (BId) {
893     case Builtin::BImemcpy:
894     case Builtin::BImemmove:
895     case Builtin::BIstrncpy:
896       ArgNum = 2;
897       break;
898     case Builtin::BIstrndup:
899       ArgNum = 1;
900       break;
901     default:
902       break;
903     }
904   }
905 
906   if (ArgNum == InvalidArgIndex) {
907     using CCtx = CheckerContext;
908     if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
909         CCtx::isCLibraryFunction(FDecl, "calloc") ||
910         CCtx::isCLibraryFunction(FDecl, "alloca"))
911       ArgNum = 0;
912     else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
913       ArgNum = 3;
914     else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
915       ArgNum = 1;
916     else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
917       ArgNum = 2;
918   }
919 
920   return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
921          generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
922                                  C);
923 }
924 
checkCustomSinks(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const925 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
926                                            const FunctionData &FData,
927                                            CheckerContext &C) const {
928   auto It = findFunctionInConfig(CustomSinks, FData);
929   if (It == CustomSinks.end())
930     return false;
931 
932   const auto &Value = It->second;
933   const GenericTaintChecker::ArgVector &Args = Value.second;
934   for (unsigned ArgNum : Args) {
935     if (ArgNum >= Call.getNumArgs())
936       continue;
937 
938     if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
939       return true;
940   }
941 
942   return false;
943 }
944 
registerGenericTaintChecker(CheckerManager & Mgr)945 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
946   auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
947   std::string Option{"Config"};
948   StringRef ConfigFile =
949       Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
950   llvm::Optional<TaintConfig> Config =
951       getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
952   if (Config)
953     Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
954 }
955 
shouldRegisterGenericTaintChecker(const CheckerManager & mgr)956 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
957   return true;
958 }
959