1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16 
17 #include "Yaml.h"
18 #include "clang/AST/Attr.h"
19 #include "clang/Basic/Builtins.h"
20 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
21 #include "clang/StaticAnalyzer/Checkers/Taint.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallDescription.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
28 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
29 #include "llvm/ADT/StringExtras.h"
30 #include "llvm/Support/YAMLTraits.h"
31 
32 #include <limits>
33 #include <memory>
34 #include <optional>
35 #include <utility>
36 #include <vector>
37 
38 #define DEBUG_TYPE "taint-checker"
39 
40 using namespace clang;
41 using namespace ento;
42 using namespace taint;
43 
44 using llvm::ImmutableSet;
45 
46 namespace {
47 
48 class GenericTaintChecker;
49 
/// Diagnostic text for CWE-134 (Uncontrolled Format String): untrusted data
/// is used as a format string.
constexpr llvm::StringLiteral MsgUncontrolledFormatString =
    "Untrusted data is used as a format string "
    "(CWE-134: Uncontrolled Format String)";

/// Diagnostic text for untrusted data reaching a system call. Relates to:
/// CERT/STR02-C. "Sanitize data passed to complex subsystems"
/// CWE-78, "Failure to Sanitize Data into an OS Command"
constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
    "Untrusted data is passed to a system call "
    "(CERT/STR02-C. Sanitize data passed to complex subsystems)";

/// Diagnostic text for tainted data used as a buffer size in strn.. functions
/// and allocators (CERT/STR31-C).
constexpr llvm::StringLiteral MsgTaintedBufferSize =
    "Untrusted data is used to specify the buffer size "
    "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
    "for character data and the null terminator)";

/// Diagnostic text for tainted data used as a custom (user-defined) sink's
/// parameter.
constexpr llvm::StringLiteral MsgCustomSink =
    "Untrusted data is passed to a user-defined sink";
72 
/// Index of a call argument. The type is signed so that ReturnValueIndex (-1)
/// can share the index space with the real, zero-based argument positions.
using ArgIdxTy = int;
/// A small list of argument indexes.
using ArgVecTy = llvm::SmallVector<ArgIdxTy, 2>;

/// Denotes the return value.
constexpr ArgIdxTy ReturnValueIndex{-1};
78 
79 static ArgIdxTy fromArgumentCount(unsigned Count) {
80   assert(Count <=
81              static_cast<std::size_t>(std::numeric_limits<ArgIdxTy>::max()) &&
82          "ArgIdxTy is not large enough to represent the number of arguments.");
83   return Count;
84 }
85 
86 /// Check if the region the expression evaluates to is the standard input,
87 /// and thus, is tainted.
88 /// FIXME: Move this to Taint.cpp.
89 bool isStdin(SVal Val, const ASTContext &ACtx) {
90   // FIXME: What if Val is NonParamVarRegion?
91 
92   // The region should be symbolic, we do not know it's value.
93   const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(Val.getAsRegion());
94   if (!SymReg)
95     return false;
96 
97   // Get it's symbol and find the declaration region it's pointing to.
98   const auto *DeclReg =
99       dyn_cast_or_null<DeclRegion>(SymReg->getSymbol()->getOriginRegion());
100   if (!DeclReg)
101     return false;
102 
103   // This region corresponds to a declaration, find out if it's a global/extern
104   // variable named stdin with the proper type.
105   if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
106     D = D->getCanonicalDecl();
107     // FIXME: This should look for an exact match.
108     if (D->getName().contains("stdin") && D->isExternC()) {
109       const QualType FILETy = ACtx.getFILEType().getCanonicalType();
110       const QualType Ty = D->getType().getCanonicalType();
111 
112       if (Ty->isPointerType())
113         return Ty->getPointeeType() == FILETy;
114     }
115   }
116   return false;
117 }
118 
119 SVal getPointeeOf(ProgramStateRef State, Loc LValue) {
120   const QualType ArgTy = LValue.getType(State->getStateManager().getContext());
121   if (!ArgTy->isPointerType() || !ArgTy->getPointeeType()->isVoidType())
122     return State->getSVal(LValue);
123 
124   // Do not dereference void pointers. Treat them as byte pointers instead.
125   // FIXME: we might want to consider more than just the first byte.
126   return State->getSVal(LValue, State->getStateManager().getContext().CharTy);
127 }
128 
129 /// Given a pointer/reference argument, return the value it refers to.
130 std::optional<SVal> getPointeeOf(ProgramStateRef State, SVal Arg) {
131   if (auto LValue = Arg.getAs<Loc>())
132     return getPointeeOf(State, *LValue);
133   return std::nullopt;
134 }
135 
136 /// Given a pointer, return the SVal of its pointee or if it is tainted,
137 /// otherwise return the pointer's SVal if tainted.
138 /// Also considers stdin as a taint source.
139 std::optional<SVal> getTaintedPointeeOrPointer(ProgramStateRef State,
140                                                SVal Arg) {
141   if (auto Pointee = getPointeeOf(State, Arg))
142     if (isTainted(State, *Pointee)) // FIXME: isTainted(...) ? Pointee : None;
143       return Pointee;
144 
145   if (isTainted(State, Arg))
146     return Arg;
147   return std::nullopt;
148 }
149 
150 bool isTaintedOrPointsToTainted(ProgramStateRef State, SVal ExprSVal) {
151   return getTaintedPointeeOrPointer(State, ExprSVal).has_value();
152 }
153 
154 /// Helps in printing taint diagnostics.
155 /// Marks the incoming parameters of a function interesting (to be printed)
156 /// when the return value, or the outgoing parameters are tainted.
157 const NoteTag *taintOriginTrackerTag(CheckerContext &C,
158                                      std::vector<SymbolRef> TaintedSymbols,
159                                      std::vector<ArgIdxTy> TaintedArgs,
160                                      const LocationContext *CallLocation) {
161   return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
162                        TaintedArgs = std::move(TaintedArgs), CallLocation](
163                           PathSensitiveBugReport &BR) -> std::string {
164     SmallString<256> Msg;
165     // We give diagnostics only for taint related reports
166     if (!BR.isInteresting(CallLocation) ||
167         BR.getBugType().getCategory() != categories::TaintedData) {
168       return "";
169     }
170     if (TaintedSymbols.empty())
171       return "Taint originated here";
172 
173     for (auto Sym : TaintedSymbols) {
174       BR.markInteresting(Sym);
175     }
176     LLVM_DEBUG(for (auto Arg
177                     : TaintedArgs) {
178       llvm::dbgs() << "Taint Propagated from argument " << Arg + 1 << "\n";
179     });
180     return "";
181   });
182 }
183 
184 /// Helps in printing taint diagnostics.
185 /// Marks the function interesting (to be printed)
186 /// when the return value, or the outgoing parameters are tainted.
187 const NoteTag *taintPropagationExplainerTag(
188     CheckerContext &C, std::vector<SymbolRef> TaintedSymbols,
189     std::vector<ArgIdxTy> TaintedArgs, const LocationContext *CallLocation) {
190   assert(TaintedSymbols.size() == TaintedArgs.size());
191   return C.getNoteTag([TaintedSymbols = std::move(TaintedSymbols),
192                        TaintedArgs = std::move(TaintedArgs), CallLocation](
193                           PathSensitiveBugReport &BR) -> std::string {
194     SmallString<256> Msg;
195     llvm::raw_svector_ostream Out(Msg);
196     // We give diagnostics only for taint related reports
197     if (TaintedSymbols.empty() ||
198         BR.getBugType().getCategory() != categories::TaintedData) {
199       return "";
200     }
201     int nofTaintedArgs = 0;
202     for (auto [Idx, Sym] : llvm::enumerate(TaintedSymbols)) {
203       if (BR.isInteresting(Sym)) {
204         BR.markInteresting(CallLocation);
205         if (TaintedArgs[Idx] != ReturnValueIndex) {
206           LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to argument "
207                                   << TaintedArgs[Idx] + 1 << "\n");
208           if (nofTaintedArgs == 0)
209             Out << "Taint propagated to the ";
210           else
211             Out << ", ";
212           Out << TaintedArgs[Idx] + 1
213               << llvm::getOrdinalSuffix(TaintedArgs[Idx] + 1) << " argument";
214           nofTaintedArgs++;
215         } else {
216           LLVM_DEBUG(llvm::dbgs() << "Taint Propagated to return value.\n");
217           Out << "Taint propagated to the return value";
218         }
219       }
220     }
221     return std::string(Out.str());
222   });
223 }
224 
225 /// ArgSet is used to describe arguments relevant for taint detection or
226 /// taint application. A discrete set of argument indexes and a variadic
227 /// argument list signified by a starting index are supported.
228 class ArgSet {
229 public:
230   ArgSet() = default;
231   ArgSet(ArgVecTy &&DiscreteArgs,
232          std::optional<ArgIdxTy> VariadicIndex = std::nullopt)
233       : DiscreteArgs(std::move(DiscreteArgs)),
234         VariadicIndex(std::move(VariadicIndex)) {}
235 
236   bool contains(ArgIdxTy ArgIdx) const {
237     if (llvm::is_contained(DiscreteArgs, ArgIdx))
238       return true;
239 
240     return VariadicIndex && ArgIdx >= *VariadicIndex;
241   }
242 
243   bool isEmpty() const { return DiscreteArgs.empty() && !VariadicIndex; }
244 
245 private:
246   ArgVecTy DiscreteArgs;
247   std::optional<ArgIdxTy> VariadicIndex;
248 };
249 
250 /// A struct used to specify taint propagation rules for a function.
251 ///
252 /// If any of the possible taint source arguments is tainted, all of the
253 /// destination arguments should also be tainted. If ReturnValueIndex is added
254 /// to the dst list, the return value will be tainted.
255 class GenericTaintRule {
256   /// Arguments which are taints sinks and should be checked, and a report
257   /// should be emitted if taint reaches these.
258   ArgSet SinkArgs;
259   /// Arguments which should be sanitized on function return.
260   ArgSet FilterArgs;
261   /// Arguments which can participate in taint propagation. If any of the
262   /// arguments in PropSrcArgs is tainted, all arguments in  PropDstArgs should
263   /// be tainted.
264   ArgSet PropSrcArgs;
265   ArgSet PropDstArgs;
266 
267   /// A message that explains why the call is sensitive to taint.
268   std::optional<StringRef> SinkMsg;
269 
270   GenericTaintRule() = default;
271 
272   GenericTaintRule(ArgSet &&Sink, ArgSet &&Filter, ArgSet &&Src, ArgSet &&Dst,
273                    std::optional<StringRef> SinkMsg = std::nullopt)
274       : SinkArgs(std::move(Sink)), FilterArgs(std::move(Filter)),
275         PropSrcArgs(std::move(Src)), PropDstArgs(std::move(Dst)),
276         SinkMsg(SinkMsg) {}
277 
278 public:
279   /// Make a rule that reports a warning if taint reaches any of \p FilterArgs
280   /// arguments.
281   static GenericTaintRule Sink(ArgSet &&SinkArgs,
282                                std::optional<StringRef> Msg = std::nullopt) {
283     return {std::move(SinkArgs), {}, {}, {}, Msg};
284   }
285 
286   /// Make a rule that sanitizes all FilterArgs arguments.
287   static GenericTaintRule Filter(ArgSet &&FilterArgs) {
288     return {{}, std::move(FilterArgs), {}, {}};
289   }
290 
291   /// Make a rule that unconditionally taints all Args.
292   /// If Func is provided, it must also return true for taint to propagate.
293   static GenericTaintRule Source(ArgSet &&SourceArgs) {
294     return {{}, {}, {}, std::move(SourceArgs)};
295   }
296 
297   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
298   static GenericTaintRule Prop(ArgSet &&SrcArgs, ArgSet &&DstArgs) {
299     return {{}, {}, std::move(SrcArgs), std::move(DstArgs)};
300   }
301 
302   /// Make a rule that taints all PropDstArgs if any of PropSrcArgs is tainted.
303   static GenericTaintRule
304   SinkProp(ArgSet &&SinkArgs, ArgSet &&SrcArgs, ArgSet &&DstArgs,
305            std::optional<StringRef> Msg = std::nullopt) {
306     return {
307         std::move(SinkArgs), {}, std::move(SrcArgs), std::move(DstArgs), Msg};
308   }
309 
310   /// Process a function which could either be a taint source, a taint sink, a
311   /// taint filter or a taint propagator.
312   void process(const GenericTaintChecker &Checker, const CallEvent &Call,
313                CheckerContext &C) const;
314 
315   /// Handles the resolution of indexes of type ArgIdxTy to Expr*-s.
316   static const Expr *GetArgExpr(ArgIdxTy ArgIdx, const CallEvent &Call) {
317     return ArgIdx == ReturnValueIndex ? Call.getOriginExpr()
318                                       : Call.getArgExpr(ArgIdx);
319   };
320 
321   /// Functions for custom taintedness propagation.
322   static bool UntrustedEnv(CheckerContext &C);
323 };
324 
/// Lookup table associating call descriptions with their taint rule.
using RuleLookupTy = CallDescriptionMap<GenericTaintRule>;
326 
327 /// Used to parse the configuration file.
328 struct TaintConfiguration {
329   using NameScopeArgs = std::tuple<std::string, std::string, ArgVecTy>;
330   enum class VariadicType { None, Src, Dst };
331 
332   struct Common {
333     std::string Name;
334     std::string Scope;
335   };
336 
337   struct Sink : Common {
338     ArgVecTy SinkArgs;
339   };
340 
341   struct Filter : Common {
342     ArgVecTy FilterArgs;
343   };
344 
345   struct Propagation : Common {
346     ArgVecTy SrcArgs;
347     ArgVecTy DstArgs;
348     VariadicType VarType;
349     ArgIdxTy VarIndex;
350   };
351 
352   std::vector<Propagation> Propagations;
353   std::vector<Filter> Filters;
354   std::vector<Sink> Sinks;
355 
356   TaintConfiguration() = default;
357   TaintConfiguration(const TaintConfiguration &) = default;
358   TaintConfiguration(TaintConfiguration &&) = default;
359   TaintConfiguration &operator=(const TaintConfiguration &) = default;
360   TaintConfiguration &operator=(TaintConfiguration &&) = default;
361 };
362 
363 struct GenericTaintRuleParser {
364   GenericTaintRuleParser(CheckerManager &Mgr) : Mgr(Mgr) {}
365   /// Container type used to gather call identification objects grouped into
366   /// pairs with their corresponding taint rules. It is temporary as it is used
367   /// to finally initialize RuleLookupTy, which is considered to be immutable.
368   using RulesContTy = std::vector<std::pair<CallDescription, GenericTaintRule>>;
369   RulesContTy parseConfiguration(const std::string &Option,
370                                  TaintConfiguration &&Config) const;
371 
372 private:
373   using NamePartsTy = llvm::SmallVector<StringRef, 2>;
374 
375   /// Validate part of the configuration, which contains a list of argument
376   /// indexes.
377   void validateArgVector(const std::string &Option, const ArgVecTy &Args) const;
378 
379   template <typename Config> static NamePartsTy parseNameParts(const Config &C);
380 
381   // Takes the config and creates a CallDescription for it and associates a Rule
382   // with that.
383   template <typename Config>
384   static void consumeRulesFromConfig(const Config &C, GenericTaintRule &&Rule,
385                                      RulesContTy &Rules);
386 
387   void parseConfig(const std::string &Option, TaintConfiguration::Sink &&P,
388                    RulesContTy &Rules) const;
389   void parseConfig(const std::string &Option, TaintConfiguration::Filter &&P,
390                    RulesContTy &Rules) const;
391   void parseConfig(const std::string &Option,
392                    TaintConfiguration::Propagation &&P,
393                    RulesContTy &Rules) const;
394 
395   CheckerManager &Mgr;
396 };
397 
/// The taint checker itself. It registers for the check::PreCall and
/// check::PostCall callbacks and owns the rule tables consulted for calls.
class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
public:
  // Callbacks for the check::PreCall / check::PostCall subscriptions above.
  void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
  void checkPostCall(const CallEvent &Call, CheckerContext &C) const;

  void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
                  const char *Sep) const override;

  /// Generate a report if the expression is tainted or points to tainted data.
  bool generateReportIfTainted(const Expr *E, StringRef Msg,
                               CheckerContext &C) const;

private:
  /// Bug type of this checker, in the TaintedData category.
  const BugType BT{this, "Use of Untrusted Data", categories::TaintedData};

  // NOTE(review): presumably reports MsgUncontrolledFormatString for tainted
  // format strings; the definition is not part of this section - confirm.
  bool checkUncontrolledFormatString(const CallEvent &Call,
                                     CheckerContext &C) const;

  void taintUnsafeSocketProtocol(const CallEvent &Call,
                                 CheckerContext &C) const;

  /// Default taint rules are initialized with the help of a CheckerContext to
  /// access the names of built-in functions like memcpy.
  /// Does nothing if either rule table below is already set.
  void initTaintRules(CheckerContext &C) const;

  /// CallDescription currently cannot restrict matches to the global namespace
  /// only, which is why multiple CallDescriptionMaps are used, as we want to
  /// disambiguate global C functions from functions inside user-defined
  /// namespaces.
  // TODO: Remove separation to simplify matching logic once CallDescriptions
  // are more expressive.

  // Declared mutable so that the const member initTaintRules() can fill them.
  mutable std::optional<RuleLookupTy> StaticTaintRules;
  mutable std::optional<RuleLookupTy> DynamicTaintRules;
};
433 } // end of anonymous namespace
434 
/// YAML serialization mapping.
/// Declares that std::vector of each configuration entry type is handled as a
/// YAML sequence.
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Sink)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Filter)
LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfiguration::Propagation)
439 
440 namespace llvm {
441 namespace yaml {
442 template <> struct MappingTraits<TaintConfiguration> {
443   static void mapping(IO &IO, TaintConfiguration &Config) {
444     IO.mapOptional("Propagations", Config.Propagations);
445     IO.mapOptional("Filters", Config.Filters);
446     IO.mapOptional("Sinks", Config.Sinks);
447   }
448 };
449 
450 template <> struct MappingTraits<TaintConfiguration::Sink> {
451   static void mapping(IO &IO, TaintConfiguration::Sink &Sink) {
452     IO.mapRequired("Name", Sink.Name);
453     IO.mapOptional("Scope", Sink.Scope);
454     IO.mapRequired("Args", Sink.SinkArgs);
455   }
456 };
457 
458 template <> struct MappingTraits<TaintConfiguration::Filter> {
459   static void mapping(IO &IO, TaintConfiguration::Filter &Filter) {
460     IO.mapRequired("Name", Filter.Name);
461     IO.mapOptional("Scope", Filter.Scope);
462     IO.mapRequired("Args", Filter.FilterArgs);
463   }
464 };
465 
466 template <> struct MappingTraits<TaintConfiguration::Propagation> {
467   static void mapping(IO &IO, TaintConfiguration::Propagation &Propagation) {
468     IO.mapRequired("Name", Propagation.Name);
469     IO.mapOptional("Scope", Propagation.Scope);
470     IO.mapOptional("SrcArgs", Propagation.SrcArgs);
471     IO.mapOptional("DstArgs", Propagation.DstArgs);
472     IO.mapOptional("VariadicType", Propagation.VarType);
473     IO.mapOptional("VariadicIndex", Propagation.VarIndex);
474   }
475 };
476 
477 template <> struct ScalarEnumerationTraits<TaintConfiguration::VariadicType> {
478   static void enumeration(IO &IO, TaintConfiguration::VariadicType &Value) {
479     IO.enumCase(Value, "None", TaintConfiguration::VariadicType::None);
480     IO.enumCase(Value, "Src", TaintConfiguration::VariadicType::Src);
481     IO.enumCase(Value, "Dst", TaintConfiguration::VariadicType::Dst);
482   }
483 };
484 } // namespace yaml
485 } // namespace llvm
486 
/// A per-LocationContext set which is used to pass information from call
/// pre-visit instruction to the call post-visit. The values are signed
/// integers, which are either ReturnValueIndex, or indexes of the
/// pointer/reference argument, which points to data, which should be tainted
/// on return.
REGISTER_MAP_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, const LocationContext *,
                               ImmutableSet<ArgIdxTy>)
/// Factory for the ImmutableSet<ArgIdxTy> values stored in the map above.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ArgIdxFactory, ArgIdxTy)
494 
495 void GenericTaintRuleParser::validateArgVector(const std::string &Option,
496                                                const ArgVecTy &Args) const {
497   for (ArgIdxTy Arg : Args) {
498     if (Arg < ReturnValueIndex) {
499       Mgr.reportInvalidCheckerOptionValue(
500           Mgr.getChecker<GenericTaintChecker>(), Option,
501           "an argument number for propagation rules greater or equal to -1");
502     }
503   }
504 }
505 
506 template <typename Config>
507 GenericTaintRuleParser::NamePartsTy
508 GenericTaintRuleParser::parseNameParts(const Config &C) {
509   NamePartsTy NameParts;
510   if (!C.Scope.empty()) {
511     // If the Scope argument contains multiple "::" parts, those are considered
512     // namespace identifiers.
513     StringRef{C.Scope}.split(NameParts, "::", /*MaxSplit*/ -1,
514                              /*KeepEmpty*/ false);
515   }
516   NameParts.emplace_back(C.Name);
517   return NameParts;
518 }
519 
520 template <typename Config>
521 void GenericTaintRuleParser::consumeRulesFromConfig(const Config &C,
522                                                     GenericTaintRule &&Rule,
523                                                     RulesContTy &Rules) {
524   NamePartsTy NameParts = parseNameParts(C);
525   Rules.emplace_back(CallDescription(NameParts), std::move(Rule));
526 }
527 
528 void GenericTaintRuleParser::parseConfig(const std::string &Option,
529                                          TaintConfiguration::Sink &&S,
530                                          RulesContTy &Rules) const {
531   validateArgVector(Option, S.SinkArgs);
532   consumeRulesFromConfig(S, GenericTaintRule::Sink(std::move(S.SinkArgs)),
533                          Rules);
534 }
535 
536 void GenericTaintRuleParser::parseConfig(const std::string &Option,
537                                          TaintConfiguration::Filter &&S,
538                                          RulesContTy &Rules) const {
539   validateArgVector(Option, S.FilterArgs);
540   consumeRulesFromConfig(S, GenericTaintRule::Filter(std::move(S.FilterArgs)),
541                          Rules);
542 }
543 
544 void GenericTaintRuleParser::parseConfig(const std::string &Option,
545                                          TaintConfiguration::Propagation &&P,
546                                          RulesContTy &Rules) const {
547   validateArgVector(Option, P.SrcArgs);
548   validateArgVector(Option, P.DstArgs);
549   bool IsSrcVariadic = P.VarType == TaintConfiguration::VariadicType::Src;
550   bool IsDstVariadic = P.VarType == TaintConfiguration::VariadicType::Dst;
551   std::optional<ArgIdxTy> JustVarIndex = P.VarIndex;
552 
553   ArgSet SrcDesc(std::move(P.SrcArgs),
554                  IsSrcVariadic ? JustVarIndex : std::nullopt);
555   ArgSet DstDesc(std::move(P.DstArgs),
556                  IsDstVariadic ? JustVarIndex : std::nullopt);
557 
558   consumeRulesFromConfig(
559       P, GenericTaintRule::Prop(std::move(SrcDesc), std::move(DstDesc)), Rules);
560 }
561 
562 GenericTaintRuleParser::RulesContTy
563 GenericTaintRuleParser::parseConfiguration(const std::string &Option,
564                                            TaintConfiguration &&Config) const {
565 
566   RulesContTy Rules;
567 
568   for (auto &F : Config.Filters)
569     parseConfig(Option, std::move(F), Rules);
570 
571   for (auto &S : Config.Sinks)
572     parseConfig(Option, std::move(S), Rules);
573 
574   for (auto &P : Config.Propagations)
575     parseConfig(Option, std::move(P), Rules);
576 
577   return Rules;
578 }
579 
580 void GenericTaintChecker::initTaintRules(CheckerContext &C) const {
581   // Check for exact name match for functions without builtin substitutes.
582   // Use qualified name, because these are C functions without namespace.
583 
584   if (StaticTaintRules || DynamicTaintRules)
585     return;
586 
587   using RulesConstructionTy =
588       std::vector<std::pair<CallDescription, GenericTaintRule>>;
589   using TR = GenericTaintRule;
590 
591   const Builtin::Context &BI = C.getASTContext().BuiltinInfo;
592 
593   RulesConstructionTy GlobalCRules{
594       // Sources
595       {{{"fdopen"}}, TR::Source({{ReturnValueIndex}})},
596       {{{"fopen"}}, TR::Source({{ReturnValueIndex}})},
597       {{{"freopen"}}, TR::Source({{ReturnValueIndex}})},
598       {{{"getch"}}, TR::Source({{ReturnValueIndex}})},
599       {{{"getchar"}}, TR::Source({{ReturnValueIndex}})},
600       {{{"getchar_unlocked"}}, TR::Source({{ReturnValueIndex}})},
601       {{{"gets"}}, TR::Source({{0}, ReturnValueIndex})},
602       {{{"gets_s"}}, TR::Source({{0}, ReturnValueIndex})},
603       {{{"scanf"}}, TR::Source({{}, 1})},
604       {{{"scanf_s"}}, TR::Source({{}, {1}})},
605       {{{"wgetch"}}, TR::Source({{}, ReturnValueIndex})},
606       // Sometimes the line between taint sources and propagators is blurry.
607       // _IO_getc is choosen to be a source, but could also be a propagator.
608       // This way it is simpler, as modeling it as a propagator would require
609       // to model the possible sources of _IO_FILE * values, which the _IO_getc
610       // function takes as parameters.
611       {{{"_IO_getc"}}, TR::Source({{ReturnValueIndex}})},
612       {{{"getcwd"}}, TR::Source({{0, ReturnValueIndex}})},
613       {{{"getwd"}}, TR::Source({{0, ReturnValueIndex}})},
614       {{{"readlink"}}, TR::Source({{1, ReturnValueIndex}})},
615       {{{"readlinkat"}}, TR::Source({{2, ReturnValueIndex}})},
616       {{{"get_current_dir_name"}}, TR::Source({{ReturnValueIndex}})},
617       {{{"gethostname"}}, TR::Source({{0}})},
618       {{{"getnameinfo"}}, TR::Source({{2, 4}})},
619       {{{"getseuserbyname"}}, TR::Source({{1, 2}})},
620       {{{"getgroups"}}, TR::Source({{1, ReturnValueIndex}})},
621       {{{"getlogin"}}, TR::Source({{ReturnValueIndex}})},
622       {{{"getlogin_r"}}, TR::Source({{0}})},
623 
624       // Props
625       {{{"atoi"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
626       {{{"atol"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
627       {{{"atoll"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
628       {{{"fgetc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
629       {{{"fgetln"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
630       {{{"fgets"}}, TR::Prop({{2}}, {{0, ReturnValueIndex}})},
631       {{{"fscanf"}}, TR::Prop({{0}}, {{}, 2})},
632       {{{"fscanf_s"}}, TR::Prop({{0}}, {{}, {2}})},
633       {{{"sscanf"}}, TR::Prop({{0}}, {{}, 2})},
634 
635       {{{"getc"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
636       {{{"getc_unlocked"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
637       {{{"getdelim"}}, TR::Prop({{3}}, {{0}})},
638       {{{"getline"}}, TR::Prop({{2}}, {{0}})},
639       {{{"getw"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
640       {{{"pread"}}, TR::Prop({{0, 1, 2, 3}}, {{1, ReturnValueIndex}})},
641       {{{"read"}}, TR::Prop({{0, 2}}, {{1, ReturnValueIndex}})},
642       {{{"strchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
643       {{{"strrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
644       {{{"tolower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
645       {{{"toupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
646       {{{"fread"}}, TR::Prop({{3}}, {{0, ReturnValueIndex}})},
647       {{{"recv"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
648       {{{"recvfrom"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
649 
650       {{{"ttyname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
651       {{{"ttyname_r"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
652 
653       {{{"basename"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
654       {{{"dirname"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
655       {{{"fnmatch"}}, TR::Prop({{1}}, {{ReturnValueIndex}})},
656       {{{"memchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
657       {{{"memrchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
658       {{{"rawmemchr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
659 
660       {{{"mbtowc"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
661       {{{"wctomb"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
662       {{{"wcwidth"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
663 
664       {{{"memcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
665       {{{"memcpy"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
666       {{{"memmove"}}, TR::Prop({{1}}, {{0, ReturnValueIndex}})},
667       // If memmem was called with a tainted needle and the search was
668       // successful, that would mean that the value pointed by the return value
669       // has the same content as the needle. If we choose to go by the policy of
670       // content equivalence implies taintedness equivalence, that would mean
671       // haystack should be considered a propagation source argument.
672       {{{"memmem"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
673 
674       // The comment for memmem above also applies to strstr.
675       {{{"strstr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
676       {{{"strcasestr"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
677 
678       {{{"strchrnul"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
679 
680       {{{"index"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
681       {{{"rindex"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
682 
683       // FIXME: In case of arrays, only the first element of the array gets
684       // tainted.
685       {{{"qsort"}}, TR::Prop({{0}}, {{0}})},
686       {{{"qsort_r"}}, TR::Prop({{0}}, {{0}})},
687 
688       {{{"strcmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
689       {{{"strcasecmp"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
690       {{{"strncmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
691       {{{"strncasecmp"}}, TR::Prop({{0, 1, 2}}, {{ReturnValueIndex}})},
692       {{{"strspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
693       {{{"strcspn"}}, TR::Prop({{0, 1}}, {{ReturnValueIndex}})},
694       {{{"strpbrk"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
695       {{{"strndup"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
696       {{{"strndupa"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
697       {{{"strlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
698       {{{"strnlen"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
699       {{{"strtol"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
700       {{{"strtoll"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
701       {{{"strtoul"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
702       {{{"strtoull"}}, TR::Prop({{0}}, {{1, ReturnValueIndex}})},
703 
704       {{{"isalnum"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
705       {{{"isalpha"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
706       {{{"isascii"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
707       {{{"isblank"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
708       {{{"iscntrl"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
709       {{{"isdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
710       {{{"isgraph"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
711       {{{"islower"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
712       {{{"isprint"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
713       {{{"ispunct"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
714       {{{"isspace"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
715       {{{"isupper"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
716       {{{"isxdigit"}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
717 
718       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncat)}},
719        TR::Prop({{1, 2}}, {{0, ReturnValueIndex}})},
720       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcpy)}},
721        TR::Prop({{1, 2}}, {{0}})},
722       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrlcat)}},
723        TR::Prop({{1, 2}}, {{0}})},
724       {{CDF_MaybeBuiltin, {{"snprintf"}}},
725        TR::Prop({{1}, 3}, {{0, ReturnValueIndex}})},
726       {{CDF_MaybeBuiltin, {{"sprintf"}}},
727        TR::Prop({{1}, 2}, {{0, ReturnValueIndex}})},
728       {{CDF_MaybeBuiltin, {{"strcpy"}}},
729        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
730       {{CDF_MaybeBuiltin, {{"stpcpy"}}},
731        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
732       {{CDF_MaybeBuiltin, {{"strcat"}}},
733        TR::Prop({{1}}, {{0, ReturnValueIndex}})},
734       {{CDF_MaybeBuiltin, {{"strdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
735       {{CDF_MaybeBuiltin, {{"strdupa"}}},
736        TR::Prop({{0}}, {{ReturnValueIndex}})},
737       {{CDF_MaybeBuiltin, {{"wcsdup"}}}, TR::Prop({{0}}, {{ReturnValueIndex}})},
738 
739       // Sinks
740       {{{"system"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
741       {{{"popen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
742       {{{"execl"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
743       {{{"execle"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
744       {{{"execlp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
745       {{{"execvp"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
746       {{{"execvP"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
747       {{{"execve"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
748       {{{"dlopen"}}, TR::Sink({{0}}, MsgSanitizeSystemArgs)},
749       {{CDF_MaybeBuiltin, {{"malloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
750       {{CDF_MaybeBuiltin, {{"calloc"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
751       {{CDF_MaybeBuiltin, {{"alloca"}}}, TR::Sink({{0}}, MsgTaintedBufferSize)},
752       {{CDF_MaybeBuiltin, {{"memccpy"}}},
753        TR::Sink({{3}}, MsgTaintedBufferSize)},
754       {{CDF_MaybeBuiltin, {{"realloc"}}},
755        TR::Sink({{1}}, MsgTaintedBufferSize)},
756       {{{{"setproctitle"}}}, TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
757       {{{{"setproctitle_fast"}}},
758        TR::Sink({{0}, 1}, MsgUncontrolledFormatString)},
759 
760       // SinkProps
761       {{CDF_MaybeBuiltin, BI.getName(Builtin::BImemcpy)},
762        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
763                     MsgTaintedBufferSize)},
764       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BImemmove)}},
765        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
766                     MsgTaintedBufferSize)},
767       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrncpy)}},
768        TR::SinkProp({{2}}, {{1, 2}}, {{0, ReturnValueIndex}},
769                     MsgTaintedBufferSize)},
770       {{CDF_MaybeBuiltin, {BI.getName(Builtin::BIstrndup)}},
771        TR::SinkProp({{1}}, {{0, 1}}, {{ReturnValueIndex}},
772                     MsgTaintedBufferSize)},
773       {{CDF_MaybeBuiltin, {{"bcopy"}}},
774        TR::SinkProp({{2}}, {{0, 2}}, {{1}}, MsgTaintedBufferSize)}};
775 
776   // `getenv` returns taint only in untrusted environments.
777   if (TR::UntrustedEnv(C)) {
778     // void setproctitle_init(int argc, char *argv[], char *envp[])
779     GlobalCRules.push_back(
780         {{{"setproctitle_init"}}, TR::Sink({{1, 2}}, MsgCustomSink)});
781     GlobalCRules.push_back({{{"getenv"}}, TR::Source({{ReturnValueIndex}})});
782   }
783 
784   StaticTaintRules.emplace(std::make_move_iterator(GlobalCRules.begin()),
785                            std::make_move_iterator(GlobalCRules.end()));
786 
787   // User-provided taint configuration.
788   CheckerManager *Mgr = C.getAnalysisManager().getCheckerManager();
789   assert(Mgr);
790   GenericTaintRuleParser ConfigParser{*Mgr};
791   std::string Option{"Config"};
792   StringRef ConfigFile =
793       Mgr->getAnalyzerOptions().getCheckerStringOption(this, Option);
794   std::optional<TaintConfiguration> Config =
795       getConfiguration<TaintConfiguration>(*Mgr, this, Option, ConfigFile);
796   if (!Config) {
797     // We don't have external taint config, no parsing required.
798     DynamicTaintRules = RuleLookupTy{};
799     return;
800   }
801 
802   GenericTaintRuleParser::RulesContTy Rules{
803       ConfigParser.parseConfiguration(Option, std::move(*Config))};
804 
805   DynamicTaintRules.emplace(std::make_move_iterator(Rules.begin()),
806                             std::make_move_iterator(Rules.end()));
807 }
808 
809 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
810                                        CheckerContext &C) const {
811   initTaintRules(C);
812 
813   // FIXME: this should be much simpler.
814   if (const auto *Rule =
815           Call.isGlobalCFunction() ? StaticTaintRules->lookup(Call) : nullptr)
816     Rule->process(*this, Call, C);
817   else if (const auto *Rule = DynamicTaintRules->lookup(Call))
818     Rule->process(*this, Call, C);
819 
820   // FIXME: These edge cases are to be eliminated from here eventually.
821   //
822   // Additional check that is not supported by CallDescription.
823   // TODO: Make CallDescription be able to match attributes such as printf-like
824   // arguments.
825   checkUncontrolledFormatString(Call, C);
826 
827   // TODO: Modeling sockets should be done in a specific checker.
828   // Socket is a source, which taints the return value.
829   taintUnsafeSocketProtocol(Call, C);
830 }
831 
832 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
833                                         CheckerContext &C) const {
834   // Set the marked values as tainted. The return value only accessible from
835   // checkPostStmt.
836   ProgramStateRef State = C.getState();
837   const StackFrameContext *CurrentFrame = C.getStackFrame();
838 
839   // Depending on what was tainted at pre-visit, we determined a set of
840   // arguments which should be tainted after the function returns. These are
841   // stored in the state as TaintArgsOnPostVisit set.
842   TaintArgsOnPostVisitTy TaintArgsMap = State->get<TaintArgsOnPostVisit>();
843 
844   const ImmutableSet<ArgIdxTy> *TaintArgs = TaintArgsMap.lookup(CurrentFrame);
845   if (!TaintArgs)
846     return;
847   assert(!TaintArgs->isEmpty());
848 
849   LLVM_DEBUG(for (ArgIdxTy I
850                   : *TaintArgs) {
851     llvm::dbgs() << "PostCall<";
852     Call.dump(llvm::dbgs());
853     llvm::dbgs() << "> actually wants to taint arg index: " << I << '\n';
854   });
855 
856   const NoteTag *InjectionTag = nullptr;
857   std::vector<SymbolRef> TaintedSymbols;
858   std::vector<ArgIdxTy> TaintedIndexes;
859   for (ArgIdxTy ArgNum : *TaintArgs) {
860     // Special handling for the tainted return value.
861     if (ArgNum == ReturnValueIndex) {
862       State = addTaint(State, Call.getReturnValue());
863       std::vector<SymbolRef> TaintedSyms =
864           getTaintedSymbols(State, Call.getReturnValue());
865       if (!TaintedSyms.empty()) {
866         TaintedSymbols.push_back(TaintedSyms[0]);
867         TaintedIndexes.push_back(ArgNum);
868       }
869       continue;
870     }
871     // The arguments are pointer arguments. The data they are pointing at is
872     // tainted after the call.
873     if (auto V = getPointeeOf(State, Call.getArgSVal(ArgNum))) {
874       State = addTaint(State, *V);
875       std::vector<SymbolRef> TaintedSyms = getTaintedSymbols(State, *V);
876       if (!TaintedSyms.empty()) {
877         TaintedSymbols.push_back(TaintedSyms[0]);
878         TaintedIndexes.push_back(ArgNum);
879       }
880     }
881   }
882   // Create a NoteTag callback, which prints to the user where the taintedness
883   // was propagated to.
884   InjectionTag = taintPropagationExplainerTag(C, TaintedSymbols, TaintedIndexes,
885                                               Call.getCalleeStackFrame(0));
886   // Clear up the taint info from the state.
887   State = State->remove<TaintArgsOnPostVisit>(CurrentFrame);
888   C.addTransition(State, InjectionTag);
889 }
890 
/// Prints the taint information of \p State to \p Out by forwarding all
/// arguments (including the \p NL and \p Sep strings) to printTaint.
void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
                                     const char *NL, const char *Sep) const {
  printTaint(State, Out, NL, Sep);
}
895 
896 void GenericTaintRule::process(const GenericTaintChecker &Checker,
897                                const CallEvent &Call, CheckerContext &C) const {
898   ProgramStateRef State = C.getState();
899   const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
900 
901   /// Iterate every call argument, and get their corresponding Expr and SVal.
902   const auto ForEachCallArg = [&C, &Call, CallNumArgs](auto &&Fun) {
903     for (ArgIdxTy I = ReturnValueIndex; I < CallNumArgs; ++I) {
904       const Expr *E = GetArgExpr(I, Call);
905       Fun(I, E, C.getSVal(E));
906     }
907   };
908 
909   /// Check for taint sinks.
910   ForEachCallArg([this, &Checker, &C, &State](ArgIdxTy I, const Expr *E, SVal) {
911     // Add taintedness to stdin parameters
912     if (isStdin(C.getSVal(E), C.getASTContext())) {
913       State = addTaint(State, C.getSVal(E));
914     }
915     if (SinkArgs.contains(I) && isTaintedOrPointsToTainted(State, C.getSVal(E)))
916       Checker.generateReportIfTainted(E, SinkMsg.value_or(MsgCustomSink), C);
917   });
918 
919   /// Check for taint filters.
920   ForEachCallArg([this, &State](ArgIdxTy I, const Expr *E, SVal S) {
921     if (FilterArgs.contains(I)) {
922       State = removeTaint(State, S);
923       if (auto P = getPointeeOf(State, S))
924         State = removeTaint(State, *P);
925     }
926   });
927 
928   /// Check for taint propagation sources.
929   /// A rule will make the destination variables tainted if PropSrcArgs
930   /// is empty (taints the destination
931   /// arguments unconditionally), or if any of its signified
932   /// args are tainted in context of the current CallEvent.
933   bool IsMatching = PropSrcArgs.isEmpty();
934   std::vector<SymbolRef> TaintedSymbols;
935   std::vector<ArgIdxTy> TaintedIndexes;
936   ForEachCallArg([this, &C, &IsMatching, &State, &TaintedSymbols,
937                   &TaintedIndexes](ArgIdxTy I, const Expr *E, SVal) {
938     std::optional<SVal> TaintedSVal =
939         getTaintedPointeeOrPointer(State, C.getSVal(E));
940     IsMatching =
941         IsMatching || (PropSrcArgs.contains(I) && TaintedSVal.has_value());
942 
943     // We track back tainted arguments except for stdin
944     if (TaintedSVal && !isStdin(*TaintedSVal, C.getASTContext())) {
945       std::vector<SymbolRef> TaintedArgSyms =
946           getTaintedSymbols(State, *TaintedSVal);
947       if (!TaintedArgSyms.empty()) {
948         llvm::append_range(TaintedSymbols, TaintedArgSyms);
949         TaintedIndexes.push_back(I);
950       }
951     }
952   });
953 
954   // Early return for propagation rules which dont match.
955   // Matching propagations, Sinks and Filters will pass this point.
956   if (!IsMatching)
957     return;
958 
959   const auto WouldEscape = [](SVal V, QualType Ty) -> bool {
960     if (!isa<Loc>(V))
961       return false;
962 
963     const bool IsNonConstRef = Ty->isReferenceType() && !Ty.isConstQualified();
964     const bool IsNonConstPtr =
965         Ty->isPointerType() && !Ty->getPointeeType().isConstQualified();
966 
967     return IsNonConstRef || IsNonConstPtr;
968   };
969 
970   /// Propagate taint where it is necessary.
971   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
972   ImmutableSet<ArgIdxTy> Result = F.getEmptySet();
973   ForEachCallArg(
974       [&](ArgIdxTy I, const Expr *E, SVal V) {
975         if (PropDstArgs.contains(I)) {
976           LLVM_DEBUG(llvm::dbgs() << "PreCall<"; Call.dump(llvm::dbgs());
977                      llvm::dbgs()
978                      << "> prepares tainting arg index: " << I << '\n';);
979           Result = F.add(Result, I);
980         }
981 
982         // Taint property gets lost if the variable is passed as a
983         // non-const pointer or reference to a function which is
984         // not inlined. For matching rules we want to preserve the taintedness.
985         // TODO: We should traverse all reachable memory regions via the
986         // escaping parameter. Instead of doing that we simply mark only the
987         // referred memory region as tainted.
988         if (WouldEscape(V, E->getType()) && getTaintedPointeeOrPointer(State, V)) {
989           LLVM_DEBUG(if (!Result.contains(I)) {
990             llvm::dbgs() << "PreCall<";
991             Call.dump(llvm::dbgs());
992             llvm::dbgs() << "> prepares tainting arg index: " << I << '\n';
993           });
994           Result = F.add(Result, I);
995         }
996       });
997 
998   if (!Result.isEmpty())
999     State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
1000   const NoteTag *InjectionTag = taintOriginTrackerTag(
1001       C, std::move(TaintedSymbols), std::move(TaintedIndexes),
1002       Call.getCalleeStackFrame(0));
1003   C.addTransition(State, InjectionTag);
1004 }
1005 
1006 bool GenericTaintRule::UntrustedEnv(CheckerContext &C) {
1007   return !C.getAnalysisManager()
1008               .getAnalyzerOptions()
1009               .ShouldAssumeControlledEnvironment;
1010 }
1011 
1012 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
1013                                                   CheckerContext &C) const {
1014   assert(E);
1015   std::optional<SVal> TaintedSVal =
1016       getTaintedPointeeOrPointer(C.getState(), C.getSVal(E));
1017 
1018   if (!TaintedSVal)
1019     return false;
1020 
1021   // Generate diagnostic.
1022   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
1023     auto report = std::make_unique<PathSensitiveBugReport>(BT, Msg, N);
1024     report->addRange(E->getSourceRange());
1025     for (auto TaintedSym : getTaintedSymbols(C.getState(), *TaintedSVal)) {
1026       report->markInteresting(TaintedSym);
1027     }
1028 
1029     C.emitReport(std::move(report));
1030     return true;
1031   }
1032   return false;
1033 }
1034 
1035 /// TODO: remove checking for printf format attributes and socket whitelisting
1036 /// from GenericTaintChecker, and that means the following functions:
1037 /// getPrintfFormatArgumentNum,
1038 /// GenericTaintChecker::checkUncontrolledFormatString,
1039 /// GenericTaintChecker::taintUnsafeSocketProtocol
1040 
1041 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
1042                                        const CheckerContext &C,
1043                                        ArgIdxTy &ArgNum) {
1044   // Find if the function contains a format string argument.
1045   // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
1046   // vsnprintf, syslog, custom annotated functions.
1047   const Decl *CallDecl = Call.getDecl();
1048   if (!CallDecl)
1049     return false;
1050   const FunctionDecl *FDecl = CallDecl->getAsFunction();
1051   if (!FDecl)
1052     return false;
1053 
1054   const ArgIdxTy CallNumArgs = fromArgumentCount(Call.getNumArgs());
1055 
1056   for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
1057     ArgNum = Format->getFormatIdx() - 1;
1058     if ((Format->getType()->getName() == "printf") && CallNumArgs > ArgNum)
1059       return true;
1060   }
1061 
1062   return false;
1063 }
1064 
1065 bool GenericTaintChecker::checkUncontrolledFormatString(
1066     const CallEvent &Call, CheckerContext &C) const {
1067   // Check if the function contains a format string argument.
1068   ArgIdxTy ArgNum = 0;
1069   if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
1070     return false;
1071 
1072   // If either the format string content or the pointer itself are tainted,
1073   // warn.
1074   return generateReportIfTainted(Call.getArgExpr(ArgNum),
1075                                  MsgUncontrolledFormatString, C);
1076 }
1077 
1078 void GenericTaintChecker::taintUnsafeSocketProtocol(const CallEvent &Call,
1079                                                     CheckerContext &C) const {
1080   if (Call.getNumArgs() < 1)
1081     return;
1082   const IdentifierInfo *ID = Call.getCalleeIdentifier();
1083   if (!ID)
1084     return;
1085   if (!ID->getName().equals("socket"))
1086     return;
1087 
1088   SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
1089   StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
1090   // Allow internal communication protocols.
1091   bool SafeProtocol = DomName.equals("AF_SYSTEM") ||
1092                       DomName.equals("AF_LOCAL") || DomName.equals("AF_UNIX") ||
1093                       DomName.equals("AF_RESERVED_36");
1094   if (SafeProtocol)
1095     return;
1096 
1097   ProgramStateRef State = C.getState();
1098   auto &F = State->getStateManager().get_context<ArgIdxFactory>();
1099   ImmutableSet<ArgIdxTy> Result = F.add(F.getEmptySet(), ReturnValueIndex);
1100   State = State->set<TaintArgsOnPostVisit>(C.getStackFrame(), Result);
1101   C.addTransition(State);
1102 }
1103 
/// Checker registration: registers GenericTaintChecker with \p Mgr.
void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
  Mgr.registerChecker<GenericTaintChecker>();
}
1108 
/// Always returns true; \p mgr is not consulted.
bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
  return true;
}
1112