1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "Taint.h"
18 #include "Yaml.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
23 #include "clang/StaticAnalyzer/Core/Checker.h"
24 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/CallEvent.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
27 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
28 #include "llvm/Support/YAMLTraits.h"
29
30 #include <algorithm>
31 #include <limits>
32 #include <memory>
33 #include <unordered_map>
34 #include <utility>
35
36 using namespace clang;
37 using namespace ento;
38 using namespace taint;
39
40 namespace {
41 class GenericTaintChecker : public Checker<check::PreCall, check::PostCall> {
42 public:
getTag()43 static void *getTag() {
44 static int Tag;
45 return &Tag;
46 }
47
48 void checkPreCall(const CallEvent &Call, CheckerContext &C) const;
49 void checkPostCall(const CallEvent &Call, CheckerContext &C) const;
50
51 void printState(raw_ostream &Out, ProgramStateRef State, const char *NL,
52 const char *Sep) const override;
53
54 using ArgVector = SmallVector<unsigned, 2>;
55 using SignedArgVector = SmallVector<int, 2>;
56
57 enum class VariadicType { None, Src, Dst };
58
59 /// Used to parse the configuration file.
60 struct TaintConfiguration {
61 using NameScopeArgs = std::tuple<std::string, std::string, ArgVector>;
62
63 struct Propagation {
64 std::string Name;
65 std::string Scope;
66 ArgVector SrcArgs;
67 SignedArgVector DstArgs;
68 VariadicType VarType;
69 unsigned VarIndex;
70 };
71
72 std::vector<Propagation> Propagations;
73 std::vector<NameScopeArgs> Filters;
74 std::vector<NameScopeArgs> Sinks;
75
76 TaintConfiguration() = default;
77 TaintConfiguration(const TaintConfiguration &) = default;
78 TaintConfiguration(TaintConfiguration &&) = default;
79 TaintConfiguration &operator=(const TaintConfiguration &) = default;
80 TaintConfiguration &operator=(TaintConfiguration &&) = default;
81 };
82
83 /// Convert SignedArgVector to ArgVector.
84 ArgVector convertToArgVector(CheckerManager &Mgr, const std::string &Option,
85 const SignedArgVector &Args);
86
87 /// Parse the config.
88 void parseConfiguration(CheckerManager &Mgr, const std::string &Option,
89 TaintConfiguration &&Config);
90
91 static const unsigned InvalidArgIndex{std::numeric_limits<unsigned>::max()};
92 /// Denotes the return vale.
93 static const unsigned ReturnValueIndex{std::numeric_limits<unsigned>::max() -
94 1};
95
96 private:
97 mutable std::unique_ptr<BugType> BT;
initBugType() const98 void initBugType() const {
99 if (!BT)
100 BT = std::make_unique<BugType>(this, "Use of Untrusted Data",
101 "Untrusted Data");
102 }
103
104 struct FunctionData {
105 FunctionData() = delete;
FunctionData__anon4b3773b90111::GenericTaintChecker::FunctionData106 FunctionData(const FunctionDecl *FDecl, StringRef Name,
107 std::string FullName)
108 : FDecl(FDecl), Name(Name), FullName(std::move(FullName)) {}
109 FunctionData(const FunctionData &) = default;
110 FunctionData(FunctionData &&) = default;
111 FunctionData &operator=(const FunctionData &) = delete;
112 FunctionData &operator=(FunctionData &&) = delete;
113
create__anon4b3773b90111::GenericTaintChecker::FunctionData114 static Optional<FunctionData> create(const CallEvent &Call,
115 const CheckerContext &C) {
116 if (!Call.getDecl())
117 return None;
118
119 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
120 if (!FDecl || (FDecl->getKind() != Decl::Function &&
121 FDecl->getKind() != Decl::CXXMethod))
122 return None;
123
124 StringRef Name = C.getCalleeName(FDecl);
125 std::string FullName = FDecl->getQualifiedNameAsString();
126 if (Name.empty() || FullName.empty())
127 return None;
128
129 return FunctionData{FDecl, Name, std::move(FullName)};
130 }
131
isInScope__anon4b3773b90111::GenericTaintChecker::FunctionData132 bool isInScope(StringRef Scope) const {
133 return StringRef(FullName).startswith(Scope);
134 }
135
136 const FunctionDecl *const FDecl;
137 const StringRef Name;
138 const std::string FullName;
139 };
140
141 /// Catch taint related bugs. Check if tainted data is passed to a
142 /// system call etc. Returns true on matching.
143 bool checkPre(const CallEvent &Call, const FunctionData &FData,
144 CheckerContext &C) const;
145
146 /// Add taint sources on a pre-visit. Returns true on matching.
147 bool addSourcesPre(const CallEvent &Call, const FunctionData &FData,
148 CheckerContext &C) const;
149
150 /// Mark filter's arguments not tainted on a pre-visit. Returns true on
151 /// matching.
152 bool addFiltersPre(const CallEvent &Call, const FunctionData &FData,
153 CheckerContext &C) const;
154
155 /// Propagate taint generated at pre-visit. Returns true on matching.
156 static bool propagateFromPre(const CallEvent &Call, CheckerContext &C);
157
158 /// Check if the region the expression evaluates to is the standard input,
159 /// and thus, is tainted.
160 static bool isStdin(const Expr *E, CheckerContext &C);
161
162 /// Given a pointer argument, return the value it points to.
163 static Optional<SVal> getPointeeOf(CheckerContext &C, const Expr *Arg);
164
165 /// Check for CWE-134: Uncontrolled Format String.
166 static constexpr llvm::StringLiteral MsgUncontrolledFormatString =
167 "Untrusted data is used as a format string "
168 "(CWE-134: Uncontrolled Format String)";
169 bool checkUncontrolledFormatString(const CallEvent &Call,
170 CheckerContext &C) const;
171
172 /// Check for:
173 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
174 /// CWE-78, "Failure to Sanitize Data into an OS Command"
175 static constexpr llvm::StringLiteral MsgSanitizeSystemArgs =
176 "Untrusted data is passed to a system call "
177 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
178 bool checkSystemCall(const CallEvent &Call, StringRef Name,
179 CheckerContext &C) const;
180
181 /// Check if tainted data is used as a buffer size ins strn.. functions,
182 /// and allocators.
183 static constexpr llvm::StringLiteral MsgTaintedBufferSize =
184 "Untrusted data is used to specify the buffer size "
185 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
186 "for character data and the null terminator)";
187 bool checkTaintedBufferSize(const CallEvent &Call, CheckerContext &C) const;
188
189 /// Check if tainted data is used as a custom sink's parameter.
190 static constexpr llvm::StringLiteral MsgCustomSink =
191 "Untrusted data is passed to a user-defined sink";
192 bool checkCustomSinks(const CallEvent &Call, const FunctionData &FData,
193 CheckerContext &C) const;
194
195 /// Generate a report if the expression is tainted or points to tainted data.
196 bool generateReportIfTainted(const Expr *E, StringRef Msg,
197 CheckerContext &C) const;
198
199 struct TaintPropagationRule;
200 template <typename T>
201 using ConfigDataMap =
202 std::unordered_multimap<std::string, std::pair<std::string, T>>;
203 using NameRuleMap = ConfigDataMap<TaintPropagationRule>;
204 using NameArgMap = ConfigDataMap<ArgVector>;
205
206 /// Find a function with the given name and scope. Returns the first match
207 /// or the end of the map.
208 template <typename T>
209 static auto findFunctionInConfig(const ConfigDataMap<T> &Map,
210 const FunctionData &FData);
211
212 /// A struct used to specify taint propagation rules for a function.
213 ///
214 /// If any of the possible taint source arguments is tainted, all of the
215 /// destination arguments should also be tainted. Use InvalidArgIndex in the
216 /// src list to specify that all of the arguments can introduce taint. Use
217 /// InvalidArgIndex in the dst arguments to signify that all the non-const
218 /// pointer and reference arguments might be tainted on return. If
219 /// ReturnValueIndex is added to the dst list, the return value will be
220 /// tainted.
221 struct TaintPropagationRule {
222 using PropagationFuncType = bool (*)(bool IsTainted, const CallEvent &Call,
223 CheckerContext &C);
224
225 /// List of arguments which can be taint sources and should be checked.
226 ArgVector SrcArgs;
227 /// List of arguments which should be tainted on function return.
228 ArgVector DstArgs;
229 /// Index for the first variadic parameter if exist.
230 unsigned VariadicIndex;
231 /// Show when a function has variadic parameters. If it has, it marks all
232 /// of them as source or destination.
233 VariadicType VarType;
234 /// Special function for tainted source determination. If defined, it can
235 /// override the default behavior.
236 PropagationFuncType PropagationFunc;
237
TaintPropagationRule__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule238 TaintPropagationRule()
239 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
240 PropagationFunc(nullptr) {}
241
TaintPropagationRule__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule242 TaintPropagationRule(ArgVector &&Src, ArgVector &&Dst,
243 VariadicType Var = VariadicType::None,
244 unsigned VarIndex = InvalidArgIndex,
245 PropagationFuncType Func = nullptr)
246 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
247 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
248
249 /// Get the propagation rule for a given function.
250 static TaintPropagationRule
251 getTaintPropagationRule(const NameRuleMap &CustomPropagations,
252 const FunctionData &FData, CheckerContext &C);
253
addSrcArg__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule254 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule255 void addDstArg(unsigned A) { DstArgs.push_back(A); }
256
isNull__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule257 bool isNull() const {
258 return SrcArgs.empty() && DstArgs.empty() &&
259 VariadicType::None == VarType;
260 }
261
isDestinationArgument__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule262 bool isDestinationArgument(unsigned ArgNum) const {
263 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
264 }
265
isTaintedOrPointsToTainted__anon4b3773b90111::GenericTaintChecker::TaintPropagationRule266 static bool isTaintedOrPointsToTainted(const Expr *E,
267 const ProgramStateRef &State,
268 CheckerContext &C) {
269 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
270 return true;
271
272 if (!E->getType().getTypePtr()->isPointerType())
273 return false;
274
275 Optional<SVal> V = getPointeeOf(C, E);
276 return (V && isTainted(State, *V));
277 }
278
279 /// Pre-process a function which propagates taint according to the
280 /// taint rule.
281 ProgramStateRef process(const CallEvent &Call, CheckerContext &C) const;
282
283 // Functions for custom taintedness propagation.
284 static bool postSocket(bool IsTainted, const CallEvent &Call,
285 CheckerContext &C);
286 };
287
288 /// Defines a map between the propagation function's name, scope
289 /// and TaintPropagationRule.
290 NameRuleMap CustomPropagations;
291
292 /// Defines a map between the filter function's name, scope and filtering
293 /// args.
294 NameArgMap CustomFilters;
295
296 /// Defines a map between the sink function's name, scope and sinking args.
297 NameArgMap CustomSinks;
298 };
299
300 const unsigned GenericTaintChecker::ReturnValueIndex;
301 const unsigned GenericTaintChecker::InvalidArgIndex;
302
303 // FIXME: these lines can be removed in C++17
304 constexpr llvm::StringLiteral GenericTaintChecker::MsgUncontrolledFormatString;
305 constexpr llvm::StringLiteral GenericTaintChecker::MsgSanitizeSystemArgs;
306 constexpr llvm::StringLiteral GenericTaintChecker::MsgTaintedBufferSize;
307 constexpr llvm::StringLiteral GenericTaintChecker::MsgCustomSink;
308 } // end of anonymous namespace
309
310 using TaintConfig = GenericTaintChecker::TaintConfiguration;
311
312 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::Propagation)
313 LLVM_YAML_IS_SEQUENCE_VECTOR(TaintConfig::NameScopeArgs)
314
315 namespace llvm {
316 namespace yaml {
317 template <> struct MappingTraits<TaintConfig> {
mappingllvm::yaml::MappingTraits318 static void mapping(IO &IO, TaintConfig &Config) {
319 IO.mapOptional("Propagations", Config.Propagations);
320 IO.mapOptional("Filters", Config.Filters);
321 IO.mapOptional("Sinks", Config.Sinks);
322 }
323 };
324
325 template <> struct MappingTraits<TaintConfig::Propagation> {
mappingllvm::yaml::MappingTraits326 static void mapping(IO &IO, TaintConfig::Propagation &Propagation) {
327 IO.mapRequired("Name", Propagation.Name);
328 IO.mapOptional("Scope", Propagation.Scope);
329 IO.mapOptional("SrcArgs", Propagation.SrcArgs);
330 IO.mapOptional("DstArgs", Propagation.DstArgs);
331 IO.mapOptional("VariadicType", Propagation.VarType,
332 GenericTaintChecker::VariadicType::None);
333 IO.mapOptional("VariadicIndex", Propagation.VarIndex,
334 GenericTaintChecker::InvalidArgIndex);
335 }
336 };
337
338 template <> struct ScalarEnumerationTraits<GenericTaintChecker::VariadicType> {
enumerationllvm::yaml::ScalarEnumerationTraits339 static void enumeration(IO &IO, GenericTaintChecker::VariadicType &Value) {
340 IO.enumCase(Value, "None", GenericTaintChecker::VariadicType::None);
341 IO.enumCase(Value, "Src", GenericTaintChecker::VariadicType::Src);
342 IO.enumCase(Value, "Dst", GenericTaintChecker::VariadicType::Dst);
343 }
344 };
345
346 template <> struct MappingTraits<TaintConfig::NameScopeArgs> {
mappingllvm::yaml::MappingTraits347 static void mapping(IO &IO, TaintConfig::NameScopeArgs &NSA) {
348 IO.mapRequired("Name", std::get<0>(NSA));
349 IO.mapOptional("Scope", std::get<1>(NSA));
350 IO.mapRequired("Args", std::get<2>(NSA));
351 }
352 };
353 } // namespace yaml
354 } // namespace llvm
355
356 /// A set which is used to pass information from call pre-visit instruction
357 /// to the call post-visit. The values are unsigned integers, which are either
358 /// ReturnValueIndex, or indexes of the pointer/reference argument, which
359 /// points to data, which should be tainted on return.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit,unsigned)360 REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
361
362 GenericTaintChecker::ArgVector
363 GenericTaintChecker::convertToArgVector(CheckerManager &Mgr,
364 const std::string &Option,
365 const SignedArgVector &Args) {
366 ArgVector Result;
367 for (int Arg : Args) {
368 if (Arg == -1)
369 Result.push_back(ReturnValueIndex);
370 else if (Arg < -1) {
371 Result.push_back(InvalidArgIndex);
372 Mgr.reportInvalidCheckerOptionValue(
373 this, Option,
374 "an argument number for propagation rules greater or equal to -1");
375 } else
376 Result.push_back(static_cast<unsigned>(Arg));
377 }
378 return Result;
379 }
380
parseConfiguration(CheckerManager & Mgr,const std::string & Option,TaintConfiguration && Config)381 void GenericTaintChecker::parseConfiguration(CheckerManager &Mgr,
382 const std::string &Option,
383 TaintConfiguration &&Config) {
384 for (auto &P : Config.Propagations) {
385 GenericTaintChecker::CustomPropagations.emplace(
386 P.Name,
387 std::make_pair(P.Scope, TaintPropagationRule{
388 std::move(P.SrcArgs),
389 convertToArgVector(Mgr, Option, P.DstArgs),
390 P.VarType, P.VarIndex}));
391 }
392
393 for (auto &F : Config.Filters) {
394 GenericTaintChecker::CustomFilters.emplace(
395 std::get<0>(F),
396 std::make_pair(std::move(std::get<1>(F)), std::move(std::get<2>(F))));
397 }
398
399 for (auto &S : Config.Sinks) {
400 GenericTaintChecker::CustomSinks.emplace(
401 std::get<0>(S),
402 std::make_pair(std::move(std::get<1>(S)), std::move(std::get<2>(S))));
403 }
404 }
405
406 template <typename T>
findFunctionInConfig(const ConfigDataMap<T> & Map,const FunctionData & FData)407 auto GenericTaintChecker::findFunctionInConfig(const ConfigDataMap<T> &Map,
408 const FunctionData &FData) {
409 auto Range = Map.equal_range(std::string(FData.Name));
410 auto It =
411 std::find_if(Range.first, Range.second, [&FData](const auto &Entry) {
412 const auto &Value = Entry.second;
413 StringRef Scope = Value.first;
414 return Scope.empty() || FData.isInScope(Scope);
415 });
416 return It != Range.second ? It : Map.end();
417 }
418
419 GenericTaintChecker::TaintPropagationRule
getTaintPropagationRule(const NameRuleMap & CustomPropagations,const FunctionData & FData,CheckerContext & C)420 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
421 const NameRuleMap &CustomPropagations, const FunctionData &FData,
422 CheckerContext &C) {
423 // TODO: Currently, we might lose precision here: we always mark a return
424 // value as tainted even if it's just a pointer, pointing to tainted data.
425
426 // Check for exact name match for functions without builtin substitutes.
427 // Use qualified name, because these are C functions without namespace.
428 TaintPropagationRule Rule =
429 llvm::StringSwitch<TaintPropagationRule>(FData.FullName)
430 // Source functions
431 // TODO: Add support for vfscanf & family.
432 .Case("fdopen", {{}, {ReturnValueIndex}})
433 .Case("fopen", {{}, {ReturnValueIndex}})
434 .Case("freopen", {{}, {ReturnValueIndex}})
435 .Case("getch", {{}, {ReturnValueIndex}})
436 .Case("getchar", {{}, {ReturnValueIndex}})
437 .Case("getchar_unlocked", {{}, {ReturnValueIndex}})
438 .Case("getenv", {{}, {ReturnValueIndex}})
439 .Case("gets", {{}, {0, ReturnValueIndex}})
440 .Case("scanf", {{}, {}, VariadicType::Dst, 1})
441 .Case("socket", {{},
442 {ReturnValueIndex},
443 VariadicType::None,
444 InvalidArgIndex,
445 &TaintPropagationRule::postSocket})
446 .Case("wgetch", {{}, {ReturnValueIndex}})
447 // Propagating functions
448 .Case("atoi", {{0}, {ReturnValueIndex}})
449 .Case("atol", {{0}, {ReturnValueIndex}})
450 .Case("atoll", {{0}, {ReturnValueIndex}})
451 .Case("fgetc", {{0}, {ReturnValueIndex}})
452 .Case("fgetln", {{0}, {ReturnValueIndex}})
453 .Case("fgets", {{2}, {0, ReturnValueIndex}})
454 .Case("fscanf", {{0}, {}, VariadicType::Dst, 2})
455 .Case("sscanf", {{0}, {}, VariadicType::Dst, 2})
456 .Case("getc", {{0}, {ReturnValueIndex}})
457 .Case("getc_unlocked", {{0}, {ReturnValueIndex}})
458 .Case("getdelim", {{3}, {0}})
459 .Case("getline", {{2}, {0}})
460 .Case("getw", {{0}, {ReturnValueIndex}})
461 .Case("pread", {{0, 1, 2, 3}, {1, ReturnValueIndex}})
462 .Case("read", {{0, 2}, {1, ReturnValueIndex}})
463 .Case("strchr", {{0}, {ReturnValueIndex}})
464 .Case("strrchr", {{0}, {ReturnValueIndex}})
465 .Case("tolower", {{0}, {ReturnValueIndex}})
466 .Case("toupper", {{0}, {ReturnValueIndex}})
467 .Default({});
468
469 if (!Rule.isNull())
470 return Rule;
471 assert(FData.FDecl);
472
473 // Check if it's one of the memory setting/copying functions.
474 // This check is specialized but faster then calling isCLibraryFunction.
475 const FunctionDecl *FDecl = FData.FDecl;
476 unsigned BId = 0;
477 if ((BId = FDecl->getMemoryFunctionKind())) {
478 switch (BId) {
479 case Builtin::BImemcpy:
480 case Builtin::BImemmove:
481 case Builtin::BIstrncpy:
482 case Builtin::BIstrncat:
483 return {{1, 2}, {0, ReturnValueIndex}};
484 case Builtin::BIstrlcpy:
485 case Builtin::BIstrlcat:
486 return {{1, 2}, {0}};
487 case Builtin::BIstrndup:
488 return {{0, 1}, {ReturnValueIndex}};
489
490 default:
491 break;
492 }
493 }
494
495 // Process all other functions which could be defined as builtins.
496 if (Rule.isNull()) {
497 const auto OneOf = [FDecl](const auto &... Name) {
498 // FIXME: use fold expression in C++17
499 using unused = int[];
500 bool ret = false;
501 static_cast<void>(unused{
502 0, (ret |= CheckerContext::isCLibraryFunction(FDecl, Name), 0)...});
503 return ret;
504 };
505 if (OneOf("snprintf"))
506 return {{1}, {0, ReturnValueIndex}, VariadicType::Src, 3};
507 if (OneOf("sprintf"))
508 return {{}, {0, ReturnValueIndex}, VariadicType::Src, 2};
509 if (OneOf("strcpy", "stpcpy", "strcat"))
510 return {{1}, {0, ReturnValueIndex}};
511 if (OneOf("bcopy"))
512 return {{0, 2}, {1}};
513 if (OneOf("strdup", "strdupa", "wcsdup"))
514 return {{0}, {ReturnValueIndex}};
515 }
516
517 // Skipping the following functions, since they might be used for cleansing or
518 // smart memory copy:
519 // - memccpy - copying until hitting a special character.
520
521 auto It = findFunctionInConfig(CustomPropagations, FData);
522 if (It != CustomPropagations.end())
523 return It->second.second;
524 return {};
525 }
526
checkPreCall(const CallEvent & Call,CheckerContext & C) const527 void GenericTaintChecker::checkPreCall(const CallEvent &Call,
528 CheckerContext &C) const {
529 Optional<FunctionData> FData = FunctionData::create(Call, C);
530 if (!FData)
531 return;
532
533 // Check for taintedness related errors first: system call, uncontrolled
534 // format string, tainted buffer size.
535 if (checkPre(Call, *FData, C))
536 return;
537
538 // Marks the function's arguments and/or return value tainted if it present in
539 // the list.
540 if (addSourcesPre(Call, *FData, C))
541 return;
542
543 addFiltersPre(Call, *FData, C);
544 }
545
checkPostCall(const CallEvent & Call,CheckerContext & C) const546 void GenericTaintChecker::checkPostCall(const CallEvent &Call,
547 CheckerContext &C) const {
548 // Set the marked values as tainted. The return value only accessible from
549 // checkPostStmt.
550 propagateFromPre(Call, C);
551 }
552
printState(raw_ostream & Out,ProgramStateRef State,const char * NL,const char * Sep) const553 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
554 const char *NL, const char *Sep) const {
555 printTaint(State, Out, NL, Sep);
556 }
557
addSourcesPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const558 bool GenericTaintChecker::addSourcesPre(const CallEvent &Call,
559 const FunctionData &FData,
560 CheckerContext &C) const {
561 // First, try generating a propagation rule for this function.
562 TaintPropagationRule Rule = TaintPropagationRule::getTaintPropagationRule(
563 this->CustomPropagations, FData, C);
564 if (!Rule.isNull()) {
565 ProgramStateRef State = Rule.process(Call, C);
566 if (State) {
567 C.addTransition(State);
568 return true;
569 }
570 }
571 return false;
572 }
573
addFiltersPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const574 bool GenericTaintChecker::addFiltersPre(const CallEvent &Call,
575 const FunctionData &FData,
576 CheckerContext &C) const {
577 auto It = findFunctionInConfig(CustomFilters, FData);
578 if (It == CustomFilters.end())
579 return false;
580
581 ProgramStateRef State = C.getState();
582 const auto &Value = It->second;
583 const ArgVector &Args = Value.second;
584 for (unsigned ArgNum : Args) {
585 if (ArgNum >= Call.getNumArgs())
586 continue;
587
588 const Expr *Arg = Call.getArgExpr(ArgNum);
589 Optional<SVal> V = getPointeeOf(C, Arg);
590 if (V)
591 State = removeTaint(State, *V);
592 }
593
594 if (State != C.getState()) {
595 C.addTransition(State);
596 return true;
597 }
598 return false;
599 }
600
propagateFromPre(const CallEvent & Call,CheckerContext & C)601 bool GenericTaintChecker::propagateFromPre(const CallEvent &Call,
602 CheckerContext &C) {
603 ProgramStateRef State = C.getState();
604
605 // Depending on what was tainted at pre-visit, we determined a set of
606 // arguments which should be tainted after the function returns. These are
607 // stored in the state as TaintArgsOnPostVisit set.
608 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
609 if (TaintArgs.isEmpty())
610 return false;
611
612 for (unsigned ArgNum : TaintArgs) {
613 // Special handling for the tainted return value.
614 if (ArgNum == ReturnValueIndex) {
615 State = addTaint(State, Call.getReturnValue());
616 continue;
617 }
618
619 // The arguments are pointer arguments. The data they are pointing at is
620 // tainted after the call.
621 if (Call.getNumArgs() < (ArgNum + 1))
622 return false;
623 const Expr *Arg = Call.getArgExpr(ArgNum);
624 Optional<SVal> V = getPointeeOf(C, Arg);
625 if (V)
626 State = addTaint(State, *V);
627 }
628
629 // Clear up the taint info from the state.
630 State = State->remove<TaintArgsOnPostVisit>();
631
632 if (State != C.getState()) {
633 C.addTransition(State);
634 return true;
635 }
636 return false;
637 }
638
checkPre(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const639 bool GenericTaintChecker::checkPre(const CallEvent &Call,
640 const FunctionData &FData,
641 CheckerContext &C) const {
642 if (checkUncontrolledFormatString(Call, C))
643 return true;
644
645 if (checkSystemCall(Call, FData.Name, C))
646 return true;
647
648 if (checkTaintedBufferSize(Call, C))
649 return true;
650
651 return checkCustomSinks(Call, FData, C);
652 }
653
getPointeeOf(CheckerContext & C,const Expr * Arg)654 Optional<SVal> GenericTaintChecker::getPointeeOf(CheckerContext &C,
655 const Expr *Arg) {
656 ProgramStateRef State = C.getState();
657 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
658 if (AddrVal.isUnknownOrUndef())
659 return None;
660
661 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
662 if (!AddrLoc)
663 return None;
664
665 QualType ArgTy = Arg->getType().getCanonicalType();
666 if (!ArgTy->isPointerType())
667 return State->getSVal(*AddrLoc);
668
669 QualType ValTy = ArgTy->getPointeeType();
670
671 // Do not dereference void pointers. Treat them as byte pointers instead.
672 // FIXME: we might want to consider more than just the first byte.
673 if (ValTy->isVoidType())
674 ValTy = C.getASTContext().CharTy;
675
676 return State->getSVal(*AddrLoc, ValTy);
677 }
678
679 ProgramStateRef
process(const CallEvent & Call,CheckerContext & C) const680 GenericTaintChecker::TaintPropagationRule::process(const CallEvent &Call,
681 CheckerContext &C) const {
682 ProgramStateRef State = C.getState();
683
684 // Check for taint in arguments.
685 bool IsTainted = true;
686 for (unsigned ArgNum : SrcArgs) {
687 if (ArgNum >= Call.getNumArgs())
688 continue;
689
690 if ((IsTainted =
691 isTaintedOrPointsToTainted(Call.getArgExpr(ArgNum), State, C)))
692 break;
693 }
694
695 // Check for taint in variadic arguments.
696 if (!IsTainted && VariadicType::Src == VarType) {
697 // Check if any of the arguments is tainted
698 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
699 if ((IsTainted =
700 isTaintedOrPointsToTainted(Call.getArgExpr(i), State, C)))
701 break;
702 }
703 }
704
705 if (PropagationFunc)
706 IsTainted = PropagationFunc(IsTainted, Call, C);
707
708 if (!IsTainted)
709 return State;
710
711 // Mark the arguments which should be tainted after the function returns.
712 for (unsigned ArgNum : DstArgs) {
713 // Should mark the return value?
714 if (ArgNum == ReturnValueIndex) {
715 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
716 continue;
717 }
718
719 if (ArgNum >= Call.getNumArgs())
720 continue;
721
722 // Mark the given argument.
723 State = State->add<TaintArgsOnPostVisit>(ArgNum);
724 }
725
726 // Mark all variadic arguments tainted if present.
727 if (VariadicType::Dst == VarType) {
728 // For all pointer and references that were passed in:
729 // If they are not pointing to const data, mark data as tainted.
730 // TODO: So far we are just going one level down; ideally we'd need to
731 // recurse here.
732 for (unsigned i = VariadicIndex; i < Call.getNumArgs(); ++i) {
733 const Expr *Arg = Call.getArgExpr(i);
734 // Process pointer argument.
735 const Type *ArgTy = Arg->getType().getTypePtr();
736 QualType PType = ArgTy->getPointeeType();
737 if ((!PType.isNull() && !PType.isConstQualified()) ||
738 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified())) {
739 State = State->add<TaintArgsOnPostVisit>(i);
740 }
741 }
742 }
743
744 return State;
745 }
746
747 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(bool,const CallEvent & Call,CheckerContext & C)748 bool GenericTaintChecker::TaintPropagationRule::postSocket(
749 bool /*IsTainted*/, const CallEvent &Call, CheckerContext &C) {
750 SourceLocation DomLoc = Call.getArgExpr(0)->getExprLoc();
751 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
752 // White list the internal communication protocols.
753 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
754 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
755 return false;
756 return true;
757 }
758
isStdin(const Expr * E,CheckerContext & C)759 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
760 ProgramStateRef State = C.getState();
761 SVal Val = C.getSVal(E);
762
763 // stdin is a pointer, so it would be a region.
764 const MemRegion *MemReg = Val.getAsRegion();
765
766 // The region should be symbolic, we do not know it's value.
767 const auto *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
768 if (!SymReg)
769 return false;
770
771 // Get it's symbol and find the declaration region it's pointing to.
772 const auto *Sm = dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
773 if (!Sm)
774 return false;
775 const auto *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
776 if (!DeclReg)
777 return false;
778
779 // This region corresponds to a declaration, find out if it's a global/extern
780 // variable named stdin with the proper type.
781 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
782 D = D->getCanonicalDecl();
783 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
784 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
785 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
786 C.getASTContext().getFILEType().getCanonicalType())
787 return true;
788 }
789 }
790 return false;
791 }
792
getPrintfFormatArgumentNum(const CallEvent & Call,const CheckerContext & C,unsigned & ArgNum)793 static bool getPrintfFormatArgumentNum(const CallEvent &Call,
794 const CheckerContext &C,
795 unsigned &ArgNum) {
796 // Find if the function contains a format string argument.
797 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
798 // vsnprintf, syslog, custom annotated functions.
799 const FunctionDecl *FDecl = Call.getDecl()->getAsFunction();
800 if (!FDecl)
801 return false;
802 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
803 ArgNum = Format->getFormatIdx() - 1;
804 if ((Format->getType()->getName() == "printf") &&
805 Call.getNumArgs() > ArgNum)
806 return true;
807 }
808
809 // Or if a function is named setproctitle (this is a heuristic).
810 if (C.getCalleeName(FDecl).find("setproctitle") != StringRef::npos) {
811 ArgNum = 0;
812 return true;
813 }
814
815 return false;
816 }
817
generateReportIfTainted(const Expr * E,StringRef Msg,CheckerContext & C) const818 bool GenericTaintChecker::generateReportIfTainted(const Expr *E, StringRef Msg,
819 CheckerContext &C) const {
820 assert(E);
821
822 // Check for taint.
823 ProgramStateRef State = C.getState();
824 Optional<SVal> PointedToSVal = getPointeeOf(C, E);
825 SVal TaintedSVal;
826 if (PointedToSVal && isTainted(State, *PointedToSVal))
827 TaintedSVal = *PointedToSVal;
828 else if (isTainted(State, E, C.getLocationContext()))
829 TaintedSVal = C.getSVal(E);
830 else
831 return false;
832
833 // Generate diagnostic.
834 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
835 initBugType();
836 auto report = std::make_unique<PathSensitiveBugReport>(*BT, Msg, N);
837 report->addRange(E->getSourceRange());
838 report->addVisitor(std::make_unique<TaintBugVisitor>(TaintedSVal));
839 C.emitReport(std::move(report));
840 return true;
841 }
842 return false;
843 }
844
checkUncontrolledFormatString(const CallEvent & Call,CheckerContext & C) const845 bool GenericTaintChecker::checkUncontrolledFormatString(
846 const CallEvent &Call, CheckerContext &C) const {
847 // Check if the function contains a format string argument.
848 unsigned ArgNum = 0;
849 if (!getPrintfFormatArgumentNum(Call, C, ArgNum))
850 return false;
851
852 // If either the format string content or the pointer itself are tainted,
853 // warn.
854 return generateReportIfTainted(Call.getArgExpr(ArgNum),
855 MsgUncontrolledFormatString, C);
856 }
857
checkSystemCall(const CallEvent & Call,StringRef Name,CheckerContext & C) const858 bool GenericTaintChecker::checkSystemCall(const CallEvent &Call, StringRef Name,
859 CheckerContext &C) const {
860 // TODO: It might make sense to run this check on demand. In some cases,
861 // we should check if the environment has been cleansed here. We also might
862 // need to know if the user was reset before these calls(seteuid).
863 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
864 .Case("system", 0)
865 .Case("popen", 0)
866 .Case("execl", 0)
867 .Case("execle", 0)
868 .Case("execlp", 0)
869 .Case("execv", 0)
870 .Case("execvp", 0)
871 .Case("execvP", 0)
872 .Case("execve", 0)
873 .Case("dlopen", 0)
874 .Default(InvalidArgIndex);
875
876 if (ArgNum == InvalidArgIndex || Call.getNumArgs() < (ArgNum + 1))
877 return false;
878
879 return generateReportIfTainted(Call.getArgExpr(ArgNum), MsgSanitizeSystemArgs,
880 C);
881 }
882
883 // TODO: Should this check be a part of the CString checker?
884 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallEvent & Call,CheckerContext & C) const885 bool GenericTaintChecker::checkTaintedBufferSize(const CallEvent &Call,
886 CheckerContext &C) const {
887 const auto *FDecl = Call.getDecl()->getAsFunction();
888 // If the function has a buffer size argument, set ArgNum.
889 unsigned ArgNum = InvalidArgIndex;
890 unsigned BId = 0;
891 if ((BId = FDecl->getMemoryFunctionKind())) {
892 switch (BId) {
893 case Builtin::BImemcpy:
894 case Builtin::BImemmove:
895 case Builtin::BIstrncpy:
896 ArgNum = 2;
897 break;
898 case Builtin::BIstrndup:
899 ArgNum = 1;
900 break;
901 default:
902 break;
903 }
904 }
905
906 if (ArgNum == InvalidArgIndex) {
907 using CCtx = CheckerContext;
908 if (CCtx::isCLibraryFunction(FDecl, "malloc") ||
909 CCtx::isCLibraryFunction(FDecl, "calloc") ||
910 CCtx::isCLibraryFunction(FDecl, "alloca"))
911 ArgNum = 0;
912 else if (CCtx::isCLibraryFunction(FDecl, "memccpy"))
913 ArgNum = 3;
914 else if (CCtx::isCLibraryFunction(FDecl, "realloc"))
915 ArgNum = 1;
916 else if (CCtx::isCLibraryFunction(FDecl, "bcopy"))
917 ArgNum = 2;
918 }
919
920 return ArgNum != InvalidArgIndex && Call.getNumArgs() > ArgNum &&
921 generateReportIfTainted(Call.getArgExpr(ArgNum), MsgTaintedBufferSize,
922 C);
923 }
924
checkCustomSinks(const CallEvent & Call,const FunctionData & FData,CheckerContext & C) const925 bool GenericTaintChecker::checkCustomSinks(const CallEvent &Call,
926 const FunctionData &FData,
927 CheckerContext &C) const {
928 auto It = findFunctionInConfig(CustomSinks, FData);
929 if (It == CustomSinks.end())
930 return false;
931
932 const auto &Value = It->second;
933 const GenericTaintChecker::ArgVector &Args = Value.second;
934 for (unsigned ArgNum : Args) {
935 if (ArgNum >= Call.getNumArgs())
936 continue;
937
938 if (generateReportIfTainted(Call.getArgExpr(ArgNum), MsgCustomSink, C))
939 return true;
940 }
941
942 return false;
943 }
944
registerGenericTaintChecker(CheckerManager & Mgr)945 void ento::registerGenericTaintChecker(CheckerManager &Mgr) {
946 auto *Checker = Mgr.registerChecker<GenericTaintChecker>();
947 std::string Option{"Config"};
948 StringRef ConfigFile =
949 Mgr.getAnalyzerOptions().getCheckerStringOption(Checker, Option);
950 llvm::Optional<TaintConfig> Config =
951 getConfiguration<TaintConfig>(Mgr, Checker, Option, ConfigFile);
952 if (Config)
953 Checker->parseConfiguration(Mgr, Option, std::move(Config.getValue()));
954 }
955
shouldRegisterGenericTaintChecker(const CheckerManager & mgr)956 bool ento::shouldRegisterGenericTaintChecker(const CheckerManager &mgr) {
957 return true;
958 }
959