1 //== GenericTaintChecker.cpp ----------------------------------- -*- C++ -*--=//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This checker defines the attack surface for generic taint propagation.
10 //
11 // The taint information produced by it might be useful to other checkers. For
12 // example, checkers should report errors which involve tainted data more
13 // aggressively, even if the involved symbols are under constrained.
14 //
15 //===----------------------------------------------------------------------===//
16
17 #include "Taint.h"
18 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
19 #include "clang/AST/Attr.h"
20 #include "clang/Basic/Builtins.h"
21 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
22 #include "clang/StaticAnalyzer/Core/Checker.h"
23 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
26 #include <climits>
27 #include <initializer_list>
28 #include <utility>
29
30 using namespace clang;
31 using namespace ento;
32 using namespace taint;
33
34 namespace {
35 class GenericTaintChecker
36 : public Checker<check::PostStmt<CallExpr>, check::PreStmt<CallExpr>> {
37 public:
getTag()38 static void *getTag() {
39 static int Tag;
40 return &Tag;
41 }
42
43 void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
44
45 void checkPreStmt(const CallExpr *CE, CheckerContext &C) const;
46
47 void printState(raw_ostream &Out, ProgramStateRef State,
48 const char *NL, const char *Sep) const override;
49
50 private:
51 static const unsigned InvalidArgIndex = UINT_MAX;
52 /// Denotes the return vale.
53 static const unsigned ReturnValueIndex = UINT_MAX - 1;
54
55 mutable std::unique_ptr<BugType> BT;
initBugType() const56 void initBugType() const {
57 if (!BT)
58 BT.reset(new BugType(this, "Use of Untrusted Data", "Untrusted Data"));
59 }
60
61 /// Catch taint related bugs. Check if tainted data is passed to a
62 /// system call etc.
63 bool checkPre(const CallExpr *CE, CheckerContext &C) const;
64
65 /// Add taint sources on a pre-visit.
66 void addSourcesPre(const CallExpr *CE, CheckerContext &C) const;
67
68 /// Propagate taint generated at pre-visit.
69 bool propagateFromPre(const CallExpr *CE, CheckerContext &C) const;
70
71 /// Check if the region the expression evaluates to is the standard input,
72 /// and thus, is tainted.
73 static bool isStdin(const Expr *E, CheckerContext &C);
74
75 /// Given a pointer argument, return the value it points to.
76 static Optional<SVal> getPointedToSVal(CheckerContext &C, const Expr *Arg);
77
78 /// Check for CWE-134: Uncontrolled Format String.
79 static const char MsgUncontrolledFormatString[];
80 bool checkUncontrolledFormatString(const CallExpr *CE,
81 CheckerContext &C) const;
82
83 /// Check for:
84 /// CERT/STR02-C. "Sanitize data passed to complex subsystems"
85 /// CWE-78, "Failure to Sanitize Data into an OS Command"
86 static const char MsgSanitizeSystemArgs[];
87 bool checkSystemCall(const CallExpr *CE, StringRef Name,
88 CheckerContext &C) const;
89
90 /// Check if tainted data is used as a buffer size ins strn.. functions,
91 /// and allocators.
92 static const char MsgTaintedBufferSize[];
93 bool checkTaintedBufferSize(const CallExpr *CE, const FunctionDecl *FDecl,
94 CheckerContext &C) const;
95
96 /// Generate a report if the expression is tainted or points to tainted data.
97 bool generateReportIfTainted(const Expr *E, const char Msg[],
98 CheckerContext &C) const;
99
100 using ArgVector = SmallVector<unsigned, 2>;
101
102 /// A struct used to specify taint propagation rules for a function.
103 ///
104 /// If any of the possible taint source arguments is tainted, all of the
105 /// destination arguments should also be tainted. Use InvalidArgIndex in the
106 /// src list to specify that all of the arguments can introduce taint. Use
107 /// InvalidArgIndex in the dst arguments to signify that all the non-const
108 /// pointer and reference arguments might be tainted on return. If
109 /// ReturnValueIndex is added to the dst list, the return value will be
110 /// tainted.
111 struct TaintPropagationRule {
112 enum class VariadicType { None, Src, Dst };
113
114 using PropagationFuncType = bool (*)(bool IsTainted, const CallExpr *,
115 CheckerContext &C);
116
117 /// List of arguments which can be taint sources and should be checked.
118 ArgVector SrcArgs;
119 /// List of arguments which should be tainted on function return.
120 ArgVector DstArgs;
121 /// Index for the first variadic parameter if exist.
122 unsigned VariadicIndex;
123 /// Show when a function has variadic parameters. If it has, it marks all
124 /// of them as source or destination.
125 VariadicType VarType;
126 /// Special function for tainted source determination. If defined, it can
127 /// override the default behavior.
128 PropagationFuncType PropagationFunc;
129
TaintPropagationRule__anon2f692a700111::GenericTaintChecker::TaintPropagationRule130 TaintPropagationRule()
131 : VariadicIndex(InvalidArgIndex), VarType(VariadicType::None),
132 PropagationFunc(nullptr) {}
133
TaintPropagationRule__anon2f692a700111::GenericTaintChecker::TaintPropagationRule134 TaintPropagationRule(std::initializer_list<unsigned> &&Src,
135 std::initializer_list<unsigned> &&Dst,
136 VariadicType Var = VariadicType::None,
137 unsigned VarIndex = InvalidArgIndex,
138 PropagationFuncType Func = nullptr)
139 : SrcArgs(std::move(Src)), DstArgs(std::move(Dst)),
140 VariadicIndex(VarIndex), VarType(Var), PropagationFunc(Func) {}
141
142 /// Get the propagation rule for a given function.
143 static TaintPropagationRule
144 getTaintPropagationRule(const FunctionDecl *FDecl, StringRef Name,
145 CheckerContext &C);
146
addSrcArg__anon2f692a700111::GenericTaintChecker::TaintPropagationRule147 void addSrcArg(unsigned A) { SrcArgs.push_back(A); }
addDstArg__anon2f692a700111::GenericTaintChecker::TaintPropagationRule148 void addDstArg(unsigned A) { DstArgs.push_back(A); }
149
isNull__anon2f692a700111::GenericTaintChecker::TaintPropagationRule150 bool isNull() const {
151 return SrcArgs.empty() && DstArgs.empty() &&
152 VariadicType::None == VarType;
153 }
154
isDestinationArgument__anon2f692a700111::GenericTaintChecker::TaintPropagationRule155 bool isDestinationArgument(unsigned ArgNum) const {
156 return (llvm::find(DstArgs, ArgNum) != DstArgs.end());
157 }
158
isTaintedOrPointsToTainted__anon2f692a700111::GenericTaintChecker::TaintPropagationRule159 static bool isTaintedOrPointsToTainted(const Expr *E, ProgramStateRef State,
160 CheckerContext &C) {
161 if (isTainted(State, E, C.getLocationContext()) || isStdin(E, C))
162 return true;
163
164 if (!E->getType().getTypePtr()->isPointerType())
165 return false;
166
167 Optional<SVal> V = getPointedToSVal(C, E);
168 return (V && isTainted(State, *V));
169 }
170
171 /// Pre-process a function which propagates taint according to the
172 /// taint rule.
173 ProgramStateRef process(const CallExpr *CE, CheckerContext &C) const;
174
175 // Functions for custom taintedness propagation.
176 static bool postSocket(bool IsTainted, const CallExpr *CE,
177 CheckerContext &C);
178 };
179 };
180
181 const unsigned GenericTaintChecker::ReturnValueIndex;
182 const unsigned GenericTaintChecker::InvalidArgIndex;
183
184 const char GenericTaintChecker::MsgUncontrolledFormatString[] =
185 "Untrusted data is used as a format string "
186 "(CWE-134: Uncontrolled Format String)";
187
188 const char GenericTaintChecker::MsgSanitizeSystemArgs[] =
189 "Untrusted data is passed to a system call "
190 "(CERT/STR02-C. Sanitize data passed to complex subsystems)";
191
192 const char GenericTaintChecker::MsgTaintedBufferSize[] =
193 "Untrusted data is used to specify the buffer size "
194 "(CERT/STR31-C. Guarantee that storage for strings has sufficient space "
195 "for character data and the null terminator)";
196
197 } // end of anonymous namespace
198
/// A set used to pass information from a call's pre-visit to its post-visit.
/// Each element is an unsigned integer that is either ReturnValueIndex or the
/// index of a pointer/reference argument whose pointed-to data should be
/// tainted when the call returns. TaintPropagationRule::process() fills the
/// set; propagateFromPre() consumes it and removes it from the state.
REGISTER_SET_WITH_PROGRAMSTATE(TaintArgsOnPostVisit, unsigned)
204
205 GenericTaintChecker::TaintPropagationRule
206 GenericTaintChecker::TaintPropagationRule::getTaintPropagationRule(
207 const FunctionDecl *FDecl, StringRef Name, CheckerContext &C) {
208 // TODO: Currently, we might lose precision here: we always mark a return
209 // value as tainted even if it's just a pointer, pointing to tainted data.
210
211 // Check for exact name match for functions without builtin substitutes.
212 TaintPropagationRule Rule =
213 llvm::StringSwitch<TaintPropagationRule>(Name)
214 // Source functions
215 // TODO: Add support for vfscanf & family.
216 .Case("fdopen", TaintPropagationRule({}, {ReturnValueIndex}))
217 .Case("fopen", TaintPropagationRule({}, {ReturnValueIndex}))
218 .Case("freopen", TaintPropagationRule({}, {ReturnValueIndex}))
219 .Case("getch", TaintPropagationRule({}, {ReturnValueIndex}))
220 .Case("getchar", TaintPropagationRule({}, {ReturnValueIndex}))
221 .Case("getchar_unlocked", TaintPropagationRule({}, {ReturnValueIndex}))
222 .Case("getenv", TaintPropagationRule({}, {ReturnValueIndex}))
223 .Case("gets", TaintPropagationRule({}, {0, ReturnValueIndex}))
224 .Case("scanf", TaintPropagationRule({}, {}, VariadicType::Dst, 1))
225 .Case("socket",
226 TaintPropagationRule({}, {ReturnValueIndex}, VariadicType::None,
227 InvalidArgIndex,
228 &TaintPropagationRule::postSocket))
229 .Case("wgetch", TaintPropagationRule({}, {ReturnValueIndex}))
230 // Propagating functions
231 .Case("atoi", TaintPropagationRule({0}, {ReturnValueIndex}))
232 .Case("atol", TaintPropagationRule({0}, {ReturnValueIndex}))
233 .Case("atoll", TaintPropagationRule({0}, {ReturnValueIndex}))
234 .Case("fgetc", TaintPropagationRule({0}, {ReturnValueIndex}))
235 .Case("fgetln", TaintPropagationRule({0}, {ReturnValueIndex}))
236 .Case("fgets", TaintPropagationRule({2}, {0, ReturnValueIndex}))
237 .Case("fscanf", TaintPropagationRule({0}, {}, VariadicType::Dst, 2))
238 .Case("getc", TaintPropagationRule({0}, {ReturnValueIndex}))
239 .Case("getc_unlocked", TaintPropagationRule({0}, {ReturnValueIndex}))
240 .Case("getdelim", TaintPropagationRule({3}, {0}))
241 .Case("getline", TaintPropagationRule({2}, {0}))
242 .Case("getw", TaintPropagationRule({0}, {ReturnValueIndex}))
243 .Case("pread",
244 TaintPropagationRule({0, 1, 2, 3}, {1, ReturnValueIndex}))
245 .Case("read", TaintPropagationRule({0, 2}, {1, ReturnValueIndex}))
246 .Case("strchr", TaintPropagationRule({0}, {ReturnValueIndex}))
247 .Case("strrchr", TaintPropagationRule({0}, {ReturnValueIndex}))
248 .Case("tolower", TaintPropagationRule({0}, {ReturnValueIndex}))
249 .Case("toupper", TaintPropagationRule({0}, {ReturnValueIndex}))
250 .Default(TaintPropagationRule());
251
252 if (!Rule.isNull())
253 return Rule;
254
255 // Check if it's one of the memory setting/copying functions.
256 // This check is specialized but faster then calling isCLibraryFunction.
257 unsigned BId = 0;
258 if ((BId = FDecl->getMemoryFunctionKind()))
259 switch (BId) {
260 case Builtin::BImemcpy:
261 case Builtin::BImemmove:
262 case Builtin::BIstrncpy:
263 case Builtin::BIstrncat:
264 return TaintPropagationRule({1, 2}, {0, ReturnValueIndex});
265 case Builtin::BIstrlcpy:
266 case Builtin::BIstrlcat:
267 return TaintPropagationRule({1, 2}, {0});
268 case Builtin::BIstrndup:
269 return TaintPropagationRule({0, 1}, {ReturnValueIndex});
270
271 default:
272 break;
273 };
274
275 // Process all other functions which could be defined as builtins.
276 if (Rule.isNull()) {
277 if (C.isCLibraryFunction(FDecl, "snprintf"))
278 return TaintPropagationRule({1}, {0, ReturnValueIndex}, VariadicType::Src,
279 3);
280 else if (C.isCLibraryFunction(FDecl, "sprintf"))
281 return TaintPropagationRule({}, {0, ReturnValueIndex}, VariadicType::Src,
282 2);
283 else if (C.isCLibraryFunction(FDecl, "strcpy") ||
284 C.isCLibraryFunction(FDecl, "stpcpy") ||
285 C.isCLibraryFunction(FDecl, "strcat"))
286 return TaintPropagationRule({1}, {0, ReturnValueIndex});
287 else if (C.isCLibraryFunction(FDecl, "bcopy"))
288 return TaintPropagationRule({0, 2}, {1});
289 else if (C.isCLibraryFunction(FDecl, "strdup") ||
290 C.isCLibraryFunction(FDecl, "strdupa"))
291 return TaintPropagationRule({0}, {ReturnValueIndex});
292 else if (C.isCLibraryFunction(FDecl, "wcsdup"))
293 return TaintPropagationRule({0}, {ReturnValueIndex});
294 }
295
296 // Skipping the following functions, since they might be used for cleansing
297 // or smart memory copy:
298 // - memccpy - copying until hitting a special character.
299
300 return TaintPropagationRule();
301 }
302
checkPreStmt(const CallExpr * CE,CheckerContext & C) const303 void GenericTaintChecker::checkPreStmt(const CallExpr *CE,
304 CheckerContext &C) const {
305 // Check for taintedness related errors first: system call, uncontrolled
306 // format string, tainted buffer size.
307 if (checkPre(CE, C))
308 return;
309
310 // Marks the function's arguments and/or return value tainted if it present in
311 // the list.
312 addSourcesPre(CE, C);
313 }
314
checkPostStmt(const CallExpr * CE,CheckerContext & C) const315 void GenericTaintChecker::checkPostStmt(const CallExpr *CE,
316 CheckerContext &C) const {
317 // Set the marked values as tainted. The return value only accessible from
318 // checkPostStmt.
319 propagateFromPre(CE, C);
320 }
321
printState(raw_ostream & Out,ProgramStateRef State,const char * NL,const char * Sep) const322 void GenericTaintChecker::printState(raw_ostream &Out, ProgramStateRef State,
323 const char *NL, const char *Sep) const {
324 printTaint(State, Out, NL, Sep);
325 }
326
addSourcesPre(const CallExpr * CE,CheckerContext & C) const327 void GenericTaintChecker::addSourcesPre(const CallExpr *CE,
328 CheckerContext &C) const {
329 ProgramStateRef State = nullptr;
330 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
331 if (!FDecl || FDecl->getKind() != Decl::Function)
332 return;
333
334 StringRef Name = C.getCalleeName(FDecl);
335 if (Name.empty())
336 return;
337
338 // First, try generating a propagation rule for this function.
339 TaintPropagationRule Rule =
340 TaintPropagationRule::getTaintPropagationRule(FDecl, Name, C);
341 if (!Rule.isNull()) {
342 State = Rule.process(CE, C);
343 if (!State)
344 return;
345 C.addTransition(State);
346 return;
347 }
348
349 if (!State)
350 return;
351 C.addTransition(State);
352 }
353
propagateFromPre(const CallExpr * CE,CheckerContext & C) const354 bool GenericTaintChecker::propagateFromPre(const CallExpr *CE,
355 CheckerContext &C) const {
356 ProgramStateRef State = C.getState();
357
358 // Depending on what was tainted at pre-visit, we determined a set of
359 // arguments which should be tainted after the function returns. These are
360 // stored in the state as TaintArgsOnPostVisit set.
361 TaintArgsOnPostVisitTy TaintArgs = State->get<TaintArgsOnPostVisit>();
362 if (TaintArgs.isEmpty())
363 return false;
364
365 for (unsigned ArgNum : TaintArgs) {
366 // Special handling for the tainted return value.
367 if (ArgNum == ReturnValueIndex) {
368 State = addTaint(State, CE, C.getLocationContext());
369 continue;
370 }
371
372 // The arguments are pointer arguments. The data they are pointing at is
373 // tainted after the call.
374 if (CE->getNumArgs() < (ArgNum + 1))
375 return false;
376 const Expr *Arg = CE->getArg(ArgNum);
377 Optional<SVal> V = getPointedToSVal(C, Arg);
378 if (V)
379 State = addTaint(State, *V);
380 }
381
382 // Clear up the taint info from the state.
383 State = State->remove<TaintArgsOnPostVisit>();
384
385 if (State != C.getState()) {
386 C.addTransition(State);
387 return true;
388 }
389 return false;
390 }
391
checkPre(const CallExpr * CE,CheckerContext & C) const392 bool GenericTaintChecker::checkPre(const CallExpr *CE,
393 CheckerContext &C) const {
394
395 if (checkUncontrolledFormatString(CE, C))
396 return true;
397
398 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
399 if (!FDecl || FDecl->getKind() != Decl::Function)
400 return false;
401
402 StringRef Name = C.getCalleeName(FDecl);
403 if (Name.empty())
404 return false;
405
406 if (checkSystemCall(CE, Name, C))
407 return true;
408
409 if (checkTaintedBufferSize(CE, FDecl, C))
410 return true;
411
412 return false;
413 }
414
getPointedToSVal(CheckerContext & C,const Expr * Arg)415 Optional<SVal> GenericTaintChecker::getPointedToSVal(CheckerContext &C,
416 const Expr *Arg) {
417 ProgramStateRef State = C.getState();
418 SVal AddrVal = C.getSVal(Arg->IgnoreParens());
419 if (AddrVal.isUnknownOrUndef())
420 return None;
421
422 Optional<Loc> AddrLoc = AddrVal.getAs<Loc>();
423 if (!AddrLoc)
424 return None;
425
426 QualType ArgTy = Arg->getType().getCanonicalType();
427 if (!ArgTy->isPointerType())
428 return None;
429
430 QualType ValTy = ArgTy->getPointeeType();
431
432 // Do not dereference void pointers. Treat them as byte pointers instead.
433 // FIXME: we might want to consider more than just the first byte.
434 if (ValTy->isVoidType())
435 ValTy = C.getASTContext().CharTy;
436
437 return State->getSVal(*AddrLoc, ValTy);
438 }
439
440 ProgramStateRef
process(const CallExpr * CE,CheckerContext & C) const441 GenericTaintChecker::TaintPropagationRule::process(const CallExpr *CE,
442 CheckerContext &C) const {
443 ProgramStateRef State = C.getState();
444
445 // Check for taint in arguments.
446 bool IsTainted = true;
447 for (unsigned ArgNum : SrcArgs) {
448 if (ArgNum >= CE->getNumArgs())
449 return State;
450 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(ArgNum), State, C)))
451 break;
452 }
453
454 // Check for taint in variadic arguments.
455 if (!IsTainted && VariadicType::Src == VarType) {
456 // Check if any of the arguments is tainted
457 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
458 if ((IsTainted = isTaintedOrPointsToTainted(CE->getArg(i), State, C)))
459 break;
460 }
461 }
462
463 if (PropagationFunc)
464 IsTainted = PropagationFunc(IsTainted, CE, C);
465
466 if (!IsTainted)
467 return State;
468
469 // Mark the arguments which should be tainted after the function returns.
470 for (unsigned ArgNum : DstArgs) {
471 // Should mark the return value?
472 if (ArgNum == ReturnValueIndex) {
473 State = State->add<TaintArgsOnPostVisit>(ReturnValueIndex);
474 continue;
475 }
476
477 // Mark the given argument.
478 assert(ArgNum < CE->getNumArgs());
479 State = State->add<TaintArgsOnPostVisit>(ArgNum);
480 }
481
482 // Mark all variadic arguments tainted if present.
483 if (VariadicType::Dst == VarType) {
484 // For all pointer and references that were passed in:
485 // If they are not pointing to const data, mark data as tainted.
486 // TODO: So far we are just going one level down; ideally we'd need to
487 // recurse here.
488 for (unsigned int i = VariadicIndex; i < CE->getNumArgs(); ++i) {
489 const Expr *Arg = CE->getArg(i);
490 // Process pointer argument.
491 const Type *ArgTy = Arg->getType().getTypePtr();
492 QualType PType = ArgTy->getPointeeType();
493 if ((!PType.isNull() && !PType.isConstQualified()) ||
494 (ArgTy->isReferenceType() && !Arg->getType().isConstQualified()))
495 State = State->add<TaintArgsOnPostVisit>(i);
496 }
497 }
498
499 return State;
500 }
501
502 // If argument 0(protocol domain) is network, the return value should get taint.
postSocket(bool,const CallExpr * CE,CheckerContext & C)503 bool GenericTaintChecker::TaintPropagationRule::postSocket(bool /*IsTainted*/,
504 const CallExpr *CE,
505 CheckerContext &C) {
506 SourceLocation DomLoc = CE->getArg(0)->getExprLoc();
507 StringRef DomName = C.getMacroNameOrSpelling(DomLoc);
508 // White list the internal communication protocols.
509 if (DomName.equals("AF_SYSTEM") || DomName.equals("AF_LOCAL") ||
510 DomName.equals("AF_UNIX") || DomName.equals("AF_RESERVED_36"))
511 return false;
512
513 return true;
514 }
515
isStdin(const Expr * E,CheckerContext & C)516 bool GenericTaintChecker::isStdin(const Expr *E, CheckerContext &C) {
517 ProgramStateRef State = C.getState();
518 SVal Val = C.getSVal(E);
519
520 // stdin is a pointer, so it would be a region.
521 const MemRegion *MemReg = Val.getAsRegion();
522
523 // The region should be symbolic, we do not know it's value.
524 const SymbolicRegion *SymReg = dyn_cast_or_null<SymbolicRegion>(MemReg);
525 if (!SymReg)
526 return false;
527
528 // Get it's symbol and find the declaration region it's pointing to.
529 const SymbolRegionValue *Sm =
530 dyn_cast<SymbolRegionValue>(SymReg->getSymbol());
531 if (!Sm)
532 return false;
533 const DeclRegion *DeclReg = dyn_cast_or_null<DeclRegion>(Sm->getRegion());
534 if (!DeclReg)
535 return false;
536
537 // This region corresponds to a declaration, find out if it's a global/extern
538 // variable named stdin with the proper type.
539 if (const auto *D = dyn_cast_or_null<VarDecl>(DeclReg->getDecl())) {
540 D = D->getCanonicalDecl();
541 if ((D->getName().find("stdin") != StringRef::npos) && D->isExternC()) {
542 const auto *PtrTy = dyn_cast<PointerType>(D->getType().getTypePtr());
543 if (PtrTy && PtrTy->getPointeeType().getCanonicalType() ==
544 C.getASTContext().getFILEType().getCanonicalType())
545 return true;
546 }
547 }
548 return false;
549 }
550
getPrintfFormatArgumentNum(const CallExpr * CE,const CheckerContext & C,unsigned int & ArgNum)551 static bool getPrintfFormatArgumentNum(const CallExpr *CE,
552 const CheckerContext &C,
553 unsigned int &ArgNum) {
554 // Find if the function contains a format string argument.
555 // Handles: fprintf, printf, sprintf, snprintf, vfprintf, vprintf, vsprintf,
556 // vsnprintf, syslog, custom annotated functions.
557 const FunctionDecl *FDecl = C.getCalleeDecl(CE);
558 if (!FDecl)
559 return false;
560 for (const auto *Format : FDecl->specific_attrs<FormatAttr>()) {
561 ArgNum = Format->getFormatIdx() - 1;
562 if ((Format->getType()->getName() == "printf") && CE->getNumArgs() > ArgNum)
563 return true;
564 }
565
566 // Or if a function is named setproctitle (this is a heuristic).
567 if (C.getCalleeName(CE).find("setproctitle") != StringRef::npos) {
568 ArgNum = 0;
569 return true;
570 }
571
572 return false;
573 }
574
generateReportIfTainted(const Expr * E,const char Msg[],CheckerContext & C) const575 bool GenericTaintChecker::generateReportIfTainted(const Expr *E,
576 const char Msg[],
577 CheckerContext &C) const {
578 assert(E);
579
580 // Check for taint.
581 ProgramStateRef State = C.getState();
582 Optional<SVal> PointedToSVal = getPointedToSVal(C, E);
583 SVal TaintedSVal;
584 if (PointedToSVal && isTainted(State, *PointedToSVal))
585 TaintedSVal = *PointedToSVal;
586 else if (isTainted(State, E, C.getLocationContext()))
587 TaintedSVal = C.getSVal(E);
588 else
589 return false;
590
591 // Generate diagnostic.
592 if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
593 initBugType();
594 auto report = llvm::make_unique<BugReport>(*BT, Msg, N);
595 report->addRange(E->getSourceRange());
596 report->addVisitor(llvm::make_unique<TaintBugVisitor>(TaintedSVal));
597 C.emitReport(std::move(report));
598 return true;
599 }
600 return false;
601 }
602
checkUncontrolledFormatString(const CallExpr * CE,CheckerContext & C) const603 bool GenericTaintChecker::checkUncontrolledFormatString(
604 const CallExpr *CE, CheckerContext &C) const {
605 // Check if the function contains a format string argument.
606 unsigned int ArgNum = 0;
607 if (!getPrintfFormatArgumentNum(CE, C, ArgNum))
608 return false;
609
610 // If either the format string content or the pointer itself are tainted,
611 // warn.
612 return generateReportIfTainted(CE->getArg(ArgNum),
613 MsgUncontrolledFormatString, C);
614 }
615
checkSystemCall(const CallExpr * CE,StringRef Name,CheckerContext & C) const616 bool GenericTaintChecker::checkSystemCall(const CallExpr *CE, StringRef Name,
617 CheckerContext &C) const {
618 // TODO: It might make sense to run this check on demand. In some cases,
619 // we should check if the environment has been cleansed here. We also might
620 // need to know if the user was reset before these calls(seteuid).
621 unsigned ArgNum = llvm::StringSwitch<unsigned>(Name)
622 .Case("system", 0)
623 .Case("popen", 0)
624 .Case("execl", 0)
625 .Case("execle", 0)
626 .Case("execlp", 0)
627 .Case("execv", 0)
628 .Case("execvp", 0)
629 .Case("execvP", 0)
630 .Case("execve", 0)
631 .Case("dlopen", 0)
632 .Default(UINT_MAX);
633
634 if (ArgNum == UINT_MAX || CE->getNumArgs() < (ArgNum + 1))
635 return false;
636
637 return generateReportIfTainted(CE->getArg(ArgNum), MsgSanitizeSystemArgs, C);
638 }
639
640 // TODO: Should this check be a part of the CString checker?
641 // If yes, should taint be a global setting?
checkTaintedBufferSize(const CallExpr * CE,const FunctionDecl * FDecl,CheckerContext & C) const642 bool GenericTaintChecker::checkTaintedBufferSize(const CallExpr *CE,
643 const FunctionDecl *FDecl,
644 CheckerContext &C) const {
645 // If the function has a buffer size argument, set ArgNum.
646 unsigned ArgNum = InvalidArgIndex;
647 unsigned BId = 0;
648 if ((BId = FDecl->getMemoryFunctionKind()))
649 switch (BId) {
650 case Builtin::BImemcpy:
651 case Builtin::BImemmove:
652 case Builtin::BIstrncpy:
653 ArgNum = 2;
654 break;
655 case Builtin::BIstrndup:
656 ArgNum = 1;
657 break;
658 default:
659 break;
660 };
661
662 if (ArgNum == InvalidArgIndex) {
663 if (C.isCLibraryFunction(FDecl, "malloc") ||
664 C.isCLibraryFunction(FDecl, "calloc") ||
665 C.isCLibraryFunction(FDecl, "alloca"))
666 ArgNum = 0;
667 else if (C.isCLibraryFunction(FDecl, "memccpy"))
668 ArgNum = 3;
669 else if (C.isCLibraryFunction(FDecl, "realloc"))
670 ArgNum = 1;
671 else if (C.isCLibraryFunction(FDecl, "bcopy"))
672 ArgNum = 2;
673 }
674
675 return ArgNum != InvalidArgIndex && CE->getNumArgs() > ArgNum &&
676 generateReportIfTainted(CE->getArg(ArgNum), MsgTaintedBufferSize, C);
677 }
678
/// Registers GenericTaintChecker with the given checker manager.
void ento::registerGenericTaintChecker(CheckerManager &mgr) {
  mgr.registerChecker<GenericTaintChecker>();
}
682
/// Tells whether the checker should be registered; it returns true
/// unconditionally and does not consult the language options.
bool ento::shouldRegisterGenericTaintChecker(const LangOptions &LO) {
  return true;
}
686