1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines PointerArithChecker, a builtin checker that checks for
10 // pointer arithmetic on locations other than array elements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/AST/DeclCXX.h"
15 #include "clang/AST/ExprCXX.h"
16 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21 #include "llvm/ADT/StringRef.h"
22 
23 using namespace clang;
24 using namespace ento;
25 
26 namespace {
/// What this checker knows about how a tracked memory region was obtained.
/// The enumerator order is significant: the integer value of each enumerator
/// is what gets profiled by FoldingSetTrait<AllocKind>.
enum class AllocKind {
  /// Result of a non-array new expression using a plain operator new.
  SingleObject,
  /// Memory treated as an array: array new, the recognised C allocation
  /// functions, or a region that went through array-to-pointer decay.
  Array,
  /// The checker declines to classify the allocation (member, placement or
  /// variadic operator new).
  Unknown,
  /// Single object interpreted as an array (the region was bit-cast).
  Reinterpreted
};
33 } // end namespace
34 
35 namespace llvm {
36 template <> struct FoldingSetTrait<AllocKind> {
37   static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
38     ID.AddInteger(static_cast<int>(X));
39   }
40 };
41 } // end namespace llvm
42 
43 namespace {
44 class PointerArithChecker
45     : public Checker<
46           check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
47           check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
48           check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
49           check::PostStmt<CallExpr>, check::DeadSymbols> {
50   AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
51   const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
52                                   AllocKind &AKind, CheckerContext &C) const;
53   const MemRegion *getPointedRegion(const MemRegion *Region,
54                                     CheckerContext &C) const;
55   void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
56                                 bool PointedNeeded = false) const;
57   void initAllocIdentifiers(ASTContext &C) const;
58 
59   const BugType BT_pointerArith{this, "Dangerous pointer arithmetic"};
60   const BugType BT_polyArray{this, "Dangerous pointer arithmetic"};
61   mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
62 
63 public:
64   void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
65   void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
66   void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
67   void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
68   void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
69   void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
70   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
71   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
72 };
73 } // end namespace
74 
75 REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
76 
77 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
78                                            CheckerContext &C) const {
79   // TODO: intentional leak. Some information is garbage collected too early,
80   // see http://reviews.llvm.org/D14203 for further information.
81   /*ProgramStateRef State = C.getState();
82   RegionStateTy RegionStates = State->get<RegionState>();
83   for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) {
84     if (!SR.isLiveRegion(Reg))
85       State = State->remove<RegionState>(Reg);
86   }
87   C.addTransition(State);*/
88 }
89 
90 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
91                                               const FunctionDecl *FD) const {
92   // This checker try not to assume anything about placement and overloaded
93   // new to avoid false positives.
94   if (isa<CXXMethodDecl>(FD))
95     return AllocKind::Unknown;
96   if (FD->getNumParams() != 1 || FD->isVariadic())
97     return AllocKind::Unknown;
98   if (NE->isArray())
99     return AllocKind::Array;
100 
101   return AllocKind::SingleObject;
102 }
103 
104 const MemRegion *
105 PointerArithChecker::getPointedRegion(const MemRegion *Region,
106                                       CheckerContext &C) const {
107   assert(Region);
108   ProgramStateRef State = C.getState();
109   SVal S = State->getSVal(Region);
110   return S.getAsRegion();
111 }
112 
113 /// Checks whether a region is the part of an array.
114 /// In case there is a derived to base cast above the array element, the
115 /// Polymorphic output value is set to true. AKind output value is set to the
116 /// allocation kind of the inspected region.
117 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
118                                                      bool &Polymorphic,
119                                                      AllocKind &AKind,
120                                                      CheckerContext &C) const {
121   assert(Region);
122   while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
123     Region = BaseRegion->getSuperRegion();
124     Polymorphic = true;
125   }
126   if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
127     Region = ElemRegion->getSuperRegion();
128   }
129 
130   ProgramStateRef State = C.getState();
131   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
132     AKind = *Kind;
133     if (*Kind == AllocKind::Array)
134       return Region;
135     else
136       return nullptr;
137   }
138   // When the region is symbolic and we do not have any information about it,
139   // assume that this is an array to avoid false positives.
140   if (isa<SymbolicRegion>(Region))
141     return Region;
142 
143   // No AllocKind stored and not symbolic, assume that it points to a single
144   // object.
145   return nullptr;
146 }
147 
148 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
149                                                    CheckerContext &C,
150                                                    bool PointedNeeded) const {
151   SourceRange SR = E->getSourceRange();
152   if (SR.isInvalid())
153     return;
154 
155   ProgramStateRef State = C.getState();
156   const MemRegion *Region = C.getSVal(E).getAsRegion();
157   if (!Region)
158     return;
159   if (PointedNeeded)
160     Region = getPointedRegion(Region, C);
161   if (!Region)
162     return;
163 
164   bool IsPolymorphic = false;
165   AllocKind Kind = AllocKind::Unknown;
166   if (const MemRegion *ArrayRegion =
167           getArrayRegion(Region, IsPolymorphic, Kind, C)) {
168     if (!IsPolymorphic)
169       return;
170     if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
171       constexpr llvm::StringLiteral Msg =
172           "Pointer arithmetic on a pointer to base class is dangerous "
173           "because derived and base class may have different size.";
174       auto R = std::make_unique<PathSensitiveBugReport>(BT_polyArray, Msg, N);
175       R->addRange(E->getSourceRange());
176       R->markInteresting(ArrayRegion);
177       C.emitReport(std::move(R));
178     }
179     return;
180   }
181 
182   if (Kind == AllocKind::Reinterpreted)
183     return;
184 
185   // We might not have enough information about symbolic regions.
186   if (Kind != AllocKind::SingleObject &&
187       Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
188     return;
189 
190   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
191     constexpr llvm::StringLiteral Msg =
192         "Pointer arithmetic on non-array variables relies on memory layout, "
193         "which is dangerous.";
194     auto R = std::make_unique<PathSensitiveBugReport>(BT_pointerArith, Msg, N);
195     R->addRange(SR);
196     R->markInteresting(Region);
197     C.emitReport(std::move(R));
198   }
199 }
200 
201 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
202   if (!AllocFunctions.empty())
203     return;
204   AllocFunctions.insert(&C.Idents.get("alloca"));
205   AllocFunctions.insert(&C.Idents.get("malloc"));
206   AllocFunctions.insert(&C.Idents.get("realloc"));
207   AllocFunctions.insert(&C.Idents.get("calloc"));
208   AllocFunctions.insert(&C.Idents.get("valloc"));
209 }
210 
211 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
212                                         CheckerContext &C) const {
213   ProgramStateRef State = C.getState();
214   const FunctionDecl *FD = C.getCalleeDecl(CE);
215   if (!FD)
216     return;
217   IdentifierInfo *FunI = FD->getIdentifier();
218   initAllocIdentifiers(C.getASTContext());
219   if (AllocFunctions.count(FunI) == 0)
220     return;
221 
222   SVal SV = C.getSVal(CE);
223   const MemRegion *Region = SV.getAsRegion();
224   if (!Region)
225     return;
226   // Assume that C allocation functions allocate arrays to avoid false
227   // positives.
228   // TODO: Add heuristics to distinguish alloc calls that allocates single
229   // objecs.
230   State = State->set<RegionState>(Region, AllocKind::Array);
231   C.addTransition(State);
232 }
233 
234 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
235                                         CheckerContext &C) const {
236   const FunctionDecl *FD = NE->getOperatorNew();
237   if (!FD)
238     return;
239 
240   AllocKind Kind = getKindOfNewOp(NE, FD);
241 
242   ProgramStateRef State = C.getState();
243   SVal AllocedVal = C.getSVal(NE);
244   const MemRegion *Region = AllocedVal.getAsRegion();
245   if (!Region)
246     return;
247   State = State->set<RegionState>(Region, Kind);
248   C.addTransition(State);
249 }
250 
251 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
252                                         CheckerContext &C) const {
253   if (CE->getCastKind() != CastKind::CK_BitCast)
254     return;
255 
256   const Expr *CastedExpr = CE->getSubExpr();
257   ProgramStateRef State = C.getState();
258   SVal CastedVal = C.getSVal(CastedExpr);
259 
260   const MemRegion *Region = CastedVal.getAsRegion();
261   if (!Region)
262     return;
263 
264   // Suppress reinterpret casted hits.
265   State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
266   C.addTransition(State);
267 }
268 
269 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
270                                        CheckerContext &C) const {
271   if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
272     return;
273 
274   const Expr *CastedExpr = CE->getSubExpr();
275   ProgramStateRef State = C.getState();
276   SVal CastedVal = C.getSVal(CastedExpr);
277 
278   const MemRegion *Region = CastedVal.getAsRegion();
279   if (!Region)
280     return;
281 
282   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
283     if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
284       return;
285   }
286   State = State->set<RegionState>(Region, AllocKind::Array);
287   C.addTransition(State);
288 }
289 
290 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
291                                        CheckerContext &C) const {
292   if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
293     return;
294   reportPointerArithMisuse(UOp->getSubExpr(), C, true);
295 }
296 
297 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
298                                        CheckerContext &C) const {
299   SVal Idx = C.getSVal(SubsExpr->getIdx());
300 
301   // Indexing with 0 is OK.
302   if (Idx.isZeroConstant())
303     return;
304 
305   // Indexing vector-type expressions is also OK.
306   if (SubsExpr->getBase()->getType()->isVectorType())
307     return;
308   reportPointerArithMisuse(SubsExpr->getBase(), C);
309 }
310 
311 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
312                                        CheckerContext &C) const {
313   BinaryOperatorKind OpKind = BOp->getOpcode();
314   if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
315     return;
316 
317   const Expr *Lhs = BOp->getLHS();
318   const Expr *Rhs = BOp->getRHS();
319   ProgramStateRef State = C.getState();
320 
321   if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
322     SVal RHSVal = C.getSVal(Rhs);
323     if (State->isNull(RHSVal).isConstrainedTrue())
324       return;
325     reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
326   }
327   // The int += ptr; case is not valid C++.
328   if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
329     SVal LHSVal = C.getSVal(Lhs);
330     if (State->isNull(LHSVal).isConstrainedTrue())
331       return;
332     reportPointerArithMisuse(Rhs, C);
333   }
334 }
335 
336 void ento::registerPointerArithChecker(CheckerManager &mgr) {
337   mgr.registerChecker<PointerArithChecker>();
338 }
339 
340 bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) {
341   return true;
342 }
343