1 //=== PointerArithChecker.cpp - Pointer arithmetic checker -----*- C++ -*--===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
// This file defines PointerArithChecker, a builtin checker that checks for
10 // pointer arithmetic on locations other than array elements.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/StaticAnalyzer/Checkers/BuiltinCheckerRegistration.h"
15 #include "clang/AST/DeclCXX.h"
16 #include "clang/AST/ExprCXX.h"
17 #include "clang/StaticAnalyzer/Core/BugReporter/BugType.h"
18 #include "clang/StaticAnalyzer/Core/Checker.h"
19 #include "clang/StaticAnalyzer/Core/CheckerManager.h"
20 #include "clang/StaticAnalyzer/Core/PathSensitive/CheckerContext.h"
21 
22 using namespace clang;
23 using namespace ento;
24 
namespace {
/// Classification stored per memory region in the RegionState map; it decides
/// whether pointer arithmetic on that region is reported.
enum class AllocKind {
  /// Result of a non-array, non-placement, non-overloaded `new`.
  SingleObject,
  /// Array `new`, a known C allocation function, or an array that decayed to a
  /// pointer.
  Array,
  /// Nothing can be assumed (e.g. placement or overloaded `new`).
  Unknown,
  /// Single object interpreted as an array.
  Reinterpreted
};
} // end namespace
33 
34 namespace llvm {
35 template <> struct FoldingSetTrait<AllocKind> {
36   static inline void Profile(AllocKind X, FoldingSetNodeID &ID) {
37     ID.AddInteger(static_cast<int>(X));
38   }
39 };
40 } // end namespace llvm
41 
42 namespace {
43 class PointerArithChecker
44     : public Checker<
45           check::PreStmt<BinaryOperator>, check::PreStmt<UnaryOperator>,
46           check::PreStmt<ArraySubscriptExpr>, check::PreStmt<CastExpr>,
47           check::PostStmt<CastExpr>, check::PostStmt<CXXNewExpr>,
48           check::PostStmt<CallExpr>, check::DeadSymbols> {
49   AllocKind getKindOfNewOp(const CXXNewExpr *NE, const FunctionDecl *FD) const;
50   const MemRegion *getArrayRegion(const MemRegion *Region, bool &Polymorphic,
51                                   AllocKind &AKind, CheckerContext &C) const;
52   const MemRegion *getPointedRegion(const MemRegion *Region,
53                                     CheckerContext &C) const;
54   void reportPointerArithMisuse(const Expr *E, CheckerContext &C,
55                                 bool PointedNeeded = false) const;
56   void initAllocIdentifiers(ASTContext &C) const;
57 
58   mutable std::unique_ptr<BuiltinBug> BT_pointerArith;
59   mutable std::unique_ptr<BuiltinBug> BT_polyArray;
60   mutable llvm::SmallSet<IdentifierInfo *, 8> AllocFunctions;
61 
62 public:
63   void checkPreStmt(const UnaryOperator *UOp, CheckerContext &C) const;
64   void checkPreStmt(const BinaryOperator *BOp, CheckerContext &C) const;
65   void checkPreStmt(const ArraySubscriptExpr *SubExpr, CheckerContext &C) const;
66   void checkPreStmt(const CastExpr *CE, CheckerContext &C) const;
67   void checkPostStmt(const CastExpr *CE, CheckerContext &C) const;
68   void checkPostStmt(const CXXNewExpr *NE, CheckerContext &C) const;
69   void checkPostStmt(const CallExpr *CE, CheckerContext &C) const;
70   void checkDeadSymbols(SymbolReaper &SR, CheckerContext &C) const;
71 };
72 } // end namespace
73 
// Program-state map named RegionState: associates a memory region with the
// AllocKind that the checker callbacks in this file recorded for it.
REGISTER_MAP_WITH_PROGRAMSTATE(RegionState, const MemRegion *, AllocKind)
75 
76 void PointerArithChecker::checkDeadSymbols(SymbolReaper &SR,
77                                            CheckerContext &C) const {
78   // TODO: intentional leak. Some information is garbage collected too early,
79   // see http://reviews.llvm.org/D14203 for further information.
80   /*ProgramStateRef State = C.getState();
81   RegionStateTy RegionStates = State->get<RegionState>();
82   for (const MemRegion *Reg: llvm::make_first_range(RegionStates)) {
83     if (!SR.isLiveRegion(Reg))
84       State = State->remove<RegionState>(Reg);
85   }
86   C.addTransition(State);*/
87 }
88 
89 AllocKind PointerArithChecker::getKindOfNewOp(const CXXNewExpr *NE,
90                                               const FunctionDecl *FD) const {
91   // This checker try not to assume anything about placement and overloaded
92   // new to avoid false positives.
93   if (isa<CXXMethodDecl>(FD))
94     return AllocKind::Unknown;
95   if (FD->getNumParams() != 1 || FD->isVariadic())
96     return AllocKind::Unknown;
97   if (NE->isArray())
98     return AllocKind::Array;
99 
100   return AllocKind::SingleObject;
101 }
102 
103 const MemRegion *
104 PointerArithChecker::getPointedRegion(const MemRegion *Region,
105                                       CheckerContext &C) const {
106   assert(Region);
107   ProgramStateRef State = C.getState();
108   SVal S = State->getSVal(Region);
109   return S.getAsRegion();
110 }
111 
112 /// Checks whether a region is the part of an array.
113 /// In case there is a derived to base cast above the array element, the
114 /// Polymorphic output value is set to true. AKind output value is set to the
115 /// allocation kind of the inspected region.
116 const MemRegion *PointerArithChecker::getArrayRegion(const MemRegion *Region,
117                                                      bool &Polymorphic,
118                                                      AllocKind &AKind,
119                                                      CheckerContext &C) const {
120   assert(Region);
121   while (const auto *BaseRegion = dyn_cast<CXXBaseObjectRegion>(Region)) {
122     Region = BaseRegion->getSuperRegion();
123     Polymorphic = true;
124   }
125   if (const auto *ElemRegion = dyn_cast<ElementRegion>(Region)) {
126     Region = ElemRegion->getSuperRegion();
127   }
128 
129   ProgramStateRef State = C.getState();
130   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
131     AKind = *Kind;
132     if (*Kind == AllocKind::Array)
133       return Region;
134     else
135       return nullptr;
136   }
137   // When the region is symbolic and we do not have any information about it,
138   // assume that this is an array to avoid false positives.
139   if (isa<SymbolicRegion>(Region))
140     return Region;
141 
142   // No AllocKind stored and not symbolic, assume that it points to a single
143   // object.
144   return nullptr;
145 }
146 
147 void PointerArithChecker::reportPointerArithMisuse(const Expr *E,
148                                                    CheckerContext &C,
149                                                    bool PointedNeeded) const {
150   SourceRange SR = E->getSourceRange();
151   if (SR.isInvalid())
152     return;
153 
154   ProgramStateRef State = C.getState();
155   const MemRegion *Region = C.getSVal(E).getAsRegion();
156   if (!Region)
157     return;
158   if (PointedNeeded)
159     Region = getPointedRegion(Region, C);
160   if (!Region)
161     return;
162 
163   bool IsPolymorphic = false;
164   AllocKind Kind = AllocKind::Unknown;
165   if (const MemRegion *ArrayRegion =
166           getArrayRegion(Region, IsPolymorphic, Kind, C)) {
167     if (!IsPolymorphic)
168       return;
169     if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
170       if (!BT_polyArray)
171         BT_polyArray.reset(new BuiltinBug(
172             this, "Dangerous pointer arithmetic",
173             "Pointer arithmetic on a pointer to base class is dangerous "
174             "because derived and base class may have different size."));
175       auto R = std::make_unique<PathSensitiveBugReport>(
176           *BT_polyArray, BT_polyArray->getDescription(), N);
177       R->addRange(E->getSourceRange());
178       R->markInteresting(ArrayRegion);
179       C.emitReport(std::move(R));
180     }
181     return;
182   }
183 
184   if (Kind == AllocKind::Reinterpreted)
185     return;
186 
187   // We might not have enough information about symbolic regions.
188   if (Kind != AllocKind::SingleObject &&
189       Region->getKind() == MemRegion::Kind::SymbolicRegionKind)
190     return;
191 
192   if (ExplodedNode *N = C.generateNonFatalErrorNode()) {
193     if (!BT_pointerArith)
194       BT_pointerArith.reset(new BuiltinBug(this, "Dangerous pointer arithmetic",
195                                            "Pointer arithmetic on non-array "
196                                            "variables relies on memory layout, "
197                                            "which is dangerous."));
198     auto R = std::make_unique<PathSensitiveBugReport>(
199         *BT_pointerArith, BT_pointerArith->getDescription(), N);
200     R->addRange(SR);
201     R->markInteresting(Region);
202     C.emitReport(std::move(R));
203   }
204 }
205 
206 void PointerArithChecker::initAllocIdentifiers(ASTContext &C) const {
207   if (!AllocFunctions.empty())
208     return;
209   AllocFunctions.insert(&C.Idents.get("alloca"));
210   AllocFunctions.insert(&C.Idents.get("malloc"));
211   AllocFunctions.insert(&C.Idents.get("realloc"));
212   AllocFunctions.insert(&C.Idents.get("calloc"));
213   AllocFunctions.insert(&C.Idents.get("valloc"));
214 }
215 
216 void PointerArithChecker::checkPostStmt(const CallExpr *CE,
217                                         CheckerContext &C) const {
218   ProgramStateRef State = C.getState();
219   const FunctionDecl *FD = C.getCalleeDecl(CE);
220   if (!FD)
221     return;
222   IdentifierInfo *FunI = FD->getIdentifier();
223   initAllocIdentifiers(C.getASTContext());
224   if (AllocFunctions.count(FunI) == 0)
225     return;
226 
227   SVal SV = C.getSVal(CE);
228   const MemRegion *Region = SV.getAsRegion();
229   if (!Region)
230     return;
231   // Assume that C allocation functions allocate arrays to avoid false
232   // positives.
233   // TODO: Add heuristics to distinguish alloc calls that allocates single
234   // objecs.
235   State = State->set<RegionState>(Region, AllocKind::Array);
236   C.addTransition(State);
237 }
238 
239 void PointerArithChecker::checkPostStmt(const CXXNewExpr *NE,
240                                         CheckerContext &C) const {
241   const FunctionDecl *FD = NE->getOperatorNew();
242   if (!FD)
243     return;
244 
245   AllocKind Kind = getKindOfNewOp(NE, FD);
246 
247   ProgramStateRef State = C.getState();
248   SVal AllocedVal = C.getSVal(NE);
249   const MemRegion *Region = AllocedVal.getAsRegion();
250   if (!Region)
251     return;
252   State = State->set<RegionState>(Region, Kind);
253   C.addTransition(State);
254 }
255 
256 void PointerArithChecker::checkPostStmt(const CastExpr *CE,
257                                         CheckerContext &C) const {
258   if (CE->getCastKind() != CastKind::CK_BitCast)
259     return;
260 
261   const Expr *CastedExpr = CE->getSubExpr();
262   ProgramStateRef State = C.getState();
263   SVal CastedVal = C.getSVal(CastedExpr);
264 
265   const MemRegion *Region = CastedVal.getAsRegion();
266   if (!Region)
267     return;
268 
269   // Suppress reinterpret casted hits.
270   State = State->set<RegionState>(Region, AllocKind::Reinterpreted);
271   C.addTransition(State);
272 }
273 
274 void PointerArithChecker::checkPreStmt(const CastExpr *CE,
275                                        CheckerContext &C) const {
276   if (CE->getCastKind() != CastKind::CK_ArrayToPointerDecay)
277     return;
278 
279   const Expr *CastedExpr = CE->getSubExpr();
280   ProgramStateRef State = C.getState();
281   SVal CastedVal = C.getSVal(CastedExpr);
282 
283   const MemRegion *Region = CastedVal.getAsRegion();
284   if (!Region)
285     return;
286 
287   if (const AllocKind *Kind = State->get<RegionState>(Region)) {
288     if (*Kind == AllocKind::Array || *Kind == AllocKind::Reinterpreted)
289       return;
290   }
291   State = State->set<RegionState>(Region, AllocKind::Array);
292   C.addTransition(State);
293 }
294 
295 void PointerArithChecker::checkPreStmt(const UnaryOperator *UOp,
296                                        CheckerContext &C) const {
297   if (!UOp->isIncrementDecrementOp() || !UOp->getType()->isPointerType())
298     return;
299   reportPointerArithMisuse(UOp->getSubExpr(), C, true);
300 }
301 
302 void PointerArithChecker::checkPreStmt(const ArraySubscriptExpr *SubsExpr,
303                                        CheckerContext &C) const {
304   SVal Idx = C.getSVal(SubsExpr->getIdx());
305 
306   // Indexing with 0 is OK.
307   if (Idx.isZeroConstant())
308     return;
309 
310   // Indexing vector-type expressions is also OK.
311   if (SubsExpr->getBase()->getType()->isVectorType())
312     return;
313   reportPointerArithMisuse(SubsExpr->getBase(), C);
314 }
315 
316 void PointerArithChecker::checkPreStmt(const BinaryOperator *BOp,
317                                        CheckerContext &C) const {
318   BinaryOperatorKind OpKind = BOp->getOpcode();
319   if (!BOp->isAdditiveOp() && OpKind != BO_AddAssign && OpKind != BO_SubAssign)
320     return;
321 
322   const Expr *Lhs = BOp->getLHS();
323   const Expr *Rhs = BOp->getRHS();
324   ProgramStateRef State = C.getState();
325 
326   if (Rhs->getType()->isIntegerType() && Lhs->getType()->isPointerType()) {
327     SVal RHSVal = C.getSVal(Rhs);
328     if (State->isNull(RHSVal).isConstrainedTrue())
329       return;
330     reportPointerArithMisuse(Lhs, C, !BOp->isAdditiveOp());
331   }
332   // The int += ptr; case is not valid C++.
333   if (Lhs->getType()->isIntegerType() && Rhs->getType()->isPointerType()) {
334     SVal LHSVal = C.getSVal(Lhs);
335     if (State->isNull(LHSVal).isConstrainedTrue())
336       return;
337     reportPointerArithMisuse(Rhs, C);
338   }
339 }
340 
341 void ento::registerPointerArithChecker(CheckerManager &mgr) {
342   mgr.registerChecker<PointerArithChecker>();
343 }
344 
345 bool ento::shouldRegisterPointerArithChecker(const CheckerManager &mgr) {
346   return true;
347 }
348