1 //== RangedConstraintManager.h ----------------------------------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  Ranged constraint manager, built on SimpleConstraintManager.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
14 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_RANGEDCONSTRAINTMANAGER_H
15 
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/SimpleConstraintManager.h"
19 #include "llvm/ADT/APSInt.h"
20 #include "llvm/Support/Allocator.h"
21 
22 namespace clang {
23 
24 namespace ento {
25 
26 /// A Range represents the closed range [from, to].  The caller must
27 /// guarantee that from <= to.  Note that Range is immutable, so as not
28 /// to subvert RangeSet's immutability.
29 class Range {
30 public:
Range(const llvm::APSInt & From,const llvm::APSInt & To)31   Range(const llvm::APSInt &From, const llvm::APSInt &To) : Impl(&From, &To) {
32     assert(From <= To);
33   }
34 
Range(const llvm::APSInt & Point)35   Range(const llvm::APSInt &Point) : Range(Point, Point) {}
36 
Includes(const llvm::APSInt & Point)37   bool Includes(const llvm::APSInt &Point) const {
38     return From() <= Point && Point <= To();
39   }
From()40   const llvm::APSInt &From() const { return *Impl.first; }
To()41   const llvm::APSInt &To() const { return *Impl.second; }
getConcreteValue()42   const llvm::APSInt *getConcreteValue() const {
43     return &From() == &To() ? &From() : nullptr;
44   }
45 
Profile(llvm::FoldingSetNodeID & ID)46   void Profile(llvm::FoldingSetNodeID &ID) const {
47     ID.AddPointer(&From());
48     ID.AddPointer(&To());
49   }
50   void dump(raw_ostream &OS) const;
51   void dump() const;
52 
53   // In order to keep non-overlapping ranges sorted, we can compare only From
54   // points.
55   bool operator<(const Range &RHS) const { return From() < RHS.From(); }
56 
57   bool operator==(const Range &RHS) const { return Impl == RHS.Impl; }
58   bool operator!=(const Range &RHS) const { return !operator==(RHS); }
59 
60 private:
61   std::pair<const llvm::APSInt *, const llvm::APSInt *> Impl;
62 };
63 
64 /// @class RangeSet is a persistent set of non-overlapping ranges.
65 ///
66 /// New RangeSet objects can be ONLY produced by RangeSet::Factory object, which
67 /// also supports the most common operations performed on range sets.
68 ///
69 /// Empty set corresponds to an overly constrained symbol meaning that there
70 /// are no possible values for that symbol.
71 class RangeSet {
72 public:
73   class Factory;
74 
75 private:
76   // We use llvm::SmallVector as the underlying container for the following
77   // reasons:
78   //
79   //   * Range sets are usually very simple, 1 or 2 ranges.
80   //     That's why llvm::ImmutableSet is not perfect.
81   //
82   //   * Ranges in sets are NOT overlapping, so it is natural to keep them
83   //     sorted for efficient operations and queries.  For this reason,
84   //     llvm::SmallSet doesn't fit the requirements, it is not sorted when it
85   //     is a vector.
86   //
87   //   * Range set operations usually a bit harder than add/remove a range.
88   //     Complex operations might do many of those for just one range set.
89   //     Formerly it used to be llvm::ImmutableSet, which is inefficient for our
90   //     purposes as we want to make these operations BOTH immutable AND
91   //     efficient.
92   //
93   //   * Iteration over ranges is widespread and a more cache-friendly
94   //     structure is preferred.
95   using ImplType = llvm::SmallVector<Range, 4>;
96 
97   struct ContainerType : public ImplType, public llvm::FoldingSetNode {
ProfileContainerType98     void Profile(llvm::FoldingSetNodeID &ID) const {
99       for (const Range &It : *this) {
100         It.Profile(ID);
101       }
102     }
103   };
104   // This is a non-owning pointer to an actual container.
105   // The memory is fully managed by the factory and is alive as long as the
106   // factory itself is alive.
107   // It is a pointer as opposed to a reference, so we can easily reassign
108   // RangeSet objects.
109   using UnderlyingType = const ContainerType *;
110   UnderlyingType Impl;
111 
112 public:
113   using const_iterator = ImplType::const_iterator;
114 
begin()115   const_iterator begin() const { return Impl->begin(); }
end()116   const_iterator end() const { return Impl->end(); }
size()117   size_t size() const { return Impl->size(); }
118 
isEmpty()119   bool isEmpty() const { return Impl->empty(); }
120 
121   class Factory {
122   public:
Factory(BasicValueFactory & BV)123     Factory(BasicValueFactory &BV) : ValueFactory(BV) {}
124 
125     /// Create a new set with all ranges from both LHS and RHS.
126     /// Possible intersections are not checked here.
127     ///
128     /// Complexity: O(N + M)
129     ///             where N = size(LHS), M = size(RHS)
130     RangeSet add(RangeSet LHS, RangeSet RHS);
131     /// Create a new set with all ranges from the original set plus the new one.
132     /// Possible intersections are not checked here.
133     ///
134     /// Complexity: O(N)
135     ///             where N = size(Original)
136     RangeSet add(RangeSet Original, Range Element);
137     /// Create a new set with all ranges from the original set plus the point.
138     /// Possible intersections are not checked here.
139     ///
140     /// Complexity: O(N)
141     ///             where N = size(Original)
142     RangeSet add(RangeSet Original, const llvm::APSInt &Point);
143     /// Create a new set which is a union of two given ranges.
144     /// Possible intersections are not checked here.
145     ///
146     /// Complexity: O(N + M)
147     ///             where N = size(LHS), M = size(RHS)
148     RangeSet unite(RangeSet LHS, RangeSet RHS);
149     /// Create a new set by uniting given range set with the given range.
150     /// All intersections and adjacent ranges are handled here.
151     ///
152     /// Complexity: O(N)
153     ///             where N = size(Original)
154     RangeSet unite(RangeSet Original, Range Element);
155     /// Create a new set by uniting given range set with the given point.
156     /// All intersections and adjacent ranges are handled here.
157     ///
158     /// Complexity: O(N)
159     ///             where N = size(Original)
160     RangeSet unite(RangeSet Original, llvm::APSInt Point);
161     /// Create a new set by uniting given range set with the given range
162     /// between points. All intersections and adjacent ranges are handled here.
163     ///
164     /// Complexity: O(N)
165     ///             where N = size(Original)
166     RangeSet unite(RangeSet Original, llvm::APSInt From, llvm::APSInt To);
167 
getEmptySet()168     RangeSet getEmptySet() { return &EmptySet; }
169 
170     /// Create a new set with just one range.
171     /// @{
172     RangeSet getRangeSet(Range Origin);
getRangeSet(const llvm::APSInt & From,const llvm::APSInt & To)173     RangeSet getRangeSet(const llvm::APSInt &From, const llvm::APSInt &To) {
174       return getRangeSet(Range(From, To));
175     }
getRangeSet(const llvm::APSInt & Origin)176     RangeSet getRangeSet(const llvm::APSInt &Origin) {
177       return getRangeSet(Origin, Origin);
178     }
179     /// @}
180 
181     /// Intersect the given range sets.
182     ///
183     /// Complexity: O(N + M)
184     ///             where N = size(LHS), M = size(RHS)
185     RangeSet intersect(RangeSet LHS, RangeSet RHS);
186     /// Intersect the given set with the closed range [Lower, Upper].
187     ///
188     /// Unlike the Range type, this range uses modular arithmetic, corresponding
189     /// to the common treatment of C integer overflow. Thus, if the Lower bound
190     /// is greater than the Upper bound, the range is taken to wrap around. This
191     /// is equivalent to taking the intersection with the two ranges [Min,
192     /// Upper] and [Lower, Max], or, alternatively, /removing/ all integers
193     /// between Upper and Lower.
194     ///
195     /// Complexity: O(N)
196     ///             where N = size(What)
197     RangeSet intersect(RangeSet What, llvm::APSInt Lower, llvm::APSInt Upper);
198     /// Intersect the given range with the given point.
199     ///
200     /// The result can be either an empty set or a set containing the given
201     /// point depending on whether the point is in the range set.
202     ///
203     /// Complexity: O(logN)
204     ///             where N = size(What)
205     RangeSet intersect(RangeSet What, llvm::APSInt Point);
206 
207     /// Delete the given point from the range set.
208     ///
209     /// Complexity: O(N)
210     ///             where N = size(From)
211     RangeSet deletePoint(RangeSet From, const llvm::APSInt &Point);
212     /// Negate the given range set.
213     ///
214     /// Turn all [A, B] ranges to [-B, -A], when "-" is a C-like unary minus
215     /// operation under the values of the type.
216     ///
217     /// We also handle MIN because applying unary minus to MIN does not change
218     /// it.
219     /// Example 1:
220     /// char x = -128;        // -128 is a MIN value in a range of 'char'
221     /// char y = -x;          // y: -128
222     ///
223     /// Example 2:
224     /// unsigned char x = 0;  // 0 is a MIN value in a range of 'unsigned char'
225     /// unsigned char y = -x; // y: 0
226     ///
227     /// And it makes us to separate the range
228     /// like [MIN, N] to [MIN, MIN] U [-N, MAX].
229     /// For instance, whole range is {-128..127} and subrange is [-128,-126],
230     /// thus [-128,-127,-126,...] negates to [-128,...,126,127].
231     ///
232     /// Negate restores disrupted ranges on bounds,
233     /// e.g. [MIN, B] => [MIN, MIN] U [-B, MAX] => [MIN, B].
234     ///
235     /// Negate is a self-inverse function, i.e. negate(negate(R)) == R.
236     ///
237     /// Complexity: O(N)
238     ///             where N = size(What)
239     RangeSet negate(RangeSet What);
240     /// Performs promotions, truncations and conversions of the given set.
241     ///
242     /// This function is optimized for each of the six cast cases:
243     /// - noop
244     /// - conversion
245     /// - truncation
246     /// - truncation-conversion
247     /// - promotion
248     /// - promotion-conversion
249     ///
250     /// NOTE: This function is NOT self-inverse for truncations, because of
251     ///       the higher bits loss:
252     ///     - castTo(castTo(OrigRangeOfInt, char), int) != OrigRangeOfInt.
253     ///     - castTo(castTo(OrigRangeOfChar, int), char) == OrigRangeOfChar.
254     ///       But it is self-inverse for all the rest casts.
255     ///
256     /// Complexity:
257     ///     - Noop                               O(1);
258     ///     - Truncation                         O(N^2);
259     ///     - Another case                       O(N);
260     ///     where N = size(What)
261     RangeSet castTo(RangeSet What, APSIntType Ty);
262     RangeSet castTo(RangeSet What, QualType T);
263 
264     /// Return associated value factory.
getValueFactory()265     BasicValueFactory &getValueFactory() const { return ValueFactory; }
266 
267   private:
268     /// Return a persistent version of the given container.
269     RangeSet makePersistent(ContainerType &&From);
270     /// Construct a new persistent version of the given container.
271     ContainerType *construct(ContainerType &&From);
272 
273     RangeSet intersect(const ContainerType &LHS, const ContainerType &RHS);
274     /// NOTE: This function relies on the fact that all values in the
275     /// containers are persistent (created via BasicValueFactory::getValue).
276     ContainerType unite(const ContainerType &LHS, const ContainerType &RHS);
277 
278     /// This is a helper function for `castTo` method. Implies not to be used
279     /// separately.
280     /// Performs a truncation case of a cast operation.
281     ContainerType truncateTo(RangeSet What, APSIntType Ty);
282 
283     /// This is a helper function for `castTo` method. Implies not to be used
284     /// separately.
285     /// Performs a conversion case and a promotion-conversion case for signeds
286     /// of a cast operation.
287     ContainerType convertTo(RangeSet What, APSIntType Ty);
288 
289     /// This is a helper function for `castTo` method. Implies not to be used
290     /// separately.
291     /// Performs a promotion for unsigneds only.
292     ContainerType promoteTo(RangeSet What, APSIntType Ty);
293 
294     // Many operations include producing new APSInt values and that's why
295     // we need this factory.
296     BasicValueFactory &ValueFactory;
297     // Allocator for all the created containers.
298     // Containers might own their own memory and that's why it is specific
299     // for the type, so it calls container destructors upon deletion.
300     llvm::SpecificBumpPtrAllocator<ContainerType> Arena;
301     // Usually we deal with the same ranges and range sets over and over.
302     // Here we track all created containers and try not to repeat ourselves.
303     llvm::FoldingSet<ContainerType> Cache;
304     static ContainerType EmptySet;
305   };
306 
307   RangeSet(const RangeSet &) = default;
308   RangeSet &operator=(const RangeSet &) = default;
309   RangeSet(RangeSet &&) = default;
310   RangeSet &operator=(RangeSet &&) = default;
311   ~RangeSet() = default;
312 
313   /// Construct a new RangeSet representing '{ [From, To] }'.
RangeSet(Factory & F,const llvm::APSInt & From,const llvm::APSInt & To)314   RangeSet(Factory &F, const llvm::APSInt &From, const llvm::APSInt &To)
315       : RangeSet(F.getRangeSet(From, To)) {}
316 
317   /// Construct a new RangeSet representing the given point as a range.
RangeSet(Factory & F,const llvm::APSInt & Point)318   RangeSet(Factory &F, const llvm::APSInt &Point)
319       : RangeSet(F.getRangeSet(Point)) {}
320 
Profile(llvm::FoldingSetNodeID & ID,const RangeSet & RS)321   static void Profile(llvm::FoldingSetNodeID &ID, const RangeSet &RS) {
322     ID.AddPointer(RS.Impl);
323   }
324 
325   /// Profile - Generates a hash profile of this RangeSet for use
326   ///  by FoldingSet.
Profile(llvm::FoldingSetNodeID & ID)327   void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, *this); }
328 
329   /// getConcreteValue - If a symbol is constrained to equal a specific integer
330   ///  constant then this method returns that value.  Otherwise, it returns
331   ///  NULL.
getConcreteValue()332   const llvm::APSInt *getConcreteValue() const {
333     return Impl->size() == 1 ? begin()->getConcreteValue() : nullptr;
334   }
335 
336   /// Get the minimal value covered by the ranges in the set.
337   ///
338   /// Complexity: O(1)
339   const llvm::APSInt &getMinValue() const;
340   /// Get the maximal value covered by the ranges in the set.
341   ///
342   /// Complexity: O(1)
343   const llvm::APSInt &getMaxValue() const;
344 
345   bool isUnsigned() const;
346   uint32_t getBitWidth() const;
347   APSIntType getAPSIntType() const;
348 
349   /// Test whether the given point is contained by any of the ranges.
350   ///
351   /// Complexity: O(logN)
352   ///             where N = size(this)
contains(llvm::APSInt Point)353   bool contains(llvm::APSInt Point) const { return containsImpl(Point); }
354 
containsZero()355   bool containsZero() const {
356     APSIntType T{getMinValue()};
357     return contains(T.getZeroValue());
358   }
359 
360   /// Test if the range is the [0,0] range.
361   ///
362   /// Complexity: O(1)
encodesFalseRange()363   bool encodesFalseRange() const {
364     const llvm::APSInt *Constant = getConcreteValue();
365     return Constant && Constant->isZero();
366   }
367 
368   /// Test if the range doesn't contain zero.
369   ///
370   /// Complexity: O(logN)
371   ///             where N = size(this)
encodesTrueRange()372   bool encodesTrueRange() const { return !containsZero(); }
373 
374   void dump(raw_ostream &OS) const;
375   void dump() const;
376 
377   bool operator==(const RangeSet &Other) const { return *Impl == *Other.Impl; }
378   bool operator!=(const RangeSet &Other) const { return !(*this == Other); }
379 
380 private:
RangeSet(ContainerType * RawContainer)381   /* implicit */ RangeSet(ContainerType *RawContainer) : Impl(RawContainer) {}
RangeSet(UnderlyingType Ptr)382   /* implicit */ RangeSet(UnderlyingType Ptr) : Impl(Ptr) {}
383 
384   /// Pin given points to the type represented by the current range set.
385   ///
386   /// This makes parameter points to be in-out parameters.
387   /// In order to maintain consistent types across all of the ranges in the set
388   /// and to keep all the operations to compare ONLY points of the same type, we
389   /// need to pin every point before any operation.
390   ///
391   /// @Returns true if the given points can be converted to the target type
392   ///          without changing the values (i.e. trivially) and false otherwise.
393   /// @{
394   bool pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const;
395   bool pin(llvm::APSInt &Point) const;
396   /// @}
397 
398   // This version of this function modifies its arguments (pins it).
399   bool containsImpl(llvm::APSInt &Point) const;
400 
401   friend class Factory;
402 };
403 
404 using ConstraintMap = llvm::ImmutableMap<SymbolRef, RangeSet>;
405 ConstraintMap getConstraintMap(ProgramStateRef State);
406 
407 class RangedConstraintManager : public SimpleConstraintManager {
408 public:
RangedConstraintManager(ExprEngine * EE,SValBuilder & SB)409   RangedConstraintManager(ExprEngine *EE, SValBuilder &SB)
410       : SimpleConstraintManager(EE, SB) {}
411 
412   ~RangedConstraintManager() override;
413 
414   //===------------------------------------------------------------------===//
415   // Implementation for interface from SimpleConstraintManager.
416   //===------------------------------------------------------------------===//
417 
418   ProgramStateRef assumeSym(ProgramStateRef State, SymbolRef Sym,
419                             bool Assumption) override;
420 
421   ProgramStateRef assumeSymInclusiveRange(ProgramStateRef State, SymbolRef Sym,
422                                           const llvm::APSInt &From,
423                                           const llvm::APSInt &To,
424                                           bool InRange) override;
425 
426   ProgramStateRef assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym,
427                                        bool Assumption) override;
428 
429 protected:
430   /// Assume a constraint between a symbolic expression and a concrete integer.
431   virtual ProgramStateRef assumeSymRel(ProgramStateRef State, SymbolRef Sym,
432                                        BinaryOperator::Opcode op,
433                                        const llvm::APSInt &Int);
434 
435   //===------------------------------------------------------------------===//
436   // Interface that subclasses must implement.
437   //===------------------------------------------------------------------===//
438 
439   // Each of these is of the form "$Sym+Adj <> V", where "<>" is the comparison
440   // operation for the method being invoked.
441 
442   virtual ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
443                                       const llvm::APSInt &V,
444                                       const llvm::APSInt &Adjustment) = 0;
445 
446   virtual ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
447                                       const llvm::APSInt &V,
448                                       const llvm::APSInt &Adjustment) = 0;
449 
450   virtual ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
451                                       const llvm::APSInt &V,
452                                       const llvm::APSInt &Adjustment) = 0;
453 
454   virtual ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
455                                       const llvm::APSInt &V,
456                                       const llvm::APSInt &Adjustment) = 0;
457 
458   virtual ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
459                                       const llvm::APSInt &V,
460                                       const llvm::APSInt &Adjustment) = 0;
461 
462   virtual ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
463                                       const llvm::APSInt &V,
464                                       const llvm::APSInt &Adjustment) = 0;
465 
466   virtual ProgramStateRef assumeSymWithinInclusiveRange(
467       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
468       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
469 
470   virtual ProgramStateRef assumeSymOutsideInclusiveRange(
471       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
472       const llvm::APSInt &To, const llvm::APSInt &Adjustment) = 0;
473 
474   //===------------------------------------------------------------------===//
475   // Internal implementation.
476   //===------------------------------------------------------------------===//
477 private:
478   static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment);
479 };
480 
481 /// Try to simplify a given symbolic expression based on the constraints in
482 /// State. This is needed because the Environment bindings are not getting
483 /// updated when a new constraint is added to the State. If the symbol is
484 /// simplified to a non-symbol (e.g. to a constant) then the original symbol
485 /// is returned. We use this function in the family of assumeSymNE/EQ/LT/../GE
486 /// functions where we can work only with symbols. Use the other function
487 /// (simplifyToSVal) if you are interested in a simplification that may yield
488 /// a concrete constant value.
489 SymbolRef simplify(ProgramStateRef State, SymbolRef Sym);
490 
491 /// Try to simplify a given symbolic expression's associated `SVal` based on the
492 /// constraints in State. This is very similar to `simplify`, but this function
493 /// always returns the simplified SVal. The simplified SVal might be a single
494 /// constant (i.e. `ConcreteInt`).
495 SVal simplifyToSVal(ProgramStateRef State, SymbolRef Sym);
496 
497 } // namespace ento
498 } // namespace clang
499 
500 REGISTER_FACTORY_WITH_PROGRAMSTATE(ConstraintMap)
501 
502 #endif
503