1 //== RangeConstraintManager.cpp - Manage range constraints.------*- C++ -*--==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines RangeConstraintManager, a class that tracks simple
10 //  equality and inequality constraints on symbolic values of ProgramState.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "clang/Basic/JsonSupport.h"
15 #include "clang/StaticAnalyzer/Core/PathSensitive/APSIntType.h"
16 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState.h"
17 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramStateTrait.h"
18 #include "clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h"
19 #include "clang/StaticAnalyzer/Core/PathSensitive/SValVisitor.h"
20 #include "llvm/ADT/FoldingSet.h"
21 #include "llvm/ADT/ImmutableSet.h"
22 #include "llvm/ADT/STLExtras.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/StringExtras.h"
25 #include "llvm/Support/Compiler.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include <algorithm>
28 #include <iterator>
29 
30 using namespace clang;
31 using namespace ento;
32 
33 // This class can be extended with other tables which will help to reason
34 // about ranges more precisely.
35 class OperatorRelationsTable {
36   static_assert(BO_LT < BO_GT && BO_GT < BO_LE && BO_LE < BO_GE &&
37                     BO_GE < BO_EQ && BO_EQ < BO_NE,
38                 "This class relies on operators order. Rework it otherwise.");
39 
40 public:
41   enum TriStateKind {
42     False = 0,
43     True,
44     Unknown,
45   };
46 
47 private:
48   // CmpOpTable holds states which represent the corresponding range for
49   // branching an exploded graph. We can reason about the branch if there is
50   // a previously known fact of the existence of a comparison expression with
51   // operands used in the current expression.
52   // E.g. assuming (x < y) is true that means (x != y) is surely true.
53   // if (x previous_operation y)  // <    | !=      | >
54   //   if (x operation y)         // !=   | >       | <
55   //     tristate                 // True | Unknown | False
56   //
57   // CmpOpTable represents next:
58   // __|< |> |<=|>=|==|!=|UnknownX2|
59   // < |1 |0 |* |0 |0 |* |1        |
60   // > |0 |1 |0 |* |0 |* |1        |
61   // <=|1 |0 |1 |* |1 |* |0        |
62   // >=|0 |1 |* |1 |1 |* |0        |
63   // ==|0 |0 |* |* |1 |0 |1        |
64   // !=|1 |1 |* |* |0 |1 |0        |
65   //
66   // Columns stands for a previous operator.
67   // Rows stands for a current operator.
68   // Each row has exactly two `Unknown` cases.
69   // UnknownX2 means that both `Unknown` previous operators are met in code,
70   // and there is a special column for that, for example:
71   // if (x >= y)
72   //   if (x != y)
73   //     if (x <= y)
74   //       False only
75   static constexpr size_t CmpOpCount = BO_NE - BO_LT + 1;
76   const TriStateKind CmpOpTable[CmpOpCount][CmpOpCount + 1] = {
77       // <      >      <=     >=     ==     !=    UnknownX2
78       {True, False, Unknown, False, False, Unknown, True}, // <
79       {False, True, False, Unknown, False, Unknown, True}, // >
80       {True, False, True, Unknown, True, Unknown, False},  // <=
81       {False, True, Unknown, True, True, Unknown, False},  // >=
82       {False, False, Unknown, Unknown, True, False, True}, // ==
83       {True, True, Unknown, Unknown, False, True, False},  // !=
84   };
85 
86   static size_t getIndexFromOp(BinaryOperatorKind OP) {
87     return static_cast<size_t>(OP - BO_LT);
88   }
89 
90 public:
91   constexpr size_t getCmpOpCount() const { return CmpOpCount; }
92 
93   static BinaryOperatorKind getOpFromIndex(size_t Index) {
94     return static_cast<BinaryOperatorKind>(Index + BO_LT);
95   }
96 
97   TriStateKind getCmpOpState(BinaryOperatorKind CurrentOP,
98                              BinaryOperatorKind QueriedOP) const {
99     return CmpOpTable[getIndexFromOp(CurrentOP)][getIndexFromOp(QueriedOP)];
100   }
101 
102   TriStateKind getCmpOpStateForUnknownX2(BinaryOperatorKind CurrentOP) const {
103     return CmpOpTable[getIndexFromOp(CurrentOP)][CmpOpCount];
104   }
105 };
106 
107 //===----------------------------------------------------------------------===//
108 //                           RangeSet implementation
109 //===----------------------------------------------------------------------===//
110 
// Out-of-line definition of the static member Factory::EmptySet,
// value-initialized with `{}`.
RangeSet::ContainerType RangeSet::Factory::EmptySet{};
112 
113 RangeSet RangeSet::Factory::add(RangeSet LHS, RangeSet RHS) {
114   ContainerType Result;
115   Result.reserve(LHS.size() + RHS.size());
116   std::merge(LHS.begin(), LHS.end(), RHS.begin(), RHS.end(),
117              std::back_inserter(Result));
118   return makePersistent(std::move(Result));
119 }
120 
121 RangeSet RangeSet::Factory::add(RangeSet Original, Range Element) {
122   ContainerType Result;
123   Result.reserve(Original.size() + 1);
124 
125   const_iterator Lower = llvm::lower_bound(Original, Element);
126   Result.insert(Result.end(), Original.begin(), Lower);
127   Result.push_back(Element);
128   Result.insert(Result.end(), Lower, Original.end());
129 
130   return makePersistent(std::move(Result));
131 }
132 
133 RangeSet RangeSet::Factory::add(RangeSet Original, const llvm::APSInt &Point) {
134   return add(Original, Range(Point));
135 }
136 
137 RangeSet RangeSet::Factory::unite(RangeSet LHS, RangeSet RHS) {
138   ContainerType Result = unite(*LHS.Impl, *RHS.Impl);
139   return makePersistent(std::move(Result));
140 }
141 
142 RangeSet RangeSet::Factory::unite(RangeSet Original, Range R) {
143   ContainerType Result;
144   Result.push_back(R);
145   Result = unite(*Original.Impl, Result);
146   return makePersistent(std::move(Result));
147 }
148 
149 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt Point) {
150   return unite(Original, Range(ValueFactory.getValue(Point)));
151 }
152 
153 RangeSet RangeSet::Factory::unite(RangeSet Original, llvm::APSInt From,
154                                   llvm::APSInt To) {
155   return unite(Original,
156                Range(ValueFactory.getValue(From), ValueFactory.getValue(To)));
157 }
158 
/// Exchanges two (begin, end) pairs: \p First with \p Second and
/// \p FirstEnd with \p SecondEnd.
template <typename T>
void swapIterators(T &First, T &FirstEnd, T &Second, T &SecondEnd) {
  // Classic three-move exchange, applied to each half of the pair.
  const auto Exchange = [](T &A, T &B) {
    T Saved = std::move(A);
    A = std::move(B);
    B = std::move(Saved);
  };
  Exchange(First, Second);
  Exchange(FirstEnd, SecondEnd);
}
164 
165 RangeSet::ContainerType RangeSet::Factory::unite(const ContainerType &LHS,
166                                                  const ContainerType &RHS) {
167   if (LHS.empty())
168     return RHS;
169   if (RHS.empty())
170     return LHS;
171 
172   using llvm::APSInt;
173   using iterator = ContainerType::const_iterator;
174 
175   iterator First = LHS.begin();
176   iterator FirstEnd = LHS.end();
177   iterator Second = RHS.begin();
178   iterator SecondEnd = RHS.end();
179   APSIntType Ty = APSIntType(First->From());
180   const APSInt Min = Ty.getMinValue();
181 
182   // Handle a corner case first when both range sets start from MIN.
183   // This helps to avoid complicated conditions below. Specifically, this
184   // particular check for `MIN` is not needed in the loop below every time
185   // when we do `Second->From() - One` operation.
186   if (Min == First->From() && Min == Second->From()) {
187     if (First->To() > Second->To()) {
188       //    [ First    ]--->
189       //    [ Second ]----->
190       // MIN^
191       // The Second range is entirely inside the First one.
192 
193       // Check if Second is the last in its RangeSet.
194       if (++Second == SecondEnd)
195         //    [ First     ]--[ First + 1 ]--->
196         //    [ Second ]--------------------->
197         // MIN^
198         // The Union is equal to First's RangeSet.
199         return LHS;
200     } else {
201       // case 1: [ First ]----->
202       // case 2: [ First   ]--->
203       //         [ Second  ]--->
204       //      MIN^
205       // The First range is entirely inside or equal to the Second one.
206 
207       // Check if First is the last in its RangeSet.
208       if (++First == FirstEnd)
209         //    [ First ]----------------------->
210         //    [ Second  ]--[ Second + 1 ]---->
211         // MIN^
212         // The Union is equal to Second's RangeSet.
213         return RHS;
214     }
215   }
216 
217   const APSInt One = Ty.getValue(1);
218   ContainerType Result;
219 
220   // This is called when there are no ranges left in one of the ranges.
221   // Append the rest of the ranges from another range set to the Result
222   // and return with that.
223   const auto AppendTheRest = [&Result](iterator I, iterator E) {
224     Result.append(I, E);
225     return Result;
226   };
227 
228   while (true) {
229     // We want to keep the following invariant at all times:
230     // ---[ First ------>
231     // -----[ Second --->
232     if (First->From() > Second->From())
233       swapIterators(First, FirstEnd, Second, SecondEnd);
234 
235     // The Union definitely starts with First->From().
236     // ----------[ First ------>
237     // ------------[ Second --->
238     // ----------[ Union ------>
239     // UnionStart^
240     const llvm::APSInt &UnionStart = First->From();
241 
242     // Loop where the invariant holds.
243     while (true) {
244       // Skip all enclosed ranges.
245       // ---[                  First                     ]--->
246       // -----[ Second ]--[ Second + 1 ]--[ Second + N ]----->
247       while (First->To() >= Second->To()) {
248         // Check if Second is the last in its RangeSet.
249         if (++Second == SecondEnd) {
250           // Append the Union.
251           // ---[ Union      ]--->
252           // -----[ Second ]----->
253           // --------[ First ]--->
254           //         UnionEnd^
255           Result.emplace_back(UnionStart, First->To());
256           // ---[ Union ]----------------->
257           // --------------[ First + 1]--->
258           // Append all remaining ranges from the First's RangeSet.
259           return AppendTheRest(++First, FirstEnd);
260         }
261       }
262 
263       // Check if First and Second are disjoint. It means that we find
264       // the end of the Union. Exit the loop and append the Union.
265       // ---[ First ]=------------->
266       // ------------=[ Second ]--->
267       // ----MinusOne^
268       if (First->To() < Second->From() - One)
269         break;
270 
271       // First is entirely inside the Union. Go next.
272       // ---[ Union ----------->
273       // ---- [ First ]-------->
274       // -------[ Second ]----->
275       // Check if First is the last in its RangeSet.
276       if (++First == FirstEnd) {
277         // Append the Union.
278         // ---[ Union       ]--->
279         // -----[ First ]------->
280         // --------[ Second ]--->
281         //          UnionEnd^
282         Result.emplace_back(UnionStart, Second->To());
283         // ---[ Union ]------------------>
284         // --------------[ Second + 1]--->
285         // Append all remaining ranges from the Second's RangeSet.
286         return AppendTheRest(++Second, SecondEnd);
287       }
288 
289       // We know that we are at one of the two cases:
290       // case 1: --[ First ]--------->
291       // case 2: ----[ First ]------->
292       // --------[ Second ]---------->
293       // In both cases First starts after Second->From().
294       // Make sure that the loop invariant holds.
295       swapIterators(First, FirstEnd, Second, SecondEnd);
296     }
297 
298     // Here First and Second are disjoint.
299     // Append the Union.
300     // ---[ Union    ]--------------->
301     // -----------------[ Second ]--->
302     // ------[ First ]--------------->
303     //       UnionEnd^
304     Result.emplace_back(UnionStart, First->To());
305 
306     // Check if First is the last in its RangeSet.
307     if (++First == FirstEnd)
308       // ---[ Union ]--------------->
309       // --------------[ Second ]--->
310       // Append all remaining ranges from the Second's RangeSet.
311       return AppendTheRest(Second, SecondEnd);
312   }
313 
314   llvm_unreachable("Normally, we should not reach here");
315 }
316 
317 RangeSet RangeSet::Factory::getRangeSet(Range From) {
318   ContainerType Result;
319   Result.push_back(From);
320   return makePersistent(std::move(Result));
321 }
322 
323 RangeSet RangeSet::Factory::makePersistent(ContainerType &&From) {
324   llvm::FoldingSetNodeID ID;
325   void *InsertPos;
326 
327   From.Profile(ID);
328   ContainerType *Result = Cache.FindNodeOrInsertPos(ID, InsertPos);
329 
330   if (!Result) {
331     // It is cheaper to fully construct the resulting range on stack
332     // and move it to the freshly allocated buffer if we don't have
333     // a set like this already.
334     Result = construct(std::move(From));
335     Cache.InsertNode(Result, InsertPos);
336   }
337 
338   return Result;
339 }
340 
341 RangeSet::ContainerType *RangeSet::Factory::construct(ContainerType &&From) {
342   void *Buffer = Arena.Allocate();
343   return new (Buffer) ContainerType(std::move(From));
344 }
345 
346 const llvm::APSInt &RangeSet::getMinValue() const {
347   assert(!isEmpty());
348   return begin()->From();
349 }
350 
351 const llvm::APSInt &RangeSet::getMaxValue() const {
352   assert(!isEmpty());
353   return std::prev(end())->To();
354 }
355 
356 bool clang::ento::RangeSet::isUnsigned() const {
357   assert(!isEmpty());
358   return begin()->From().isUnsigned();
359 }
360 
361 uint32_t clang::ento::RangeSet::getBitWidth() const {
362   assert(!isEmpty());
363   return begin()->From().getBitWidth();
364 }
365 
366 APSIntType clang::ento::RangeSet::getAPSIntType() const {
367   assert(!isEmpty());
368   return APSIntType(begin()->From());
369 }
370 
371 bool RangeSet::containsImpl(llvm::APSInt &Point) const {
372   if (isEmpty() || !pin(Point))
373     return false;
374 
375   Range Dummy(Point);
376   const_iterator It = llvm::upper_bound(*this, Dummy);
377   if (It == begin())
378     return false;
379 
380   return std::prev(It)->Includes(Point);
381 }
382 
383 bool RangeSet::pin(llvm::APSInt &Point) const {
384   APSIntType Type(getMinValue());
385   if (Type.testInRange(Point, true) != APSIntType::RTR_Within)
386     return false;
387 
388   Type.apply(Point);
389   return true;
390 }
391 
392 bool RangeSet::pin(llvm::APSInt &Lower, llvm::APSInt &Upper) const {
393   // This function has nine cases, the cartesian product of range-testing
394   // both the upper and lower bounds against the symbol's type.
395   // Each case requires a different pinning operation.
396   // The function returns false if the described range is entirely outside
397   // the range of values for the associated symbol.
398   APSIntType Type(getMinValue());
399   APSIntType::RangeTestResultKind LowerTest = Type.testInRange(Lower, true);
400   APSIntType::RangeTestResultKind UpperTest = Type.testInRange(Upper, true);
401 
402   switch (LowerTest) {
403   case APSIntType::RTR_Below:
404     switch (UpperTest) {
405     case APSIntType::RTR_Below:
406       // The entire range is outside the symbol's set of possible values.
407       // If this is a conventionally-ordered range, the state is infeasible.
408       if (Lower <= Upper)
409         return false;
410 
411       // However, if the range wraps around, it spans all possible values.
412       Lower = Type.getMinValue();
413       Upper = Type.getMaxValue();
414       break;
415     case APSIntType::RTR_Within:
416       // The range starts below what's possible but ends within it. Pin.
417       Lower = Type.getMinValue();
418       Type.apply(Upper);
419       break;
420     case APSIntType::RTR_Above:
421       // The range spans all possible values for the symbol. Pin.
422       Lower = Type.getMinValue();
423       Upper = Type.getMaxValue();
424       break;
425     }
426     break;
427   case APSIntType::RTR_Within:
428     switch (UpperTest) {
429     case APSIntType::RTR_Below:
430       // The range wraps around, but all lower values are not possible.
431       Type.apply(Lower);
432       Upper = Type.getMaxValue();
433       break;
434     case APSIntType::RTR_Within:
435       // The range may or may not wrap around, but both limits are valid.
436       Type.apply(Lower);
437       Type.apply(Upper);
438       break;
439     case APSIntType::RTR_Above:
440       // The range starts within what's possible but ends above it. Pin.
441       Type.apply(Lower);
442       Upper = Type.getMaxValue();
443       break;
444     }
445     break;
446   case APSIntType::RTR_Above:
447     switch (UpperTest) {
448     case APSIntType::RTR_Below:
449       // The range wraps but is outside the symbol's set of possible values.
450       return false;
451     case APSIntType::RTR_Within:
452       // The range starts above what's possible but ends within it (wrap).
453       Lower = Type.getMinValue();
454       Type.apply(Upper);
455       break;
456     case APSIntType::RTR_Above:
457       // The entire range is outside the symbol's set of possible values.
458       // If this is a conventionally-ordered range, the state is infeasible.
459       if (Lower <= Upper)
460         return false;
461 
462       // However, if the range wraps around, it spans all possible values.
463       Lower = Type.getMinValue();
464       Upper = Type.getMaxValue();
465       break;
466     }
467     break;
468   }
469 
470   return true;
471 }
472 
473 RangeSet RangeSet::Factory::intersect(RangeSet What, llvm::APSInt Lower,
474                                       llvm::APSInt Upper) {
475   if (What.isEmpty() || !What.pin(Lower, Upper))
476     return getEmptySet();
477 
478   ContainerType DummyContainer;
479 
480   if (Lower <= Upper) {
481     // [Lower, Upper] is a regular range.
482     //
483     // Shortcut: check that there is even a possibility of the intersection
484     //           by checking the two following situations:
485     //
486     //               <---[  What  ]---[------]------>
487     //                              Lower  Upper
488     //                            -or-
489     //               <----[------]----[  What  ]---->
490     //                  Lower  Upper
491     if (What.getMaxValue() < Lower || Upper < What.getMinValue())
492       return getEmptySet();
493 
494     DummyContainer.push_back(
495         Range(ValueFactory.getValue(Lower), ValueFactory.getValue(Upper)));
496   } else {
497     // [Lower, Upper] is an inverted range, i.e. [MIN, Upper] U [Lower, MAX]
498     //
499     // Shortcut: check that there is even a possibility of the intersection
500     //           by checking the following situation:
501     //
502     //               <------]---[  What  ]---[------>
503     //                    Upper             Lower
504     if (What.getMaxValue() < Lower && Upper < What.getMinValue())
505       return getEmptySet();
506 
507     DummyContainer.push_back(
508         Range(ValueFactory.getMinValue(Upper), ValueFactory.getValue(Upper)));
509     DummyContainer.push_back(
510         Range(ValueFactory.getValue(Lower), ValueFactory.getMaxValue(Lower)));
511   }
512 
513   return intersect(*What.Impl, DummyContainer);
514 }
515 
/// Intersects two sequences of ranges with a two-cursor sweep.
///
/// Both cursors only move forward. On every step either a range that cannot
/// intersect anything further is skipped, or the overlapping piece (from the
/// later of the two starts to the earlier of the two ends) is appended to the
/// result.
/// NOTE(review): the sweep presumably relies on both inputs being sorted and
/// internally non-overlapping -- confirm against the callers.
///
/// \returns the empty set when no overlapping piece was found, otherwise the
/// persistent set holding the collected pieces.
RangeSet RangeSet::Factory::intersect(const RangeSet::ContainerType &LHS,
                                      const RangeSet::ContainerType &RHS) {
  ContainerType Result;
  Result.reserve(std::max(LHS.size(), RHS.size()));

  const_iterator First = LHS.begin(), Second = RHS.begin(),
                 FirstEnd = LHS.end(), SecondEnd = RHS.end();

  // If we ran out of ranges in one set, but not in the other,
  // it means that those elements are definitely not in the
  // intersection.
  while (First != FirstEnd && Second != SecondEnd) {
    // We want to keep the following invariant at all times:
    //
    //    ----[ First ---------------------->
    //    --------[ Second ----------------->
    if (Second->From() < First->From())
      swapIterators(First, FirstEnd, Second, SecondEnd);

    // Loop where the invariant holds:
    do {
      // Check for the following situation:
      //
      //    ----[ First ]--------------------->
      //    ---------------[ Second ]--------->
      //
      // which means that...
      if (Second->From() > First->To()) {
        // ...First is not in the intersection.
        //
        // We should move on to the next range after First and break out of the
        // loop because the invariant might not be true.
        ++First;
        break;
      }

      // We have a guaranteed intersection at this point!
      // And this is the current situation:
      //
      //    ----[   First   ]----------------->
      //    -------[ Second ------------------>
      //
      // Additionally, it definitely starts with Second->From().
      // The reference is taken before the possible swap below, so it keeps
      // naming the start of the intersection even if the cursors trade places.
      const llvm::APSInt &IntersectionStart = Second->From();

      // It is important to know which of the two ranges' ends
      // is greater.  That "longer" range might have some other
      // intersections, while the "shorter" range might not.
      if (Second->To() > First->To()) {
        // Here we make a decision to keep First as the "longer"
        // range.
        swapIterators(First, FirstEnd, Second, SecondEnd);
      }

      // At this point, we have the following situation:
      //
      //    ---- First      ]-------------------->
      //    ---- Second ]--[  Second+1 ---------->
      //
      // We don't know the relationship between First->From and
      // Second->From and we don't know whether Second+1 intersects
      // with First.
      //
      // However, we know that [IntersectionStart, Second->To] is
      // a part of the intersection...
      Result.push_back(Range(IntersectionStart, Second->To()));
      ++Second;
      // ...and that the invariant will hold for a valid Second+1
      // because First->From <= Second->To < (Second+1)->From.
    } while (Second != SecondEnd);
  }

  // Nothing overlapped: hand out the empty set instead of caching an empty
  // container.
  if (Result.empty())
    return getEmptySet();

  return makePersistent(std::move(Result));
}
593 
594 RangeSet RangeSet::Factory::intersect(RangeSet LHS, RangeSet RHS) {
595   // Shortcut: let's see if the intersection is even possible.
596   if (LHS.isEmpty() || RHS.isEmpty() || LHS.getMaxValue() < RHS.getMinValue() ||
597       RHS.getMaxValue() < LHS.getMinValue())
598     return getEmptySet();
599 
600   return intersect(*LHS.Impl, *RHS.Impl);
601 }
602 
603 RangeSet RangeSet::Factory::intersect(RangeSet LHS, llvm::APSInt Point) {
604   if (LHS.containsImpl(Point))
605     return getRangeSet(ValueFactory.getValue(Point));
606 
607   return getEmptySet();
608 }
609 
610 RangeSet RangeSet::Factory::negate(RangeSet What) {
611   if (What.isEmpty())
612     return getEmptySet();
613 
614   const llvm::APSInt SampleValue = What.getMinValue();
615   const llvm::APSInt &MIN = ValueFactory.getMinValue(SampleValue);
616   const llvm::APSInt &MAX = ValueFactory.getMaxValue(SampleValue);
617 
618   ContainerType Result;
619   Result.reserve(What.size() + (SampleValue == MIN));
620 
621   // Handle a special case for MIN value.
622   const_iterator It = What.begin();
623   const_iterator End = What.end();
624 
625   const llvm::APSInt &From = It->From();
626   const llvm::APSInt &To = It->To();
627 
628   if (From == MIN) {
629     // If the range [From, To] is [MIN, MAX], then result is also [MIN, MAX].
630     if (To == MAX) {
631       return What;
632     }
633 
634     const_iterator Last = std::prev(End);
635 
636     // Try to find and unite the following ranges:
637     // [MIN, MIN] & [MIN + 1, N] => [MIN, N].
638     if (Last->To() == MAX) {
639       // It means that in the original range we have ranges
640       //   [MIN, A], ... , [B, MAX]
641       // And the result should be [MIN, -B], ..., [-A, MAX]
642       Result.emplace_back(MIN, ValueFactory.getValue(-Last->From()));
643       // We already negated Last, so we can skip it.
644       End = Last;
645     } else {
646       // Add a separate range for the lowest value.
647       Result.emplace_back(MIN, MIN);
648     }
649 
650     // Skip adding the second range in case when [From, To] are [MIN, MIN].
651     if (To != MIN) {
652       Result.emplace_back(ValueFactory.getValue(-To), MAX);
653     }
654 
655     // Skip the first range in the loop.
656     ++It;
657   }
658 
659   // Negate all other ranges.
660   for (; It != End; ++It) {
661     // Negate int values.
662     const llvm::APSInt &NewFrom = ValueFactory.getValue(-It->To());
663     const llvm::APSInt &NewTo = ValueFactory.getValue(-It->From());
664 
665     // Add a negated range.
666     Result.emplace_back(NewFrom, NewTo);
667   }
668 
669   llvm::sort(Result);
670   return makePersistent(std::move(Result));
671 }
672 
673 // Convert range set to the given integral type using truncation and promotion.
674 // This works similar to APSIntType::apply function but for the range set.
675 RangeSet RangeSet::Factory::castTo(RangeSet What, APSIntType Ty) {
676   // Set is empty or NOOP (aka cast to the same type).
677   if (What.isEmpty() || What.getAPSIntType() == Ty)
678     return What;
679 
680   const bool IsConversion = What.isUnsigned() != Ty.isUnsigned();
681   const bool IsTruncation = What.getBitWidth() > Ty.getBitWidth();
682   const bool IsPromotion = What.getBitWidth() < Ty.getBitWidth();
683 
684   if (IsTruncation)
685     return makePersistent(truncateTo(What, Ty));
686 
687   // Here we handle 2 cases:
688   // - IsConversion && !IsPromotion.
689   //   In this case we handle changing a sign with same bitwidth: char -> uchar,
690   //   uint -> int. Here we convert negatives to positives and positives which
691   //   is out of range to negatives. We use convertTo function for that.
692   // - IsConversion && IsPromotion && !What.isUnsigned().
693   //   In this case we handle changing a sign from signeds to unsigneds with
694   //   higher bitwidth: char -> uint, int-> uint64. The point is that we also
695   //   need convert negatives to positives and use convertTo function as well.
696   //   For example, we don't need such a convertion when converting unsigned to
697   //   signed with higher bitwidth, because all the values of unsigned is valid
698   //   for the such signed.
699   if (IsConversion && (!IsPromotion || !What.isUnsigned()))
700     return makePersistent(convertTo(What, Ty));
701 
702   assert(IsPromotion && "Only promotion operation from unsigneds left.");
703   return makePersistent(promoteTo(What, Ty));
704 }
705 
706 RangeSet RangeSet::Factory::castTo(RangeSet What, QualType T) {
707   assert(T->isIntegralOrEnumerationType() && "T shall be an integral type.");
708   return castTo(What, ValueFactory.getAPSIntType(T));
709 }
710 
711 RangeSet::ContainerType RangeSet::Factory::truncateTo(RangeSet What,
712                                                       APSIntType Ty) {
713   using llvm::APInt;
714   using llvm::APSInt;
715   ContainerType Result;
716   ContainerType Dummy;
717   // CastRangeSize is an amount of all possible values of cast type.
718   // Example: `char` has 256 values; `short` has 65536 values.
719   // But in fact we use `amount of values` - 1, because
720   // we can't keep `amount of values of UINT64` inside uint64_t.
721   // E.g. 256 is an amount of all possible values of `char` and we can't keep
722   // it inside `char`.
723   // And it's OK, it's enough to do correct calculations.
724   uint64_t CastRangeSize = APInt::getMaxValue(Ty.getBitWidth()).getZExtValue();
725   for (const Range &R : What) {
726     // Get bounds of the given range.
727     APSInt FromInt = R.From();
728     APSInt ToInt = R.To();
729     // CurrentRangeSize is an amount of all possible values of the current
730     // range minus one.
731     uint64_t CurrentRangeSize = (ToInt - FromInt).getZExtValue();
732     // This is an optimization for a specific case when this Range covers
733     // the whole range of the target type.
734     Dummy.clear();
735     if (CurrentRangeSize >= CastRangeSize) {
736       Dummy.emplace_back(ValueFactory.getMinValue(Ty),
737                          ValueFactory.getMaxValue(Ty));
738       Result = std::move(Dummy);
739       break;
740     }
741     // Cast the bounds.
742     Ty.apply(FromInt);
743     Ty.apply(ToInt);
744     const APSInt &PersistentFrom = ValueFactory.getValue(FromInt);
745     const APSInt &PersistentTo = ValueFactory.getValue(ToInt);
746     if (FromInt > ToInt) {
747       Dummy.emplace_back(ValueFactory.getMinValue(Ty), PersistentTo);
748       Dummy.emplace_back(PersistentFrom, ValueFactory.getMaxValue(Ty));
749     } else
750       Dummy.emplace_back(PersistentFrom, PersistentTo);
751     // Every range retrieved after truncation potentialy has garbage values.
752     // So, we have to unite every next range with the previouses.
753     Result = unite(Result, Dummy);
754   }
755 
756   return Result;
757 }
758 
759 // Divide the convertion into two phases (presented as loops here).
760 // First phase(loop) works when casted values go in ascending order.
761 // E.g. char{1,3,5,127} -> uint{1,3,5,127}
762 // Interrupt the first phase and go to second one when casted values start
763 // go in descending order. That means that we crossed over the middle of
764 // the type value set (aka 0 for signeds and MAX/2+1 for unsigneds).
765 // For instance:
766 // 1: uchar{1,3,5,128,255} -> char{1,3,5,-128,-1}
767 //    Here we put {1,3,5} to one array and {-128, -1} to another
768 // 2: char{-128,-127,-1,0,1,2} -> uchar{128,129,255,0,1,3}
769 //    Here we put {128,129,255} to one array and {0,1,3} to another.
770 // After that we unite both arrays.
771 // NOTE: We don't just concatenate the arrays, because they may have
772 // adjacent ranges, e.g.:
773 // 1: char(-128, 127) -> uchar -> arr1(128, 255), arr2(0, 127) ->
774 //    unite -> uchar(0, 255)
775 // 2: uchar(0, 1)U(254, 255) -> char -> arr1(0, 1), arr2(-2, -1) ->
776 //    unite -> uchar(-2, 1)
777 RangeSet::ContainerType RangeSet::Factory::convertTo(RangeSet What,
778                                                      APSIntType Ty) {
779   using llvm::APInt;
780   using llvm::APSInt;
781   using Bounds = std::pair<const APSInt &, const APSInt &>;
782   ContainerType AscendArray;
783   ContainerType DescendArray;
784   auto CastRange = [Ty, &VF = ValueFactory](const Range &R) -> Bounds {
785     // Get bounds of the given range.
786     APSInt FromInt = R.From();
787     APSInt ToInt = R.To();
788     // Cast the bounds.
789     Ty.apply(FromInt);
790     Ty.apply(ToInt);
791     return {VF.getValue(FromInt), VF.getValue(ToInt)};
792   };
793   // Phase 1. Fill the first array.
794   APSInt LastConvertedInt = Ty.getMinValue();
795   const auto *It = What.begin();
796   const auto *E = What.end();
797   while (It != E) {
798     Bounds NewBounds = CastRange(*(It++));
799     // If values stop going acsending order, go to the second phase(loop).
800     if (NewBounds.first < LastConvertedInt) {
801       DescendArray.emplace_back(NewBounds.first, NewBounds.second);
802       break;
803     }
804     // If the range contains a midpoint, then split the range.
805     // E.g. char(-5, 5) -> uchar(251, 5)
806     // Here we shall add a range (251, 255) to the first array and (0, 5) to the
807     // second one.
808     if (NewBounds.first > NewBounds.second) {
809       DescendArray.emplace_back(ValueFactory.getMinValue(Ty), NewBounds.second);
810       AscendArray.emplace_back(NewBounds.first, ValueFactory.getMaxValue(Ty));
811     } else
812       // Values are going acsending order.
813       AscendArray.emplace_back(NewBounds.first, NewBounds.second);
814     LastConvertedInt = NewBounds.first;
815   }
816   // Phase 2. Fill the second array.
817   while (It != E) {
818     Bounds NewBounds = CastRange(*(It++));
819     DescendArray.emplace_back(NewBounds.first, NewBounds.second);
820   }
821   // Unite both arrays.
822   return unite(AscendArray, DescendArray);
823 }
824 
825 /// Promotion from unsigneds to signeds/unsigneds left.
826 RangeSet::ContainerType RangeSet::Factory::promoteTo(RangeSet What,
827                                                      APSIntType Ty) {
828   ContainerType Result;
829   // We definitely know the size of the result set.
830   Result.reserve(What.size());
831 
832   // Each unsigned value fits every larger type without any changes,
833   // whether the larger type is signed or unsigned. So just promote and push
834   // back each range one by one.
835   for (const Range &R : What) {
836     // Get bounds of the given range.
837     llvm::APSInt FromInt = R.From();
838     llvm::APSInt ToInt = R.To();
839     // Cast the bounds.
840     Ty.apply(FromInt);
841     Ty.apply(ToInt);
842     Result.emplace_back(ValueFactory.getValue(FromInt),
843                         ValueFactory.getValue(ToInt));
844   }
845   return Result;
846 }
847 
848 RangeSet RangeSet::Factory::deletePoint(RangeSet From,
849                                         const llvm::APSInt &Point) {
850   if (!From.contains(Point))
851     return From;
852 
853   llvm::APSInt Upper = Point;
854   llvm::APSInt Lower = Point;
855 
856   ++Upper;
857   --Lower;
858 
859   // Notice that the lower bound is greater than the upper bound.
860   return intersect(From, Upper, Lower);
861 }
862 
863 LLVM_DUMP_METHOD void Range::dump(raw_ostream &OS) const {
864   OS << '[' << toString(From(), 10) << ", " << toString(To(), 10) << ']';
865 }
866 LLVM_DUMP_METHOD void Range::dump() const { dump(llvm::errs()); }
867 
868 LLVM_DUMP_METHOD void RangeSet::dump(raw_ostream &OS) const {
869   OS << "{ ";
870   llvm::interleaveComma(*this, OS, [&OS](const Range &R) { R.dump(OS); });
871   OS << " }";
872 }
873 LLVM_DUMP_METHOD void RangeSet::dump() const { dump(llvm::errs()); }
874 
// Set of symbols; used below as the value type of ClassMembers.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(SymbolSet, SymbolRef)

namespace {
// Forward declaration: the state traits below mention EquivalenceClass before
// the class itself is defined.
class EquivalenceClass;
} // end anonymous namespace

// Program state data keyed by symbols and equivalence classes:
//   * ClassMap:        symbol -> equivalence class
//   * ClassMembers:    equivalence class -> set of symbols
//   * ConstraintRange: equivalence class -> range set
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMap, SymbolRef, EquivalenceClass)
REGISTER_MAP_WITH_PROGRAMSTATE(ClassMembers, EquivalenceClass, SymbolSet)
REGISTER_MAP_WITH_PROGRAMSTATE(ConstraintRange, EquivalenceClass, RangeSet)

// DisequalityMap: equivalence class -> set of equivalence classes.
// NOTE(review): by its name these are the classes known to be disequal to the
// key; the code maintaining the map is outside this chunk.
REGISTER_SET_FACTORY_WITH_PROGRAMSTATE(ClassSet, EquivalenceClass)
REGISTER_MAP_WITH_PROGRAMSTATE(DisequalityMap, EquivalenceClass, ClassSet)
887 
888 namespace {
/// This class encapsulates a set of symbols equal to each other.
///
/// The main idea of the approach requiring such classes is in narrowing
/// and sharing constraints between symbols within the class.  Also we can
/// conclude that there is no practical need in storing constraints for
/// every member of the class separately.
///
/// Main terminology:
///
///   * "Equivalence class" is an object of this class, which can be efficiently
///     compared to other classes.  It represents the whole class without
///     storing the actual members in it.  The members of the class however can
///     be retrieved from the state.
///
///   * "Class members" are the symbols corresponding to the class.  This means
///     that A == B for every member symbols A and B from the class.  Members of
///     each class are stored in the state.
///
///   * "Trivial class" is a class that has and ever had only one same symbol.
///
///   * "Merge operation" merges two classes into one.  It is the main operation
///     to produce non-trivial classes.
///     If, at some point, we can assume that two symbols from two distinct
///     classes are equal, we can merge these classes.
class EquivalenceClass : public llvm::FoldingSetNode {
public:
  /// Find equivalence class for the given symbol in the given state.
  LLVM_NODISCARD static inline EquivalenceClass find(ProgramStateRef State,
                                                     SymbolRef Sym);

  /// Merge classes for the given symbols and return a new state.
  LLVM_NODISCARD static inline ProgramStateRef merge(RangeSet::Factory &F,
                                                     ProgramStateRef State,
                                                     SymbolRef First,
                                                     SymbolRef Second);
  /// Merge this class with the given class and return a new state.
  LLVM_NODISCARD inline ProgramStateRef
  merge(RangeSet::Factory &F, ProgramStateRef State, EquivalenceClass Other);

  /// Return a set of class members for the given state.
  LLVM_NODISCARD inline SymbolSet getClassMembers(ProgramStateRef State) const;

  /// Return true if the current class is trivial in the given state.
  /// A class is trivial if and only if there is not any member relations stored
  /// to it in State/ClassMembers.
  /// An equivalence class with one member might seem as it does not hold any
  /// meaningful information, i.e. that is a tautology. However, during the
  /// removal of dead symbols we do not remove classes with one member for
  /// resource and performance reasons. Consequently, a class with one member is
  /// not necessarily trivial. It could happen that we have a class with two
  /// members and then during the removal of dead symbols we remove one of its
  /// members. In this case, the class is still non-trivial (it still has the
  /// mappings in ClassMembers), even though it has only one member.
  LLVM_NODISCARD inline bool isTrivial(ProgramStateRef State) const;

  /// Return true if the current class is trivial and its only member is dead.
  LLVM_NODISCARD inline bool isTriviallyDead(ProgramStateRef State,
                                             SymbolReaper &Reaper) const;

  // Disequality bookkeeping between classes.
  // NOTE(review): the definitions are outside this chunk; judging by the
  // names, markDisequal returns a state in which the given symbols/classes
  // are recorded as not equal, and getDisequalClasses returns the classes
  // recorded as disequal to the given symbol/class.
  LLVM_NODISCARD static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State, SymbolRef First,
               SymbolRef Second);
  LLVM_NODISCARD static inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass First, EquivalenceClass Second);
  LLVM_NODISCARD inline ProgramStateRef
  markDisequal(RangeSet::Factory &F, ProgramStateRef State,
               EquivalenceClass Other) const;
  LLVM_NODISCARD static inline ClassSet
  getDisequalClasses(ProgramStateRef State, SymbolRef Sym);
  LLVM_NODISCARD inline ClassSet
  getDisequalClasses(ProgramStateRef State) const;
  LLVM_NODISCARD inline ClassSet
  getDisequalClasses(DisequalityMapTy Map, ClassSet::Factory &Factory) const;

  // Tri-state equality query: the result is optional, so besides true/false
  // it can also carry no answer at all.
  // NOTE(review): definitions are outside this chunk; presumably "no value"
  // means that nothing is known about the relation.
  LLVM_NODISCARD static inline Optional<bool> areEqual(ProgramStateRef State,
                                                       EquivalenceClass First,
                                                       EquivalenceClass Second);
  LLVM_NODISCARD static inline Optional<bool>
  areEqual(ProgramStateRef State, SymbolRef First, SymbolRef Second);

  /// Remove one member from the class.
  LLVM_NODISCARD ProgramStateRef removeMember(ProgramStateRef State,
                                              const SymbolRef Old);

  /// Iterate over all symbols and try to simplify them.
  LLVM_NODISCARD static inline ProgramStateRef simplify(SValBuilder &SVB,
                                                        RangeSet::Factory &F,
                                                        ProgramStateRef State,
                                                        EquivalenceClass Class);

  /// Print the class to the given stream; dump() does the same to
  /// llvm::errs().
  void dumpToStream(ProgramStateRef State, raw_ostream &os) const;
  LLVM_DUMP_METHOD void dump(ProgramStateRef State) const {
    dumpToStream(State, llvm::errs());
  }

  /// Check equivalence data for consistency.
  LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED static bool
  isClassDataConsistent(ProgramStateRef State);

  /// Return the type of the representative symbol of the class.
  LLVM_NODISCARD QualType getType() const {
    return getRepresentativeSymbol()->getType();
  }

  // Classes can be copy- and move-constructed, but neither
  // default-constructed nor assigned.
  EquivalenceClass() = delete;
  EquivalenceClass(const EquivalenceClass &) = default;
  EquivalenceClass &operator=(const EquivalenceClass &) = delete;
  EquivalenceClass(EquivalenceClass &&) = default;
  EquivalenceClass &operator=(EquivalenceClass &&) = delete;

  // Comparison and ordering are defined purely in terms of the class ID.
  bool operator==(const EquivalenceClass &Other) const {
    return ID == Other.ID;
  }
  bool operator<(const EquivalenceClass &Other) const { return ID < Other.ID; }
  bool operator!=(const EquivalenceClass &Other) const {
    return !operator==(Other);
  }

  // FoldingSet profiling: the class ID is the only profiled datum.
  static void Profile(llvm::FoldingSetNodeID &ID, uintptr_t CID) {
    ID.AddInteger(CID);
  }

  void Profile(llvm::FoldingSetNodeID &ID) const { Profile(ID, this->ID); }

private:
  // The ID of a class created from a symbol is that symbol's pointer value.
  /* implicit */ EquivalenceClass(SymbolRef Sym)
      : ID(reinterpret_cast<uintptr_t>(Sym)) {}

  /// This function is intended to be used ONLY within the class.
  /// The fact that ID is a pointer to a symbol is an implementation detail
  /// and should stay that way.
  /// In the current implementation, we use it to retrieve the only member
  /// of the trivial class.
  SymbolRef getRepresentativeSymbol() const {
    return reinterpret_cast<SymbolRef>(ID);
  }
  // Helper declarations; their definitions are outside this chunk.
  static inline SymbolSet::Factory &getMembersFactory(ProgramStateRef State);

  inline ProgramStateRef mergeImpl(RangeSet::Factory &F, ProgramStateRef State,
                                   SymbolSet Members, EquivalenceClass Other,
                                   SymbolSet OtherMembers);

  static inline bool
  addToDisequalityInfo(DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
                       RangeSet::Factory &F, ProgramStateRef State,
                       EquivalenceClass First, EquivalenceClass Second);

  /// This is a unique identifier of the class.
  uintptr_t ID;
};
1039 
1040 //===----------------------------------------------------------------------===//
1041 //                             Constraint functions
1042 //===----------------------------------------------------------------------===//
1043 
1044 LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED bool
1045 areFeasible(ConstraintRangeTy Constraints) {
1046   return llvm::none_of(
1047       Constraints,
1048       [](const std::pair<EquivalenceClass, RangeSet> &ClassConstraint) {
1049         return ClassConstraint.second.isEmpty();
1050       });
1051 }
1052 
1053 LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
1054                                                     EquivalenceClass Class) {
1055   return State->get<ConstraintRange>(Class);
1056 }
1057 
1058 LLVM_NODISCARD inline const RangeSet *getConstraint(ProgramStateRef State,
1059                                                     SymbolRef Sym) {
1060   return getConstraint(State, EquivalenceClass::find(State, Sym));
1061 }
1062 
1063 LLVM_NODISCARD ProgramStateRef setConstraint(ProgramStateRef State,
1064                                              EquivalenceClass Class,
1065                                              RangeSet Constraint) {
1066   return State->set<ConstraintRange>(Class, Constraint);
1067 }
1068 
1069 LLVM_NODISCARD ProgramStateRef setConstraints(ProgramStateRef State,
1070                                               ConstraintRangeTy Constraints) {
1071   return State->set<ConstraintRange>(Constraints);
1072 }
1073 
1074 //===----------------------------------------------------------------------===//
//                       Equality/disequality abstraction
1076 //===----------------------------------------------------------------------===//
1077 
1078 /// A small helper function for detecting symbolic (dis)equality.
1079 ///
1080 /// Equality check can have different forms (like a == b or a - b) and this
1081 /// class encapsulates those away if the only thing the user wants to check -
1082 /// whether it's equality/diseqiality or not.
1083 ///
1084 /// \returns true if assuming this Sym to be true means equality of operands
1085 ///          false if it means disequality of operands
1086 ///          None otherwise
1087 Optional<bool> meansEquality(const SymSymExpr *Sym) {
1088   switch (Sym->getOpcode()) {
1089   case BO_Sub:
1090     // This case is: A - B != 0 -> disequality check.
1091     return false;
1092   case BO_EQ:
1093     // This case is: A == B != 0 -> equality check.
1094     return true;
1095   case BO_NE:
1096     // This case is: A != B != 0 -> diseqiality check.
1097     return false;
1098   default:
1099     return llvm::None;
1100   }
1101 }
1102 
1103 //===----------------------------------------------------------------------===//
1104 //                            Intersection functions
1105 //===----------------------------------------------------------------------===//
1106 
// Forward declaration of the <RangeSet, SecondTy, ...> overload.  It is
// defined after the <RangeSet, RangeSet, ...> overload, which may need to call
// it for the remaining arguments.
template <class SecondTy, class... RestTy>
LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
                                         SecondTy Second, RestTy... Tail);
1110 
// Compile-time computation of the result type of the generic intersect():
// RangeSet as soon as any argument type is RangeSet, Optional<RangeSet>
// otherwise.
template <class... RangeTy> struct IntersectionTraits;

template <class... TailTy> struct IntersectionTraits<RangeSet, TailTy...> {
  // Found RangeSet, no need to check any further
  using Type = RangeSet;
};

template <> struct IntersectionTraits<> {
  // We ran out of types, and we didn't find any RangeSet, so the result should
  // be optional.
  using Type = Optional<RangeSet>;
};

template <class OptionalOrPointer, class... TailTy>
struct IntersectionTraits<OptionalOrPointer, TailTy...> {
  // If current type is Optional or a raw pointer, we should keep looking.
  using Type = typename IntersectionTraits<TailTy...>::Type;
};
1129 
// Base case of the recursion: a single argument is left, nothing to intersect
// it with.  The factory parameter is unused here.
template <class EndTy>
LLVM_NODISCARD inline EndTy intersect(RangeSet::Factory &F, EndTy End) {
  // If the list contains only RangeSet or Optional<RangeSet>, simply return
  // that range set.
  return End;
}
1136 
1137 LLVM_NODISCARD LLVM_ATTRIBUTE_UNUSED inline Optional<RangeSet>
1138 intersect(RangeSet::Factory &F, const RangeSet *End) {
1139   // This is an extraneous conversion from a raw pointer into Optional<RangeSet>
1140   if (End) {
1141     return *End;
1142   }
1143   return llvm::None;
1144 }
1145 
1146 template <class... RestTy>
1147 LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1148                                          RangeSet Second, RestTy... Tail) {
1149   // Here we call either the <RangeSet,RangeSet,...> or <RangeSet,...> version
1150   // of the function and can be sure that the result is RangeSet.
1151   return intersect(F, F.intersect(Head, Second), Tail...);
1152 }
1153 
1154 template <class SecondTy, class... RestTy>
1155 LLVM_NODISCARD inline RangeSet intersect(RangeSet::Factory &F, RangeSet Head,
1156                                          SecondTy Second, RestTy... Tail) {
1157   if (Second) {
1158     // Here we call the <RangeSet,RangeSet,...> version of the function...
1159     return intersect(F, Head, *Second, Tail...);
1160   }
1161   // ...and here it is either <RangeSet,RangeSet,...> or <RangeSet,...>, which
1162   // means that the result is definitely RangeSet.
1163   return intersect(F, Head, Tail...);
1164 }
1165 
1166 /// Main generic intersect function.
1167 /// It intersects all of the given range sets.  If some of the given arguments
1168 /// don't hold a range set (nullptr or llvm::None), the function will skip them.
1169 ///
1170 /// Available representations for the arguments are:
1171 ///   * RangeSet
1172 ///   * Optional<RangeSet>
1173 ///   * RangeSet *
1174 /// Pointer to a RangeSet is automatically assumed to be nullable and will get
1175 /// checked as well as the optional version.  If this behaviour is undesired,
1176 /// please dereference the pointer in the call.
1177 ///
1178 /// Return type depends on the arguments' types.  If we can be sure in compile
1179 /// time that there will be a range set as a result, the returning type is
1180 /// simply RangeSet, in other cases we have to back off to Optional<RangeSet>.
1181 ///
1182 /// Please, prefer optional range sets to raw pointers.  If the last argument is
1183 /// a raw pointer and all previous arguments are None, it will cost one
1184 /// additional check to convert RangeSet * into Optional<RangeSet>.
1185 template <class HeadTy, class SecondTy, class... RestTy>
1186 LLVM_NODISCARD inline
1187     typename IntersectionTraits<HeadTy, SecondTy, RestTy...>::Type
1188     intersect(RangeSet::Factory &F, HeadTy Head, SecondTy Second,
1189               RestTy... Tail) {
1190   if (Head) {
1191     return intersect(F, *Head, Second, Tail...);
1192   }
1193   return intersect(F, Second, Tail...);
1194 }
1195 
1196 //===----------------------------------------------------------------------===//
1197 //                           Symbolic reasoning logic
1198 //===----------------------------------------------------------------------===//
1199 
1200 /// A little component aggregating all of the reasoning we have about
1201 /// the ranges of symbolic expressions.
1202 ///
1203 /// Even when we don't know the exact values of the operands, we still
1204 /// can get a pretty good estimate of the result's range.
1205 class SymbolicRangeInferrer
1206     : public SymExprVisitor<SymbolicRangeInferrer, RangeSet> {
1207 public:
1208   template <class SourceType>
1209   static RangeSet inferRange(RangeSet::Factory &F, ProgramStateRef State,
1210                              SourceType Origin) {
1211     SymbolicRangeInferrer Inferrer(F, State);
1212     return Inferrer.infer(Origin);
1213   }
1214 
1215   RangeSet VisitSymExpr(SymbolRef Sym) {
1216     if (Optional<RangeSet> RS = getRangeForNegatedSym(Sym))
1217       return *RS;
1218     // If we've reached this line, the actual type of the symbolic
1219     // expression is not supported for advanced inference.
1220     // In this case, we simply backoff to the default "let's simply
1221     // infer the range from the expression's type".
1222     return infer(Sym->getType());
1223   }
1224 
1225   RangeSet VisitUnarySymExpr(const UnarySymExpr *USE) {
1226     if (Optional<RangeSet> RS = getRangeForNegatedUnarySym(USE))
1227       return *RS;
1228     return infer(USE->getType());
1229   }
1230 
  // Symbol-with-integer expressions are handled by the generic binary
  // operator logic.
  RangeSet VisitSymIntExpr(const SymIntExpr *Sym) {
    return VisitBinaryOperator(Sym);
  }

  // Same for integer-with-symbol expressions.
  RangeSet VisitIntSymExpr(const IntSymExpr *Sym) {
    return VisitBinaryOperator(Sym);
  }
1238 
1239   RangeSet VisitSymSymExpr(const SymSymExpr *SSE) {
1240     return intersect(
1241         RangeFactory,
1242         // If Sym is a difference of symbols A - B, then maybe we have range
1243         // set stored for B - A.
1244         //
1245         // If we have range set stored for both A - B and B - A then
1246         // calculate the effective range set by intersecting the range set
1247         // for A - B and the negated range set of B - A.
1248         getRangeForNegatedSymSym(SSE),
1249         // If Sym is a comparison expression (except <=>),
1250         // find any other comparisons with the same operands.
1251         // See function description.
1252         getRangeForComparisonSymbol(SSE),
1253         // If Sym is (dis)equality, we might have some information
1254         // on that in our equality classes data structure.
1255         getRangeForEqualities(SSE),
1256         // And we should always check what we can get from the operands.
1257         VisitBinaryOperator(SSE));
1258   }
1259 
1260 private:
  // Private: instances are created by inferRange().  The value factory is
  // taken from the given range factory.
  SymbolicRangeInferrer(RangeSet::Factory &F, ProgramStateRef S)
      : ValueFactory(F.getValueFactory()), RangeFactory(F), State(S) {}
1263 
1264   /// Infer range information from the given integer constant.
1265   ///
1266   /// It's not a real "inference", but is here for operating with
1267   /// sub-expressions in a more polymorphic manner.
1268   RangeSet inferAs(const llvm::APSInt &Val, QualType) {
1269     return {RangeFactory, Val};
1270   }
1271 
1272   /// Infer range information from symbol in the context of the given type.
1273   RangeSet inferAs(SymbolRef Sym, QualType DestType) {
1274     QualType ActualType = Sym->getType();
1275     // Check that we can reason about the symbol at all.
1276     if (ActualType->isIntegralOrEnumerationType() ||
1277         Loc::isLocType(ActualType)) {
1278       return infer(Sym);
1279     }
1280     // Otherwise, let's simply infer from the destination type.
1281     // We couldn't figure out nothing else about that expression.
1282     return infer(DestType);
1283   }
1284 
1285   RangeSet infer(SymbolRef Sym) {
1286     return intersect(RangeFactory,
1287                      // Of course, we should take the constraint directly
1288                      // associated with this symbol into consideration.
1289                      getConstraint(State, Sym),
1290                      // Apart from the Sym itself, we can infer quite a lot if
1291                      // we look into subexpressions of Sym.
1292                      Visit(Sym));
1293   }
1294 
1295   RangeSet infer(EquivalenceClass Class) {
1296     if (const RangeSet *AssociatedConstraint = getConstraint(State, Class))
1297       return *AssociatedConstraint;
1298 
1299     return infer(Class.getType());
1300   }
1301 
1302   /// Infer range information solely from the type.
1303   RangeSet infer(QualType T) {
1304     // Lazily generate a new RangeSet representing all possible values for the
1305     // given symbol type.
1306     RangeSet Result(RangeFactory, ValueFactory.getMinValue(T),
1307                     ValueFactory.getMaxValue(T));
1308 
1309     // References are known to be non-zero.
1310     if (T->isReferenceType())
1311       return assumeNonZero(Result, T);
1312 
1313     return Result;
1314   }
1315 
1316   template <class BinarySymExprTy>
1317   RangeSet VisitBinaryOperator(const BinarySymExprTy *Sym) {
1318     // TODO #1: VisitBinaryOperator implementation might not make a good
1319     // use of the inferred ranges.  In this case, we might be calculating
1320     // everything for nothing.  This being said, we should introduce some
1321     // sort of laziness mechanism here.
1322     //
1323     // TODO #2: We didn't go into the nested expressions before, so it
1324     // might cause us spending much more time doing the inference.
1325     // This can be a problem for deeply nested expressions that are
1326     // involved in conditions and get tested continuously.  We definitely
1327     // need to address this issue and introduce some sort of caching
1328     // in here.
1329     QualType ResultType = Sym->getType();
1330     return VisitBinaryOperator(inferAs(Sym->getLHS(), ResultType),
1331                                Sym->getOpcode(),
1332                                inferAs(Sym->getRHS(), ResultType), ResultType);
1333   }
1334 
1335   RangeSet VisitBinaryOperator(RangeSet LHS, BinaryOperator::Opcode Op,
1336                                RangeSet RHS, QualType T) {
1337     switch (Op) {
1338     case BO_Or:
1339       return VisitBinaryOperator<BO_Or>(LHS, RHS, T);
1340     case BO_And:
1341       return VisitBinaryOperator<BO_And>(LHS, RHS, T);
1342     case BO_Rem:
1343       return VisitBinaryOperator<BO_Rem>(LHS, RHS, T);
1344     default:
1345       return infer(T);
1346     }
1347   }
1348 
1349   //===----------------------------------------------------------------------===//
1350   //                         Ranges and operators
1351   //===----------------------------------------------------------------------===//
1352 
1353   /// Return a rough approximation of the given range set.
1354   ///
1355   /// For the range set:
1356   ///   { [x_0, y_0], [x_1, y_1], ... , [x_N, y_N] }
1357   /// it will return the range [x_0, y_N].
1358   static Range fillGaps(RangeSet Origin) {
1359     assert(!Origin.isEmpty());
1360     return {Origin.getMinValue(), Origin.getMaxValue()};
1361   }
1362 
1363   /// Try to convert given range into the given type.
1364   ///
1365   /// It will return llvm::None only when the trivial conversion is possible.
1366   llvm::Optional<Range> convert(const Range &Origin, APSIntType To) {
1367     if (To.testInRange(Origin.From(), false) != APSIntType::RTR_Within ||
1368         To.testInRange(Origin.To(), false) != APSIntType::RTR_Within) {
1369       return llvm::None;
1370     }
1371     return Range(ValueFactory.Convert(To, Origin.From()),
1372                  ValueFactory.Convert(To, Origin.To()));
1373   }
1374 
1375   template <BinaryOperator::Opcode Op>
1376   RangeSet VisitBinaryOperator(RangeSet LHS, RangeSet RHS, QualType T) {
1377     // We should propagate information about unfeasbility of one of the
1378     // operands to the resulting range.
1379     if (LHS.isEmpty() || RHS.isEmpty()) {
1380       return RangeFactory.getEmptySet();
1381     }
1382 
1383     Range CoarseLHS = fillGaps(LHS);
1384     Range CoarseRHS = fillGaps(RHS);
1385 
1386     APSIntType ResultType = ValueFactory.getAPSIntType(T);
1387 
1388     // We need to convert ranges to the resulting type, so we can compare values
1389     // and combine them in a meaningful (in terms of the given operation) way.
1390     auto ConvertedCoarseLHS = convert(CoarseLHS, ResultType);
1391     auto ConvertedCoarseRHS = convert(CoarseRHS, ResultType);
1392 
1393     // It is hard to reason about ranges when conversion changes
1394     // borders of the ranges.
1395     if (!ConvertedCoarseLHS || !ConvertedCoarseRHS) {
1396       return infer(T);
1397     }
1398 
1399     return VisitBinaryOperator<Op>(*ConvertedCoarseLHS, *ConvertedCoarseRHS, T);
1400   }
1401 
  // Generic Range-level implementation: ignores both operand ranges and
  // returns the range inferred from the type alone.
  template <BinaryOperator::Opcode Op>
  RangeSet VisitBinaryOperator(Range LHS, Range RHS, QualType T) {
    return infer(T);
  }
1406 
1407   /// Return a symmetrical range for the given range and type.
1408   ///
1409   /// If T is signed, return the smallest range [-x..x] that covers the original
1410   /// range, or [-min(T), max(T)] if the aforementioned symmetric range doesn't
1411   /// exist due to original range covering min(T)).
1412   ///
1413   /// If T is unsigned, return the smallest range [0..x] that covers the
1414   /// original range.
1415   Range getSymmetricalRange(Range Origin, QualType T) {
1416     APSIntType RangeType = ValueFactory.getAPSIntType(T);
1417 
1418     if (RangeType.isUnsigned()) {
1419       return Range(ValueFactory.getMinValue(RangeType), Origin.To());
1420     }
1421 
1422     if (Origin.From().isMinSignedValue()) {
1423       // If mini is a minimal signed value, absolute value of it is greater
1424       // than the maximal signed value.  In order to avoid these
1425       // complications, we simply return the whole range.
1426       return {ValueFactory.getMinValue(RangeType),
1427               ValueFactory.getMaxValue(RangeType)};
1428     }
1429 
1430     // At this point, we are sure that the type is signed and we can safely
1431     // use unary - operator.
1432     //
1433     // While calculating absolute maximum, we can use the following formula
1434     // because of these reasons:
1435     //   * If From >= 0 then To >= From and To >= -From.
1436     //     AbsMax == To == max(To, -From)
1437     //   * If To <= 0 then -From >= -To and -From >= From.
1438     //     AbsMax == -From == max(-From, To)
1439     //   * Otherwise, From <= 0, To >= 0, and
1440     //     AbsMax == max(abs(From), abs(To))
1441     llvm::APSInt AbsMax = std::max(-Origin.From(), Origin.To());
1442 
1443     // Intersection is guaranteed to be non-empty.
1444     return {ValueFactory.getValue(-AbsMax), ValueFactory.getValue(AbsMax)};
1445   }
1446 
1447   /// Return a range set subtracting zero from \p Domain.
1448   RangeSet assumeNonZero(RangeSet Domain, QualType T) {
1449     APSIntType IntType = ValueFactory.getAPSIntType(T);
1450     return RangeFactory.deletePoint(Domain, IntType.getZeroValue());
1451   }
1452 
1453   template <typename ProduceNegatedSymFunc>
1454   Optional<RangeSet> getRangeForNegatedExpr(ProduceNegatedSymFunc F,
1455                                             QualType T) {
1456     // Do not negate if the type cannot be meaningfully negated.
1457     if (!T->isUnsignedIntegerOrEnumerationType() &&
1458         !T->isSignedIntegerOrEnumerationType())
1459       return llvm::None;
1460 
1461     if (SymbolRef NegatedSym = F())
1462       if (const RangeSet *NegatedRange = getConstraint(State, NegatedSym))
1463         return RangeFactory.negate(*NegatedRange);
1464 
1465     return llvm::None;
1466   }
1467 
1468   Optional<RangeSet> getRangeForNegatedUnarySym(const UnarySymExpr *USE) {
1469     // Just get the operand when we negate a symbol that is already negated.
1470     // -(-a) == a
1471     return getRangeForNegatedExpr(
1472         [USE]() -> SymbolRef {
1473           if (USE->getOpcode() == UO_Minus)
1474             return USE->getOperand();
1475           return nullptr;
1476         },
1477         USE->getType());
1478   }
1479 
1480   Optional<RangeSet> getRangeForNegatedSymSym(const SymSymExpr *SSE) {
1481     return getRangeForNegatedExpr(
1482         [SSE, State = this->State]() -> SymbolRef {
1483           if (SSE->getOpcode() == BO_Sub)
1484             return State->getSymbolManager().getSymSymExpr(
1485                 SSE->getRHS(), BO_Sub, SSE->getLHS(), SSE->getType());
1486           return nullptr;
1487         },
1488         SSE->getType());
1489   }
1490 
1491   Optional<RangeSet> getRangeForNegatedSym(SymbolRef Sym) {
1492     return getRangeForNegatedExpr(
1493         [Sym, State = this->State]() {
1494           return State->getSymbolManager().getUnarySymExpr(Sym, UO_Minus,
1495                                                            Sym->getType());
1496         },
1497         Sym->getType());
1498   }
1499 
1500   // Returns ranges only for binary comparison operators (except <=>)
1501   // when left and right operands are symbolic values.
1502   // Finds any other comparisons with the same operands.
1503   // Then do logical calculations and refuse impossible branches.
1504   // E.g. (x < y) and (x > y) at the same time are impossible.
1505   // E.g. (x >= y) and (x != y) at the same time makes (x > y) true only.
1506   // E.g. (x == y) and (y == x) are just reversed but the same.
1507   // It covers all possible combinations (see CmpOpTable description).
1508   // Note that `x` and `y` can also stand for subexpressions,
1509   // not only for actual symbols.
1510   Optional<RangeSet> getRangeForComparisonSymbol(const SymSymExpr *SSE) {
1511     const BinaryOperatorKind CurrentOP = SSE->getOpcode();
1512 
1513     // We currently do not support <=> (C++20).
1514     if (!BinaryOperator::isComparisonOp(CurrentOP) || (CurrentOP == BO_Cmp))
1515       return llvm::None;
1516 
1517     static const OperatorRelationsTable CmpOpTable{};
1518 
1519     const SymExpr *LHS = SSE->getLHS();
1520     const SymExpr *RHS = SSE->getRHS();
1521     QualType T = SSE->getType();
1522 
1523     SymbolManager &SymMgr = State->getSymbolManager();
1524 
1525     // We use this variable to store the last queried operator (`QueriedOP`)
1526     // for which the `getCmpOpState` returned with `Unknown`. If there are two
1527     // different OPs that returned `Unknown` then we have to query the special
1528     // `UnknownX2` column. We assume that `getCmpOpState(CurrentOP, CurrentOP)`
1529     // never returns `Unknown`, so `CurrentOP` is a good initial value.
1530     BinaryOperatorKind LastQueriedOpToUnknown = CurrentOP;
1531 
1532     // Loop goes through all of the columns exept the last one ('UnknownX2').
1533     // We treat `UnknownX2` column separately at the end of the loop body.
1534     for (size_t i = 0; i < CmpOpTable.getCmpOpCount(); ++i) {
1535 
1536       // Let's find an expression e.g. (x < y).
1537       BinaryOperatorKind QueriedOP = OperatorRelationsTable::getOpFromIndex(i);
1538       const SymSymExpr *SymSym = SymMgr.getSymSymExpr(LHS, QueriedOP, RHS, T);
1539       const RangeSet *QueriedRangeSet = getConstraint(State, SymSym);
1540 
1541       // If ranges were not previously found,
1542       // try to find a reversed expression (y > x).
1543       if (!QueriedRangeSet) {
1544         const BinaryOperatorKind ROP =
1545             BinaryOperator::reverseComparisonOp(QueriedOP);
1546         SymSym = SymMgr.getSymSymExpr(RHS, ROP, LHS, T);
1547         QueriedRangeSet = getConstraint(State, SymSym);
1548       }
1549 
1550       if (!QueriedRangeSet || QueriedRangeSet->isEmpty())
1551         continue;
1552 
1553       const llvm::APSInt *ConcreteValue = QueriedRangeSet->getConcreteValue();
1554       const bool isInFalseBranch =
1555           ConcreteValue ? (*ConcreteValue == 0) : false;
1556 
1557       // If it is a false branch, we shall be guided by opposite operator,
1558       // because the table is made assuming we are in the true branch.
1559       // E.g. when (x <= y) is false, then (x > y) is true.
1560       if (isInFalseBranch)
1561         QueriedOP = BinaryOperator::negateComparisonOp(QueriedOP);
1562 
1563       OperatorRelationsTable::TriStateKind BranchState =
1564           CmpOpTable.getCmpOpState(CurrentOP, QueriedOP);
1565 
1566       if (BranchState == OperatorRelationsTable::Unknown) {
1567         if (LastQueriedOpToUnknown != CurrentOP &&
1568             LastQueriedOpToUnknown != QueriedOP) {
1569           // If we got the Unknown state for both different operators.
1570           // if (x <= y)    // assume true
1571           //   if (x != y)  // assume true
1572           //     if (x < y) // would be also true
1573           // Get a state from `UnknownX2` column.
1574           BranchState = CmpOpTable.getCmpOpStateForUnknownX2(CurrentOP);
1575         } else {
1576           LastQueriedOpToUnknown = QueriedOP;
1577           continue;
1578         }
1579       }
1580 
1581       return (BranchState == OperatorRelationsTable::True) ? getTrueRange(T)
1582                                                            : getFalseRange(T);
1583     }
1584 
1585     return llvm::None;
1586   }
1587 
1588   Optional<RangeSet> getRangeForEqualities(const SymSymExpr *Sym) {
1589     Optional<bool> Equality = meansEquality(Sym);
1590 
1591     if (!Equality)
1592       return llvm::None;
1593 
1594     if (Optional<bool> AreEqual =
1595             EquivalenceClass::areEqual(State, Sym->getLHS(), Sym->getRHS())) {
1596       // Here we cover two cases at once:
1597       //   * if Sym is equality and its operands are known to be equal -> true
1598       //   * if Sym is disequality and its operands are disequal -> true
1599       if (*AreEqual == *Equality) {
1600         return getTrueRange(Sym->getType());
1601       }
1602       // Opposite combinations result in false.
1603       return getFalseRange(Sym->getType());
1604     }
1605 
1606     return llvm::None;
1607   }
1608 
1609   RangeSet getTrueRange(QualType T) {
1610     RangeSet TypeRange = infer(T);
1611     return assumeNonZero(TypeRange, T);
1612   }
1613 
1614   RangeSet getFalseRange(QualType T) {
1615     const llvm::APSInt &Zero = ValueFactory.getValue(0, T);
1616     return RangeSet(RangeFactory, Zero);
1617   }
1618 
1619   BasicValueFactory &ValueFactory;
1620   RangeSet::Factory &RangeFactory;
1621   ProgramStateRef State;
1622 };
1623 
1624 //===----------------------------------------------------------------------===//
1625 //               Range-based reasoning about symbolic operations
1626 //===----------------------------------------------------------------------===//
1627 
1628 template <>
1629 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Or>(Range LHS, Range RHS,
1630                                                            QualType T) {
1631   APSIntType ResultType = ValueFactory.getAPSIntType(T);
1632   llvm::APSInt Zero = ResultType.getZeroValue();
1633 
1634   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1635   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1636 
1637   bool IsLHSNegative = LHS.To() < Zero;
1638   bool IsRHSNegative = RHS.To() < Zero;
1639 
1640   // Check if both ranges have the same sign.
1641   if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1642       (IsLHSNegative && IsRHSNegative)) {
1643     // The result is definitely greater or equal than any of the operands.
1644     const llvm::APSInt &Min = std::max(LHS.From(), RHS.From());
1645 
1646     // We estimate maximal value for positives as the maximal value for the
1647     // given type.  For negatives, we estimate it with -1 (e.g. 0x11111111).
1648     //
1649     // TODO: We basically, limit the resulting range from below, but don't do
1650     //       anything with the upper bound.
1651     //
1652     //       For positive operands, it can be done as follows: for the upper
1653     //       bound of LHS and RHS we calculate the most significant bit set.
1654     //       Let's call it the N-th bit.  Then we can estimate the maximal
1655     //       number to be 2^(N+1)-1, i.e. the number with all the bits up to
1656     //       the N-th bit set.
1657     const llvm::APSInt &Max = IsLHSNegative
1658                                   ? ValueFactory.getValue(--Zero)
1659                                   : ValueFactory.getMaxValue(ResultType);
1660 
1661     return {RangeFactory, ValueFactory.getValue(Min), Max};
1662   }
1663 
1664   // Otherwise, let's check if at least one of the operands is negative.
1665   if (IsLHSNegative || IsRHSNegative) {
1666     // This means that the result is definitely negative as well.
1667     return {RangeFactory, ValueFactory.getMinValue(ResultType),
1668             ValueFactory.getValue(--Zero)};
1669   }
1670 
1671   RangeSet DefaultRange = infer(T);
1672 
1673   // It is pretty hard to reason about operands with different signs
1674   // (and especially with possibly different signs).  We simply check if it
1675   // can be zero.  In order to conclude that the result could not be zero,
1676   // at least one of the operands should be definitely not zero itself.
1677   if (!LHS.Includes(Zero) || !RHS.Includes(Zero)) {
1678     return assumeNonZero(DefaultRange, T);
1679   }
1680 
1681   // Nothing much else to do here.
1682   return DefaultRange;
1683 }
1684 
1685 template <>
1686 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_And>(Range LHS,
1687                                                             Range RHS,
1688                                                             QualType T) {
1689   APSIntType ResultType = ValueFactory.getAPSIntType(T);
1690   llvm::APSInt Zero = ResultType.getZeroValue();
1691 
1692   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1693   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1694 
1695   bool IsLHSNegative = LHS.To() < Zero;
1696   bool IsRHSNegative = RHS.To() < Zero;
1697 
1698   // Check if both ranges have the same sign.
1699   if ((IsLHSPositiveOrZero && IsRHSPositiveOrZero) ||
1700       (IsLHSNegative && IsRHSNegative)) {
1701     // The result is definitely less or equal than any of the operands.
1702     const llvm::APSInt &Max = std::min(LHS.To(), RHS.To());
1703 
1704     // We conservatively estimate lower bound to be the smallest positive
1705     // or negative value corresponding to the sign of the operands.
1706     const llvm::APSInt &Min = IsLHSNegative
1707                                   ? ValueFactory.getMinValue(ResultType)
1708                                   : ValueFactory.getValue(Zero);
1709 
1710     return {RangeFactory, Min, Max};
1711   }
1712 
1713   // Otherwise, let's check if at least one of the operands is positive.
1714   if (IsLHSPositiveOrZero || IsRHSPositiveOrZero) {
1715     // This makes result definitely positive.
1716     //
1717     // We can also reason about a maximal value by finding the maximal
1718     // value of the positive operand.
1719     const llvm::APSInt &Max = IsLHSPositiveOrZero ? LHS.To() : RHS.To();
1720 
1721     // The minimal value on the other hand is much harder to reason about.
1722     // The only thing we know for sure is that the result is positive.
1723     return {RangeFactory, ValueFactory.getValue(Zero),
1724             ValueFactory.getValue(Max)};
1725   }
1726 
1727   // Nothing much else to do here.
1728   return infer(T);
1729 }
1730 
1731 template <>
1732 RangeSet SymbolicRangeInferrer::VisitBinaryOperator<BO_Rem>(Range LHS,
1733                                                             Range RHS,
1734                                                             QualType T) {
1735   llvm::APSInt Zero = ValueFactory.getAPSIntType(T).getZeroValue();
1736 
1737   Range ConservativeRange = getSymmetricalRange(RHS, T);
1738 
1739   llvm::APSInt Max = ConservativeRange.To();
1740   llvm::APSInt Min = ConservativeRange.From();
1741 
1742   if (Max == Zero) {
1743     // It's an undefined behaviour to divide by 0 and it seems like we know
1744     // for sure that RHS is 0.  Let's say that the resulting range is
1745     // simply infeasible for that matter.
1746     return RangeFactory.getEmptySet();
1747   }
1748 
1749   // At this point, our conservative range is closed.  The result, however,
1750   // couldn't be greater than the RHS' maximal absolute value.  Because of
1751   // this reason, we turn the range into open (or half-open in case of
1752   // unsigned integers).
1753   //
1754   // While we operate on integer values, an open interval (a, b) can be easily
1755   // represented by the closed interval [a + 1, b - 1].  And this is exactly
1756   // what we do next.
1757   //
1758   // If we are dealing with unsigned case, we shouldn't move the lower bound.
1759   if (Min.isSigned()) {
1760     ++Min;
1761   }
1762   --Max;
1763 
1764   bool IsLHSPositiveOrZero = LHS.From() >= Zero;
1765   bool IsRHSPositiveOrZero = RHS.From() >= Zero;
1766 
1767   // Remainder operator results with negative operands is implementation
1768   // defined.  Positive cases are much easier to reason about though.
1769   if (IsLHSPositiveOrZero && IsRHSPositiveOrZero) {
1770     // If maximal value of LHS is less than maximal value of RHS,
1771     // the result won't get greater than LHS.To().
1772     Max = std::min(LHS.To(), Max);
1773     // We want to check if it is a situation similar to the following:
1774     //
1775     // <------------|---[  LHS  ]--------[  RHS  ]----->
1776     //  -INF        0                              +INF
1777     //
1778     // In this situation, we can conclude that (LHS / RHS) == 0 and
1779     // (LHS % RHS) == LHS.
1780     Min = LHS.To() < RHS.From() ? LHS.From() : Zero;
1781   }
1782 
1783   // Nevertheless, the symmetrical range for RHS is a conservative estimate
1784   // for any sign of either LHS, or RHS.
1785   return {RangeFactory, ValueFactory.getValue(Min), ValueFactory.getValue(Max)};
1786 }
1787 
1788 //===----------------------------------------------------------------------===//
1789 //                  Constraint manager implementation details
1790 //===----------------------------------------------------------------------===//
1791 
1792 class RangeConstraintManager : public RangedConstraintManager {
1793 public:
1794   RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB)
1795       : RangedConstraintManager(EE, SVB), F(getBasicVals()) {}
1796 
1797   //===------------------------------------------------------------------===//
1798   // Implementation for interface from ConstraintManager.
1799   //===------------------------------------------------------------------===//
1800 
1801   bool haveEqualConstraints(ProgramStateRef S1,
1802                             ProgramStateRef S2) const override {
1803     // NOTE: ClassMembers are as simple as back pointers for ClassMap,
1804     //       so comparing constraint ranges and class maps should be
1805     //       sufficient.
1806     return S1->get<ConstraintRange>() == S2->get<ConstraintRange>() &&
1807            S1->get<ClassMap>() == S2->get<ClassMap>();
1808   }
1809 
1810   bool canReasonAbout(SVal X) const override;
1811 
1812   ConditionTruthVal checkNull(ProgramStateRef State, SymbolRef Sym) override;
1813 
1814   const llvm::APSInt *getSymVal(ProgramStateRef State,
1815                                 SymbolRef Sym) const override;
1816 
1817   ProgramStateRef removeDeadBindings(ProgramStateRef State,
1818                                      SymbolReaper &SymReaper) override;
1819 
1820   void printJson(raw_ostream &Out, ProgramStateRef State, const char *NL = "\n",
1821                  unsigned int Space = 0, bool IsDot = false) const override;
1822   void printValue(raw_ostream &Out, ProgramStateRef State,
1823                   SymbolRef Sym) override;
1824   void printConstraints(raw_ostream &Out, ProgramStateRef State,
1825                         const char *NL = "\n", unsigned int Space = 0,
1826                         bool IsDot = false) const;
1827   void printEquivalenceClasses(raw_ostream &Out, ProgramStateRef State,
1828                                const char *NL = "\n", unsigned int Space = 0,
1829                                bool IsDot = false) const;
1830   void printDisequalities(raw_ostream &Out, ProgramStateRef State,
1831                           const char *NL = "\n", unsigned int Space = 0,
1832                           bool IsDot = false) const;
1833 
1834   //===------------------------------------------------------------------===//
1835   // Implementation for interface from RangedConstraintManager.
1836   //===------------------------------------------------------------------===//
1837 
1838   ProgramStateRef assumeSymNE(ProgramStateRef State, SymbolRef Sym,
1839                               const llvm::APSInt &V,
1840                               const llvm::APSInt &Adjustment) override;
1841 
1842   ProgramStateRef assumeSymEQ(ProgramStateRef State, SymbolRef Sym,
1843                               const llvm::APSInt &V,
1844                               const llvm::APSInt &Adjustment) override;
1845 
1846   ProgramStateRef assumeSymLT(ProgramStateRef State, SymbolRef Sym,
1847                               const llvm::APSInt &V,
1848                               const llvm::APSInt &Adjustment) override;
1849 
1850   ProgramStateRef assumeSymGT(ProgramStateRef State, SymbolRef Sym,
1851                               const llvm::APSInt &V,
1852                               const llvm::APSInt &Adjustment) override;
1853 
1854   ProgramStateRef assumeSymLE(ProgramStateRef State, SymbolRef Sym,
1855                               const llvm::APSInt &V,
1856                               const llvm::APSInt &Adjustment) override;
1857 
1858   ProgramStateRef assumeSymGE(ProgramStateRef State, SymbolRef Sym,
1859                               const llvm::APSInt &V,
1860                               const llvm::APSInt &Adjustment) override;
1861 
1862   ProgramStateRef assumeSymWithinInclusiveRange(
1863       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1864       const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1865 
1866   ProgramStateRef assumeSymOutsideInclusiveRange(
1867       ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
1868       const llvm::APSInt &To, const llvm::APSInt &Adjustment) override;
1869 
1870 private:
1871   RangeSet::Factory F;
1872 
1873   RangeSet getRange(ProgramStateRef State, SymbolRef Sym);
1874   RangeSet getRange(ProgramStateRef State, EquivalenceClass Class);
1875   ProgramStateRef setRange(ProgramStateRef State, SymbolRef Sym,
1876                            RangeSet Range);
1877   ProgramStateRef setRange(ProgramStateRef State, EquivalenceClass Class,
1878                            RangeSet Range);
1879 
1880   RangeSet getSymLTRange(ProgramStateRef St, SymbolRef Sym,
1881                          const llvm::APSInt &Int,
1882                          const llvm::APSInt &Adjustment);
1883   RangeSet getSymGTRange(ProgramStateRef St, SymbolRef Sym,
1884                          const llvm::APSInt &Int,
1885                          const llvm::APSInt &Adjustment);
1886   RangeSet getSymLERange(ProgramStateRef St, SymbolRef Sym,
1887                          const llvm::APSInt &Int,
1888                          const llvm::APSInt &Adjustment);
1889   RangeSet getSymLERange(llvm::function_ref<RangeSet()> RS,
1890                          const llvm::APSInt &Int,
1891                          const llvm::APSInt &Adjustment);
1892   RangeSet getSymGERange(ProgramStateRef St, SymbolRef Sym,
1893                          const llvm::APSInt &Int,
1894                          const llvm::APSInt &Adjustment);
1895 };
1896 
1897 //===----------------------------------------------------------------------===//
1898 //                         Constraint assignment logic
1899 //===----------------------------------------------------------------------===//
1900 
1901 /// ConstraintAssignorBase is a small utility class that unifies visitor
1902 /// for ranges with a visitor for constraints (rangeset/range/constant).
1903 ///
1904 /// It is designed to have one derived class, but generally it can have more.
1905 /// Derived class can control which types we handle by defining methods of the
1906 /// following form:
1907 ///
1908 ///   bool handle${SYMBOL}To${CONSTRAINT}(const SYMBOL *Sym,
1909 ///                                       CONSTRAINT Constraint);
1910 ///
1911 /// where SYMBOL is the type of the symbol (e.g. SymSymExpr, SymbolCast, etc.)
1912 ///       CONSTRAINT is the type of constraint (RangeSet/Range/Const)
1913 ///       return value signifies whether we should try other handle methods
1914 ///          (i.e. false would mean to stop right after calling this method)
1915 template <class Derived> class ConstraintAssignorBase {
1916 public:
1917   using Const = const llvm::APSInt &;
1918 
1919 #define DISPATCH(CLASS) return assign##CLASS##Impl(cast<CLASS>(Sym), Constraint)
1920 
1921 #define ASSIGN(CLASS, TO, SYM, CONSTRAINT)                                     \
1922   if (!static_cast<Derived *>(this)->assign##CLASS##To##TO(SYM, CONSTRAINT))   \
1923   return false
1924 
1925   void assign(SymbolRef Sym, RangeSet Constraint) {
1926     assignImpl(Sym, Constraint);
1927   }
1928 
1929   bool assignImpl(SymbolRef Sym, RangeSet Constraint) {
1930     switch (Sym->getKind()) {
1931 #define SYMBOL(Id, Parent)                                                     \
1932   case SymExpr::Id##Kind:                                                      \
1933     DISPATCH(Id);
1934 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
1935     }
1936     llvm_unreachable("Unknown SymExpr kind!");
1937   }
1938 
1939 #define DEFAULT_ASSIGN(Id)                                                     \
1940   bool assign##Id##To##RangeSet(const Id *Sym, RangeSet Constraint) {          \
1941     return true;                                                               \
1942   }                                                                            \
1943   bool assign##Id##To##Range(const Id *Sym, Range Constraint) { return true; } \
1944   bool assign##Id##To##Const(const Id *Sym, Const Constraint) { return true; }
1945 
1946   // When we dispatch for constraint types, we first try to check
1947   // if the new constraint is the constant and try the corresponding
1948   // assignor methods.  If it didn't interrupt, we can proceed to the
1949   // range, and finally to the range set.
1950 #define CONSTRAINT_DISPATCH(Id)                                                \
1951   if (const llvm::APSInt *Const = Constraint.getConcreteValue()) {             \
1952     ASSIGN(Id, Const, Sym, *Const);                                            \
1953   }                                                                            \
1954   if (Constraint.size() == 1) {                                                \
1955     ASSIGN(Id, Range, Sym, *Constraint.begin());                               \
1956   }                                                                            \
1957   ASSIGN(Id, RangeSet, Sym, Constraint)
1958 
1959   // Our internal assign method first tries to call assignor methods for all
1960   // constraint types that apply.  And if not interrupted, continues with its
1961   // parent class.
1962 #define SYMBOL(Id, Parent)                                                     \
1963   bool assign##Id##Impl(const Id *Sym, RangeSet Constraint) {                  \
1964     CONSTRAINT_DISPATCH(Id);                                                   \
1965     DISPATCH(Parent);                                                          \
1966   }                                                                            \
1967   DEFAULT_ASSIGN(Id)
1968 #define ABSTRACT_SYMBOL(Id, Parent) SYMBOL(Id, Parent)
1969 #include "clang/StaticAnalyzer/Core/PathSensitive/Symbols.def"
1970 
1971   // Default implementations for the top class that doesn't have parents.
1972   bool assignSymExprImpl(const SymExpr *Sym, RangeSet Constraint) {
1973     CONSTRAINT_DISPATCH(SymExpr);
1974     return true;
1975   }
1976   DEFAULT_ASSIGN(SymExpr);
1977 
1978 #undef DISPATCH
1979 #undef CONSTRAINT_DISPATCH
1980 #undef DEFAULT_ASSIGN
1981 #undef ASSIGN
1982 };
1983 
1984 /// A little component aggregating all of the reasoning we have about
1985 /// assigning new constraints to symbols.
1986 ///
1987 /// The main purpose of this class is to associate constraints to symbols,
1988 /// and impose additional constraints on other symbols, when we can imply
1989 /// them.
1990 ///
1991 /// It has a nice symmetry with SymbolicRangeInferrer.  When the latter
1992 /// can provide more precise ranges by looking into the operands of the
1993 /// expression in question, ConstraintAssignor looks into the operands
1994 /// to see if we can imply more from the new constraint.
1995 class ConstraintAssignor : public ConstraintAssignorBase<ConstraintAssignor> {
1996 public:
1997   template <class ClassOrSymbol>
1998   LLVM_NODISCARD static ProgramStateRef
1999   assign(ProgramStateRef State, SValBuilder &Builder, RangeSet::Factory &F,
2000          ClassOrSymbol CoS, RangeSet NewConstraint) {
2001     if (!State || NewConstraint.isEmpty())
2002       return nullptr;
2003 
2004     ConstraintAssignor Assignor{State, Builder, F};
2005     return Assignor.assign(CoS, NewConstraint);
2006   }
2007 
2008   /// Handle expressions like: a % b != 0.
2009   template <typename SymT>
2010   bool handleRemainderOp(const SymT *Sym, RangeSet Constraint) {
2011     if (Sym->getOpcode() != BO_Rem)
2012       return true;
2013     // a % b != 0 implies that a != 0.
2014     if (!Constraint.containsZero()) {
2015       SVal SymSVal = Builder.makeSymbolVal(Sym->getLHS());
2016       if (auto NonLocSymSVal = SymSVal.getAs<nonloc::SymbolVal>()) {
2017         State = State->assume(*NonLocSymSVal, true);
2018         if (!State)
2019           return false;
2020       }
2021     }
2022     return true;
2023   }
2024 
2025   inline bool assignSymExprToConst(const SymExpr *Sym, Const Constraint);
2026   inline bool assignSymIntExprToRangeSet(const SymIntExpr *Sym,
2027                                          RangeSet Constraint) {
2028     return handleRemainderOp(Sym, Constraint);
2029   }
2030   inline bool assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2031                                          RangeSet Constraint);
2032 
2033 private:
2034   ConstraintAssignor(ProgramStateRef State, SValBuilder &Builder,
2035                      RangeSet::Factory &F)
2036       : State(State), Builder(Builder), RangeFactory(F) {}
2037   using Base = ConstraintAssignorBase<ConstraintAssignor>;
2038 
2039   /// Base method for handling new constraints for symbols.
2040   LLVM_NODISCARD ProgramStateRef assign(SymbolRef Sym, RangeSet NewConstraint) {
2041     // All constraints are actually associated with equivalence classes, and
2042     // that's what we are going to do first.
2043     State = assign(EquivalenceClass::find(State, Sym), NewConstraint);
2044     if (!State)
2045       return nullptr;
2046 
2047     // And after that we can check what other things we can get from this
2048     // constraint.
2049     Base::assign(Sym, NewConstraint);
2050     return State;
2051   }
2052 
2053   /// Base method for handling new constraints for classes.
2054   LLVM_NODISCARD ProgramStateRef assign(EquivalenceClass Class,
2055                                         RangeSet NewConstraint) {
2056     // There is a chance that we might need to update constraints for the
2057     // classes that are known to be disequal to Class.
2058     //
2059     // In order for this to be even possible, the new constraint should
2060     // be simply a constant because we can't reason about range disequalities.
2061     if (const llvm::APSInt *Point = NewConstraint.getConcreteValue()) {
2062 
2063       ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2064       ConstraintRangeTy::Factory &CF = State->get_context<ConstraintRange>();
2065 
2066       // Add new constraint.
2067       Constraints = CF.add(Constraints, Class, NewConstraint);
2068 
2069       for (EquivalenceClass DisequalClass : Class.getDisequalClasses(State)) {
2070         RangeSet UpdatedConstraint = SymbolicRangeInferrer::inferRange(
2071             RangeFactory, State, DisequalClass);
2072 
2073         UpdatedConstraint = RangeFactory.deletePoint(UpdatedConstraint, *Point);
2074 
2075         // If we end up with at least one of the disequal classes to be
2076         // constrained with an empty range-set, the state is infeasible.
2077         if (UpdatedConstraint.isEmpty())
2078           return nullptr;
2079 
2080         Constraints = CF.add(Constraints, DisequalClass, UpdatedConstraint);
2081       }
2082       assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2083                                          "a state with infeasible constraints");
2084 
2085       return setConstraints(State, Constraints);
2086     }
2087 
2088     return setConstraint(State, Class, NewConstraint);
2089   }
2090 
2091   ProgramStateRef trackDisequality(ProgramStateRef State, SymbolRef LHS,
2092                                    SymbolRef RHS) {
2093     return EquivalenceClass::markDisequal(RangeFactory, State, LHS, RHS);
2094   }
2095 
2096   ProgramStateRef trackEquality(ProgramStateRef State, SymbolRef LHS,
2097                                 SymbolRef RHS) {
2098     return EquivalenceClass::merge(RangeFactory, State, LHS, RHS);
2099   }
2100 
2101   LLVM_NODISCARD Optional<bool> interpreteAsBool(RangeSet Constraint) {
2102     assert(!Constraint.isEmpty() && "Empty ranges shouldn't get here");
2103 
2104     if (Constraint.getConcreteValue())
2105       return !Constraint.getConcreteValue()->isZero();
2106 
2107     if (!Constraint.containsZero())
2108       return true;
2109 
2110     return llvm::None;
2111   }
2112 
2113   ProgramStateRef State;
2114   SValBuilder &Builder;
2115   RangeSet::Factory &RangeFactory;
2116 };
2117 
2118 
2119 bool ConstraintAssignor::assignSymExprToConst(const SymExpr *Sym,
2120                                               const llvm::APSInt &Constraint) {
2121   llvm::SmallSet<EquivalenceClass, 4> SimplifiedClasses;
2122   // Iterate over all equivalence classes and try to simplify them.
2123   ClassMembersTy Members = State->get<ClassMembers>();
2124   for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members) {
2125     EquivalenceClass Class = ClassToSymbolSet.first;
2126     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2127     if (!State)
2128       return false;
2129     SimplifiedClasses.insert(Class);
2130   }
2131 
2132   // Trivial equivalence classes (those that have only one symbol member) are
2133   // not stored in the State. Thus, we must skim through the constraints as
2134   // well. And we try to simplify symbols in the constraints.
2135   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2136   for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2137     EquivalenceClass Class = ClassConstraint.first;
2138     if (SimplifiedClasses.count(Class)) // Already simplified.
2139       continue;
2140     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2141     if (!State)
2142       return false;
2143   }
2144 
2145   // We may have trivial equivalence classes in the disequality info as
2146   // well, and we need to simplify them.
2147   DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2148   for (std::pair<EquivalenceClass, ClassSet> DisequalityEntry :
2149        DisequalityInfo) {
2150     EquivalenceClass Class = DisequalityEntry.first;
2151     ClassSet DisequalClasses = DisequalityEntry.second;
2152     State = EquivalenceClass::simplify(Builder, RangeFactory, State, Class);
2153     if (!State)
2154       return false;
2155   }
2156 
2157   return true;
2158 }
2159 
2160 bool ConstraintAssignor::assignSymSymExprToRangeSet(const SymSymExpr *Sym,
2161                                                     RangeSet Constraint) {
2162   if (!handleRemainderOp(Sym, Constraint))
2163     return false;
2164 
2165   Optional<bool> ConstraintAsBool = interpreteAsBool(Constraint);
2166 
2167   if (!ConstraintAsBool)
2168     return true;
2169 
2170   if (Optional<bool> Equality = meansEquality(Sym)) {
2171     // Here we cover two cases:
2172     //   * if Sym is equality and the new constraint is true -> Sym's operands
2173     //     should be marked as equal
2174     //   * if Sym is disequality and the new constraint is false -> Sym's
2175     //     operands should be also marked as equal
2176     if (*Equality == *ConstraintAsBool) {
2177       State = trackEquality(State, Sym->getLHS(), Sym->getRHS());
2178     } else {
2179       // Other combinations leave as with disequal operands.
2180       State = trackDisequality(State, Sym->getLHS(), Sym->getRHS());
2181     }
2182 
2183     if (!State)
2184       return false;
2185   }
2186 
2187   return true;
2188 }
2189 
2190 } // end anonymous namespace
2191 
2192 std::unique_ptr<ConstraintManager>
2193 ento::CreateRangeConstraintManager(ProgramStateManager &StMgr,
2194                                    ExprEngine *Eng) {
2195   return std::make_unique<RangeConstraintManager>(Eng, StMgr.getSValBuilder());
2196 }
2197 
2198 ConstraintMap ento::getConstraintMap(ProgramStateRef State) {
2199   ConstraintMap::Factory &F = State->get_context<ConstraintMap>();
2200   ConstraintMap Result = F.getEmptyMap();
2201 
2202   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2203   for (std::pair<EquivalenceClass, RangeSet> ClassConstraint : Constraints) {
2204     EquivalenceClass Class = ClassConstraint.first;
2205     SymbolSet ClassMembers = Class.getClassMembers(State);
2206     assert(!ClassMembers.isEmpty() &&
2207            "Class must always have at least one member!");
2208 
2209     SymbolRef Representative = *ClassMembers.begin();
2210     Result = F.add(Result, Representative, ClassConstraint.second);
2211   }
2212 
2213   return Result;
2214 }
2215 
2216 //===----------------------------------------------------------------------===//
//                   EquivalenceClass implementation details
2218 //===----------------------------------------------------------------------===//
2219 
2220 LLVM_DUMP_METHOD void EquivalenceClass::dumpToStream(ProgramStateRef State,
2221                                                      raw_ostream &os) const {
2222   SymbolSet ClassMembers = getClassMembers(State);
2223   for (const SymbolRef &MemberSym : ClassMembers) {
2224     MemberSym->dump();
2225     os << "\n";
2226   }
2227 }
2228 
2229 inline EquivalenceClass EquivalenceClass::find(ProgramStateRef State,
2230                                                SymbolRef Sym) {
2231   assert(State && "State should not be null");
2232   assert(Sym && "Symbol should not be null");
2233   // We store far from all Symbol -> Class mappings
2234   if (const EquivalenceClass *NontrivialClass = State->get<ClassMap>(Sym))
2235     return *NontrivialClass;
2236 
2237   // This is a trivial class of Sym.
2238   return Sym;
2239 }
2240 
2241 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2242                                                ProgramStateRef State,
2243                                                SymbolRef First,
2244                                                SymbolRef Second) {
2245   EquivalenceClass FirstClass = find(State, First);
2246   EquivalenceClass SecondClass = find(State, Second);
2247 
2248   return FirstClass.merge(F, State, SecondClass);
2249 }
2250 
2251 inline ProgramStateRef EquivalenceClass::merge(RangeSet::Factory &F,
2252                                                ProgramStateRef State,
2253                                                EquivalenceClass Other) {
2254   // It is already the same class.
2255   if (*this == Other)
2256     return State;
2257 
2258   // FIXME: As of now, we support only equivalence classes of the same type.
2259   //        This limitation is connected to the lack of explicit casts in
2260   //        our symbolic expression model.
2261   //
2262   //        That means that for `int x` and `char y` we don't distinguish
2263   //        between these two very different cases:
2264   //          * `x == y`
2265   //          * `(char)x == y`
2266   //
2267   //        The moment we introduce symbolic casts, this restriction can be
2268   //        lifted.
2269   if (getType() != Other.getType())
2270     return State;
2271 
2272   SymbolSet Members = getClassMembers(State);
2273   SymbolSet OtherMembers = Other.getClassMembers(State);
2274 
2275   // We estimate the size of the class by the height of tree containing
2276   // its members.  Merging is not a trivial operation, so it's easier to
2277   // merge the smaller class into the bigger one.
2278   if (Members.getHeight() >= OtherMembers.getHeight()) {
2279     return mergeImpl(F, State, Members, Other, OtherMembers);
2280   } else {
2281     return Other.mergeImpl(F, State, OtherMembers, *this, Members);
2282   }
2283 }
2284 
/// Folds the class \p Other (with members \p OtherMembers) into this class
/// (with members \p MyMembers) and returns the updated state.
///
/// Returns a null state when the merge is contradictory: either the
/// constraints of the two classes intersect to an empty range set, or the two
/// classes are already recorded as disequal.
inline ProgramStateRef
EquivalenceClass::mergeImpl(RangeSet::Factory &RangeFactory,
                            ProgramStateRef State, SymbolSet MyMembers,
                            EquivalenceClass Other, SymbolSet OtherMembers) {
  // Essentially what we try to recreate here is some kind of union-find
  // data structure.  It does have certain limitations due to persistence
  // and the need to remove elements from classes.
  //
  // In this setting, EquivalenceClass object is the representative of the
  // class or the parent element.  ClassMap is a mapping of class members to
  // their parent. Unlike the union-find structure, they all point directly to
  // the class representative because we don't have an opportunity to actually
  // do path compression when dealing with immutability.  This means that we
  // compress paths every time we do merges.  It also means that we lose
  // the main amortized complexity benefit from the original data structure.
  ConstraintRangeTy Constraints = State->get<ConstraintRange>();
  ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();

  // 1. If the merged classes have any constraints associated with them, we
  //    need to transfer them to the class we have left.
  //
  // Intersection here makes perfect sense because both of these constraints
  // must hold for the whole new class.
  if (Optional<RangeSet> NewClassConstraint =
          intersect(RangeFactory, getConstraint(State, *this),
                    getConstraint(State, Other))) {
    // NOTE: Essentially, NewClassConstraint should NEVER be infeasible because
    //       range inferrer shouldn't generate ranges incompatible with
    //       equivalence classes. However, at the moment, due to imperfections
    //       in the solver, it is possible and the merge function can also
    //       return infeasible states aka null states.
    if (NewClassConstraint->isEmpty())
      // Infeasible state
      return nullptr;

    // No need in tracking constraints of a now-dissolved class.
    Constraints = CRF.remove(Constraints, Other);
    // Assign new constraints for this class.
    Constraints = CRF.add(Constraints, *this, *NewClassConstraint);

    assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
                                       "a state with infeasible constraints");

    State = State->set<ConstraintRange>(Constraints);
  }

  // 2. Get ALL equivalence-related maps and the factories needed to build
  //    their updated versions.
  ClassMapTy Classes = State->get<ClassMap>();
  ClassMapTy::Factory &CMF = State->get_context<ClassMap>();

  ClassMembersTy Members = State->get<ClassMembers>();
  ClassMembersTy::Factory &MF = State->get_context<ClassMembers>();

  DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
  DisequalityMapTy::Factory &DF = State->get_context<DisequalityMap>();

  ClassSet::Factory &CF = State->get_context<ClassSet>();
  SymbolSet::Factory &F = getMembersFactory(State);

  // 3. Merge members of the Other class into the current class.
  SymbolSet NewClassMembers = MyMembers;
  for (SymbolRef Sym : OtherMembers) {
    NewClassMembers = F.add(NewClassMembers, Sym);
    // *this is now the class for all these new symbols.
    Classes = CMF.add(Classes, Sym, *this);
  }

  // 4. Adjust member mapping.
  //
  // No need in tracking members of a now-dissolved class.
  Members = MF.remove(Members, Other);
  // Now only the current class is mapped to all the symbols.
  Members = MF.add(Members, *this, NewClassMembers);

  // 5. Update disequality relations
  ClassSet DisequalToOther = Other.getDisequalClasses(DisequalityInfo, CF);
  // We are about to merge two classes but they are already known to be
  // non-equal. This is a contradiction.
  //
  // Classes and Members are local copies that are only written back to State
  // in the last step, so returning here discards the updates made to them.
  if (DisequalToOther.contains(*this))
    return nullptr;

  if (!DisequalToOther.isEmpty()) {
    ClassSet DisequalToThis = getDisequalClasses(DisequalityInfo, CF);
    DisequalityInfo = DF.remove(DisequalityInfo, Other);

    for (EquivalenceClass DisequalClass : DisequalToOther) {
      DisequalToThis = CF.add(DisequalToThis, DisequalClass);

      // Disequality is a symmetric relation meaning that if
      // DisequalToOther not null then the set for DisequalClass is not
      // empty and has at least Other.
      ClassSet OriginalSetLinkedToOther =
          *DisequalityInfo.lookup(DisequalClass);

      // Other will be eliminated and we should replace it with the bigger
      // united class.
      ClassSet NewSet = CF.remove(OriginalSetLinkedToOther, Other);
      NewSet = CF.add(NewSet, *this);

      DisequalityInfo = DF.add(DisequalityInfo, DisequalClass, NewSet);
    }

    DisequalityInfo = DF.add(DisequalityInfo, *this, DisequalToThis);
    State = State->set<DisequalityMap>(DisequalityInfo);
  }

  // 6. Update the state
  State = State->set<ClassMap>(Classes);
  State = State->set<ClassMembers>(Members);

  return State;
}
2397 
2398 inline SymbolSet::Factory &
2399 EquivalenceClass::getMembersFactory(ProgramStateRef State) {
2400   return State->get_context<SymbolSet>();
2401 }
2402 
2403 SymbolSet EquivalenceClass::getClassMembers(ProgramStateRef State) const {
2404   if (const SymbolSet *Members = State->get<ClassMembers>(*this))
2405     return *Members;
2406 
2407   // This class is trivial, so we need to construct a set
2408   // with just that one symbol from the class.
2409   SymbolSet::Factory &F = getMembersFactory(State);
2410   return F.add(F.getEmptySet(), getRepresentativeSymbol());
2411 }
2412 
2413 bool EquivalenceClass::isTrivial(ProgramStateRef State) const {
2414   return State->get<ClassMembers>(*this) == nullptr;
2415 }
2416 
2417 bool EquivalenceClass::isTriviallyDead(ProgramStateRef State,
2418                                        SymbolReaper &Reaper) const {
2419   return isTrivial(State) && Reaper.isDead(getRepresentativeSymbol());
2420 }
2421 
2422 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2423                                                       ProgramStateRef State,
2424                                                       SymbolRef First,
2425                                                       SymbolRef Second) {
2426   return markDisequal(RF, State, find(State, First), find(State, Second));
2427 }
2428 
2429 inline ProgramStateRef EquivalenceClass::markDisequal(RangeSet::Factory &RF,
2430                                                       ProgramStateRef State,
2431                                                       EquivalenceClass First,
2432                                                       EquivalenceClass Second) {
2433   return First.markDisequal(RF, State, Second);
2434 }
2435 
2436 inline ProgramStateRef
2437 EquivalenceClass::markDisequal(RangeSet::Factory &RF, ProgramStateRef State,
2438                                EquivalenceClass Other) const {
2439   // If we know that two classes are equal, we can only produce an infeasible
2440   // state.
2441   if (*this == Other) {
2442     return nullptr;
2443   }
2444 
2445   DisequalityMapTy DisequalityInfo = State->get<DisequalityMap>();
2446   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2447 
2448   // Disequality is a symmetric relation, so if we mark A as disequal to B,
2449   // we should also mark B as disequalt to A.
2450   if (!addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, *this,
2451                             Other) ||
2452       !addToDisequalityInfo(DisequalityInfo, Constraints, RF, State, Other,
2453                             *this))
2454     return nullptr;
2455 
2456   assert(areFeasible(Constraints) && "Constraint manager shouldn't produce "
2457                                      "a state with infeasible constraints");
2458 
2459   State = State->set<DisequalityMap>(DisequalityInfo);
2460   State = State->set<ConstraintRange>(Constraints);
2461 
2462   return State;
2463 }
2464 
2465 inline bool EquivalenceClass::addToDisequalityInfo(
2466     DisequalityMapTy &Info, ConstraintRangeTy &Constraints,
2467     RangeSet::Factory &RF, ProgramStateRef State, EquivalenceClass First,
2468     EquivalenceClass Second) {
2469 
2470   // 1. Get all of the required factories.
2471   DisequalityMapTy::Factory &F = State->get_context<DisequalityMap>();
2472   ClassSet::Factory &CF = State->get_context<ClassSet>();
2473   ConstraintRangeTy::Factory &CRF = State->get_context<ConstraintRange>();
2474 
2475   // 2. Add Second to the set of classes disequal to First.
2476   const ClassSet *CurrentSet = Info.lookup(First);
2477   ClassSet NewSet = CurrentSet ? *CurrentSet : CF.getEmptySet();
2478   NewSet = CF.add(NewSet, Second);
2479 
2480   Info = F.add(Info, First, NewSet);
2481 
2482   // 3. If Second is known to be a constant, we can delete this point
2483   //    from the constraint asociated with First.
2484   //
2485   //    So, if Second == 10, it means that First != 10.
2486   //    At the same time, the same logic does not apply to ranges.
2487   if (const RangeSet *SecondConstraint = Constraints.lookup(Second))
2488     if (const llvm::APSInt *Point = SecondConstraint->getConcreteValue()) {
2489 
2490       RangeSet FirstConstraint = SymbolicRangeInferrer::inferRange(
2491           RF, State, First.getRepresentativeSymbol());
2492 
2493       FirstConstraint = RF.deletePoint(FirstConstraint, *Point);
2494 
2495       // If the First class is about to be constrained with an empty
2496       // range-set, the state is infeasible.
2497       if (FirstConstraint.isEmpty())
2498         return false;
2499 
2500       Constraints = CRF.add(Constraints, First, FirstConstraint);
2501     }
2502 
2503   return true;
2504 }
2505 
2506 inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2507                                                  SymbolRef FirstSym,
2508                                                  SymbolRef SecondSym) {
2509   return EquivalenceClass::areEqual(State, find(State, FirstSym),
2510                                     find(State, SecondSym));
2511 }
2512 
2513 inline Optional<bool> EquivalenceClass::areEqual(ProgramStateRef State,
2514                                                  EquivalenceClass First,
2515                                                  EquivalenceClass Second) {
2516   // The same equivalence class => symbols are equal.
2517   if (First == Second)
2518     return true;
2519 
2520   // Let's check if we know anything about these two classes being not equal to
2521   // each other.
2522   ClassSet DisequalToFirst = First.getDisequalClasses(State);
2523   if (DisequalToFirst.contains(Second))
2524     return false;
2525 
2526   // It is not clear.
2527   return llvm::None;
2528 }
2529 
2530 LLVM_NODISCARD ProgramStateRef
2531 EquivalenceClass::removeMember(ProgramStateRef State, const SymbolRef Old) {
2532 
2533   SymbolSet ClsMembers = getClassMembers(State);
2534   assert(ClsMembers.contains(Old));
2535 
2536   // Remove `Old`'s Class->Sym relation.
2537   SymbolSet::Factory &F = getMembersFactory(State);
2538   ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2539   ClsMembers = F.remove(ClsMembers, Old);
2540   // Ensure another precondition of the removeMember function (we can check
2541   // this only with isEmpty, thus we have to do the remove first).
2542   assert(!ClsMembers.isEmpty() &&
2543          "Class should have had at least two members before member removal");
2544   // Overwrite the existing members assigned to this class.
2545   ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2546   ClassMembersMap = EMFactory.add(ClassMembersMap, *this, ClsMembers);
2547   State = State->set<ClassMembers>(ClassMembersMap);
2548 
2549   // Remove `Old`'s Sym->Class relation.
2550   ClassMapTy Classes = State->get<ClassMap>();
2551   ClassMapTy::Factory &CMF = State->get_context<ClassMap>();
2552   Classes = CMF.remove(Classes, Old);
2553   State = State->set<ClassMap>(Classes);
2554 
2555   return State;
2556 }
2557 
2558 // Re-evaluate an SVal with top-level `State->assume` logic.
2559 LLVM_NODISCARD ProgramStateRef reAssume(ProgramStateRef State,
2560                                         const RangeSet *Constraint,
2561                                         SVal TheValue) {
2562   if (!Constraint)
2563     return State;
2564 
2565   const auto DefinedVal = TheValue.castAs<DefinedSVal>();
2566 
2567   // If the SVal is 0, we can simply interpret that as `false`.
2568   if (Constraint->encodesFalseRange())
2569     return State->assume(DefinedVal, false);
2570 
2571   // If the constraint does not encode 0 then we can interpret that as `true`
2572   // AND as a Range(Set).
2573   if (Constraint->encodesTrueRange()) {
2574     State = State->assume(DefinedVal, true);
2575     if (!State)
2576       return nullptr;
2577     // Fall through, re-assume based on the range values as well.
2578   }
2579   // Overestimate the individual Ranges with the RangeSet' lowest and
2580   // highest values.
2581   return State->assumeInclusiveRange(DefinedVal, Constraint->getMinValue(),
2582                                      Constraint->getMaxValue(), true);
2583 }
2584 
// Iterate over all symbols and try to simplify them. Once a symbol is
// simplified then we check if we can merge the simplified symbol's equivalence
// class to this class. This way, we simplify not just the symbols but the
// classes as well: we strive to keep the number of the classes to be the
// absolute minimum.
//
// Returns the updated state, or a null state if a contradiction is found
// (a member collapses to a constant outside the class constraint, the merge
// is infeasible, or re-assuming the simplified value fails).
//
// NOTE: the SVB parameter is not referenced in this function body.
LLVM_NODISCARD ProgramStateRef
EquivalenceClass::simplify(SValBuilder &SVB, RangeSet::Factory &F,
                           ProgramStateRef State, EquivalenceClass Class) {
  // The member set is captured once, before the loop; updates made to State
  // inside the loop do not change the set being iterated.
  SymbolSet ClassMembers = Class.getClassMembers(State);
  for (const SymbolRef &MemberSym : ClassMembers) {

    const SVal SimplifiedMemberVal = simplifyToSVal(State, MemberSym);
    const SymbolRef SimplifiedMemberSym = SimplifiedMemberVal.getAsSymbol();

    // The symbol is collapsed to a constant, check if the current State is
    // still feasible.
    if (const auto CI = SimplifiedMemberVal.getAs<nonloc::ConcreteInt>()) {
      const llvm::APSInt &SV = CI->getValue();
      const RangeSet *ClassConstraint = getConstraint(State, Class);
      // We have found a contradiction.
      if (ClassConstraint && !ClassConstraint->contains(SV))
        return nullptr;
    }

    if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) {
      // The simplified symbol should be the member of the original Class,
      // however, it might be in another existing class at the moment. We
      // have to merge these classes.
      ProgramStateRef OldState = State;
      State = merge(F, State, MemberSym, SimplifiedMemberSym);
      if (!State)
        return nullptr;
      // No state change, no merge happened actually.
      if (OldState == State)
        continue;

      assert(find(State, MemberSym) == find(State, SimplifiedMemberSym));
      // Remove the old and more complex symbol.
      State = find(State, MemberSym).removeMember(State, MemberSym);

      // Query the class constraint again b/c that may have changed during the
      // merge above.
      const RangeSet *ClassConstraint = getConstraint(State, Class);

      // Re-evaluate an SVal with top-level `State->assume`, this ignites
      // a RECURSIVE algorithm that will reach a FIXPOINT.
      //
      // About performance and complexity: Let us assume that in a State we
      // have N non-trivial equivalence classes and that all constraints and
      // disequality info is related to non-trivial classes. In the worst case,
      // we can simplify only one symbol of one class in each iteration. The
      // number of symbols in one class cannot grow b/c we replace the old
      // symbol with the simplified one. Also, the number of the equivalence
      // classes can decrease only, b/c the algorithm does a merge operation
      // optionally. We need N iterations in this case to reach the fixpoint.
      // Thus, the steps needed to be done in the worst case is proportional to
      // N*N.
      //
      // This worst case scenario can be extended to that case when we have
      // trivial classes in the constraints and in the disequality map. This
      // case can be reduced to the case with a State where there are only
      // non-trivial classes. This is because a merge operation on two trivial
      // classes results in one non-trivial class.
      State = reAssume(State, ClassConstraint, SimplifiedMemberVal);
      if (!State)
        return nullptr;
    }
  }
  return State;
}
2655 
2656 inline ClassSet EquivalenceClass::getDisequalClasses(ProgramStateRef State,
2657                                                      SymbolRef Sym) {
2658   return find(State, Sym).getDisequalClasses(State);
2659 }
2660 
2661 inline ClassSet
2662 EquivalenceClass::getDisequalClasses(ProgramStateRef State) const {
2663   return getDisequalClasses(State->get<DisequalityMap>(),
2664                             State->get_context<ClassSet>());
2665 }
2666 
2667 inline ClassSet
2668 EquivalenceClass::getDisequalClasses(DisequalityMapTy Map,
2669                                      ClassSet::Factory &Factory) const {
2670   if (const ClassSet *DisequalClasses = Map.lookup(*this))
2671     return *DisequalClasses;
2672 
2673   return Factory.getEmptySet();
2674 }
2675 
2676 bool EquivalenceClass::isClassDataConsistent(ProgramStateRef State) {
2677   ClassMembersTy Members = State->get<ClassMembers>();
2678 
2679   for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair : Members) {
2680     for (SymbolRef Member : ClassMembersPair.second) {
2681       // Every member of the class should have a mapping back to the class.
2682       if (find(State, Member) == ClassMembersPair.first) {
2683         continue;
2684       }
2685 
2686       return false;
2687     }
2688   }
2689 
2690   DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2691   for (std::pair<EquivalenceClass, ClassSet> DisequalityInfo : Disequalities) {
2692     EquivalenceClass Class = DisequalityInfo.first;
2693     ClassSet DisequalClasses = DisequalityInfo.second;
2694 
2695     // There is no use in keeping empty sets in the map.
2696     if (DisequalClasses.isEmpty())
2697       return false;
2698 
2699     // Disequality is symmetrical, i.e. for every Class A and B that A != B,
2700     // B != A should also be true.
2701     for (EquivalenceClass DisequalClass : DisequalClasses) {
2702       const ClassSet *DisequalToDisequalClasses =
2703           Disequalities.lookup(DisequalClass);
2704 
2705       // It should be a set of at least one element: Class
2706       if (!DisequalToDisequalClasses ||
2707           !DisequalToDisequalClasses->contains(Class))
2708         return false;
2709     }
2710   }
2711 
2712   return true;
2713 }
2714 
2715 //===----------------------------------------------------------------------===//
2716 //                    RangeConstraintManager implementation
2717 //===----------------------------------------------------------------------===//
2718 
2719 bool RangeConstraintManager::canReasonAbout(SVal X) const {
2720   Optional<nonloc::SymbolVal> SymVal = X.getAs<nonloc::SymbolVal>();
2721   if (SymVal && SymVal->isExpression()) {
2722     const SymExpr *SE = SymVal->getSymbol();
2723 
2724     if (const SymIntExpr *SIE = dyn_cast<SymIntExpr>(SE)) {
2725       switch (SIE->getOpcode()) {
2726       // We don't reason yet about bitwise-constraints on symbolic values.
2727       case BO_And:
2728       case BO_Or:
2729       case BO_Xor:
2730         return false;
2731       // We don't reason yet about these arithmetic constraints on
2732       // symbolic values.
2733       case BO_Mul:
2734       case BO_Div:
2735       case BO_Rem:
2736       case BO_Shl:
2737       case BO_Shr:
2738         return false;
2739       // All other cases.
2740       default:
2741         return true;
2742       }
2743     }
2744 
2745     if (const SymSymExpr *SSE = dyn_cast<SymSymExpr>(SE)) {
2746       // FIXME: Handle <=> here.
2747       if (BinaryOperator::isEqualityOp(SSE->getOpcode()) ||
2748           BinaryOperator::isRelationalOp(SSE->getOpcode())) {
2749         // We handle Loc <> Loc comparisons, but not (yet) NonLoc <> NonLoc.
2750         // We've recently started producing Loc <> NonLoc comparisons (that
2751         // result from casts of one of the operands between eg. intptr_t and
2752         // void *), but we can't reason about them yet.
2753         if (Loc::isLocType(SSE->getLHS()->getType())) {
2754           return Loc::isLocType(SSE->getRHS()->getType());
2755         }
2756       }
2757     }
2758 
2759     return false;
2760   }
2761 
2762   return true;
2763 }
2764 
2765 ConditionTruthVal RangeConstraintManager::checkNull(ProgramStateRef State,
2766                                                     SymbolRef Sym) {
2767   const RangeSet *Ranges = getConstraint(State, Sym);
2768 
2769   // If we don't have any information about this symbol, it's underconstrained.
2770   if (!Ranges)
2771     return ConditionTruthVal();
2772 
2773   // If we have a concrete value, see if it's zero.
2774   if (const llvm::APSInt *Value = Ranges->getConcreteValue())
2775     return *Value == 0;
2776 
2777   BasicValueFactory &BV = getBasicVals();
2778   APSIntType IntType = BV.getAPSIntType(Sym->getType());
2779   llvm::APSInt Zero = IntType.getZeroValue();
2780 
2781   // Check if zero is in the set of possible values.
2782   if (!Ranges->contains(Zero))
2783     return false;
2784 
2785   // Zero is a possible value, but it is not the /only/ possible value.
2786   return ConditionTruthVal();
2787 }
2788 
2789 const llvm::APSInt *RangeConstraintManager::getSymVal(ProgramStateRef St,
2790                                                       SymbolRef Sym) const {
2791   const RangeSet *T = getConstraint(St, Sym);
2792   return T ? T->getConcreteValue() : nullptr;
2793 }
2794 
2795 //===----------------------------------------------------------------------===//
2796 //                Remove dead symbols from existing constraints
2797 //===----------------------------------------------------------------------===//
2798 
2799 /// Scan all symbols referenced by the constraints. If the symbol is not alive
2800 /// as marked in LSymbols, mark it as dead in DSymbols.
2801 ProgramStateRef
2802 RangeConstraintManager::removeDeadBindings(ProgramStateRef State,
2803                                            SymbolReaper &SymReaper) {
2804   ClassMembersTy ClassMembersMap = State->get<ClassMembers>();
2805   ClassMembersTy NewClassMembersMap = ClassMembersMap;
2806   ClassMembersTy::Factory &EMFactory = State->get_context<ClassMembers>();
2807   SymbolSet::Factory &SetFactory = State->get_context<SymbolSet>();
2808 
2809   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
2810   ConstraintRangeTy NewConstraints = Constraints;
2811   ConstraintRangeTy::Factory &ConstraintFactory =
2812       State->get_context<ConstraintRange>();
2813 
2814   ClassMapTy Map = State->get<ClassMap>();
2815   ClassMapTy NewMap = Map;
2816   ClassMapTy::Factory &ClassFactory = State->get_context<ClassMap>();
2817 
2818   DisequalityMapTy Disequalities = State->get<DisequalityMap>();
2819   DisequalityMapTy::Factory &DisequalityFactory =
2820       State->get_context<DisequalityMap>();
2821   ClassSet::Factory &ClassSetFactory = State->get_context<ClassSet>();
2822 
2823   bool ClassMapChanged = false;
2824   bool MembersMapChanged = false;
2825   bool ConstraintMapChanged = false;
2826   bool DisequalitiesChanged = false;
2827 
2828   auto removeDeadClass = [&](EquivalenceClass Class) {
2829     // Remove associated constraint ranges.
2830     Constraints = ConstraintFactory.remove(Constraints, Class);
2831     ConstraintMapChanged = true;
2832 
2833     // Update disequality information to not hold any information on the
2834     // removed class.
2835     ClassSet DisequalClasses =
2836         Class.getDisequalClasses(Disequalities, ClassSetFactory);
2837     if (!DisequalClasses.isEmpty()) {
2838       for (EquivalenceClass DisequalClass : DisequalClasses) {
2839         ClassSet DisequalToDisequalSet =
2840             DisequalClass.getDisequalClasses(Disequalities, ClassSetFactory);
2841         // DisequalToDisequalSet is guaranteed to be non-empty for consistent
2842         // disequality info.
2843         assert(!DisequalToDisequalSet.isEmpty());
2844         ClassSet NewSet = ClassSetFactory.remove(DisequalToDisequalSet, Class);
2845 
2846         // No need in keeping an empty set.
2847         if (NewSet.isEmpty()) {
2848           Disequalities =
2849               DisequalityFactory.remove(Disequalities, DisequalClass);
2850         } else {
2851           Disequalities =
2852               DisequalityFactory.add(Disequalities, DisequalClass, NewSet);
2853         }
2854       }
2855       // Remove the data for the class
2856       Disequalities = DisequalityFactory.remove(Disequalities, Class);
2857       DisequalitiesChanged = true;
2858     }
2859   };
2860 
2861   // 1. Let's see if dead symbols are trivial and have associated constraints.
2862   for (std::pair<EquivalenceClass, RangeSet> ClassConstraintPair :
2863        Constraints) {
2864     EquivalenceClass Class = ClassConstraintPair.first;
2865     if (Class.isTriviallyDead(State, SymReaper)) {
2866       // If this class is trivial, we can remove its constraints right away.
2867       removeDeadClass(Class);
2868     }
2869   }
2870 
2871   // 2. We don't need to track classes for dead symbols.
2872   for (std::pair<SymbolRef, EquivalenceClass> SymbolClassPair : Map) {
2873     SymbolRef Sym = SymbolClassPair.first;
2874 
2875     if (SymReaper.isDead(Sym)) {
2876       ClassMapChanged = true;
2877       NewMap = ClassFactory.remove(NewMap, Sym);
2878     }
2879   }
2880 
2881   // 3. Remove dead members from classes and remove dead non-trivial classes
2882   //    and their constraints.
2883   for (std::pair<EquivalenceClass, SymbolSet> ClassMembersPair :
2884        ClassMembersMap) {
2885     EquivalenceClass Class = ClassMembersPair.first;
2886     SymbolSet LiveMembers = ClassMembersPair.second;
2887     bool MembersChanged = false;
2888 
2889     for (SymbolRef Member : ClassMembersPair.second) {
2890       if (SymReaper.isDead(Member)) {
2891         MembersChanged = true;
2892         LiveMembers = SetFactory.remove(LiveMembers, Member);
2893       }
2894     }
2895 
2896     // Check if the class changed.
2897     if (!MembersChanged)
2898       continue;
2899 
2900     MembersMapChanged = true;
2901 
2902     if (LiveMembers.isEmpty()) {
2903       // The class is dead now, we need to wipe it out of the members map...
2904       NewClassMembersMap = EMFactory.remove(NewClassMembersMap, Class);
2905 
2906       // ...and remove all of its constraints.
2907       removeDeadClass(Class);
2908     } else {
2909       // We need to change the members associated with the class.
2910       NewClassMembersMap =
2911           EMFactory.add(NewClassMembersMap, Class, LiveMembers);
2912     }
2913   }
2914 
2915   // 4. Update the state with new maps.
2916   //
2917   // Here we try to be humble and update a map only if it really changed.
2918   if (ClassMapChanged)
2919     State = State->set<ClassMap>(NewMap);
2920 
2921   if (MembersMapChanged)
2922     State = State->set<ClassMembers>(NewClassMembersMap);
2923 
2924   if (ConstraintMapChanged)
2925     State = State->set<ConstraintRange>(Constraints);
2926 
2927   if (DisequalitiesChanged)
2928     State = State->set<DisequalityMap>(Disequalities);
2929 
2930   assert(EquivalenceClass::isClassDataConsistent(State));
2931 
2932   return State;
2933 }
2934 
2935 RangeSet RangeConstraintManager::getRange(ProgramStateRef State,
2936                                           SymbolRef Sym) {
2937   return SymbolicRangeInferrer::inferRange(F, State, Sym);
2938 }
2939 
2940 ProgramStateRef RangeConstraintManager::setRange(ProgramStateRef State,
2941                                                  SymbolRef Sym,
2942                                                  RangeSet Range) {
2943   return ConstraintAssignor::assign(State, getSValBuilder(), F, Sym, Range);
2944 }
2945 
2946 //===------------------------------------------------------------------------===
2947 // assumeSymX methods: protected interface for RangeConstraintManager.
2948 //===------------------------------------------------------------------------===/
2949 
2950 // The syntax for ranges below is mathematical, using [x, y] for closed ranges
2951 // and (x, y) for open ranges. These ranges are modular, corresponding with
2952 // a common treatment of C integer overflow. This means that these methods
2953 // do not have to worry about overflow; RangeSet::Intersect can handle such a
2954 // "wraparound" range.
2955 // As an example, the range [UINT_MAX-1, 3) contains five values: UINT_MAX-1,
2956 // UINT_MAX, 0, 1, and 2.
2957 
2958 ProgramStateRef
2959 RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym,
2960                                     const llvm::APSInt &Int,
2961                                     const llvm::APSInt &Adjustment) {
2962   // Before we do any real work, see if the value can even show up.
2963   APSIntType AdjustmentType(Adjustment);
2964   if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2965     return St;
2966 
2967   llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment;
2968   RangeSet New = getRange(St, Sym);
2969   New = F.deletePoint(New, Point);
2970 
2971   return setRange(St, Sym, New);
2972 }
2973 
2974 ProgramStateRef
2975 RangeConstraintManager::assumeSymEQ(ProgramStateRef St, SymbolRef Sym,
2976                                     const llvm::APSInt &Int,
2977                                     const llvm::APSInt &Adjustment) {
2978   // Before we do any real work, see if the value can even show up.
2979   APSIntType AdjustmentType(Adjustment);
2980   if (AdjustmentType.testInRange(Int, true) != APSIntType::RTR_Within)
2981     return nullptr;
2982 
2983   // [Int-Adjustment, Int-Adjustment]
2984   llvm::APSInt AdjInt = AdjustmentType.convert(Int) - Adjustment;
2985   RangeSet New = getRange(St, Sym);
2986   New = F.intersect(New, AdjInt);
2987 
2988   return setRange(St, Sym, New);
2989 }
2990 
2991 RangeSet RangeConstraintManager::getSymLTRange(ProgramStateRef St,
2992                                                SymbolRef Sym,
2993                                                const llvm::APSInt &Int,
2994                                                const llvm::APSInt &Adjustment) {
2995   // Before we do any real work, see if the value can even show up.
2996   APSIntType AdjustmentType(Adjustment);
2997   switch (AdjustmentType.testInRange(Int, true)) {
2998   case APSIntType::RTR_Below:
2999     return F.getEmptySet();
3000   case APSIntType::RTR_Within:
3001     break;
3002   case APSIntType::RTR_Above:
3003     return getRange(St, Sym);
3004   }
3005 
3006   // Special case for Int == Min. This is always false.
3007   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3008   llvm::APSInt Min = AdjustmentType.getMinValue();
3009   if (ComparisonVal == Min)
3010     return F.getEmptySet();
3011 
3012   llvm::APSInt Lower = Min - Adjustment;
3013   llvm::APSInt Upper = ComparisonVal - Adjustment;
3014   --Upper;
3015 
3016   RangeSet Result = getRange(St, Sym);
3017   return F.intersect(Result, Lower, Upper);
3018 }
3019 
3020 ProgramStateRef
3021 RangeConstraintManager::assumeSymLT(ProgramStateRef St, SymbolRef Sym,
3022                                     const llvm::APSInt &Int,
3023                                     const llvm::APSInt &Adjustment) {
3024   RangeSet New = getSymLTRange(St, Sym, Int, Adjustment);
3025   return setRange(St, Sym, New);
3026 }
3027 
3028 RangeSet RangeConstraintManager::getSymGTRange(ProgramStateRef St,
3029                                                SymbolRef Sym,
3030                                                const llvm::APSInt &Int,
3031                                                const llvm::APSInt &Adjustment) {
3032   // Before we do any real work, see if the value can even show up.
3033   APSIntType AdjustmentType(Adjustment);
3034   switch (AdjustmentType.testInRange(Int, true)) {
3035   case APSIntType::RTR_Below:
3036     return getRange(St, Sym);
3037   case APSIntType::RTR_Within:
3038     break;
3039   case APSIntType::RTR_Above:
3040     return F.getEmptySet();
3041   }
3042 
3043   // Special case for Int == Max. This is always false.
3044   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3045   llvm::APSInt Max = AdjustmentType.getMaxValue();
3046   if (ComparisonVal == Max)
3047     return F.getEmptySet();
3048 
3049   llvm::APSInt Lower = ComparisonVal - Adjustment;
3050   llvm::APSInt Upper = Max - Adjustment;
3051   ++Lower;
3052 
3053   RangeSet SymRange = getRange(St, Sym);
3054   return F.intersect(SymRange, Lower, Upper);
3055 }
3056 
3057 ProgramStateRef
3058 RangeConstraintManager::assumeSymGT(ProgramStateRef St, SymbolRef Sym,
3059                                     const llvm::APSInt &Int,
3060                                     const llvm::APSInt &Adjustment) {
3061   RangeSet New = getSymGTRange(St, Sym, Int, Adjustment);
3062   return setRange(St, Sym, New);
3063 }
3064 
3065 RangeSet RangeConstraintManager::getSymGERange(ProgramStateRef St,
3066                                                SymbolRef Sym,
3067                                                const llvm::APSInt &Int,
3068                                                const llvm::APSInt &Adjustment) {
3069   // Before we do any real work, see if the value can even show up.
3070   APSIntType AdjustmentType(Adjustment);
3071   switch (AdjustmentType.testInRange(Int, true)) {
3072   case APSIntType::RTR_Below:
3073     return getRange(St, Sym);
3074   case APSIntType::RTR_Within:
3075     break;
3076   case APSIntType::RTR_Above:
3077     return F.getEmptySet();
3078   }
3079 
3080   // Special case for Int == Min. This is always feasible.
3081   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3082   llvm::APSInt Min = AdjustmentType.getMinValue();
3083   if (ComparisonVal == Min)
3084     return getRange(St, Sym);
3085 
3086   llvm::APSInt Max = AdjustmentType.getMaxValue();
3087   llvm::APSInt Lower = ComparisonVal - Adjustment;
3088   llvm::APSInt Upper = Max - Adjustment;
3089 
3090   RangeSet SymRange = getRange(St, Sym);
3091   return F.intersect(SymRange, Lower, Upper);
3092 }
3093 
3094 ProgramStateRef
3095 RangeConstraintManager::assumeSymGE(ProgramStateRef St, SymbolRef Sym,
3096                                     const llvm::APSInt &Int,
3097                                     const llvm::APSInt &Adjustment) {
3098   RangeSet New = getSymGERange(St, Sym, Int, Adjustment);
3099   return setRange(St, Sym, New);
3100 }
3101 
3102 RangeSet
3103 RangeConstraintManager::getSymLERange(llvm::function_ref<RangeSet()> RS,
3104                                       const llvm::APSInt &Int,
3105                                       const llvm::APSInt &Adjustment) {
3106   // Before we do any real work, see if the value can even show up.
3107   APSIntType AdjustmentType(Adjustment);
3108   switch (AdjustmentType.testInRange(Int, true)) {
3109   case APSIntType::RTR_Below:
3110     return F.getEmptySet();
3111   case APSIntType::RTR_Within:
3112     break;
3113   case APSIntType::RTR_Above:
3114     return RS();
3115   }
3116 
3117   // Special case for Int == Max. This is always feasible.
3118   llvm::APSInt ComparisonVal = AdjustmentType.convert(Int);
3119   llvm::APSInt Max = AdjustmentType.getMaxValue();
3120   if (ComparisonVal == Max)
3121     return RS();
3122 
3123   llvm::APSInt Min = AdjustmentType.getMinValue();
3124   llvm::APSInt Lower = Min - Adjustment;
3125   llvm::APSInt Upper = ComparisonVal - Adjustment;
3126 
3127   RangeSet Default = RS();
3128   return F.intersect(Default, Lower, Upper);
3129 }
3130 
3131 RangeSet RangeConstraintManager::getSymLERange(ProgramStateRef St,
3132                                                SymbolRef Sym,
3133                                                const llvm::APSInt &Int,
3134                                                const llvm::APSInt &Adjustment) {
3135   return getSymLERange([&] { return getRange(St, Sym); }, Int, Adjustment);
3136 }
3137 
3138 ProgramStateRef
3139 RangeConstraintManager::assumeSymLE(ProgramStateRef St, SymbolRef Sym,
3140                                     const llvm::APSInt &Int,
3141                                     const llvm::APSInt &Adjustment) {
3142   RangeSet New = getSymLERange(St, Sym, Int, Adjustment);
3143   return setRange(St, Sym, New);
3144 }
3145 
3146 ProgramStateRef RangeConstraintManager::assumeSymWithinInclusiveRange(
3147     ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3148     const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3149   RangeSet New = getSymGERange(State, Sym, From, Adjustment);
3150   if (New.isEmpty())
3151     return nullptr;
3152   RangeSet Out = getSymLERange([&] { return New; }, To, Adjustment);
3153   return setRange(State, Sym, Out);
3154 }
3155 
3156 ProgramStateRef RangeConstraintManager::assumeSymOutsideInclusiveRange(
3157     ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From,
3158     const llvm::APSInt &To, const llvm::APSInt &Adjustment) {
3159   RangeSet RangeLT = getSymLTRange(State, Sym, From, Adjustment);
3160   RangeSet RangeGT = getSymGTRange(State, Sym, To, Adjustment);
3161   RangeSet New(F.add(RangeLT, RangeGT));
3162   return setRange(State, Sym, New);
3163 }
3164 
3165 //===----------------------------------------------------------------------===//
3166 // Pretty-printing.
3167 //===----------------------------------------------------------------------===//
3168 
3169 void RangeConstraintManager::printJson(raw_ostream &Out, ProgramStateRef State,
3170                                        const char *NL, unsigned int Space,
3171                                        bool IsDot) const {
3172   printConstraints(Out, State, NL, Space, IsDot);
3173   printEquivalenceClasses(Out, State, NL, Space, IsDot);
3174   printDisequalities(Out, State, NL, Space, IsDot);
3175 }
3176 
3177 void RangeConstraintManager::printValue(raw_ostream &Out, ProgramStateRef State,
3178                                         SymbolRef Sym) {
3179   const RangeSet RS = getRange(State, Sym);
3180   Out << RS.getBitWidth() << (RS.isUnsigned() ? "u:" : "s:");
3181   RS.dump(Out);
3182 }
3183 
3184 static std::string toString(const SymbolRef &Sym) {
3185   std::string S;
3186   llvm::raw_string_ostream O(S);
3187   Sym->dumpToStream(O);
3188   return O.str();
3189 }
3190 
3191 void RangeConstraintManager::printConstraints(raw_ostream &Out,
3192                                               ProgramStateRef State,
3193                                               const char *NL,
3194                                               unsigned int Space,
3195                                               bool IsDot) const {
3196   ConstraintRangeTy Constraints = State->get<ConstraintRange>();
3197 
3198   Indent(Out, Space, IsDot) << "\"constraints\": ";
3199   if (Constraints.isEmpty()) {
3200     Out << "null," << NL;
3201     return;
3202   }
3203 
3204   std::map<std::string, RangeSet> OrderedConstraints;
3205   for (std::pair<EquivalenceClass, RangeSet> P : Constraints) {
3206     SymbolSet ClassMembers = P.first.getClassMembers(State);
3207     for (const SymbolRef &ClassMember : ClassMembers) {
3208       bool insertion_took_place;
3209       std::tie(std::ignore, insertion_took_place) =
3210           OrderedConstraints.insert({toString(ClassMember), P.second});
3211       assert(insertion_took_place &&
3212              "two symbols should not have the same dump");
3213     }
3214   }
3215 
3216   ++Space;
3217   Out << '[' << NL;
3218   bool First = true;
3219   for (std::pair<std::string, RangeSet> P : OrderedConstraints) {
3220     if (First) {
3221       First = false;
3222     } else {
3223       Out << ',';
3224       Out << NL;
3225     }
3226     Indent(Out, Space, IsDot)
3227         << "{ \"symbol\": \"" << P.first << "\", \"range\": \"";
3228     P.second.dump(Out);
3229     Out << "\" }";
3230   }
3231   Out << NL;
3232 
3233   --Space;
3234   Indent(Out, Space, IsDot) << "]," << NL;
3235 }
3236 
3237 static std::string toString(ProgramStateRef State, EquivalenceClass Class) {
3238   SymbolSet ClassMembers = Class.getClassMembers(State);
3239   llvm::SmallVector<SymbolRef, 8> ClassMembersSorted(ClassMembers.begin(),
3240                                                      ClassMembers.end());
3241   llvm::sort(ClassMembersSorted,
3242              [](const SymbolRef &LHS, const SymbolRef &RHS) {
3243                return toString(LHS) < toString(RHS);
3244              });
3245 
3246   bool FirstMember = true;
3247 
3248   std::string Str;
3249   llvm::raw_string_ostream Out(Str);
3250   Out << "[ ";
3251   for (SymbolRef ClassMember : ClassMembersSorted) {
3252     if (FirstMember)
3253       FirstMember = false;
3254     else
3255       Out << ", ";
3256     Out << "\"" << ClassMember << "\"";
3257   }
3258   Out << " ]";
3259   return Out.str();
3260 }
3261 
3262 void RangeConstraintManager::printEquivalenceClasses(raw_ostream &Out,
3263                                                      ProgramStateRef State,
3264                                                      const char *NL,
3265                                                      unsigned int Space,
3266                                                      bool IsDot) const {
3267   ClassMembersTy Members = State->get<ClassMembers>();
3268 
3269   Indent(Out, Space, IsDot) << "\"equivalence_classes\": ";
3270   if (Members.isEmpty()) {
3271     Out << "null," << NL;
3272     return;
3273   }
3274 
3275   std::set<std::string> MembersStr;
3276   for (std::pair<EquivalenceClass, SymbolSet> ClassToSymbolSet : Members)
3277     MembersStr.insert(toString(State, ClassToSymbolSet.first));
3278 
3279   ++Space;
3280   Out << '[' << NL;
3281   bool FirstClass = true;
3282   for (const std::string &Str : MembersStr) {
3283     if (FirstClass) {
3284       FirstClass = false;
3285     } else {
3286       Out << ',';
3287       Out << NL;
3288     }
3289     Indent(Out, Space, IsDot);
3290     Out << Str;
3291   }
3292   Out << NL;
3293 
3294   --Space;
3295   Indent(Out, Space, IsDot) << "]," << NL;
3296 }
3297 
/// Writes the "disequality_info" JSON field for \p State to \p Out.
///
/// Prints `null,` when the state has no disequality entries. Otherwise prints
/// an array of objects, one per class, each with a "class" entry and, when
/// the class has at least one disequal class, a "disequal_to" array. Classes
/// and their disequal classes are emitted in the sorted order of their string
/// form.
///
/// \param NL     Line terminator written after each output line.
/// \param Space  Indentation level passed to Indent() for the field itself;
///               nested content uses deeper levels.
/// \param IsDot  Forwarded unchanged to Indent().
void RangeConstraintManager::printDisequalities(raw_ostream &Out,
                                                ProgramStateRef State,
                                                const char *NL,
                                                unsigned int Space,
                                                bool IsDot) const {
  DisequalityMapTy Disequalities = State->get<DisequalityMap>();

  Indent(Out, Space, IsDot) << "\"disequality_info\": ";
  if (Disequalities.isEmpty()) {
    Out << "null," << NL;
    return;
  }

  // Transform the disequality info to an ordered map of
  // [string -> (ordered set of strings)]
  using EqClassesStrTy = std::set<std::string>;
  using DisequalityInfoStrTy = std::map<std::string, EqClassesStrTy>;
  DisequalityInfoStrTy DisequalityInfoStr;
  for (std::pair<EquivalenceClass, ClassSet> ClassToDisEqSet : Disequalities) {
    EquivalenceClass Class = ClassToDisEqSet.first;
    ClassSet DisequalClasses = ClassToDisEqSet.second;
    EqClassesStrTy MembersStr;
    for (EquivalenceClass DisEqClass : DisequalClasses)
      MembersStr.insert(toString(State, DisEqClass));
    DisequalityInfoStr.insert({toString(State, Class), MembersStr});
  }

  // Everything inside the array is indented one level deeper; the matching
  // --Space below restores the level for the closing bracket.
  ++Space;
  Out << '[' << NL;
  bool FirstClass = true;
  for (std::pair<std::string, EqClassesStrTy> ClassToDisEqSet :
       DisequalityInfoStr) {
    const std::string &Class = ClassToDisEqSet.first;
    // Separate consecutive objects with a comma and a line break.
    if (FirstClass) {
      FirstClass = false;
    } else {
      Out << ',';
      Out << NL;
    }
    Indent(Out, Space, IsDot) << "{" << NL;
    unsigned int DisEqSpace = Space + 1;
    Indent(Out, DisEqSpace, IsDot) << "\"class\": ";
    Out << Class;
    const EqClassesStrTy &DisequalClasses = ClassToDisEqSet.second;
    // The "disequal_to" entry is omitted entirely for an empty set; in that
    // case no line break is written after the "class" value.
    if (!DisequalClasses.empty()) {
      Out << "," << NL;
      Indent(Out, DisEqSpace, IsDot) << "\"disequal_to\": [" << NL;
      unsigned int DisEqClassSpace = DisEqSpace + 1;
      // Indentation for the first element; later elements get theirs after
      // the separating comma inside the loop.
      Indent(Out, DisEqClassSpace, IsDot);
      bool FirstDisEqClass = true;
      for (const std::string &DisEqClass : DisequalClasses) {
        if (FirstDisEqClass) {
          FirstDisEqClass = false;
        } else {
          Out << ',' << NL;
          Indent(Out, DisEqClassSpace, IsDot);
        }
        Out << DisEqClass;
      }
      // The closing bracket follows the last element on the same line.
      Out << "]" << NL;
    }
    Indent(Out, Space, IsDot) << "}";
  }
  Out << NL;

  --Space;
  Indent(Out, Space, IsDot) << "]," << NL;
}
3366