1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines the implementation of the LLVM difference
10 // engine, which structurally compares global values within a module.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "DifferenceEngine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/type_traits.h"
28 #include <utility>
29 
30 using namespace llvm;
31 
32 namespace {
33 
34 /// A priority queue, implemented as a heap.
35 template <class T, class Sorter, unsigned InlineCapacity>
36 class PriorityQueue {
37   Sorter Precedes;
38   llvm::SmallVector<T, InlineCapacity> Storage;
39 
40 public:
PriorityQueue(const Sorter & Precedes)41   PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
42 
43   /// Checks whether the heap is empty.
empty() const44   bool empty() const { return Storage.empty(); }
45 
46   /// Insert a new value on the heap.
insert(const T & V)47   void insert(const T &V) {
48     unsigned Index = Storage.size();
49     Storage.push_back(V);
50     if (Index == 0) return;
51 
52     T *data = Storage.data();
53     while (true) {
54       unsigned Target = (Index + 1) / 2 - 1;
55       if (!Precedes(data[Index], data[Target])) return;
56       std::swap(data[Index], data[Target]);
57       if (Target == 0) return;
58       Index = Target;
59     }
60   }
61 
62   /// Remove the minimum value in the heap.  Only valid on a non-empty heap.
remove_min()63   T remove_min() {
64     assert(!empty());
65     T tmp = Storage[0];
66 
67     unsigned NewSize = Storage.size() - 1;
68     if (NewSize) {
69       // Move the slot at the end to the beginning.
70       if (std::is_trivially_copyable<T>::value)
71         Storage[0] = Storage[NewSize];
72       else
73         std::swap(Storage[0], Storage[NewSize]);
74 
75       // Bubble the root up as necessary.
76       unsigned Index = 0;
77       while (true) {
78         // With a 1-based index, the children would be Index*2 and Index*2+1.
79         unsigned R = (Index + 1) * 2;
80         unsigned L = R - 1;
81 
82         // If R is out of bounds, we're done after this in any case.
83         if (R >= NewSize) {
84           // If L is also out of bounds, we're done immediately.
85           if (L >= NewSize) break;
86 
87           // Otherwise, test whether we should swap L and Index.
88           if (Precedes(Storage[L], Storage[Index]))
89             std::swap(Storage[L], Storage[Index]);
90           break;
91         }
92 
93         // Otherwise, we need to compare with the smaller of L and R.
94         // Prefer R because it's closer to the end of the array.
95         unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
96 
97         // If Index is >= the min of L and R, then heap ordering is restored.
98         if (!Precedes(Storage[IndexToTest], Storage[Index]))
99           break;
100 
101         // Otherwise, keep bubbling up.
102         std::swap(Storage[IndexToTest], Storage[Index]);
103         Index = IndexToTest;
104       }
105     }
106     Storage.pop_back();
107 
108     return tmp;
109   }
110 };
111 
112 /// A function-scope difference engine.
113 class FunctionDifferenceEngine {
114   DifferenceEngine &Engine;
115 
116   // Some initializers may reference the variable we're currently checking. This
117   // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
118   // recursing.
119   const Value *SavedLHS;
120   const Value *SavedRHS;
121 
122   /// The current mapping from old local values to new local values.
123   DenseMap<const Value *, const Value *> Values;
124 
125   /// The current mapping from old blocks to new blocks.
126   DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
127 
128   DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
129 
getUnprocPredCount(const BasicBlock * Block) const130   unsigned getUnprocPredCount(const BasicBlock *Block) const {
131     unsigned Count = 0;
132     for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
133          ++I)
134       if (!Blocks.count(*I)) Count++;
135     return Count;
136   }
137 
138   typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
139 
140   /// A type which sorts a priority queue by the number of unprocessed
141   /// predecessor blocks it has remaining.
142   ///
143   /// This is actually really expensive to calculate.
144   struct QueueSorter {
145     const FunctionDifferenceEngine &fde;
QueueSorter__anon47b14c490111::FunctionDifferenceEngine::QueueSorter146     explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
147 
operator ()__anon47b14c490111::FunctionDifferenceEngine::QueueSorter148     bool operator()(BlockPair &Old, BlockPair &New) {
149       return fde.getUnprocPredCount(Old.first)
150            < fde.getUnprocPredCount(New.first);
151     }
152   };
153 
154   /// A queue of unified blocks to process.
155   PriorityQueue<BlockPair, QueueSorter, 20> Queue;
156 
157   /// Try to unify the given two blocks.  Enqueues them for processing
158   /// if they haven't already been processed.
159   ///
160   /// Returns true if there was a problem unifying them.
tryUnify(const BasicBlock * L,const BasicBlock * R)161   bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
162     const BasicBlock *&Ref = Blocks[L];
163 
164     if (Ref) {
165       if (Ref == R) return false;
166 
167       Engine.logf("successor %l cannot be equivalent to %r; "
168                   "it's already equivalent to %r")
169         << L << R << Ref;
170       return true;
171     }
172 
173     Ref = R;
174     Queue.insert(BlockPair(L, R));
175     return false;
176   }
177 
178   /// Unifies two instructions, given that they're known not to have
179   /// structural differences.
unify(const Instruction * L,const Instruction * R)180   void unify(const Instruction *L, const Instruction *R) {
181     DifferenceEngine::Context C(Engine, L, R);
182 
183     bool Result = diff(L, R, true, true);
184     assert(!Result && "structural differences second time around?");
185     (void) Result;
186     if (!L->use_empty())
187       Values[L] = R;
188   }
189 
processQueue()190   void processQueue() {
191     while (!Queue.empty()) {
192       BlockPair Pair = Queue.remove_min();
193       diff(Pair.first, Pair.second);
194     }
195   }
196 
diff(const BasicBlock * L,const BasicBlock * R)197   void diff(const BasicBlock *L, const BasicBlock *R) {
198     DifferenceEngine::Context C(Engine, L, R);
199 
200     BasicBlock::const_iterator LI = L->begin(), LE = L->end();
201     BasicBlock::const_iterator RI = R->begin();
202 
203     do {
204       assert(LI != LE && RI != R->end());
205       const Instruction *LeftI = &*LI, *RightI = &*RI;
206 
207       // If the instructions differ, start the more sophisticated diff
208       // algorithm at the start of the block.
209       if (diff(LeftI, RightI, false, false)) {
210         TentativeValues.clear();
211         return runBlockDiff(L->begin(), R->begin());
212       }
213 
214       // Otherwise, tentatively unify them.
215       if (!LeftI->use_empty())
216         TentativeValues.insert(std::make_pair(LeftI, RightI));
217 
218       ++LI;
219       ++RI;
220     } while (LI != LE); // This is sufficient: we can't get equality of
221                         // terminators if there are residual instructions.
222 
223     // Unify everything in the block, non-tentatively this time.
224     TentativeValues.clear();
225     for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
226       unify(&*LI, &*RI);
227   }
228 
229   bool matchForBlockDiff(const Instruction *L, const Instruction *R);
230   void runBlockDiff(BasicBlock::const_iterator LI,
231                     BasicBlock::const_iterator RI);
232 
diffCallSites(const CallBase & L,const CallBase & R,bool Complain)233   bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
234     // FIXME: call attributes
235     if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
236       if (Complain) Engine.log("called functions differ");
237       return true;
238     }
239     if (L.arg_size() != R.arg_size()) {
240       if (Complain) Engine.log("argument counts differ");
241       return true;
242     }
243     for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
244       if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
245         if (Complain)
246           Engine.logf("arguments %l and %r differ")
247               << L.getArgOperand(I) << R.getArgOperand(I);
248         return true;
249       }
250     return false;
251   }
252 
diff(const Instruction * L,const Instruction * R,bool Complain,bool TryUnify)253   bool diff(const Instruction *L, const Instruction *R, bool Complain,
254             bool TryUnify) {
255     // FIXME: metadata (if Complain is set)
256 
257     // Different opcodes always imply different operations.
258     if (L->getOpcode() != R->getOpcode()) {
259       if (Complain) Engine.log("different instruction types");
260       return true;
261     }
262 
263     if (isa<CmpInst>(L)) {
264       if (cast<CmpInst>(L)->getPredicate()
265             != cast<CmpInst>(R)->getPredicate()) {
266         if (Complain) Engine.log("different predicates");
267         return true;
268       }
269     } else if (isa<CallInst>(L)) {
270       return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
271     } else if (isa<PHINode>(L)) {
272       // FIXME: implement.
273 
274       // This is really weird;  type uniquing is broken?
275       if (L->getType() != R->getType()) {
276         if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
277           if (Complain) Engine.log("different phi types");
278           return true;
279         }
280       }
281       return false;
282 
283     // Terminators.
284     } else if (isa<InvokeInst>(L)) {
285       const InvokeInst &LI = cast<InvokeInst>(*L);
286       const InvokeInst &RI = cast<InvokeInst>(*R);
287       if (diffCallSites(LI, RI, Complain))
288         return true;
289 
290       if (TryUnify) {
291         tryUnify(LI.getNormalDest(), RI.getNormalDest());
292         tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
293       }
294       return false;
295 
296     } else if (isa<CallBrInst>(L)) {
297       const CallBrInst &LI = cast<CallBrInst>(*L);
298       const CallBrInst &RI = cast<CallBrInst>(*R);
299       if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
300         if (Complain)
301           Engine.log("callbr # of indirect destinations differ");
302         return true;
303       }
304 
305       // Perform the "try unify" step so that we can equate the indirect
306       // destinations before checking the call site.
307       for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
308         tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
309 
310       if (diffCallSites(LI, RI, Complain))
311         return true;
312 
313       if (TryUnify)
314         tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
315       return false;
316 
317     } else if (isa<BranchInst>(L)) {
318       const BranchInst *LI = cast<BranchInst>(L);
319       const BranchInst *RI = cast<BranchInst>(R);
320       if (LI->isConditional() != RI->isConditional()) {
321         if (Complain) Engine.log("branch conditionality differs");
322         return true;
323       }
324 
325       if (LI->isConditional()) {
326         if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
327           if (Complain) Engine.log("branch conditions differ");
328           return true;
329         }
330         if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
331       }
332       if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
333       return false;
334 
335     } else if (isa<IndirectBrInst>(L)) {
336       const IndirectBrInst *LI = cast<IndirectBrInst>(L);
337       const IndirectBrInst *RI = cast<IndirectBrInst>(R);
338       if (LI->getNumDestinations() != RI->getNumDestinations()) {
339         if (Complain) Engine.log("indirectbr # of destinations differ");
340         return true;
341       }
342 
343       if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
344         if (Complain) Engine.log("indirectbr addresses differ");
345         return true;
346       }
347 
348       if (TryUnify) {
349         for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
350           tryUnify(LI->getDestination(i), RI->getDestination(i));
351         }
352       }
353       return false;
354 
355     } else if (isa<SwitchInst>(L)) {
356       const SwitchInst *LI = cast<SwitchInst>(L);
357       const SwitchInst *RI = cast<SwitchInst>(R);
358       if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
359         if (Complain) Engine.log("switch conditions differ");
360         return true;
361       }
362       if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
363 
364       bool Difference = false;
365 
366       DenseMap<const ConstantInt *, const BasicBlock *> LCases;
367       for (auto Case : LI->cases())
368         LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
369 
370       for (auto Case : RI->cases()) {
371         const ConstantInt *CaseValue = Case.getCaseValue();
372         const BasicBlock *LCase = LCases[CaseValue];
373         if (LCase) {
374           if (TryUnify)
375             tryUnify(LCase, Case.getCaseSuccessor());
376           LCases.erase(CaseValue);
377         } else if (Complain || !Difference) {
378           if (Complain)
379             Engine.logf("right switch has extra case %r") << CaseValue;
380           Difference = true;
381         }
382       }
383       if (!Difference)
384         for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
385                  I = LCases.begin(),
386                  E = LCases.end();
387              I != E; ++I) {
388           if (Complain)
389             Engine.logf("left switch has extra case %l") << I->first;
390           Difference = true;
391         }
392       return Difference;
393     } else if (isa<UnreachableInst>(L)) {
394       return false;
395     }
396 
397     if (L->getNumOperands() != R->getNumOperands()) {
398       if (Complain) Engine.log("instructions have different operand counts");
399       return true;
400     }
401 
402     for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
403       Value *LO = L->getOperand(I), *RO = R->getOperand(I);
404       if (!equivalentAsOperands(LO, RO)) {
405         if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
406         return true;
407       }
408     }
409 
410     return false;
411   }
412 
413 public:
equivalentAsOperands(const Constant * L,const Constant * R)414   bool equivalentAsOperands(const Constant *L, const Constant *R) {
415     // Use equality as a preliminary filter.
416     if (L == R)
417       return true;
418 
419     if (L->getValueID() != R->getValueID())
420       return false;
421 
422     // Ask the engine about global values.
423     if (isa<GlobalValue>(L))
424       return Engine.equivalentAsOperands(cast<GlobalValue>(L),
425                                          cast<GlobalValue>(R));
426 
427     // Compare constant expressions structurally.
428     if (isa<ConstantExpr>(L))
429       return equivalentAsOperands(cast<ConstantExpr>(L),
430                                   cast<ConstantExpr>(R));
431 
432     // Constants of the "same type" don't always actually have the same
433     // type; I don't know why.  Just white-list them.
434     if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
435       return true;
436 
437     // Block addresses only match if we've already encountered the
438     // block.  FIXME: tentative matches?
439     if (isa<BlockAddress>(L))
440       return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
441                  == cast<BlockAddress>(R)->getBasicBlock();
442 
443     // If L and R are ConstantVectors, compare each element
444     if (isa<ConstantVector>(L)) {
445       const ConstantVector *CVL = cast<ConstantVector>(L);
446       const ConstantVector *CVR = cast<ConstantVector>(R);
447       if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
448         return false;
449       for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
450         if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
451           return false;
452       }
453       return true;
454     }
455 
456     // If L and R are ConstantArrays, compare the element count and types.
457     if (isa<ConstantArray>(L)) {
458       const ConstantArray *CAL = cast<ConstantArray>(L);
459       const ConstantArray *CAR = cast<ConstantArray>(R);
460       // Sometimes a type may be equivalent, but not uniquified---e.g. it may
461       // contain a GEP instruction. Do a deeper comparison of the types.
462       if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
463         return false;
464 
465       for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
466         if (!equivalentAsOperands(CAL->getAggregateElement(I),
467                                   CAR->getAggregateElement(I)))
468           return false;
469       }
470 
471       return true;
472     }
473 
474     // If L and R are ConstantStructs, compare each field and type.
475     if (isa<ConstantStruct>(L)) {
476       const ConstantStruct *CSL = cast<ConstantStruct>(L);
477       const ConstantStruct *CSR = cast<ConstantStruct>(R);
478 
479       const StructType *LTy = cast<StructType>(CSL->getType());
480       const StructType *RTy = cast<StructType>(CSR->getType());
481 
482       // The StructTypes should have the same attributes. Don't use
483       // isLayoutIdentical(), because that just checks the element pointers,
484       // which may not work here.
485       if (LTy->getNumElements() != RTy->getNumElements() ||
486           LTy->isPacked() != RTy->isPacked())
487         return false;
488 
489       for (unsigned I = 0; I < LTy->getNumElements(); I++) {
490         const Value *LAgg = CSL->getAggregateElement(I);
491         const Value *RAgg = CSR->getAggregateElement(I);
492 
493         if (LAgg == SavedLHS || RAgg == SavedRHS) {
494           if (LAgg != SavedLHS || RAgg != SavedRHS)
495             // If the left and right operands aren't both re-analyzing the
496             // variable, then the initialiers don't match, so report "false".
497             // Otherwise, we skip these operands..
498             return false;
499 
500           continue;
501         }
502 
503         if (!equivalentAsOperands(LAgg, RAgg)) {
504           return false;
505         }
506       }
507 
508       return true;
509     }
510 
511     return false;
512   }
513 
equivalentAsOperands(const ConstantExpr * L,const ConstantExpr * R)514   bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
515     if (L == R)
516       return true;
517 
518     if (L->getOpcode() != R->getOpcode())
519       return false;
520 
521     switch (L->getOpcode()) {
522     case Instruction::ICmp:
523     case Instruction::FCmp:
524       if (L->getPredicate() != R->getPredicate())
525         return false;
526       break;
527 
528     case Instruction::GetElementPtr:
529       // FIXME: inbounds?
530       break;
531 
532     default:
533       break;
534     }
535 
536     if (L->getNumOperands() != R->getNumOperands())
537       return false;
538 
539     for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
540       const auto *LOp = L->getOperand(I);
541       const auto *ROp = R->getOperand(I);
542 
543       if (LOp == SavedLHS || ROp == SavedRHS) {
544         if (LOp != SavedLHS || ROp != SavedRHS)
545           // If the left and right operands aren't both re-analyzing the
546           // variable, then the initialiers don't match, so report "false".
547           // Otherwise, we skip these operands..
548           return false;
549 
550         continue;
551       }
552 
553       if (!equivalentAsOperands(LOp, ROp))
554         return false;
555     }
556 
557     return true;
558   }
559 
equivalentAsOperands(const Value * L,const Value * R)560   bool equivalentAsOperands(const Value *L, const Value *R) {
561     // Fall out if the values have different kind.
562     // This possibly shouldn't take priority over oracles.
563     if (L->getValueID() != R->getValueID())
564       return false;
565 
566     // Value subtypes:  Argument, Constant, Instruction, BasicBlock,
567     //                  InlineAsm, MDNode, MDString, PseudoSourceValue
568 
569     if (isa<Constant>(L))
570       return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
571 
572     if (isa<Instruction>(L))
573       return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
574 
575     if (isa<Argument>(L))
576       return Values[L] == R;
577 
578     if (isa<BasicBlock>(L))
579       return Blocks[cast<BasicBlock>(L)] != R;
580 
581     // Pretend everything else is identical.
582     return true;
583   }
584 
585   // Avoid a gcc warning about accessing 'this' in an initializer.
this_()586   FunctionDifferenceEngine *this_() { return this; }
587 
588 public:
FunctionDifferenceEngine(DifferenceEngine & Engine,const Value * SavedLHS=nullptr,const Value * SavedRHS=nullptr)589   FunctionDifferenceEngine(DifferenceEngine &Engine,
590                            const Value *SavedLHS = nullptr,
591                            const Value *SavedRHS = nullptr)
592       : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
593         Queue(QueueSorter(*this_())) {}
594 
diff(const Function * L,const Function * R)595   void diff(const Function *L, const Function *R) {
596     if (L->arg_size() != R->arg_size())
597       Engine.log("different argument counts");
598 
599     // Map the arguments.
600     for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
601                                       RI = R->arg_begin(), RE = R->arg_end();
602          LI != LE && RI != RE; ++LI, ++RI)
603       Values[&*LI] = &*RI;
604 
605     tryUnify(&*L->begin(), &*R->begin());
606     processQueue();
607   }
608 };
609 
610 struct DiffEntry {
DiffEntry__anon47b14c490111::DiffEntry611   DiffEntry() : Cost(0) {}
612 
613   unsigned Cost;
614   llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
615 };
616 
matchForBlockDiff(const Instruction * L,const Instruction * R)617 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
618                                                  const Instruction *R) {
619   return !diff(L, R, false, false);
620 }
621 
runBlockDiff(BasicBlock::const_iterator LStart,BasicBlock::const_iterator RStart)622 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
623                                             BasicBlock::const_iterator RStart) {
624   BasicBlock::const_iterator LE = LStart->getParent()->end();
625   BasicBlock::const_iterator RE = RStart->getParent()->end();
626 
627   unsigned NL = std::distance(LStart, LE);
628 
629   SmallVector<DiffEntry, 20> Paths1(NL+1);
630   SmallVector<DiffEntry, 20> Paths2(NL+1);
631 
632   DiffEntry *Cur = Paths1.data();
633   DiffEntry *Next = Paths2.data();
634 
635   const unsigned LeftCost = 2;
636   const unsigned RightCost = 2;
637   const unsigned MatchCost = 0;
638 
639   assert(TentativeValues.empty());
640 
641   // Initialize the first column.
642   for (unsigned I = 0; I != NL+1; ++I) {
643     Cur[I].Cost = I * LeftCost;
644     for (unsigned J = 0; J != I; ++J)
645       Cur[I].Path.push_back(DC_left);
646   }
647 
648   for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
649     // Initialize the first row.
650     Next[0] = Cur[0];
651     Next[0].Cost += RightCost;
652     Next[0].Path.push_back(DC_right);
653 
654     unsigned Index = 1;
655     for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
656       if (matchForBlockDiff(&*LI, &*RI)) {
657         Next[Index] = Cur[Index-1];
658         Next[Index].Cost += MatchCost;
659         Next[Index].Path.push_back(DC_match);
660         TentativeValues.insert(std::make_pair(&*LI, &*RI));
661       } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
662         Next[Index] = Next[Index-1];
663         Next[Index].Cost += LeftCost;
664         Next[Index].Path.push_back(DC_left);
665       } else {
666         Next[Index] = Cur[Index];
667         Next[Index].Cost += RightCost;
668         Next[Index].Path.push_back(DC_right);
669       }
670     }
671 
672     std::swap(Cur, Next);
673   }
674 
675   // We don't need the tentative values anymore; everything from here
676   // on out should be non-tentative.
677   TentativeValues.clear();
678 
679   SmallVectorImpl<char> &Path = Cur[NL].Path;
680   BasicBlock::const_iterator LI = LStart, RI = RStart;
681 
682   DiffLogBuilder Diff(Engine.getConsumer());
683 
684   // Drop trailing matches.
685   while (Path.size() && Path.back() == DC_match)
686     Path.pop_back();
687 
688   // Skip leading matches.
689   SmallVectorImpl<char>::iterator
690     PI = Path.begin(), PE = Path.end();
691   while (PI != PE && *PI == DC_match) {
692     unify(&*LI, &*RI);
693     ++PI;
694     ++LI;
695     ++RI;
696   }
697 
698   for (; PI != PE; ++PI) {
699     switch (static_cast<DiffChange>(*PI)) {
700     case DC_match:
701       assert(LI != LE && RI != RE);
702       {
703         const Instruction *L = &*LI, *R = &*RI;
704         unify(L, R);
705         Diff.addMatch(L, R);
706       }
707       ++LI; ++RI;
708       break;
709 
710     case DC_left:
711       assert(LI != LE);
712       Diff.addLeft(&*LI);
713       ++LI;
714       break;
715 
716     case DC_right:
717       assert(RI != RE);
718       Diff.addRight(&*RI);
719       ++RI;
720       break;
721     }
722   }
723 
724   // Finishing unifying and complaining about the tails of the block,
725   // which should be matches all the way through.
726   while (LI != LE) {
727     assert(RI != RE);
728     unify(&*LI, &*RI);
729     ++LI;
730     ++RI;
731   }
732 
733   // If the terminators have different kinds, but one is an invoke and the
734   // other is an unconditional branch immediately following a call, unify
735   // the results and the destinations.
736   const Instruction *LTerm = LStart->getParent()->getTerminator();
737   const Instruction *RTerm = RStart->getParent()->getTerminator();
738   if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
739     if (cast<BranchInst>(LTerm)->isConditional()) return;
740     BasicBlock::const_iterator I = LTerm->getIterator();
741     if (I == LStart->getParent()->begin()) return;
742     --I;
743     if (!isa<CallInst>(*I)) return;
744     const CallInst *LCall = cast<CallInst>(&*I);
745     const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
746     if (!equivalentAsOperands(LCall->getCalledOperand(),
747                               RInvoke->getCalledOperand()))
748       return;
749     if (!LCall->use_empty())
750       Values[LCall] = RInvoke;
751     tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
752   } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
753     if (cast<BranchInst>(RTerm)->isConditional()) return;
754     BasicBlock::const_iterator I = RTerm->getIterator();
755     if (I == RStart->getParent()->begin()) return;
756     --I;
757     if (!isa<CallInst>(*I)) return;
758     const CallInst *RCall = cast<CallInst>(I);
759     const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
760     if (!equivalentAsOperands(LInvoke->getCalledOperand(),
761                               RCall->getCalledOperand()))
762       return;
763     if (!LInvoke->use_empty())
764       Values[LInvoke] = RCall;
765     tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
766   }
767 }
768 }
769 
anchor()770 void DifferenceEngine::Oracle::anchor() { }
771 
diff(const Function * L,const Function * R)772 void DifferenceEngine::diff(const Function *L, const Function *R) {
773   Context C(*this, L, R);
774 
775   // FIXME: types
776   // FIXME: attributes and CC
777   // FIXME: parameter attributes
778 
779   // If both are declarations, we're done.
780   if (L->empty() && R->empty())
781     return;
782   else if (L->empty())
783     log("left function is declaration, right function is definition");
784   else if (R->empty())
785     log("right function is declaration, left function is definition");
786   else
787     FunctionDifferenceEngine(*this).diff(L, R);
788 }
789 
diff(const Module * L,const Module * R)790 void DifferenceEngine::diff(const Module *L, const Module *R) {
791   StringSet<> LNames;
792   SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
793 
794   unsigned LeftAnonCount = 0;
795   unsigned RightAnonCount = 0;
796 
797   for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
798     const Function *LFn = &*I;
799     StringRef Name = LFn->getName();
800     if (Name.empty()) {
801       ++LeftAnonCount;
802       continue;
803     }
804 
805     LNames.insert(Name);
806 
807     if (Function *RFn = R->getFunction(LFn->getName()))
808       Queue.push_back(std::make_pair(LFn, RFn));
809     else
810       logf("function %l exists only in left module") << LFn;
811   }
812 
813   for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
814     const Function *RFn = &*I;
815     StringRef Name = RFn->getName();
816     if (Name.empty()) {
817       ++RightAnonCount;
818       continue;
819     }
820 
821     if (!LNames.count(Name))
822       logf("function %r exists only in right module") << RFn;
823   }
824 
825   if (LeftAnonCount != 0 || RightAnonCount != 0) {
826     SmallString<32> Tmp;
827     logf(("not comparing " + Twine(LeftAnonCount) +
828           " anonymous functions in the left module and " +
829           Twine(RightAnonCount) + " in the right module")
830              .toStringRef(Tmp));
831   }
832 
833   for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
834            I = Queue.begin(),
835            E = Queue.end();
836        I != E; ++I)
837     diff(I->first, I->second);
838 }
839 
equivalentAsOperands(const GlobalValue * L,const GlobalValue * R)840 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
841                                             const GlobalValue *R) {
842   if (globalValueOracle) return (*globalValueOracle)(L, R);
843 
844   if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
845     const GlobalVariable *GVL = cast<GlobalVariable>(L);
846     const GlobalVariable *GVR = cast<GlobalVariable>(R);
847     if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
848         GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
849       return FunctionDifferenceEngine(*this, GVL, GVR)
850           .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
851   }
852 
853   return L->getName() == R->getName();
854 }
855