1 //===-- DifferenceEngine.cpp - Structural function/module comparison ------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This header defines the implementation of the LLVM difference
10 // engine, which structurally compares global values within a module.
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include "DifferenceEngine.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallString.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/IR/CFG.h"
21 #include "llvm/IR/Constants.h"
22 #include "llvm/IR/Function.h"
23 #include "llvm/IR/Instructions.h"
24 #include "llvm/IR/Module.h"
25 #include "llvm/Support/ErrorHandling.h"
26 #include "llvm/Support/raw_ostream.h"
27 #include "llvm/Support/type_traits.h"
28 #include <utility>
29
30 using namespace llvm;
31
32 namespace {
33
34 /// A priority queue, implemented as a heap.
35 template <class T, class Sorter, unsigned InlineCapacity>
36 class PriorityQueue {
37 Sorter Precedes;
38 llvm::SmallVector<T, InlineCapacity> Storage;
39
40 public:
PriorityQueue(const Sorter & Precedes)41 PriorityQueue(const Sorter &Precedes) : Precedes(Precedes) {}
42
43 /// Checks whether the heap is empty.
empty() const44 bool empty() const { return Storage.empty(); }
45
46 /// Insert a new value on the heap.
insert(const T & V)47 void insert(const T &V) {
48 unsigned Index = Storage.size();
49 Storage.push_back(V);
50 if (Index == 0) return;
51
52 T *data = Storage.data();
53 while (true) {
54 unsigned Target = (Index + 1) / 2 - 1;
55 if (!Precedes(data[Index], data[Target])) return;
56 std::swap(data[Index], data[Target]);
57 if (Target == 0) return;
58 Index = Target;
59 }
60 }
61
62 /// Remove the minimum value in the heap. Only valid on a non-empty heap.
remove_min()63 T remove_min() {
64 assert(!empty());
65 T tmp = Storage[0];
66
67 unsigned NewSize = Storage.size() - 1;
68 if (NewSize) {
69 // Move the slot at the end to the beginning.
70 if (std::is_trivially_copyable<T>::value)
71 Storage[0] = Storage[NewSize];
72 else
73 std::swap(Storage[0], Storage[NewSize]);
74
75 // Bubble the root up as necessary.
76 unsigned Index = 0;
77 while (true) {
78 // With a 1-based index, the children would be Index*2 and Index*2+1.
79 unsigned R = (Index + 1) * 2;
80 unsigned L = R - 1;
81
82 // If R is out of bounds, we're done after this in any case.
83 if (R >= NewSize) {
84 // If L is also out of bounds, we're done immediately.
85 if (L >= NewSize) break;
86
87 // Otherwise, test whether we should swap L and Index.
88 if (Precedes(Storage[L], Storage[Index]))
89 std::swap(Storage[L], Storage[Index]);
90 break;
91 }
92
93 // Otherwise, we need to compare with the smaller of L and R.
94 // Prefer R because it's closer to the end of the array.
95 unsigned IndexToTest = (Precedes(Storage[L], Storage[R]) ? L : R);
96
97 // If Index is >= the min of L and R, then heap ordering is restored.
98 if (!Precedes(Storage[IndexToTest], Storage[Index]))
99 break;
100
101 // Otherwise, keep bubbling up.
102 std::swap(Storage[IndexToTest], Storage[Index]);
103 Index = IndexToTest;
104 }
105 }
106 Storage.pop_back();
107
108 return tmp;
109 }
110 };
111
112 /// A function-scope difference engine.
113 class FunctionDifferenceEngine {
114 DifferenceEngine &Engine;
115
116 // Some initializers may reference the variable we're currently checking. This
117 // can cause an infinite loop. The Saved[LR]HS ivars can be checked to prevent
118 // recursing.
119 const Value *SavedLHS;
120 const Value *SavedRHS;
121
122 /// The current mapping from old local values to new local values.
123 DenseMap<const Value *, const Value *> Values;
124
125 /// The current mapping from old blocks to new blocks.
126 DenseMap<const BasicBlock *, const BasicBlock *> Blocks;
127
128 DenseSet<std::pair<const Value *, const Value *>> TentativeValues;
129
getUnprocPredCount(const BasicBlock * Block) const130 unsigned getUnprocPredCount(const BasicBlock *Block) const {
131 unsigned Count = 0;
132 for (const_pred_iterator I = pred_begin(Block), E = pred_end(Block); I != E;
133 ++I)
134 if (!Blocks.count(*I)) Count++;
135 return Count;
136 }
137
138 typedef std::pair<const BasicBlock *, const BasicBlock *> BlockPair;
139
140 /// A type which sorts a priority queue by the number of unprocessed
141 /// predecessor blocks it has remaining.
142 ///
143 /// This is actually really expensive to calculate.
144 struct QueueSorter {
145 const FunctionDifferenceEngine &fde;
QueueSorter__anon47b14c490111::FunctionDifferenceEngine::QueueSorter146 explicit QueueSorter(const FunctionDifferenceEngine &fde) : fde(fde) {}
147
operator ()__anon47b14c490111::FunctionDifferenceEngine::QueueSorter148 bool operator()(BlockPair &Old, BlockPair &New) {
149 return fde.getUnprocPredCount(Old.first)
150 < fde.getUnprocPredCount(New.first);
151 }
152 };
153
154 /// A queue of unified blocks to process.
155 PriorityQueue<BlockPair, QueueSorter, 20> Queue;
156
157 /// Try to unify the given two blocks. Enqueues them for processing
158 /// if they haven't already been processed.
159 ///
160 /// Returns true if there was a problem unifying them.
tryUnify(const BasicBlock * L,const BasicBlock * R)161 bool tryUnify(const BasicBlock *L, const BasicBlock *R) {
162 const BasicBlock *&Ref = Blocks[L];
163
164 if (Ref) {
165 if (Ref == R) return false;
166
167 Engine.logf("successor %l cannot be equivalent to %r; "
168 "it's already equivalent to %r")
169 << L << R << Ref;
170 return true;
171 }
172
173 Ref = R;
174 Queue.insert(BlockPair(L, R));
175 return false;
176 }
177
178 /// Unifies two instructions, given that they're known not to have
179 /// structural differences.
unify(const Instruction * L,const Instruction * R)180 void unify(const Instruction *L, const Instruction *R) {
181 DifferenceEngine::Context C(Engine, L, R);
182
183 bool Result = diff(L, R, true, true);
184 assert(!Result && "structural differences second time around?");
185 (void) Result;
186 if (!L->use_empty())
187 Values[L] = R;
188 }
189
processQueue()190 void processQueue() {
191 while (!Queue.empty()) {
192 BlockPair Pair = Queue.remove_min();
193 diff(Pair.first, Pair.second);
194 }
195 }
196
diff(const BasicBlock * L,const BasicBlock * R)197 void diff(const BasicBlock *L, const BasicBlock *R) {
198 DifferenceEngine::Context C(Engine, L, R);
199
200 BasicBlock::const_iterator LI = L->begin(), LE = L->end();
201 BasicBlock::const_iterator RI = R->begin();
202
203 do {
204 assert(LI != LE && RI != R->end());
205 const Instruction *LeftI = &*LI, *RightI = &*RI;
206
207 // If the instructions differ, start the more sophisticated diff
208 // algorithm at the start of the block.
209 if (diff(LeftI, RightI, false, false)) {
210 TentativeValues.clear();
211 return runBlockDiff(L->begin(), R->begin());
212 }
213
214 // Otherwise, tentatively unify them.
215 if (!LeftI->use_empty())
216 TentativeValues.insert(std::make_pair(LeftI, RightI));
217
218 ++LI;
219 ++RI;
220 } while (LI != LE); // This is sufficient: we can't get equality of
221 // terminators if there are residual instructions.
222
223 // Unify everything in the block, non-tentatively this time.
224 TentativeValues.clear();
225 for (LI = L->begin(), RI = R->begin(); LI != LE; ++LI, ++RI)
226 unify(&*LI, &*RI);
227 }
228
229 bool matchForBlockDiff(const Instruction *L, const Instruction *R);
230 void runBlockDiff(BasicBlock::const_iterator LI,
231 BasicBlock::const_iterator RI);
232
diffCallSites(const CallBase & L,const CallBase & R,bool Complain)233 bool diffCallSites(const CallBase &L, const CallBase &R, bool Complain) {
234 // FIXME: call attributes
235 if (!equivalentAsOperands(L.getCalledOperand(), R.getCalledOperand())) {
236 if (Complain) Engine.log("called functions differ");
237 return true;
238 }
239 if (L.arg_size() != R.arg_size()) {
240 if (Complain) Engine.log("argument counts differ");
241 return true;
242 }
243 for (unsigned I = 0, E = L.arg_size(); I != E; ++I)
244 if (!equivalentAsOperands(L.getArgOperand(I), R.getArgOperand(I))) {
245 if (Complain)
246 Engine.logf("arguments %l and %r differ")
247 << L.getArgOperand(I) << R.getArgOperand(I);
248 return true;
249 }
250 return false;
251 }
252
diff(const Instruction * L,const Instruction * R,bool Complain,bool TryUnify)253 bool diff(const Instruction *L, const Instruction *R, bool Complain,
254 bool TryUnify) {
255 // FIXME: metadata (if Complain is set)
256
257 // Different opcodes always imply different operations.
258 if (L->getOpcode() != R->getOpcode()) {
259 if (Complain) Engine.log("different instruction types");
260 return true;
261 }
262
263 if (isa<CmpInst>(L)) {
264 if (cast<CmpInst>(L)->getPredicate()
265 != cast<CmpInst>(R)->getPredicate()) {
266 if (Complain) Engine.log("different predicates");
267 return true;
268 }
269 } else if (isa<CallInst>(L)) {
270 return diffCallSites(cast<CallInst>(*L), cast<CallInst>(*R), Complain);
271 } else if (isa<PHINode>(L)) {
272 // FIXME: implement.
273
274 // This is really weird; type uniquing is broken?
275 if (L->getType() != R->getType()) {
276 if (!L->getType()->isPointerTy() || !R->getType()->isPointerTy()) {
277 if (Complain) Engine.log("different phi types");
278 return true;
279 }
280 }
281 return false;
282
283 // Terminators.
284 } else if (isa<InvokeInst>(L)) {
285 const InvokeInst &LI = cast<InvokeInst>(*L);
286 const InvokeInst &RI = cast<InvokeInst>(*R);
287 if (diffCallSites(LI, RI, Complain))
288 return true;
289
290 if (TryUnify) {
291 tryUnify(LI.getNormalDest(), RI.getNormalDest());
292 tryUnify(LI.getUnwindDest(), RI.getUnwindDest());
293 }
294 return false;
295
296 } else if (isa<CallBrInst>(L)) {
297 const CallBrInst &LI = cast<CallBrInst>(*L);
298 const CallBrInst &RI = cast<CallBrInst>(*R);
299 if (LI.getNumIndirectDests() != RI.getNumIndirectDests()) {
300 if (Complain)
301 Engine.log("callbr # of indirect destinations differ");
302 return true;
303 }
304
305 // Perform the "try unify" step so that we can equate the indirect
306 // destinations before checking the call site.
307 for (unsigned I = 0; I < LI.getNumIndirectDests(); I++)
308 tryUnify(LI.getIndirectDest(I), RI.getIndirectDest(I));
309
310 if (diffCallSites(LI, RI, Complain))
311 return true;
312
313 if (TryUnify)
314 tryUnify(LI.getDefaultDest(), RI.getDefaultDest());
315 return false;
316
317 } else if (isa<BranchInst>(L)) {
318 const BranchInst *LI = cast<BranchInst>(L);
319 const BranchInst *RI = cast<BranchInst>(R);
320 if (LI->isConditional() != RI->isConditional()) {
321 if (Complain) Engine.log("branch conditionality differs");
322 return true;
323 }
324
325 if (LI->isConditional()) {
326 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
327 if (Complain) Engine.log("branch conditions differ");
328 return true;
329 }
330 if (TryUnify) tryUnify(LI->getSuccessor(1), RI->getSuccessor(1));
331 }
332 if (TryUnify) tryUnify(LI->getSuccessor(0), RI->getSuccessor(0));
333 return false;
334
335 } else if (isa<IndirectBrInst>(L)) {
336 const IndirectBrInst *LI = cast<IndirectBrInst>(L);
337 const IndirectBrInst *RI = cast<IndirectBrInst>(R);
338 if (LI->getNumDestinations() != RI->getNumDestinations()) {
339 if (Complain) Engine.log("indirectbr # of destinations differ");
340 return true;
341 }
342
343 if (!equivalentAsOperands(LI->getAddress(), RI->getAddress())) {
344 if (Complain) Engine.log("indirectbr addresses differ");
345 return true;
346 }
347
348 if (TryUnify) {
349 for (unsigned i = 0; i < LI->getNumDestinations(); i++) {
350 tryUnify(LI->getDestination(i), RI->getDestination(i));
351 }
352 }
353 return false;
354
355 } else if (isa<SwitchInst>(L)) {
356 const SwitchInst *LI = cast<SwitchInst>(L);
357 const SwitchInst *RI = cast<SwitchInst>(R);
358 if (!equivalentAsOperands(LI->getCondition(), RI->getCondition())) {
359 if (Complain) Engine.log("switch conditions differ");
360 return true;
361 }
362 if (TryUnify) tryUnify(LI->getDefaultDest(), RI->getDefaultDest());
363
364 bool Difference = false;
365
366 DenseMap<const ConstantInt *, const BasicBlock *> LCases;
367 for (auto Case : LI->cases())
368 LCases[Case.getCaseValue()] = Case.getCaseSuccessor();
369
370 for (auto Case : RI->cases()) {
371 const ConstantInt *CaseValue = Case.getCaseValue();
372 const BasicBlock *LCase = LCases[CaseValue];
373 if (LCase) {
374 if (TryUnify)
375 tryUnify(LCase, Case.getCaseSuccessor());
376 LCases.erase(CaseValue);
377 } else if (Complain || !Difference) {
378 if (Complain)
379 Engine.logf("right switch has extra case %r") << CaseValue;
380 Difference = true;
381 }
382 }
383 if (!Difference)
384 for (DenseMap<const ConstantInt *, const BasicBlock *>::iterator
385 I = LCases.begin(),
386 E = LCases.end();
387 I != E; ++I) {
388 if (Complain)
389 Engine.logf("left switch has extra case %l") << I->first;
390 Difference = true;
391 }
392 return Difference;
393 } else if (isa<UnreachableInst>(L)) {
394 return false;
395 }
396
397 if (L->getNumOperands() != R->getNumOperands()) {
398 if (Complain) Engine.log("instructions have different operand counts");
399 return true;
400 }
401
402 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
403 Value *LO = L->getOperand(I), *RO = R->getOperand(I);
404 if (!equivalentAsOperands(LO, RO)) {
405 if (Complain) Engine.logf("operands %l and %r differ") << LO << RO;
406 return true;
407 }
408 }
409
410 return false;
411 }
412
413 public:
equivalentAsOperands(const Constant * L,const Constant * R)414 bool equivalentAsOperands(const Constant *L, const Constant *R) {
415 // Use equality as a preliminary filter.
416 if (L == R)
417 return true;
418
419 if (L->getValueID() != R->getValueID())
420 return false;
421
422 // Ask the engine about global values.
423 if (isa<GlobalValue>(L))
424 return Engine.equivalentAsOperands(cast<GlobalValue>(L),
425 cast<GlobalValue>(R));
426
427 // Compare constant expressions structurally.
428 if (isa<ConstantExpr>(L))
429 return equivalentAsOperands(cast<ConstantExpr>(L),
430 cast<ConstantExpr>(R));
431
432 // Constants of the "same type" don't always actually have the same
433 // type; I don't know why. Just white-list them.
434 if (isa<ConstantPointerNull>(L) || isa<UndefValue>(L) || isa<ConstantAggregateZero>(L))
435 return true;
436
437 // Block addresses only match if we've already encountered the
438 // block. FIXME: tentative matches?
439 if (isa<BlockAddress>(L))
440 return Blocks[cast<BlockAddress>(L)->getBasicBlock()]
441 == cast<BlockAddress>(R)->getBasicBlock();
442
443 // If L and R are ConstantVectors, compare each element
444 if (isa<ConstantVector>(L)) {
445 const ConstantVector *CVL = cast<ConstantVector>(L);
446 const ConstantVector *CVR = cast<ConstantVector>(R);
447 if (CVL->getType()->getNumElements() != CVR->getType()->getNumElements())
448 return false;
449 for (unsigned i = 0; i < CVL->getType()->getNumElements(); i++) {
450 if (!equivalentAsOperands(CVL->getOperand(i), CVR->getOperand(i)))
451 return false;
452 }
453 return true;
454 }
455
456 // If L and R are ConstantArrays, compare the element count and types.
457 if (isa<ConstantArray>(L)) {
458 const ConstantArray *CAL = cast<ConstantArray>(L);
459 const ConstantArray *CAR = cast<ConstantArray>(R);
460 // Sometimes a type may be equivalent, but not uniquified---e.g. it may
461 // contain a GEP instruction. Do a deeper comparison of the types.
462 if (CAL->getType()->getNumElements() != CAR->getType()->getNumElements())
463 return false;
464
465 for (unsigned I = 0; I < CAL->getType()->getNumElements(); ++I) {
466 if (!equivalentAsOperands(CAL->getAggregateElement(I),
467 CAR->getAggregateElement(I)))
468 return false;
469 }
470
471 return true;
472 }
473
474 // If L and R are ConstantStructs, compare each field and type.
475 if (isa<ConstantStruct>(L)) {
476 const ConstantStruct *CSL = cast<ConstantStruct>(L);
477 const ConstantStruct *CSR = cast<ConstantStruct>(R);
478
479 const StructType *LTy = cast<StructType>(CSL->getType());
480 const StructType *RTy = cast<StructType>(CSR->getType());
481
482 // The StructTypes should have the same attributes. Don't use
483 // isLayoutIdentical(), because that just checks the element pointers,
484 // which may not work here.
485 if (LTy->getNumElements() != RTy->getNumElements() ||
486 LTy->isPacked() != RTy->isPacked())
487 return false;
488
489 for (unsigned I = 0; I < LTy->getNumElements(); I++) {
490 const Value *LAgg = CSL->getAggregateElement(I);
491 const Value *RAgg = CSR->getAggregateElement(I);
492
493 if (LAgg == SavedLHS || RAgg == SavedRHS) {
494 if (LAgg != SavedLHS || RAgg != SavedRHS)
495 // If the left and right operands aren't both re-analyzing the
496 // variable, then the initialiers don't match, so report "false".
497 // Otherwise, we skip these operands..
498 return false;
499
500 continue;
501 }
502
503 if (!equivalentAsOperands(LAgg, RAgg)) {
504 return false;
505 }
506 }
507
508 return true;
509 }
510
511 return false;
512 }
513
equivalentAsOperands(const ConstantExpr * L,const ConstantExpr * R)514 bool equivalentAsOperands(const ConstantExpr *L, const ConstantExpr *R) {
515 if (L == R)
516 return true;
517
518 if (L->getOpcode() != R->getOpcode())
519 return false;
520
521 switch (L->getOpcode()) {
522 case Instruction::ICmp:
523 case Instruction::FCmp:
524 if (L->getPredicate() != R->getPredicate())
525 return false;
526 break;
527
528 case Instruction::GetElementPtr:
529 // FIXME: inbounds?
530 break;
531
532 default:
533 break;
534 }
535
536 if (L->getNumOperands() != R->getNumOperands())
537 return false;
538
539 for (unsigned I = 0, E = L->getNumOperands(); I != E; ++I) {
540 const auto *LOp = L->getOperand(I);
541 const auto *ROp = R->getOperand(I);
542
543 if (LOp == SavedLHS || ROp == SavedRHS) {
544 if (LOp != SavedLHS || ROp != SavedRHS)
545 // If the left and right operands aren't both re-analyzing the
546 // variable, then the initialiers don't match, so report "false".
547 // Otherwise, we skip these operands..
548 return false;
549
550 continue;
551 }
552
553 if (!equivalentAsOperands(LOp, ROp))
554 return false;
555 }
556
557 return true;
558 }
559
equivalentAsOperands(const Value * L,const Value * R)560 bool equivalentAsOperands(const Value *L, const Value *R) {
561 // Fall out if the values have different kind.
562 // This possibly shouldn't take priority over oracles.
563 if (L->getValueID() != R->getValueID())
564 return false;
565
566 // Value subtypes: Argument, Constant, Instruction, BasicBlock,
567 // InlineAsm, MDNode, MDString, PseudoSourceValue
568
569 if (isa<Constant>(L))
570 return equivalentAsOperands(cast<Constant>(L), cast<Constant>(R));
571
572 if (isa<Instruction>(L))
573 return Values[L] == R || TentativeValues.count(std::make_pair(L, R));
574
575 if (isa<Argument>(L))
576 return Values[L] == R;
577
578 if (isa<BasicBlock>(L))
579 return Blocks[cast<BasicBlock>(L)] != R;
580
581 // Pretend everything else is identical.
582 return true;
583 }
584
585 // Avoid a gcc warning about accessing 'this' in an initializer.
this_()586 FunctionDifferenceEngine *this_() { return this; }
587
588 public:
FunctionDifferenceEngine(DifferenceEngine & Engine,const Value * SavedLHS=nullptr,const Value * SavedRHS=nullptr)589 FunctionDifferenceEngine(DifferenceEngine &Engine,
590 const Value *SavedLHS = nullptr,
591 const Value *SavedRHS = nullptr)
592 : Engine(Engine), SavedLHS(SavedLHS), SavedRHS(SavedRHS),
593 Queue(QueueSorter(*this_())) {}
594
diff(const Function * L,const Function * R)595 void diff(const Function *L, const Function *R) {
596 if (L->arg_size() != R->arg_size())
597 Engine.log("different argument counts");
598
599 // Map the arguments.
600 for (Function::const_arg_iterator LI = L->arg_begin(), LE = L->arg_end(),
601 RI = R->arg_begin(), RE = R->arg_end();
602 LI != LE && RI != RE; ++LI, ++RI)
603 Values[&*LI] = &*RI;
604
605 tryUnify(&*L->begin(), &*R->begin());
606 processQueue();
607 }
608 };
609
610 struct DiffEntry {
DiffEntry__anon47b14c490111::DiffEntry611 DiffEntry() : Cost(0) {}
612
613 unsigned Cost;
614 llvm::SmallVector<char, 8> Path; // actually of DifferenceEngine::DiffChange
615 };
616
matchForBlockDiff(const Instruction * L,const Instruction * R)617 bool FunctionDifferenceEngine::matchForBlockDiff(const Instruction *L,
618 const Instruction *R) {
619 return !diff(L, R, false, false);
620 }
621
runBlockDiff(BasicBlock::const_iterator LStart,BasicBlock::const_iterator RStart)622 void FunctionDifferenceEngine::runBlockDiff(BasicBlock::const_iterator LStart,
623 BasicBlock::const_iterator RStart) {
624 BasicBlock::const_iterator LE = LStart->getParent()->end();
625 BasicBlock::const_iterator RE = RStart->getParent()->end();
626
627 unsigned NL = std::distance(LStart, LE);
628
629 SmallVector<DiffEntry, 20> Paths1(NL+1);
630 SmallVector<DiffEntry, 20> Paths2(NL+1);
631
632 DiffEntry *Cur = Paths1.data();
633 DiffEntry *Next = Paths2.data();
634
635 const unsigned LeftCost = 2;
636 const unsigned RightCost = 2;
637 const unsigned MatchCost = 0;
638
639 assert(TentativeValues.empty());
640
641 // Initialize the first column.
642 for (unsigned I = 0; I != NL+1; ++I) {
643 Cur[I].Cost = I * LeftCost;
644 for (unsigned J = 0; J != I; ++J)
645 Cur[I].Path.push_back(DC_left);
646 }
647
648 for (BasicBlock::const_iterator RI = RStart; RI != RE; ++RI) {
649 // Initialize the first row.
650 Next[0] = Cur[0];
651 Next[0].Cost += RightCost;
652 Next[0].Path.push_back(DC_right);
653
654 unsigned Index = 1;
655 for (BasicBlock::const_iterator LI = LStart; LI != LE; ++LI, ++Index) {
656 if (matchForBlockDiff(&*LI, &*RI)) {
657 Next[Index] = Cur[Index-1];
658 Next[Index].Cost += MatchCost;
659 Next[Index].Path.push_back(DC_match);
660 TentativeValues.insert(std::make_pair(&*LI, &*RI));
661 } else if (Next[Index-1].Cost <= Cur[Index].Cost) {
662 Next[Index] = Next[Index-1];
663 Next[Index].Cost += LeftCost;
664 Next[Index].Path.push_back(DC_left);
665 } else {
666 Next[Index] = Cur[Index];
667 Next[Index].Cost += RightCost;
668 Next[Index].Path.push_back(DC_right);
669 }
670 }
671
672 std::swap(Cur, Next);
673 }
674
675 // We don't need the tentative values anymore; everything from here
676 // on out should be non-tentative.
677 TentativeValues.clear();
678
679 SmallVectorImpl<char> &Path = Cur[NL].Path;
680 BasicBlock::const_iterator LI = LStart, RI = RStart;
681
682 DiffLogBuilder Diff(Engine.getConsumer());
683
684 // Drop trailing matches.
685 while (Path.size() && Path.back() == DC_match)
686 Path.pop_back();
687
688 // Skip leading matches.
689 SmallVectorImpl<char>::iterator
690 PI = Path.begin(), PE = Path.end();
691 while (PI != PE && *PI == DC_match) {
692 unify(&*LI, &*RI);
693 ++PI;
694 ++LI;
695 ++RI;
696 }
697
698 for (; PI != PE; ++PI) {
699 switch (static_cast<DiffChange>(*PI)) {
700 case DC_match:
701 assert(LI != LE && RI != RE);
702 {
703 const Instruction *L = &*LI, *R = &*RI;
704 unify(L, R);
705 Diff.addMatch(L, R);
706 }
707 ++LI; ++RI;
708 break;
709
710 case DC_left:
711 assert(LI != LE);
712 Diff.addLeft(&*LI);
713 ++LI;
714 break;
715
716 case DC_right:
717 assert(RI != RE);
718 Diff.addRight(&*RI);
719 ++RI;
720 break;
721 }
722 }
723
724 // Finishing unifying and complaining about the tails of the block,
725 // which should be matches all the way through.
726 while (LI != LE) {
727 assert(RI != RE);
728 unify(&*LI, &*RI);
729 ++LI;
730 ++RI;
731 }
732
733 // If the terminators have different kinds, but one is an invoke and the
734 // other is an unconditional branch immediately following a call, unify
735 // the results and the destinations.
736 const Instruction *LTerm = LStart->getParent()->getTerminator();
737 const Instruction *RTerm = RStart->getParent()->getTerminator();
738 if (isa<BranchInst>(LTerm) && isa<InvokeInst>(RTerm)) {
739 if (cast<BranchInst>(LTerm)->isConditional()) return;
740 BasicBlock::const_iterator I = LTerm->getIterator();
741 if (I == LStart->getParent()->begin()) return;
742 --I;
743 if (!isa<CallInst>(*I)) return;
744 const CallInst *LCall = cast<CallInst>(&*I);
745 const InvokeInst *RInvoke = cast<InvokeInst>(RTerm);
746 if (!equivalentAsOperands(LCall->getCalledOperand(),
747 RInvoke->getCalledOperand()))
748 return;
749 if (!LCall->use_empty())
750 Values[LCall] = RInvoke;
751 tryUnify(LTerm->getSuccessor(0), RInvoke->getNormalDest());
752 } else if (isa<InvokeInst>(LTerm) && isa<BranchInst>(RTerm)) {
753 if (cast<BranchInst>(RTerm)->isConditional()) return;
754 BasicBlock::const_iterator I = RTerm->getIterator();
755 if (I == RStart->getParent()->begin()) return;
756 --I;
757 if (!isa<CallInst>(*I)) return;
758 const CallInst *RCall = cast<CallInst>(I);
759 const InvokeInst *LInvoke = cast<InvokeInst>(LTerm);
760 if (!equivalentAsOperands(LInvoke->getCalledOperand(),
761 RCall->getCalledOperand()))
762 return;
763 if (!LInvoke->use_empty())
764 Values[LInvoke] = RCall;
765 tryUnify(LInvoke->getNormalDest(), RTerm->getSuccessor(0));
766 }
767 }
768 }
769
anchor()770 void DifferenceEngine::Oracle::anchor() { }
771
diff(const Function * L,const Function * R)772 void DifferenceEngine::diff(const Function *L, const Function *R) {
773 Context C(*this, L, R);
774
775 // FIXME: types
776 // FIXME: attributes and CC
777 // FIXME: parameter attributes
778
779 // If both are declarations, we're done.
780 if (L->empty() && R->empty())
781 return;
782 else if (L->empty())
783 log("left function is declaration, right function is definition");
784 else if (R->empty())
785 log("right function is declaration, left function is definition");
786 else
787 FunctionDifferenceEngine(*this).diff(L, R);
788 }
789
diff(const Module * L,const Module * R)790 void DifferenceEngine::diff(const Module *L, const Module *R) {
791 StringSet<> LNames;
792 SmallVector<std::pair<const Function *, const Function *>, 20> Queue;
793
794 unsigned LeftAnonCount = 0;
795 unsigned RightAnonCount = 0;
796
797 for (Module::const_iterator I = L->begin(), E = L->end(); I != E; ++I) {
798 const Function *LFn = &*I;
799 StringRef Name = LFn->getName();
800 if (Name.empty()) {
801 ++LeftAnonCount;
802 continue;
803 }
804
805 LNames.insert(Name);
806
807 if (Function *RFn = R->getFunction(LFn->getName()))
808 Queue.push_back(std::make_pair(LFn, RFn));
809 else
810 logf("function %l exists only in left module") << LFn;
811 }
812
813 for (Module::const_iterator I = R->begin(), E = R->end(); I != E; ++I) {
814 const Function *RFn = &*I;
815 StringRef Name = RFn->getName();
816 if (Name.empty()) {
817 ++RightAnonCount;
818 continue;
819 }
820
821 if (!LNames.count(Name))
822 logf("function %r exists only in right module") << RFn;
823 }
824
825 if (LeftAnonCount != 0 || RightAnonCount != 0) {
826 SmallString<32> Tmp;
827 logf(("not comparing " + Twine(LeftAnonCount) +
828 " anonymous functions in the left module and " +
829 Twine(RightAnonCount) + " in the right module")
830 .toStringRef(Tmp));
831 }
832
833 for (SmallVectorImpl<std::pair<const Function *, const Function *>>::iterator
834 I = Queue.begin(),
835 E = Queue.end();
836 I != E; ++I)
837 diff(I->first, I->second);
838 }
839
equivalentAsOperands(const GlobalValue * L,const GlobalValue * R)840 bool DifferenceEngine::equivalentAsOperands(const GlobalValue *L,
841 const GlobalValue *R) {
842 if (globalValueOracle) return (*globalValueOracle)(L, R);
843
844 if (isa<GlobalVariable>(L) && isa<GlobalVariable>(R)) {
845 const GlobalVariable *GVL = cast<GlobalVariable>(L);
846 const GlobalVariable *GVR = cast<GlobalVariable>(R);
847 if (GVL->hasLocalLinkage() && GVL->hasUniqueInitializer() &&
848 GVR->hasLocalLinkage() && GVR->hasUniqueInitializer())
849 return FunctionDifferenceEngine(*this, GVL, GVR)
850 .equivalentAsOperands(GVL->getInitializer(), GVR->getInitializer());
851 }
852
853 return L->getName() == R->getName();
854 }
855