1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
12 //
13 //===----------------------------------------------------------------------===//
14
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/User.h"
20 #include "llvm/InitializePasses.h"
21 #include "llvm/Support/SuffixTree.h"
22
23 using namespace llvm;
24 using namespace IRSimilarity;
25
26 namespace llvm {
27 cl::opt<bool>
28 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
29 cl::ReallyHidden,
30 cl::desc("disable similarity matching, and outlining, "
31 "across branches for debugging purposes."));
32
33 cl::opt<bool>
34 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
35 cl::ReallyHidden,
36 cl::desc("disable outlining indirect calls."));
37
38 cl::opt<bool>
39 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
40 cl::desc("only allow matching call instructions if the "
41 "name and type signature match."));
42
43 cl::opt<bool>
44 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
45 cl::desc("Don't match or outline intrinsics"));
46 } // namespace llvm
47
IRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDList)48 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
49 IRInstructionDataList &IDList)
50 : Inst(&I), Legal(Legality), IDL(&IDList) {
51 initializeInstruction();
52 }
53
initializeInstruction()54 void IRInstructionData::initializeInstruction() {
55 // We check for whether we have a comparison instruction. If it is, we
56 // find the "less than" version of the predicate for consistency for
57 // comparison instructions throught the program.
58 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
59 CmpInst::Predicate Predicate = predicateForConsistency(C);
60 if (Predicate != C->getPredicate())
61 RevisedPredicate = Predicate;
62 }
63
64 // Here we collect the operands and their types for determining whether
65 // the structure of the operand use matches between two different candidates.
66 for (Use &OI : Inst->operands()) {
67 if (isa<CmpInst>(Inst) && RevisedPredicate) {
68 // If we have a CmpInst where the predicate is reversed, it means the
69 // operands must be reversed as well.
70 OperVals.insert(OperVals.begin(), OI.get());
71 continue;
72 }
73
74 OperVals.push_back(OI.get());
75 }
76
77 // We capture the incoming BasicBlocks as values as well as the incoming
78 // Values in order to check for structural similarity.
79 if (PHINode *PN = dyn_cast<PHINode>(Inst))
80 for (BasicBlock *BB : PN->blocks())
81 OperVals.push_back(BB);
82 }
83
IRInstructionData(IRInstructionDataList & IDList)84 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
85 : IDL(&IDList) {}
86
setBranchSuccessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)87 void IRInstructionData::setBranchSuccessors(
88 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
89 assert(isa<BranchInst>(Inst) && "Instruction must be branch");
90
91 BranchInst *BI = cast<BranchInst>(Inst);
92 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
93
94 BBNumIt = BasicBlockToInteger.find(BI->getParent());
95 assert(BBNumIt != BasicBlockToInteger.end() &&
96 "Could not find location for BasicBlock!");
97
98 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
99
100 for (BasicBlock *Successor : BI->successors()) {
101 BBNumIt = BasicBlockToInteger.find(Successor);
102 assert(BBNumIt != BasicBlockToInteger.end() &&
103 "Could not find number for BasicBlock!");
104 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
105
106 int Relative = OtherBlockNumber - CurrentBlockNumber;
107 RelativeBlockLocations.push_back(Relative);
108 }
109 }
110
setCalleeName(bool MatchByName)111 void IRInstructionData::setCalleeName(bool MatchByName) {
112 CallInst *CI = dyn_cast<CallInst>(Inst);
113 assert(CI && "Instruction must be call");
114
115 CalleeName = "";
116 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
117 // To hash intrinsics, we use the opcode, and types like the other
118 // instructions, but also, the Intrinsic ID, and the Name of the
119 // intrinsic.
120 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
121 FunctionType *FT = II->getFunctionType();
122 // If there is an overloaded name, we have to use the complex version
123 // of getName to get the entire string.
124 if (Intrinsic::isOverloaded(IntrinsicID))
125 CalleeName =
126 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
127 // If there is not an overloaded name, we only need to use this version.
128 else
129 CalleeName = Intrinsic::getName(IntrinsicID).str();
130
131 return;
132 }
133
134 if (!CI->isIndirectCall() && MatchByName)
135 CalleeName = CI->getCalledFunction()->getName().str();
136 }
137
setPHIPredecessors(DenseMap<BasicBlock *,unsigned> & BasicBlockToInteger)138 void IRInstructionData::setPHIPredecessors(
139 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
140 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
141
142 PHINode *PN = cast<PHINode>(Inst);
143 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
144
145 BBNumIt = BasicBlockToInteger.find(PN->getParent());
146 assert(BBNumIt != BasicBlockToInteger.end() &&
147 "Could not find location for BasicBlock!");
148
149 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
150
151 // Convert the incoming blocks of the PHINode to an integer value, based on
152 // the relative distances between the current block and the incoming block.
153 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
154 BasicBlock *Incoming = PN->getIncomingBlock(Idx);
155 BBNumIt = BasicBlockToInteger.find(Incoming);
156 assert(BBNumIt != BasicBlockToInteger.end() &&
157 "Could not find number for BasicBlock!");
158 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
159
160 int Relative = OtherBlockNumber - CurrentBlockNumber;
161 RelativeBlockLocations.push_back(Relative);
162 RelativeBlockLocations.push_back(Relative);
163 }
164 }
165
predicateForConsistency(CmpInst * CI)166 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
167 switch (CI->getPredicate()) {
168 case CmpInst::FCMP_OGT:
169 case CmpInst::FCMP_UGT:
170 case CmpInst::FCMP_OGE:
171 case CmpInst::FCMP_UGE:
172 case CmpInst::ICMP_SGT:
173 case CmpInst::ICMP_UGT:
174 case CmpInst::ICMP_SGE:
175 case CmpInst::ICMP_UGE:
176 return CI->getSwappedPredicate();
177 default:
178 return CI->getPredicate();
179 }
180 }
181
getPredicate() const182 CmpInst::Predicate IRInstructionData::getPredicate() const {
183 assert(isa<CmpInst>(Inst) &&
184 "Can only get a predicate from a compare instruction");
185
186 if (RevisedPredicate)
187 return *RevisedPredicate;
188
189 return cast<CmpInst>(Inst)->getPredicate();
190 }
191
getCalleeName() const192 StringRef IRInstructionData::getCalleeName() const {
193 assert(isa<CallInst>(Inst) &&
194 "Can only get a name from a call instruction");
195
196 assert(CalleeName && "CalleeName has not been set");
197
198 return *CalleeName;
199 }
200
isClose(const IRInstructionData & A,const IRInstructionData & B)201 bool IRSimilarity::isClose(const IRInstructionData &A,
202 const IRInstructionData &B) {
203
204 if (!A.Legal || !B.Legal)
205 return false;
206
207 // Check if we are performing the same sort of operation on the same types
208 // but not on the same values.
209 if (!A.Inst->isSameOperationAs(B.Inst)) {
210 // If there is a predicate, this means that either there is a swapped
211 // predicate, or that the types are different, we want to make sure that
212 // the predicates are equivalent via swapping.
213 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
214
215 if (A.getPredicate() != B.getPredicate())
216 return false;
217
218 // If the predicates are the same via swap, make sure that the types are
219 // still the same.
220 auto ZippedTypes = zip(A.OperVals, B.OperVals);
221
222 return all_of(
223 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
224 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
225 });
226 }
227
228 return false;
229 }
230
231 // Since any GEP Instruction operands after the first operand cannot be
232 // defined by a register, we must make sure that the operands after the first
233 // are the same in the two instructions
234 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
235 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
236
237 // If the instructions do not have the same inbounds restrictions, we do
238 // not consider them the same.
239 if (GEP->isInBounds() != OtherGEP->isInBounds())
240 return false;
241
242 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
243
244 // We increment here since we do not care about the first instruction,
245 // we only care about the following operands since they must be the
246 // exact same to be considered similar.
247 return all_of(drop_begin(ZippedOperands),
248 [](std::tuple<llvm::Use &, llvm::Use &> R) {
249 return std::get<0>(R) == std::get<1>(R);
250 });
251 }
252
253 // If the instructions are functions calls, we make sure that the function
254 // name is the same. We already know that the types are since is
255 // isSameOperationAs is true.
256 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
257 if (A.getCalleeName().str() != B.getCalleeName().str())
258 return false;
259 }
260
261 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
262 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
263 return false;
264
265 return true;
266 }
267
268 // TODO: This is the same as the MachineOutliner, and should be consolidated
269 // into the same interface.
convertToUnsignedVec(BasicBlock & BB,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)270 void IRInstructionMapper::convertToUnsignedVec(
271 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
272 std::vector<unsigned> &IntegerMapping) {
273 BasicBlock::iterator It = BB.begin();
274
275 std::vector<unsigned> IntegerMappingForBB;
276 std::vector<IRInstructionData *> InstrListForBB;
277
278 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
279 switch (InstClassifier.visit(*It)) {
280 case InstrType::Legal:
281 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
282 break;
283 case InstrType::Illegal:
284 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
285 break;
286 case InstrType::Invisible:
287 AddedIllegalLastTime = false;
288 break;
289 }
290 }
291
292 if (AddedIllegalLastTime)
293 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
294 for (IRInstructionData *ID : InstrListForBB)
295 this->IDL->push_back(*ID);
296 llvm::append_range(InstrList, InstrListForBB);
297 llvm::append_range(IntegerMapping, IntegerMappingForBB);
298 }
299
300 // TODO: This is the same as the MachineOutliner, and should be consolidated
301 // into the same interface.
mapToLegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB)302 unsigned IRInstructionMapper::mapToLegalUnsigned(
303 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
304 std::vector<IRInstructionData *> &InstrListForBB) {
305 // We added something legal, so we should unset the AddedLegalLastTime
306 // flag.
307 AddedIllegalLastTime = false;
308
309 // If we have at least two adjacent legal instructions (which may have
310 // invisible instructions in between), remember that.
311 if (CanCombineWithPrevInstr)
312 HaveLegalRange = true;
313 CanCombineWithPrevInstr = true;
314
315 // Get the integer for this instruction or give it the current
316 // LegalInstrNumber.
317 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
318 InstrListForBB.push_back(ID);
319
320 if (isa<BranchInst>(*It))
321 ID->setBranchSuccessors(BasicBlockToInteger);
322
323 if (isa<CallInst>(*It))
324 ID->setCalleeName(EnableMatchCallsByName);
325
326 if (isa<PHINode>(*It))
327 ID->setPHIPredecessors(BasicBlockToInteger);
328
329 // Add to the instruction list
330 bool WasInserted;
331 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
332 ResultIt;
333 std::tie(ResultIt, WasInserted) =
334 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
335 unsigned INumber = ResultIt->second;
336
337 // There was an insertion.
338 if (WasInserted)
339 LegalInstrNumber++;
340
341 IntegerMappingForBB.push_back(INumber);
342
343 // Make sure we don't overflow or use any integers reserved by the DenseMap.
344 assert(LegalInstrNumber < IllegalInstrNumber &&
345 "Instruction mapping overflow!");
346
347 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
348 "Tried to assign DenseMap tombstone or empty key to instruction.");
349 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
350 "Tried to assign DenseMap tombstone or empty key to instruction.");
351
352 return INumber;
353 }
354
355 IRInstructionData *
allocateIRInstructionData(Instruction & I,bool Legality,IRInstructionDataList & IDL)356 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
357 IRInstructionDataList &IDL) {
358 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
359 }
360
361 IRInstructionData *
allocateIRInstructionData(IRInstructionDataList & IDL)362 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
363 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
364 }
365
366 IRInstructionDataList *
allocateIRInstructionDataList()367 IRInstructionMapper::allocateIRInstructionDataList() {
368 return new (IDLAllocator->Allocate()) IRInstructionDataList();
369 }
370
371 // TODO: This is the same as the MachineOutliner, and should be consolidated
372 // into the same interface.
mapToIllegalUnsigned(BasicBlock::iterator & It,std::vector<unsigned> & IntegerMappingForBB,std::vector<IRInstructionData * > & InstrListForBB,bool End)373 unsigned IRInstructionMapper::mapToIllegalUnsigned(
374 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
375 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
376 // Can't combine an illegal instruction. Set the flag.
377 CanCombineWithPrevInstr = false;
378
379 // Only add one illegal number per range of legal numbers.
380 if (AddedIllegalLastTime)
381 return IllegalInstrNumber;
382
383 IRInstructionData *ID = nullptr;
384 if (!End)
385 ID = allocateIRInstructionData(*It, false, *IDL);
386 else
387 ID = allocateIRInstructionData(*IDL);
388 InstrListForBB.push_back(ID);
389
390 // Remember that we added an illegal number last time.
391 AddedIllegalLastTime = true;
392 unsigned INumber = IllegalInstrNumber;
393 IntegerMappingForBB.push_back(IllegalInstrNumber--);
394
395 assert(LegalInstrNumber < IllegalInstrNumber &&
396 "Instruction mapping overflow!");
397
398 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
399 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
400
401 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
402 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
403
404 return INumber;
405 }
406
IRSimilarityCandidate(unsigned StartIdx,unsigned Len,IRInstructionData * FirstInstIt,IRInstructionData * LastInstIt)407 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
408 IRInstructionData *FirstInstIt,
409 IRInstructionData *LastInstIt)
410 : StartIdx(StartIdx), Len(Len) {
411
412 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
413 assert(LastInstIt != nullptr && "Instruction is nullptr!");
414 assert(StartIdx + Len > StartIdx &&
415 "Overflow for IRSimilarityCandidate range?");
416 assert(Len - 1 == static_cast<unsigned>(std::distance(
417 iterator(FirstInstIt), iterator(LastInstIt))) &&
418 "Length of the first and last IRInstructionData do not match the "
419 "given length");
420
421 // We iterate over the given instructions, and map each unique value
422 // to a unique number in the IRSimilarityCandidate ValueToNumber and
423 // NumberToValue maps. A constant get its own value globally, the individual
424 // uses of the constants are not considered to be unique.
425 //
426 // IR: Mapping Added:
427 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
428 // %add2 = add i32 %a, %1 %add2 -> 4
429 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
430 //
431 // when replace with global values, starting from 1, would be
432 //
433 // 3 = add i32 1, 2
434 // 4 = add i32 1, 3
435 // 6 = add i32 5, 2
436 unsigned LocalValNumber = 1;
437 IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
438 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
439 // Map the operand values to an unsigned integer if it does not already
440 // have an unsigned integer assigned to it.
441 for (Value *Arg : ID->OperVals)
442 if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
443 ValueToNumber.try_emplace(Arg, LocalValNumber);
444 NumberToValue.try_emplace(LocalValNumber, Arg);
445 LocalValNumber++;
446 }
447
448 // Mapping the instructions to an unsigned integer if it is not already
449 // exist in the mapping.
450 if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
451 ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
452 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
453 LocalValNumber++;
454 }
455 }
456
457 // Setting the first and last instruction data pointers for the candidate. If
458 // we got through the entire for loop without hitting an assert, we know
459 // that both of these instructions are not nullptrs.
460 FirstInst = FirstInstIt;
461 LastInst = LastInstIt;
462
463 // Add the basic blocks contained in the set into the global value numbering.
464 DenseSet<BasicBlock *> BBSet;
465 getBasicBlocks(BBSet);
466 for (BasicBlock *BB : BBSet) {
467 if (ValueToNumber.find(BB) != ValueToNumber.end())
468 continue;
469
470 ValueToNumber.try_emplace(BB, LocalValNumber);
471 NumberToValue.try_emplace(LocalValNumber, BB);
472 LocalValNumber++;
473 }
474 }
475
isSimilar(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)476 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
477 const IRSimilarityCandidate &B) {
478 if (A.getLength() != B.getLength())
479 return false;
480
481 auto InstrDataForBoth =
482 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
483
484 return all_of(InstrDataForBoth,
485 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
486 IRInstructionData &A = std::get<0>(R);
487 IRInstructionData &B = std::get<1>(R);
488 if (!A.Legal || !B.Legal)
489 return false;
490 return isClose(A, B);
491 });
492 }
493
494 /// Determine if one or more of the assigned global value numbers for the
495 /// operands in \p TargetValueNumbers is in the current mapping set for operand
496 /// numbers in \p SourceOperands. The set of possible corresponding global
497 /// value numbers are replaced with the most recent version of compatible
498 /// values.
499 ///
500 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
501 /// value number for the source IRInstructionCandidate.
502 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
503 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
504 /// the target.
505 /// \param [in] SourceOperands - The operands in the original
506 /// IRSimilarityCandidate in the current instruction.
507 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
508 /// the corresponding Instruction in the other IRSimilarityCandidate.
509 /// \returns true if there exists a possible mapping between the source
510 /// Instruction operands and the target Instruction operands, and false if not.
checkNumberingAndReplaceCommutative(const DenseMap<Value *,unsigned> & SourceValueToNumberMapping,DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,ArrayRef<Value * > & SourceOperands,DenseSet<unsigned> & TargetValueNumbers)511 static bool checkNumberingAndReplaceCommutative(
512 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
513 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
514 ArrayRef<Value *> &SourceOperands,
515 DenseSet<unsigned> &TargetValueNumbers){
516
517 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
518
519 unsigned ArgVal;
520 bool WasInserted;
521
522 // Iterate over the operands in the source IRSimilarityCandidate to determine
523 // whether there exists an operand in the other IRSimilarityCandidate that
524 // creates a valid mapping of Value to Value between the
525 // IRSimilarityCaniddates.
526 for (Value *V : SourceOperands) {
527 ArgVal = SourceValueToNumberMapping.find(V)->second;
528
529 // Instead of finding a current mapping, we attempt to insert a set.
530 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
531 std::make_pair(ArgVal, TargetValueNumbers));
532
533 // We need to iterate over the items in other IRSimilarityCandidate's
534 // Instruction to determine whether there is a valid mapping of
535 // Value to Value.
536 DenseSet<unsigned> NewSet;
537 for (unsigned &Curr : ValueMappingIt->second)
538 // If we can find the value in the mapping, we add it to the new set.
539 if (TargetValueNumbers.contains(Curr))
540 NewSet.insert(Curr);
541
542 // If we could not find a Value, return 0.
543 if (NewSet.empty())
544 return false;
545
546 // Otherwise replace the old mapping with the newly constructed one.
547 if (NewSet.size() != ValueMappingIt->second.size())
548 ValueMappingIt->second.swap(NewSet);
549
550 // We have reached no conclusions about the mapping, and cannot remove
551 // any items from the other operands, so we move to check the next operand.
552 if (ValueMappingIt->second.size() != 1)
553 continue;
554
555 unsigned ValToRemove = *ValueMappingIt->second.begin();
556 // When there is only one item left in the mapping for and operand, remove
557 // the value from the other operands. If it results in there being no
558 // mapping, return false, it means the mapping is wrong
559 for (Value *InnerV : SourceOperands) {
560 if (V == InnerV)
561 continue;
562
563 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
564 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
565 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
566 continue;
567
568 ValueMappingIt->second.erase(ValToRemove);
569 if (ValueMappingIt->second.empty())
570 return false;
571 }
572 }
573
574 return true;
575 }
576
577 /// Determine if operand number \p TargetArgVal is in the current mapping set
578 /// for operand number \p SourceArgVal.
579 ///
580 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
581 /// value numbers from source IRSimilarityCandidate to target
582 /// IRSimilarityCandidate.
583 /// \param [in] SourceArgVal The global value number for an operand in the
584 /// in the original candidate.
585 /// \param [in] TargetArgVal The global value number for the corresponding
586 /// operand in the other candidate.
587 /// \returns True if there exists a mapping and false if not.
checkNumberingAndReplace(DenseMap<unsigned,DenseSet<unsigned>> & CurrentSrcTgtNumberMapping,unsigned SourceArgVal,unsigned TargetArgVal)588 bool checkNumberingAndReplace(
589 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
590 unsigned SourceArgVal, unsigned TargetArgVal) {
591 // We are given two unsigned integers representing the global values of
592 // the operands in different IRSimilarityCandidates and a current mapping
593 // between the two.
594 //
595 // Source Operand GVN: 1
596 // Target Operand GVN: 2
597 // CurrentMapping: {1: {1, 2}}
598 //
599 // Since we have mapping, and the target operand is contained in the set, we
600 // update it to:
601 // CurrentMapping: {1: {2}}
602 // and can return true. But, if the mapping was
603 // CurrentMapping: {1: {3}}
604 // we would return false.
605
606 bool WasInserted;
607 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
608
609 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
610 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
611
612 // If we created a new mapping, then we are done.
613 if (WasInserted)
614 return true;
615
616 // If there is more than one option in the mapping set, and the target value
617 // is included in the mapping set replace that set with one that only includes
618 // the target value, as it is the only valid mapping via the non commutative
619 // instruction.
620
621 DenseSet<unsigned> &TargetSet = Val->second;
622 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
623 TargetSet.clear();
624 TargetSet.insert(TargetArgVal);
625 return true;
626 }
627
628 // Return true if we can find the value in the set.
629 return TargetSet.contains(TargetArgVal);
630 }
631
compareNonCommutativeOperandMapping(OperandMapping A,OperandMapping B)632 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
633 OperandMapping A, OperandMapping B) {
634 // Iterators to keep track of where we are in the operands for each
635 // Instruction.
636 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
637 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
638 unsigned OperandLength = A.OperVals.size();
639
640 // For each operand, get the value numbering and ensure it is consistent.
641 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
642 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
643 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
644
645 // Attempt to add a set with only the target value. If there is no mapping
646 // we can create it here.
647 //
648 // For an instruction like a subtraction:
649 // IRSimilarityCandidateA: IRSimilarityCandidateB:
650 // %resultA = sub %a, %b %resultB = sub %d, %e
651 //
652 // We map %a -> %d and %b -> %e.
653 //
654 // And check to see whether their mapping is consistent in
655 // checkNumberingAndReplace.
656
657 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
658 return false;
659
660 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
661 return false;
662 }
663 return true;
664 }
665
compareCommutativeOperandMapping(OperandMapping A,OperandMapping B)666 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
667 OperandMapping A, OperandMapping B) {
668 DenseSet<unsigned> ValueNumbersA;
669 DenseSet<unsigned> ValueNumbersB;
670
671 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
672 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
673 unsigned OperandLength = A.OperVals.size();
674
675 // Find the value number sets for the operands.
676 for (unsigned Idx = 0; Idx < OperandLength;
677 Idx++, VItA++, VItB++) {
678 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
679 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
680 }
681
682 // Iterate over the operands in the first IRSimilarityCandidate and make sure
683 // there exists a possible mapping with the operands in the second
684 // IRSimilarityCandidate.
685 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
686 A.ValueNumberMapping, A.OperVals,
687 ValueNumbersB))
688 return false;
689
690 // Iterate over the operands in the second IRSimilarityCandidate and make sure
691 // there exists a possible mapping with the operands in the first
692 // IRSimilarityCandidate.
693 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
694 B.ValueNumberMapping, B.OperVals,
695 ValueNumbersA))
696 return false;
697
698 return true;
699 }
700
checkRelativeLocations(RelativeLocMapping A,RelativeLocMapping B)701 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
702 RelativeLocMapping B) {
703 // Get the basic blocks the label refers to.
704 BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
705 BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
706
707 // Get the basic blocks contained in each region.
708 DenseSet<BasicBlock *> BasicBlockA;
709 DenseSet<BasicBlock *> BasicBlockB;
710 A.IRSC.getBasicBlocks(BasicBlockA);
711 B.IRSC.getBasicBlocks(BasicBlockB);
712
713 // Determine if the block is contained in the region.
714 bool AContained = BasicBlockA.contains(ABB);
715 bool BContained = BasicBlockB.contains(BBB);
716
717 // Both blocks need to be contained in the region, or both need to be outside
718 // the reigon.
719 if (AContained != BContained)
720 return false;
721
722 // If both are contained, then we need to make sure that the relative
723 // distance to the target blocks are the same.
724 if (AContained)
725 return A.RelativeLocation == B.RelativeLocation;
726 return true;
727 }
728
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)729 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
730 const IRSimilarityCandidate &B) {
731 DenseMap<unsigned, DenseSet<unsigned>> MappingA;
732 DenseMap<unsigned, DenseSet<unsigned>> MappingB;
733 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
734 }
735
736 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
737 SmallVector<int, 4> &, ArrayRef<Value *> &,
738 ArrayRef<Value *> &>
739 ZippedRelativeLocationsT;
740
compareStructure(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingA,DenseMap<unsigned,DenseSet<unsigned>> & ValueNumberMappingB)741 bool IRSimilarityCandidate::compareStructure(
742 const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
743 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
744 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
745 if (A.getLength() != B.getLength())
746 return false;
747
748 if (A.ValueToNumber.size() != B.ValueToNumber.size())
749 return false;
750
751 iterator ItA = A.begin();
752 iterator ItB = B.begin();
753
754 // These ValueNumber Mapping sets create a create a mapping between the values
755 // in one candidate to values in the other candidate. If we create a set with
756 // one element, and that same element maps to the original element in the
757 // candidate we have a good mapping.
758 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
759
760
761 // Iterate over the instructions contained in each candidate
762 unsigned SectionLength = A.getStartIdx() + A.getLength();
763 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
764 ItA++, ItB++, Loc++) {
765 // Make sure the instructions are similar to one another.
766 if (!isClose(*ItA, *ItB))
767 return false;
768
769 Instruction *IA = ItA->Inst;
770 Instruction *IB = ItB->Inst;
771
772 if (!ItA->Legal || !ItB->Legal)
773 return false;
774
775 // Get the operand sets for the instructions.
776 ArrayRef<Value *> OperValsA = ItA->OperVals;
777 ArrayRef<Value *> OperValsB = ItB->OperVals;
778
779 unsigned InstValA = A.ValueToNumber.find(IA)->second;
780 unsigned InstValB = B.ValueToNumber.find(IB)->second;
781
782 bool WasInserted;
783 // Ensure that the mappings for the instructions exists.
784 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
785 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
786 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
787 return false;
788
789 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
790 std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
791 if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
792 return false;
793
794 // We have different paths for commutative instructions and non-commutative
795 // instructions since commutative instructions could allow multiple mappings
796 // to certain values.
797 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
798 !isa<IntrinsicInst>(IA)) {
799 if (!compareCommutativeOperandMapping(
800 {A, OperValsA, ValueNumberMappingA},
801 {B, OperValsB, ValueNumberMappingB}))
802 return false;
803 continue;
804 }
805
806 // Handle the non-commutative cases.
807 if (!compareNonCommutativeOperandMapping(
808 {A, OperValsA, ValueNumberMappingA},
809 {B, OperValsB, ValueNumberMappingB}))
810 return false;
811
812 // Here we check that between two corresponding instructions,
813 // when referring to a basic block in the same region, the
814 // relative locations are the same. And, that the instructions refer to
815 // basic blocks outside the region in the same corresponding locations.
816
817 // We are able to make the assumption about blocks outside of the region
818 // since the target block labels are considered values and will follow the
819 // same number matching that we defined for the other instructions in the
820 // region. So, at this point, in each location we target a specific block
821 // outside the region, we are targeting a corresponding block in each
822 // analagous location in the region we are comparing to.
823 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
824 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
825 continue;
826
827 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
828 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
829 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
830 OperValsA.size() != OperValsB.size())
831 return false;
832
833 ZippedRelativeLocationsT ZippedRelativeLocations =
834 zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
835 if (any_of(ZippedRelativeLocations,
836 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
837 return !checkRelativeLocations(
838 {A, std::get<0>(R), std::get<2>(R)},
839 {B, std::get<1>(R), std::get<3>(R)});
840 }))
841 return false;
842 }
843 return true;
844 }
845
overlap(const IRSimilarityCandidate & A,const IRSimilarityCandidate & B)846 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
847 const IRSimilarityCandidate &B) {
848 auto DoesOverlap = [](const IRSimilarityCandidate &X,
849 const IRSimilarityCandidate &Y) {
850 // Check:
851 // XXXXXX X starts before Y ends
852 // YYYYYYY Y starts after X starts
853 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
854 };
855
856 return DoesOverlap(A, B) || DoesOverlap(B, A);
857 }
858
populateMapper(Module & M,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)859 void IRSimilarityIdentifier::populateMapper(
860 Module &M, std::vector<IRInstructionData *> &InstrList,
861 std::vector<unsigned> &IntegerMapping) {
862
863 std::vector<IRInstructionData *> InstrListForModule;
864 std::vector<unsigned> IntegerMappingForModule;
865 // Iterate over the functions in the module to map each Instruction in each
866 // BasicBlock to an unsigned integer.
867 Mapper.initializeForBBs(M);
868
869 for (Function &F : M) {
870
871 if (F.empty())
872 continue;
873
874 for (BasicBlock &BB : F) {
875
876 // BB has potential to have similarity since it has a size greater than 2
877 // and can therefore match other regions greater than 2. Map it to a list
878 // of unsigned integers.
879 Mapper.convertToUnsignedVec(BB, InstrListForModule,
880 IntegerMappingForModule);
881 }
882
883 BasicBlock::iterator It = F.begin()->end();
884 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
885 true);
886 if (InstrListForModule.size() > 0)
887 Mapper.IDL->push_back(*InstrListForModule.back());
888 }
889
890 // Insert the InstrListForModule at the end of the overall InstrList so that
891 // we can have a long InstrList for the entire set of Modules being analyzed.
892 llvm::append_range(InstrList, InstrListForModule);
893 // Do the same as above, but for IntegerMapping.
894 llvm::append_range(IntegerMapping, IntegerMappingForModule);
895 }
896
populateMapper(ArrayRef<std::unique_ptr<Module>> & Modules,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)897 void IRSimilarityIdentifier::populateMapper(
898 ArrayRef<std::unique_ptr<Module>> &Modules,
899 std::vector<IRInstructionData *> &InstrList,
900 std::vector<unsigned> &IntegerMapping) {
901
902 // Iterate over, and map the instructions in each module.
903 for (const std::unique_ptr<Module> &M : Modules)
904 populateMapper(*M, InstrList, IntegerMapping);
905 }
906
907 /// From a repeated subsequence, find all the different instances of the
908 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
909 /// the IRInstructionData in subsequence.
910 ///
911 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
912 /// \param [in] InstrList - The vector that holds the instruction data.
913 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
914 /// \param [out] CandsForRepSubstring - The vector to store the generated
915 /// IRSimilarityCandidates.
createCandidatesFromSuffixTree(const IRInstructionMapper & Mapper,std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping,SuffixTree::RepeatedSubstring & RS,std::vector<IRSimilarityCandidate> & CandsForRepSubstring)916 static void createCandidatesFromSuffixTree(
917 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
918 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
919 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
920
921 unsigned StringLen = RS.Length;
922 if (StringLen < 2)
923 return;
924
925 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
926 for (const unsigned &StartIdx : RS.StartIndices) {
927 unsigned EndIdx = StartIdx + StringLen - 1;
928
929 // Check that this subsequence does not contain an illegal instruction.
930 bool ContainsIllegal = false;
931 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
932 unsigned Key = IntegerMapping[CurrIdx];
933 if (Key > Mapper.IllegalInstrNumber) {
934 ContainsIllegal = true;
935 break;
936 }
937 }
938
939 // If we have an illegal instruction, we should not create an
940 // IRSimilarityCandidate for this region.
941 if (ContainsIllegal)
942 continue;
943
944 // We are getting iterators to the instructions in this region of code
945 // by advancing the start and end indices from the start of the
946 // InstrList.
947 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
948 std::advance(StartIt, StartIdx);
949 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
950 std::advance(EndIt, EndIdx);
951
952 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
953 }
954 }
955
createCanonicalRelationFrom(IRSimilarityCandidate & SourceCand,DenseMap<unsigned,DenseSet<unsigned>> & ToSourceMapping,DenseMap<unsigned,DenseSet<unsigned>> & FromSourceMapping)956 void IRSimilarityCandidate::createCanonicalRelationFrom(
957 IRSimilarityCandidate &SourceCand,
958 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
959 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
960 assert(SourceCand.CanonNumToNumber.size() != 0 &&
961 "Base canonical relationship is empty!");
962 assert(SourceCand.NumberToCanonNum.size() != 0 &&
963 "Base canonical relationship is empty!");
964
965 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
966 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
967
968 DenseSet<unsigned> UsedGVNs;
969 // Iterate over the mappings provided from this candidate to SourceCand. We
970 // are then able to map the GVN in this candidate to the same canonical number
971 // given to the corresponding GVN in SourceCand.
972 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
973 unsigned SourceGVN = GVNMapping.first;
974
975 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
976
977 unsigned ResultGVN;
978 // We need special handling if we have more than one potential value. This
979 // means that there are at least two GVNs that could correspond to this GVN.
980 // This could lead to potential swapping later on, so we make a decision
981 // here to ensure a one-to-one mapping.
982 if (GVNMapping.second.size() > 1) {
983 bool Found = false;
984 for (unsigned Val : GVNMapping.second) {
985 // We make sure the target value number hasn't already been reserved.
986 if (UsedGVNs.contains(Val))
987 continue;
988
989 // We make sure that the opposite mapping is still consistent.
990 DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
991 FromSourceMapping.find(Val);
992
993 if (!It->second.contains(SourceGVN))
994 continue;
995
996 // We pick the first item that satisfies these conditions.
997 Found = true;
998 ResultGVN = Val;
999 break;
1000 }
1001
1002 assert(Found && "Could not find matching value for source GVN");
1003 (void)Found;
1004
1005 } else
1006 ResultGVN = *GVNMapping.second.begin();
1007
1008 // Whatever GVN is found, we mark it as used.
1009 UsedGVNs.insert(ResultGVN);
1010
1011 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1012 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1013 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1014 }
1015
1016 DenseSet<BasicBlock *> BBSet;
1017 getBasicBlocks(BBSet);
1018 // Find canonical numbers for the BasicBlocks in the current candidate.
1019 // This is done by finding the corresponding value for the first instruction
1020 // in the block in the current candidate, finding the matching value in the
1021 // source candidate. Then by finding the parent of this value, use the
1022 // canonical number of the block in the source candidate for the canonical
1023 // number in the current candidate.
1024 for (BasicBlock *BB : BBSet) {
1025 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1026
1027 // We can skip the BasicBlock if the canonical numbering has already been
1028 // found in a separate instruction.
1029 if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
1030 continue;
1031
1032 // If the basic block is the starting block, then the shared instruction may
1033 // not be the first instruction in the block, it will be the first
1034 // instruction in the similarity region.
1035 Value *FirstOutlineInst = BB == getStartBB()
1036 ? frontInstruction()
1037 : &*BB->instructionsWithoutDebug().begin();
1038
1039 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1040 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1041 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1042 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1043 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1044 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1045 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1046 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1047 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1048 }
1049 }
1050
createCanonicalMappingFor(IRSimilarityCandidate & CurrCand)1051 void IRSimilarityCandidate::createCanonicalMappingFor(
1052 IRSimilarityCandidate &CurrCand) {
1053 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1054 "Canonical Relationship is non-empty");
1055 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1056 "Canonical Relationship is non-empty");
1057
1058 unsigned CanonNum = 0;
1059 // Iterate over the value numbers found, the order does not matter in this
1060 // case.
1061 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1062 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1063 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1064 CanonNum++;
1065 }
1066 }
1067
1068 /// From the list of IRSimilarityCandidates, perform a comparison between each
1069 /// IRSimilarityCandidate to determine if there are overlapping
1070 /// IRInstructionData, or if they do not have the same structure.
1071 ///
1072 /// \param [in] CandsForRepSubstring - The vector containing the
1073 /// IRSimilarityCandidates.
1074 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1075 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1076 /// vector are structurally similar to one another.
findCandidateStructures(std::vector<IRSimilarityCandidate> & CandsForRepSubstring,DenseMap<unsigned,SimilarityGroup> & StructuralGroups)1077 static void findCandidateStructures(
1078 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1079 DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
1080 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1081 InnerCandEndIt;
1082
1083 // IRSimilarityCandidates each have a structure for operand use. It is
1084 // possible that two instances of the same subsequences have different
1085 // structure. Each type of structure found is assigned a number. This
1086 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1087 // discovered it fits within.
1088 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1089
1090 // Find the compatibility from each candidate to the others to determine
1091 // which candidates overlap and which have the same structure by mapping
1092 // each structure to a different group.
1093 bool SameStructure;
1094 bool Inserted;
1095 unsigned CurrentGroupNum = 0;
1096 unsigned OuterGroupNum;
1097 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1098 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1099 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1100
1101 // Iterate over the candidates to determine its structural and overlapping
1102 // compatibility with other instructions
1103 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1104 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1105 for (CandIt = CandsForRepSubstring.begin(),
1106 CandEndIt = CandsForRepSubstring.end();
1107 CandIt != CandEndIt; CandIt++) {
1108
1109 // Determine if it has an assigned structural group already.
1110 CandToGroupIt = CandToGroup.find(&*CandIt);
1111 if (CandToGroupIt == CandToGroup.end()) {
1112 // If not, we assign it one, and add it to our mapping.
1113 std::tie(CandToGroupIt, Inserted) =
1114 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1115 }
1116
1117 // Get the structural group number from the iterator.
1118 OuterGroupNum = CandToGroupIt->second;
1119
1120 // Check if we already have a list of IRSimilarityCandidates for the current
1121 // structural group. Create one if one does not exist.
1122 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1123 if (CurrentGroupPair == StructuralGroups.end()) {
1124 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1125 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1126 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1127 }
1128
1129 // Iterate over the IRSimilarityCandidates following the current
1130 // IRSimilarityCandidate in the list to determine whether the two
1131 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1132 // of IRSimilarityCandidates.
1133 for (InnerCandIt = std::next(CandIt),
1134 InnerCandEndIt = CandsForRepSubstring.end();
1135 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1136
1137 // We check if the inner item has a group already, if it does, we skip it.
1138 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1139 if (CandToGroupItInner != CandToGroup.end())
1140 continue;
1141
1142 // Otherwise we determine if they have the same structure and add it to
1143 // vector if they match.
1144 ValueNumberMappingA.clear();
1145 ValueNumberMappingB.clear();
1146 SameStructure = IRSimilarityCandidate::compareStructure(
1147 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1148 if (!SameStructure)
1149 continue;
1150
1151 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1152 ValueNumberMappingB);
1153 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1154 CurrentGroupPair->second.push_back(*InnerCandIt);
1155 }
1156 }
1157 }
1158
findCandidates(std::vector<IRInstructionData * > & InstrList,std::vector<unsigned> & IntegerMapping)1159 void IRSimilarityIdentifier::findCandidates(
1160 std::vector<IRInstructionData *> &InstrList,
1161 std::vector<unsigned> &IntegerMapping) {
1162 SuffixTree ST(IntegerMapping);
1163
1164 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1165 std::vector<SimilarityGroup> NewCandidateGroups;
1166
1167 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1168
1169 // Iterate over the subsequences found by the Suffix Tree to create
1170 // IRSimilarityCandidates for each repeated subsequence and determine which
1171 // instances are structurally similar to one another.
1172 for (SuffixTree::RepeatedSubstring &RS : ST) {
1173 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1174 CandsForRepSubstring);
1175
1176 if (CandsForRepSubstring.size() < 2)
1177 continue;
1178
1179 findCandidateStructures(CandsForRepSubstring, StructuralGroups);
1180 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
1181 // We only add the group if it contains more than one
1182 // IRSimilarityCandidate. If there is only one, that means there is no
1183 // other repeated subsequence with the same structure.
1184 if (Group.second.size() > 1)
1185 SimilarityCandidates->push_back(Group.second);
1186
1187 CandsForRepSubstring.clear();
1188 StructuralGroups.clear();
1189 NewCandidateGroups.clear();
1190 }
1191 }
1192
findSimilarity(ArrayRef<std::unique_ptr<Module>> Modules)1193 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1194 ArrayRef<std::unique_ptr<Module>> Modules) {
1195 resetSimilarityCandidates();
1196
1197 std::vector<IRInstructionData *> InstrList;
1198 std::vector<unsigned> IntegerMapping;
1199 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1200 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1201 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1202 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1203 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1204
1205 populateMapper(Modules, InstrList, IntegerMapping);
1206 findCandidates(InstrList, IntegerMapping);
1207
1208 return *SimilarityCandidates;
1209 }
1210
findSimilarity(Module & M)1211 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1212 resetSimilarityCandidates();
1213 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1214 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1215 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1216 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1217 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1218
1219 std::vector<IRInstructionData *> InstrList;
1220 std::vector<unsigned> IntegerMapping;
1221
1222 populateMapper(M, InstrList, IntegerMapping);
1223 findCandidates(InstrList, IntegerMapping);
1224
1225 return *SimilarityCandidates;
1226 }
1227
1228 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1229 "ir-similarity-identifier", false, true)
1230
IRSimilarityIdentifierWrapperPass()1231 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1232 : ModulePass(ID) {
1233 initializeIRSimilarityIdentifierWrapperPassPass(
1234 *PassRegistry::getPassRegistry());
1235 }
1236
doInitialization(Module & M)1237 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1238 IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1239 MatchCallsByName, !DisableIntrinsics,
1240 false));
1241 return false;
1242 }
1243
doFinalization(Module & M)1244 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1245 IRSI.reset();
1246 return false;
1247 }
1248
runOnModule(Module & M)1249 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1250 IRSI->findSimilarity(M);
1251 return false;
1252 }
1253
1254 AnalysisKey IRSimilarityAnalysis::Key;
run(Module & M,ModuleAnalysisManager &)1255 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1256 ModuleAnalysisManager &) {
1257 auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1258 MatchCallsByName, !DisableIntrinsics,
1259 false);
1260 IRSI.findSimilarity(M);
1261 return IRSI;
1262 }
1263
1264 PreservedAnalyses
run(Module & M,ModuleAnalysisManager & AM)1265 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1266 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1267 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1268 IRSI.getSimilarity();
1269
1270 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1271 OS << CandVec.size() << " candidates of length "
1272 << CandVec.begin()->getLength() << ". Found in: \n";
1273 for (IRSimilarityCandidate &Cand : CandVec) {
1274 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1275 << ", Basic Block: ";
1276 if (Cand.front()->Inst->getParent()->getName().str() == "")
1277 OS << "(unnamed)";
1278 else
1279 OS << Cand.front()->Inst->getParent()->getName().str();
1280 OS << "\n Start Instruction: ";
1281 Cand.frontInstruction()->print(OS);
1282 OS << "\n End Instruction: ";
1283 Cand.backInstruction()->print(OS);
1284 OS << "\n";
1285 }
1286 }
1287
1288 return PreservedAnalyses::all();
1289 }
1290
1291 char IRSimilarityIdentifierWrapperPass::ID = 0;
1292