1291 lines
		
	
	
		
			49 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			1291 lines
		
	
	
		
			49 KiB
		
	
	
	
		
			C++
		
	
	
	
| //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| // \file
 | |
| // Implementation file for the IRSimilarityIdentifier for identifying
 | |
| // similarities in IR including the IRInstructionMapper.
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #include "llvm/Analysis/IRSimilarityIdentifier.h"
 | |
| #include "llvm/ADT/DenseMap.h"
 | |
| #include "llvm/IR/Intrinsics.h"
 | |
| #include "llvm/IR/Operator.h"
 | |
| #include "llvm/IR/User.h"
 | |
| #include "llvm/InitializePasses.h"
 | |
| #include "llvm/Support/SuffixTree.h"
 | |
| 
 | |
| using namespace llvm;
 | |
| using namespace IRSimilarity;
 | |
| 
 | |
| namespace llvm {
 | |
| cl::opt<bool>
 | |
|     DisableBranches("no-ir-sim-branch-matching", cl::init(false),
 | |
|                     cl::ReallyHidden,
 | |
|                     cl::desc("disable similarity matching, and outlining, "
 | |
|                              "across branches for debugging purposes."));
 | |
| 
 | |
| cl::opt<bool>
 | |
|     DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
 | |
|                          cl::ReallyHidden,
 | |
|                          cl::desc("disable outlining indirect calls."));
 | |
| 
 | |
| cl::opt<bool>
 | |
|     MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
 | |
|                      cl::desc("only allow matching call instructions if the "
 | |
|                               "name and type signature match."));
 | |
| 
 | |
| cl::opt<bool>
 | |
|     DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
 | |
|                       cl::desc("Don't match or outline intrinsics"));
 | |
| } // namespace llvm
 | |
| 
 | |
| IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
 | |
|                                      IRInstructionDataList &IDList)
 | |
|     : Inst(&I), Legal(Legality), IDL(&IDList) {
 | |
|   initializeInstruction();
 | |
| }
 | |
| 
 | |
| void IRInstructionData::initializeInstruction() {
 | |
|   // We check for whether we have a comparison instruction.  If it is, we
 | |
|   // find the "less than" version of the predicate for consistency for
 | |
|   // comparison instructions throught the program.
 | |
|   if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
 | |
|     CmpInst::Predicate Predicate = predicateForConsistency(C);
 | |
|     if (Predicate != C->getPredicate())
 | |
|       RevisedPredicate = Predicate;
 | |
|   }
 | |
| 
 | |
|   // Here we collect the operands and their types for determining whether
 | |
|   // the structure of the operand use matches between two different candidates.
 | |
|   for (Use &OI : Inst->operands()) {
 | |
|     if (isa<CmpInst>(Inst) && RevisedPredicate.hasValue()) {
 | |
|       // If we have a CmpInst where the predicate is reversed, it means the
 | |
|       // operands must be reversed as well.
 | |
|       OperVals.insert(OperVals.begin(), OI.get());
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     OperVals.push_back(OI.get());
 | |
|   }
 | |
| 
 | |
|   // We capture the incoming BasicBlocks as values as well as the incoming
 | |
|   // Values in order to check for structural similarity.
 | |
|   if (PHINode *PN = dyn_cast<PHINode>(Inst))
 | |
|     for (BasicBlock *BB : PN->blocks())
 | |
|       OperVals.push_back(BB);
 | |
| }
 | |
| 
 | |
| IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
 | |
|     : IDL(&IDList) {}
 | |
| 
 | |
| void IRInstructionData::setBranchSuccessors(
 | |
|     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
 | |
|   assert(isa<BranchInst>(Inst) && "Instruction must be branch");
 | |
| 
 | |
|   BranchInst *BI = cast<BranchInst>(Inst);
 | |
|   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
 | |
| 
 | |
|   BBNumIt = BasicBlockToInteger.find(BI->getParent());
 | |
|   assert(BBNumIt != BasicBlockToInteger.end() &&
 | |
|          "Could not find location for BasicBlock!");
 | |
| 
 | |
|   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
 | |
| 
 | |
|   for (BasicBlock *Successor : BI->successors()) {
 | |
|     BBNumIt = BasicBlockToInteger.find(Successor);
 | |
|     assert(BBNumIt != BasicBlockToInteger.end() &&
 | |
|            "Could not find number for BasicBlock!");
 | |
|     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
 | |
| 
 | |
|     int Relative = OtherBlockNumber - CurrentBlockNumber;
 | |
|     RelativeBlockLocations.push_back(Relative);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void IRInstructionData::setCalleeName(bool MatchByName) {
 | |
|   CallInst *CI = dyn_cast<CallInst>(Inst);
 | |
|   assert(CI && "Instruction must be call");
 | |
| 
 | |
|   CalleeName = "";
 | |
|   if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
 | |
|     // To hash intrinsics, we use the opcode, and types like the other
 | |
|     // instructions, but also, the Intrinsic ID, and the Name of the
 | |
|     // intrinsic.
 | |
|     Intrinsic::ID IntrinsicID = II->getIntrinsicID();
 | |
|     FunctionType *FT = II->getFunctionType();
 | |
|     // If there is an overloaded name, we have to use the complex version
 | |
|     // of getName to get the entire string.
 | |
|     if (Intrinsic::isOverloaded(IntrinsicID))
 | |
|       CalleeName =
 | |
|           Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
 | |
|     // If there is not an overloaded name, we only need to use this version.
 | |
|     else
 | |
|       CalleeName = Intrinsic::getName(IntrinsicID).str();
 | |
| 
 | |
|     return;
 | |
|   }
 | |
| 
 | |
|   if (!CI->isIndirectCall() && MatchByName)
 | |
|     CalleeName = CI->getCalledFunction()->getName().str();
 | |
| }
 | |
| 
 | |
| void IRInstructionData::setPHIPredecessors(
 | |
|     DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
 | |
|   assert(isa<PHINode>(Inst) && "Instruction must be phi node");
 | |
| 
 | |
|   PHINode *PN = cast<PHINode>(Inst);
 | |
|   DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
 | |
| 
 | |
|   BBNumIt = BasicBlockToInteger.find(PN->getParent());
 | |
|   assert(BBNumIt != BasicBlockToInteger.end() &&
 | |
|          "Could not find location for BasicBlock!");
 | |
| 
 | |
|   int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
 | |
| 
 | |
|   // Convert the incoming blocks of the PHINode to an integer value, based on
 | |
|   // the relative distances between the current block and the incoming block.
 | |
|   for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
 | |
|     BasicBlock *Incoming = PN->getIncomingBlock(Idx);
 | |
|     BBNumIt = BasicBlockToInteger.find(Incoming);
 | |
|     assert(BBNumIt != BasicBlockToInteger.end() &&
 | |
|            "Could not find number for BasicBlock!");
 | |
|     int OtherBlockNumber = static_cast<int>(BBNumIt->second);
 | |
| 
 | |
|     int Relative = OtherBlockNumber - CurrentBlockNumber;
 | |
|     RelativeBlockLocations.push_back(Relative);
 | |
|     RelativeBlockLocations.push_back(Relative);
 | |
|   }
 | |
| }
 | |
| 
 | |
| CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
 | |
|   switch (CI->getPredicate()) {
 | |
|   case CmpInst::FCMP_OGT:
 | |
|   case CmpInst::FCMP_UGT:
 | |
|   case CmpInst::FCMP_OGE:
 | |
|   case CmpInst::FCMP_UGE:
 | |
|   case CmpInst::ICMP_SGT:
 | |
|   case CmpInst::ICMP_UGT:
 | |
|   case CmpInst::ICMP_SGE:
 | |
|   case CmpInst::ICMP_UGE:
 | |
|     return CI->getSwappedPredicate();
 | |
|   default:
 | |
|     return CI->getPredicate();
 | |
|   }
 | |
| }
 | |
| 
 | |
| CmpInst::Predicate IRInstructionData::getPredicate() const {
 | |
|   assert(isa<CmpInst>(Inst) &&
 | |
|          "Can only get a predicate from a compare instruction");
 | |
| 
 | |
|   if (RevisedPredicate.hasValue())
 | |
|     return RevisedPredicate.getValue();
 | |
|   
 | |
|   return cast<CmpInst>(Inst)->getPredicate();
 | |
| }
 | |
| 
 | |
| StringRef IRInstructionData::getCalleeName() const {
 | |
|   assert(isa<CallInst>(Inst) &&
 | |
|          "Can only get a name from a call instruction");
 | |
| 
 | |
|   assert(CalleeName.hasValue() && "CalleeName has not been set");
 | |
| 
 | |
|   return *CalleeName;
 | |
| }
 | |
| 
 | |
| bool IRSimilarity::isClose(const IRInstructionData &A,
 | |
|                            const IRInstructionData &B) {
 | |
| 
 | |
|   if (!A.Legal || !B.Legal)
 | |
|     return false;
 | |
| 
 | |
|   // Check if we are performing the same sort of operation on the same types
 | |
|   // but not on the same values.
 | |
|   if (!A.Inst->isSameOperationAs(B.Inst)) {
 | |
|     // If there is a predicate, this means that either there is a swapped
 | |
|     // predicate, or that the types are different, we want to make sure that
 | |
|     // the predicates are equivalent via swapping.
 | |
|     if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
 | |
| 
 | |
|       if (A.getPredicate() != B.getPredicate())
 | |
|         return false;
 | |
| 
 | |
|       // If the predicates are the same via swap, make sure that the types are
 | |
|       // still the same.
 | |
|       auto ZippedTypes = zip(A.OperVals, B.OperVals);
 | |
| 
 | |
|       return all_of(
 | |
|           ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
 | |
|             return std::get<0>(R)->getType() == std::get<1>(R)->getType();
 | |
|           });
 | |
|     }
 | |
| 
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   // Since any GEP Instruction operands after the first operand cannot be
 | |
|   // defined by a register, we must make sure that the operands after the first
 | |
|   // are the same in the two instructions
 | |
|   if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
 | |
|     auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
 | |
| 
 | |
|     // If the instructions do not have the same inbounds restrictions, we do
 | |
|     // not consider them the same.
 | |
|     if (GEP->isInBounds() != OtherGEP->isInBounds())
 | |
|       return false;
 | |
| 
 | |
|     auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
 | |
| 
 | |
|     // We increment here since we do not care about the first instruction,
 | |
|     // we only care about the following operands since they must be the
 | |
|     // exact same to be considered similar.
 | |
|     return all_of(drop_begin(ZippedOperands),
 | |
|                   [](std::tuple<llvm::Use &, llvm::Use &> R) {
 | |
|                     return std::get<0>(R) == std::get<1>(R);
 | |
|                   });
 | |
|   }
 | |
| 
 | |
|   // If the instructions are functions calls, we make sure that the function
 | |
|   // name is the same.  We already know that the types are since is
 | |
|   // isSameOperationAs is true.
 | |
|   if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
 | |
|     if (A.getCalleeName().str() != B.getCalleeName().str())
 | |
|       return false;
 | |
|   }
 | |
| 
 | |
|   if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
 | |
|       A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
 | |
|     return false;
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| // TODO: This is the same as the MachineOutliner, and should be consolidated
 | |
| // into the same interface.
 | |
| void IRInstructionMapper::convertToUnsignedVec(
 | |
|     BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
 | |
|     std::vector<unsigned> &IntegerMapping) {
 | |
|   BasicBlock::iterator It = BB.begin();
 | |
| 
 | |
|   std::vector<unsigned> IntegerMappingForBB;
 | |
|   std::vector<IRInstructionData *> InstrListForBB;
 | |
| 
 | |
|   for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
 | |
|     switch (InstClassifier.visit(*It)) {
 | |
|     case InstrType::Legal:
 | |
|       mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
 | |
|       break;
 | |
|     case InstrType::Illegal:
 | |
|       mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
 | |
|       break;
 | |
|     case InstrType::Invisible:
 | |
|       AddedIllegalLastTime = false;
 | |
|       break;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   if (AddedIllegalLastTime)
 | |
|     mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
 | |
|   for (IRInstructionData *ID : InstrListForBB)
 | |
|     this->IDL->push_back(*ID);
 | |
|   llvm::append_range(InstrList, InstrListForBB);
 | |
|   llvm::append_range(IntegerMapping, IntegerMappingForBB);
 | |
| }
 | |
| 
 | |
| // TODO: This is the same as the MachineOutliner, and should be consolidated
 | |
| // into the same interface.
 | |
| unsigned IRInstructionMapper::mapToLegalUnsigned(
 | |
|     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
 | |
|     std::vector<IRInstructionData *> &InstrListForBB) {
 | |
|   // We added something legal, so we should unset the AddedLegalLastTime
 | |
|   // flag.
 | |
|   AddedIllegalLastTime = false;
 | |
| 
 | |
|   // If we have at least two adjacent legal instructions (which may have
 | |
|   // invisible instructions in between), remember that.
 | |
|   if (CanCombineWithPrevInstr)
 | |
|     HaveLegalRange = true;
 | |
|   CanCombineWithPrevInstr = true;
 | |
| 
 | |
|   // Get the integer for this instruction or give it the current
 | |
|   // LegalInstrNumber.
 | |
|   IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
 | |
|   InstrListForBB.push_back(ID);
 | |
| 
 | |
|   if (isa<BranchInst>(*It))
 | |
|     ID->setBranchSuccessors(BasicBlockToInteger);
 | |
| 
 | |
|   if (isa<CallInst>(*It))
 | |
|     ID->setCalleeName(EnableMatchCallsByName);
 | |
| 
 | |
|   if (isa<PHINode>(*It))
 | |
|     ID->setPHIPredecessors(BasicBlockToInteger);
 | |
| 
 | |
|   // Add to the instruction list
 | |
|   bool WasInserted;
 | |
|   DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
 | |
|       ResultIt;
 | |
|   std::tie(ResultIt, WasInserted) =
 | |
|       InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
 | |
|   unsigned INumber = ResultIt->second;
 | |
| 
 | |
|   // There was an insertion.
 | |
|   if (WasInserted)
 | |
|     LegalInstrNumber++;
 | |
| 
 | |
|   IntegerMappingForBB.push_back(INumber);
 | |
| 
 | |
|   // Make sure we don't overflow or use any integers reserved by the DenseMap.
 | |
|   assert(LegalInstrNumber < IllegalInstrNumber &&
 | |
|          "Instruction mapping overflow!");
 | |
| 
 | |
|   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
 | |
|          "Tried to assign DenseMap tombstone or empty key to instruction.");
 | |
|   assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
 | |
|          "Tried to assign DenseMap tombstone or empty key to instruction.");
 | |
| 
 | |
|   return INumber;
 | |
| }
 | |
| 
 | |
| IRInstructionData *
 | |
| IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
 | |
|                                                IRInstructionDataList &IDL) {
 | |
|   return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
 | |
| }
 | |
| 
 | |
| IRInstructionData *
 | |
| IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
 | |
|   return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
 | |
| }
 | |
| 
 | |
| IRInstructionDataList *
 | |
| IRInstructionMapper::allocateIRInstructionDataList() {
 | |
|   return new (IDLAllocator->Allocate()) IRInstructionDataList();
 | |
| }
 | |
| 
 | |
| // TODO: This is the same as the MachineOutliner, and should be consolidated
 | |
| // into the same interface.
 | |
| unsigned IRInstructionMapper::mapToIllegalUnsigned(
 | |
|     BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
 | |
|     std::vector<IRInstructionData *> &InstrListForBB, bool End) {
 | |
|   // Can't combine an illegal instruction. Set the flag.
 | |
|   CanCombineWithPrevInstr = false;
 | |
| 
 | |
|   // Only add one illegal number per range of legal numbers.
 | |
|   if (AddedIllegalLastTime)
 | |
|     return IllegalInstrNumber;
 | |
| 
 | |
|   IRInstructionData *ID = nullptr;
 | |
|   if (!End)
 | |
|     ID = allocateIRInstructionData(*It, false, *IDL);
 | |
|   else
 | |
|     ID = allocateIRInstructionData(*IDL);
 | |
|   InstrListForBB.push_back(ID);
 | |
| 
 | |
|   // Remember that we added an illegal number last time.
 | |
|   AddedIllegalLastTime = true;
 | |
|   unsigned INumber = IllegalInstrNumber;
 | |
|   IntegerMappingForBB.push_back(IllegalInstrNumber--);
 | |
| 
 | |
|   assert(LegalInstrNumber < IllegalInstrNumber &&
 | |
|          "Instruction mapping overflow!");
 | |
| 
 | |
|   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
 | |
|          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
 | |
| 
 | |
|   assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
 | |
|          "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
 | |
| 
 | |
|   return INumber;
 | |
| }
 | |
| 
 | |
| IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
 | |
|                                              IRInstructionData *FirstInstIt,
 | |
|                                              IRInstructionData *LastInstIt)
 | |
|     : StartIdx(StartIdx), Len(Len) {
 | |
| 
 | |
|   assert(FirstInstIt != nullptr && "Instruction is nullptr!");
 | |
|   assert(LastInstIt != nullptr && "Instruction is nullptr!");
 | |
|   assert(StartIdx + Len > StartIdx &&
 | |
|          "Overflow for IRSimilarityCandidate range?");
 | |
|   assert(Len - 1 == static_cast<unsigned>(std::distance(
 | |
|                         iterator(FirstInstIt), iterator(LastInstIt))) &&
 | |
|          "Length of the first and last IRInstructionData do not match the "
 | |
|          "given length");
 | |
| 
 | |
|   // We iterate over the given instructions, and map each unique value
 | |
|   // to a unique number in the IRSimilarityCandidate ValueToNumber and
 | |
|   // NumberToValue maps.  A constant get its own value globally, the individual
 | |
|   // uses of the constants are not considered to be unique.
 | |
|   //
 | |
|   // IR:                    Mapping Added:
 | |
|   // %add1 = add i32 %a, c1    %add1 -> 3, %a -> 1, c1 -> 2
 | |
|   // %add2 = add i32 %a, %1    %add2 -> 4
 | |
|   // %add3 = add i32 c2, c1    %add3 -> 6, c2 -> 5
 | |
|   //
 | |
|   // when replace with global values, starting from 1, would be
 | |
|   //
 | |
|   // 3 = add i32 1, 2
 | |
|   // 4 = add i32 1, 3
 | |
|   // 6 = add i32 5, 2
 | |
|   unsigned LocalValNumber = 1;
 | |
|   IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
 | |
|   for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
 | |
|     // Map the operand values to an unsigned integer if it does not already
 | |
|     // have an unsigned integer assigned to it.
 | |
|     for (Value *Arg : ID->OperVals)
 | |
|       if (ValueToNumber.find(Arg) == ValueToNumber.end()) {
 | |
|         ValueToNumber.try_emplace(Arg, LocalValNumber);
 | |
|         NumberToValue.try_emplace(LocalValNumber, Arg);
 | |
|         LocalValNumber++;
 | |
|       }
 | |
| 
 | |
|     // Mapping the instructions to an unsigned integer if it is not already
 | |
|     // exist in the mapping.
 | |
|     if (ValueToNumber.find(ID->Inst) == ValueToNumber.end()) {
 | |
|       ValueToNumber.try_emplace(ID->Inst, LocalValNumber);
 | |
|       NumberToValue.try_emplace(LocalValNumber, ID->Inst);
 | |
|       LocalValNumber++;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   // Setting the first and last instruction data pointers for the candidate.  If
 | |
|   // we got through the entire for loop without hitting an assert, we know
 | |
|   // that both of these instructions are not nullptrs.
 | |
|   FirstInst = FirstInstIt;
 | |
|   LastInst = LastInstIt;
 | |
| 
 | |
|   // Add the basic blocks contained in the set into the global value numbering.
 | |
|   DenseSet<BasicBlock *> BBSet;
 | |
|   getBasicBlocks(BBSet);
 | |
|   for (BasicBlock *BB : BBSet) {
 | |
|     if (ValueToNumber.find(BB) != ValueToNumber.end())
 | |
|       continue;
 | |
|     
 | |
|     ValueToNumber.try_emplace(BB, LocalValNumber);
 | |
|     NumberToValue.try_emplace(LocalValNumber, BB);
 | |
|     LocalValNumber++;
 | |
|   }
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
 | |
|                                       const IRSimilarityCandidate &B) {
 | |
|   if (A.getLength() != B.getLength())
 | |
|     return false;
 | |
| 
 | |
|   auto InstrDataForBoth =
 | |
|       zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
 | |
| 
 | |
|   return all_of(InstrDataForBoth,
 | |
|                 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
 | |
|                   IRInstructionData &A = std::get<0>(R);
 | |
|                   IRInstructionData &B = std::get<1>(R);
 | |
|                   if (!A.Legal || !B.Legal)
 | |
|                     return false;
 | |
|                   return isClose(A, B);
 | |
|                 });
 | |
| }
 | |
| 
 | |
| /// Determine if one or more of the assigned global value numbers for the
 | |
| /// operands in \p TargetValueNumbers is in the current mapping set for operand
 | |
| /// numbers in \p SourceOperands.  The set of possible corresponding global
 | |
| /// value numbers are replaced with the most recent version of compatible
 | |
| /// values.
 | |
| ///
 | |
| /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
 | |
| /// value number for the source IRInstructionCandidate.
 | |
| /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
 | |
| /// IRSimilarityCandidate global value numbers to a set of possible numbers in
 | |
| /// the target.
 | |
| /// \param [in] SourceOperands - The operands in the original
 | |
| /// IRSimilarityCandidate in the current instruction.
 | |
| /// \param [in] TargetValueNumbers - The global value numbers of the operands in
 | |
| /// the corresponding Instruction in the other IRSimilarityCandidate.
 | |
| /// \returns true if there exists a possible mapping between the source
 | |
| /// Instruction operands and the target Instruction operands, and false if not.
 | |
| static bool checkNumberingAndReplaceCommutative(
 | |
|   const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
 | |
|   DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
 | |
|   ArrayRef<Value *> &SourceOperands,
 | |
|   DenseSet<unsigned> &TargetValueNumbers){
 | |
| 
 | |
|   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
 | |
| 
 | |
|   unsigned ArgVal;
 | |
|   bool WasInserted;
 | |
| 
 | |
|   // Iterate over the operands in the source IRSimilarityCandidate to determine
 | |
|   // whether there exists an operand in the other IRSimilarityCandidate that
 | |
|   // creates a valid mapping of Value to Value between the
 | |
|   // IRSimilarityCaniddates.
 | |
|   for (Value *V : SourceOperands) {
 | |
|     ArgVal = SourceValueToNumberMapping.find(V)->second;
 | |
| 
 | |
|     // Instead of finding a current mapping, we attempt to insert a set.
 | |
|     std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
 | |
|         std::make_pair(ArgVal, TargetValueNumbers));
 | |
| 
 | |
|     // We need to iterate over the items in other IRSimilarityCandidate's
 | |
|     // Instruction to determine whether there is a valid mapping of
 | |
|     // Value to Value.
 | |
|     DenseSet<unsigned> NewSet;
 | |
|     for (unsigned &Curr : ValueMappingIt->second)
 | |
|       // If we can find the value in the mapping, we add it to the new set.
 | |
|       if (TargetValueNumbers.contains(Curr))
 | |
|         NewSet.insert(Curr);
 | |
| 
 | |
|     // If we could not find a Value, return 0.
 | |
|     if (NewSet.empty())
 | |
|       return false;
 | |
|     
 | |
|     // Otherwise replace the old mapping with the newly constructed one.
 | |
|     if (NewSet.size() != ValueMappingIt->second.size())
 | |
|       ValueMappingIt->second.swap(NewSet);
 | |
| 
 | |
|     // We have reached no conclusions about the mapping, and cannot remove
 | |
|     // any items from the other operands, so we move to check the next operand.
 | |
|     if (ValueMappingIt->second.size() != 1)
 | |
|       continue;
 | |
| 
 | |
|     unsigned ValToRemove = *ValueMappingIt->second.begin();
 | |
|     // When there is only one item left in the mapping for and operand, remove
 | |
|     // the value from the other operands.  If it results in there being no
 | |
|     // mapping, return false, it means the mapping is wrong
 | |
|     for (Value *InnerV : SourceOperands) {
 | |
|       if (V == InnerV)
 | |
|         continue;
 | |
| 
 | |
|       unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
 | |
|       ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
 | |
|       if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
 | |
|         continue;
 | |
| 
 | |
|       ValueMappingIt->second.erase(ValToRemove);
 | |
|       if (ValueMappingIt->second.empty())
 | |
|         return false;
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| /// Determine if operand number \p TargetArgVal is in the current mapping set
 | |
| /// for operand number \p SourceArgVal.
 | |
| ///
 | |
| /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
 | |
| /// value numbers from source IRSimilarityCandidate to target
 | |
| /// IRSimilarityCandidate.
 | |
| /// \param [in] SourceArgVal The global value number for an operand in the
 | |
| /// in the original candidate.
 | |
| /// \param [in] TargetArgVal The global value number for the corresponding
 | |
| /// operand in the other candidate.
 | |
| /// \returns True if there exists a mapping and false if not.
 | |
| bool checkNumberingAndReplace(
 | |
|     DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
 | |
|     unsigned SourceArgVal, unsigned TargetArgVal) {
 | |
|   // We are given two unsigned integers representing the global values of
 | |
|   // the operands in different IRSimilarityCandidates and a current mapping
 | |
|   // between the two.
 | |
|   //
 | |
|   // Source Operand GVN: 1
 | |
|   // Target Operand GVN: 2
 | |
|   // CurrentMapping: {1: {1, 2}}
 | |
|   //
 | |
|   // Since we have mapping, and the target operand is contained in the set, we
 | |
|   // update it to:
 | |
|   // CurrentMapping: {1: {2}}
 | |
|   // and can return true. But, if the mapping was
 | |
|   // CurrentMapping: {1: {3}}
 | |
|   // we would return false.
 | |
| 
 | |
|   bool WasInserted;
 | |
|   DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
 | |
| 
 | |
|   std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
 | |
|       std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
 | |
| 
 | |
|   // If we created a new mapping, then we are done.
 | |
|   if (WasInserted)
 | |
|     return true;
 | |
| 
 | |
|   // If there is more than one option in the mapping set, and the target value
 | |
|   // is included in the mapping set replace that set with one that only includes
 | |
|   // the target value, as it is the only valid mapping via the non commutative
 | |
|   // instruction.
 | |
| 
 | |
|   DenseSet<unsigned> &TargetSet = Val->second;
 | |
|   if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
 | |
|     TargetSet.clear();
 | |
|     TargetSet.insert(TargetArgVal);
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   // Return true if we can find the value in the set.
 | |
|   return TargetSet.contains(TargetArgVal);
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
 | |
|     OperandMapping A, OperandMapping B) {
 | |
|   // Iterators to keep track of where we are in the operands for each
 | |
|   // Instruction.
 | |
|   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
 | |
|   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
 | |
|   unsigned OperandLength = A.OperVals.size();
 | |
| 
 | |
|   // For each operand, get the value numbering and ensure it is consistent.
 | |
|   for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
 | |
|     unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
 | |
|     unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
 | |
| 
 | |
|     // Attempt to add a set with only the target value.  If there is no mapping
 | |
|     // we can create it here.
 | |
|     //
 | |
|     // For an instruction like a subtraction:
 | |
|     // IRSimilarityCandidateA:  IRSimilarityCandidateB:
 | |
|     // %resultA = sub %a, %b    %resultB = sub %d, %e
 | |
|     //
 | |
|     // We map %a -> %d and %b -> %e.
 | |
|     //
 | |
|     // And check to see whether their mapping is consistent in
 | |
|     // checkNumberingAndReplace.
 | |
| 
 | |
|     if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
 | |
|       return false;
 | |
| 
 | |
|     if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
 | |
|       return false;
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::compareCommutativeOperandMapping(
 | |
|     OperandMapping A, OperandMapping B) {
 | |
|   DenseSet<unsigned> ValueNumbersA;      
 | |
|   DenseSet<unsigned> ValueNumbersB;
 | |
| 
 | |
|   ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
 | |
|   ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
 | |
|   unsigned OperandLength = A.OperVals.size();
 | |
| 
 | |
|   // Find the value number sets for the operands.
 | |
|   for (unsigned Idx = 0; Idx < OperandLength;
 | |
|        Idx++, VItA++, VItB++) {
 | |
|     ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
 | |
|     ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
 | |
|   }
 | |
| 
 | |
|   // Iterate over the operands in the first IRSimilarityCandidate and make sure
 | |
|   // there exists a possible mapping with the operands in the second
 | |
|   // IRSimilarityCandidate.
 | |
|   if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
 | |
|                                            A.ValueNumberMapping, A.OperVals,
 | |
|                                            ValueNumbersB))
 | |
|     return false;
 | |
| 
 | |
|   // Iterate over the operands in the second IRSimilarityCandidate and make sure
 | |
|   // there exists a possible mapping with the operands in the first
 | |
|   // IRSimilarityCandidate.
 | |
|   if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
 | |
|                                            B.ValueNumberMapping, B.OperVals,
 | |
|                                            ValueNumbersA))
 | |
|     return false;
 | |
| 
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
 | |
|                                                    RelativeLocMapping B) {
 | |
|   // Get the basic blocks the label refers to.
 | |
|   BasicBlock *ABB = static_cast<BasicBlock *>(A.OperVal);
 | |
|   BasicBlock *BBB = static_cast<BasicBlock *>(B.OperVal);
 | |
| 
 | |
|   // Get the basic blocks contained in each region.
 | |
|   DenseSet<BasicBlock *> BasicBlockA;
 | |
|   DenseSet<BasicBlock *> BasicBlockB;
 | |
|   A.IRSC.getBasicBlocks(BasicBlockA);
 | |
|   B.IRSC.getBasicBlocks(BasicBlockB);
 | |
|   
 | |
|   // Determine if the block is contained in the region.
 | |
|   bool AContained = BasicBlockA.contains(ABB);
 | |
|   bool BContained = BasicBlockB.contains(BBB);
 | |
| 
 | |
|   // Both blocks need to be contained in the region, or both need to be outside
 | |
|   // the reigon.
 | |
|   if (AContained != BContained)
 | |
|     return false;
 | |
|   
 | |
|   // If both are contained, then we need to make sure that the relative
 | |
|   // distance to the target blocks are the same.
 | |
|   if (AContained)
 | |
|     return A.RelativeLocation == B.RelativeLocation;
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
 | |
|                                              const IRSimilarityCandidate &B) {
 | |
|   DenseMap<unsigned, DenseSet<unsigned>> MappingA;
 | |
|   DenseMap<unsigned, DenseSet<unsigned>> MappingB;
 | |
|   return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
 | |
| }
 | |
| 
 | |
| typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
 | |
|                       SmallVector<int, 4> &, ArrayRef<Value *> &,
 | |
|                       ArrayRef<Value *> &>
 | |
|     ZippedRelativeLocationsT;
 | |
| 
 | |
| bool IRSimilarityCandidate::compareStructure(
 | |
|     const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
 | |
|     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
 | |
|     DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
 | |
|   if (A.getLength() != B.getLength())
 | |
|     return false;
 | |
| 
 | |
|   if (A.ValueToNumber.size() != B.ValueToNumber.size())
 | |
|     return false;
 | |
| 
 | |
|   iterator ItA = A.begin();
 | |
|   iterator ItB = B.begin();
 | |
| 
 | |
|   // These ValueNumber Mapping sets create a create a mapping between the values
 | |
|   // in one candidate to values in the other candidate.  If we create a set with
 | |
|   // one element, and that same element maps to the original element in the
 | |
|   // candidate we have a good mapping.
 | |
|   DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
 | |
| 
 | |
| 
 | |
|   // Iterate over the instructions contained in each candidate
 | |
|   unsigned SectionLength = A.getStartIdx() + A.getLength();
 | |
|   for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
 | |
|        ItA++, ItB++, Loc++) {
 | |
|     // Make sure the instructions are similar to one another.
 | |
|     if (!isClose(*ItA, *ItB))
 | |
|       return false;
 | |
| 
 | |
|     Instruction *IA = ItA->Inst;
 | |
|     Instruction *IB = ItB->Inst;
 | |
| 
 | |
|     if (!ItA->Legal || !ItB->Legal)
 | |
|       return false;
 | |
| 
 | |
|     // Get the operand sets for the instructions.
 | |
|     ArrayRef<Value *> OperValsA = ItA->OperVals;
 | |
|     ArrayRef<Value *> OperValsB = ItB->OperVals;
 | |
| 
 | |
|     unsigned InstValA = A.ValueToNumber.find(IA)->second;
 | |
|     unsigned InstValB = B.ValueToNumber.find(IB)->second;
 | |
| 
 | |
|     bool WasInserted;
 | |
|     // Ensure that the mappings for the instructions exists.
 | |
|     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
 | |
|         std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
 | |
|     if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
 | |
|       return false;
 | |
| 
 | |
|     std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingB.insert(
 | |
|         std::make_pair(InstValB, DenseSet<unsigned>({InstValA})));
 | |
|     if (!WasInserted && !ValueMappingIt->second.contains(InstValA))
 | |
|       return false;
 | |
| 
 | |
|     // We have different paths for commutative instructions and non-commutative
 | |
|     // instructions since commutative instructions could allow multiple mappings
 | |
|     // to certain values.
 | |
|     if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
 | |
|         !isa<IntrinsicInst>(IA)) {
 | |
|       if (!compareCommutativeOperandMapping(
 | |
|               {A, OperValsA, ValueNumberMappingA},
 | |
|               {B, OperValsB, ValueNumberMappingB}))
 | |
|         return false;
 | |
|       continue;
 | |
|     }
 | |
| 
 | |
|     // Handle the non-commutative cases.
 | |
|     if (!compareNonCommutativeOperandMapping(
 | |
|             {A, OperValsA, ValueNumberMappingA},
 | |
|             {B, OperValsB, ValueNumberMappingB}))
 | |
|       return false;
 | |
| 
 | |
|     // Here we check that between two corresponding instructions,
 | |
|     // when referring to a basic block in the same region, the
 | |
|     // relative locations are the same. And, that the instructions refer to
 | |
|     // basic blocks outside the region in the same corresponding locations.
 | |
| 
 | |
|     // We are able to make the assumption about blocks outside of the region
 | |
|     // since the target block labels are considered values and will follow the
 | |
|     // same number matching that we defined for the other instructions in the
 | |
|     // region.  So, at this point, in each location we target a specific block
 | |
|     // outside the region, we are targeting a corresponding block in each
 | |
|     // analagous location in the region we are comparing to.
 | |
|     if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
 | |
|         !(isa<PHINode>(IA) && isa<PHINode>(IB)))
 | |
|       continue;
 | |
| 
 | |
|     SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
 | |
|     SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
 | |
|     if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
 | |
|         OperValsA.size() != OperValsB.size())
 | |
|       return false;
 | |
| 
 | |
|     ZippedRelativeLocationsT ZippedRelativeLocations =
 | |
|         zip(RelBlockLocsA, RelBlockLocsB, OperValsA, OperValsB);
 | |
|     if (any_of(ZippedRelativeLocations,
 | |
|                [&A, &B](std::tuple<int, int, Value *, Value *> R) {
 | |
|                  return !checkRelativeLocations(
 | |
|                      {A, std::get<0>(R), std::get<2>(R)},
 | |
|                      {B, std::get<1>(R), std::get<3>(R)});
 | |
|                }))
 | |
|       return false;
 | |
|   }
 | |
|   return true;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
 | |
|                                     const IRSimilarityCandidate &B) {
 | |
|   auto DoesOverlap = [](const IRSimilarityCandidate &X,
 | |
|                         const IRSimilarityCandidate &Y) {
 | |
|     // Check:
 | |
|     // XXXXXX        X starts before Y ends
 | |
|     //      YYYYYYY  Y starts after X starts
 | |
|     return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
 | |
|   };
 | |
| 
 | |
|   return DoesOverlap(A, B) || DoesOverlap(B, A);
 | |
| }
 | |
| 
 | |
| void IRSimilarityIdentifier::populateMapper(
 | |
|     Module &M, std::vector<IRInstructionData *> &InstrList,
 | |
|     std::vector<unsigned> &IntegerMapping) {
 | |
| 
 | |
|   std::vector<IRInstructionData *> InstrListForModule;
 | |
|   std::vector<unsigned> IntegerMappingForModule;
 | |
|   // Iterate over the functions in the module to map each Instruction in each
 | |
|   // BasicBlock to an unsigned integer.
 | |
|   Mapper.initializeForBBs(M);
 | |
| 
 | |
|   for (Function &F : M) {
 | |
| 
 | |
|     if (F.empty())
 | |
|       continue;
 | |
| 
 | |
|     for (BasicBlock &BB : F) {
 | |
| 
 | |
|       // BB has potential to have similarity since it has a size greater than 2
 | |
|       // and can therefore match other regions greater than 2. Map it to a list
 | |
|       // of unsigned integers.
 | |
|       Mapper.convertToUnsignedVec(BB, InstrListForModule,
 | |
|                                   IntegerMappingForModule);
 | |
|     }
 | |
| 
 | |
|     BasicBlock::iterator It = F.begin()->end();
 | |
|     Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
 | |
|                                 true);
 | |
|     if (InstrListForModule.size() > 0)
 | |
|       Mapper.IDL->push_back(*InstrListForModule.back());
 | |
|   }
 | |
| 
 | |
|   // Insert the InstrListForModule at the end of the overall InstrList so that
 | |
|   // we can have a long InstrList for the entire set of Modules being analyzed.
 | |
|   llvm::append_range(InstrList, InstrListForModule);
 | |
|   // Do the same as above, but for IntegerMapping.
 | |
|   llvm::append_range(IntegerMapping, IntegerMappingForModule);
 | |
| }
 | |
| 
 | |
| void IRSimilarityIdentifier::populateMapper(
 | |
|     ArrayRef<std::unique_ptr<Module>> &Modules,
 | |
|     std::vector<IRInstructionData *> &InstrList,
 | |
|     std::vector<unsigned> &IntegerMapping) {
 | |
| 
 | |
|   // Iterate over, and map the instructions in each module.
 | |
|   for (const std::unique_ptr<Module> &M : Modules)
 | |
|     populateMapper(*M, InstrList, IntegerMapping);
 | |
| }
 | |
| 
 | |
| /// From a repeated subsequence, find all the different instances of the
 | |
| /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
 | |
| /// the IRInstructionData in subsequence.
 | |
| ///
 | |
| /// \param [in] Mapper - The instruction mapper for basic correctness checks.
 | |
| /// \param [in] InstrList - The vector that holds the instruction data.
 | |
| /// \param [in] IntegerMapping - The vector that holds the mapped integers.
 | |
| /// \param [out] CandsForRepSubstring - The vector to store the generated
 | |
| /// IRSimilarityCandidates.
 | |
| static void createCandidatesFromSuffixTree(
 | |
|     const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
 | |
|     std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
 | |
|     std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
 | |
| 
 | |
|   unsigned StringLen = RS.Length;
 | |
|   if (StringLen < 2)
 | |
|     return;
 | |
| 
 | |
|   // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
 | |
|   for (const unsigned &StartIdx : RS.StartIndices) {
 | |
|     unsigned EndIdx = StartIdx + StringLen - 1;
 | |
| 
 | |
|     // Check that this subsequence does not contain an illegal instruction.
 | |
|     bool ContainsIllegal = false;
 | |
|     for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
 | |
|       unsigned Key = IntegerMapping[CurrIdx];
 | |
|       if (Key > Mapper.IllegalInstrNumber) {
 | |
|         ContainsIllegal = true;
 | |
|         break;
 | |
|       }
 | |
|     }
 | |
| 
 | |
|     // If we have an illegal instruction, we should not create an
 | |
|     // IRSimilarityCandidate for this region.
 | |
|     if (ContainsIllegal)
 | |
|       continue;
 | |
| 
 | |
|     // We are getting iterators to the instructions in this region of code
 | |
|     // by advancing the start and end indices from the start of the
 | |
|     // InstrList.
 | |
|     std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
 | |
|     std::advance(StartIt, StartIdx);
 | |
|     std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
 | |
|     std::advance(EndIt, EndIdx);
 | |
| 
 | |
|     CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
 | |
|   }
 | |
| }
 | |
| 
 | |
| void IRSimilarityCandidate::createCanonicalRelationFrom(
 | |
|     IRSimilarityCandidate &SourceCand,
 | |
|     DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
 | |
|     DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
 | |
|   assert(SourceCand.CanonNumToNumber.size() != 0 &&
 | |
|          "Base canonical relationship is empty!");
 | |
|   assert(SourceCand.NumberToCanonNum.size() != 0 &&
 | |
|          "Base canonical relationship is empty!");
 | |
| 
 | |
|   assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
 | |
|   assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
 | |
| 
 | |
|   DenseSet<unsigned> UsedGVNs;
 | |
|   // Iterate over the mappings provided from this candidate to SourceCand.  We
 | |
|   // are then able to map the GVN in this candidate to the same canonical number
 | |
|   // given to the corresponding GVN in SourceCand.
 | |
|   for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
 | |
|     unsigned SourceGVN = GVNMapping.first;
 | |
| 
 | |
|     assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
 | |
| 
 | |
|     unsigned ResultGVN;
 | |
|     // We need special handling if we have more than one potential value.  This
 | |
|     // means that there are at least two GVNs that could correspond to this GVN.
 | |
|     // This could lead to potential swapping later on, so we make a decision
 | |
|     // here to ensure a one-to-one mapping.
 | |
|     if (GVNMapping.second.size() > 1) {
 | |
|       bool Found = false;
 | |
|       for (unsigned Val : GVNMapping.second) {
 | |
|         // We make sure the target value number hasn't already been reserved.
 | |
|         if (UsedGVNs.contains(Val))
 | |
|           continue;
 | |
| 
 | |
|         // We make sure that the opposite mapping is still consistent.
 | |
|         DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
 | |
|             FromSourceMapping.find(Val);
 | |
| 
 | |
|         if (!It->second.contains(SourceGVN))
 | |
|           continue;
 | |
| 
 | |
|         // We pick the first item that satisfies these conditions.
 | |
|         Found = true;
 | |
|         ResultGVN = Val;
 | |
|         break;
 | |
|       }
 | |
| 
 | |
|       assert(Found && "Could not find matching value for source GVN");
 | |
|       (void)Found;
 | |
| 
 | |
|     } else
 | |
|       ResultGVN = *GVNMapping.second.begin();
 | |
| 
 | |
|     // Whatever GVN is found, we mark it as used.
 | |
|     UsedGVNs.insert(ResultGVN);
 | |
| 
 | |
|     unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
 | |
|     CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
 | |
|     NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
 | |
|   }
 | |
| 
 | |
|   DenseSet<BasicBlock *> BBSet;
 | |
|   getBasicBlocks(BBSet);
 | |
|   // Find canonical numbers for the BasicBlocks in the current candidate.
 | |
|   // This is done by finding the corresponding value for the first instruction
 | |
|   // in the block in the current candidate, finding the matching value in the
 | |
|   // source candidate.  Then by finding the parent of this value, use the
 | |
|   // canonical number of the block in the source candidate for the canonical
 | |
|   // number in the current candidate.
 | |
|   for (BasicBlock *BB : BBSet) {
 | |
|     unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
 | |
| 
 | |
|     // We can skip the BasicBlock if the canonical numbering has already been
 | |
|     // found in a separate instruction.
 | |
|     if (NumberToCanonNum.find(BBGVNForCurrCand) != NumberToCanonNum.end())
 | |
|       continue;
 | |
| 
 | |
|     // If the basic block is the starting block, then the shared instruction may
 | |
|     // not be the first instruction in the block, it will be the first
 | |
|     // instruction in the similarity region.
 | |
|     Value *FirstOutlineInst = BB == getStartBB()
 | |
|                                   ? frontInstruction()
 | |
|                                   : &*BB->instructionsWithoutDebug().begin();
 | |
| 
 | |
|     unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
 | |
|     unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
 | |
|     unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
 | |
|     Value *SourceV = *SourceCand.fromGVN(SourceGVN);
 | |
|     BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
 | |
|     unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
 | |
|     unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
 | |
|     CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
 | |
|     NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
 | |
|   }
 | |
| }
 | |
| 
 | |
| void IRSimilarityCandidate::createCanonicalMappingFor(
 | |
|     IRSimilarityCandidate &CurrCand) {
 | |
|   assert(CurrCand.CanonNumToNumber.size() == 0 &&
 | |
|          "Canonical Relationship is non-empty");
 | |
|   assert(CurrCand.NumberToCanonNum.size() == 0 &&
 | |
|          "Canonical Relationship is non-empty");
 | |
| 
 | |
|   unsigned CanonNum = 0;
 | |
|   // Iterate over the value numbers found, the order does not matter in this
 | |
|   // case.
 | |
|   for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
 | |
|     CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
 | |
|     CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
 | |
|     CanonNum++;
 | |
|   }
 | |
| }
 | |
| 
 | |
| /// From the list of IRSimilarityCandidates, perform a comparison between each
 | |
| /// IRSimilarityCandidate to determine if there are overlapping
 | |
| /// IRInstructionData, or if they do not have the same structure.
 | |
| ///
 | |
| /// \param [in] CandsForRepSubstring - The vector containing the
 | |
| /// IRSimilarityCandidates.
 | |
| /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
 | |
| /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
 | |
| /// vector are structurally similar to one another.
 | |
| static void findCandidateStructures(
 | |
|     std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
 | |
|     DenseMap<unsigned, SimilarityGroup> &StructuralGroups) {
 | |
|   std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
 | |
|       InnerCandEndIt;
 | |
| 
 | |
|   // IRSimilarityCandidates each have a structure for operand use.  It is
 | |
|   // possible that two instances of the same subsequences have different
 | |
|   // structure. Each type of structure found is assigned a number.  This
 | |
|   // DenseMap maps an IRSimilarityCandidate to which type of similarity
 | |
|   // discovered it fits within.
 | |
|   DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
 | |
| 
 | |
|   // Find the compatibility from each candidate to the others to determine
 | |
|   // which candidates overlap and which have the same structure by mapping
 | |
|   // each structure to a different group.
 | |
|   bool SameStructure;
 | |
|   bool Inserted;
 | |
|   unsigned CurrentGroupNum = 0;
 | |
|   unsigned OuterGroupNum;
 | |
|   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
 | |
|   DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
 | |
|   DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
 | |
| 
 | |
|   // Iterate over the candidates to determine its structural and overlapping
 | |
|   // compatibility with other instructions
 | |
|   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
 | |
|   DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
 | |
|   for (CandIt = CandsForRepSubstring.begin(),
 | |
|       CandEndIt = CandsForRepSubstring.end();
 | |
|        CandIt != CandEndIt; CandIt++) {
 | |
| 
 | |
|     // Determine if it has an assigned structural group already.
 | |
|     CandToGroupIt = CandToGroup.find(&*CandIt);
 | |
|     if (CandToGroupIt == CandToGroup.end()) {
 | |
|       // If not, we assign it one, and add it to our mapping.
 | |
|       std::tie(CandToGroupIt, Inserted) =
 | |
|           CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
 | |
|     }
 | |
| 
 | |
|     // Get the structural group number from the iterator.
 | |
|     OuterGroupNum = CandToGroupIt->second;
 | |
| 
 | |
|     // Check if we already have a list of IRSimilarityCandidates for the current
 | |
|     // structural group.  Create one if one does not exist.
 | |
|     CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
 | |
|     if (CurrentGroupPair == StructuralGroups.end()) {
 | |
|       IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
 | |
|       std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
 | |
|           std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
 | |
|     }
 | |
| 
 | |
|     // Iterate over the IRSimilarityCandidates following the current
 | |
|     // IRSimilarityCandidate in the list to determine whether the two
 | |
|     // IRSimilarityCandidates are compatible.  This is so we do not repeat pairs
 | |
|     // of IRSimilarityCandidates.
 | |
|     for (InnerCandIt = std::next(CandIt),
 | |
|         InnerCandEndIt = CandsForRepSubstring.end();
 | |
|          InnerCandIt != InnerCandEndIt; InnerCandIt++) {
 | |
| 
 | |
|       // We check if the inner item has a group already, if it does, we skip it.
 | |
|       CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
 | |
|       if (CandToGroupItInner != CandToGroup.end())
 | |
|         continue;
 | |
| 
 | |
|       // Otherwise we determine if they have the same structure and add it to
 | |
|       // vector if they match.
 | |
|       ValueNumberMappingA.clear();
 | |
|       ValueNumberMappingB.clear();
 | |
|       SameStructure = IRSimilarityCandidate::compareStructure(
 | |
|           *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
 | |
|       if (!SameStructure)
 | |
|         continue;
 | |
| 
 | |
|       InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
 | |
|                                                ValueNumberMappingB);
 | |
|       CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
 | |
|       CurrentGroupPair->second.push_back(*InnerCandIt);
 | |
|     }
 | |
|   }
 | |
| }
 | |
| 
 | |
| void IRSimilarityIdentifier::findCandidates(
 | |
|     std::vector<IRInstructionData *> &InstrList,
 | |
|     std::vector<unsigned> &IntegerMapping) {
 | |
|   SuffixTree ST(IntegerMapping);
 | |
| 
 | |
|   std::vector<IRSimilarityCandidate> CandsForRepSubstring;
 | |
|   std::vector<SimilarityGroup> NewCandidateGroups;
 | |
| 
 | |
|   DenseMap<unsigned, SimilarityGroup> StructuralGroups;
 | |
| 
 | |
|   // Iterate over the subsequences found by the Suffix Tree to create
 | |
|   // IRSimilarityCandidates for each repeated subsequence and determine which
 | |
|   // instances are structurally similar to one another.
 | |
|   for (SuffixTree::RepeatedSubstring &RS : ST) {
 | |
|     createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
 | |
|                                    CandsForRepSubstring);
 | |
| 
 | |
|     if (CandsForRepSubstring.size() < 2)
 | |
|       continue;
 | |
| 
 | |
|     findCandidateStructures(CandsForRepSubstring, StructuralGroups);
 | |
|     for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups)
 | |
|       // We only add the group if it contains more than one
 | |
|       // IRSimilarityCandidate.  If there is only one, that means there is no
 | |
|       // other repeated subsequence with the same structure.
 | |
|       if (Group.second.size() > 1)
 | |
|         SimilarityCandidates->push_back(Group.second);
 | |
| 
 | |
|     CandsForRepSubstring.clear();
 | |
|     StructuralGroups.clear();
 | |
|     NewCandidateGroups.clear();
 | |
|   }
 | |
| }
 | |
| 
 | |
| SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
 | |
|     ArrayRef<std::unique_ptr<Module>> Modules) {
 | |
|   resetSimilarityCandidates();
 | |
| 
 | |
|   std::vector<IRInstructionData *> InstrList;
 | |
|   std::vector<unsigned> IntegerMapping;
 | |
|   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
 | |
|   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
 | |
|   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
 | |
|   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 | |
|   Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
 | |
| 
 | |
|   populateMapper(Modules, InstrList, IntegerMapping);
 | |
|   findCandidates(InstrList, IntegerMapping);
 | |
| 
 | |
|   return SimilarityCandidates.getValue();
 | |
| }
 | |
| 
 | |
| SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
 | |
|   resetSimilarityCandidates();
 | |
|   Mapper.InstClassifier.EnableBranches = this->EnableBranches;
 | |
|   Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
 | |
|   Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
 | |
|   Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
 | |
|   Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
 | |
| 
 | |
|   std::vector<IRInstructionData *> InstrList;
 | |
|   std::vector<unsigned> IntegerMapping;
 | |
| 
 | |
|   populateMapper(M, InstrList, IntegerMapping);
 | |
|   findCandidates(InstrList, IntegerMapping);
 | |
| 
 | |
|   return SimilarityCandidates.getValue();
 | |
| }
 | |
| 
 | |
| INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
 | |
|                 "ir-similarity-identifier", false, true)
 | |
| 
 | |
| IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
 | |
|     : ModulePass(ID) {
 | |
|   initializeIRSimilarityIdentifierWrapperPassPass(
 | |
|       *PassRegistry::getPassRegistry());
 | |
| }
 | |
| 
 | |
| bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
 | |
|   IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
 | |
|                                         MatchCallsByName, !DisableIntrinsics,
 | |
|                                         false));
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
 | |
|   IRSI.reset();
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
 | |
|   IRSI->findSimilarity(M);
 | |
|   return false;
 | |
| }
 | |
| 
 | |
| AnalysisKey IRSimilarityAnalysis::Key;
 | |
| IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
 | |
|                                                  ModuleAnalysisManager &) {
 | |
|   auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
 | |
|                                      MatchCallsByName, !DisableIntrinsics,
 | |
|                                      false);
 | |
|   IRSI.findSimilarity(M);
 | |
|   return IRSI;
 | |
| }
 | |
| 
 | |
| PreservedAnalyses
 | |
| IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
 | |
|   IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
 | |
|   Optional<SimilarityGroupList> &SimilarityCandidatesOpt = IRSI.getSimilarity();
 | |
| 
 | |
|   for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
 | |
|     OS << CandVec.size() << " candidates of length "
 | |
|        << CandVec.begin()->getLength() << ".  Found in: \n";
 | |
|     for (IRSimilarityCandidate &Cand : CandVec) {
 | |
|       OS << "  Function: " << Cand.front()->Inst->getFunction()->getName().str()
 | |
|          << ", Basic Block: ";
 | |
|       if (Cand.front()->Inst->getParent()->getName().str() == "")
 | |
|         OS << "(unnamed)";
 | |
|       else
 | |
|         OS << Cand.front()->Inst->getParent()->getName().str();
 | |
|       OS << "\n    Start Instruction: ";
 | |
|       Cand.frontInstruction()->print(OS);
 | |
|       OS << "\n      End Instruction: ";
 | |
|       Cand.backInstruction()->print(OS);
 | |
|       OS << "\n";
 | |
|     }
 | |
|   }
 | |
| 
 | |
|   return PreservedAnalyses::all();
 | |
| }
 | |
| 
 | |
| char IRSimilarityIdentifierWrapperPass::ID = 0;
 |