//===- HexagonVectorLoopCarriedReuse.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass removes the computation of provably redundant expressions that have
// been computed earlier in a previous iteration. It relies on the use of PHIs
// to identify loop carried dependences. This is scalar replacement for vector
// types.
//
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "HexagonVectorLoopCarriedReuse.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicsHexagon.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <map>
#include <memory>
#include <set>
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "hexagon-vlcr"
 | 
						|
 | 
						|
STATISTIC(HexagonNumVectorLoopCarriedReuse,
 | 
						|
          "Number of values that were reused from a previous iteration.");
 | 
						|
 | 
						|
static cl::opt<int> HexagonVLCRIterationLim("hexagon-vlcr-iteration-lim",
 | 
						|
    cl::Hidden,
 | 
						|
    cl::desc("Maximum distance of loop carried dependences that are handled"),
 | 
						|
    cl::init(2), cl::ZeroOrMore);
 | 
						|
 | 
						|
namespace llvm {
 | 
						|
 | 
						|
void initializeHexagonVectorLoopCarriedReuseLegacyPassPass(PassRegistry &);
 | 
						|
Pass *createHexagonVectorLoopCarriedReuseLegacyPass();
 | 
						|
 | 
						|
} // end namespace llvm
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
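  // A DepChain records how one value is carried across loop iterations: a run
  // of loop-header PHIs followed by the instruction that feeds the last PHI
  // over the backedge (see findDepChainFromPHI).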
 | 
						|
  using ChainOfDependences = SmallVector<Instruction *, 4>;
 | 
						|
 | 
						|
  class DepChain {
 | 
						|
    ChainOfDependences Chain;
 | 
						|
 | 
						|
  public:
 | 
						|
    bool isIdentical(DepChain &Other) const {
 | 
						|
      if (Other.size() != size())
 | 
						|
        return false;
 | 
						|
      ChainOfDependences &OtherChain = Other.getChain();
 | 
						|
      for (int i = 0; i < size(); ++i) {
 | 
						|
        if (Chain[i] != OtherChain[i])
 | 
						|
          return false;
 | 
						|
      }
 | 
						|
      return true;
 | 
						|
    }
 | 
						|
 | 
						|
    ChainOfDependences &getChain() {
 | 
						|
      return Chain;
 | 
						|
    }
 | 
						|
 | 
						|
    int size() const {
 | 
						|
      return Chain.size();
 | 
						|
    }
 | 
						|
 | 
						|
    void clear() {
 | 
						|
      Chain.clear();
 | 
						|
    }
 | 
						|
 | 
						|
    void push_back(Instruction *I) {
 | 
						|
      Chain.push_back(I);
 | 
						|
    }
 | 
						|
 | 
						|
    int iterations() const {
 | 
						|
      return size() - 1;
 | 
						|
    }
 | 
						|
 | 
						|
    Instruction *front() const {
 | 
						|
      return Chain.front();
 | 
						|
    }
 | 
						|
 | 
						|
    Instruction *back() const {
 | 
						|
      return Chain.back();
 | 
						|
    }
 | 
						|
 | 
						|
    Instruction *&operator[](const int index) {
 | 
						|
      return Chain[index];
 | 
						|
    }
 | 
						|
 | 
						|
   friend raw_ostream &operator<< (raw_ostream &OS, const DepChain &D);
 | 
						|
  };
 | 
						|
 | 
						|
  /// Debug printer for a DepChain: a banner, then one instruction per line
  /// with " -->" after every instruction except the last.
  LLVM_ATTRIBUTE_UNUSED
  raw_ostream &operator<<(raw_ostream &OS, const DepChain &D) {
    const ChainOfDependences &CD = D.Chain;
    OS << "**DepChain Start::**\n";
    // An empty chain has no last element to print; indexing it would read out
    // of bounds.
    if (CD.empty())
      return OS;
    for (size_t Idx = 0, Last = CD.size() - 1; Idx != Last; ++Idx)
      OS << *CD[Idx] << " -->\n";
    OS << *CD.back() << "\n";
    return OS;
  }
 | 
						|
 | 
						|
  struct ReuseValue {
 | 
						|
    Instruction *Inst2Replace = nullptr;
 | 
						|
 | 
						|
    // In the new PHI node that we'll construct this is the value that'll be
 | 
						|
    // used over the backedge. This is the value that gets reused from a
 | 
						|
    // previous iteration.
 | 
						|
    Instruction *BackedgeInst = nullptr;
 | 
						|
    std::map<Instruction *, DepChain *> DepChains;
 | 
						|
    int Iterations = -1;
 | 
						|
 | 
						|
    ReuseValue() = default;
 | 
						|
 | 
						|
    void reset() {
 | 
						|
      Inst2Replace = nullptr;
 | 
						|
      BackedgeInst = nullptr;
 | 
						|
      DepChains.clear();
 | 
						|
      Iterations = -1;
 | 
						|
    }
 | 
						|
    bool isDefined() { return Inst2Replace != nullptr; }
 | 
						|
  };
 | 
						|
 | 
						|
  /// Debug printer for a ReuseValue. A field that is still null is printed as
  /// "<null>" instead of being dereferenced.
  LLVM_ATTRIBUTE_UNUSED
  raw_ostream &operator<<(raw_ostream &OS, const ReuseValue &RU) {
    OS << "** ReuseValue ***\n";

    OS << "Instruction to Replace: ";
    if (RU.Inst2Replace)
      OS << *RU.Inst2Replace;
    else
      OS << "<null>";
    OS << "\n";

    OS << "Backedge Instruction: ";
    if (RU.BackedgeInst)
      OS << *RU.BackedgeInst;
    else
      OS << "<null>";
    OS << "\n";
    return OS;
  }
 | 
						|
 | 
						|
  class HexagonVectorLoopCarriedReuseLegacyPass : public LoopPass {
 | 
						|
  public:
 | 
						|
    static char ID;
 | 
						|
 | 
						|
    explicit HexagonVectorLoopCarriedReuseLegacyPass() : LoopPass(ID) {
 | 
						|
      PassRegistry *PR = PassRegistry::getPassRegistry();
 | 
						|
      initializeHexagonVectorLoopCarriedReuseLegacyPassPass(*PR);
 | 
						|
    }
 | 
						|
 | 
						|
    StringRef getPassName() const override {
 | 
						|
      return "Hexagon-specific loop carried reuse for HVX vectors";
 | 
						|
    }
 | 
						|
 | 
						|
    void getAnalysisUsage(AnalysisUsage &AU) const override {
 | 
						|
      AU.addRequiredID(LoopSimplifyID);
 | 
						|
      AU.addRequiredID(LCSSAID);
 | 
						|
      AU.addPreservedID(LCSSAID);
 | 
						|
      AU.setPreservesCFG();
 | 
						|
    }
 | 
						|
 | 
						|
    bool runOnLoop(Loop *L, LPPassManager &LPM) override;
 | 
						|
  };
 | 
						|
 | 
						|
  class HexagonVectorLoopCarriedReuse {
 | 
						|
  public:
 | 
						|
    HexagonVectorLoopCarriedReuse(Loop *L) : CurLoop(L){};
 | 
						|
 | 
						|
    bool run();
 | 
						|
 | 
						|
  private:
 | 
						|
    SetVector<DepChain *> Dependences;
 | 
						|
    std::set<Instruction *> ReplacedInsts;
 | 
						|
    Loop *CurLoop;
 | 
						|
    ReuseValue ReuseCandidate;
 | 
						|
 | 
						|
    bool doVLCR();
 | 
						|
    void findLoopCarriedDeps();
 | 
						|
    void findValueToReuse();
 | 
						|
    void findDepChainFromPHI(Instruction *I, DepChain &D);
 | 
						|
    void reuseValue();
 | 
						|
    Value *findValueInBlock(Value *Op, BasicBlock *BB);
 | 
						|
    DepChain *getDepChainBtwn(Instruction *I1, Instruction *I2, int Iters);
 | 
						|
    bool isEquivalentOperation(Instruction *I1, Instruction *I2);
 | 
						|
    bool canReplace(Instruction *I);
 | 
						|
    bool isCallInstCommutative(CallInst *C);
 | 
						|
  };
 | 
						|
 | 
						|
} // end anonymous namespace
 | 
						|
 | 
						|
char HexagonVectorLoopCarriedReuseLegacyPass::ID = 0;
 | 
						|
 | 
						|
INITIALIZE_PASS_BEGIN(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
 | 
						|
                      "Hexagon-specific predictive commoning for HVX vectors",
 | 
						|
                      false, false)
 | 
						|
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
 | 
						|
INITIALIZE_PASS_DEPENDENCY(LCSSAWrapperPass)
 | 
						|
INITIALIZE_PASS_END(HexagonVectorLoopCarriedReuseLegacyPass, "hexagon-vlcr",
 | 
						|
                    "Hexagon-specific predictive commoning for HVX vectors",
 | 
						|
                    false, false)
 | 
						|
 | 
						|
/// New pass manager entry point. Reports all analyses preserved when nothing
/// changed, otherwise only the CFG analyses.
PreservedAnalyses
HexagonVectorLoopCarriedReusePass::run(Loop &L, LoopAnalysisManager &LAM,
                                       LoopStandardAnalysisResults &AR,
                                       LPMUpdater &U) {
  HexagonVectorLoopCarriedReuse Vlcr(&L);
  const bool Changed = Vlcr.run();
  if (Changed) {
    PreservedAnalyses Preserved;
    Preserved.preserveSet<CFGAnalyses>();
    return Preserved;
  }
  return PreservedAnalyses::all();
}
 | 
						|
 | 
						|
/// Legacy pass manager entry point. Does nothing when skipLoop() says the
/// loop should be skipped; otherwise returns whether the IR changed.
bool HexagonVectorLoopCarriedReuseLegacyPass::runOnLoop(Loop *L,
                                                        LPPassManager &LPM) {
  if (!skipLoop(L)) {
    HexagonVectorLoopCarriedReuse Transform(L);
    return Transform.run();
  }
  return false;
}
 | 
						|
 | 
						|
bool HexagonVectorLoopCarriedReuse::run() {
 | 
						|
  if (!CurLoop->getLoopPreheader())
 | 
						|
    return false;
 | 
						|
 | 
						|
  // Work only on innermost loops.
 | 
						|
  if (!CurLoop->getSubLoops().empty())
 | 
						|
    return false;
 | 
						|
 | 
						|
  // Work only on single basic blocks loops.
 | 
						|
  if (CurLoop->getNumBlocks() != 1)
 | 
						|
    return false;
 | 
						|
 | 
						|
  return doVLCR();
 | 
						|
}
 | 
						|
 | 
						|
/// Returns true if \p C calls one of the Hexagon HVX intrinsics listed below,
/// which this pass treats as commutative (operand order is ignored when
/// matching operands in findValueToReuse).
bool HexagonVectorLoopCarriedReuse::isCallInstCommutative(CallInst *C) {
  // getCalledFunction() returns null for indirect calls; such a call is never
  // one of the listed intrinsics.
  const Function *Callee = C->getCalledFunction();
  if (!Callee)
    return false;

  switch (Callee->getIntrinsicID()) {
    case Intrinsic::hexagon_V6_vaddb:
    case Intrinsic::hexagon_V6_vaddb_128B:
    case Intrinsic::hexagon_V6_vaddh:
    case Intrinsic::hexagon_V6_vaddh_128B:
    case Intrinsic::hexagon_V6_vaddw:
    case Intrinsic::hexagon_V6_vaddw_128B:
    case Intrinsic::hexagon_V6_vaddubh:
    case Intrinsic::hexagon_V6_vaddubh_128B:
    case Intrinsic::hexagon_V6_vadduhw:
    case Intrinsic::hexagon_V6_vadduhw_128B:
    case Intrinsic::hexagon_V6_vaddhw:
    case Intrinsic::hexagon_V6_vaddhw_128B:
    case Intrinsic::hexagon_V6_vmaxb:
    case Intrinsic::hexagon_V6_vmaxb_128B:
    case Intrinsic::hexagon_V6_vmaxh:
    case Intrinsic::hexagon_V6_vmaxh_128B:
    case Intrinsic::hexagon_V6_vmaxw:
    case Intrinsic::hexagon_V6_vmaxw_128B:
    case Intrinsic::hexagon_V6_vmaxub:
    case Intrinsic::hexagon_V6_vmaxub_128B:
    case Intrinsic::hexagon_V6_vmaxuh:
    case Intrinsic::hexagon_V6_vmaxuh_128B:
    case Intrinsic::hexagon_V6_vminub:
    case Intrinsic::hexagon_V6_vminub_128B:
    case Intrinsic::hexagon_V6_vminuh:
    case Intrinsic::hexagon_V6_vminuh_128B:
    case Intrinsic::hexagon_V6_vminb:
    case Intrinsic::hexagon_V6_vminb_128B:
    case Intrinsic::hexagon_V6_vminh:
    case Intrinsic::hexagon_V6_vminh_128B:
    case Intrinsic::hexagon_V6_vminw:
    case Intrinsic::hexagon_V6_vminw_128B:
    case Intrinsic::hexagon_V6_vmpyub:
    case Intrinsic::hexagon_V6_vmpyub_128B:
    case Intrinsic::hexagon_V6_vmpyuh:
    case Intrinsic::hexagon_V6_vmpyuh_128B:
    case Intrinsic::hexagon_V6_vavgub:
    case Intrinsic::hexagon_V6_vavgub_128B:
    case Intrinsic::hexagon_V6_vavgh:
    case Intrinsic::hexagon_V6_vavgh_128B:
    case Intrinsic::hexagon_V6_vavguh:
    case Intrinsic::hexagon_V6_vavguh_128B:
    case Intrinsic::hexagon_V6_vavgw:
    case Intrinsic::hexagon_V6_vavgw_128B:
    case Intrinsic::hexagon_V6_vavgb:
    case Intrinsic::hexagon_V6_vavgb_128B:
    case Intrinsic::hexagon_V6_vavguw:
    case Intrinsic::hexagon_V6_vavguw_128B:
    case Intrinsic::hexagon_V6_vabsdiffh:
    case Intrinsic::hexagon_V6_vabsdiffh_128B:
    case Intrinsic::hexagon_V6_vabsdiffub:
    case Intrinsic::hexagon_V6_vabsdiffub_128B:
    case Intrinsic::hexagon_V6_vabsdiffuh:
    case Intrinsic::hexagon_V6_vabsdiffuh_128B:
    case Intrinsic::hexagon_V6_vabsdiffw:
    case Intrinsic::hexagon_V6_vabsdiffw_128B:
      return true;
    default:
      return false;
  }
}
 | 
						|
 | 
						|
/// Returns true if \p I1 and \p I2 perform the same operation: same
/// instruction kind per isSameOperationAs, same callee when both are calls,
/// and, for vector-typed instructions, equal values wherever \p I1 has a
/// constant-integer operand.
bool HexagonVectorLoopCarriedReuse::isEquivalentOperation(Instruction *I1,
                                                          Instruction *I2) {
  if (!I1->isSameOperationAs(I2))
    return false;
  // This check is in place specifically for intrinsics. isSameOperationAs will
  // return true for any two hexagon intrinsics because they are essentially
  // the same instruction (CallInst). We need to scratch the surface to see if
  // they are calls to the same function.
  auto *Call1 = dyn_cast<CallInst>(I1);
  auto *Call2 = dyn_cast<CallInst>(I2);
  if (Call1 && Call2 &&
      Call1->getCalledFunction() != Call2->getCalledFunction())
    return false;

  // If both the Instructions are of Vector Type and any of the element
  // is integer constant, check their values too for equivalence.
  if (I1->getType()->isVectorTy() && I2->getType()->isVectorTy()) {
    unsigned NumOperands = I1->getNumOperands();
    for (unsigned i = 0; i < NumOperands; ++i) {
      ConstantInt *C1 = dyn_cast<ConstantInt>(I1->getOperand(i));
      ConstantInt *C2 = dyn_cast<ConstantInt>(I2->getOperand(i));
      if (!C1)
        continue;
      // A constant on one side and a non-constant on the other cannot be
      // proven equal; reject instead of dereferencing a null C2.
      if (!C2 || C1->getSExtValue() != C2->getSExtValue())
        return false;
    }
  }

  return true;
}
 | 
						|
 | 
						|
/// Returns false for the V6_hi / V6_lo intrinsics (64B and 128B variants),
/// which are excluded from reuse; true for every other instruction.
bool HexagonVectorLoopCarriedReuse::canReplace(Instruction *I) {
  const auto *Intrin = dyn_cast<IntrinsicInst>(I);
  if (Intrin == nullptr)
    return true;

  const Intrinsic::ID IID = Intrin->getIntrinsicID();
  const bool IsHiOrLo = IID == Intrinsic::hexagon_V6_hi ||
                        IID == Intrinsic::hexagon_V6_lo ||
                        IID == Intrinsic::hexagon_V6_hi_128B ||
                        IID == Intrinsic::hexagon_V6_lo_128B;
  if (!IsHiOrLo)
    return true;

  LLVM_DEBUG(dbgs() << "Not considering for reuse: " << *Intrin << "\n");
  return false;
}
 | 
						|
void HexagonVectorLoopCarriedReuse::findValueToReuse() {
 | 
						|
  for (auto *D : Dependences) {
 | 
						|
    LLVM_DEBUG(dbgs() << "Processing dependence " << *(D->front()) << "\n");
 | 
						|
    if (D->iterations() > HexagonVLCRIterationLim) {
 | 
						|
      LLVM_DEBUG(
 | 
						|
          dbgs()
 | 
						|
          << ".. Skipping because number of iterations > than the limit\n");
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
 | 
						|
    PHINode *PN = cast<PHINode>(D->front());
 | 
						|
    Instruction *BEInst = D->back();
 | 
						|
    int Iters = D->iterations();
 | 
						|
    BasicBlock *BB = PN->getParent();
 | 
						|
    LLVM_DEBUG(dbgs() << "Checking if any uses of " << *PN
 | 
						|
                      << " can be reused\n");
 | 
						|
 | 
						|
    SmallVector<Instruction *, 4> PNUsers;
 | 
						|
    for (auto UI = PN->use_begin(), E = PN->use_end(); UI != E; ++UI) {
 | 
						|
      Use &U = *UI;
 | 
						|
      Instruction *User = cast<Instruction>(U.getUser());
 | 
						|
 | 
						|
      if (User->getParent() != BB)
 | 
						|
        continue;
 | 
						|
      if (ReplacedInsts.count(User)) {
 | 
						|
        LLVM_DEBUG(dbgs() << *User
 | 
						|
                          << " has already been replaced. Skipping...\n");
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      if (isa<PHINode>(User))
 | 
						|
        continue;
 | 
						|
      if (User->mayHaveSideEffects())
 | 
						|
        continue;
 | 
						|
      if (!canReplace(User))
 | 
						|
        continue;
 | 
						|
 | 
						|
      PNUsers.push_back(User);
 | 
						|
    }
 | 
						|
    LLVM_DEBUG(dbgs() << PNUsers.size() << " use(s) of the PHI in the block\n");
 | 
						|
 | 
						|
    // For each interesting use I of PN, find an Instruction BEUser that
 | 
						|
    // performs the same operation as I on BEInst and whose other operands,
 | 
						|
    // if any, can also be rematerialized in OtherBB. We stop when we find the
 | 
						|
    // first such Instruction BEUser. This is because once BEUser is
 | 
						|
    // rematerialized in OtherBB, we may find more such "fixup" opportunities
 | 
						|
    // in this block. So, we'll start over again.
 | 
						|
    for (Instruction *I : PNUsers) {
 | 
						|
      for (auto UI = BEInst->use_begin(), E = BEInst->use_end(); UI != E;
 | 
						|
           ++UI) {
 | 
						|
        Use &U = *UI;
 | 
						|
        Instruction *BEUser = cast<Instruction>(U.getUser());
 | 
						|
 | 
						|
        if (BEUser->getParent() != BB)
 | 
						|
          continue;
 | 
						|
        if (!isEquivalentOperation(I, BEUser))
 | 
						|
          continue;
 | 
						|
 | 
						|
        int NumOperands = I->getNumOperands();
 | 
						|
 | 
						|
        // Take operands of each PNUser one by one and try to find DepChain
 | 
						|
        // with every operand of the BEUser. If any of the operands of BEUser
 | 
						|
        // has DepChain with current operand of the PNUser, break the matcher
 | 
						|
        // loop. Keep doing this for Every PNUser operand. If PNUser operand
 | 
						|
        // does not have DepChain with any of the BEUser operand, break the
 | 
						|
        // outer matcher loop, mark the BEUser as null and reset the ReuseCandidate.
 | 
						|
        // This ensures that DepChain exist for all the PNUser operand with
 | 
						|
        // BEUser operand. This also ensures that DepChains are independent of
 | 
						|
        // the positions in PNUser and BEUser.
 | 
						|
        std::map<Instruction *, DepChain *> DepChains;
 | 
						|
        CallInst *C1 = dyn_cast<CallInst>(I);
 | 
						|
        if ((I && I->isCommutative()) || (C1 && isCallInstCommutative(C1))) {
 | 
						|
          bool Found = false;
 | 
						|
          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
 | 
						|
            Value *Op = I->getOperand(OpNo);
 | 
						|
            Instruction *OpInst = dyn_cast<Instruction>(Op);
 | 
						|
            Found = false;
 | 
						|
            for (int T = 0; T < NumOperands; ++T) {
 | 
						|
              Value *BEOp = BEUser->getOperand(T);
 | 
						|
              Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
 | 
						|
              if (!OpInst && !BEOpInst) {
 | 
						|
                if (Op == BEOp) {
 | 
						|
                  Found = true;
 | 
						|
                  break;
 | 
						|
                }
 | 
						|
              }
 | 
						|
 | 
						|
              if ((OpInst && !BEOpInst) || (!OpInst && BEOpInst))
 | 
						|
                continue;
 | 
						|
 | 
						|
              DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
 | 
						|
 | 
						|
              if (D) {
 | 
						|
                Found = true;
 | 
						|
                DepChains[OpInst] = D;
 | 
						|
                break;
 | 
						|
              }
 | 
						|
            }
 | 
						|
            if (!Found) {
 | 
						|
              BEUser = nullptr;
 | 
						|
              break;
 | 
						|
            }
 | 
						|
          }
 | 
						|
        } else {
 | 
						|
 | 
						|
          for (int OpNo = 0; OpNo < NumOperands; ++OpNo) {
 | 
						|
            Value *Op = I->getOperand(OpNo);
 | 
						|
            Value *BEOp = BEUser->getOperand(OpNo);
 | 
						|
 | 
						|
            Instruction *OpInst = dyn_cast<Instruction>(Op);
 | 
						|
            if (!OpInst) {
 | 
						|
              if (Op == BEOp)
 | 
						|
                continue;
 | 
						|
              // Do not allow reuse to occur when the operands may be different
 | 
						|
              // values.
 | 
						|
              BEUser = nullptr;
 | 
						|
              break;
 | 
						|
            }
 | 
						|
 | 
						|
            Instruction *BEOpInst = dyn_cast<Instruction>(BEOp);
 | 
						|
            DepChain *D = getDepChainBtwn(OpInst, BEOpInst, Iters);
 | 
						|
 | 
						|
            if (D) {
 | 
						|
              DepChains[OpInst] = D;
 | 
						|
            } else {
 | 
						|
              BEUser = nullptr;
 | 
						|
              break;
 | 
						|
            }
 | 
						|
          }
 | 
						|
        }
 | 
						|
        if (BEUser) {
 | 
						|
          LLVM_DEBUG(dbgs() << "Found Value for reuse.\n");
 | 
						|
          ReuseCandidate.Inst2Replace = I;
 | 
						|
          ReuseCandidate.BackedgeInst = BEUser;
 | 
						|
          ReuseCandidate.DepChains = DepChains;
 | 
						|
          ReuseCandidate.Iterations = Iters;
 | 
						|
          return;
 | 
						|
        }
 | 
						|
        ReuseCandidate.reset();
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
  ReuseCandidate.reset();
 | 
						|
}
 | 
						|
 | 
						|
/// Returns the value that the PHI \p Op receives from predecessor \p BB.
/// \p Op must be a PHINode; cast<> asserts this in builds with assertions
/// enabled.
Value *HexagonVectorLoopCarriedReuse::findValueInBlock(Value *Op,
                                                       BasicBlock *BB) {
  return cast<PHINode>(Op)->getIncomingValueForBlock(BB);
}
 | 
						|
 | 
						|
void HexagonVectorLoopCarriedReuse::reuseValue() {
 | 
						|
  LLVM_DEBUG(dbgs() << ReuseCandidate);
 | 
						|
  Instruction *Inst2Replace = ReuseCandidate.Inst2Replace;
 | 
						|
  Instruction *BEInst = ReuseCandidate.BackedgeInst;
 | 
						|
  int NumOperands = Inst2Replace->getNumOperands();
 | 
						|
  std::map<Instruction *, DepChain *> &DepChains = ReuseCandidate.DepChains;
 | 
						|
  int Iterations = ReuseCandidate.Iterations;
 | 
						|
  BasicBlock *LoopPH = CurLoop->getLoopPreheader();
 | 
						|
  assert(!DepChains.empty() && "No DepChains");
 | 
						|
  LLVM_DEBUG(dbgs() << "reuseValue is making the following changes\n");
 | 
						|
 | 
						|
  SmallVector<Instruction *, 4> InstsInPreheader;
 | 
						|
  for (int i = 0; i < Iterations; ++i) {
 | 
						|
    Instruction *InstInPreheader = Inst2Replace->clone();
 | 
						|
    SmallVector<Value *, 4> Ops;
 | 
						|
    for (int j = 0; j < NumOperands; ++j) {
 | 
						|
      Instruction *I = dyn_cast<Instruction>(Inst2Replace->getOperand(j));
 | 
						|
      if (!I)
 | 
						|
        continue;
 | 
						|
      // Get the DepChain corresponding to this operand.
 | 
						|
      DepChain &D = *DepChains[I];
 | 
						|
      // Get the PHI for the iteration number and find
 | 
						|
      // the incoming value from the Loop Preheader for
 | 
						|
      // that PHI.
 | 
						|
      Value *ValInPreheader = findValueInBlock(D[i], LoopPH);
 | 
						|
      InstInPreheader->setOperand(j, ValInPreheader);
 | 
						|
    }
 | 
						|
    InstsInPreheader.push_back(InstInPreheader);
 | 
						|
    InstInPreheader->setName(Inst2Replace->getName() + ".hexagon.vlcr");
 | 
						|
    InstInPreheader->insertBefore(LoopPH->getTerminator());
 | 
						|
    LLVM_DEBUG(dbgs() << "Added " << *InstInPreheader << " to "
 | 
						|
                      << LoopPH->getName() << "\n");
 | 
						|
  }
 | 
						|
  BasicBlock *BB = BEInst->getParent();
 | 
						|
  IRBuilder<> IRB(BB);
 | 
						|
  IRB.SetInsertPoint(BB->getFirstNonPHI());
 | 
						|
  Value *BEVal = BEInst;
 | 
						|
  PHINode *NewPhi;
 | 
						|
  for (int i = Iterations-1; i >=0 ; --i) {
 | 
						|
    Instruction *InstInPreheader = InstsInPreheader[i];
 | 
						|
    NewPhi = IRB.CreatePHI(InstInPreheader->getType(), 2);
 | 
						|
    NewPhi->addIncoming(InstInPreheader, LoopPH);
 | 
						|
    NewPhi->addIncoming(BEVal, BB);
 | 
						|
    LLVM_DEBUG(dbgs() << "Adding " << *NewPhi << " to " << BB->getName()
 | 
						|
                      << "\n");
 | 
						|
    BEVal = NewPhi;
 | 
						|
  }
 | 
						|
  // We are in LCSSA form. So, a value defined inside the Loop is used only
 | 
						|
  // inside the loop. So, the following is safe.
 | 
						|
  Inst2Replace->replaceAllUsesWith(NewPhi);
 | 
						|
  ReplacedInsts.insert(Inst2Replace);
 | 
						|
  ++HexagonNumVectorLoopCarriedReuse;
 | 
						|
}
 | 
						|
 | 
						|
bool HexagonVectorLoopCarriedReuse::doVLCR() {
 | 
						|
  assert(CurLoop->getSubLoops().empty() &&
 | 
						|
         "Can do VLCR on the innermost loop only");
 | 
						|
  assert((CurLoop->getNumBlocks() == 1) &&
 | 
						|
         "Can do VLCR only on single block loops");
 | 
						|
 | 
						|
  bool Changed = false;
 | 
						|
  bool Continue;
 | 
						|
 | 
						|
  LLVM_DEBUG(dbgs() << "Working on Loop: " << *CurLoop->getHeader() << "\n");
 | 
						|
  do {
 | 
						|
    // Reset datastructures.
 | 
						|
    Dependences.clear();
 | 
						|
    Continue = false;
 | 
						|
 | 
						|
    findLoopCarriedDeps();
 | 
						|
    findValueToReuse();
 | 
						|
    if (ReuseCandidate.isDefined()) {
 | 
						|
      reuseValue();
 | 
						|
      Changed = true;
 | 
						|
      Continue = true;
 | 
						|
    }
 | 
						|
    llvm::for_each(Dependences, std::default_delete<DepChain>());
 | 
						|
  } while (Continue);
 | 
						|
  return Changed;
 | 
						|
}
 | 
						|
 | 
						|
/// Appends to \p D the chain of header PHIs starting at \p I, followed by the
/// first non-PHI instruction reached over the backedge.
///
/// \p D is cleared (left empty) if any PHI on the way does not have exactly
/// two incoming values, is not in the loop header, has a non-instruction
/// backedge or preheader value, or has already been visited.
void HexagonVectorLoopCarriedReuse::findDepChainFromPHI(Instruction *I,
                                                        DepChain &D) {
  BasicBlock *Header = CurLoop->getHeader();
  Instruction *Cur = I;

  // Iterative walk along backedge values; the original recursion had no bound
  // when PHIs formed a cycle.
  while (auto *PN = dyn_cast<PHINode>(Cur)) {
    if (PN->getNumIncomingValues() != 2 || PN->getParent() != Header) {
      D.clear();
      return;
    }

    // A PHI that is already on the chain means the backedge values cycle
    // through PHIs only (e.g. a PHI feeding itself); there is no chain end.
    if (is_contained(D.getChain(), PN)) {
      D.clear();
      return;
    }

    // The backedge value may be a constant or another non-instruction value;
    // such a PHI does not start a usable chain.
    auto *BEInst = dyn_cast<Instruction>(PN->getIncomingValueForBlock(Header));
    if (!BEInst) {
      D.clear();
      return;
    }

    Value *PreHdrVal =
        PN->getIncomingValueForBlock(CurLoop->getLoopPreheader());
    if (!PreHdrVal || !isa<Instruction>(PreHdrVal)) {
      D.clear();
      return;
    }

    D.push_back(PN);
    Cur = BEInst;
  }

  // Cur is the first non-PHI instruction: the end of the chain.
  D.push_back(Cur);
}
 | 
						|
 | 
						|
/// Returns the first recorded chain that starts at \p I1, ends at \p I2 and
/// spans exactly \p Iters iterations, or nullptr if there is none.
DepChain *HexagonVectorLoopCarriedReuse::getDepChainBtwn(Instruction *I1,
                                                         Instruction *I2,
                                                         int Iters) {
  for (DepChain *Candidate : Dependences) {
    if (Candidate->front() != I1)
      continue;
    if (Candidate->back() != I2)
      continue;
    if (Candidate->iterations() != Iters)
      continue;
    return Candidate;
  }
  return nullptr;
}
 | 
						|
 | 
						|
void HexagonVectorLoopCarriedReuse::findLoopCarriedDeps() {
 | 
						|
  BasicBlock *BB = CurLoop->getHeader();
 | 
						|
  for (auto I = BB->begin(), E = BB->end(); I != E && isa<PHINode>(I); ++I) {
 | 
						|
    auto *PN = cast<PHINode>(I);
 | 
						|
    if (!isa<VectorType>(PN->getType()))
 | 
						|
      continue;
 | 
						|
 | 
						|
    DepChain *D = new DepChain();
 | 
						|
    findDepChainFromPHI(PN, *D);
 | 
						|
    if (D->size() != 0)
 | 
						|
      Dependences.insert(D);
 | 
						|
    else
 | 
						|
      delete D;
 | 
						|
  }
 | 
						|
  LLVM_DEBUG(dbgs() << "Found " << Dependences.size() << " dependences\n");
 | 
						|
  LLVM_DEBUG(for (size_t i = 0; i < Dependences.size();
 | 
						|
                  ++i) { dbgs() << *Dependences[i] << "\n"; });
 | 
						|
}
 | 
						|
 | 
						|
/// Factory for the legacy pass; returns a newly allocated pass object.
Pass *llvm::createHexagonVectorLoopCarriedReuseLegacyPass() {
  auto *LegacyPass = new HexagonVectorLoopCarriedReuseLegacyPass();
  return LegacyPass;
}
 |