638 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			638 lines
		
	
	
		
			23 KiB
		
	
	
	
		
			C++
		
	
	
	
//===--------------------- SIOptimizeVGPRLiveRange.cpp  -------------------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
/// \file
 | 
						|
/// This pass tries to remove unnecessary VGPR live ranges in divergent if-else
 | 
						|
/// structures and waterfall loops.
 | 
						|
///
 | 
						|
/// When we do structurization, we usually transform an if-else into two
 | 
						|
/// successive if-then (with a flow block to do predicate inversion). Consider a
 | 
						|
/// simple case after structurization: A divergent value %a was defined before
 | 
						|
/// if-else and used in both THEN (use in THEN is optional) and ELSE part:
 | 
						|
///    bb.if:
 | 
						|
///      %a = ...
 | 
						|
///      ...
 | 
						|
///    bb.then:
 | 
						|
///      ... = op %a
 | 
						|
///      ... // %a can be dead here
 | 
						|
///    bb.flow:
 | 
						|
///      ...
 | 
						|
///    bb.else:
 | 
						|
///      ... = %a
 | 
						|
///      ...
 | 
						|
///    bb.endif
 | 
						|
///
 | 
						|
///  As register allocator has no idea of the thread-control-flow, it will just
 | 
						|
///  assume %a would be alive in the whole range of bb.then because of a later
 | 
						|
///  use in bb.else. On AMDGPU architecture, the VGPR is accessed with respect
 | 
						|
///  to exec mask. For this if-else case, the lanes active in bb.then will be
 | 
						|
///  inactive in bb.else, and vice-versa. So we are safe to say that %a was dead
 | 
						|
///  after the last use in bb.then until the end of the block. The reason is
 | 
						|
///  the instructions in bb.then will only overwrite lanes that will never be
 | 
						|
///  accessed in bb.else.
 | 
						|
///
 | 
						|
///  This pass aims to to tell register allocator that %a is in-fact dead,
 | 
						|
///  through inserting a phi-node in bb.flow saying that %a is undef when coming
 | 
						|
///  from bb.then, and then replace the uses in the bb.else with the result of
 | 
						|
///  newly inserted phi.
 | 
						|
///
 | 
						|
///  Two key conditions must be met to ensure correctness:
 | 
						|
///  1.) The def-point should be in the same loop-level as if-else-endif to make
 | 
						|
///      sure the second loop iteration still get correct data.
 | 
						|
///  2.) There should be no further uses after the IF-ELSE region.
 | 
						|
///
 | 
						|
///
 | 
						|
/// Waterfall loops get inserted around instructions that use divergent values
 | 
						|
/// but can only be executed with a uniform value. For example an indirect call
 | 
						|
/// to a divergent address:
 | 
						|
///    bb.start:
 | 
						|
///      %a = ...
 | 
						|
///      %fun = ...
 | 
						|
///      ...
 | 
						|
///    bb.loop:
 | 
						|
///      call %fun (%a)
 | 
						|
///      ... // %a can be dead here
 | 
						|
///      loop %bb.loop
 | 
						|
///
 | 
						|
///  The loop block is executed multiple times, but it is run exactly once for
 | 
						|
///  each active lane. Similar to the if-else case, the register allocator
 | 
						|
///  assumes that %a is live throughout the loop as it is used again in the next
 | 
						|
///  iteration. If %a is a VGPR that is unused after the loop, it does not need
 | 
						|
///  to be live after its last use in the loop block. By inserting a phi-node at
 | 
						|
///  the start of bb.loop that is undef when coming from bb.loop, the register
 | 
						|
///  allocation knows that the value of %a does not need to be preserved through
 | 
						|
///  iterations of the loop.
 | 
						|
///
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "AMDGPU.h"
 | 
						|
#include "GCNSubtarget.h"
 | 
						|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 | 
						|
#include "SIMachineFunctionInfo.h"
 | 
						|
#include "llvm/CodeGen/LiveVariables.h"
 | 
						|
#include "llvm/CodeGen/MachineDominators.h"
 | 
						|
#include "llvm/CodeGen/MachineLoopInfo.h"
 | 
						|
#include "llvm/CodeGen/TargetRegisterInfo.h"
 | 
						|
#include "llvm/InitializePasses.h"
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "si-opt-vgpr-liverange"
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
class SIOptimizeVGPRLiveRange : public MachineFunctionPass {
 | 
						|
private:
 | 
						|
  const SIRegisterInfo *TRI = nullptr;
 | 
						|
  const SIInstrInfo *TII = nullptr;
 | 
						|
  LiveVariables *LV = nullptr;
 | 
						|
  MachineDominatorTree *MDT = nullptr;
 | 
						|
  const MachineLoopInfo *Loops = nullptr;
 | 
						|
  MachineRegisterInfo *MRI = nullptr;
 | 
						|
 | 
						|
public:
 | 
						|
  static char ID;
 | 
						|
 | 
						|
  MachineBasicBlock *getElseTarget(MachineBasicBlock *MBB) const;
 | 
						|
 | 
						|
  void collectElseRegionBlocks(MachineBasicBlock *Flow,
 | 
						|
                               MachineBasicBlock *Endif,
 | 
						|
                               SmallSetVector<MachineBasicBlock *, 16> &) const;
 | 
						|
 | 
						|
  void
 | 
						|
  collectCandidateRegisters(MachineBasicBlock *If, MachineBasicBlock *Flow,
 | 
						|
                            MachineBasicBlock *Endif,
 | 
						|
                            SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
 | 
						|
                            SmallVectorImpl<Register> &CandidateRegs) const;
 | 
						|
 | 
						|
  void collectWaterfallCandidateRegisters(
 | 
						|
      MachineBasicBlock *Loop,
 | 
						|
      SmallSetVector<Register, 16> &CandidateRegs) const;
 | 
						|
 | 
						|
  void findNonPHIUsesInBlock(Register Reg, MachineBasicBlock *MBB,
 | 
						|
                             SmallVectorImpl<MachineInstr *> &Uses) const;
 | 
						|
 | 
						|
  void updateLiveRangeInThenRegion(Register Reg, MachineBasicBlock *If,
 | 
						|
                                   MachineBasicBlock *Flow) const;
 | 
						|
 | 
						|
  void updateLiveRangeInElseRegion(
 | 
						|
      Register Reg, Register NewReg, MachineBasicBlock *Flow,
 | 
						|
      MachineBasicBlock *Endif,
 | 
						|
      SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
 | 
						|
 | 
						|
  void
 | 
						|
  optimizeLiveRange(Register Reg, MachineBasicBlock *If,
 | 
						|
                    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
 | 
						|
                    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const;
 | 
						|
 | 
						|
  void optimizeWaterfallLiveRange(Register Reg, MachineBasicBlock *If) const;
 | 
						|
 | 
						|
  SIOptimizeVGPRLiveRange() : MachineFunctionPass(ID) {}
 | 
						|
 | 
						|
  bool runOnMachineFunction(MachineFunction &MF) override;
 | 
						|
 | 
						|
  StringRef getPassName() const override {
 | 
						|
    return "SI Optimize VGPR LiveRange";
 | 
						|
  }
 | 
						|
 | 
						|
  void getAnalysisUsage(AnalysisUsage &AU) const override {
 | 
						|
    AU.addRequired<LiveVariables>();
 | 
						|
    AU.addRequired<MachineDominatorTree>();
 | 
						|
    AU.addRequired<MachineLoopInfo>();
 | 
						|
    AU.addPreserved<LiveVariables>();
 | 
						|
    AU.addPreserved<MachineDominatorTree>();
 | 
						|
    AU.addPreserved<MachineLoopInfo>();
 | 
						|
    MachineFunctionPass::getAnalysisUsage(AU);
 | 
						|
  }
 | 
						|
 | 
						|
  MachineFunctionProperties getRequiredProperties() const override {
 | 
						|
    return MachineFunctionProperties().set(
 | 
						|
        MachineFunctionProperties::Property::IsSSA);
 | 
						|
  }
 | 
						|
 | 
						|
  MachineFunctionProperties getClearedProperties() const override {
 | 
						|
    return MachineFunctionProperties().set(
 | 
						|
        MachineFunctionProperties::Property::NoPHIs);
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
} // end anonymous namespace
 | 
						|
 | 
						|
// Check whether the MBB is a else flow block and get the branching target which
 | 
						|
// is the Endif block
 | 
						|
MachineBasicBlock *
 | 
						|
SIOptimizeVGPRLiveRange::getElseTarget(MachineBasicBlock *MBB) const {
 | 
						|
  for (auto &BR : MBB->terminators()) {
 | 
						|
    if (BR.getOpcode() == AMDGPU::SI_ELSE)
 | 
						|
      return BR.getOperand(2).getMBB();
 | 
						|
  }
 | 
						|
  return nullptr;
 | 
						|
}
 | 
						|
 | 
						|
void SIOptimizeVGPRLiveRange::collectElseRegionBlocks(
 | 
						|
    MachineBasicBlock *Flow, MachineBasicBlock *Endif,
 | 
						|
    SmallSetVector<MachineBasicBlock *, 16> &Blocks) const {
 | 
						|
  assert(Flow != Endif);
 | 
						|
 | 
						|
  MachineBasicBlock *MBB = Endif;
 | 
						|
  unsigned Cur = 0;
 | 
						|
  while (MBB) {
 | 
						|
    for (auto *Pred : MBB->predecessors()) {
 | 
						|
      if (Pred != Flow && !Blocks.contains(Pred))
 | 
						|
        Blocks.insert(Pred);
 | 
						|
    }
 | 
						|
 | 
						|
    if (Cur < Blocks.size())
 | 
						|
      MBB = Blocks[Cur++];
 | 
						|
    else
 | 
						|
      MBB = nullptr;
 | 
						|
  }
 | 
						|
 | 
						|
  LLVM_DEBUG({
 | 
						|
    dbgs() << "Found Else blocks: ";
 | 
						|
    for (auto *MBB : Blocks)
 | 
						|
      dbgs() << printMBBReference(*MBB) << ' ';
 | 
						|
    dbgs() << '\n';
 | 
						|
  });
 | 
						|
}
 | 
						|
 | 
						|
/// Find the instructions(excluding phi) in \p MBB that uses the \p Reg.
 | 
						|
void SIOptimizeVGPRLiveRange::findNonPHIUsesInBlock(
 | 
						|
    Register Reg, MachineBasicBlock *MBB,
 | 
						|
    SmallVectorImpl<MachineInstr *> &Uses) const {
 | 
						|
  for (auto &UseMI : MRI->use_nodbg_instructions(Reg)) {
 | 
						|
    if (UseMI.getParent() == MBB && !UseMI.isPHI())
 | 
						|
      Uses.push_back(&UseMI);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/// Collect the killed registers in the ELSE region which are not alive through
 | 
						|
/// the whole THEN region.
 | 
						|
void SIOptimizeVGPRLiveRange::collectCandidateRegisters(
 | 
						|
    MachineBasicBlock *If, MachineBasicBlock *Flow, MachineBasicBlock *Endif,
 | 
						|
    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks,
 | 
						|
    SmallVectorImpl<Register> &CandidateRegs) const {
 | 
						|
 | 
						|
  SmallSet<Register, 8> KillsInElse;
 | 
						|
 | 
						|
  for (auto *Else : ElseBlocks) {
 | 
						|
    for (auto &MI : Else->instrs()) {
 | 
						|
      if (MI.isDebugInstr())
 | 
						|
        continue;
 | 
						|
 | 
						|
      for (auto &MO : MI.operands()) {
 | 
						|
        if (!MO.isReg() || !MO.getReg() || MO.isDef())
 | 
						|
          continue;
 | 
						|
 | 
						|
        Register MOReg = MO.getReg();
 | 
						|
        // We can only optimize AGPR/VGPR virtual register
 | 
						|
        if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
 | 
						|
          continue;
 | 
						|
 | 
						|
        if (MO.readsReg()) {
 | 
						|
          LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
 | 
						|
          const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
 | 
						|
          // Make sure two conditions are met:
 | 
						|
          // a.) the value is defined before/in the IF block
 | 
						|
          // b.) should be defined in the same loop-level.
 | 
						|
          if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
 | 
						|
              Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If)) {
 | 
						|
            // Check if the register is live into the endif block. If not,
 | 
						|
            // consider it killed in the else region.
 | 
						|
            LiveVariables::VarInfo &VI = LV->getVarInfo(MOReg);
 | 
						|
            if (!VI.isLiveIn(*Endif, MOReg, *MRI)) {
 | 
						|
              KillsInElse.insert(MOReg);
 | 
						|
            } else {
 | 
						|
              LLVM_DEBUG(dbgs() << "Excluding " << printReg(MOReg, TRI)
 | 
						|
                                << " as Live in Endif\n");
 | 
						|
            }
 | 
						|
          }
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Check the phis in the Endif, looking for value coming from the ELSE
 | 
						|
  // region. Make sure the phi-use is the last use.
 | 
						|
  for (auto &MI : Endif->phis()) {
 | 
						|
    for (unsigned Idx = 1; Idx < MI.getNumOperands(); Idx += 2) {
 | 
						|
      auto &MO = MI.getOperand(Idx);
 | 
						|
      auto *Pred = MI.getOperand(Idx + 1).getMBB();
 | 
						|
      if (Pred == Flow)
 | 
						|
        continue;
 | 
						|
      assert(ElseBlocks.contains(Pred) && "Should be from Else region\n");
 | 
						|
 | 
						|
      if (!MO.isReg() || !MO.getReg() || MO.isUndef())
 | 
						|
        continue;
 | 
						|
 | 
						|
      Register Reg = MO.getReg();
 | 
						|
      if (Reg.isPhysical() || !TRI->isVectorRegister(*MRI, Reg))
 | 
						|
        continue;
 | 
						|
 | 
						|
      LiveVariables::VarInfo &VI = LV->getVarInfo(Reg);
 | 
						|
 | 
						|
      if (VI.isLiveIn(*Endif, Reg, *MRI)) {
 | 
						|
        LLVM_DEBUG(dbgs() << "Excluding " << printReg(Reg, TRI)
 | 
						|
                          << " as Live in Endif\n");
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      // Make sure two conditions are met:
 | 
						|
      // a.) the value is defined before/in the IF block
 | 
						|
      // b.) should be defined in the same loop-level.
 | 
						|
      const MachineBasicBlock *DefMBB = MRI->getVRegDef(Reg)->getParent();
 | 
						|
      if ((VI.AliveBlocks.test(If->getNumber()) || DefMBB == If) &&
 | 
						|
          Loops->getLoopFor(DefMBB) == Loops->getLoopFor(If))
 | 
						|
        KillsInElse.insert(Reg);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  auto IsLiveThroughThen = [&](Register Reg) {
 | 
						|
    for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
 | 
						|
         ++I) {
 | 
						|
      if (!I->readsReg())
 | 
						|
        continue;
 | 
						|
      auto *UseMI = I->getParent();
 | 
						|
      auto *UseMBB = UseMI->getParent();
 | 
						|
      if (UseMBB == Flow || UseMBB == Endif) {
 | 
						|
        if (!UseMI->isPHI())
 | 
						|
          return true;
 | 
						|
 | 
						|
        auto *IncomingMBB = UseMI->getOperand(I.getOperandNo() + 1).getMBB();
 | 
						|
        // The register is live through the path If->Flow or Flow->Endif.
 | 
						|
        // we should not optimize for such cases.
 | 
						|
        if ((UseMBB == Flow && IncomingMBB != If) ||
 | 
						|
            (UseMBB == Endif && IncomingMBB == Flow))
 | 
						|
          return true;
 | 
						|
      }
 | 
						|
    }
 | 
						|
    return false;
 | 
						|
  };
 | 
						|
 | 
						|
  for (auto Reg : KillsInElse) {
 | 
						|
    if (!IsLiveThroughThen(Reg))
 | 
						|
      CandidateRegs.push_back(Reg);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/// Collect the registers used in the waterfall loop block that are defined
 | 
						|
/// before.
 | 
						|
void SIOptimizeVGPRLiveRange::collectWaterfallCandidateRegisters(
 | 
						|
    MachineBasicBlock *Loop,
 | 
						|
    SmallSetVector<Register, 16> &CandidateRegs) const {
 | 
						|
 | 
						|
  for (auto &MI : Loop->instrs()) {
 | 
						|
    if (MI.isDebugInstr())
 | 
						|
      continue;
 | 
						|
 | 
						|
    for (auto &MO : MI.operands()) {
 | 
						|
      if (!MO.isReg() || !MO.getReg() || MO.isDef())
 | 
						|
        continue;
 | 
						|
 | 
						|
      Register MOReg = MO.getReg();
 | 
						|
      // We can only optimize AGPR/VGPR virtual register
 | 
						|
      if (MOReg.isPhysical() || !TRI->isVectorRegister(*MRI, MOReg))
 | 
						|
        continue;
 | 
						|
 | 
						|
      if (MO.readsReg()) {
 | 
						|
        const MachineBasicBlock *DefMBB = MRI->getVRegDef(MOReg)->getParent();
 | 
						|
        // Make sure the value is defined before the LOOP block
 | 
						|
        if (DefMBB != Loop && !CandidateRegs.contains(MOReg)) {
 | 
						|
          // If the variable is used after the loop, the register coalescer will
 | 
						|
          // merge the newly created register and remove the phi node again.
 | 
						|
          // Just do nothing in that case.
 | 
						|
          LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(MOReg);
 | 
						|
          bool IsUsed = false;
 | 
						|
          for (auto *Succ : Loop->successors()) {
 | 
						|
            if (Succ != Loop && OldVarInfo.isLiveIn(*Succ, MOReg, *MRI)) {
 | 
						|
              IsUsed = true;
 | 
						|
              break;
 | 
						|
            }
 | 
						|
          }
 | 
						|
          if (!IsUsed) {
 | 
						|
            LLVM_DEBUG(dbgs() << "Found candidate reg: "
 | 
						|
                              << printReg(MOReg, TRI, 0, MRI) << '\n');
 | 
						|
            CandidateRegs.insert(MOReg);
 | 
						|
          } else {
 | 
						|
            LLVM_DEBUG(dbgs() << "Reg is used after loop, ignoring: "
 | 
						|
                              << printReg(MOReg, TRI, 0, MRI) << '\n');
 | 
						|
          }
 | 
						|
        }
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
// Re-calculate the liveness of \p Reg in the THEN-region
 | 
						|
void SIOptimizeVGPRLiveRange::updateLiveRangeInThenRegion(
 | 
						|
    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow) const {
 | 
						|
  SetVector<MachineBasicBlock *> Blocks;
 | 
						|
  SmallVector<MachineBasicBlock *> WorkList({If});
 | 
						|
 | 
						|
  // Collect all successors until we see the flow block, where we should
 | 
						|
  // reconverge.
 | 
						|
  while (!WorkList.empty()) {
 | 
						|
    auto *MBB = WorkList.pop_back_val();
 | 
						|
    for (auto *Succ : MBB->successors()) {
 | 
						|
      if (Succ != Flow && !Blocks.contains(Succ)) {
 | 
						|
        WorkList.push_back(Succ);
 | 
						|
        Blocks.insert(Succ);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
 | 
						|
  for (MachineBasicBlock *MBB : Blocks) {
 | 
						|
    // Clear Live bit, as we will recalculate afterwards
 | 
						|
    LLVM_DEBUG(dbgs() << "Clear AliveBlock " << printMBBReference(*MBB)
 | 
						|
                      << '\n');
 | 
						|
    OldVarInfo.AliveBlocks.reset(MBB->getNumber());
 | 
						|
  }
 | 
						|
 | 
						|
  SmallPtrSet<MachineBasicBlock *, 4> PHIIncoming;
 | 
						|
 | 
						|
  // Get the blocks the Reg should be alive through
 | 
						|
  for (auto I = MRI->use_nodbg_begin(Reg), E = MRI->use_nodbg_end(); I != E;
 | 
						|
       ++I) {
 | 
						|
    auto *UseMI = I->getParent();
 | 
						|
    if (UseMI->isPHI() && I->readsReg()) {
 | 
						|
      if (Blocks.contains(UseMI->getParent()))
 | 
						|
        PHIIncoming.insert(UseMI->getOperand(I.getOperandNo() + 1).getMBB());
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  for (MachineBasicBlock *MBB : Blocks) {
 | 
						|
    SmallVector<MachineInstr *> Uses;
 | 
						|
    // PHI instructions has been processed before.
 | 
						|
    findNonPHIUsesInBlock(Reg, MBB, Uses);
 | 
						|
 | 
						|
    if (Uses.size() == 1) {
 | 
						|
      LLVM_DEBUG(dbgs() << "Found one Non-PHI use in "
 | 
						|
                        << printMBBReference(*MBB) << '\n');
 | 
						|
      LV->HandleVirtRegUse(Reg, MBB, *(*Uses.begin()));
 | 
						|
    } else if (Uses.size() > 1) {
 | 
						|
      // Process the instructions in-order
 | 
						|
      LLVM_DEBUG(dbgs() << "Found " << Uses.size() << " Non-PHI uses in "
 | 
						|
                        << printMBBReference(*MBB) << '\n');
 | 
						|
      for (MachineInstr &MI : *MBB) {
 | 
						|
        if (llvm::is_contained(Uses, &MI))
 | 
						|
          LV->HandleVirtRegUse(Reg, MBB, MI);
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
    // Mark Reg alive through the block if this is a PHI incoming block
 | 
						|
    if (PHIIncoming.contains(MBB))
 | 
						|
      LV->MarkVirtRegAliveInBlock(OldVarInfo, MRI->getVRegDef(Reg)->getParent(),
 | 
						|
                                  MBB);
 | 
						|
  }
 | 
						|
 | 
						|
  // Set the isKilled flag if we get new Kills in the THEN region.
 | 
						|
  for (auto *MI : OldVarInfo.Kills) {
 | 
						|
    if (Blocks.contains(MI->getParent()))
 | 
						|
      MI->addRegisterKilled(Reg, TRI);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
void SIOptimizeVGPRLiveRange::updateLiveRangeInElseRegion(
 | 
						|
    Register Reg, Register NewReg, MachineBasicBlock *Flow,
 | 
						|
    MachineBasicBlock *Endif,
 | 
						|
    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
 | 
						|
  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
 | 
						|
  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
 | 
						|
 | 
						|
  // Transfer aliveBlocks from Reg to NewReg
 | 
						|
  for (auto *MBB : ElseBlocks) {
 | 
						|
    unsigned BBNum = MBB->getNumber();
 | 
						|
    if (OldVarInfo.AliveBlocks.test(BBNum)) {
 | 
						|
      NewVarInfo.AliveBlocks.set(BBNum);
 | 
						|
      LLVM_DEBUG(dbgs() << "Removing AliveBlock " << printMBBReference(*MBB)
 | 
						|
                        << '\n');
 | 
						|
      OldVarInfo.AliveBlocks.reset(BBNum);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Transfer the possible Kills in ElseBlocks from Reg to NewReg
 | 
						|
  auto I = OldVarInfo.Kills.begin();
 | 
						|
  while (I != OldVarInfo.Kills.end()) {
 | 
						|
    if (ElseBlocks.contains((*I)->getParent())) {
 | 
						|
      NewVarInfo.Kills.push_back(*I);
 | 
						|
      I = OldVarInfo.Kills.erase(I);
 | 
						|
    } else {
 | 
						|
      ++I;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
void SIOptimizeVGPRLiveRange::optimizeLiveRange(
 | 
						|
    Register Reg, MachineBasicBlock *If, MachineBasicBlock *Flow,
 | 
						|
    MachineBasicBlock *Endif,
 | 
						|
    SmallSetVector<MachineBasicBlock *, 16> &ElseBlocks) const {
 | 
						|
  // Insert a new PHI, marking the value from the THEN region being
 | 
						|
  // undef.
 | 
						|
  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
 | 
						|
  const auto *RC = MRI->getRegClass(Reg);
 | 
						|
  Register NewReg = MRI->createVirtualRegister(RC);
 | 
						|
  Register UndefReg = MRI->createVirtualRegister(RC);
 | 
						|
  MachineInstrBuilder PHI = BuildMI(*Flow, Flow->getFirstNonPHI(), DebugLoc(),
 | 
						|
                                    TII->get(TargetOpcode::PHI), NewReg);
 | 
						|
  for (auto *Pred : Flow->predecessors()) {
 | 
						|
    if (Pred == If)
 | 
						|
      PHI.addReg(Reg).addMBB(Pred);
 | 
						|
    else
 | 
						|
      PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
 | 
						|
  }
 | 
						|
 | 
						|
  // Replace all uses in the ELSE region or the PHIs in ENDIF block
 | 
						|
  // Use early increment range because setReg() will update the linked list.
 | 
						|
  for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
 | 
						|
    auto *UseMI = O.getParent();
 | 
						|
    auto *UseBlock = UseMI->getParent();
 | 
						|
    // Replace uses in Endif block
 | 
						|
    if (UseBlock == Endif) {
 | 
						|
      assert(UseMI->isPHI() && "Uses should be PHI in Endif block");
 | 
						|
      O.setReg(NewReg);
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
 | 
						|
    // Replace uses in Else region
 | 
						|
    if (ElseBlocks.contains(UseBlock))
 | 
						|
      O.setReg(NewReg);
 | 
						|
  }
 | 
						|
 | 
						|
  // The optimized Reg is not alive through Flow blocks anymore.
 | 
						|
  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
 | 
						|
  OldVarInfo.AliveBlocks.reset(Flow->getNumber());
 | 
						|
 | 
						|
  updateLiveRangeInElseRegion(Reg, NewReg, Flow, Endif, ElseBlocks);
 | 
						|
  updateLiveRangeInThenRegion(Reg, If, Flow);
 | 
						|
}
 | 
						|
 | 
						|
void SIOptimizeVGPRLiveRange::optimizeWaterfallLiveRange(
 | 
						|
    Register Reg, MachineBasicBlock *Loop) const {
 | 
						|
  // Insert a new PHI, marking the value from the last loop iteration undef.
 | 
						|
  LLVM_DEBUG(dbgs() << "Optimizing " << printReg(Reg, TRI) << '\n');
 | 
						|
  const auto *RC = MRI->getRegClass(Reg);
 | 
						|
  Register NewReg = MRI->createVirtualRegister(RC);
 | 
						|
  Register UndefReg = MRI->createVirtualRegister(RC);
 | 
						|
 | 
						|
  // Replace all uses in the LOOP region
 | 
						|
  // Use early increment range because setReg() will update the linked list.
 | 
						|
  for (auto &O : make_early_inc_range(MRI->use_operands(Reg))) {
 | 
						|
    auto *UseMI = O.getParent();
 | 
						|
    auto *UseBlock = UseMI->getParent();
 | 
						|
    // Replace uses in Loop block
 | 
						|
    if (UseBlock == Loop)
 | 
						|
      O.setReg(NewReg);
 | 
						|
  }
 | 
						|
 | 
						|
  MachineInstrBuilder PHI = BuildMI(*Loop, Loop->getFirstNonPHI(), DebugLoc(),
 | 
						|
                                    TII->get(TargetOpcode::PHI), NewReg);
 | 
						|
  for (auto *Pred : Loop->predecessors()) {
 | 
						|
    if (Pred == Loop)
 | 
						|
      PHI.addReg(UndefReg, RegState::Undef).addMBB(Pred);
 | 
						|
    else
 | 
						|
      PHI.addReg(Reg).addMBB(Pred);
 | 
						|
  }
 | 
						|
 | 
						|
  LiveVariables::VarInfo &NewVarInfo = LV->getVarInfo(NewReg);
 | 
						|
  LiveVariables::VarInfo &OldVarInfo = LV->getVarInfo(Reg);
 | 
						|
 | 
						|
  // collectWaterfallCandidateRegisters only collects registers that are dead
 | 
						|
  // after the loop. So we know that the old reg is not live throughout the
 | 
						|
  // whole block anymore.
 | 
						|
  OldVarInfo.AliveBlocks.reset(Loop->getNumber());
 | 
						|
 | 
						|
  // Mark the last use as kill
 | 
						|
  for (auto &MI : reverse(Loop->instrs())) {
 | 
						|
    if (MI.readsRegister(NewReg, TRI)) {
 | 
						|
      MI.addRegisterKilled(NewReg, TRI);
 | 
						|
      NewVarInfo.Kills.push_back(&MI);
 | 
						|
      break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  assert(!NewVarInfo.Kills.empty() &&
 | 
						|
         "Failed to find last usage of register in loop");
 | 
						|
}
 | 
						|
 | 
						|
char SIOptimizeVGPRLiveRange::ID = 0;
 | 
						|
 | 
						|
INITIALIZE_PASS_BEGIN(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
 | 
						|
                      "SI Optimize VGPR LiveRange", false, false)
 | 
						|
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
 | 
						|
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
 | 
						|
INITIALIZE_PASS_DEPENDENCY(LiveVariables)
 | 
						|
INITIALIZE_PASS_END(SIOptimizeVGPRLiveRange, DEBUG_TYPE,
 | 
						|
                    "SI Optimize VGPR LiveRange", false, false)
 | 
						|
 | 
						|
char &llvm::SIOptimizeVGPRLiveRangeID = SIOptimizeVGPRLiveRange::ID;
 | 
						|
 | 
						|
FunctionPass *llvm::createSIOptimizeVGPRLiveRangePass() {
 | 
						|
  return new SIOptimizeVGPRLiveRange();
 | 
						|
}
 | 
						|
 | 
						|
bool SIOptimizeVGPRLiveRange::runOnMachineFunction(MachineFunction &MF) {
 | 
						|
 | 
						|
  const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
 | 
						|
  TII = ST.getInstrInfo();
 | 
						|
  TRI = &TII->getRegisterInfo();
 | 
						|
  MDT = &getAnalysis<MachineDominatorTree>();
 | 
						|
  Loops = &getAnalysis<MachineLoopInfo>();
 | 
						|
  LV = &getAnalysis<LiveVariables>();
 | 
						|
  MRI = &MF.getRegInfo();
 | 
						|
 | 
						|
  if (skipFunction(MF.getFunction()))
 | 
						|
    return false;
 | 
						|
 | 
						|
  bool MadeChange = false;
 | 
						|
 | 
						|
  // TODO: we need to think about the order of visiting the blocks to get
 | 
						|
  // optimal result for nesting if-else cases.
 | 
						|
  for (MachineBasicBlock &MBB : MF) {
 | 
						|
    for (auto &MI : MBB.terminators()) {
 | 
						|
      // Detect the if-else blocks
 | 
						|
      if (MI.getOpcode() == AMDGPU::SI_IF) {
 | 
						|
        MachineBasicBlock *IfTarget = MI.getOperand(2).getMBB();
 | 
						|
        auto *Endif = getElseTarget(IfTarget);
 | 
						|
        if (!Endif)
 | 
						|
          continue;
 | 
						|
 | 
						|
        SmallSetVector<MachineBasicBlock *, 16> ElseBlocks;
 | 
						|
        SmallVector<Register> CandidateRegs;
 | 
						|
 | 
						|
        LLVM_DEBUG(dbgs() << "Checking IF-ELSE-ENDIF: "
 | 
						|
                          << printMBBReference(MBB) << ' '
 | 
						|
                          << printMBBReference(*IfTarget) << ' '
 | 
						|
                          << printMBBReference(*Endif) << '\n');
 | 
						|
 | 
						|
        // Collect all the blocks in the ELSE region
 | 
						|
        collectElseRegionBlocks(IfTarget, Endif, ElseBlocks);
 | 
						|
 | 
						|
        // Collect the registers can be optimized
 | 
						|
        collectCandidateRegisters(&MBB, IfTarget, Endif, ElseBlocks,
 | 
						|
                                  CandidateRegs);
 | 
						|
        MadeChange |= !CandidateRegs.empty();
 | 
						|
        // Now we are safe to optimize.
 | 
						|
        for (auto Reg : CandidateRegs)
 | 
						|
          optimizeLiveRange(Reg, &MBB, IfTarget, Endif, ElseBlocks);
 | 
						|
      } else if (MI.getOpcode() == AMDGPU::SI_WATERFALL_LOOP) {
 | 
						|
        LLVM_DEBUG(dbgs() << "Checking Waterfall loop: "
 | 
						|
                          << printMBBReference(MBB) << '\n');
 | 
						|
 | 
						|
        SmallSetVector<Register, 16> CandidateRegs;
 | 
						|
        collectWaterfallCandidateRegisters(&MBB, CandidateRegs);
 | 
						|
        MadeChange |= !CandidateRegs.empty();
 | 
						|
        // Now we are safe to optimize.
 | 
						|
        for (auto Reg : CandidateRegs)
 | 
						|
          optimizeWaterfallLiveRange(Reg, &MBB);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return MadeChange;
 | 
						|
}
 |