710 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			710 lines
		
	
	
		
			25 KiB
		
	
	
	
		
			C++
		
	
	
	
//===-- X86FastPreTileConfig.cpp - Fast Tile Register Configure------------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
/// \file Pass to preconfigure the shape of physical tile registers.
/// It inserts ldtilecfg ahead of each group of tile registers. The algorithm
/// walks each instruction of a basic block in reverse order. All the tile
/// registers that live out of the basic block are spilled and reloaded
/// before their users. It also checks the dependency of the shape to ensure
/// the shape is defined before ldtilecfg.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "X86.h"
 | 
						|
#include "X86InstrBuilder.h"
 | 
						|
#include "X86MachineFunctionInfo.h"
 | 
						|
#include "X86RegisterInfo.h"
 | 
						|
#include "X86Subtarget.h"
 | 
						|
#include "llvm/ADT/DepthFirstIterator.h"
 | 
						|
#include "llvm/ADT/PostOrderIterator.h"
 | 
						|
#include "llvm/ADT/Statistic.h"
 | 
						|
#include "llvm/CodeGen/MachineFrameInfo.h"
 | 
						|
#include "llvm/CodeGen/MachineFunctionPass.h"
 | 
						|
#include "llvm/CodeGen/MachineInstr.h"
 | 
						|
#include "llvm/CodeGen/MachineRegisterInfo.h"
 | 
						|
#include "llvm/CodeGen/Passes.h"
 | 
						|
#include "llvm/CodeGen/TargetInstrInfo.h"
 | 
						|
#include "llvm/CodeGen/TargetRegisterInfo.h"
 | 
						|
#include "llvm/InitializePasses.h"
 | 
						|
#include "llvm/Support/Debug.h"
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "fastpretileconfig"
 | 
						|
 | 
						|
STATISTIC(NumStores, "Number of stores added");
 | 
						|
STATISTIC(NumLoads, "Number of loads added");
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
class X86FastPreTileConfig : public MachineFunctionPass {
 | 
						|
  MachineFunction *MF = nullptr;
 | 
						|
  const X86Subtarget *ST = nullptr;
 | 
						|
  const TargetInstrInfo *TII = nullptr;
 | 
						|
  MachineRegisterInfo *MRI = nullptr;
 | 
						|
  X86MachineFunctionInfo *X86FI = nullptr;
 | 
						|
  MachineFrameInfo *MFI = nullptr;
 | 
						|
  const TargetRegisterInfo *TRI = nullptr;
 | 
						|
  MachineBasicBlock *MBB = nullptr;
 | 
						|
  int CfgSS = -1;
 | 
						|
  struct PHIInfo {
 | 
						|
    Register Row;
 | 
						|
    Register Col;
 | 
						|
    Register StackAddr;
 | 
						|
  };
 | 
						|
  DenseMap<MachineInstr *, struct PHIInfo> VisitedPHIs;
 | 
						|
 | 
						|
  /// Maps virtual regs to the frame index where these values are spilled.
 | 
						|
  IndexedMap<int, VirtReg2IndexFunctor> StackSlotForVirtReg;
 | 
						|
 | 
						|
  /// Has a bit set for tile virtual register for which it was determined
 | 
						|
  /// that it is alive across blocks.
 | 
						|
  BitVector MayLiveAcrossBlocks;
 | 
						|
 | 
						|
  int getStackSpaceFor(Register VirtReg);
 | 
						|
  void InitializeTileConfigStackSpace();
 | 
						|
  bool mayLiveOut(Register VirtReg, MachineInstr *CfgMI);
 | 
						|
  void spill(MachineBasicBlock::iterator Before, Register VirtReg, bool Kill);
 | 
						|
  void reload(MachineBasicBlock::iterator UseMI, Register VirtReg,
 | 
						|
              MachineOperand *RowMO, MachineOperand *ColMO);
 | 
						|
  void canonicalizePHIs(MachineBasicBlock &MBB);
 | 
						|
  void convertPHI(MachineBasicBlock *MBB, MachineInstr &PHI);
 | 
						|
  void convertPHIs(MachineBasicBlock &MBB);
 | 
						|
  bool configBasicBlock(MachineBasicBlock &MBB);
 | 
						|
 | 
						|
public:
 | 
						|
  X86FastPreTileConfig() : MachineFunctionPass(ID), StackSlotForVirtReg(-1) {}
 | 
						|
 | 
						|
  /// Return the pass name.
 | 
						|
  StringRef getPassName() const override {
 | 
						|
    return "Fast Tile Register Preconfigure";
 | 
						|
  }
 | 
						|
 | 
						|
  /// Perform tile register configure.
 | 
						|
  bool runOnMachineFunction(MachineFunction &MFunc) override;
 | 
						|
 | 
						|
  static char ID;
 | 
						|
};
 | 
						|
 | 
						|
} // end anonymous namespace
 | 
						|
 | 
						|
char X86FastPreTileConfig::ID = 0;
 | 
						|
 | 
						|
INITIALIZE_PASS_BEGIN(X86FastPreTileConfig, DEBUG_TYPE,
 | 
						|
                      "Fast Tile Register Preconfigure", false, false)
 | 
						|
INITIALIZE_PASS_END(X86FastPreTileConfig, DEBUG_TYPE,
 | 
						|
                    "Fast Tile Register Preconfigure", false, false)
 | 
						|
 | 
						|
/// Returns true if \p A is reached no later than \p B when walking \p MBB from
/// its first instruction, or if \p B is the end iterator of \p MBB.
///
/// The walk itself does not test for the end of the block, so when \p B is not
/// the end iterator at least one of \p A and \p B has to be an instruction of
/// \p MBB.
static bool dominates(MachineBasicBlock &MBB,
                      MachineBasicBlock::const_iterator A,
                      MachineBasicBlock::const_iterator B) {
  // Every instruction of the block comes before its end.
  if (B == MBB.end())
    return true;

  // Advance until the first of the two positions is met.
  MachineBasicBlock::const_iterator Cur = MBB.begin();
  while (&*Cur != A && &*Cur != B)
    ++Cur;

  // A dominates B exactly when A was the one we stopped on.
  return &*Cur == A;
}
 | 
						|
 | 
						|
/// This allocates space for the specified virtual register to be held on the
 | 
						|
/// stack.
 | 
						|
int X86FastPreTileConfig::getStackSpaceFor(Register VirtReg) {
 | 
						|
  // Find the location Reg would belong...
 | 
						|
  int SS = StackSlotForVirtReg[VirtReg];
 | 
						|
  // Already has space allocated?
 | 
						|
  if (SS != -1)
 | 
						|
    return SS;
 | 
						|
 | 
						|
  // Allocate a new stack object for this spill location...
 | 
						|
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
 | 
						|
  unsigned Size = TRI->getSpillSize(RC);
 | 
						|
  Align Alignment = TRI->getSpillAlign(RC);
 | 
						|
  int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
 | 
						|
 | 
						|
  // Assign the slot.
 | 
						|
  StackSlotForVirtReg[VirtReg] = FrameIdx;
 | 
						|
  return FrameIdx;
 | 
						|
}
 | 
						|
 | 
						|
/// Returns false if \p VirtReg is known to not live out of the current config.
 | 
						|
/// If \p VirtReg live out of the current MBB, it must live out of the current
 | 
						|
/// config
 | 
						|
bool X86FastPreTileConfig::mayLiveOut(Register VirtReg, MachineInstr *CfgMI) {
 | 
						|
  if (MayLiveAcrossBlocks.test(Register::virtReg2Index(VirtReg)))
 | 
						|
    return true;
 | 
						|
 | 
						|
  for (const MachineInstr &UseInst : MRI->use_nodbg_instructions(VirtReg)) {
 | 
						|
    if (UseInst.getParent() != MBB) {
 | 
						|
      MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
 | 
						|
      return true;
 | 
						|
    }
 | 
						|
 | 
						|
    // The use and def are in the same MBB. If the tile register is
 | 
						|
    // reconfigured, it is crobbered and we need to spill and reload
 | 
						|
    // tile register.
 | 
						|
    if (CfgMI) {
 | 
						|
      if (dominates(*MBB, *CfgMI, UseInst)) {
 | 
						|
        MayLiveAcrossBlocks.set(Register::virtReg2Index(VirtReg));
 | 
						|
        return true;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return false;
 | 
						|
}
 | 
						|
 | 
						|
void X86FastPreTileConfig::InitializeTileConfigStackSpace() {
 | 
						|
  MachineBasicBlock &MBB = MF->front();
 | 
						|
  MachineInstr *MI = &*MBB.getFirstNonPHI();
 | 
						|
  DebugLoc DL;
 | 
						|
  if (ST->hasAVX512()) {
 | 
						|
    Register Zmm = MRI->createVirtualRegister(&X86::VR512RegClass);
 | 
						|
    BuildMI(MBB, MI, DL, TII->get(X86::AVX512_512_SET0), Zmm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSZmr)), CfgSS)
 | 
						|
        .addReg(Zmm);
 | 
						|
  } else if (ST->hasAVX2()) {
 | 
						|
    Register Ymm = MRI->createVirtualRegister(&X86::VR256RegClass);
 | 
						|
    BuildMI(MBB, MI, DL, TII->get(X86::AVX_SET0), Ymm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS)
 | 
						|
        .addReg(Ymm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::VMOVUPSYmr)), CfgSS,
 | 
						|
                      32)
 | 
						|
        .addReg(Ymm);
 | 
						|
  } else {
 | 
						|
    assert(ST->hasSSE2() && "AMX should assume SSE2 enabled");
 | 
						|
    unsigned StoreOpc = ST->hasAVX() ? X86::VMOVUPSmr : X86::MOVUPSmr;
 | 
						|
    Register Xmm = MRI->createVirtualRegister(&X86::VR128RegClass);
 | 
						|
    BuildMI(MBB, MI, DL, TII->get(X86::V_SET0), Xmm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS)
 | 
						|
        .addReg(Xmm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 16)
 | 
						|
        .addReg(Xmm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 32)
 | 
						|
        .addReg(Xmm);
 | 
						|
    addFrameReference(BuildMI(MBB, MI, DL, TII->get(StoreOpc)), CfgSS, 48)
 | 
						|
        .addReg(Xmm);
 | 
						|
  }
 | 
						|
  // Fill in the palette first.
 | 
						|
  addFrameReference(BuildMI(MBB, MI, DL, TII->get(X86::MOV8mi)), CfgSS)
 | 
						|
      .addImm(1);
 | 
						|
}
 | 
						|
 | 
						|
/// Insert spill instruction for \p AssignedReg before \p Before.
 | 
						|
/// TODO: Update DBG_VALUEs with \p VirtReg operands with the stack slot.
 | 
						|
void X86FastPreTileConfig::spill(MachineBasicBlock::iterator Before,
 | 
						|
                                 Register VirtReg, bool Kill) {
 | 
						|
  LLVM_DEBUG(dbgs() << "Spilling " << printReg(VirtReg, TRI) << " \n");
 | 
						|
  int FI = getStackSpaceFor(VirtReg);
 | 
						|
  LLVM_DEBUG(dbgs() << " to stack slot #" << FI << '\n');
 | 
						|
 | 
						|
  const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
 | 
						|
  // Don't need shape information for tile store, becasue it is adjacent to
 | 
						|
  // the tile def instruction.
 | 
						|
  TII->storeRegToStackSlot(*MBB, Before, VirtReg, Kill, FI, &RC, TRI);
 | 
						|
  ++NumStores;
 | 
						|
 | 
						|
  // TODO: update DBG_VALUEs
 | 
						|
}
 | 
						|
 | 
						|
/// Insert reload instruction for \p PhysReg before \p Before.
 | 
						|
void X86FastPreTileConfig::reload(MachineBasicBlock::iterator UseMI,
 | 
						|
                                  Register OrigReg, MachineOperand *RowMO,
 | 
						|
                                  MachineOperand *ColMO) {
 | 
						|
  int FI = getStackSpaceFor(OrigReg);
 | 
						|
  const TargetRegisterClass &RC = *MRI->getRegClass(OrigReg);
 | 
						|
  Register TileReg;
 | 
						|
  // Fold copy to tileload
 | 
						|
  // BB1:
 | 
						|
  // spill src to s
 | 
						|
  //
 | 
						|
  // BB2:
 | 
						|
  // t = copy src
 | 
						|
  // -->
 | 
						|
  // t = tileload (s)
 | 
						|
  if (UseMI->isCopy())
 | 
						|
    TileReg = UseMI->getOperand(0).getReg();
 | 
						|
  else
 | 
						|
    TileReg = MRI->createVirtualRegister(&RC);
 | 
						|
  // Can't use TII->loadRegFromStackSlot(), because we need the shape
 | 
						|
  // information for reload.
 | 
						|
  // tileloadd (%sp, %idx), %tmm
 | 
						|
  unsigned Opc = X86::PTILELOADDV;
 | 
						|
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
 | 
						|
  // FIXME: MBB is not the parent of UseMI.
 | 
						|
  MachineInstr *NewMI = BuildMI(*UseMI->getParent(), UseMI, DebugLoc(),
 | 
						|
                                TII->get(X86::MOV64ri), StrideReg)
 | 
						|
                            .addImm(64);
 | 
						|
  NewMI = addFrameReference(
 | 
						|
      BuildMI(*UseMI->getParent(), UseMI, DebugLoc(), TII->get(Opc), TileReg)
 | 
						|
          .addReg(RowMO->getReg())
 | 
						|
          .addReg(ColMO->getReg()),
 | 
						|
      FI);
 | 
						|
  MachineOperand &MO = NewMI->getOperand(5);
 | 
						|
  MO.setReg(StrideReg);
 | 
						|
  MO.setIsKill(true);
 | 
						|
  RowMO->setIsKill(false);
 | 
						|
  ColMO->setIsKill(false);
 | 
						|
  // Erase copy instruction after it is folded.
 | 
						|
  if (UseMI->isCopy()) {
 | 
						|
    UseMI->eraseFromParent();
 | 
						|
  } else {
 | 
						|
    // Replace the register in the user MI.
 | 
						|
    for (auto &MO : UseMI->operands()) {
 | 
						|
      if (MO.isReg() && MO.getReg() == OrigReg)
 | 
						|
        MO.setReg(TileReg);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  ++NumLoads;
 | 
						|
  LLVM_DEBUG(dbgs() << "Reloading " << printReg(OrigReg, TRI) << " into "
 | 
						|
                    << printReg(TileReg, TRI) << '\n');
 | 
						|
}
 | 
						|
 | 
						|
/// Returns true if \p MI is a non-debug pseudo instruction with at least three
/// operands (tile def, row, col) whose first operand is either a virtual
/// register of the TILE class or one of the physical registers TMM0..TMM7.
static bool isTileDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  if (MI.isDebugInstr() || MI.getNumOperands() < 3 || !MI.isPseudo())
    return false;

  const MachineOperand &Dst = MI.getOperand(0);
  if (!Dst.isReg())
    return false;

  Register Reg = Dst.getReg();
  // FIXME it may be used after Greedy RA and the physical
  // register is not rewritten yet.
  const bool IsVirtTile =
      Reg.isVirtual() && MRI->getRegClass(Reg)->getID() == X86::TILERegClassID;
  const bool IsPhysTile = Reg >= X86::TMM0 && Reg <= X86::TMM7;
  return IsVirtTile || IsPhysTile;
}
 | 
						|
 | 
						|
/// Returns the shape (row and column operands) of the tile held in
/// \p TileReg.
///
/// The shape is read from operands 1 and 2 of the instruction defining
/// \p TileReg when that instruction is a tile definition. COPY instructions
/// are looked through by recursing on their source register. Any other
/// defining instruction is not supported and aborts.
static ShapeT getShape(MachineRegisterInfo *MRI, Register TileReg) {
  MachineInstr *MI = MRI->getVRegDef(TileReg);
  assert(MI && "Tile register has no defining instruction.");

  if (isTileDef(MRI, *MI)) {
    MachineOperand *RowMO = &MI->getOperand(1);
    MachineOperand *ColMO = &MI->getOperand(2);
    return ShapeT(RowMO, ColMO, MRI);
  }

  if (MI->isCopy())
    return getShape(MRI, MI->getOperand(1).getReg());

  // The def should not be a PHI node, because we walk the MBB in reverse post
  // order. The assertion therefore has to fire when MI *is* a PHI; the
  // previous condition was inverted and reported "Unexpected PHI" for every
  // non-PHI instruction instead.
  assert(!MI->isPHI() && "Unexpected PHI when get shape.");
  llvm_unreachable("Unexpected MI when get shape.");
}
 | 
						|
 | 
						|
// BB0:
 | 
						|
// spill t0 to s0
 | 
						|
// BB1:
 | 
						|
// spill t1 to s1
 | 
						|
//
 | 
						|
// BB2:
 | 
						|
// t = phi [t0, bb0] [t1, bb1]
 | 
						|
// -->
 | 
						|
// row = phi [r0, bb0] [r1, bb1]
 | 
						|
// col = phi [c0, bb0] [c1, bb1]
 | 
						|
//   s = phi [s0, bb0] [s1, bb1]
 | 
						|
//   t = tileload row, col, s
 | 
						|
// The new instruction is inserted at the end of the phi node. The order
 | 
						|
// of the original phi node is not ensured.
 | 
						|
void X86FastPreTileConfig::convertPHI(MachineBasicBlock *MBB,
 | 
						|
                                      MachineInstr &PHI) {
 | 
						|
  // 1. Create instruction to get stack slot address of each incoming block.
 | 
						|
  // 2. Create PHI node for the stack address.
 | 
						|
  // 3. Create PHI node for shape. If one of the incoming shape is immediate
 | 
						|
  //    use the immediate and delete the PHI node.
 | 
						|
  // 4. Create tileload instruction from the stack address.
 | 
						|
  Register StackAddrReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
 | 
						|
  MachineInstrBuilder AddrPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
 | 
						|
                                        TII->get(X86::PHI), StackAddrReg);
 | 
						|
  Register RowReg = MRI->createVirtualRegister(&X86::GR16RegClass);
 | 
						|
  MachineInstrBuilder RowPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
 | 
						|
                                       TII->get(X86::PHI), RowReg);
 | 
						|
  Register ColReg = MRI->createVirtualRegister(&X86::GR16RegClass);
 | 
						|
  MachineInstrBuilder ColPHI = BuildMI(*MBB, ++PHI.getIterator(), DebugLoc(),
 | 
						|
                                       TII->get(X86::PHI), ColReg);
 | 
						|
  // Record the mapping of phi node and its row/column information.
 | 
						|
  VisitedPHIs[&PHI] = {RowReg, ColReg, StackAddrReg};
 | 
						|
 | 
						|
  for (unsigned I = 1, E = PHI.getNumOperands(); I != E; I += 2) {
 | 
						|
    // Get the 2 incoming value of tile register and MBB.
 | 
						|
    Register InTileReg = PHI.getOperand(I).getReg();
 | 
						|
    // Mark it as liveout, so that it will be spilled when visit
 | 
						|
    // the incoming MBB. Otherwise since phi will be deleted, it
 | 
						|
    // would miss spill when visit incoming MBB.
 | 
						|
    MayLiveAcrossBlocks.set(Register::virtReg2Index(InTileReg));
 | 
						|
    MachineBasicBlock *InMBB = PHI.getOperand(I + 1).getMBB();
 | 
						|
 | 
						|
    MachineInstr *TileDefMI = MRI->getVRegDef(InTileReg);
 | 
						|
    MachineBasicBlock::iterator InsertPos;
 | 
						|
    if (TileDefMI->isPHI()) {
 | 
						|
      InsertPos = TileDefMI->getParent()->getFirstNonPHI();
 | 
						|
      if (VisitedPHIs.count(TileDefMI)) { // circular phi reference
 | 
						|
        //        def t1
 | 
						|
        //       /       \
 | 
						|
        //  def t2       t3 = phi(t1, t4) <--
 | 
						|
        //       \       /                  |
 | 
						|
        //      t4 = phi(t2, t3)-------------
 | 
						|
        //
 | 
						|
        // For each (row, column and stack address) append phi incoming value.
 | 
						|
        // Create r3 = phi(r1, r4)
 | 
						|
        // Create r4 = phi(r2, r3)
 | 
						|
        Register InRowReg = VisitedPHIs[TileDefMI].Row;
 | 
						|
        Register InColReg = VisitedPHIs[TileDefMI].Col;
 | 
						|
        Register InStackAddrReg = VisitedPHIs[TileDefMI].StackAddr;
 | 
						|
        RowPHI.addReg(InRowReg).addMBB(InMBB);
 | 
						|
        ColPHI.addReg(InColReg).addMBB(InMBB);
 | 
						|
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
 | 
						|
        continue;
 | 
						|
      } else {
 | 
						|
        // Recursively convert PHI to tileload
 | 
						|
        convertPHI(TileDefMI->getParent(), *TileDefMI);
 | 
						|
        // The PHI node is coverted to tileload instruction. Get the stack
 | 
						|
        // address from tileload operands.
 | 
						|
        MachineInstr *TileLoad = MRI->getVRegDef(InTileReg);
 | 
						|
        assert(TileLoad && TileLoad->getOpcode() == X86::PTILELOADDV);
 | 
						|
        Register InRowReg = TileLoad->getOperand(1).getReg();
 | 
						|
        Register InColReg = TileLoad->getOperand(2).getReg();
 | 
						|
        Register InStackAddrReg = TileLoad->getOperand(3).getReg();
 | 
						|
        RowPHI.addReg(InRowReg).addMBB(InMBB);
 | 
						|
        ColPHI.addReg(InColReg).addMBB(InMBB);
 | 
						|
        AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
 | 
						|
      }
 | 
						|
    } else {
 | 
						|
      InsertPos = TileDefMI->getIterator();
 | 
						|
 | 
						|
      // Fill the incoming operand of row/column phi instruction.
 | 
						|
      ShapeT Shape = getShape(MRI, InTileReg);
 | 
						|
      Shape.getRow()->setIsKill(false);
 | 
						|
      Shape.getCol()->setIsKill(false);
 | 
						|
      RowPHI.addReg(Shape.getRow()->getReg()).addMBB(InMBB);
 | 
						|
      ColPHI.addReg(Shape.getCol()->getReg()).addMBB(InMBB);
 | 
						|
 | 
						|
      // The incoming tile register live out of its def BB, it would be spilled.
 | 
						|
      // Create MI to get the spill stack slot address for the tile register
 | 
						|
      int FI = getStackSpaceFor(InTileReg);
 | 
						|
      Register InStackAddrReg =
 | 
						|
          MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
 | 
						|
      addOffset(BuildMI(*TileDefMI->getParent(), InsertPos, DebugLoc(),
 | 
						|
                        TII->get(X86::LEA64r), InStackAddrReg)
 | 
						|
                    .addFrameIndex(FI),
 | 
						|
                0);
 | 
						|
      AddrPHI.addReg(InStackAddrReg).addMBB(InMBB);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  MachineBasicBlock::iterator InsertPos = MBB->getFirstNonPHI();
 | 
						|
  Register StrideReg = MRI->createVirtualRegister(&X86::GR64_NOSPRegClass);
 | 
						|
  BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::MOV64ri), StrideReg)
 | 
						|
      .addImm(64);
 | 
						|
  Register TileReg = PHI.getOperand(0).getReg();
 | 
						|
  MachineInstr *NewMI = addDirectMem(
 | 
						|
      BuildMI(*MBB, InsertPos, DebugLoc(), TII->get(X86::PTILELOADDV), TileReg)
 | 
						|
          .addReg(RowReg)
 | 
						|
          .addReg(ColReg),
 | 
						|
      StackAddrReg);
 | 
						|
  MachineOperand &MO = NewMI->getOperand(5);
 | 
						|
  MO.setReg(StrideReg);
 | 
						|
  MO.setIsKill(true);
 | 
						|
  PHI.eraseFromParent();
 | 
						|
  VisitedPHIs.erase(&PHI);
 | 
						|
}
 | 
						|
 | 
						|
/// Returns true if operand 0 of \p MI is a virtual register of the TILE
/// register class. \p MI must have at least one operand.
static bool isTileRegDef(MachineRegisterInfo *MRI, MachineInstr &MI) {
  const MachineOperand &Dst = MI.getOperand(0);
  if (!Dst.isReg() || !Dst.getReg().isVirtual())
    return false;
  return MRI->getRegClass(Dst.getReg())->getID() == X86::TILERegClassID;
}
 | 
						|
 | 
						|
/// Breaks dependencies between tile PHIs where one PHI receives, along the
/// edge coming from \p MBB itself, a value that is defined by another PHI.
///
/// The dependent operand is replaced by the value that the defining PHI
/// receives from \p MBB. In the example below %t4 is rewritten:
///
///   BB0:
///   %t3 = phi (t1 BB1, t2 BB0)
///   %t4 = phi (t5 BB1, t3 BB0)
///   -->
///   %t3 = phi (t1 BB1, t2 BB0)
///   %t4 = phi (t5 BB1, t2 BB0)
void X86FastPreTileConfig::canonicalizePHIs(MachineBasicBlock &MBB) {
  // Collect the tile PHIs at the head of the block.
  SmallVector<MachineInstr *, 8> PHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (!isTileRegDef(MRI, MI))
      continue;
    PHIs.push_back(&MI);
  }

  // Canonicalize the phi nodes, last one first. One tile phi may depend on a
  // previous phi node.
  while (!PHIs.empty()) {
    MachineInstr *PHI = PHIs.pop_back_val();

    // Find the operand that is incoming from the same MBB and the def
    // is also phi node. PHI operands are (value, block) pairs from index 1.
    MachineOperand *InMO = nullptr;
    MachineInstr *DefMI = nullptr;
    for (unsigned I = 1, E = PHI->getNumOperands(); I != E; I += 2) {
      Register InTileReg = PHI->getOperand(I).getReg();
      MachineBasicBlock *InMBB = PHI->getOperand(I + 1).getMBB();
      DefMI = MRI->getVRegDef(InTileReg);
      if (InMBB != &MBB || !DefMI->isPHI())
        continue;

      InMO = &PHI->getOperand(I);
      break;
    }
    // If can't find such operand, do nothing.
    if (!InMO)
      continue;

    // Current phi node depends on previous phi node. Break the dependency by
    // forwarding the value DefMI receives from MBB.
    //
    // The incoming block must be read from DefMI, the instruction whose
    // operands are being walked. Reading it from PHI selects the wrong value
    // whenever the two PHIs list their predecessors in a different order and
    // may index past the end of PHI's operand list.
    for (unsigned I = 1, E = DefMI->getNumOperands(); I != E; I += 2) {
      if (DefMI->getOperand(I + 1).getMBB() != &MBB)
        continue;
      InMO->setReg(DefMI->getOperand(I).getReg());
      break;
    }
  }
}
 | 
						|
 | 
						|
/// Converts every tile PHI at the head of \p MBB into a tile load by calling
/// convertPHI() on each of them, last PHI first. VisitedPHIs is reset before
/// each conversion.
void X86FastPreTileConfig::convertPHIs(MachineBasicBlock &MBB) {
  // Gather the tile PHIs up front, because convertPHI() edits the block.
  SmallVector<MachineInstr *, 8> TilePHIs;
  for (MachineInstr &MI : MBB) {
    if (!MI.isPHI())
      break;
    if (isTileRegDef(MRI, MI))
      TilePHIs.push_back(&MI);
  }

  for (MachineInstr *TilePHI : reverse(TilePHIs)) {
    VisitedPHIs.clear();
    convertPHI(&MBB, *TilePHI);
  }
}
 | 
						|
 | 
						|
// PreTileConfig should configure the tile registers based on basic
 | 
						|
// block.
 | 
						|
bool X86FastPreTileConfig::configBasicBlock(MachineBasicBlock &MBB) {
 | 
						|
  this->MBB = &MBB;
 | 
						|
  bool Change = false;
 | 
						|
  MachineInstr *LastShapeMI = nullptr;
 | 
						|
  MachineInstr *LastTileCfg = nullptr;
 | 
						|
  bool HasUnconfigTile = false;
 | 
						|
 | 
						|
  auto Config = [&](MachineInstr &Before) {
 | 
						|
    if (CfgSS == -1)
 | 
						|
      CfgSS = MFI->CreateStackObject(ST->getTileConfigSize(),
 | 
						|
                                     ST->getTileConfigAlignment(), false);
 | 
						|
    LastTileCfg = addFrameReference(
 | 
						|
        BuildMI(MBB, Before, DebugLoc(), TII->get(X86::PLDTILECFGV)), CfgSS);
 | 
						|
    LastShapeMI = nullptr;
 | 
						|
    Change = true;
 | 
						|
  };
 | 
						|
  auto HasTileOperand = [](MachineRegisterInfo *MRI, MachineInstr &MI) {
 | 
						|
    for (const MachineOperand &MO : MI.operands()) {
 | 
						|
      if (!MO.isReg())
 | 
						|
        continue;
 | 
						|
      Register Reg = MO.getReg();
 | 
						|
      if (Reg.isVirtual() &&
 | 
						|
          MRI->getRegClass(Reg)->getID() == X86::TILERegClassID)
 | 
						|
        return true;
 | 
						|
    }
 | 
						|
    return false;
 | 
						|
  };
 | 
						|
  for (MachineInstr &MI : reverse(MBB)) {
 | 
						|
    // We have transformed phi node before configuring BB.
 | 
						|
    if (MI.isPHI())
 | 
						|
      break;
 | 
						|
    // Don't collect the shape of used tile, the tile should be defined
 | 
						|
    // before the tile use. Spill and reload would happen if there is only
 | 
						|
    // tile use after ldtilecfg, so the shape can be collected from reload.
 | 
						|
    // Take below code for example. %t would be reloaded before tilestore
 | 
						|
    // call
 | 
						|
    // ....
 | 
						|
    // tilestore %r, %c, %t
 | 
						|
    // -->
 | 
						|
    // call
 | 
						|
    // ldtilecfg
 | 
						|
    // %t = tileload %r, %c
 | 
						|
    // tilestore %r, %c, %t
 | 
						|
    if (HasTileOperand(MRI, MI))
 | 
						|
      HasUnconfigTile = true;
 | 
						|
    // According to AMX ABI, all the tile registers including config register
 | 
						|
    // are volatile. Caller need to save/restore config register.
 | 
						|
    if (MI.isCall() && HasUnconfigTile) {
 | 
						|
      MachineBasicBlock::iterator I;
 | 
						|
      if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
 | 
						|
        I = ++LastShapeMI->getIterator();
 | 
						|
      else
 | 
						|
        I = ++MI.getIterator();
 | 
						|
      Config(*I);
 | 
						|
      HasUnconfigTile = false;
 | 
						|
      continue;
 | 
						|
    }
 | 
						|
    if (!isTileDef(MRI, MI))
 | 
						|
      continue;
 | 
						|
    //
 | 
						|
    //---------------------------------------------------------------------
 | 
						|
    // Don't handle COPY instruction. If the src and dst of the COPY can be
 | 
						|
    // in the same config in below case, we just check the shape of t0.
 | 
						|
    // def row0
 | 
						|
    // def col0
 | 
						|
    // ldtilecfg
 | 
						|
    // t0 = tielzero(row0, col0)
 | 
						|
    // t1 = copy t0
 | 
						|
    // ...
 | 
						|
    // If the src and dst of the COPY can NOT be in the same config in below
 | 
						|
    // case. Reload would be generated befor the copy instruction.
 | 
						|
    // def row0
 | 
						|
    // def col0
 | 
						|
    // t0 = tielzero(row0, col0)
 | 
						|
    // spill t0
 | 
						|
    // ...
 | 
						|
    // def row1
 | 
						|
    // def col1
 | 
						|
    // ldtilecfg
 | 
						|
    // t1 = tilezero(row1, col1)
 | 
						|
    // reload t0
 | 
						|
    // t1 = copy t0
 | 
						|
    //---------------------------------------------------------------------
 | 
						|
    //
 | 
						|
    // If MI dominate the last shape def instruction, we need insert
 | 
						|
    // ldtilecfg after LastShapeMI now. The config doesn't include
 | 
						|
    // current MI.
 | 
						|
    //   def row0
 | 
						|
    //   def col0
 | 
						|
    //   tilezero(row0, col0)  <- MI
 | 
						|
    //   def row1
 | 
						|
    //   def col1
 | 
						|
    //   ldtilecfg             <- insert
 | 
						|
    //   tilezero(row1, col1)
 | 
						|
    if (LastShapeMI && dominates(MBB, MI, LastShapeMI))
 | 
						|
      Config(*(++LastShapeMI->getIterator()));
 | 
						|
    MachineOperand *RowMO = &MI.getOperand(1);
 | 
						|
    MachineOperand *ColMO = &MI.getOperand(2);
 | 
						|
    MachineInstr *RowMI = MRI->getVRegDef(RowMO->getReg());
 | 
						|
    MachineInstr *ColMI = MRI->getVRegDef(ColMO->getReg());
 | 
						|
    // If the shape is defined in current MBB, check the domination.
 | 
						|
    // FIXME how about loop?
 | 
						|
    if (RowMI->getParent() == &MBB) {
 | 
						|
      if (!LastShapeMI)
 | 
						|
        LastShapeMI = RowMI;
 | 
						|
      else if (dominates(MBB, LastShapeMI, RowMI))
 | 
						|
        LastShapeMI = RowMI;
 | 
						|
    }
 | 
						|
    if (ColMI->getParent() == &MBB) {
 | 
						|
      if (!LastShapeMI)
 | 
						|
        LastShapeMI = ColMI;
 | 
						|
      else if (dominates(MBB, LastShapeMI, ColMI))
 | 
						|
        LastShapeMI = ColMI;
 | 
						|
    }
 | 
						|
    // If there is user live out of the tilecfg, spill it and reload in
 | 
						|
    // before the user.
 | 
						|
    Register TileReg = MI.getOperand(0).getReg();
 | 
						|
    if (mayLiveOut(TileReg, LastTileCfg))
 | 
						|
      spill(++MI.getIterator(), TileReg, false);
 | 
						|
    for (MachineInstr &UseMI : MRI->use_instructions(TileReg)) {
 | 
						|
      if (UseMI.getParent() == &MBB) {
 | 
						|
        // check user should not across ldtilecfg
 | 
						|
        if (!LastTileCfg || !dominates(MBB, LastTileCfg, UseMI))
 | 
						|
          continue;
 | 
						|
        // reload befor UseMI
 | 
						|
        reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
 | 
						|
      } else {
 | 
						|
        // Don't reload for phi instruction, we handle phi reload separately.
 | 
						|
        // TODO: merge the reload for the same user MBB.
 | 
						|
        if (!UseMI.isPHI())
 | 
						|
          reload(UseMI.getIterator(), TileReg, RowMO, ColMO);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Configure tile registers at the head of the MBB
 | 
						|
  if (HasUnconfigTile) {
 | 
						|
    MachineInstr *Before;
 | 
						|
    if (LastShapeMI == nullptr || LastShapeMI->isPHI())
 | 
						|
      Before = &*MBB.getFirstNonPHI();
 | 
						|
    else
 | 
						|
      Before = &*(++LastShapeMI->getIterator());
 | 
						|
 | 
						|
    Config(*Before);
 | 
						|
  }
 | 
						|
 | 
						|
  return Change;
 | 
						|
}
 | 
						|
 | 
						|
/// Pass entry point. Caches the per-function handles, returns early when the
/// function has no virtual tile register, canonicalizes tile PHIs in every
/// block, then converts PHIs and inserts tile configs block by block in
/// reverse post order. When anything was inserted, the code initialising the
/// tile config stack slot is emitted as well.
///
/// \returns true if the function was changed by inserting a tile config.
bool X86FastPreTileConfig::runOnMachineFunction(MachineFunction &MFunc) {
  MF = &MFunc;
  MRI = &MFunc.getRegInfo();
  ST = &MFunc.getSubtarget<X86Subtarget>();
  TII = ST->getInstrInfo();
  X86FI = MFunc.getInfo<X86MachineFunctionInfo>();
  MFI = &MFunc.getFrameInfo();
  TRI = ST->getRegisterInfo();
  CfgSS = -1;

  // Abandon early if there is no tile register to config.
  const unsigned NumVirtRegs = MRI->getNumVirtRegs();
  bool HasVirtTileReg = false;
  for (unsigned Idx = 0; Idx != NumVirtRegs && !HasVirtTileReg; ++Idx) {
    Register VirtReg = Register::index2VirtReg(Idx);
    HasVirtTileReg = MRI->getRegClass(VirtReg)->getID() == X86::TILERegClassID;
  }
  if (!HasVirtTileReg)
    return false;

  StackSlotForVirtReg.resize(NumVirtRegs);
  MayLiveAcrossBlocks.clear();
  // We will create register during config. *3 is to make sure
  // the virtual register number doesn't exceed the size of
  // the bit vector.
  MayLiveAcrossBlocks.resize(NumVirtRegs * 3);
  assert(MRI->isSSA());

  // Canonicalize the phi node first.
  for (MachineBasicBlock &Block : MFunc)
    canonicalizePHIs(Block);

  // Loop over all of the basic blocks in reverse post order and insert
  // ldtilecfg for tile registers. The reverse post order is to facilitate
  // PHI node convert.
  bool Changed = false;
  ReversePostOrderTraversal<MachineFunction *> RPOT(MF);
  for (MachineBasicBlock *Block : RPOT) {
    convertPHIs(*Block);
    Changed |= configBasicBlock(*Block);
  }

  // The config stack slot only exists if a config instruction was inserted.
  if (Changed)
    InitializeTileConfigStackSpace();

  StackSlotForVirtReg.clear();
  return Changed;
}
 | 
						|
 | 
						|
/// Creates a new instance of the fast tile register preconfigure pass. The
/// returned object is allocated with new.
FunctionPass *llvm::createX86FastPreTileConfigPass() {
  FunctionPass *Pass = new X86FastPreTileConfig();
  return Pass;
}
 |