forked from OSchip/llvm-project
				
			R600/SI: Set the ATC bit on all resource descriptors for the HSA runtime
llvm-svn: 223125
This commit is contained in:
		
							parent
							
								
									4082a6c2ec
								
							
						
					
					
						commit
						794c8c0f78
					
				| 
						 | 
					@ -1012,6 +1012,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
 | 
				
			||||||
                                           SDValue &GLC, SDValue &SLC,
 | 
					                                           SDValue &GLC, SDValue &SLC,
 | 
				
			||||||
                                           SDValue &TFE) const {
 | 
					                                           SDValue &TFE) const {
 | 
				
			||||||
  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
 | 
					  SDValue Ptr, VAddr, Offen, Idxen, Addr64;
 | 
				
			||||||
 | 
					  const SIInstrInfo *TII =
 | 
				
			||||||
 | 
					    static_cast<const SIInstrInfo *>(Subtarget.getInstrInfo());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
 | 
					  SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
 | 
				
			||||||
              GLC, SLC, TFE);
 | 
					              GLC, SLC, TFE);
 | 
				
			||||||
| 
						 | 
					@ -1019,7 +1021,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
 | 
				
			||||||
  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
 | 
					  if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
 | 
				
			||||||
      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
 | 
					      !cast<ConstantSDNode>(Idxen)->getSExtValue() &&
 | 
				
			||||||
      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
 | 
					      !cast<ConstantSDNode>(Addr64)->getSExtValue()) {
 | 
				
			||||||
    uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT |
 | 
					    uint64_t Rsrc = TII->getDefaultRsrcDataFormat() |
 | 
				
			||||||
                    APInt::getAllOnesValue(32).getZExtValue(); // Size
 | 
					                    APInt::getAllOnesValue(32).getZExtValue(); // Size
 | 
				
			||||||
    SDLoc DL(Addr);
 | 
					    SDLoc DL(Addr);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -84,7 +84,8 @@ AMDGPUSubtarget::AMDGPUSubtarget(StringRef TT, StringRef GPU, StringRef FS,
 | 
				
			||||||
      FrameLowering(TargetFrameLowering::StackGrowsUp,
 | 
					      FrameLowering(TargetFrameLowering::StackGrowsUp,
 | 
				
			||||||
                    64 * 16, // Maximum stack alignment (long16)
 | 
					                    64 * 16, // Maximum stack alignment (long16)
 | 
				
			||||||
                    0),
 | 
					                    0),
 | 
				
			||||||
      InstrItins(getInstrItineraryForCPU(GPU)) {
 | 
					      InstrItins(getInstrItineraryForCPU(GPU)),
 | 
				
			||||||
 | 
					      TargetTriple(TT) {
 | 
				
			||||||
  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 | 
					  if (getGeneration() <= AMDGPUSubtarget::NORTHERN_ISLANDS) {
 | 
				
			||||||
    InstrInfo.reset(new R600InstrInfo(*this));
 | 
					    InstrInfo.reset(new R600InstrInfo(*this));
 | 
				
			||||||
    TLInfo.reset(new R600TargetLowering(TM));
 | 
					    TLInfo.reset(new R600TargetLowering(TM));
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -68,6 +68,7 @@ private:
 | 
				
			||||||
  std::unique_ptr<AMDGPUTargetLowering> TLInfo;
 | 
					  std::unique_ptr<AMDGPUTargetLowering> TLInfo;
 | 
				
			||||||
  std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
 | 
					  std::unique_ptr<AMDGPUInstrInfo> InstrInfo;
 | 
				
			||||||
  InstrItineraryData InstrItins;
 | 
					  InstrItineraryData InstrItins;
 | 
				
			||||||
 | 
					  Triple TargetTriple;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
 | 
					  AMDGPUSubtarget(StringRef TT, StringRef CPU, StringRef FS, TargetMachine &TM);
 | 
				
			||||||
| 
						 | 
					@ -217,6 +218,9 @@ public:
 | 
				
			||||||
  bool r600ALUEncoding() const {
 | 
					  bool r600ALUEncoding() const {
 | 
				
			||||||
    return R600ALUInst;
 | 
					    return R600ALUInst;
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  bool isAmdHsaOS() const {
 | 
				
			||||||
 | 
					    return TargetTriple.getOS() == Triple::AMDHSA;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
} // End namespace llvm
 | 
					} // End namespace llvm
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2030,6 +2030,8 @@ static SDValue buildSMovImm32(SelectionDAG &DAG, SDLoc DL, uint64_t Val) {
 | 
				
			||||||
MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
 | 
					MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
 | 
				
			||||||
                                                SDLoc DL,
 | 
					                                                SDLoc DL,
 | 
				
			||||||
                                                SDValue Ptr) const {
 | 
					                                                SDValue Ptr) const {
 | 
				
			||||||
 | 
					  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
 | 
				
			||||||
 | 
					      getTargetMachine().getSubtargetImpl()->getInstrInfo());
 | 
				
			||||||
#if 1
 | 
					#if 1
 | 
				
			||||||
    // XXX - Workaround for moveToVALU not handling different register class
 | 
					    // XXX - Workaround for moveToVALU not handling different register class
 | 
				
			||||||
    // inserts for REG_SEQUENCE.
 | 
					    // inserts for REG_SEQUENCE.
 | 
				
			||||||
| 
						 | 
					@ -2039,7 +2041,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
 | 
					      DAG.getTargetConstant(AMDGPU::SGPR_64RegClassID, MVT::i32),
 | 
				
			||||||
      buildSMovImm32(DAG, DL, 0),
 | 
					      buildSMovImm32(DAG, DL, 0),
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
 | 
					      DAG.getTargetConstant(AMDGPU::sub0, MVT::i32),
 | 
				
			||||||
      buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
 | 
					      buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
 | 
					      DAG.getTargetConstant(AMDGPU::sub1, MVT::i32)
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2063,7 +2065,7 @@ MachineSDNode *SITargetLowering::wrapAddr64Rsrc(SelectionDAG &DAG,
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
 | 
					      DAG.getTargetConstant(AMDGPU::sub0_sub1, MVT::i32),
 | 
				
			||||||
      buildSMovImm32(DAG, DL, 0),
 | 
					      buildSMovImm32(DAG, DL, 0),
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
 | 
					      DAG.getTargetConstant(AMDGPU::sub2, MVT::i32),
 | 
				
			||||||
      buildSMovImm32(DAG, DL, AMDGPU::RSRC_DATA_FORMAT >> 32),
 | 
					      buildSMovImm32(DAG, DL, TII->getDefaultRsrcDataFormat() >> 32),
 | 
				
			||||||
      DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
 | 
					      DAG.getTargetConstant(AMDGPU::sub3, MVT::i32)
 | 
				
			||||||
    };
 | 
					    };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -2110,7 +2112,9 @@ MachineSDNode *SITargetLowering::buildRSRC(SelectionDAG &DAG,
 | 
				
			||||||
MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
 | 
					MachineSDNode *SITargetLowering::buildScratchRSRC(SelectionDAG &DAG,
 | 
				
			||||||
                                                  SDLoc DL,
 | 
					                                                  SDLoc DL,
 | 
				
			||||||
                                                  SDValue Ptr) const {
 | 
					                                                  SDValue Ptr) const {
 | 
				
			||||||
  uint64_t Rsrc = AMDGPU::RSRC_DATA_FORMAT | AMDGPU::RSRC_TID_ENABLE |
 | 
					  const SIInstrInfo *TII = static_cast<const SIInstrInfo *>(
 | 
				
			||||||
 | 
					      getTargetMachine().getSubtargetImpl()->getInstrInfo());
 | 
				
			||||||
 | 
					  uint64_t Rsrc = TII->getDefaultRsrcDataFormat() | AMDGPU::RSRC_TID_ENABLE |
 | 
				
			||||||
                  0xffffffff; // Size
 | 
					                  0xffffffff; // Size
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
 | 
					  return buildRSRC(DAG, DL, Ptr, 0, Rsrc);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1580,6 +1580,7 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
 | 
				
			||||||
    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
					    unsigned SRsrcFormatLo = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
				
			||||||
    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
					    unsigned SRsrcFormatHi = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
				
			||||||
    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
 | 
					    unsigned NewSRsrc = MRI.createVirtualRegister(&AMDGPU::SReg_128RegClass);
 | 
				
			||||||
 | 
					    uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // Zero64 = 0
 | 
					    // Zero64 = 0
 | 
				
			||||||
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
 | 
					    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B64),
 | 
				
			||||||
| 
						 | 
					@ -1589,12 +1590,12 @@ void SIInstrInfo::legalizeOperands(MachineInstr *MI) const {
 | 
				
			||||||
    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
 | 
					    // SRsrcFormatLo = RSRC_DATA_FORMAT{31-0}
 | 
				
			||||||
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
 | 
					    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
 | 
				
			||||||
            SRsrcFormatLo)
 | 
					            SRsrcFormatLo)
 | 
				
			||||||
            .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
 | 
					            .addImm(RsrcDataFormat & 0xFFFFFFFF);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
 | 
					    // SRsrcFormatHi = RSRC_DATA_FORMAT{63-32}
 | 
				
			||||||
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
 | 
					    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32),
 | 
				
			||||||
            SRsrcFormatHi)
 | 
					            SRsrcFormatHi)
 | 
				
			||||||
            .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
 | 
					            .addImm(RsrcDataFormat >> 32);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // NewSRsrc = {Zero64, SRsrcFormat}
 | 
					    // NewSRsrc = {Zero64, SRsrcFormat}
 | 
				
			||||||
    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
 | 
					    BuildMI(MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE),
 | 
				
			||||||
| 
						 | 
					@ -1798,13 +1799,14 @@ void SIInstrInfo::moveSMRDToVALU(MachineInstr *MI, MachineRegisterInfo &MRI) con
 | 
				
			||||||
      unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
					      unsigned DWord1 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
				
			||||||
      unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
					      unsigned DWord2 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
				
			||||||
      unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
					      unsigned DWord3 = MRI.createVirtualRegister(&AMDGPU::SGPR_32RegClass);
 | 
				
			||||||
 | 
					      uint64_t RsrcDataFormat = getDefaultRsrcDataFormat();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
 | 
					      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord1)
 | 
				
			||||||
              .addImm(0);
 | 
					              .addImm(0);
 | 
				
			||||||
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
 | 
					      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord2)
 | 
				
			||||||
              .addImm(AMDGPU::RSRC_DATA_FORMAT & 0xFFFFFFFF);
 | 
					              .addImm(RsrcDataFormat & 0xFFFFFFFF);
 | 
				
			||||||
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
 | 
					      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::S_MOV_B32), DWord3)
 | 
				
			||||||
              .addImm(AMDGPU::RSRC_DATA_FORMAT >> 32);
 | 
					              .addImm(RsrcDataFormat >> 32);
 | 
				
			||||||
      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
 | 
					      BuildMI(*MBB, MI, MI->getDebugLoc(), get(AMDGPU::REG_SEQUENCE), SRsrc)
 | 
				
			||||||
              .addReg(DWord0)
 | 
					              .addReg(DWord0)
 | 
				
			||||||
              .addImm(AMDGPU::sub0)
 | 
					              .addImm(AMDGPU::sub0)
 | 
				
			||||||
| 
						 | 
					@ -2413,3 +2415,11 @@ MachineOperand *SIInstrInfo::getNamedOperand(MachineInstr &MI,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  return &MI.getOperand(Idx);
 | 
					  return &MI.getOperand(Idx);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					uint64_t SIInstrInfo::getDefaultRsrcDataFormat() const {
 | 
				
			||||||
 | 
					  uint64_t RsrcDataFormat = AMDGPU::RSRC_DATA_FORMAT;
 | 
				
			||||||
 | 
					  if (ST.isAmdHsaOS())
 | 
				
			||||||
 | 
					    RsrcDataFormat |= (1ULL << 56);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return RsrcDataFormat;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -308,6 +308,9 @@ public:
 | 
				
			||||||
                                        unsigned OpName) const {
 | 
					                                        unsigned OpName) const {
 | 
				
			||||||
    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
 | 
					    return getNamedOperand(const_cast<MachineInstr &>(MI), OpName);
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  uint64_t getDefaultRsrcDataFormat() const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace AMDGPU {
 | 
					namespace AMDGPU {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,12 @@
 | 
				
			||||||
 | 
					; RUN: llc < %s -mtriple=r600--amdhsa -mcpu=kaveri | FileCheck --check-prefix=HSA %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; HSA: {{^}}simple:
 | 
				
			||||||
 | 
					; Make sure we are setting the ATC bit:
 | 
				
			||||||
 | 
					; HSA: s_mov_b32 s[[HI:[0-9]]], 0x100f000
 | 
				
			||||||
 | 
					; HSA: buffer_store_dword v{{[0-9]+}}, s[0:[[HI]]], 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define void @simple(i32 addrspace(1)* %out) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  store i32 0, i32 addrspace(1)* %out
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in New Issue