forked from OSchip/llvm-project
[AMDGPU] Extend buffer intrinsics with swizzling
Summary:
Extend the cachepolicy operand in the new VMEM buffer intrinsics to indicate whether the buffer data is swizzled. Also, propagate this information to MIR.

Intrinsics updated:
- int_amdgcn_raw_buffer_load
- int_amdgcn_raw_buffer_load_format
- int_amdgcn_raw_buffer_store
- int_amdgcn_raw_buffer_store_format
- int_amdgcn_raw_tbuffer_load
- int_amdgcn_raw_tbuffer_store
- int_amdgcn_struct_buffer_load
- int_amdgcn_struct_buffer_load_format
- int_amdgcn_struct_buffer_store
- int_amdgcn_struct_buffer_store_format
- int_amdgcn_struct_tbuffer_load
- int_amdgcn_struct_tbuffer_store

Furthermore, disable merging of VMEM buffer instructions in the SI Load/Store optimizer if the "swizzled" bit on the instruction is on.

The default value of the bit is 0, meaning that the data in the buffer is linear and buffer instructions can be merged.

There is no difference in the generated code with this commit. However, in the future it will be expected that front-ends use buffer intrinsics with the correct "swizzled" bit set.

Reviewers: arsenm, nhaehnle, tpr

Reviewed By: nhaehnle

Subscribers: arsenm, kzhuravl, jvesely, wdng, nhaehnle, yaxunl, dstuttard, tpr, t-tye, arphaman, jfb, Petar.Avramovic, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D68200

llvm-svn: 373491
This commit is contained in:
parent
b879fd05bd
commit
265e94e657
|
|
@ -899,7 +899,10 @@ class AMDGPURawBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
|
|||
[llvm_v4i32_ty, // rsrc(SGPR)
|
||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrReadMem, ImmArg<3>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<0>;
|
||||
def int_amdgcn_raw_buffer_load_format : AMDGPURawBufferLoad<llvm_anyfloat_ty>;
|
||||
|
|
@ -911,7 +914,10 @@ class AMDGPUStructBufferLoad<LLVMType data_ty = llvm_any_ty> : Intrinsic <
|
|||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrReadMem, ImmArg<4>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<0>;
|
||||
def int_amdgcn_struct_buffer_load_format : AMDGPUStructBufferLoad<llvm_anyfloat_ty>;
|
||||
|
|
@ -923,7 +929,10 @@ class AMDGPURawBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
|
|||
llvm_v4i32_ty, // rsrc(SGPR)
|
||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrWriteMem, ImmArg<4>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<1>;
|
||||
def int_amdgcn_raw_buffer_store_format : AMDGPURawBufferStore<llvm_anyfloat_ty>;
|
||||
|
|
@ -936,7 +945,10 @@ class AMDGPUStructBufferStore<LLVMType data_ty = llvm_any_ty> : Intrinsic <
|
|||
llvm_i32_ty, // vindex(VGPR)
|
||||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrWriteMem, ImmArg<5>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<1>;
|
||||
def int_amdgcn_struct_buffer_store_format : AMDGPUStructBufferStore<llvm_anyfloat_ty>;
|
||||
|
|
@ -1050,7 +1062,10 @@ def int_amdgcn_raw_tbuffer_load : Intrinsic <
|
|||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrReadMem, ImmArg<3>, ImmArg<4>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<0>;
|
||||
|
||||
|
|
@ -1061,7 +1076,10 @@ def int_amdgcn_raw_tbuffer_store : Intrinsic <
|
|||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrWriteMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<1>;
|
||||
|
||||
|
|
@ -1072,7 +1090,10 @@ def int_amdgcn_struct_tbuffer_load : Intrinsic <
|
|||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrReadMem, ImmArg<4>, ImmArg<5>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<0>;
|
||||
|
||||
|
|
@ -1084,7 +1105,10 @@ def int_amdgcn_struct_tbuffer_store : Intrinsic <
|
|||
llvm_i32_ty, // offset(VGPR/imm, included in bounds checking and swizzling)
|
||||
llvm_i32_ty, // soffset(SGPR/imm, excluded from bounds checking and swizzling)
|
||||
llvm_i32_ty, // format(imm; bits 3..0 = dfmt, bits 6..4 = nfmt)
|
||||
llvm_i32_ty], // cachepolicy(imm; bit 0 = glc, bit 1 = slc, bit 2 = dlc on gfx10+)
|
||||
llvm_i32_ty], // auxiliary data (imm, cachepolicy (bit 0 = glc,
|
||||
// bit 1 = slc,
|
||||
// bit 2 = dlc on gfx10+),
|
||||
// swizzled buffer (bit 3 = swz))
|
||||
[IntrWriteMem, ImmArg<5>, ImmArg<6>], "", [SDNPMemOperand]>,
|
||||
AMDGPURsrcIntrinsic<1>;
|
||||
|
||||
|
|
|
|||
|
|
@ -186,10 +186,11 @@ private:
|
|||
bool SelectMUBUF(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &Addr64, SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE, SDValue &DLC) const;
|
||||
SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
|
||||
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc, SDValue &VAddr,
|
||||
SDValue &SOffset, SDValue &Offset, SDValue &GLC,
|
||||
SDValue &SLC, SDValue &TFE, SDValue &DLC) const;
|
||||
SDValue &SLC, SDValue &TFE, SDValue &DLC,
|
||||
SDValue &SWZ) const;
|
||||
bool SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &VAddr, SDValue &SOffset, SDValue &Offset,
|
||||
SDValue &SLC) const;
|
||||
|
|
@ -202,7 +203,7 @@ private:
|
|||
|
||||
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &SOffset,
|
||||
SDValue &Offset, SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE, SDValue &DLC) const;
|
||||
SDValue &TFE, SDValue &DLC, SDValue &SWZ) const;
|
||||
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
|
||||
SDValue &Offset, SDValue &SLC) const;
|
||||
bool SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc, SDValue &Soffset,
|
||||
|
|
@ -1313,7 +1314,8 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
|||
SDValue &Offset, SDValue &Offen,
|
||||
SDValue &Idxen, SDValue &Addr64,
|
||||
SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE, SDValue &DLC) const {
|
||||
SDValue &TFE, SDValue &DLC,
|
||||
SDValue &SWZ) const {
|
||||
// Subtarget prefers to use flat instruction
|
||||
if (Subtarget->useFlatForGlobal())
|
||||
return false;
|
||||
|
|
@ -1326,6 +1328,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUF(SDValue Addr, SDValue &Ptr,
|
|||
SLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
TFE = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
DLC = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
SWZ = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
|
||||
Idxen = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
Offen = CurDAG->getTargetConstant(0, DL, MVT::i1);
|
||||
|
|
@ -1405,7 +1408,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
|||
SDValue &VAddr, SDValue &SOffset,
|
||||
SDValue &Offset, SDValue &GLC,
|
||||
SDValue &SLC, SDValue &TFE,
|
||||
SDValue &DLC) const {
|
||||
SDValue &DLC, SDValue &SWZ) const {
|
||||
SDValue Ptr, Offen, Idxen, Addr64;
|
||||
|
||||
// addr64 bit was removed for volcanic islands.
|
||||
|
|
@ -1413,7 +1416,7 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
|||
return false;
|
||||
|
||||
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE, DLC))
|
||||
GLC, SLC, TFE, DLC, SWZ))
|
||||
return false;
|
||||
|
||||
ConstantSDNode *C = cast<ConstantSDNode>(Addr64);
|
||||
|
|
@ -1435,9 +1438,9 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFAddr64(SDValue Addr, SDValue &SRsrc,
|
|||
SDValue &Offset,
|
||||
SDValue &SLC) const {
|
||||
SLC = CurDAG->getTargetConstant(0, SDLoc(Addr), MVT::i1);
|
||||
SDValue GLC, TFE, DLC;
|
||||
SDValue GLC, TFE, DLC, SWZ;
|
||||
|
||||
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC);
|
||||
return SelectMUBUFAddr64(Addr, SRsrc, VAddr, SOffset, Offset, GLC, SLC, TFE, DLC, SWZ);
|
||||
}
|
||||
|
||||
static bool isStackPtrRelative(const MachinePointerInfo &PtrInfo) {
|
||||
|
|
@ -1562,13 +1565,14 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFScratchOffset(SDNode *Parent,
|
|||
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &SOffset, SDValue &Offset,
|
||||
SDValue &GLC, SDValue &SLC,
|
||||
SDValue &TFE, SDValue &DLC) const {
|
||||
SDValue &TFE, SDValue &DLC,
|
||||
SDValue &SWZ) const {
|
||||
SDValue Ptr, VAddr, Offen, Idxen, Addr64;
|
||||
const SIInstrInfo *TII =
|
||||
static_cast<const SIInstrInfo *>(Subtarget->getInstrInfo());
|
||||
|
||||
if (!SelectMUBUF(Addr, Ptr, VAddr, SOffset, Offset, Offen, Idxen, Addr64,
|
||||
GLC, SLC, TFE, DLC))
|
||||
GLC, SLC, TFE, DLC, SWZ))
|
||||
return false;
|
||||
|
||||
if (!cast<ConstantSDNode>(Offen)->getSExtValue() &&
|
||||
|
|
@ -1590,16 +1594,16 @@ bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
|||
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &Soffset, SDValue &Offset
|
||||
) const {
|
||||
SDValue GLC, SLC, TFE, DLC;
|
||||
SDValue GLC, SLC, TFE, DLC, SWZ;
|
||||
|
||||
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
|
||||
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
|
||||
}
|
||||
bool AMDGPUDAGToDAGISel::SelectMUBUFOffset(SDValue Addr, SDValue &SRsrc,
|
||||
SDValue &Soffset, SDValue &Offset,
|
||||
SDValue &SLC) const {
|
||||
SDValue GLC, TFE, DLC;
|
||||
SDValue GLC, TFE, DLC, SWZ;
|
||||
|
||||
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC);
|
||||
return SelectMUBUFOffset(Addr, SRsrc, Soffset, Offset, GLC, SLC, TFE, DLC, SWZ);
|
||||
}
|
||||
|
||||
template <bool IsSigned>
|
||||
|
|
|
|||
|
|
@ -762,16 +762,20 @@ static bool isZero(Register Reg, MachineRegisterInfo &MRI) {
|
|||
return mi_match(Reg, MRI, m_Copy(m_ICst(C))) && C == 0;
|
||||
}
|
||||
|
||||
static unsigned extractGLC(unsigned CachePolicy) {
|
||||
return CachePolicy & 1;
|
||||
static unsigned extractGLC(unsigned AuxiliaryData) {
|
||||
return AuxiliaryData & 1;
|
||||
}
|
||||
|
||||
static unsigned extractSLC(unsigned CachePolicy) {
|
||||
return (CachePolicy >> 1) & 1;
|
||||
static unsigned extractSLC(unsigned AuxiliaryData) {
|
||||
return (AuxiliaryData >> 1) & 1;
|
||||
}
|
||||
|
||||
static unsigned extractDLC(unsigned CachePolicy) {
|
||||
return (CachePolicy >> 2) & 1;
|
||||
static unsigned extractDLC(unsigned AuxiliaryData) {
|
||||
return (AuxiliaryData >> 2) & 1;
|
||||
}
|
||||
|
||||
// Extracts the "swizzled buffer" flag from the packed auxiliary-data
// immediate of a buffer intrinsic. The flag is bit 3 of AuxiliaryData
// (bits 0..2 are read by extractGLC / extractSLC / extractDLC).
// Returns 1 when the bit is set and 0 otherwise.
static unsigned extractSWZ(unsigned AuxiliaryData) {
|
||||
return (AuxiliaryData >> 3) & 1;
|
||||
}
|
||||
|
||||
// Returns Base register, constant offset, and offset def point.
|
||||
|
|
@ -970,7 +974,7 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
|
|||
Register RSrc = MI.getOperand(2).getReg();
|
||||
Register VOffset = MI.getOperand(3).getReg();
|
||||
Register SOffset = MI.getOperand(4).getReg();
|
||||
unsigned CachePolicy = MI.getOperand(5).getImm();
|
||||
unsigned AuxiliaryData = MI.getOperand(5).getImm();
|
||||
unsigned ImmOffset;
|
||||
unsigned TotalOffset;
|
||||
|
||||
|
|
@ -994,10 +998,11 @@ bool AMDGPUInstructionSelector::selectStoreIntrinsic(MachineInstr &MI,
|
|||
MIB.addUse(RSrc)
|
||||
.addUse(SOffset)
|
||||
.addImm(ImmOffset)
|
||||
.addImm(extractGLC(CachePolicy))
|
||||
.addImm(extractSLC(CachePolicy))
|
||||
.addImm(extractGLC(AuxiliaryData))
|
||||
.addImm(extractSLC(AuxiliaryData))
|
||||
.addImm(0) // tfe: FIXME: Remove from inst
|
||||
.addImm(extractDLC(CachePolicy))
|
||||
.addImm(extractDLC(AuxiliaryData))
|
||||
.addImm(extractSWZ(AuxiliaryData))
|
||||
.addMemOperand(MMO);
|
||||
|
||||
MI.eraseFromParent();
|
||||
|
|
|
|||
|
|
@ -143,6 +143,7 @@ public:
|
|||
ImmTyDLC,
|
||||
ImmTyGLC,
|
||||
ImmTySLC,
|
||||
ImmTySWZ,
|
||||
ImmTyTFE,
|
||||
ImmTyD16,
|
||||
ImmTyClampSI,
|
||||
|
|
@ -328,6 +329,7 @@ public:
|
|||
bool isDLC() const { return isImmTy(ImmTyDLC); }
|
||||
bool isGLC() const { return isImmTy(ImmTyGLC); }
|
||||
bool isSLC() const { return isImmTy(ImmTySLC); }
|
||||
bool isSWZ() const { return isImmTy(ImmTySWZ); }
|
||||
bool isTFE() const { return isImmTy(ImmTyTFE); }
|
||||
bool isD16() const { return isImmTy(ImmTyD16); }
|
||||
bool isFORMAT() const { return isImmTy(ImmTyFORMAT) && isUInt<8>(getImm()); }
|
||||
|
|
@ -820,6 +822,7 @@ public:
|
|||
case ImmTyDLC: OS << "DLC"; break;
|
||||
case ImmTyGLC: OS << "GLC"; break;
|
||||
case ImmTySLC: OS << "SLC"; break;
|
||||
case ImmTySWZ: OS << "SWZ"; break;
|
||||
case ImmTyTFE: OS << "TFE"; break;
|
||||
case ImmTyD16: OS << "D16"; break;
|
||||
case ImmTyFORMAT: OS << "FORMAT"; break;
|
||||
|
|
@ -6037,6 +6040,7 @@ static const OptionalOperand AMDGPUOptionalOperandTable[] = {
|
|||
{"format", AMDGPUOperand::ImmTyFORMAT, false, nullptr},
|
||||
{"glc", AMDGPUOperand::ImmTyGLC, true, nullptr},
|
||||
{"slc", AMDGPUOperand::ImmTySLC, true, nullptr},
|
||||
{"swz", AMDGPUOperand::ImmTySWZ, true, nullptr},
|
||||
{"tfe", AMDGPUOperand::ImmTyTFE, true, nullptr},
|
||||
{"d16", AMDGPUOperand::ImmTyD16, true, nullptr},
|
||||
{"high", AMDGPUOperand::ImmTyHigh, true, nullptr},
|
||||
|
|
|
|||
|
|
@ -7,13 +7,13 @@
|
|||
//===----------------------------------------------------------------------===//
|
||||
|
||||
def MUBUFAddr32 : ComplexPattern<i64, 9, "SelectMUBUFAddr32">;
|
||||
def MUBUFAddr64 : ComplexPattern<i64, 8, "SelectMUBUFAddr64">;
|
||||
def MUBUFAddr64 : ComplexPattern<i64, 9, "SelectMUBUFAddr64">;
|
||||
def MUBUFAddr64Atomic : ComplexPattern<i64, 5, "SelectMUBUFAddr64">;
|
||||
|
||||
def MUBUFScratchOffen : ComplexPattern<i64, 4, "SelectMUBUFScratchOffen", [], [SDNPWantParent]>;
|
||||
def MUBUFScratchOffset : ComplexPattern<i64, 3, "SelectMUBUFScratchOffset", [], [SDNPWantParent], 20>;
|
||||
|
||||
def MUBUFOffset : ComplexPattern<i64, 7, "SelectMUBUFOffset">;
|
||||
def MUBUFOffset : ComplexPattern<i64, 8, "SelectMUBUFOffset">;
|
||||
def MUBUFOffsetNoGLC : ComplexPattern<i64, 3, "SelectMUBUFOffset">;
|
||||
def MUBUFOffsetAtomic : ComplexPattern<i64, 4, "SelectMUBUFOffset">;
|
||||
|
||||
|
|
@ -54,6 +54,17 @@ class MTBUFAddr64Table <bit is_addr64, string Name> {
|
|||
// MTBUF classes
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
// Maps an MTBUF opcode name to the name of its single-component variant by
// rewriting the "FORMAT_XY", "FORMAT_XYZ" and "FORMAT_XYZW" substrings of Op
// to "FORMAT_X"; the result is exposed as `ret`.
// The substitutions are nested so that the longest suffix (XYZW) is replaced
// first, then XYZ, then XY. Replacing "FORMAT_XY" first would instead leave
// a stray "Z"/"ZW" behind (e.g. "FORMAT_XZW").
// A name containing none of these substrings is returned unchanged.
class MTBUFGetBaseOpcode<string Op> {
|
||||
string ret = !subst("FORMAT_XY", "FORMAT_X",
|
||||
!subst("FORMAT_XYZ", "FORMAT_X",
|
||||
!subst("FORMAT_XYZW", "FORMAT_X", Op)));
|
||||
}
|
||||
|
||||
// Helper class whose `ret` field is the constant 1.
// NOTE(review): the Op parameter is not referenced in the body, so every
// opcode name yields 1. The per-instruction element count appears to be
// supplied explicitly through the `elems` parameter of the MTBUF load/store
// pseudo classes instead -- confirm whether this class is a placeholder.
class getMTBUFElements<string Op> {
|
||||
int ret = 1;
|
||||
}
|
||||
|
||||
|
||||
class MTBUF_Pseudo <string opName, dag outs, dag ins,
|
||||
string asmOps, list<dag> pattern=[]> :
|
||||
InstSI<outs, ins, "", pattern>,
|
||||
|
|
@ -67,6 +78,9 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
|
|||
string Mnemonic = opName;
|
||||
string AsmOperands = asmOps;
|
||||
|
||||
Instruction Opcode = !cast<Instruction>(NAME);
|
||||
Instruction BaseOpcode = !cast<Instruction>(MTBUFGetBaseOpcode<NAME>.ret);
|
||||
|
||||
let VM_CNT = 1;
|
||||
let EXP_CNT = 1;
|
||||
let MTBUF = 1;
|
||||
|
|
@ -90,6 +104,7 @@ class MTBUF_Pseudo <string opName, dag outs, dag ins,
|
|||
bits<1> has_offset = 1;
|
||||
bits<1> has_slc = 1;
|
||||
bits<1> has_tfe = 1;
|
||||
bits<4> elements = 0;
|
||||
}
|
||||
|
||||
class MTBUF_Real <MTBUF_Pseudo ps> :
|
||||
|
|
@ -126,17 +141,17 @@ class getMTBUFInsDA<list<RegisterClass> vdataList,
|
|||
RegisterClass vaddrClass = !if(!empty(vaddrList), ?, !head(vaddrList));
|
||||
dag InsNoData = !if(!empty(vaddrList),
|
||||
(ins SReg_128:$srsrc, SCSrc_b32:$soffset,
|
||||
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc),
|
||||
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz),
|
||||
(ins vaddrClass:$vaddr, SReg_128:$srsrc, SCSrc_b32:$soffset,
|
||||
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc)
|
||||
offset:$offset, FORMAT:$format, GLC:$glc, SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz)
|
||||
);
|
||||
dag InsData = !if(!empty(vaddrList),
|
||||
(ins vdataClass:$vdata, SReg_128:$srsrc,
|
||||
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
|
||||
SLC:$slc, TFE:$tfe, DLC:$dlc),
|
||||
SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz),
|
||||
(ins vdataClass:$vdata, vaddrClass:$vaddr, SReg_128:$srsrc,
|
||||
SCSrc_b32:$soffset, offset:$offset, FORMAT:$format, GLC:$glc,
|
||||
SLC:$slc, TFE:$tfe, DLC:$dlc)
|
||||
SLC:$slc, TFE:$tfe, DLC:$dlc, SWZ:$swz)
|
||||
);
|
||||
dag ret = !if(!empty(vdataList), InsNoData, InsData);
|
||||
}
|
||||
|
|
@ -181,51 +196,54 @@ class MTBUF_SetupAddr<int addrKind> {
|
|||
class MTBUF_Load_Pseudo <string opName,
|
||||
int addrKind,
|
||||
RegisterClass vdataClass,
|
||||
int elems,
|
||||
list<dag> pattern=[],
|
||||
// Workaround bug bz30254
|
||||
int addrKindCopy = addrKind>
|
||||
: MTBUF_Pseudo<opName,
|
||||
(outs vdataClass:$vdata),
|
||||
getMTBUFIns<addrKindCopy>.ret,
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
|
||||
pattern>,
|
||||
MTBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
|
||||
let mayLoad = 1;
|
||||
let mayStore = 0;
|
||||
let elements = elems;
|
||||
}
|
||||
|
||||
multiclass MTBUF_Pseudo_Loads<string opName, RegisterClass vdataClass,
|
||||
ValueType load_vt = i32,
|
||||
int elems, ValueType load_vt = i32,
|
||||
SDPatternOperator ld = null_frag> {
|
||||
|
||||
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
|
||||
def _OFFSET : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems,
|
||||
[(set load_vt:$vdata,
|
||||
(ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i8:$format,
|
||||
i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
|
||||
i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>,
|
||||
MTBUFAddr64Table<0, NAME>;
|
||||
|
||||
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
|
||||
def _ADDR64 : MTBUF_Load_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems,
|
||||
[(set load_vt:$vdata,
|
||||
(ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset,
|
||||
i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)))]>,
|
||||
i8:$format, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)))]>,
|
||||
MTBUFAddr64Table<1, NAME>;
|
||||
|
||||
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
def _OFFEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
|
||||
def _IDXEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
|
||||
def _BOTHEN : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
|
||||
|
||||
let DisableWQM = 1 in {
|
||||
def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
|
||||
def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
def _OFFSET_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
|
||||
def _OFFEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
|
||||
def _IDXEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
|
||||
def _BOTHEN_exact : MTBUF_Load_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
|
||||
}
|
||||
}
|
||||
|
||||
class MTBUF_Store_Pseudo <string opName,
|
||||
int addrKind,
|
||||
RegisterClass vdataClass,
|
||||
int elems,
|
||||
list<dag> pattern=[],
|
||||
// Workaround bug bz30254
|
||||
int addrKindCopy = addrKind,
|
||||
|
|
@ -233,39 +251,40 @@ class MTBUF_Store_Pseudo <string opName,
|
|||
: MTBUF_Pseudo<opName,
|
||||
(outs),
|
||||
getMTBUFIns<addrKindCopy, [vdataClassCopy]>.ret,
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
|
||||
" $vdata, " # getMTBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
|
||||
pattern>,
|
||||
MTBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
let elements = elems;
|
||||
}
|
||||
|
||||
multiclass MTBUF_Pseudo_Stores<string opName, RegisterClass vdataClass,
|
||||
ValueType store_vt = i32,
|
||||
int elems, ValueType store_vt = i32,
|
||||
SDPatternOperator st = null_frag> {
|
||||
|
||||
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass,
|
||||
def _OFFSET : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems,
|
||||
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i8:$format, i1:$glc,
|
||||
i1:$slc, i1:$tfe, i1:$dlc))]>,
|
||||
i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
|
||||
MTBUFAddr64Table<0, NAME>;
|
||||
|
||||
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass,
|
||||
def _ADDR64 : MTBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, vdataClass, elems,
|
||||
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i8:$format, i1:$glc,
|
||||
i1:$slc, i1:$tfe, i1:$dlc))]>,
|
||||
i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
|
||||
MTBUFAddr64Table<1, NAME>;
|
||||
|
||||
def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
def _OFFEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
|
||||
def _IDXEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
|
||||
def _BOTHEN : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
|
||||
|
||||
let DisableWQM = 1 in {
|
||||
def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass>;
|
||||
def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass>;
|
||||
def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass>;
|
||||
def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass>;
|
||||
def _OFFSET_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.Offset, vdataClass, elems>;
|
||||
def _OFFEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, vdataClass, elems>;
|
||||
def _IDXEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.IdxEn, vdataClass, elems>;
|
||||
def _BOTHEN_exact : MTBUF_Store_Pseudo <opName, BUFAddrKind.BothEn, vdataClass, elems>;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -393,7 +412,7 @@ class getMUBUFInsDA<list<RegisterClass> vdataList,
|
|||
);
|
||||
dag ret = !con(
|
||||
!if(!empty(vdataList), InsNoData, InsData),
|
||||
!if(isLds, (ins DLC:$dlc), (ins TFE:$tfe, DLC:$dlc))
|
||||
!if(isLds, (ins DLC:$dlc, SWZ:$swz), (ins TFE:$tfe, DLC:$dlc,SWZ:$swz))
|
||||
);
|
||||
}
|
||||
|
||||
|
|
@ -465,7 +484,7 @@ class MUBUF_Load_Pseudo <string opName,
|
|||
!con(getMUBUFIns<addrKindCopy, [], isLds>.ret,
|
||||
!if(HasTiedDest, (ins getVregSrcForVT<vdata_vt>.ret:$vdata_in), (ins))),
|
||||
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc" #
|
||||
!if(isLds, " lds", "$tfe") # "$dlc",
|
||||
!if(isLds, " lds", "$tfe") # "$dlc" # "$swz",
|
||||
pattern>,
|
||||
MUBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # !if(isLds, "_lds", "") #
|
||||
|
|
@ -483,15 +502,15 @@ class MUBUF_Load_Pseudo <string opName,
|
|||
}
|
||||
|
||||
class MUBUF_Offset_Load_Pat <Instruction inst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> : Pat <
|
||||
(load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
||||
(load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
|
||||
(load_vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(load_vt (inst v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
|
||||
>;
|
||||
|
||||
class MUBUF_Addr64_Load_Pat <Instruction inst,
|
||||
ValueType load_vt = i32,
|
||||
SDPatternOperator ld = null_frag> : Pat <
|
||||
(load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
||||
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))
|
||||
(load_vt (ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(load_vt (inst i64:$vaddr, v4i32:$srsrc, i32:$soffset, i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))
|
||||
>;
|
||||
|
||||
multiclass MUBUF_Pseudo_Load_Pats<string BaseInst, ValueType load_vt = i32, SDPatternOperator ld = null_frag> {
|
||||
|
|
@ -542,7 +561,7 @@ class MUBUF_Store_Pseudo <string opName,
|
|||
: MUBUF_Pseudo<opName,
|
||||
(outs),
|
||||
getMUBUFIns<addrKindCopy, [getVregSrcForVT<store_vt>.ret]>.ret,
|
||||
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc",
|
||||
" $vdata, " # getMUBUFAsmOps<addrKindCopy>.ret # "$glc$slc$tfe$dlc$swz",
|
||||
pattern>,
|
||||
MUBUF_SetupAddr<addrKindCopy> {
|
||||
let PseudoInstr = opName # "_" # getAddrName<addrKindCopy>.ret;
|
||||
|
|
@ -558,12 +577,12 @@ multiclass MUBUF_Pseudo_Stores<string opName,
|
|||
|
||||
def _OFFSET : MUBUF_Store_Pseudo <opName, BUFAddrKind.Offset, store_vt,
|
||||
[(st store_vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
|
||||
MUBUFAddr64Table<0, NAME>;
|
||||
|
||||
def _ADDR64 : MUBUF_Store_Pseudo <opName, BUFAddrKind.Addr64, store_vt,
|
||||
[(st store_vt:$vdata, (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))]>,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))]>,
|
||||
MUBUFAddr64Table<1, NAME>;
|
||||
|
||||
def _OFFEN : MUBUF_Store_Pseudo <opName, BUFAddrKind.OffEn, store_vt>;
|
||||
|
|
@ -581,8 +600,8 @@ multiclass MUBUF_Pseudo_Stores<string opName,
|
|||
class MUBUF_Pseudo_Store_Lds<string opName>
|
||||
: MUBUF_Pseudo<opName,
|
||||
(outs),
|
||||
(ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc),
|
||||
" $srsrc, $soffset$offset lds$glc$slc"> {
|
||||
(ins SReg_128:$srsrc, SCSrc_b32:$soffset, offset:$offset, GLC:$glc, SLC:$slc, SWZ:$swz),
|
||||
" $srsrc, $soffset$offset lds$glc$slc$swz"> {
|
||||
let mayLoad = 0;
|
||||
let mayStore = 1;
|
||||
let maybeAtomic = 1;
|
||||
|
|
@ -1065,35 +1084,35 @@ defm BUFFER_ATOMIC_PK_ADD_F16 : MUBUF_Pseudo_Atomics_NO_RTN <
|
|||
// MTBUF Instructions
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32>;
|
||||
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128>;
|
||||
defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32>;
|
||||
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128>;
|
||||
defm TBUFFER_LOAD_FORMAT_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_x", VGPR_32, 1>;
|
||||
defm TBUFFER_LOAD_FORMAT_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_xy", VReg_64, 2>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyz", VReg_96, 3>;
|
||||
defm TBUFFER_LOAD_FORMAT_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_xyzw", VReg_128, 4>;
|
||||
defm TBUFFER_STORE_FORMAT_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_x", VGPR_32, 1>;
|
||||
defm TBUFFER_STORE_FORMAT_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_xy", VReg_64, 2>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyz", VReg_96, 3>;
|
||||
defm TBUFFER_STORE_FORMAT_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_xyzw", VReg_128, 4>;
|
||||
|
||||
let SubtargetPredicate = HasUnpackedD16VMem, D16Buf = 1 in {
|
||||
defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VReg_64, 2>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_96, 3>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_128, 4>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_X_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XY_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VReg_64, 2>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZ_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_96, 3>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZW_gfx80 : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_128, 4>;
|
||||
} // End HasUnpackedD16VMem.
|
||||
|
||||
let SubtargetPredicate = HasPackedD16VMem, D16Buf = 1 in {
|
||||
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_X : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_x", VGPR_32, 1>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XY : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xy", VGPR_32, 2>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZ : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyz", VReg_64, 3>;
|
||||
defm TBUFFER_LOAD_FORMAT_D16_XYZW : MTBUF_Pseudo_Loads <"tbuffer_load_format_d16_xyzw", VReg_64, 4>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_X : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_x", VGPR_32, 1>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XY : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xy", VGPR_32, 2>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZ : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyz", VReg_64, 3>;
|
||||
defm TBUFFER_STORE_FORMAT_D16_XYZW : MTBUF_Pseudo_Stores <"tbuffer_store_format_d16_xyzw", VReg_64, 4>;
|
||||
} // End HasPackedD16VMem.
|
||||
|
||||
let SubtargetPredicate = isGFX7Plus in {
|
||||
|
|
@ -1128,6 +1147,10 @@ def extract_dlc : SDNodeXForm<imm, [{
|
|||
return CurDAG->getTargetConstant((N->getZExtValue() >> 2) & 1, SDLoc(N), MVT::i8);
|
||||
}]>;
|
||||
|
||||
// Transform an immediate into an i8 target constant holding its bit 3,
// i.e. ((value >> 3) & 1). The patterns in this file apply it to the
// intrinsics' auxiliary operand to obtain the swz (swizzled buffer) bit.
def extract_swz : SDNodeXForm<imm, [{
|
||||
return CurDAG->getTargetConstant((N->getZExtValue() >> 3) & 1, SDLoc(N), MVT::i8);
|
||||
}]>;
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// buffer_load/store_format patterns
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
|
@ -1136,32 +1159,36 @@ multiclass MUBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
timm:$auxiliary, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0)),
|
||||
timm:$auxiliary, 0)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
timm:$auxiliary, timm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm)),
|
||||
timm:$auxiliary, timm)),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1211,35 +1238,39 @@ multiclass MUBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
timm:$auxiliary, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, 0),
|
||||
timm:$auxiliary, 0),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_glc $cachepolicy),
|
||||
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(as_i16imm $offset), (extract_glc $auxiliary),
|
||||
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
timm:$auxiliary, timm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (extract_glc $cachepolicy),
|
||||
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(as_i16imm $offset), (extract_glc $auxiliary),
|
||||
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$cachepolicy, timm),
|
||||
timm:$auxiliary, timm),
|
||||
(!cast<MUBUF_Pseudo>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset), (extract_glc $cachepolicy),
|
||||
(extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
$rsrc, $soffset, (as_i16imm $offset), (extract_glc $auxiliary),
|
||||
(extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1441,8 +1472,8 @@ def : GCNPat<
|
|||
class MUBUFLoad_PatternADDR64 <MUBUF_Pseudo Instr_ADDR64, ValueType vt,
|
||||
PatFrag constant_ld> : GCNPat <
|
||||
(vt (constant_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
>;
|
||||
|
||||
multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
|
||||
|
|
@ -1450,12 +1481,12 @@ multiclass MUBUFLoad_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Ins
|
|||
def : GCNPat <
|
||||
(vt (atomic_ld (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$slc))),
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
|
||||
(Instr_ADDR64 $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(vt (atomic_ld (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset))),
|
||||
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
|
||||
(Instr_OFFSET $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1476,8 +1507,8 @@ multiclass MUBUFLoad_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|||
|
||||
def : GCNPat <
|
||||
(vt (ld (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc))),
|
||||
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz))),
|
||||
(Instr_OFFSET $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1500,12 +1531,12 @@ multiclass MUBUFScratchLoadPat <MUBUF_Pseudo InstrOffen,
|
|||
def : GCNPat <
|
||||
(vt (ld (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
||||
i32:$soffset, u16imm:$offset))),
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(vt (ld (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset))),
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1515,12 +1546,12 @@ multiclass MUBUFScratchLoadPat_D16 <MUBUF_Pseudo InstrOffen,
|
|||
ValueType vt, PatFrag ld_frag> {
|
||||
def : GCNPat <
|
||||
(ld_frag (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr, i32:$soffset, u16imm:$offset), vt:$in),
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
|
||||
(InstrOffen $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(ld_frag (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset, u16imm:$offset), vt:$in),
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, $in)
|
||||
(InstrOffset $srsrc, $soffset, $offset, 0, 0, 0, 0, 0, $in)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1560,16 +1591,16 @@ defm : MUBUFScratchLoadPat_D16<BUFFER_LOAD_SBYTE_D16_OFFEN, BUFFER_LOAD_SBYTE_D1
|
|||
|
||||
multiclass MUBUFStore_Atomic_Pattern <MUBUF_Pseudo Instr_ADDR64, MUBUF_Pseudo Instr_OFFSET,
|
||||
ValueType vt, PatFrag atomic_st> {
|
||||
// Store follows atomic op convention so address is forst
|
||||
// Store follows atomic op convention so address is first
|
||||
def : GCNPat <
|
||||
(atomic_st (MUBUFAddr64 v4i32:$srsrc, i64:$vaddr, i32:$soffset,
|
||||
i16:$offset, i1:$slc), vt:$val),
|
||||
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0)
|
||||
(Instr_ADDR64 $val, $vaddr, $srsrc, $soffset, $offset, 0, $slc, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(atomic_st (MUBUFOffsetNoGLC v4i32:$rsrc, i32:$soffset, i16:$offset), vt:$val),
|
||||
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0)
|
||||
(Instr_OFFSET $val, $rsrc, $soffset, (as_i16imm $offset), 0, 0, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
let SubtargetPredicate = isGFX6GFX7 in {
|
||||
|
|
@ -1583,8 +1614,8 @@ multiclass MUBUFStore_Pattern <MUBUF_Pseudo Instr_OFFSET, ValueType vt,
|
|||
|
||||
def : GCNPat <
|
||||
(st vt:$vdata, (MUBUFOffset v4i32:$srsrc, i32:$soffset,
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc)),
|
||||
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc)
|
||||
i16:$offset, i1:$glc, i1:$slc, i1:$tfe, i1:$dlc, i1:$swz)),
|
||||
(Instr_OFFSET $vdata, $srsrc, $soffset, $offset, $glc, $slc, $tfe, $dlc, $swz)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1598,13 +1629,13 @@ multiclass MUBUFScratchStorePat <MUBUF_Pseudo InstrOffen,
|
|||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffen v4i32:$srsrc, i32:$vaddr,
|
||||
i32:$soffset, u16imm:$offset)),
|
||||
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffen rc:$value, $vaddr, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
|
||||
def : GCNPat <
|
||||
(st vt:$value, (MUBUFScratchOffset v4i32:$srsrc, i32:$soffset,
|
||||
u16imm:$offset)),
|
||||
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0)
|
||||
(InstrOffset rc:$value, $srsrc, $soffset, $offset, 0, 0, 0, 0, 0)
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1643,36 +1674,40 @@ multiclass MTBUF_LoadIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0)),
|
||||
timm:$format, timm:$auxiliary, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET) $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm)),
|
||||
timm:$format, timm:$auxiliary, timm)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN) $vindex, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0)),
|
||||
timm:$format, timm:$auxiliary, 0)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN) $voffset, $rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(vt (name v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm)),
|
||||
timm:$format, timm:$auxiliary, timm)),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN)
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset),
|
||||
(as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -1701,36 +1736,40 @@ multiclass MTBUF_StoreIntrinsicPat<SDPatternOperator name, ValueType vt,
|
|||
string opcode> {
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0),
|
||||
timm:$format, timm:$auxiliary, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFSET_exact) $vdata, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, 0, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, timm),
|
||||
timm:$format, timm:$auxiliary, timm),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _IDXEN_exact) $vdata, $vindex, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, 0, i32:$voffset, i32:$soffset, timm:$offset,
|
||||
timm:$format, timm:$cachepolicy, 0),
|
||||
timm:$format, timm:$auxiliary, 0),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _OFFEN_exact) $vdata, $voffset, $rsrc, $soffset,
|
||||
(as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
|
||||
def : GCNPat<
|
||||
(name vt:$vdata, v4i32:$rsrc, i32:$vindex, i32:$voffset, i32:$soffset,
|
||||
timm:$offset, timm:$format, timm:$cachepolicy, timm),
|
||||
timm:$offset, timm:$format, timm:$auxiliary, timm),
|
||||
(!cast<MTBUF_Pseudo>(opcode # _BOTHEN_exact)
|
||||
$vdata,
|
||||
(REG_SEQUENCE VReg_64, $vindex, sub0, $voffset, sub1),
|
||||
$rsrc, $soffset, (as_i16imm $offset), (as_i8imm $format),
|
||||
(extract_glc $cachepolicy), (extract_slc $cachepolicy), 0, (extract_dlc $cachepolicy))
|
||||
(extract_glc $auxiliary), (extract_slc $auxiliary), 0, (extract_dlc $auxiliary),
|
||||
(extract_swz $auxiliary))
|
||||
>;
|
||||
}
|
||||
|
||||
|
|
@ -2397,3 +2436,22 @@ def getMUBUFInfoFromBaseOpcodeAndElements : SearchIndex {
|
|||
let Table = MUBUFInfoTable;
|
||||
let Key = ["BaseOpcode", "elements"];
|
||||
}
|
||||
|
||||
// Searchable table filtered on the MTBUF_Pseudo class. Rows are emitted as
// the C++ type MTBUFInfo and carry the fields listed below. The primary key
// is Opcode, and the primary-key lookup function is named
// getMTBUFOpcodeHelper.
def MTBUFInfoTable : GenericTable {
|
||||
let FilterClass = "MTBUF_Pseudo";
|
||||
let CppTypeName = "MTBUFInfo";
|
||||
let Fields = ["Opcode", "BaseOpcode", "elements", "has_vaddr", "has_srsrc", "has_soffset"];
|
||||
|
||||
let PrimaryKey = ["Opcode"];
|
||||
let PrimaryKeyName = "getMTBUFOpcodeHelper";
|
||||
}
|
||||
|
||||
// Search index over MTBUFInfoTable keyed by the Opcode field.
def getMTBUFInfoFromOpcode : SearchIndex {
|
||||
let Table = MTBUFInfoTable;
|
||||
let Key = ["Opcode"];
|
||||
}
|
||||
|
||||
// Search index over MTBUFInfoTable keyed by the (BaseOpcode, elements) pair.
def getMTBUFInfoFromBaseOpcodeAndElements : SearchIndex {
|
||||
let Table = MTBUFInfoTable;
|
||||
let Key = ["BaseOpcode", "elements"];
|
||||
}
|
||||
|
|
|
|||
|
|
@ -196,6 +196,10 @@ void AMDGPUInstPrinter::printSLC(const MCInst *MI, unsigned OpNo,
|
|||
printNamedBit(MI, OpNo, O, "slc");
|
||||
}
|
||||
|
||||
// Printer hook for the swz operand. The body is empty, so nothing is written
// to O for this operand regardless of its value.
// NOTE(review): presumably intentional (unlike printSLC/printTFE, which call
// printNamedBit) -- confirm that swz is not meant to appear in assembly text.
void AMDGPUInstPrinter::printSWZ(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O) {
|
||||
}
|
||||
|
||||
void AMDGPUInstPrinter::printTFE(const MCInst *MI, unsigned OpNo,
|
||||
const MCSubtargetInfo &STI, raw_ostream &O) {
|
||||
printNamedBit(MI, OpNo, O, "tfe");
|
||||
|
|
|
|||
|
|
@ -72,6 +72,8 @@ private:
|
|||
raw_ostream &O);
|
||||
void printSLC(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
raw_ostream &O);
|
||||
void printSWZ(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
raw_ostream &O);
|
||||
void printTFE(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
raw_ostream &O);
|
||||
void printDMask(const MCInst *MI, unsigned OpNo, const MCSubtargetInfo &STI,
|
||||
|
|
|
|||
|
|
@ -112,6 +112,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
|
|
@ -132,6 +133,7 @@ static void buildPrologSpill(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
}
|
||||
|
||||
|
|
@ -157,6 +159,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
return;
|
||||
}
|
||||
|
|
@ -177,6 +180,7 @@ static void buildEpilogReload(LivePhysRegs &LiveRegs, MachineBasicBlock &MBB,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(MMO);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6271,7 +6271,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Offsets.first, // voffset
|
||||
Op.getOperand(4), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(5), // cachepolicy
|
||||
Op.getOperand(5), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
|
|
@ -6289,7 +6289,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Offsets.first, // voffset
|
||||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // cachepolicy
|
||||
Op.getOperand(6), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
|
|
@ -6338,7 +6338,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(4), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(5), // format
|
||||
Op.getOperand(6), // cachepolicy
|
||||
Op.getOperand(6), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
|
|
@ -6362,7 +6362,7 @@ SDValue SITargetLowering::LowerINTRINSIC_W_CHAIN(SDValue Op,
|
|||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // format
|
||||
Op.getOperand(7), // cachepolicy
|
||||
Op.getOperand(7), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
|
||||
|
|
@ -6832,7 +6832,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Op.getOperand(6), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(7), // format
|
||||
Op.getOperand(8), // cachepolicy
|
||||
Op.getOperand(8), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
|
||||
|
|
@ -6857,7 +6857,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // format
|
||||
Op.getOperand(7), // cachepolicy
|
||||
Op.getOperand(7), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc = IsD16 ? AMDGPUISD::TBUFFER_STORE_FORMAT_D16 :
|
||||
|
|
@ -6931,7 +6931,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Offsets.first, // voffset
|
||||
Op.getOperand(5), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(6), // cachepolicy
|
||||
Op.getOperand(6), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(0, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc =
|
||||
|
|
@ -6975,7 +6975,7 @@ SDValue SITargetLowering::LowerINTRINSIC_VOID(SDValue Op,
|
|||
Offsets.first, // voffset
|
||||
Op.getOperand(6), // soffset
|
||||
Offsets.second, // offset
|
||||
Op.getOperand(7), // cachepolicy
|
||||
Op.getOperand(7), // cachepolicy, swizzled buffer
|
||||
DAG.getTargetConstant(1, DL, MVT::i1), // idxen
|
||||
};
|
||||
unsigned Opc = IntrinsicID == Intrinsic::amdgcn_struct_buffer_store ?
|
||||
|
|
|
|||
|
|
@ -4693,6 +4693,8 @@ void SIInstrInfo::legalizeOperands(MachineInstr &MI,
|
|||
MIB.addImm(TFE->getImm());
|
||||
}
|
||||
|
||||
MIB.addImm(getNamedImmOperand(MI, AMDGPU::OpName::swz));
|
||||
|
||||
MIB.cloneMemRefs(MI);
|
||||
Addr64 = MIB;
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -84,7 +84,7 @@ def SDTtbuffer_load : SDTypeProfile<1, 8,
|
|||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // format(imm)
|
||||
SDTCisVT<7, i32>, // cachecontrol(imm)
|
||||
SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
|
||||
SDTCisVT<8, i1> // idxen(imm)
|
||||
]>;
|
||||
|
||||
|
|
@ -102,7 +102,7 @@ def SDTtbuffer_store : SDTypeProfile<0, 9,
|
|||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // format(imm)
|
||||
SDTCisVT<7, i32>, // cachecontrol(imm)
|
||||
SDTCisVT<7, i32>, // cachepolicy, swizzled buffer(imm)
|
||||
SDTCisVT<8, i1> // idxen(imm)
|
||||
]>;
|
||||
|
||||
|
|
@ -119,7 +119,7 @@ def SDTBufferLoad : SDTypeProfile<1, 7,
|
|||
SDTCisVT<3, i32>, // voffset(VGPR)
|
||||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // cachepolicy(imm)
|
||||
SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
|
||||
SDTCisVT<7, i1>]>; // idxen(imm)
|
||||
|
||||
def SIbuffer_load : SDNode <"AMDGPUISD::BUFFER_LOAD", SDTBufferLoad,
|
||||
|
|
@ -145,7 +145,7 @@ def SDTBufferStore : SDTypeProfile<0, 8,
|
|||
SDTCisVT<3, i32>, // voffset(VGPR)
|
||||
SDTCisVT<4, i32>, // soffset(SGPR)
|
||||
SDTCisVT<5, i32>, // offset(imm)
|
||||
SDTCisVT<6, i32>, // cachepolicy(imm)
|
||||
SDTCisVT<6, i32>, // cachepolicy, swizzled buffer(imm)
|
||||
SDTCisVT<7, i1>]>; // idxen(imm)
|
||||
|
||||
def SIbuffer_store : SDNode <"AMDGPUISD::BUFFER_STORE", SDTBufferStore,
|
||||
|
|
@ -1035,6 +1035,7 @@ def DLC : NamedOperandBit<"DLC", NamedMatchClass<"DLC">>;
|
|||
def GLC : NamedOperandBit<"GLC", NamedMatchClass<"GLC">>;
|
||||
def SLC : NamedOperandBit<"SLC", NamedMatchClass<"SLC">>;
|
||||
def TFE : NamedOperandBit<"TFE", NamedMatchClass<"TFE">>;
|
||||
def SWZ : NamedOperandBit<"SWZ", NamedMatchClass<"SWZ">>;
|
||||
def UNorm : NamedOperandBit<"UNorm", NamedMatchClass<"UNorm">>;
|
||||
def DA : NamedOperandBit<"DA", NamedMatchClass<"DA">>;
|
||||
def R128A16 : NamedOperandBit<"R128A16", NamedMatchClass<"R128A16">>;
|
||||
|
|
|
|||
|
|
@ -640,6 +640,12 @@ bool SILoadStoreOptimizer::findMatchingInst(CombineInfo &CI) {
|
|||
return false;
|
||||
}
|
||||
|
||||
// Do not merge VMEM buffer instructions with "swizzled" bit set.
|
||||
int Swizzled =
|
||||
AMDGPU::getNamedOperandIdx(CI.I->getOpcode(), AMDGPU::OpName::swz);
|
||||
if (Swizzled != -1 && CI.I->getOperand(Swizzled).getImm())
|
||||
return false;
|
||||
|
||||
for (unsigned i = 0; i < CI.NumAddresses; i++) {
|
||||
// We only ever merge operations with the same base address register, so
|
||||
// don't bother scanning forward if there are no other uses.
|
||||
|
|
@ -998,6 +1004,7 @@ SILoadStoreOptimizer::mergeBufferLoadPair(CombineInfo &CI) {
|
|||
.addImm(CI.SLC0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(CI.DLC0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
||||
|
||||
std::pair<unsigned, unsigned> SubRegIdx = getSubRegIdxs(CI);
|
||||
|
|
@ -1191,6 +1198,7 @@ SILoadStoreOptimizer::mergeBufferStorePair(CombineInfo &CI) {
|
|||
.addImm(CI.SLC0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(CI.DLC0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(combineKnownAdjacentMMOs(*MBB->getParent(), MMOa, MMOb));
|
||||
|
||||
moveInstsAfter(MIB, CI.InstsToMove);
|
||||
|
|
|
|||
|
|
@ -617,6 +617,7 @@ static bool buildMUBUFOffsetLoadStore(const SIInstrInfo *TII,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.cloneMemRefs(*MI);
|
||||
|
||||
const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
|
||||
|
|
@ -737,6 +738,7 @@ void SIRegisterInfo::buildSpillLoadStore(MachineBasicBlock::iterator MI,
|
|||
.addImm(0) // slc
|
||||
.addImm(0) // tfe
|
||||
.addImm(0) // dlc
|
||||
.addImm(0) // swz
|
||||
.addMemOperand(NewMMO);
|
||||
|
||||
if (!IsStore && TmpReg != AMDGPU::NoRegister)
|
||||
|
|
|
|||
|
|
@ -137,10 +137,51 @@ struct MUBUFInfo {
|
|||
bool has_soffset;
|
||||
};
|
||||
|
||||
// One row of MTBUF opcode information as returned by the MTBUF table lookup
// functions used below. NOTE(review): the member order presumably has to
// match the field order of the TableGen-generated table -- confirm before
// reordering.
struct MTBUFInfo {
|
||||
uint16_t Opcode;
|
||||
uint16_t BaseOpcode;
|
||||
uint8_t elements;
|
||||
bool has_vaddr;
|
||||
bool has_srsrc;
|
||||
bool has_soffset;
|
||||
};
|
||||
|
||||
#define GET_MTBUFInfoTable_DECL
|
||||
#define GET_MTBUFInfoTable_IMPL
|
||||
#define GET_MUBUFInfoTable_DECL
|
||||
#define GET_MUBUFInfoTable_IMPL
|
||||
#include "AMDGPUGenSearchableTables.inc"
|
||||
|
||||
// Returns the BaseOpcode of the table entry found for Opc, or -1 when the
// lookup returns null.
int getMTBUFBaseOpcode(unsigned Opc) {
|
||||
const MTBUFInfo *Info = getMTBUFInfoFromOpcode(Opc);
|
||||
return Info ? Info->BaseOpcode : -1;
|
||||
}
|
||||
|
||||
// Returns the Opcode of the table entry found for the (BaseOpc, Elements)
// pair, or -1 when the lookup returns null.
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements) {
|
||||
const MTBUFInfo *Info = getMTBUFInfoFromBaseOpcodeAndElements(BaseOpc, Elements);
|
||||
return Info ? Info->Opcode : -1;
|
||||
}
|
||||
|
||||
// Returns the `elements` value of the table entry found for Opc, or 0 when
// the lookup returns null.
int getMTBUFElements(unsigned Opc) {
|
||||
const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
|
||||
return Info ? Info->elements : 0;
|
||||
}
|
||||
|
||||
// Returns the has_vaddr flag of the table entry found for Opc, or false when
// the lookup returns null.
bool getMTBUFHasVAddr(unsigned Opc) {
|
||||
const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
|
||||
return Info ? Info->has_vaddr : false;
|
||||
}
|
||||
|
||||
// Returns the has_srsrc flag of the table entry found for Opc, or false when
// the lookup returns null.
bool getMTBUFHasSrsrc(unsigned Opc) {
|
||||
const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
|
||||
return Info ? Info->has_srsrc : false;
|
||||
}
|
||||
|
||||
// Returns the has_soffset flag of the table entry found for Opc, or false
// when the lookup returns null.
bool getMTBUFHasSoffset(unsigned Opc) {
|
||||
const MTBUFInfo *Info = getMTBUFOpcodeHelper(Opc);
|
||||
return Info ? Info->has_soffset : false;
|
||||
}
|
||||
|
||||
int getMUBUFBaseOpcode(unsigned Opc) {
|
||||
const MUBUFInfo *Info = getMUBUFInfoFromOpcode(Opc);
|
||||
return Info ? Info->BaseOpcode : -1;
|
||||
|
|
|
|||
|
|
@ -263,6 +263,24 @@ struct MIMGInfo {
|
|||
LLVM_READONLY
|
||||
const MIMGInfo *getMIMGInfo(unsigned Opc);
|
||||
|
||||
/// Base opcode recorded for MTBUF opcode \p Opc, or -1 if no entry is found.
LLVM_READONLY
|
||||
int getMTBUFBaseOpcode(unsigned Opc);
|
||||
|
||||
/// Opcode of the MTBUF entry matching \p BaseOpc and \p Elements, or -1 if
/// no entry is found.
LLVM_READONLY
|
||||
int getMTBUFOpcode(unsigned BaseOpc, unsigned Elements);
|
||||
|
||||
/// Element count recorded for MTBUF opcode \p Opc, or 0 if no entry is found.
LLVM_READONLY
|
||||
int getMTBUFElements(unsigned Opc);
|
||||
|
||||
/// has_vaddr flag recorded for MTBUF opcode \p Opc; false if no entry is found.
LLVM_READONLY
|
||||
bool getMTBUFHasVAddr(unsigned Opc);
|
||||
|
||||
/// has_srsrc flag recorded for MTBUF opcode \p Opc; false if no entry is found.
LLVM_READONLY
|
||||
bool getMTBUFHasSrsrc(unsigned Opc);
|
||||
|
||||
/// has_soffset flag recorded for MTBUF opcode \p Opc; false if no entry is
/// found.
LLVM_READONLY
|
||||
bool getMTBUFHasSoffset(unsigned Opc);
|
||||
|
||||
LLVM_READONLY
|
||||
int getMUBUFBaseOpcode(unsigned Opc);
|
||||
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ body: |
|
|||
; GFX6-LABEL: name: load_private_s32_from_4
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_4
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
|
||||
|
|
@ -51,12 +51,12 @@ body: |
|
|||
; GFX6-LABEL: name: load_private_s32_from_2
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_2
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_USHORT_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_USHORT_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 2, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_USHORT_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 2, align 2, addrspace 5)
|
||||
|
|
@ -82,12 +82,12 @@ body: |
|
|||
; GFX6-LABEL: name: load_private_s32_from_1
|
||||
; GFX6: liveins: $vgpr0
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
|
||||
|
|
@ -208,12 +208,12 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2047, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2047
|
||||
|
|
@ -243,14 +243,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||||
; GFX6: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_2047_known_bits
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
|
||||
; GFX9: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_AND_B32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2047, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2147483647
|
||||
|
|
@ -283,12 +283,12 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2048, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_2048
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2048, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 2048, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 2048
|
||||
|
|
@ -318,14 +318,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2047
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965249, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -2047
|
||||
|
|
@ -355,14 +355,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m2048
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294965248, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -2048
|
||||
|
|
@ -392,12 +392,12 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_4095
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[COPY]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
|
|
@ -427,14 +427,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_4096
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4096
|
||||
|
|
@ -464,14 +464,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4095
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963201, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -4095
|
||||
|
|
@ -501,14 +501,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m4096
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294963200, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -4096
|
||||
|
|
@ -538,14 +538,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_8191
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8191, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 8191
|
||||
|
|
@ -575,14 +575,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_8192
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 8192, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 8192
|
||||
|
|
@ -612,14 +612,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8191
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959105, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -8191
|
||||
|
|
@ -649,14 +649,14 @@ body: |
|
|||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_gep_m8192
|
||||
; GFX9: liveins: $vgpr0
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4294959104, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[COPY]], [[V_MOV_B32_e32_]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 -8192
|
||||
|
|
@ -681,10 +681,10 @@ body: |
|
|||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: load_private_s32_from_4_constant_0
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_4_constant_0
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
|
||||
|
|
@ -707,10 +707,10 @@ body: |
|
|||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: load_private_s32_from_4_constant_sgpr_16
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_4_constant_sgpr_16
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 16, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFSET]]
|
||||
%0:sgpr(p5) = G_CONSTANT i32 16
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
|
||||
|
|
@ -733,10 +733,10 @@ body: |
|
|||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: load_private_s32_from_1_constant_4095
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_constant_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFSET:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFSET]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
|
||||
|
|
@ -760,11 +760,11 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: load_private_s32_from_1_constant_4096
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_constant_4096
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 1, align 1, addrspace 5)
|
||||
|
|
@ -789,10 +789,10 @@ body: |
|
|||
bb.0:
|
||||
|
||||
; GFX6-LABEL: name: load_private_s32_from_fi
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_fi
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_LOAD %0 :: (load 4, align 4, addrspace 5)
|
||||
|
|
@ -820,10 +820,10 @@ body: |
|
|||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4095
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
|
|
@ -853,13 +853,13 @@ body: |
|
|||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX6: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
; GFX9-LABEL: name: load_private_s32_from_1_fi_offset_4096
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9: [[V_ADD_U32_e64_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: [[BUFFER_LOAD_UBYTE_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_UBYTE_OFFEN [[V_ADD_U32_e64_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 1, addrspace 5)
|
||||
; GFX9: $vgpr0 = COPY [[BUFFER_LOAD_UBYTE_OFFEN]]
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4096
|
||||
|
|
|
|||
|
|
@ -21,12 +21,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_4
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
|
@ -52,12 +52,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_2
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_SHORT_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 2, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 2, align 2, addrspace 5)
|
||||
|
|
@ -83,12 +83,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(s32) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 1, align 1, addrspace 5)
|
||||
|
|
@ -114,12 +114,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_v2s16
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
%0:vgpr(<2 x s16>) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
|
@ -145,12 +145,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_p3
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
%0:vgpr(p3) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
|
@ -176,12 +176,12 @@ body: |
|
|||
; GFX6: liveins: $vgpr0, $vgpr1
|
||||
; GFX6: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX6: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_p5
|
||||
; GFX9: liveins: $vgpr0, $vgpr1
|
||||
; GFX9: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GFX9: [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_DWORD_OFFEN [[COPY]], [[COPY1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 5)
|
||||
%0:vgpr(p5) = COPY $vgpr0
|
||||
%1:vgpr(p5) = COPY $vgpr1
|
||||
G_STORE %0, %1 :: (store 4, align 4, addrspace 5)
|
||||
|
|
@ -209,10 +209,10 @@ body: |
|
|||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4095, implicit $exec
|
||||
; GFX6: %2:vgpr_32, dead %4:sreg_64_xexec = V_ADD_I32_e64 [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], 0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_2:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_2]], %2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_fi_offset_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_FRAME_INDEX %stack.0
|
||||
%1:vgpr(s32) = G_CONSTANT i32 4095
|
||||
%2:vgpr(p5) = G_GEP %0, %1
|
||||
|
|
@ -239,10 +239,10 @@ body: |
|
|||
|
||||
; GFX6-LABEL: name: store_private_s32_to_1_constant_4095
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_constant_4095
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFSET [[V_MOV_B32_e32_]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4095, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4095
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
|
||||
|
|
@ -268,11 +268,11 @@ body: |
|
|||
; GFX6-LABEL: name: store_private_s32_to_1_constant_4096
|
||||
; GFX6: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX6: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX6: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9-LABEL: name: store_private_s32_to_1_constant_4096
|
||||
; GFX9: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GFX9: [[V_MOV_B32_e32_1:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
; GFX9: BUFFER_STORE_BYTE_OFFEN [[V_MOV_B32_e32_]], [[V_MOV_B32_e32_1]], $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 1, addrspace 5)
|
||||
%0:vgpr(p5) = G_CONSTANT i32 4096
|
||||
%1:vgpr(s32) = G_CONSTANT i32 0
|
||||
G_STORE %1, %0 :: (store 1, align 1, addrspace 5)
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; UNPACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -27,7 +27,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -44,7 +44,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
|
|||
; UNPACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; UNPACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; UNPACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_4095__sgpr_soffset_f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -56,7 +56,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
|
|||
; PACKED: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; PACKED: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f16(half %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -78,7 +78,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY7]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -91,7 +91,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -116,7 +116,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY9:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_1:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY9]], [[COPY5]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1, [[COPY5]], %subreg.sub2, [[V_LSHRREV_B32_e64_1]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v4f16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -131,7 +131,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
|
||||
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -211,7 +211,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
|
||||
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -240,7 +240,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4095
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -253,7 +253,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
|
|
@ -275,7 +275,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY6:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY6]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_soffset4096
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -288,7 +288,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; PACKED: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
|
|
@ -312,7 +312,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_16
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -328,7 +328,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -354,7 +354,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4095
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -370,7 +370,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -396,7 +396,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_1]]
|
||||
; UNPACKED: [[V_LSHRREV_B32_e64_:%[0-9]+]]:vgpr_32 = V_LSHRREV_B32_e64 [[COPY8]], [[COPY4]], implicit $exec
|
||||
; UNPACKED: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[V_LSHRREV_B32_e64_]], %subreg.sub1
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XY_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: S_ENDPGM 0
|
||||
; PACKED-LABEL: name: raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr_soffset_v2f16_voffset_add_4096
|
||||
; PACKED: bb.1 (%ir-block.0):
|
||||
|
|
@ -412,7 +412,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; PACKED: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; PACKED: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XY_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -459,7 +459,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; UNPACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
|
||||
; UNPACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; UNPACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: BUFFER_STORE_FORMAT_D16_XYZW_gfx80_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; UNPACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; UNPACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; UNPACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -500,7 +500,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; PACKED: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY10]], implicit $exec
|
||||
; PACKED: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; PACKED: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: BUFFER_STORE_FORMAT_D16_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; PACKED: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; PACKED: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; PACKED: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -31,7 +31,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__voffset_409
|
|||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_X_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -51,7 +51,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -72,7 +72,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZ_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -94,7 +94,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -132,7 +132,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY11]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -159,7 +159,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
|
|
@ -179,7 +179,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
|
|
@ -202,7 +202,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -226,7 +226,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -250,7 +250,7 @@ define amdgpu_ps void @raw_buffer_store_format__sgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %13:vgpr_32, dead %15:sreg_64_xexec = V_ADD_I32_e64 [[COPY6]], [[COPY8]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XY_OFFEN_exact [[REG_SEQUENCE1]], %13, [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.format.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -294,7 +294,7 @@ define amdgpu_ps void @raw_buffer_store_format__vgpr_rsrc__vgpr_val__vgpr_voffse
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY12]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_FORMAT_XYZW_OFFEN_exact [[REG_SEQUENCE1]], %15, [[REG_SEQUENCE4]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -36,7 +36,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__sgpr_val__sgpr_voffset__sgpr
|
|||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[COPY4]]
|
||||
; CHECK: [[COPY8:%[0-9]+]]:vgpr_32 = COPY [[COPY5]]
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY7]], [[COPY8]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -71,7 +71,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY8]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -103,7 +103,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
|
|||
; CHECK: successors: %bb.3(0x40000000), %bb.2(0x40000000)
|
||||
; CHECK: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_]], [[COPY6]], implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[V_READFIRSTLANE_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[V_CMP_EQ_U32_e64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -148,7 +148,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__vgpr
|
|||
; CHECK: [[V_READFIRSTLANE_B32_4:%[0-9]+]]:sreg_32_xm0 = V_READFIRSTLANE_B32 [[COPY6]], implicit $exec
|
||||
; CHECK: [[V_CMP_EQ_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_EQ_U32_e64 [[V_READFIRSTLANE_B32_4]], [[COPY6]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_1:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U32_e64_]], [[S_AND_B64_]], implicit-def $scc
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE3]], [[V_READFIRSTLANE_B32_4]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_1]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -173,7 +173,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 1)
|
||||
ret void
|
||||
|
|
@ -191,7 +191,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 2)
|
||||
ret void
|
||||
|
|
@ -209,7 +209,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 3)
|
||||
ret void
|
||||
|
|
@ -227,7 +227,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 4)
|
||||
ret void
|
||||
|
|
@ -245,7 +245,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 6)
|
||||
ret void
|
||||
|
|
@ -263,7 +263,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 0, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 5)
|
||||
ret void
|
||||
|
|
@ -281,7 +281,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 1, 1, 0, 1, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 7)
|
||||
ret void
|
||||
|
|
@ -301,7 +301,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -322,7 +322,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY8:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_96 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2
|
||||
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORDX3_OFFEN_exact [[REG_SEQUENCE1]], [[COPY7]], [[REG_SEQUENCE]], [[COPY8]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v3f32(<3 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -344,7 +344,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY9:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_128 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1, [[COPY6]], %subreg.sub2, [[COPY7]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORDX4_OFFEN_exact [[REG_SEQUENCE1]], [[COPY8]], [[REG_SEQUENCE]], [[COPY9]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 16 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -362,7 +362,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_BYTE_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 1 into custom TargetCustom7, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%val.trunc = trunc i32 %val to i8
|
||||
call void @llvm.amdgcn.raw.buffer.store.i8(i8 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
|
@ -381,7 +381,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%val.trunc = trunc i32 %val to i16
|
||||
call void @llvm.amdgcn.raw.buffer.store.i16(i16 %val.trunc, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
|
|
@ -400,7 +400,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_SHORT_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 2 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f16(half %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -418,7 +418,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[COPY6:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -438,7 +438,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY7:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY4]], %subreg.sub0, [[COPY5]], %subreg.sub1
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v4f16(<4 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -474,7 +474,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE3]], [[COPY9]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE4:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORDX2_OFFEN_exact [[REG_SEQUENCE1]], [[COPY6]], [[REG_SEQUENCE4]], [[COPY7]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -498,7 +498,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
|
|||
; CHECK: [[COPY4:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], $noreg, [[REG_SEQUENCE]], [[COPY5]], 4095, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4095, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4095, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -516,7 +516,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__sgpr_soffset_f32_v
|
|||
; CHECK: [[COPY5:%[0-9]+]]:sreg_32 = COPY $sgpr6
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 4096, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE]], [[COPY5]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 4096, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 4096, i32 %soffset, i32 0)
|
||||
ret void
|
||||
|
|
@ -537,7 +537,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -559,7 +559,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -581,7 +581,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -600,7 +600,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4095
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4095, i32 0)
|
||||
ret void
|
||||
|
|
@ -618,7 +618,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[COPY5:%[0-9]+]]:vgpr_32 = COPY $vgpr1
|
||||
; CHECK: [[REG_SEQUENCE:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[COPY1]], %subreg.sub1, [[COPY2]], %subreg.sub2, [[COPY3]], %subreg.sub3
|
||||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 4096
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[COPY5]], [[REG_SEQUENCE]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset, i32 4096, i32 0)
|
||||
ret void
|
||||
|
|
@ -639,7 +639,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 16
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 16
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -661,7 +661,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4095
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4095
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -683,7 +683,7 @@ define amdgpu_ps void @raw_buffer_store__sgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[S_MOV_B32_:%[0-9]+]]:sreg_32_xm0 = S_MOV_B32 4096
|
||||
; CHECK: [[COPY7:%[0-9]+]]:vgpr_32 = COPY [[S_MOV_B32_]]
|
||||
; CHECK: %11:vgpr_32, dead %13:sreg_64_xexec = V_ADD_I32_e64 [[COPY5]], [[COPY7]], 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: S_ENDPGM 0
|
||||
%voffset.add = add i32 %voffset, 4096
|
||||
call void @llvm.amdgcn.raw.buffer.store.v2f16(<2 x half> %val, <4 x i32> %rsrc, i32 %voffset.add, i32 %soffset, i32 0)
|
||||
|
|
@ -722,7 +722,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__vgpr_voffset__sgpr
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY9]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], %11, [[REG_SEQUENCE3]], [[COPY6]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
@ -765,7 +765,7 @@ define amdgpu_ps void @raw_buffer_store__vgpr_rsrc__vgpr_val__5000_voffset__sgpr
|
|||
; CHECK: [[V_CMP_EQ_U64_e64_1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[REG_SEQUENCE2]], [[COPY7]], implicit $exec
|
||||
; CHECK: [[S_AND_B64_:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[V_CMP_EQ_U64_e64_1]], [[V_CMP_EQ_U64_e64_]], implicit-def $scc
|
||||
; CHECK: [[REG_SEQUENCE3:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1, [[V_READFIRSTLANE_B32_2]], %subreg.sub2, [[V_READFIRSTLANE_B32_3]], %subreg.sub3
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN_exact [[COPY4]], [[V_MOV_B32_e32_]], [[REG_SEQUENCE3]], [[COPY5]], 904, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into custom TargetCustom7 + 5000, align 1, addrspace 4)
|
||||
; CHECK: [[S_AND_SAVEEXEC_B64_:%[0-9]+]]:sreg_64 = S_AND_SAVEEXEC_B64 killed [[S_AND_B64_]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
; CHECK: $exec = S_XOR_B64_term $exec, [[S_AND_SAVEEXEC_B64_]], implicit-def $scc
|
||||
; CHECK: S_CBRANCH_EXECNZ %bb.2, implicit $exec
|
||||
|
|
|
|||
|
|
@ -393,12 +393,12 @@ name: trivial_clause_load_mubuf4_x2
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: trivial_clause_load_mubuf4_x2
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFEN $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
@ -407,13 +407,13 @@ name: break_clause_simple_load_mubuf_offen_ptr
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: break_clause_simple_load_mubuf_offen_ptr
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; XNACK-NEXT: S_NOP 0
|
||||
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
@ -424,11 +424,11 @@ name: mubuf_load4_overwrite_ptr
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: mubuf_load4_overwrite_ptr
|
||||
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr2 = V_MOV_B32_e32 $vgpr0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
|
@ -443,11 +443,11 @@ body: |
|
|||
; GCN-LABEL: name: break_clause_flat_load_mubuf_load
|
||||
; GCN: $vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
; XNACK-NEXT: S_NOP 0
|
||||
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr0 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
# Break a clause from interference between mubuf and flat instructions
|
||||
|
|
@ -462,7 +462,7 @@ name: break_clause_mubuf_load_flat_load
|
|||
|
||||
body: |
|
||||
bb.0:
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = FLAT_LOAD_DWORD $vgpr2_vgpr3, 0, 0, 0, 0, implicit $exec, implicit $flat_scr
|
||||
|
||||
S_ENDPGM 0
|
||||
|
|
@ -504,12 +504,12 @@ name: break_clause_atomic_rtn_into_ptr_mubuf4
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: break_clause_atomic_rtn_into_ptr_mubuf4
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; XNACK-NEXT: S_NOP 0
|
||||
; GCN-NEXT: $vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr2 = BUFFER_ATOMIC_ADD_OFFEN_RTN $vgpr2, $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
@ -521,11 +521,11 @@ body: |
|
|||
bb.0:
|
||||
; GCN-LABEL: name: break_clause_atomic_nortn_ptr_load_mubuf4
|
||||
; GCN: BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
|
||||
BUFFER_ATOMIC_ADD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
@ -536,11 +536,11 @@ name: no_break_clause_mubuf_load_novaddr
|
|||
body: |
|
||||
bb.0:
|
||||
; GCN-LABEL: name: no_break_clause_mubuf_load_novaddr
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN-NEXT: S_ENDPGM 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
|
|||
|
|
@ -55,10 +55,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -117,10 +117,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_MAX_F32_e64 0, killed %20, 0, killed %20, 1, 3, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
@ -180,10 +180,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 0, 3, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -245,10 +245,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_MUL_F32_e64 0, killed %20, 0, 1056964608, 1, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -322,10 +322,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 0, 3, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -387,10 +387,10 @@ body: |
|
|||
%26 = V_LSHL_B64 killed %25, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%18 = COPY %26
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %26, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%20 = V_ADD_F32_e64 0, killed %17, 0, 1065353216, 0, 0, implicit $exec
|
||||
%21 = V_ADD_F32_e64 0, killed %20, 0, killed %20, 1, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %21, %26, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ body: |
|
|||
%14:vgpr_32 = V_AND_B32_e32 1, %13, implicit $exec
|
||||
%15:sreg_64_xexec = V_CMP_EQ_U32_e64 0, %14, implicit $exec
|
||||
%16:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, 1, %15, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
|
||||
BUFFER_STORE_DWORD_OFFEN_exact %16, undef %17:vgpr_32, undef %18:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into constant-pool, align 1, addrspace 4)
|
||||
S_ENDPGM 0
|
||||
|
||||
bb.2:
|
||||
|
|
@ -78,7 +78,7 @@ body: |
|
|||
|
||||
bb.8:
|
||||
successors: %bb.10
|
||||
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
|
||||
%31:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %32:vgpr_32, undef %33:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
|
||||
%34:sreg_64_xexec = V_CMP_NE_U32_e64 0, %31, implicit $exec
|
||||
%35:vgpr_32 = V_CNDMASK_B32_e64 0, 0, 0, -1, %34, implicit $exec
|
||||
%28:vgpr_32 = COPY %35
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ body: |
|
|||
|
||||
bb.9:
|
||||
successors: %bb.10(0x80000000)
|
||||
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
%19:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %18, undef %20:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
%21:sreg_64 = V_CMP_NE_U32_e64 target-flags(amdgpu-gotprel) 0, killed %19.sub0, implicit $exec
|
||||
%22:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%23:sreg_64 = S_AND_B64 %22, %21, implicit-def dead $scc
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ body: |
|
|||
%23:vreg_128 = COPY killed %17
|
||||
%24:sreg_64 = COPY killed %16
|
||||
%25:vgpr_32 = V_OR_B32_e32 %22, %11, implicit $exec
|
||||
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
%26:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %25, undef %27:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
%28:vgpr_32 = V_LSHRREV_B32_e32 30, killed %26.sub0, implicit $exec
|
||||
%29:vreg_128 = COPY killed %21
|
||||
%29.sub0:vreg_128 = COPY %1
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
#
|
||||
# GCN-LABEL: bb.6:
|
||||
# GCN: successors: %bb.7(0x{{[0-9]+}}), %bb.18(0x{{[0-9]+}})
|
||||
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# GCN: %{{[0-9]+}}:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET %{{[0-9]+}}, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
#
|
||||
|
||||
--- |
|
||||
|
|
@ -69,7 +69,7 @@ body: |
|
|||
%10:sreg_64 = COPY killed %5
|
||||
undef %11.sub2:sreg_128 = COPY %4
|
||||
%11.sub3:sreg_128 = COPY %3
|
||||
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%12:vreg_128 = BUFFER_LOAD_DWORDX4_OFFSET killed %11, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
undef %13.sub1:vreg_128 = COPY %9.sub1
|
||||
%13.sub2:vreg_128 = COPY %9.sub2
|
||||
%14:sreg_64 = V_CMP_GT_F32_e64 0, target-flags(amdgpu-rel32-lo) 0, 0, killed %12.sub3, 0, implicit $exec
|
||||
|
|
@ -161,7 +161,7 @@ body: |
|
|||
bb.18:
|
||||
successors: %bb.7(0x80000000)
|
||||
dead %59:vgpr_32 = V_FMA_F32 0, killed %9.sub2, 0, undef %60:vgpr_32, 0, undef %61:vgpr_32, 0, 0, implicit $exec
|
||||
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, implicit $exec
|
||||
dead %62:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN undef %63:vgpr_32, undef %64:sreg_128, undef %65:sreg_32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
undef %66.sub1:vreg_128 = COPY %13.sub1
|
||||
%66.sub2:vreg_128 = COPY %13.sub2
|
||||
%67:sreg_64 = V_CMP_NGT_F32_e64 0, 0, 0, undef %68:vgpr_32, 0, implicit $exec
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ body: |
|
|||
%43:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM undef %44:sreg_128, 12, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%45:vgpr_32 = V_MUL_LO_I32 killed %42, killed %43, implicit $exec
|
||||
%46:vgpr_32 = V_LSHLREV_B32_e32 2, killed %45, implicit $exec
|
||||
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
|
||||
%47:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN killed %46, undef %48:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from constant-pool, align 1, addrspace 4)
|
||||
%49:sreg_64 = V_CMP_NE_U32_e64 0, killed %47, implicit $exec
|
||||
%50:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%51:sreg_64 = S_AND_B64 %50, %49, implicit-def dead $scc
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
|
||||
|
|
@ -44,7 +44,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: DBG_VALUE
|
||||
|
|
@ -80,7 +80,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -92,7 +92,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -141,7 +141,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
|
||||
|
|
@ -152,7 +152,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: bb.4:
|
||||
|
|
@ -188,7 +188,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -200,7 +200,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -249,7 +249,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
|
||||
|
|
@ -260,7 +260,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: bb.4:
|
||||
|
|
@ -297,7 +297,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -309,7 +309,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -358,7 +358,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
|
|
@ -370,7 +370,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
|
||||
|
|
@ -408,7 +408,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -420,7 +420,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
%15:sgpr_32 = IMPLICIT_DEF
|
||||
|
|
@ -471,7 +471,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 $exec, [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
; GCN: $exec = S_MOV_B64_term [[S_AND_B64_1]]
|
||||
|
|
@ -482,7 +482,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: [[DEF:%[0-9]+]]:sgpr_32 = IMPLICIT_DEF
|
||||
|
|
@ -520,7 +520,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -532,7 +532,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -583,7 +583,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
|
|
@ -595,7 +595,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
|
||||
|
|
@ -631,7 +631,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -643,7 +643,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -691,7 +691,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
|
|
@ -703,7 +703,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
|
||||
|
|
@ -739,7 +739,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -751,7 +751,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
@ -799,7 +799,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
|
|
@ -811,7 +811,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.5(0x80000000)
|
||||
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
|
||||
|
|
@ -850,7 +850,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -862,7 +862,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ body: |
|
|||
; GCN: %8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, [[COPY3]], %9, 0, implicit $exec
|
||||
; GCN: %5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
; GCN: %5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: [[V_CMP_NE_U32_e64_:%[0-9]+]]:sreg_64 = V_CMP_NE_U32_e64 2, [[COPY1]], implicit $exec
|
||||
; GCN: [[COPY4:%[0-9]+]]:sreg_64 = COPY $exec, implicit-def $exec
|
||||
; GCN: [[S_AND_B64_1:%[0-9]+]]:sreg_64 = S_AND_B64 [[COPY4]], [[V_CMP_NE_U32_e64_]], implicit-def dead $scc
|
||||
|
|
@ -54,7 +54,7 @@ body: |
|
|||
; GCN: %5.sub0:sgpr_128 = COPY %5.sub2
|
||||
; GCN: %5.sub1:sgpr_128 = COPY %5.sub2
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_ADDR64 [[V_MOV_B32_e32_]], %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
; GCN: bb.3:
|
||||
; GCN: successors: %bb.4(0x80000000)
|
||||
; GCN: $exec = S_OR_B64 $exec, [[COPY4]], implicit-def $scc
|
||||
|
|
@ -91,7 +91,7 @@ body: |
|
|||
%8.sub1:vreg_64, dead %10:sreg_64_xexec = V_ADDC_U32_e64 0, %7, %9, 0, implicit $exec
|
||||
%5.sub3:sgpr_128 = S_MOV_B32 61440
|
||||
%5.sub2:sgpr_128 = S_MOV_B32 0
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %6.sub1, %6, %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
%11:sreg_64 = V_CMP_NE_U32_e64 2, %0, implicit $exec
|
||||
%12:sreg_64 = COPY $exec, implicit-def $exec
|
||||
%13:sreg_64 = S_AND_B64 %12, %11, implicit-def dead $scc
|
||||
|
|
@ -103,7 +103,7 @@ body: |
|
|||
%5.sub0:sgpr_128 = COPY %5.sub2
|
||||
%5.sub1:sgpr_128 = COPY %5.sub2
|
||||
%14:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
BUFFER_STORE_DWORD_ADDR64 %14, %8, %5, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4, addrspace 1)
|
||||
|
||||
bb.3:
|
||||
$exec = S_OR_B64 $exec, %12, implicit-def $scc
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@ body: |
|
|||
%8 = S_MOV_B32 9999
|
||||
%9 = S_AND_B32 killed %7, killed %8, implicit-def dead $scc
|
||||
%10 = COPY %9
|
||||
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %10, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -219,7 +219,7 @@ body: |
|
|||
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
|
||||
%12 = S_LSHL_B32 killed %5, 12, implicit-def dead $scc
|
||||
%13 = COPY %12
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -419,7 +419,7 @@ body: |
|
|||
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
|
||||
%12 = S_ASHR_I32 killed %5, 12, implicit-def dead $scc
|
||||
%13 = COPY %12
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -627,7 +627,7 @@ body: |
|
|||
%10 = REG_SEQUENCE killed %7, 1, killed %6, 2, killed %9, 3, killed %8, 4
|
||||
%12 = S_LSHR_B32 killed %5, 12, implicit-def dead $scc
|
||||
%13 = COPY %12
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %13, killed %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -291,7 +291,7 @@ body: |
|
|||
bb.3..lr.ph3410.preheader:
|
||||
successors: %bb.4(0x80000000)
|
||||
|
||||
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
dead %22:vreg_128 = BUFFER_LOAD_FORMAT_XYZW_IDXEN killed %53.sub3, undef %24:sreg_128, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from constant-pool, align 1, addrspace 4)
|
||||
dead %60:vgpr_32 = V_MOV_B32_e32 -1, implicit $exec
|
||||
%36:sreg_64 = S_AND_B64 $exec, -1, implicit-def dead $scc
|
||||
dead %67:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
|
|||
; GCN: [[DEF:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]]
|
||||
; GCN: [[DEF1:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
|
||||
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; GCN: [[BUFFER_LOAD_DWORDX4_OFFEN:%[0-9]+]]:vreg_128 = BUFFER_LOAD_DWORDX4_OFFEN [[COPY]], [[DEF1]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 16 from custom TargetCustom7, align 1, addrspace 4)
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub2
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub1
|
||||
; GCN: [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[BUFFER_LOAD_DWORDX4_OFFEN]].sub0
|
||||
|
|
@ -21,7 +21,7 @@ define amdgpu_hs void @main([0 x i8] addrspace(6)* inreg %arg) {
|
|||
; GCN: [[DEF2:%[0-9]+]]:sreg_32_xm0 = IMPLICIT_DEF
|
||||
; GCN: [[COPY5:%[0-9]+]]:vgpr_32 = COPY [[DEF2]]
|
||||
; GCN: [[DEF3:%[0-9]+]]:sreg_128 = IMPLICIT_DEF
|
||||
; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; GCN: BUFFER_STORE_DWORDX3_OFFEN_exact killed [[COPY4]], [[COPY5]], [[DEF3]], [[S_MOV_B32_]], 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 12 into custom TargetCustom7, align 1, addrspace 4)
|
||||
; GCN: S_ENDPGM 0
|
||||
main_body:
|
||||
%tmp25 = call <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32> undef, i32 undef, i32 0, i32 0)
|
||||
|
|
|
|||
|
|
@ -23,13 +23,13 @@ body: |
|
|||
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
||||
; GCN: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%1:sreg_32_xm0 = S_MOV_B32 0
|
||||
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, implicit $exec
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, %1, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = COPY %3
|
||||
SI_RETURN_TO_EPILOG $vgpr0
|
||||
|
||||
|
|
@ -57,12 +57,12 @@ body: |
|
|||
; GCN: liveins: $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GCN: [[COPY:%[0-9]+]]:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_IDXEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN [[V_MOV_B32_e32_]], [[COPY]], $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_IDXEN]]
|
||||
; GCN: SI_RETURN_TO_EPILOG $vgpr0
|
||||
%0:sgpr_128 = COPY $sgpr12_sgpr13_sgpr14_sgpr15
|
||||
%2:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_IDXEN %2, %0, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = COPY %3
|
||||
SI_RETURN_TO_EPILOG $vgpr0
|
||||
|
||||
|
|
@ -87,15 +87,15 @@ body: |
|
|||
|
||||
; GCN-LABEL: name: fold_fi_mubuf_scratch_scratch_wave_offset
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GCN: S_ENDPGM 0, implicit $vgpr0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
||||
|
||||
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, implicit $exec
|
||||
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
S_ENDPGM 0, implicit $vgpr0
|
||||
|
||||
|
|
@ -119,15 +119,15 @@ body: |
|
|||
|
||||
; GCN-LABEL: name: no_fold_fi_mubuf_scratch_sp_offset
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: BUFFER_STORE_DWORD_OFFEN [[V_MOV_B32_e32_]], %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: [[BUFFER_LOAD_DWORD_OFFEN:%[0-9]+]]:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr0 = COPY [[BUFFER_LOAD_DWORD_OFFEN]]
|
||||
; GCN: S_ENDPGM 0, implicit $vgpr0
|
||||
%0:vgpr_32 = V_MOV_B32_e32 %stack.0, implicit $exec
|
||||
%1:vgpr_32 = V_MOV_B32_e32 7, implicit $exec
|
||||
|
||||
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %1:vgpr_32, %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%2:vgpr_32 = BUFFER_LOAD_DWORD_OFFEN %0:vgpr_32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = COPY %2
|
||||
S_ENDPGM 0, implicit $vgpr0
|
||||
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ body: |
|
|||
%4:vgpr_32 = V_LSHLREV_B32_e64 killed %3, %0, implicit $exec
|
||||
%5:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%6:vreg_64 = REG_SEQUENCE killed %4, %subreg.sub0, killed %5, %subreg.sub1
|
||||
%7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %6, %2, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%8:sreg_32_xm0 = S_MOV_B32 65535
|
||||
%9:vgpr_32 = COPY %8
|
||||
%10:vgpr_32 = V_AND_B32_e32 %7, %9, implicit $exec
|
||||
|
|
|
|||
|
|
@ -158,10 +158,10 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%13 = V_ADD_F16_e64 0, killed %11, 0, %12, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %13, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -222,13 +222,13 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%13 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -289,14 +289,14 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%14 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
|
||||
%16 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -360,16 +360,16 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%14 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
|
||||
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
|
||||
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -427,13 +427,13 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%13 = V_MOV_B32_e32 1, implicit $exec
|
||||
%14 = V_ADD_F16_e64 0, killed %11, 0, %13, 0, 0, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, killed %12, 0, killed %13, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -494,16 +494,16 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%14 = V_MOV_B32_e32 -2, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, %11, 0, %14, 0, 0, implicit $exec
|
||||
%16 = V_ADD_F16_e64 0, %12, 0, %14, 0, 0, implicit $exec
|
||||
%17 = V_ADD_F32_e64 0, killed %13, 0, killed %14, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %16, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %17, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -564,13 +564,13 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%13 = V_MOV_B32_e32 15360, implicit $exec
|
||||
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
|
||||
%15 = V_ADD_F32_e64 0, %12, 0, %13, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -631,13 +631,13 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = V_MOV_B32_e32 80886784, implicit $exec
|
||||
%14 = V_ADD_F16_e64 0, %11, 0, %13, 0, 0, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -697,13 +697,13 @@ body: |
|
|||
%8 = S_MOV_B32 61440
|
||||
%9 = S_MOV_B32 -1
|
||||
%10 = REG_SEQUENCE killed %7, 1, killed %5, 2, killed %9, 3, killed %8, 4
|
||||
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%11 = BUFFER_LOAD_DWORD_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
%12 = BUFFER_LOAD_USHORT_OFFSET %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 2 from `half addrspace(1)* undef`)
|
||||
%13 = V_MOV_B32_e32 305413120, implicit $exec
|
||||
%14 = V_ADD_F32_e64 0, %11, 0, %13, 0, 0, implicit $exec
|
||||
%15 = V_ADD_F16_e64 0, %12, 0, %13, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed %14, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `float addrspace(1)* undef`)
|
||||
BUFFER_STORE_SHORT_OFFSET killed %15, %10, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 2 into `half addrspace(1)* undef`)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -60,13 +60,13 @@ body: |
|
|||
%17 = REG_SEQUENCE killed %6, 17, %13, 18
|
||||
%18 = REG_SEQUENCE killed %4, 17, %13, 18
|
||||
%20 = COPY %29
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%22 = COPY %29
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%23 = V_MOV_B32_e32 1090519040, implicit $exec
|
||||
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
|
||||
%26 = COPY %29
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -131,13 +131,13 @@ body: |
|
|||
%17 = REG_SEQUENCE killed %6, 17, %13, 18
|
||||
%18 = REG_SEQUENCE killed %4, 17, %13, 18
|
||||
%20 = COPY %29
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%22 = COPY %29
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%23 = V_MOV_B32_e32 1090519040, implicit $exec
|
||||
%24 = V_MAC_F32_e64 0, killed %19, 0, killed %21, 0, %23, 0, 2, implicit $exec
|
||||
%26 = COPY %29
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -202,13 +202,13 @@ body: |
|
|||
%17 = REG_SEQUENCE killed %6, 17, %13, 18
|
||||
%18 = REG_SEQUENCE killed %4, 17, %13, 18
|
||||
%20 = COPY %29
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%22 = COPY %29
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%23 = V_MOV_B32_e32 1090519040, implicit $exec
|
||||
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 1, 0, implicit $exec
|
||||
%26 = COPY %29
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -273,13 +273,13 @@ body: |
|
|||
%17 = REG_SEQUENCE killed %6, 17, %13, 18
|
||||
%18 = REG_SEQUENCE killed %4, 17, %13, 18
|
||||
%20 = COPY %29
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %20, killed %14, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%22 = COPY %29
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%21 = BUFFER_LOAD_DWORD_ADDR64 %22, killed %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%23 = V_MOV_B32_e32 1090519040, implicit $exec
|
||||
%24 = V_MAD_F32 0, killed %19, 0, killed %21, 0, %23, 0, 1, implicit $exec
|
||||
%26 = COPY %29
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %24, %26, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ body: |
|
|||
%3 = S_LSHL_B32 %1, killed %1, implicit-def dead $scc
|
||||
%4 = V_AND_B32_e64 killed %2, killed %3, implicit $exec
|
||||
%5 = IMPLICIT_DEF
|
||||
BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed %4, killed %5, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ body: |
|
|||
bb.0.entry:
|
||||
liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $vgpr0, $vgpr1, $vgpr7, $vgpr8, $vgpr9, $vgpr10
|
||||
|
||||
BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX4_OFFSET_exact killed $vgpr7_vgpr8_vgpr9_vgpr10, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 96, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr7 = V_INTERP_P1_F32 $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
|
|
|
|||
|
|
@ -57,7 +57,7 @@ body: |
|
|||
BUNDLE implicit-def $sgpr0_sgpr1, implicit $sgpr10_sgpr11 {
|
||||
$sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM $sgpr10_sgpr11, 0, 0, 0
|
||||
}
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -91,5 +91,5 @@ body: |
|
|||
}
|
||||
|
||||
bb.2:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
|
|
|||
|
|
@ -104,7 +104,7 @@ define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
|
|||
; GCN: $exec = S_MOV_B64 renamable $sgpr0_sgpr1
|
||||
; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
|
||||
; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
|
||||
; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
|
||||
; GCN: BUFFER_STORE_DWORD_OFFSET renamable $vgpr0, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
|
||||
; GCN: S_ENDPGM 0
|
||||
entry:
|
||||
%id = call i32 @llvm.amdgcn.workitem.id.x() #1
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ body: |
|
|||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.2(0x80000000)
|
||||
; CHECK: $vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
; CHECK: bb.2:
|
||||
; CHECK: S_ENDPGM 0
|
||||
bb.0:
|
||||
|
|
@ -51,7 +51,7 @@ body: |
|
|||
bb.1:
|
||||
successors: %bb.2
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
|
|
|
|||
|
|
@ -49,10 +49,10 @@ body: |
|
|||
bb.0 (%ir-block.2):
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
$vgpr3 = BUFFER_LOAD_DWORD_OFFSET killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 4 from `float addrspace(1)* undef`)
|
||||
EXP_DONE 0, killed $vgpr0, killed $vgpr1, killed $vgpr2, killed $vgpr3, -1, -1, 15, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 1056964608, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 1065353216, implicit $exec
|
||||
|
|
|
|||
|
|
@ -230,17 +230,17 @@ name: vmem_gt_8dw_store
|
|||
|
||||
body: |
|
||||
bb.0:
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX3_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORDX4_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_FORMAT_XYZ_OFFSET $vgpr2_vgpr3_vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_FORMAT_XYZW_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
BUFFER_ATOMIC_CMPSWAP_X2_OFFSET $vgpr2_vgpr3_vgpr4_vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, implicit $exec
|
||||
$vgpr3 = V_MOV_B32_e32 0, implicit $exec
|
||||
|
|
@ -553,10 +553,10 @@ body: |
|
|||
dead $sgpr6_sgpr7 = KILL $sgpr4_sgpr5
|
||||
$sgpr8 = S_MOV_B32 $sgpr5
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr + 4)
|
||||
$sgpr8 = S_MOV_B32 $sgpr4, implicit killed $sgpr4_sgpr5
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr8, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr9, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.A.addr)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
|
|
@ -72,7 +72,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
|
||||
bb.3.done:
|
||||
|
|
@ -80,7 +80,7 @@ body: |
|
|||
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ body: |
|
|||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -24,7 +24,7 @@ name: hazard_buf_branch_lds
|
|||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
|
|
@ -56,11 +56,11 @@ name: no_hazard_buf_branch_buf
|
|||
body: |
|
||||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -75,7 +75,7 @@ body: |
|
|||
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -87,7 +87,7 @@ name: no_hazard_lds_branch_buf_samebb
|
|||
body: |
|
||||
bb.0:
|
||||
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ body: |
|
|||
bb.0:
|
||||
successors: %bb.0
|
||||
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.0
|
||||
...
|
||||
|
||||
|
|
@ -118,8 +118,8 @@ body: |
|
|||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -137,7 +137,7 @@ body: |
|
|||
|
||||
bb.1:
|
||||
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -150,11 +150,11 @@ body: |
|
|||
bb.0:
|
||||
successors: %bb.1
|
||||
$vgpr1 = DS_READ_B32 undef $vgpr0, 0, 0, implicit $m0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ body: |
|
|||
|
||||
bb.1:
|
||||
S_WAITCNT_VSCNT undef $sgpr_null, 1
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -189,7 +189,7 @@ body: |
|
|||
|
||||
bb.1:
|
||||
S_WAITCNT_VSCNT undef $sgpr_null, 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -206,7 +206,7 @@ body: |
|
|||
|
||||
bb.1:
|
||||
S_WAITCNT_VSCNT undef $sgpr0, 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
@ -223,7 +223,7 @@ body: |
|
|||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
|
|
|||
|
|
@ -400,6 +400,46 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_load_x1_offset_merged:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NEXT: buffer_load_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
|
||||
;CHECK-NEXT: buffer_load_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
|
||||
;CHECK: s_waitcnt
|
||||
define amdgpu_ps void @raw_buffer_load_x1_offset_merged(<4 x i32> inreg %rsrc) {
|
||||
main_body:
|
||||
%r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4, i32 0, i32 0)
|
||||
%r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 8, i32 0, i32 0)
|
||||
%r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 12, i32 0, i32 0)
|
||||
%r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 16, i32 0, i32 0)
|
||||
%r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 28, i32 0, i32 0)
|
||||
%r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 32, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_load_x1_offset_swizzled_not_merged:
|
||||
;CHECK-NEXT: %bb.
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:4
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:8
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:12
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:16
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:28
|
||||
;CHECK-NEXT: buffer_load_dword v{{[0-9]}}, off, s[0:3], 0 offset:32
|
||||
;CHECK: s_waitcnt
|
||||
define amdgpu_ps void @raw_buffer_load_x1_offset_swizzled_not_merged(<4 x i32> inreg %rsrc) {
|
||||
main_body:
|
||||
%r1 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 4, i32 0, i32 8)
|
||||
%r2 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 8, i32 0, i32 8)
|
||||
%r3 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 12, i32 0, i32 8)
|
||||
%r4 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 16, i32 0, i32 8)
|
||||
%r5 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 28, i32 0, i32 8)
|
||||
%r6 = call float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32> %rsrc, i32 32, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r1, float %r2, float %r3, float %r4, i1 true, i1 true)
|
||||
call void @llvm.amdgcn.exp.f32(i32 0, i32 15, float %r5, float %r6, float undef, float undef, i1 true, i1 true)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare float @llvm.amdgcn.raw.buffer.load.f32(<4 x i32>, i32, i32, i32) #0
|
||||
declare <2 x float> @llvm.amdgcn.raw.buffer.load.v2f32(<4 x i32>, i32, i32, i32) #0
|
||||
declare <4 x float> @llvm.amdgcn.raw.buffer.load.v4f32(<4 x i32>, i32, i32, i32) #0
|
||||
|
|
|
|||
|
|
@ -276,6 +276,37 @@ main_body:
|
|||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_store_x1_offset_merged:
|
||||
;CHECK-NOT: s_waitcnt
|
||||
;CHECK-DAG: buffer_store_dwordx4 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:4
|
||||
;CHECK-DAG: buffer_store_dwordx2 v[{{[0-9]}}:{{[0-9]}}], off, s[0:3], 0 offset:28
|
||||
define amdgpu_ps void @raw_buffer_store_x1_offset_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 0)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 0)
|
||||
ret void
|
||||
}
|
||||
|
||||
;CHECK-LABEL: {{^}}raw_buffer_store_x1_offset_swizzled_not_merged:
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:4
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:8
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:12
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:16
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:28
|
||||
;CHECK-DAG: buffer_store_dword v{{[0-9]}}, off, s[0:3], 0 offset:32
|
||||
define amdgpu_ps void @raw_buffer_store_x1_offset_swizzled_not_merged(<4 x i32> inreg %rsrc, float %v1, float %v2, float %v3, float %v4, float %v5, float %v6) {
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v1, <4 x i32> %rsrc, i32 4, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v2, <4 x i32> %rsrc, i32 8, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v3, <4 x i32> %rsrc, i32 12, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v4, <4 x i32> %rsrc, i32 16, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v5, <4 x i32> %rsrc, i32 28, i32 0, i32 8)
|
||||
call void @llvm.amdgcn.raw.buffer.store.f32(float %v6, <4 x i32> %rsrc, i32 32, i32 0, i32 8)
|
||||
ret void
|
||||
}
|
||||
|
||||
declare void @llvm.amdgcn.raw.buffer.store.f32(float, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v2f32(<2 x float>, <4 x i32>, i32, i32, i32) #0
|
||||
declare void @llvm.amdgcn.raw.buffer.store.v4f32(<4 x float>, <4 x i32>, i32, i32, i32) #0
|
||||
|
|
|
|||
|
|
@ -86,7 +86,7 @@ body: |
|
|||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 0
|
||||
S_WAITCNT 127
|
||||
$vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
|
||||
$vgpr1_vgpr2 = BUFFER_LOAD_DWORDX2_ADDR64 killed $vgpr1_vgpr2, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile load 8 from %ir.tid.gep)
|
||||
$vgpr0 = V_XOR_B32_e32 1, killed $vgpr0, implicit $exec
|
||||
V_CMP_NE_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
|
||||
$sgpr2_sgpr3 = S_AND_SAVEEXEC_B64 killed $vcc, implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
|
|
|
|||
|
|
@ -23,13 +23,13 @@ body: |
|
|||
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
|
||||
S_WAITCNT 127
|
||||
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
|
||||
S_WAITCNT 3855
|
||||
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into `i32 addrspace(5)* undef`)
|
||||
S_CBRANCH_SCC0 %bb.1, implicit killed $scc
|
||||
|
||||
bb.2:
|
||||
|
|
@ -55,7 +55,7 @@ body: |
|
|||
S_WAITCNT 127
|
||||
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
|
||||
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (load syncscope("agent-one-as") unordered 4 from `i32 addrspace(1)* undef`), (load syncscope("workgroup-one-as") seq_cst 4 from `[8192 x i32] addrspace(5)* undef`)
|
||||
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
|
||||
S_WAITCNT 3952
|
||||
|
|
|
|||
|
|
@ -117,13 +117,13 @@ body: |
|
|||
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
|
||||
S_WAITCNT 127
|
||||
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
|
||||
S_WAITCNT 3855
|
||||
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
|
||||
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
|
||||
|
||||
bb.2.else:
|
||||
|
|
@ -149,7 +149,7 @@ body: |
|
|||
S_WAITCNT 127
|
||||
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
|
||||
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (non-temporal load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
|
||||
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
|
||||
S_WAITCNT 3952
|
||||
|
|
|
|||
|
|
@ -97,13 +97,13 @@ body: |
|
|||
$sgpr10 = S_MOV_B32 4294967295, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$sgpr11 = S_MOV_B32 15204352, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr01)
|
||||
S_WAITCNT 127
|
||||
S_CMP_LG_U32 killed $sgpr2, 0, implicit-def $scc
|
||||
S_WAITCNT 3855
|
||||
$vgpr0 = V_MOV_B32_e32 2, implicit $exec
|
||||
$vgpr1 = V_MOV_B32_e32 32772, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.scratchptr12)
|
||||
S_CBRANCH_SCC0 %bb.1.if, implicit killed $scc
|
||||
|
||||
bb.2.else:
|
||||
|
|
@ -129,7 +129,7 @@ body: |
|
|||
S_WAITCNT 127
|
||||
$sgpr0 = S_LSHL_B32 killed $sgpr0, 2, implicit-def dead $scc
|
||||
$vgpr0 = V_ADD_I32_e32 killed $sgpr0, killed $vgpr0, implicit-def dead $vcc, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr0, killed $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %ir.else_ptr), (non-temporal load 4 from %ir.if_ptr)
|
||||
$vgpr1 = V_MOV_B32_e32 $sgpr4, implicit $exec, implicit-def $vgpr1_vgpr2, implicit $sgpr4_sgpr5
|
||||
$vgpr2 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $sgpr4_sgpr5, implicit $exec
|
||||
S_WAITCNT 3952
|
||||
|
|
|
|||
|
|
@ -337,7 +337,7 @@ body: |
|
|||
# GCN: dead early-clobber %4:vreg_128, dead early-clobber %3:vreg_128, dead early-clobber %5:vgpr_32 = BUNDLE %0, %2, %1, implicit $exec {
|
||||
# GCN-NEXT: dead %3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# GCN-NEXT: dead %4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
|
||||
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# GCN-NEXT: dead %5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# GCN-NEXT: }
|
||||
|
||||
---
|
||||
|
|
@ -357,7 +357,7 @@ body: |
|
|||
%2 = IMPLICIT_DEF
|
||||
%3:vreg_128 = IMAGE_SAMPLE_LZ_V4_V2 %0, %1, %2, 15, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%4:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, 0, 0, implicit $exec
|
||||
%5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%5:vgpr_32 = BUFFER_LOAD_DWORD_ADDR64 %0, %2, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: {{^}}name: atomic{{$}}
|
||||
|
|
|
|||
|
|
@ -169,10 +169,10 @@ body: |
|
|||
---
|
||||
# CHECK-LABEL: merge_mmos
|
||||
# CHECK: S_BUFFER_LOAD_DWORDX2_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 8, align 4)
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8, align 4)
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8, align 4)
|
||||
# CHECK: BUFFER_LOAD_DWORDX2_OFFSET %0, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 8 from %ir.ptr_addr1 + 64, align 4
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET_exact killed %{{[0-9]+}}, %0, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 8 into %ir.ptr_addr1 + 64, align 4
|
||||
name: merge_mmos
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
|
|
@ -182,14 +182,14 @@ body: |
|
|||
%0:sreg_128 = COPY $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
%1:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 0, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%2:sreg_32_xm0_xexec = S_BUFFER_LOAD_DWORD_IMM %0, 1, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
|
||||
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
|
||||
%6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
|
||||
%3:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
|
||||
%4:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %3, %0, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %4, %0, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4)
|
||||
%5:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 64)
|
||||
%6:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %0, 0, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable load 4 from %ir.ptr_addr1 + 68)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %5, %0, 0, 64, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 64)
|
||||
BUFFER_STORE_DWORD_OFFSET_exact %6, %0, 0, 68, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable store 4 into %ir.ptr_addr1 + 68)
|
||||
|
||||
S_ENDPGM 0
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@
|
|||
# W64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
|
||||
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# W64-LABEL bb.2:
|
||||
|
|
@ -47,7 +47,7 @@
|
|||
# W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# TODO: S_XOR_B32_term should be `implicit-def $scc`
|
||||
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
|
||||
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
|
@ -72,7 +72,7 @@ body: |
|
|||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_IDXEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
$vgpr0 = COPY %7
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
|
||||
|
|
@ -94,7 +94,7 @@ body: |
|
|||
# W64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
|
||||
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# W64-LABEL bb.2:
|
||||
|
|
@ -116,7 +116,7 @@ body: |
|
|||
# W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# TODO: S_XOR_B32_term should be `implicit-def $scc`
|
||||
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
|
||||
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
|
@ -141,7 +141,7 @@ body: |
|
|||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
$vgpr0 = COPY %7
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
|
||||
|
|
@ -163,7 +163,7 @@ body: |
|
|||
# W64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
|
||||
# W64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# W64-LABEL bb.2:
|
||||
|
|
@ -185,7 +185,7 @@ body: |
|
|||
# W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# TODO: S_XOR_B32_term should be `implicit-def $scc`
|
||||
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
|
||||
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
|
@ -210,7 +210,7 @@ body: |
|
|||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_BOTHEN %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
$vgpr0 = COPY %7
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
|
||||
|
|
@ -226,7 +226,7 @@ body: |
|
|||
# ADDR64: %9:vgpr_32, %12:sreg_64_xexec = V_ADD_I32_e64 %14.sub0, %4.sub0, 0, implicit $exec
|
||||
# ADDR64: %10:vgpr_32, dead %13:sreg_64_xexec = V_ADDC_U32_e64 %14.sub1, %4.sub1, killed %12, 0, implicit $exec
|
||||
# ADDR64: %11:vreg_64 = REG_SEQUENCE %9, %subreg.sub0, %10, %subreg.sub1
|
||||
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %11, killed %18, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
---
|
||||
name: addr64
|
||||
liveins:
|
||||
|
|
@ -246,7 +246,7 @@ body: |
|
|||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 %4, killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
$vgpr0 = COPY %7
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
|
||||
|
|
@ -269,7 +269,7 @@ body: |
|
|||
# W64-NO-ADDR64: [[CMP1:%[0-9]+]]:sreg_64_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W64-NO-ADDR64: [[CMP:%[0-9]+]]:sreg_64_xexec = S_AND_B64 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W64-NO-ADDR64: [[TMPEXEC:%[0-9]+]]:sreg_64_xexec = S_AND_SAVEEXEC_B64 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64-NO-ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W64-NO-ADDR64: $exec = S_XOR_B64_term $exec, [[TMPEXEC]], implicit-def $scc
|
||||
# W64-NO-ADDR64: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
# W64-NO-ADDR64-LABEL bb.2:
|
||||
|
|
@ -289,7 +289,7 @@ body: |
|
|||
# W32: [[CMP1:%[0-9]+]]:sreg_32_xm0_xexec = V_CMP_EQ_U64_e64 [[SRSRC]].sub2_sub3, [[VRSRC]].sub2_sub3, implicit $exec
|
||||
# W32: [[CMP:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_B32 [[CMP0]], [[CMP1]], implicit-def $scc
|
||||
# W32: [[TMPEXEC:%[0-9]+]]:sreg_32_xm0_xexec = S_AND_SAVEEXEC_B32 killed [[CMP]], implicit-def $exec, implicit-def $scc, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# W32: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed [[SRSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# TODO: S_XOR_B32_term should be `implicit-def $scc`
|
||||
# W32: $exec_lo = S_XOR_B32_term $exec_lo, [[TMPEXEC]]
|
||||
# W32: S_CBRANCH_EXECNZ %bb.1, implicit $exec
|
||||
|
|
@ -303,7 +303,7 @@ body: |
|
|||
# ADDR64: [[RSRCFMTHI:%[0-9]+]]:sgpr_32 = S_MOV_B32 61440
|
||||
# ADDR64: [[ZERORSRC:%[0-9]+]]:sreg_128 = REG_SEQUENCE [[ZERO64]], %subreg.sub0_sub1, [[RSRCFMTLO]], %subreg.sub2, [[RSRCFMTHI]], %subreg.sub3
|
||||
# ADDR64: [[VADDR64:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[RSRCPTR]].sub0, %subreg.sub0, [[RSRCPTR]].sub1, %subreg.sub1
|
||||
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# ADDR64: {{[0-9]+}}:vgpr_32 = BUFFER_LOAD_FORMAT_X_ADDR64 [[VADDR64]], [[ZERORSRC]], 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
---
|
||||
name: offset
|
||||
|
|
@ -324,7 +324,7 @@ body: |
|
|||
%1:vgpr_32 = COPY $vgpr1
|
||||
%0:vgpr_32 = COPY $vgpr0
|
||||
%6:sreg_128 = REG_SEQUENCE %0, %subreg.sub0, %1, %subreg.sub1, %2, %subreg.sub2, %3, %subreg.sub3
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%7:vgpr_32 = BUFFER_LOAD_FORMAT_X_OFFSET killed %6, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
$vgpr0 = COPY %7
|
||||
S_SETPC_B64_return $sgpr30_sgpr31, implicit $vgpr0
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ name: hazard_image_sample_d_buf_off6
|
|||
body: |
|
||||
bb.0:
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: no_hazard_image_sample_d_buf_off1
|
||||
|
|
@ -20,7 +20,7 @@ name: no_hazard_image_sample_d_buf_off1
|
|||
body: |
|
||||
bb.0:
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 1, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# GCN-LABEL: name: no_hazard_image_sample_d_buf_far
|
||||
|
|
@ -33,7 +33,7 @@ body: |
|
|||
bb.0:
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_D_V4_V9_nsa_gfx10 undef $vgpr3, undef $vgpr8, undef $vgpr7, undef $vgpr5, undef $vgpr4, undef $vgpr6, undef $vgpr0, undef $vgpr2, undef $vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
V_NOP_e32 implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# Non-NSA
|
||||
|
|
@ -45,7 +45,7 @@ name: no_hazard_image_sample_v4_v2_buf_off6
|
|||
body: |
|
||||
bb.0:
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V2_gfx10 undef $vgpr1_vgpr2, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 1, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
||||
# Less than 4 dwords
|
||||
|
|
@ -57,5 +57,5 @@ name: no_hazard_image_sample_v4_v3_buf_off6
|
|||
body: |
|
||||
bb.0:
|
||||
$vgpr0_vgpr1_vgpr2_vgpr3 = IMAGE_SAMPLE_V4_V3_nsa_gfx10 undef $vgpr1, undef $vgpr2, undef $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7, undef $sgpr8_sgpr9_sgpr10_sgpr11, 15, 2, 0, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFSET undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr4, 6, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
|
|
|
|||
|
|
@ -137,7 +137,7 @@ body: |
|
|||
%28 = REG_SEQUENCE %6, 17, killed %27, 18
|
||||
%29 = V_MOV_B32_e32 0, implicit $exec
|
||||
%30 = COPY %24
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.bb2:
|
||||
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
|
|
@ -243,7 +243,7 @@ body: |
|
|||
%37 = REG_SEQUENCE %6, 17, killed %36, 18
|
||||
%38 = V_MOV_B32_e32 0, implicit $exec
|
||||
%39 = COPY %33
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %38, killed %39, killed %37, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.bb2:
|
||||
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
|
|
@ -332,7 +332,7 @@ body: |
|
|||
%28 = REG_SEQUENCE %6, 17, killed %27, 18
|
||||
%29 = V_MOV_B32_e32 0, implicit $exec
|
||||
%30 = COPY %24
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %29, killed %30, killed %28, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.bb2:
|
||||
SI_END_CF %1, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
|
|
|
|||
|
|
@ -151,7 +151,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -159,7 +159,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -188,7 +188,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -196,7 +196,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -225,7 +225,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -233,14 +233,14 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
---
|
||||
# CHECK-LABEL: name: optimize_if_and_saveexec_xor_valu_middle
|
||||
# CHECK: $sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# CHECK-NEXT: $sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
# CHECK-NEXT: $exec = COPY killed $sgpr2_sgpr3
|
||||
# CHECK-NEXT: SI_MASK_BRANCH
|
||||
|
|
@ -255,7 +255,7 @@ body: |
|
|||
$vcc = V_CMP_EQ_I32_e64 0, killed $vgpr0, implicit $exec
|
||||
$vgpr0 = V_MOV_B32_e32 4, implicit $exec
|
||||
$sgpr2_sgpr3 = S_AND_B64 $sgpr0_sgpr1, killed $vcc, implicit-def $scc
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET $vgpr0, undef $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr0_sgpr1 = S_XOR_B64 $sgpr2_sgpr3, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$exec = S_MOV_B64_term killed $sgpr2_sgpr3
|
||||
SI_MASK_BRANCH %bb.2, implicit $exec
|
||||
|
|
@ -266,7 +266,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -274,7 +274,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -304,7 +304,7 @@ body: |
|
|||
|
||||
bb.1.if:
|
||||
liveins: $sgpr0_sgpr1 , $sgpr4_sgpr5_sgpr6_sgpr7
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1, $sgpr4_sgpr5_sgpr6_sgpr7
|
||||
|
|
@ -312,7 +312,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -346,7 +346,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -356,7 +356,7 @@ body: |
|
|||
$sgpr1 = S_MOV_B32 1
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -387,7 +387,7 @@ body: |
|
|||
S_SLEEP 0, implicit $sgpr2_sgpr3
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -395,7 +395,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -426,7 +426,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -434,7 +434,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -463,7 +463,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -471,7 +471,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -500,7 +500,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -508,7 +508,7 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -539,7 +539,7 @@ body: |
|
|||
|
||||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2.end:
|
||||
liveins: $vgpr0, $sgpr0_sgpr1
|
||||
|
|
@ -547,6 +547,6 @@ body: |
|
|||
$exec = S_OR_B64 $exec, killed $sgpr0_sgpr1, implicit-def $scc
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
|
|
@ -27,12 +27,12 @@ body: |
|
|||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: liveins: $sgpr33, $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: $sgpr4 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
|
||||
; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
|
||||
; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
|
||||
; CHECK: S_BRANCH %bb.1
|
||||
; CHECK: bb.1:
|
||||
; CHECK: liveins: $sgpr0_sgpr1_sgpr2_sgpr3
|
||||
; CHECK: $sgpr4 = S_ADD_U32 $sgpr32, 524288, implicit-def $scc
|
||||
; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
|
||||
; CHECK: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec :: (load 4 from %stack.0, align 8192, addrspace 5)
|
||||
; CHECK: S_ENDPGM 0, implicit $vgpr0
|
||||
bb.0:
|
||||
$vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ body: |
|
|||
%15:sreg_32_xm0 = S_MOV_B32 61440
|
||||
%16:sreg_32_xm0 = S_MOV_B32 -1
|
||||
%17:sreg_128 = REG_SEQUENCE undef %14:sreg_32_xm0, %subreg.sub0, undef %12:sreg_32_xm0, %subreg.sub1, %16, %subreg.sub2, %15, %subreg.sub3
|
||||
BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %4, %17, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`, addrspace 1)
|
||||
%19:vgpr_32 = COPY %4
|
||||
%20:sreg_64 = SI_IF %0, %bb.2, implicit-def dead $exec, implicit-def dead $scc, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
|
|
|||
|
|
@ -17,6 +17,6 @@ body: |
|
|||
S_BARRIER
|
||||
$agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = V_MFMA_F32_32X32X1F32 undef $vgpr0, undef $vgpr0, 0, 0, 0, 2, implicit $exec
|
||||
$vgpr0 = V_ACCVGPR_READ_B32 $agpr31, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN killed $vgpr0, undef $vgpr0, $sgpr8_sgpr9_sgpr10_sgpr11, $sgpr6, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -185,7 +185,7 @@ body: |
|
|||
bb.28:
|
||||
%9 = S_FF1_I32_B32 undef %10
|
||||
%13 = V_MAD_U32_U24 killed %9, 48, 32, 0, implicit $exec
|
||||
%45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
|
||||
%45 = BUFFER_LOAD_DWORD_OFFEN killed %13, undef %15, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (dereferenceable invariant load 4)
|
||||
%46 = V_AND_B32_e32 1, killed %45, implicit $exec
|
||||
%21 = S_BUFFER_LOAD_DWORD_SGPR undef %22, undef %23, 0, 0 :: (dereferenceable invariant load 4)
|
||||
%25 = V_CMP_GE_F32_e64 0, 0, 0, killed %21, 0, implicit $exec
|
||||
|
|
|
|||
|
|
@ -70,7 +70,7 @@ body: |
|
|||
%13.sub2_sub3 = COPY killed %12
|
||||
%20 = V_LSHL_B64 killed %19, 2, implicit $exec
|
||||
%16 = COPY killed %5
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
BUFFER_STORE_DWORD_ADDR64 killed %16, killed %20, killed %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -134,10 +134,10 @@ body: |
|
|||
%6.sub2 = COPY %6.sub0
|
||||
|
||||
bb.2:
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub3, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 12, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub2, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 8, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub1, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %6.sub0, %0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr30_sgpr31 = COPY %5
|
||||
S_SETPC_B64_return $sgpr30_sgpr31
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ body: |
|
|||
; CHECK: [[COPY:%[0-9]+]]:vreg_512 = COPY %0
|
||||
; CHECK: bb.1:
|
||||
; CHECK: successors: %bb.1(0x80000000)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
; CHECK: BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
; CHECK: dead %6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
|
||||
; CHECK: dead %8:vreg_64 = DS_READ_B64_gfx9 [[V_MOV_B32_e32_]], 0, 0, implicit $exec
|
||||
; CHECK: dead %9:vreg_128 = DS_READ_B128_gfx9 [[V_ADD_U32_e32_]], 0, 0, implicit $exec
|
||||
|
|
@ -52,7 +52,7 @@ body: |
|
|||
%4:vreg_512 = COPY %0
|
||||
|
||||
bb.1:
|
||||
BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
BUFFER_STORE_DWORD_OFFEN %0.sub3, undef %5:vgpr_32, $sgpr24_sgpr25_sgpr26_sgpr27, $sgpr32, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4, align 8, addrspace 5)
|
||||
%6:vgpr_32 = DS_READ_B32_gfx9 undef %7:vgpr_32, 0, 0, implicit $exec
|
||||
%8:vreg_64 = DS_READ_B64_gfx9 %1, 0, 0, implicit $exec
|
||||
%9:vreg_128 = DS_READ_B128_gfx9 %2, 0, 0, implicit $exec
|
||||
|
|
|
|||
|
|
@ -279,10 +279,10 @@ body: |
|
|||
%80:vgpr_32 = IMPLICIT_DEF
|
||||
%81:vgpr_32 = IMPLICIT_DEF
|
||||
%84:vgpr_32 = IMPLICIT_DEF
|
||||
BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %84, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 108, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %81, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 104, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %80, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 100, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN %78, %stack.0.tmp5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr101, 96, 0, 0, 0, 0, 0, implicit $exec
|
||||
%85:vgpr_32 = IMPLICIT_DEF
|
||||
%86:vgpr_32 = IMPLICIT_DEF
|
||||
%87:vgpr_32 = IMPLICIT_DEF
|
||||
|
|
|
|||
|
|
@ -30,14 +30,14 @@ body: |
|
|||
%33.sub1:sgpr_128 = V_READFIRSTLANE_B32 %44.sub1, implicit $exec
|
||||
%33.sub2:sgpr_128 = V_READFIRSTLANE_B32 %45.sub2, implicit $exec
|
||||
%33.sub3:sgpr_128 = V_READFIRSTLANE_B32 %46.sub3, implicit $exec
|
||||
%15:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %33, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%15:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %33, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%39:vgpr_32 = V_MUL_LO_U32 %15, %15, implicit $exec
|
||||
|
||||
undef %27.sub0:sgpr_128 = V_READFIRSTLANE_B32 %26.sub0, implicit $exec
|
||||
%27.sub1:sgpr_128 = V_READFIRSTLANE_B32 %41.sub1, implicit $exec
|
||||
%27.sub2:sgpr_128 = V_READFIRSTLANE_B32 %42.sub2, implicit $exec
|
||||
%27.sub3:sgpr_128 = V_READFIRSTLANE_B32 %43.sub3, implicit $exec
|
||||
%19:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %27, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19:vgpr_32 = BUFFER_LOAD_DWORD_OFFSET %27, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%40:vgpr_32 = V_MUL_LO_U32 %19, %19, implicit $exec
|
||||
|
||||
%23:vgpr_32 = V_ADD_U32_e32 %39, %40, implicit $exec
|
||||
|
|
|
|||
|
|
@ -81,11 +81,11 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%29, %9 = V_ADD_I32_e64 %19, %17, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -165,11 +165,11 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%29, %9 = V_SUB_I32_e64 %19, %17, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -249,11 +249,11 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%29, %9 = V_SUBREV_I32_e64 %19, %17, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed %9, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %29, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -332,12 +332,12 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%9 = S_MOV_B64 0
|
||||
%29, $vcc = V_ADDC_U32_e64 %19, %17, %9, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -417,12 +417,12 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vcc = S_MOV_B64 0
|
||||
%29, $vcc = V_ADDC_U32_e64 %19, %17, $vcc, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -502,11 +502,11 @@ body: |
|
|||
%13 = REG_SEQUENCE killed %5, 17, %12, 18
|
||||
%28 = V_LSHL_B64 killed %27, 2, implicit $exec
|
||||
%16 = REG_SEQUENCE killed %4, 17, %12, 18
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, implicit $exec
|
||||
%17 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
%19 = BUFFER_LOAD_DWORD_ADDR64 %28, %13, 0, 4, 0, 0, 0, 0, 0, implicit $exec
|
||||
%29, $vcc = V_ADDC_U32_e64 %19, %17, undef $vcc, 0, implicit $exec
|
||||
%24 = V_CNDMASK_B32_e64 0, 0, 0, 1, killed $vcc, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %24, %28, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -88,7 +88,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
|
|
@ -96,7 +96,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
|
||||
bb.3.done:
|
||||
|
|
@ -104,7 +104,7 @@ body: |
|
|||
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
@ -149,7 +149,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 9, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 0, implicit $exec
|
||||
S_BRANCH %bb.3
|
||||
|
||||
|
|
@ -157,7 +157,7 @@ body: |
|
|||
liveins: $sgpr6, $sgpr7, $sgpr0_sgpr1_sgpr2_sgpr3:0x00000003
|
||||
|
||||
$vgpr0 = V_MOV_B32_e32 100, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (volatile store 4 into `i32 addrspace(1)* undef`)
|
||||
$vgpr0 = V_MOV_B32_e32 1, implicit $exec
|
||||
|
||||
bb.3.done:
|
||||
|
|
@ -165,7 +165,7 @@ body: |
|
|||
|
||||
$sgpr3 = S_MOV_B32 61440
|
||||
$sgpr2 = S_MOV_B32 -1
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, killed $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out)
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
...
|
||||
# GCN-LABEL: name: vmem_smem_write_sgpr
|
||||
|
|
@ -25,7 +25,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$sgpr0 = S_LOAD_DWORD_IMM $sgpr0_sgpr1, 0, 0, 0
|
||||
...
|
||||
# GCN-LABEL: name: vmem_snop_write_sgpr
|
||||
|
|
@ -40,7 +40,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_NOP 0
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
...
|
||||
|
|
@ -55,7 +55,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr2 = V_ADD_F32_e32 $vgpr1, $vgpr1, implicit $exec
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
...
|
||||
|
|
@ -70,7 +70,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_WAITCNT 0
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
...
|
||||
|
|
@ -86,7 +86,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_WAITCNT 1
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
...
|
||||
|
|
@ -101,7 +101,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$exec = S_MOV_B64 7
|
||||
...
|
||||
# GCN-LABEL: name: vmem_write_exec_expread
|
||||
|
|
@ -114,7 +114,7 @@ body: |
|
|||
bb.0:
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $exec_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$exec = S_MOV_B64 7
|
||||
...
|
||||
# GCN-LABEL: name: ds_write_m0
|
||||
|
|
@ -143,7 +143,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
|
|
@ -161,7 +161,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
|
|
@ -181,7 +181,7 @@ body: |
|
|||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$sgpr4 = IMPLICIT_DEF
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.2
|
||||
|
||||
bb.1:
|
||||
|
|
@ -206,7 +206,7 @@ body: |
|
|||
$sgpr0 = S_MOV_B32 0
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr4, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_BRANCH %bb.0
|
||||
...
|
||||
# GCN-LABEL: name: ds_write_exec
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ body: |
|
|||
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_to_next
|
||||
# GCN: bb.1:
|
||||
|
|
@ -40,7 +40,7 @@ body: |
|
|||
S_BRANCH %bb.1
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_too_far
|
||||
# GCN: bb.1:
|
||||
|
|
@ -61,7 +61,7 @@ body: |
|
|||
$sgpr0 = S_MOV_B32 0
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_fallthrough_no_hazard_nops
|
||||
# GCN: bb.1:
|
||||
|
|
@ -78,7 +78,7 @@ body: |
|
|||
S_NOP 4
|
||||
|
||||
bb.1:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_around
|
||||
# GCN: bb.2:
|
||||
|
|
@ -107,7 +107,7 @@ body: |
|
|||
S_NOP 0
|
||||
|
||||
bb.2:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_branch_backedge
|
||||
# GCN: S_NOP
|
||||
|
|
@ -123,7 +123,7 @@ body: |
|
|||
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.1:
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
|
|
@ -156,7 +156,7 @@ body: |
|
|||
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
|
||||
bb.2:
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
# GCN-LABEL: name: vmem_vcc_self_loop
|
||||
# GCN: S_NOP
|
||||
|
|
@ -172,7 +172,7 @@ body: |
|
|||
|
||||
$vgpr0 = IMPLICIT_DEF
|
||||
$sgpr0_sgpr1_sgpr2_sgpr3 = IMPLICIT_DEF
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = V_ADDC_U32_e32 $vgpr0, $vgpr0, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
S_BRANCH %bb.0
|
||||
...
|
||||
|
|
@ -198,7 +198,7 @@ body: |
|
|||
successors: %bb.1
|
||||
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
...
|
||||
|
|
@ -224,7 +224,7 @@ body: |
|
|||
successors: %bb.1
|
||||
|
||||
$sgpr0 = S_MOV_B32 0
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $vcc_lo, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = V_ADDC_U32_e32 $vgpr1, $vgpr1, implicit-def $vcc, implicit $vcc, implicit $exec
|
||||
S_BRANCH %bb.1
|
||||
...
|
||||
|
|
|
|||
|
|
@ -78,7 +78,7 @@ body: |
|
|||
|
||||
bb.1:
|
||||
successors: %bb.2
|
||||
BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, renamable $vgpr2, renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
bb.2:
|
||||
successors: %bb.3, %bb.6
|
||||
|
|
@ -86,7 +86,7 @@ body: |
|
|||
|
||||
bb.3:
|
||||
successors: %bb.4, %bb.5
|
||||
BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFEN_exact killed renamable $vgpr3, killed renamable $vgpr2, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_CBRANCH_VCCNZ %bb.5, implicit $vcc
|
||||
|
||||
bb.4:
|
||||
|
|
|
|||
|
|
@ -44,6 +44,6 @@ body: |
|
|||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
|
|
@ -44,6 +44,6 @@ body: |
|
|||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@
|
|||
}
|
||||
...
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
|
||||
---
|
||||
name: test1
|
||||
liveins:
|
||||
|
|
@ -56,14 +56,14 @@ body: |
|
|||
%5:vgpr_32 = COPY $vgpr0
|
||||
%6:vgpr_32 = COPY $vgpr1
|
||||
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
---
|
||||
name: test2
|
||||
liveins:
|
||||
|
|
@ -87,14 +87,14 @@ body: |
|
|||
%5:vgpr_32 = COPY $vgpr0
|
||||
%6:vgpr_32 = COPY $vgpr1
|
||||
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORD_OFFSET %{{[0-9]+}}, %{{[0-9]+}}, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
---
|
||||
name: test3
|
||||
liveins:
|
||||
|
|
@ -118,13 +118,13 @@ body: |
|
|||
%5:vgpr_32 = COPY $vgpr0
|
||||
%6:vgpr_32 = COPY $vgpr1
|
||||
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
|
||||
# CHECK: BUFFER_STORE_DWORDX2_OFFSET killed %{{[0-9]+}}, %{{[0-9]+}}, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 8 into %ir.out.gep.1, align 4, addrspace 1)
|
||||
---
|
||||
name: test4
|
||||
liveins:
|
||||
|
|
@ -148,8 +148,8 @@ body: |
|
|||
%5:vgpr_32 = COPY $vgpr0
|
||||
%6:vgpr_32 = COPY $vgpr1
|
||||
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %5, %4, 0, 4, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
BUFFER_STORE_DWORD_OFFSET %6, %4, 0, 8, 0, 0, 0, 1, 0, implicit $exec :: (store 4 into %ir.out.gep.1, addrspace 1)
|
||||
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
# CHECK-NEXT: %namedVReg1353:vreg_64 = REG_SEQUENCE %namedVReg4354, %subreg.sub0, %namedVReg1352, %subreg.sub1
|
||||
# CHECK-NEXT: %namedVReg1354:sgpr_128 = REG_SEQUENCE %namedVReg4354, %subreg.sub0, %namedVReg1352, %subreg.sub1, %namedVReg1358, %subreg.sub2, %namedVReg1359, %subreg.sub3
|
||||
# This tests for the iterator invalidation fix (reviews.llvm.org/D62713)
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_ADDR64 %namedVReg1352, %namedVReg1353, %namedVReg1354, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
# CHECK-NEXT: BUFFER_STORE_DWORD_ADDR64 %namedVReg1352, %namedVReg1353, %namedVReg1354, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
...
|
||||
---
|
||||
name: foo
|
||||
|
|
@ -27,7 +27,7 @@ body: |
|
|||
%vreg123_3:vgpr_32 = COPY %5
|
||||
%16:sgpr_128 = REG_SEQUENCE killed %vreg123_0, %subreg.sub0, %vreg123_1, %subreg.sub1, %vreg123_2, %subreg.sub2, %vreg123_3, %subreg.sub3
|
||||
|
||||
BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_ADDR64 %vreg123_1, %27, killed %16, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
|
||||
...
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@
|
|||
# CHECK: scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
|
||||
# CHECK: scratchWaveOffsetReg: '$sgpr50'
|
||||
# CHECK: frameOffsetReg: '$sgpr50'
|
||||
# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
# CHECK: renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
name: reserve_correct_register
|
||||
tracksRegLiveness: true
|
||||
machineFunctionInfo:
|
||||
|
|
@ -25,6 +25,6 @@ stack:
|
|||
|
||||
body: |
|
||||
bb.0:
|
||||
renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
renamable $vgpr0 = BUFFER_LOAD_DWORD_OFFEN %stack.0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr50, 4, 0, 0, 0, 0, 0, implicit $exec :: (load 4, addrspace 5)
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ body: |
|
|||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
---
|
||||
|
|
@ -82,6 +82,6 @@ body: |
|
|||
$sgpr7 = S_MOV_B32 61440
|
||||
$sgpr6 = S_MOV_B32 -1
|
||||
$vgpr0 = V_MOV_B32_e32 killed $sgpr2, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, 0, 0, implicit $exec
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
|
|
|||
Loading…
Reference in New Issue