[AMDGPU] Change FLAT SADDR to VADDR form in moveToVALU
Instead of legalizing the saddr operand with a readfirstlane when the address is moved from an SGPR to a VGPR, we can just change the opcode to its VADDR form. Differential Revision: https://reviews.llvm.org/D101405
This commit is contained in:
parent
a27ca15dd0
commit
89a94be16b
|
|
@ -5012,6 +5012,63 @@ void SIInstrInfo::legalizeOperandsSMRD(MachineRegisterInfo &MRI,
|
|||
}
|
||||
}
|
||||
|
||||
/// Rewrite the SADDR form of a segment-specific FLAT instruction \p Inst into
/// its VADDR form, modifying the instruction in place.
///
/// The rewrite happens only when all of the following hold:
///  - \p Inst has a named saddr operand;
///  - AMDGPU::getGlobalVaddrOp() returns a non-negative opcode for it;
///  - the saddr register is not an SGPR;
///  - the new opcode has a named vaddr operand;
///  - the current vaddr register has a unique V_MOV_B32_e32 definition whose
///    source is the immediate 0.
/// Every early exit happens before \p Inst is modified.
///
/// \returns true if \p Inst was rewritten, false otherwise.
bool SIInstrInfo::moveFlatAddrToVGPR(MachineInstr &Inst) const {
|
||||
unsigned Opc = Inst.getOpcode();
|
||||
int OldSAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr);
|
||||
// No saddr operand at all: there is nothing to convert.
if (OldSAddrIdx < 0)
|
||||
return false;
|
||||
|
||||
// An opcode with a saddr operand is expected to be a segment-specific FLAT.
assert(isSegmentSpecificFLAT(Inst));
|
||||
|
||||
int NewOpc = AMDGPU::getGlobalVaddrOp(Opc);
|
||||
// A negative result is treated as "no VADDR counterpart for this opcode".
if (NewOpc < 0)
|
||||
return false;
|
||||
|
||||
MachineRegisterInfo &MRI = Inst.getMF()->getRegInfo();
|
||||
MachineOperand &SAddr = Inst.getOperand(OldSAddrIdx);
|
||||
// The address is still held in an SGPR, so the SADDR form is kept.
if (RI.isSGPRReg(MRI, SAddr.getReg()))
|
||||
return false;
|
||||
|
||||
int NewVAddrIdx = AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vaddr);
|
||||
// The mapped opcode has no vaddr operand to receive the address.
if (NewVAddrIdx < 0)
|
||||
return false;
|
||||
|
||||
int OldVAddrIdx = AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::vaddr);
|
||||
// SADDR forms that reach this point are expected to carry a vaddr operand.
assert(OldVAddrIdx >= 0);
|
||||
|
||||
// The existing vaddr is about to be dropped, so it must be a known zero:
// require a unique V_MOV_B32_e32 definition whose source is immediate 0.
|
||||
MachineOperand &VAddr = Inst.getOperand(OldVAddrIdx);
|
||||
MachineInstr *VAddrDef = MRI.getUniqueVRegDef(VAddr.getReg());
|
||||
if (!VAddrDef || VAddrDef->getOpcode() != AMDGPU::V_MOV_B32_e32 ||
|
||||
!VAddrDef->getOperand(1).isImm() || VAddrDef->getOperand(1).getImm() != 0)
|
||||
return false;
|
||||
|
||||
// All checks passed; from here on the instruction is modified.
const MCInstrDesc &NewDesc = get(NewOpc);
|
||||
Inst.setDesc(NewDesc);
|
||||
|
||||
// Callers expect the iterator to be valid after this call, so modify the
|
||||
// instruction in place.
|
||||
// Case 1: vaddr keeps its operand index in the new form. Overwrite the old
// (zero) vaddr slot with the saddr operand, then delete the saddr slot.
if (OldVAddrIdx == NewVAddrIdx) {
|
||||
MachineOperand &NewVAddr = Inst.getOperand(NewVAddrIdx);
|
||||
// Clear use list from the old vaddr holding a zero register.
|
||||
MRI.removeRegOperandFromUseList(&NewVAddr);
|
||||
// Move the saddr operand (source) into the vaddr slot (destination).
MRI.moveOperands(&NewVAddr, &SAddr, 1);
|
||||
Inst.RemoveOperand(OldSAddrIdx);
|
||||
// Re-register the operand now sitting in the vaddr slot (the pointer we
|
||||
// have just moved there from the saddr position). Otherwise new vaddr will be missing from the use list.
|
||||
MRI.removeRegOperandFromUseList(&NewVAddr);
|
||||
MRI.addRegOperandToUseList(&NewVAddr);
|
||||
} else {
|
||||
// Case 2: the new vaddr index equals the old saddr index, so the address
// is already in the right slot and only the old zero vaddr is removed.
assert(OldSAddrIdx == NewVAddrIdx);
|
||||
Inst.RemoveOperand(OldVAddrIdx);
|
||||
}
|
||||
|
||||
// Erase the zero-defining V_MOV if nothing but debug uses remain.
if (MRI.use_nodbg_empty(VAddrDef->getOperand(0).getReg()))
|
||||
VAddrDef->eraseFromParent();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// FIXME: Remove this when SelectionDAG is obsoleted.
|
||||
void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
|
||||
MachineInstr &MI) const {
|
||||
|
|
@ -5024,6 +5081,9 @@ void SIInstrInfo::legalizeOperandsFLAT(MachineRegisterInfo &MRI,
|
|||
if (!SAddr || RI.isSGPRClass(MRI.getRegClass(SAddr->getReg())))
|
||||
return;
|
||||
|
||||
if (moveFlatAddrToVGPR(MI))
|
||||
return;
|
||||
|
||||
Register ToSGPR = readlaneVGPRToSGPR(SAddr->getReg(), MI, MRI);
|
||||
SAddr->setReg(ToSGPR);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -946,6 +946,10 @@ public:
|
|||
MachineBasicBlock *
|
||||
legalizeOperands(MachineInstr &MI, MachineDominatorTree *MDT = nullptr) const;
|
||||
|
||||
/// Change SADDR form of a FLAT \p Inst to its VADDR form if saddr operand
|
||||
/// was moved to VGPR. \returns true if succeeded.
|
||||
bool moveFlatAddrToVGPR(MachineInstr &Inst) const;
|
||||
|
||||
/// Replace this instruction's opcode with the equivalent VALU
|
||||
/// opcode. This function will also move the users of \p MI to the
|
||||
/// VALU if necessary. If present, \p MDT is updated.
|
||||
|
|
@ -1198,9 +1202,16 @@ namespace AMDGPU {
|
|||
LLVM_READONLY
|
||||
int getSOPKOp(uint16_t Opcode);
|
||||
|
||||
/// \returns SADDR form of a FLAT Global instruction given an \p Opcode
|
||||
/// of a VADDR form.
|
||||
LLVM_READONLY
|
||||
int getGlobalSaddrOp(uint16_t Opcode);
|
||||
|
||||
/// \returns VADDR form of a FLAT Global instruction given an \p Opcode
|
||||
/// of a SADDR form.
|
||||
LLVM_READONLY
|
||||
int getGlobalVaddrOp(uint16_t Opcode);
|
||||
|
||||
LLVM_READONLY
|
||||
int getVCMPXNoSDstOp(uint16_t Opcode);
|
||||
|
||||
|
|
|
|||
|
|
@ -2505,6 +2505,15 @@ def getGlobalSaddrOp : InstrMapping {
|
|||
let ValueCols = [["1"]];
|
||||
}
|
||||
|
||||
// Maps a GLOBAL SADDR opcode to its VADDR form. This generates the
// AMDGPU::getGlobalVaddrOp() lookup used to switch an instruction from the
// SADDR form to the VADDR form.
|
||||
def getGlobalVaddrOp : InstrMapping {
|
||||
// Only instructions deriving from GlobalSaddrTable take part in the mapping.
let FilterClass = "GlobalSaddrTable";
|
||||
// Instructions sharing the same SaddrOp value form one row of the table.
let RowFields = ["SaddrOp"];
|
||||
// Within a row, the IsSaddr field selects the column.
let ColFields = ["IsSaddr"];
|
||||
// Key: the instruction with IsSaddr = 1 (the SADDR form).
let KeyCol = ["1"];
|
||||
// Value: the instruction in the same row with IsSaddr = 0 (the VADDR form).
let ValueCols = [["0"]];
|
||||
}
|
||||
|
||||
// Maps a v_cmpx opcode with sdst to opcode without sdst.
|
||||
def getVCMPXNoSDstOp : InstrMapping {
|
||||
let FilterClass = "VCMPXNoSDstTable";
|
||||
|
|
|
|||
|
|
@ -0,0 +1,33 @@
|
|||
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s
|
||||
|
||||
; The first load produces an address in a VGPR which is used in the address calculation
|
||||
; of the second load (one inside the loop). The value is uniform and the inner
|
||||
; load is correctly selected to use the SADDR form; however, the address is promoted to
|
||||
; vector registers because it all starts with a VGPR produced by the entry block
|
||||
; load.
|
||||
;
|
||||
; Check that we are changing SADDR form of a load to VADDR and do not have to use
|
||||
; readfirstlane instructions to move address from VGPRs into SGPRs.
|
||||
|
||||
; GCN-LABEL: {{^}}test_move_load_address_to_vgpr:
|
||||
; GCN: BB{{[0-9]+}}_1:
|
||||
; GCN-NOT: v_readfirstlane_b32
|
||||
; GCN: global_load_dword v{{[0-9]+}}, v[{{[0-9:]+}}], off glc
|
||||
define amdgpu_kernel void @test_move_load_address_to_vgpr(i32 addrspace(1)* nocapture %arg) {
|
||||
bb:
|
||||
%i1 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i32 0
|
||||
%i2 = load volatile i32, i32 addrspace(1)* %i1, align 4
|
||||
br label %bb3
|
||||
|
||||
bb2: ; preds = %bb3
|
||||
ret void
|
||||
|
||||
bb3: ; preds = %bb3, %bb
|
||||
%i = phi i32 [ %i2, %bb ], [ %i8, %bb3 ]
|
||||
%i4 = zext i32 %i to i64
|
||||
%i5 = getelementptr inbounds i32, i32 addrspace(1)* %arg, i64 %i4
|
||||
%i6 = load volatile i32, i32 addrspace(1)* %i5, align 4
|
||||
%i8 = add nuw nsw i32 %i, 1
|
||||
%i9 = icmp eq i32 %i8, 256
|
||||
br i1 %i9, label %bb2, label %bb3
|
||||
}
|
||||
|
|
@ -0,0 +1,413 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass=si-fix-sgpr-copies -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
|
||||
|
||||
---
|
||||
name: global_load_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_load_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
|
||||
; GCN: [[GLOBAL_LOAD_DWORD:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD [[PHI]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_load_saddr_to_valu_non_zero_vaddr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_load_saddr_to_valu_non_zero_vaddr
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
|
||||
; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%3:vgpr_32 = V_MOV_B32_e32 1, implicit $exec
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, %3, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
|
||||
---
|
||||
name: global_load_saddr_to_valu_undef_vaddr
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_load_saddr_to_valu_undef_vaddr
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
|
||||
; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; GCN: [[GLOBAL_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR [[REG_SEQUENCE]], undef %4:vgpr_32, 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_SADDR %1, undef %3:vgpr_32, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_store_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_store_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: GLOBAL_STORE_DWORD [[PHI]], [[DEF]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%4:vgpr_32 = IMPLICIT_DEF
|
||||
GLOBAL_STORE_DWORD_SADDR %3, %4, %1, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_addtid_load_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_addtid_load_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
|
||||
; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; GCN: [[GLOBAL_LOAD_DWORD_ADDTID_SADDR:%[0-9]+]]:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR [[REG_SEQUENCE]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%4:vgpr_32 = GLOBAL_LOAD_DWORD_ADDTID_SADDR %1, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_store_addtid_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_store_addtid_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub0, implicit $exec
|
||||
; GCN: [[V_READFIRSTLANE_B32_1:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]].sub1, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[V_READFIRSTLANE_B32_]], %subreg.sub0, [[V_READFIRSTLANE_B32_1]], %subreg.sub1
|
||||
; GCN: GLOBAL_STORE_DWORD_ADDTID_SADDR [[DEF]], [[REG_SEQUENCE]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE1:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE1]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE1]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%4:vgpr_32 = IMPLICIT_DEF
|
||||
GLOBAL_STORE_DWORD_ADDTID_SADDR %4, %1, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_atomic_noret_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_atomic_noret_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %6, %bb.1
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: GLOBAL_ATOMIC_ADD [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
GLOBAL_ATOMIC_ADD_SADDR %3, %3, %1, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: global_atomic_rtn_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: global_atomic_rtn_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0_vgpr1
|
||||
; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vreg_64 = PHI [[COPY]], %bb.0, %7, %bb.1
|
||||
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: [[GLOBAL_ATOMIC_ADD_RTN:%[0-9]+]]:vgpr_32 = GLOBAL_ATOMIC_ADD_RTN [[PHI]], [[V_MOV_B32_e32_]], 0, 0, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub0
|
||||
; GCN: [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[PHI]].sub1
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY1]], 1, implicit $exec
|
||||
; GCN: [[V_AND_B32_e64_1:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY2]], 0, implicit $exec
|
||||
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[V_AND_B32_e64_]], %subreg.sub0, [[V_AND_B32_e64_1]], %subreg.sub1
|
||||
; GCN: V_CMP_NE_U64_e32 0, [[REG_SEQUENCE]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY3:%[0-9]+]]:vreg_64 = COPY [[REG_SEQUENCE]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0_vgpr1
|
||||
%0:sreg_64 = COPY $vgpr0_vgpr1
|
||||
|
||||
bb.1:
|
||||
%1:sreg_64 = PHI %0, %bb.0, %2, %bb.1
|
||||
%3:vgpr_32 = V_MOV_B32_e32 0, implicit $exec
|
||||
%4:vgpr_32 = GLOBAL_ATOMIC_ADD_SADDR_RTN %3, %3, %1, 0, 0, implicit $exec
|
||||
%2:sreg_64 = S_AND_B64 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U64 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: scratch_load_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: scratch_load_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
|
||||
; GCN: [[SCRATCH_LOAD_DWORD_SADDR:%[0-9]+]]:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
|
||||
; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
%0:sgpr_32 = COPY $vgpr0
|
||||
|
||||
bb.1:
|
||||
%1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
|
||||
%4:vgpr_32 = SCRATCH_LOAD_DWORD_SADDR %1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U32 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
|
||||
---
|
||||
name: scratch_store_saddr_to_valu
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
; GCN-LABEL: name: scratch_store_saddr_to_valu
|
||||
; GCN: bb.0:
|
||||
; GCN: successors: %bb.1(0x80000000)
|
||||
; GCN: liveins: $vgpr0
|
||||
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
|
||||
; GCN: bb.1:
|
||||
; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
|
||||
; GCN: [[PHI:%[0-9]+]]:vgpr_32 = PHI [[COPY]], %bb.0, %6, %bb.1
|
||||
; GCN: [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
|
||||
; GCN: [[V_READFIRSTLANE_B32_:%[0-9]+]]:sgpr_32 = V_READFIRSTLANE_B32 [[PHI]], implicit $exec
|
||||
; GCN: SCRATCH_STORE_DWORD_SADDR [[DEF]], [[V_READFIRSTLANE_B32_]], 0, 0, implicit $exec, implicit $flat_scr
|
||||
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[PHI]], 1, implicit $exec
|
||||
; GCN: V_CMP_NE_U32_e32 0, [[V_AND_B32_e64_]], implicit-def $vcc, implicit $exec
|
||||
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[V_AND_B32_e64_]], implicit $exec
|
||||
; GCN: $vcc = S_AND_B64 $exec, $vcc, implicit-def $scc
|
||||
; GCN: S_CBRANCH_VCCNZ %bb.1, implicit $vcc
|
||||
; GCN: bb.2:
|
||||
; GCN: S_ENDPGM 0
|
||||
bb.0:
|
||||
liveins: $vgpr0
|
||||
%0:sgpr_32 = COPY $vgpr0
|
||||
|
||||
bb.1:
|
||||
%1:sgpr_32 = PHI %0, %bb.0, %2, %bb.1
|
||||
%4:vgpr_32 = IMPLICIT_DEF
|
||||
SCRATCH_STORE_DWORD_SADDR %4, %1, 0, 0, implicit $exec, implicit $flat_scr
|
||||
%2:sgpr_32 = S_AND_B32 %1, 1, implicit-def $scc
|
||||
S_CMP_LG_U32 %2, 0, implicit-def $scc
|
||||
S_CBRANCH_SCC1 %bb.1, implicit $scc
|
||||
|
||||
bb.2:
|
||||
S_ENDPGM 0
|
||||
...
|
||||
Loading…
Reference in New Issue