AMDGPU: Remove kills following clusters of memory instruction
In a future commit, soft clauses will be hinted with kill instructions rather than forced together with bundles. Look for kills that look like this, and erase them. I'm not sure if the check for specific uses is worthwhile, or if it would be better to just unconditionally erase kills. This reduces test churn in a future patch.
This commit is contained in:
parent
5dfba562dd
commit
a7455d7b7c
|
|
@ -48,6 +48,9 @@ private:
|
|||
|
||||
SmallSet<Register, 16> Defs;
|
||||
|
||||
void collectUsedRegUnits(const MachineInstr &MI,
|
||||
BitVector &UsedRegUnits) const;
|
||||
|
||||
bool isBundleCandidate(const MachineInstr &MI) const;
|
||||
bool isDependentLoad(const MachineInstr &MI) const;
|
||||
bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
|
||||
|
|
@ -85,6 +88,21 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
|
||||
BitVector &UsedRegUnits) const {
|
||||
for (const MachineOperand &Op : MI.operands()) {
|
||||
if (!Op.isReg() || !Op.readsReg())
|
||||
continue;
|
||||
|
||||
Register Reg = Op.getReg();
|
||||
assert(!Op.getSubReg() &&
|
||||
"subregister indexes should not be present after RA");
|
||||
|
||||
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
|
||||
UsedRegUnits.set(*Units);
|
||||
}
|
||||
}
|
||||
|
||||
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
|
||||
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
|
||||
return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
|
||||
|
|
@ -105,6 +123,9 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
|
|||
return false;
|
||||
|
||||
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
|
||||
BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
|
||||
BitVector KillUsedRegUnits(TRI->getNumRegUnits());
|
||||
|
||||
bool Changed = false;
|
||||
for (MachineBasicBlock &MBB : MF) {
|
||||
MachineBasicBlock::instr_iterator Next;
|
||||
|
|
@ -147,6 +168,34 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
|
|||
Next = std::next(BundleEnd);
|
||||
if (ClauseLength > 1) {
|
||||
Changed = true;
|
||||
|
||||
// Before register allocation, kills are inserted after potential soft
|
||||
// clauses to hint register allocation. Look for kills that look like
|
||||
// this, and erase them.
|
||||
if (Next != E && Next->isKill()) {
|
||||
MachineInstr &Kill = *Next;
|
||||
|
||||
// TODO: Should maybe back-propagate kill flags to the bundle.
|
||||
for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
|
||||
collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
|
||||
collectUsedRegUnits(Kill, KillUsedRegUnits);
|
||||
|
||||
BundleUsedRegUnits.flip();
|
||||
KillUsedRegUnits &= BundleUsedRegUnits;
|
||||
|
||||
// Erase the kill if it's a subset of the used registers.
|
||||
//
|
||||
// TODO: Should we just remove all kills? Is there any real reason to
|
||||
// keep them after RA?
|
||||
if (KillUsedRegUnits.none()) {
|
||||
++Next;
|
||||
Kill.eraseFromParent();
|
||||
}
|
||||
|
||||
BundleUsedRegUnits.reset();
|
||||
KillUsedRegUnits.reset();
|
||||
}
|
||||
|
||||
finalizeBundle(MBB, BundleStart, Next);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -220,3 +220,61 @@ body: |
|
|||
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
|
||||
...
|
||||
|
||||
# Before register allocation, KILL hints are inserted after potential soft
|
||||
# clauses to hint the register allocator to not clobber the input
|
||||
# registers. Kills that look like this should be erased.
|
||||
---
|
||||
name: post_bundle_kill
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
|
||||
; GCN-LABEL: name: post_bundle_kill
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
|
||||
...
|
||||
|
||||
# Kill some other register not used in the bundle, should not be touched.
|
||||
---
|
||||
name: post_bundle_kill_other
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN-LABEL: name: post_bundle_kill_other
|
||||
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: KILL killed $vgpr7
|
||||
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
KILL killed $vgpr7
|
||||
...
|
||||
|
||||
# Kill some other register not used in the bundle, but also some
|
||||
# from the bundle.
|
||||
---
|
||||
name: post_bundle_kill_plus_other
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
|
||||
; GCN-LABEL: name: post_bundle_kill_plus_other
|
||||
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
|
||||
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
|
||||
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
; GCN: }
|
||||
; GCN: KILL killed $vgpr7, killed $vgpr3
|
||||
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
|
||||
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
|
||||
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
|
||||
KILL killed $vgpr7, killed $vgpr3
|
||||
...
|
||||
|
|
|
|||
Loading…
Reference in New Issue