AMDGPU: Remove kills following clusters of memory instruction

In a future commit, soft clauses will be hinted with kill instructions
rather than forced together with bundles. Look for kills that look
like this, and erase them. I'm not sure if the check for specific uses
is worthwhile, or if it would be better to just unconditionally erase
kills.

This reduces test churn in a future patch.
This commit is contained in:
Matt Arsenault 2021-02-14 09:54:25 -05:00
parent 5dfba562dd
commit a7455d7b7c
2 changed files with 107 additions and 0 deletions

View File

@ -48,6 +48,9 @@ private:
SmallSet<Register, 16> Defs;
void collectUsedRegUnits(const MachineInstr &MI,
BitVector &UsedRegUnits) const;
bool isBundleCandidate(const MachineInstr &MI) const;
bool isDependentLoad(const MachineInstr &MI) const;
bool canBundle(const MachineInstr &MI, const MachineInstr &NextMI) const;
@ -85,6 +88,21 @@ bool SIPostRABundler::isDependentLoad(const MachineInstr &MI) const {
return false;
}
void SIPostRABundler::collectUsedRegUnits(const MachineInstr &MI,
BitVector &UsedRegUnits) const {
for (const MachineOperand &Op : MI.operands()) {
if (!Op.isReg() || !Op.readsReg())
continue;
Register Reg = Op.getReg();
assert(!Op.getSubReg() &&
"subregister indexes should not be present after RA");
for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
UsedRegUnits.set(*Units);
}
}
bool SIPostRABundler::isBundleCandidate(const MachineInstr &MI) const {
const uint64_t IMemFlags = MI.getDesc().TSFlags & MemFlags;
return IMemFlags != 0 && MI.mayLoadOrStore() && !MI.isBundled();
@ -105,6 +123,9 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
return false;
TRI = MF.getSubtarget<GCNSubtarget>().getRegisterInfo();
BitVector BundleUsedRegUnits(TRI->getNumRegUnits());
BitVector KillUsedRegUnits(TRI->getNumRegUnits());
bool Changed = false;
for (MachineBasicBlock &MBB : MF) {
MachineBasicBlock::instr_iterator Next;
@ -147,6 +168,34 @@ bool SIPostRABundler::runOnMachineFunction(MachineFunction &MF) {
Next = std::next(BundleEnd);
if (ClauseLength > 1) {
Changed = true;
// Before register allocation, kills are inserted after potential soft
// clauses to hint register allocation. Look for kills that look like
// this, and erase them.
if (Next != E && Next->isKill()) {
MachineInstr &Kill = *Next;
// TODO: Should maybe back-propagate kill flags to the bundle.
for (const MachineInstr &BundleMI : make_range(BundleStart, Next))
collectUsedRegUnits(BundleMI, BundleUsedRegUnits);
collectUsedRegUnits(Kill, KillUsedRegUnits);
BundleUsedRegUnits.flip();
KillUsedRegUnits &= BundleUsedRegUnits;
// Erase the kill if it's a subset of the used registers.
//
// TODO: Should we just remove all kills? Is there any real reason to
// keep them after RA?
if (KillUsedRegUnits.none()) {
++Next;
Kill.eraseFromParent();
}
BundleUsedRegUnits.reset();
KillUsedRegUnits.reset();
}
finalizeBundle(MBB, BundleStart, Next);
}

View File

@ -220,3 +220,61 @@ body: |
$vgpr2 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
...
# Before register allocation, KILL hints are inserted after potential soft
# clauses to hint the register allocator to not clobber the input
# registers. Kills that look like this should be erased.
---
name: post_bundle_kill
body: |
bb.0:
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-LABEL: name: post_bundle_kill
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
; GCN: }
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
KILL killed $vgpr3_vgpr4, killed $vgpr5_vgpr6
...
# Kill some other register not used in the bundle, should not be touched.
---
name: post_bundle_kill_other
body: |
bb.0:
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-LABEL: name: post_bundle_kill_other
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: KILL killed $vgpr7
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
KILL killed $vgpr7
...
# Kill some other register not used in the bundle, but also some
# from the bundle.
---
name: post_bundle_kill_plus_other
body: |
bb.0:
liveins: $vgpr3_vgpr4, $vgpr5_vgpr6
; GCN-LABEL: name: post_bundle_kill_plus_other
; GCN: $vgpr7 = V_MOV_B32_e32 0, implicit $exec
; GCN: BUNDLE implicit-def $vgpr0, implicit-def $vgpr0_lo16, implicit-def $vgpr0_hi16, implicit-def $vgpr1, implicit-def $vgpr1_lo16, implicit-def $vgpr1_hi16, implicit $vgpr3_vgpr4, implicit $exec, implicit $vgpr5_vgpr6 {
; GCN: $vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
; GCN: $vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
; GCN: }
; GCN: KILL killed $vgpr7, killed $vgpr3
$vgpr7 = V_MOV_B32_e32 0, implicit $exec
$vgpr0 = GLOBAL_LOAD_DWORD $vgpr3_vgpr4, 0, 0, 0, 0, implicit $exec
$vgpr1 = GLOBAL_LOAD_DWORD $vgpr5_vgpr6, 0, 0, 0, 0, implicit $exec
KILL killed $vgpr7, killed $vgpr3
...