AMDGPU: Use reserved VGPR for AGPR spills to memory
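Previously, when spilling an AGPR to memory, the VGPR holding a large frame offset could also be picked as the temporary VGPR needed for copying to or from the AGPR, overwriting the offset before the memory access used it. Fix this by using the VGPR we already reserve for handling AGPR to AGPR copies as that temporary instead.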
This commit is contained in:
parent
c302f1e677
commit
f2c99ea47d
|
|
@ -1557,16 +1557,8 @@ void SIRegisterInfo::buildSpillLoadStore(
|
|||
assert(EltSize == 4);
|
||||
|
||||
if (!TmpIntermediateVGPR) {
|
||||
bool AllowSpill = !UseVGPROffset;
|
||||
|
||||
assert(RS && "Needs to have RegScavenger to spill an AGPR!");
|
||||
// FIXME: change to scavengeRegisterBackwards()
|
||||
TmpIntermediateVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass,
|
||||
MI, 0, AllowSpill);
|
||||
if (!TmpIntermediateVGPR)
|
||||
TmpIntermediateVGPR = TmpOffsetVGPR;
|
||||
else
|
||||
RS->setRegUsed(TmpIntermediateVGPR);
|
||||
assert(MF->getRegInfo().isReserved(AMDGPU::VGPR32));
|
||||
TmpIntermediateVGPR = AMDGPU::VGPR32;
|
||||
}
|
||||
if (IsStore) {
|
||||
auto AccRead = BuildMI(MBB, MI, DL,
|
||||
|
|
|
|||
|
|
@ -25,8 +25,8 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -516,8 +516,8 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
@ -1035,11 +1035,10 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -1530,12 +1529,10 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
@ -2053,14 +2050,12 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
|
||||
; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFEN $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
|
||||
; GFX908-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -2552,16 +2547,12 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD $vgpr0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 8, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = SCRATCH_LOAD_DWORD $vgpr0, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 8, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
@ -3079,8 +3070,8 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -3570,8 +3561,8 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
@ -4088,11 +4079,10 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -4583,12 +4573,10 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
@ -5104,14 +5092,12 @@ body: |
|
|||
; GFX908-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = V_MOV_B32_e32 8200, implicit $exec
|
||||
; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
|
||||
; GFX908-NEXT: BUFFER_STORE_DWORD_OFFEN $vgpr32, $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
|
||||
; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-NEXT: {{ $}}
|
||||
|
|
@ -5603,16 +5589,12 @@ body: |
|
|||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_MOV_B32_e32 $sgpr32, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ADD_U32_e32 8200, $vgpr0, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr0, $vgpr0, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 4, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
|
||||
; GFX908-FLATSCR-NEXT: SCRATCH_STORE_DWORD $vgpr32, $vgpr0, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
|
||||
; GFX908-FLATSCR-NEXT: S_CBRANCH_SCC1 %bb.2, implicit $scc
|
||||
; GFX908-FLATSCR-NEXT: {{ $}}
|
||||
|
|
|
|||
|
|
@ -65,80 +65,81 @@ define void @no_free_vgprs_at_agpr_copy(float %v0, float %v1) #0 {
|
|||
; GFX908-NEXT: v_mfma_f32_16x16x1f32 a[0:15], v33, v34, a[16:31]
|
||||
; GFX908-NEXT: s_nop 7
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a0 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a0 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v39, a11 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v38, a12 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a1 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a1 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v37, a13 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v36, a14 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a2 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a2 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v35, a15 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 0
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a3 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a3 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a4 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a4 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a5 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a5 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a6 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:20 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a6 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a7 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:24 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a7 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a8 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a8 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a9 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a9 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v34, a10 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:36 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a10 ; Reload Reuse
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: buffer_store_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: buffer_store_dword v32, off, s[0:3], s32 offset:40 ; 4-byte Folded Spill
|
||||
; GFX908-NEXT: ;;#ASMSTART
|
||||
; GFX908-NEXT: ; copy
|
||||
; GFX908-NEXT: ;;#ASMEND
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_read_b32 v32, a1
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a0, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_nop 1
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a16, v32
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a1, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a0, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:4 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a2, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a1, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:8 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a2, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:12 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a4, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a3, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:16 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a5, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a4, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:20 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a6, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a5, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:24 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a7, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a6, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:28 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a8, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a7, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:32 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a9, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v34, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a8, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:36 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a10, v34 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a9, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: buffer_load_dword v32, off, s[0:3], s32 offset:40 ; 4-byte Folded Reload
|
||||
; GFX908-NEXT: s_waitcnt vmcnt(0)
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a10, v32 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a11, v39 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a12, v38 ; Reload Reuse
|
||||
; GFX908-NEXT: v_accvgpr_write_b32 a13, v37 ; Reload Reuse
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load Diff
File diff suppressed because it is too large
Load Diff
|
|
@ -17,10 +17,10 @@ body: |
|
|||
; CHECK-LABEL: name: spill_a64_kill
|
||||
; CHECK: liveins: $agpr0_agpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
|
||||
...
|
||||
|
||||
|
|
@ -42,10 +42,10 @@ body: |
|
|||
; CHECK-LABEL: name: spill_a64_undef_sub1_killed
|
||||
; CHECK: liveins: $agpr0
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
|
||||
...
|
||||
|
||||
|
|
@ -65,9 +65,9 @@ body: |
|
|||
; CHECK-LABEL: name: spill_a64_undef_sub0_killed
|
||||
; CHECK: liveins: $agpr1
|
||||
; CHECK-NEXT: {{ $}}
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
|
||||
; CHECK-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
|
||||
; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
|
||||
SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
|
||||
...
|
||||
|
|
|
|||
|
|
@ -74,14 +74,14 @@ use:
|
|||
; GCN: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
|
||||
; GCN: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
|
||||
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v1, a0 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v1, off, s[{{[0-9:]+}}], 0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v1, a1 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v1, off, s[{{[0-9:]+}}], 0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v1, a2 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v1, off, s[{{[0-9:]+}}], 0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v1, a3 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v1, off, s[{{[0-9:]+}}], 0 offset:16 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v32, a0 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v32, off, s[{{[0-9:]+}}], 0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v32, a1 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v32, off, s[{{[0-9:]+}}], 0 offset:8 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v32, a2 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v32, off, s[{{[0-9:]+}}], 0 offset:12 ; 4-byte Folded Spill
|
||||
; GFX908-DAG: v_accvgpr_read_b32 v32, a3 ; Reload Reuse
|
||||
; GFX908-DAG: buffer_store_dword v32, off, s[{{[0-9:]+}}], 0 offset:16 ; 4-byte Folded Spill
|
||||
|
||||
; GFX90A-DAG: buffer_store_dword a0, off, s[{{[0-9:]+}}], 0 offset:4 ; 4-byte Folded Spill
|
||||
; GFX90A-DAG: buffer_store_dword a1, off, s[{{[0-9:]+}}], 0 offset:8 ; 4-byte Folded Spill
|
||||
|
|
|
|||
|
|
@ -308,10 +308,8 @@ body: |
|
|||
; GFX908-EXPANDED-NEXT: liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
|
||||
; GFX908-EXPANDED-NEXT: {{ $}}
|
||||
; GFX908-EXPANDED-NEXT: S_NOP 0, implicit-def renamable $agpr0
|
||||
; GFX908-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; GFX908-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: $vgpr32 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
|
||||
; GFX908-EXPANDED-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr32, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: S_CBRANCH_SCC1 %bb.1, implicit undef $scc
|
||||
; GFX908-EXPANDED-NEXT: {{ $}}
|
||||
; GFX908-EXPANDED-NEXT: bb.1:
|
||||
|
|
@ -319,8 +317,8 @@ body: |
|
|||
; GFX908-EXPANDED-NEXT: {{ $}}
|
||||
; GFX908-EXPANDED-NEXT: {{ $}}
|
||||
; GFX908-EXPANDED-NEXT: bb.2:
|
||||
; GFX908-EXPANDED-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
|
||||
; GFX908-EXPANDED-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
|
||||
; GFX908-EXPANDED-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr32, implicit $exec
|
||||
; GFX908-EXPANDED-NEXT: S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
|
||||
; GFX908-EXPANDED-NEXT: S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
|
||||
; GFX908-EXPANDED-NEXT: S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
|
||||
|
|
|
|||
Loading…
Reference in New Issue