forked from OSchip/llvm-project
				
			AMDGPU: Hack for VS_32 register pressure
For some reason, VS_32 ends up factoring into the pressure heuristics even though we should never see a virtual register with this class. When SGPRs are reserved for register spilling, this triggers reg-crit scheduling, for reasons that are not yet understood. Setting isAllocatable = 0 on the class may help with this, since that seems to remove it from the default implementation's generated table. llvm-svn: 252321
This commit is contained in:
		
							parent
							
								
									1063293a89
								
							
						
					
					
						commit
						623e6fd466
					
				| 
						 | 
				
			
			@ -73,26 +73,32 @@ BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
 | 
			
		|||
 | 
			
		||||
unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
 | 
			
		||||
                                                unsigned Idx) const {
 | 
			
		||||
 | 
			
		||||
  const AMDGPUSubtarget &STI = MF.getSubtarget<AMDGPUSubtarget>();
 | 
			
		||||
  // FIXME: We should adjust the max number of waves based on LDS size.
 | 
			
		||||
  unsigned SGPRLimit = getNumSGPRsAllowed(STI.getGeneration(),
 | 
			
		||||
                                          STI.getMaxWavesPerCU());
 | 
			
		||||
  unsigned VGPRLimit = getNumVGPRsAllowed(STI.getMaxWavesPerCU());
 | 
			
		||||
 | 
			
		||||
  unsigned VSLimit = SGPRLimit + VGPRLimit;
 | 
			
		||||
 | 
			
		||||
  for (regclass_iterator I = regclass_begin(), E = regclass_end();
 | 
			
		||||
       I != E; ++I) {
 | 
			
		||||
    const TargetRegisterClass *RC = *I;
 | 
			
		||||
 | 
			
		||||
    unsigned NumSubRegs = std::max((int)(*I)->getSize() / 4, 1);
 | 
			
		||||
    unsigned NumSubRegs = std::max((int)RC->getSize() / 4, 1);
 | 
			
		||||
    unsigned Limit;
 | 
			
		||||
 | 
			
		||||
    if (isSGPRClass(*I)) {
 | 
			
		||||
    if (isPseudoRegClass(RC)) {
 | 
			
		||||
      // FIXME: This is a hack. We should never be considering the pressure of
 | 
			
		||||
      // these since no virtual register should ever have this class.
 | 
			
		||||
      Limit = VSLimit;
 | 
			
		||||
    } else if (isSGPRClass(RC)) {
 | 
			
		||||
      Limit = SGPRLimit / NumSubRegs;
 | 
			
		||||
    } else {
 | 
			
		||||
      Limit = VGPRLimit / NumSubRegs;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    const int *Sets = getRegClassPressureSets(*I);
 | 
			
		||||
    const int *Sets = getRegClassPressureSets(RC);
 | 
			
		||||
    assert(Sets);
 | 
			
		||||
    for (unsigned i = 0; Sets[i] != -1; ++i) {
 | 
			
		||||
      if (Sets[i] == (int)Idx)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -59,6 +59,13 @@ public:
 | 
			
		|||
  /// \returns true if this class contains VGPR registers.
 | 
			
		||||
  bool hasVGPRs(const TargetRegisterClass *RC) const;
 | 
			
		||||
 | 
			
		||||
  /// \returns true if this is a pseudoregister class combination of VGPRs and
 | 
			
		||||
  /// SGPRs for operand modeling. FIXME: We should set isAllocatable = 0 on
 | 
			
		||||
  /// them.
 | 
			
		||||
  static bool isPseudoRegClass(const TargetRegisterClass *RC) {
 | 
			
		||||
    return RC == &AMDGPU::VS_32RegClass || RC == &AMDGPU::VS_64RegClass;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  /// \returns A VGPR reg class with the same width as \p SRC
 | 
			
		||||
  const TargetRegisterClass *getEquivalentVGPRClass(
 | 
			
		||||
                                          const TargetRegisterClass *SRC) const;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -185,18 +185,18 @@ entry:
 | 
			
		|||
; GCN-LABEL: {{^}}smrd_valu_ci_offset_x16:
 | 
			
		||||
 | 
			
		||||
; GCN: s_mov_b32 s[[OFFSET0:[0-9]+]], 0x13480{{$}}
 | 
			
		||||
; SI: s_add_i32 s[[OFFSET1:[0-9]+]], s[[OFFSET0]], 16
 | 
			
		||||
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET0]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; SI-DAG: s_add_i32 s[[OFFSET1:[0-9]+]], s[[OFFSET0]], 16
 | 
			
		||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET0]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
 | 
			
		||||
; CI: s_mov_b32 s[[OFFSET1:[0-9]+]], 0x13490{{$}}
 | 
			
		||||
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET1]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; CI-DAG: s_mov_b32 s[[OFFSET1:[0-9]+]], 0x13490{{$}}
 | 
			
		||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET1]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
 | 
			
		||||
; SI: s_add_i32 s[[OFFSET2:[0-9]+]], s[[OFFSET0]], 32
 | 
			
		||||
; CI: s_mov_b32 s[[OFFSET2:[0-9]+]], 0x134a0
 | 
			
		||||
; SI-DAG: s_add_i32 s[[OFFSET2:[0-9]+]], s[[OFFSET0]], 32
 | 
			
		||||
; CI-DAG: s_mov_b32 s[[OFFSET2:[0-9]+]], 0x134a0
 | 
			
		||||
 | 
			
		||||
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET2]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; GCN: s_add_i32 s[[OFFSET3:[0-9]+]], s[[OFFSET2]], 16
 | 
			
		||||
; GCN: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET3]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET2]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; GCN-DAG: s_add_i32 s[[OFFSET3:[0-9]+]], s[[OFFSET2]], 16
 | 
			
		||||
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}}, s{{\[}}[[OFFSET3]]:{{[0-9]+}}], 0 addr64{{$}}
 | 
			
		||||
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
; GCN: v_or_b32_e32 {{v[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -78,8 +78,8 @@ exit:
 | 
			
		|||
 | 
			
		||||
; SI: BB2_3:
 | 
			
		||||
; SI: buffer_load_dword
 | 
			
		||||
; SI: buffer_store_dword
 | 
			
		||||
; SI: v_cmp_eq_i32_e32 vcc,
 | 
			
		||||
; SI-DAG: buffer_store_dword
 | 
			
		||||
; SI-DAG: v_cmp_eq_i32_e32 vcc,
 | 
			
		||||
; SI: s_or_b64 [[OR_SREG:s\[[0-9]+:[0-9]+\]]]
 | 
			
		||||
; SI: s_andn2_b64 exec, exec, [[OR_SREG]]
 | 
			
		||||
; SI: s_cbranch_execnz BB2_3
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue