forked from OSchip/llvm-project
				
			R600/SI: Disable VMEM and SMEM clauses by breaking them with S_NOP
This is only a workaround. llvm-svn: 223615
This commit is contained in:
		
							parent
							
								
									58f61a84e7
								
							
						
					
					
						commit
						fa58e5e111
					
				| 
						 | 
				
			
			@ -41,6 +41,12 @@ typedef union {
 | 
			
		|||
 | 
			
		||||
} Counters;
 | 
			
		||||
 | 
			
		||||
typedef enum {
 | 
			
		||||
  OTHER,
 | 
			
		||||
  SMEM,
 | 
			
		||||
  VMEM
 | 
			
		||||
} InstType;
 | 
			
		||||
 | 
			
		||||
typedef Counters RegCounters[512];
 | 
			
		||||
typedef std::pair<unsigned, unsigned> RegInterval;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -73,6 +79,9 @@ private:
 | 
			
		|||
  /// \brief Different export instruction types seen since last wait.
 | 
			
		||||
  unsigned ExpInstrTypesSeen;
 | 
			
		||||
 | 
			
		||||
  /// \brief Type of the last opcode.
 | 
			
		||||
  InstType LastOpcodeType;
 | 
			
		||||
 | 
			
		||||
  /// \brief Get increment/decrement amount for this instruction.
 | 
			
		||||
  Counters getHwCounts(MachineInstr &MI);
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -83,7 +92,8 @@ private:
 | 
			
		|||
  RegInterval getRegInterval(MachineOperand &Op);
 | 
			
		||||
 | 
			
		||||
  /// \brief Handle instructions async components
 | 
			
		||||
  void pushInstruction(MachineInstr &MI);
 | 
			
		||||
  void pushInstruction(MachineBasicBlock &MBB,
 | 
			
		||||
                       MachineBasicBlock::iterator I);
 | 
			
		||||
 | 
			
		||||
  /// \brief Insert the actual wait instruction
 | 
			
		||||
  bool insertWait(MachineBasicBlock &MBB,
 | 
			
		||||
| 
						 | 
				
			
			@ -203,10 +213,11 @@ RegInterval SIInsertWaits::getRegInterval(MachineOperand &Op) {
 | 
			
		|||
  return Result;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void SIInsertWaits::pushInstruction(MachineInstr &MI) {
 | 
			
		||||
void SIInsertWaits::pushInstruction(MachineBasicBlock &MBB,
 | 
			
		||||
                                    MachineBasicBlock::iterator I) {
 | 
			
		||||
 | 
			
		||||
  // Get the hardware counter increments and sum them up
 | 
			
		||||
  Counters Increment = getHwCounts(MI);
 | 
			
		||||
  Counters Increment = getHwCounts(*I);
 | 
			
		||||
  unsigned Sum = 0;
 | 
			
		||||
 | 
			
		||||
  for (unsigned i = 0; i < 3; ++i) {
 | 
			
		||||
| 
						 | 
				
			
			@ -215,17 +226,42 @@ void SIInsertWaits::pushInstruction(MachineInstr &MI) {
 | 
			
		|||
  }
 | 
			
		||||
 | 
			
		||||
  // If we don't increase anything then that's it
 | 
			
		||||
  if (Sum == 0)
 | 
			
		||||
  if (Sum == 0) {
 | 
			
		||||
    LastOpcodeType = OTHER;
 | 
			
		||||
    return;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  if (TRI->ST.getGeneration() >= AMDGPUSubtarget::VOLCANIC_ISLANDS) {
 | 
			
		||||
    // Any occurence of consecutive VMEM or SMEM instructions forms a VMEM
 | 
			
		||||
    // or SMEM clause, respectively.
 | 
			
		||||
    //
 | 
			
		||||
    // The temporary workaround is to break the clauses with S_NOP.
 | 
			
		||||
    //
 | 
			
		||||
    // The proper solution would be to allocate registers such that all source
 | 
			
		||||
    // and destination registers don't overlap, e.g. this is illegal:
 | 
			
		||||
    //   r0 = load r2
 | 
			
		||||
    //   r2 = load r0
 | 
			
		||||
    if ((LastOpcodeType == SMEM && TII->isSMRD(I->getOpcode())) ||
 | 
			
		||||
        (LastOpcodeType == VMEM && Increment.Named.VM)) {
 | 
			
		||||
      // Insert a NOP to break the clause.
 | 
			
		||||
      BuildMI(MBB, I, DebugLoc(), TII->get(AMDGPU::S_NOP))
 | 
			
		||||
          .addImm(0);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (TII->isSMRD(I->getOpcode()))
 | 
			
		||||
      LastOpcodeType = SMEM;
 | 
			
		||||
    else if (Increment.Named.VM)
 | 
			
		||||
      LastOpcodeType = VMEM;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  // Remember which export instructions we have seen
 | 
			
		||||
  if (Increment.Named.EXP) {
 | 
			
		||||
    ExpInstrTypesSeen |= MI.getOpcode() == AMDGPU::EXP ? 1 : 2;
 | 
			
		||||
    ExpInstrTypesSeen |= I->getOpcode() == AMDGPU::EXP ? 1 : 2;
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
 | 
			
		||||
  for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
 | 
			
		||||
 | 
			
		||||
    MachineOperand &Op = MI.getOperand(i);
 | 
			
		||||
    MachineOperand &Op = I->getOperand(i);
 | 
			
		||||
    if (!isOpRelevant(Op))
 | 
			
		||||
      continue;
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -302,6 +338,7 @@ bool SIInsertWaits::insertWait(MachineBasicBlock &MBB,
 | 
			
		|||
                  ((Counts.Named.EXP & 0x7) << 4) |
 | 
			
		||||
                  ((Counts.Named.LGKM & 0x7) << 8));
 | 
			
		||||
 | 
			
		||||
  LastOpcodeType = OTHER;
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			@ -356,6 +393,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 | 
			
		|||
 | 
			
		||||
  WaitedOn = ZeroCounts;
 | 
			
		||||
  LastIssued = ZeroCounts;
 | 
			
		||||
  LastOpcodeType = OTHER;
 | 
			
		||||
 | 
			
		||||
  memset(&UsedRegs, 0, sizeof(UsedRegs));
 | 
			
		||||
  memset(&DefinedRegs, 0, sizeof(DefinedRegs));
 | 
			
		||||
| 
						 | 
				
			
			@ -368,7 +406,7 @@ bool SIInsertWaits::runOnMachineFunction(MachineFunction &MF) {
 | 
			
		|||
         I != E; ++I) {
 | 
			
		||||
 | 
			
		||||
      Changes |= insertWait(MBB, I, handleOperands(*I));
 | 
			
		||||
      pushInstruction(*I);
 | 
			
		||||
      pushInstruction(MBB, I);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Wait for everything at the end of the MBB
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue