224 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			224 lines
		
	
	
		
			7.8 KiB
		
	
	
	
		
			C++
		
	
	
	
//===- SIInsertHardClauses.cpp - Insert Hard Clauses ----------------------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
/// \file
 | 
						|
/// Insert s_clause instructions to form hard clauses.
 | 
						|
///
 | 
						|
/// Clausing load instructions can give cache coherency benefits. Before gfx10,
 | 
						|
/// the hardware automatically detected "soft clauses", which were sequences of
 | 
						|
/// memory instructions of the same type. In gfx10 this detection was removed,
 | 
						|
/// and the s_clause instruction was introduced to explicitly mark "hard
 | 
						|
/// clauses".
 | 
						|
///
 | 
						|
/// It's the scheduler's job to form the clauses by putting similar memory
 | 
						|
/// instructions next to each other. Our job is just to insert an s_clause
 | 
						|
/// instruction to mark the start of each clause.
 | 
						|
///
 | 
						|
/// Note that hard clauses are very similar to, but logically distinct from, the
 | 
						|
/// groups of instructions that have to be restartable when XNACK is enabled.
 | 
						|
/// The rules are slightly different in each case. For example an s_nop
 | 
						|
/// instruction breaks a restartable group, but can appear in the middle of a
 | 
						|
/// hard clause. (Before gfx10 there wasn't a distinction, and both were called
 | 
						|
/// "soft clauses" or just "clauses".)
 | 
						|
///
 | 
						|
/// The SIFormMemoryClauses pass and GCNHazardRecognizer deal with restartable
 | 
						|
/// groups, not hard clauses.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "AMDGPU.h"
 | 
						|
#include "GCNSubtarget.h"
 | 
						|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 | 
						|
#include "llvm/ADT/SmallVector.h"
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "si-insert-hard-clauses"
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
/// Classification of an instruction with respect to hard clauses.
///
/// The enumerator order is significant: every value up to and including
/// LAST_REAL_HARDCLAUSE_TYPE names a kind of clause that can actually be
/// formed, and the pass tests for that with `Type <= LAST_REAL_HARDCLAUSE_TYPE`.
enum HardClauseType {
  /// Texture, buffer, global or scratch memory instructions.
  HARDCLAUSE_VMEM,
  /// Flat memory instructions that are neither global nor scratch.
  HARDCLAUSE_FLAT,
  /// Instructions that access LDS.
  HARDCLAUSE_LDS,
  /// Scalar memory instructions.
  HARDCLAUSE_SMEM,
  /// VALU instructions.
  HARDCLAUSE_VALU,
  /// Alias for the last enumerator that denotes a real clause type.
  LAST_REAL_HARDCLAUSE_TYPE = HARDCLAUSE_VALU,

  /// Internal instructions. These may sit in the middle of a hard clause,
  /// with the exception of s_waitcnt.
  HARDCLAUSE_INTERNAL,
  /// Meta instructions, such as KILL, that do not produce any ISA.
  HARDCLAUSE_IGNORE,
  /// Instructions that must not appear in a hard clause: SALU, export,
  /// branch, message, GDS, s_waitcnt and anything not covered above.
  HARDCLAUSE_ILLEGAL,
};
 | 
						|
 | 
						|
class SIInsertHardClauses : public MachineFunctionPass {
 | 
						|
public:
 | 
						|
  static char ID;
 | 
						|
  const GCNSubtarget *ST = nullptr;
 | 
						|
 | 
						|
  SIInsertHardClauses() : MachineFunctionPass(ID) {}
 | 
						|
 | 
						|
  void getAnalysisUsage(AnalysisUsage &AU) const override {
 | 
						|
    AU.setPreservesCFG();
 | 
						|
    MachineFunctionPass::getAnalysisUsage(AU);
 | 
						|
  }
 | 
						|
 | 
						|
  HardClauseType getHardClauseType(const MachineInstr &MI) {
 | 
						|
 | 
						|
    // On current architectures we only get a benefit from clausing loads.
 | 
						|
    if (MI.mayLoad()) {
 | 
						|
      if (SIInstrInfo::isVMEM(MI) || SIInstrInfo::isSegmentSpecificFLAT(MI)) {
 | 
						|
        if (ST->hasNSAClauseBug()) {
 | 
						|
          const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
 | 
						|
          if (Info && Info->MIMGEncoding == AMDGPU::MIMGEncGfx10NSA)
 | 
						|
            return HARDCLAUSE_ILLEGAL;
 | 
						|
        }
 | 
						|
        return HARDCLAUSE_VMEM;
 | 
						|
      }
 | 
						|
      if (SIInstrInfo::isFLAT(MI))
 | 
						|
        return HARDCLAUSE_FLAT;
 | 
						|
      // TODO: LDS
 | 
						|
      if (SIInstrInfo::isSMRD(MI))
 | 
						|
        return HARDCLAUSE_SMEM;
 | 
						|
    }
 | 
						|
 | 
						|
    // Don't form VALU clauses. It's not clear what benefit they give, if any.
 | 
						|
 | 
						|
    // In practice s_nop is the only internal instruction we're likely to see.
 | 
						|
    // It's safe to treat the rest as illegal.
 | 
						|
    if (MI.getOpcode() == AMDGPU::S_NOP)
 | 
						|
      return HARDCLAUSE_INTERNAL;
 | 
						|
    if (MI.isMetaInstruction())
 | 
						|
      return HARDCLAUSE_IGNORE;
 | 
						|
    return HARDCLAUSE_ILLEGAL;
 | 
						|
  }
 | 
						|
 | 
						|
  // Track information about a clause as we discover it.
 | 
						|
  struct ClauseInfo {
 | 
						|
    // The type of all (non-internal) instructions in the clause.
 | 
						|
    HardClauseType Type = HARDCLAUSE_ILLEGAL;
 | 
						|
    // The first (necessarily non-internal) instruction in the clause.
 | 
						|
    MachineInstr *First = nullptr;
 | 
						|
    // The last non-internal instruction in the clause.
 | 
						|
    MachineInstr *Last = nullptr;
 | 
						|
    // The length of the clause including any internal instructions in the
 | 
						|
    // middle (but not at the end) of the clause.
 | 
						|
    unsigned Length = 0;
 | 
						|
    // Internal instructions at the and of a clause should not be included in
 | 
						|
    // the clause. Count them in TrailingInternalLength until a new memory
 | 
						|
    // instruction is added.
 | 
						|
    unsigned TrailingInternalLength = 0;
 | 
						|
    // The base operands of *Last.
 | 
						|
    SmallVector<const MachineOperand *, 4> BaseOps;
 | 
						|
  };
 | 
						|
 | 
						|
  bool emitClause(const ClauseInfo &CI, const SIInstrInfo *SII) {
 | 
						|
    if (CI.First == CI.Last)
 | 
						|
      return false;
 | 
						|
    assert(CI.Length <= 64 && "Hard clause is too long!");
 | 
						|
 | 
						|
    auto &MBB = *CI.First->getParent();
 | 
						|
    auto ClauseMI =
 | 
						|
        BuildMI(MBB, *CI.First, DebugLoc(), SII->get(AMDGPU::S_CLAUSE))
 | 
						|
            .addImm(CI.Length - 1);
 | 
						|
    finalizeBundle(MBB, ClauseMI->getIterator(),
 | 
						|
                   std::next(CI.Last->getIterator()));
 | 
						|
    return true;
 | 
						|
  }
 | 
						|
 | 
						|
  bool runOnMachineFunction(MachineFunction &MF) override {
 | 
						|
    if (skipFunction(MF.getFunction()))
 | 
						|
      return false;
 | 
						|
 | 
						|
    ST = &MF.getSubtarget<GCNSubtarget>();
 | 
						|
    if (!ST->hasHardClauses())
 | 
						|
      return false;
 | 
						|
 | 
						|
    const SIInstrInfo *SII = ST->getInstrInfo();
 | 
						|
    const TargetRegisterInfo *TRI = ST->getRegisterInfo();
 | 
						|
 | 
						|
    bool Changed = false;
 | 
						|
    for (auto &MBB : MF) {
 | 
						|
      ClauseInfo CI;
 | 
						|
      for (auto &MI : MBB) {
 | 
						|
        HardClauseType Type = getHardClauseType(MI);
 | 
						|
 | 
						|
        int64_t Dummy1;
 | 
						|
        bool Dummy2;
 | 
						|
        unsigned Dummy3;
 | 
						|
        SmallVector<const MachineOperand *, 4> BaseOps;
 | 
						|
        if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
 | 
						|
          if (!SII->getMemOperandsWithOffsetWidth(MI, BaseOps, Dummy1, Dummy2,
 | 
						|
                                                  Dummy3, TRI)) {
 | 
						|
            // We failed to get the base operands, so we'll never clause this
 | 
						|
            // instruction with any other, so pretend it's illegal.
 | 
						|
            Type = HARDCLAUSE_ILLEGAL;
 | 
						|
          }
 | 
						|
        }
 | 
						|
 | 
						|
        if (CI.Length == 64 ||
 | 
						|
            (CI.Length && Type != HARDCLAUSE_INTERNAL &&
 | 
						|
             Type != HARDCLAUSE_IGNORE &&
 | 
						|
             (Type != CI.Type ||
 | 
						|
              // Note that we lie to shouldClusterMemOps about the size of the
 | 
						|
              // cluster. When shouldClusterMemOps is called from the machine
 | 
						|
              // scheduler it limits the size of the cluster to avoid increasing
 | 
						|
              // register pressure too much, but this pass runs after register
 | 
						|
              // allocation so there is no need for that kind of limit.
 | 
						|
              !SII->shouldClusterMemOps(CI.BaseOps, BaseOps, 2, 2)))) {
 | 
						|
          // Finish the current clause.
 | 
						|
          Changed |= emitClause(CI, SII);
 | 
						|
          CI = ClauseInfo();
 | 
						|
        }
 | 
						|
 | 
						|
        if (CI.Length) {
 | 
						|
          // Extend the current clause.
 | 
						|
          if (Type != HARDCLAUSE_IGNORE) {
 | 
						|
            if (Type == HARDCLAUSE_INTERNAL) {
 | 
						|
              ++CI.TrailingInternalLength;
 | 
						|
            } else {
 | 
						|
              ++CI.Length;
 | 
						|
              CI.Length += CI.TrailingInternalLength;
 | 
						|
              CI.TrailingInternalLength = 0;
 | 
						|
              CI.Last = &MI;
 | 
						|
              CI.BaseOps = std::move(BaseOps);
 | 
						|
            }
 | 
						|
          }
 | 
						|
        } else if (Type <= LAST_REAL_HARDCLAUSE_TYPE) {
 | 
						|
          // Start a new clause.
 | 
						|
          CI = ClauseInfo{Type, &MI, &MI, 1, 0, std::move(BaseOps)};
 | 
						|
        }
 | 
						|
      }
 | 
						|
 | 
						|
      // Finish the last clause in the basic block if any.
 | 
						|
      if (CI.Length)
 | 
						|
        Changed |= emitClause(CI, SII);
 | 
						|
    }
 | 
						|
 | 
						|
    return Changed;
 | 
						|
  }
 | 
						|
};
 | 
						|
 | 
						|
} // namespace
 | 
						|
 | 
						|
char SIInsertHardClauses::ID = 0;
 | 
						|
 | 
						|
char &llvm::SIInsertHardClausesID = SIInsertHardClauses::ID;
 | 
						|
 | 
						|
INITIALIZE_PASS(SIInsertHardClauses, DEBUG_TYPE, "SI Insert Hard Clauses",
 | 
						|
                false, false)
 |