693 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			693 lines
		
	
	
		
			19 KiB
		
	
	
	
		
			C++
		
	
	
	
| //==- SIMachineFunctionInfo.h - SIMachineFunctionInfo interface --*- C++ -*-==//
 | |
| //
 | |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | |
| // See https://llvm.org/LICENSE.txt for license information.
 | |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| //
 | |
| /// \file
 | |
| //
 | |
| //===----------------------------------------------------------------------===//
 | |
| 
 | |
| #ifndef LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 | |
| #define LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 | |
| 
 | |
| #include "AMDGPUArgumentUsageInfo.h"
 | |
| #include "AMDGPUMachineFunction.h"
 | |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 | |
| #include "SIInstrInfo.h"
 | |
| #include "SIRegisterInfo.h"
 | |
| #include "llvm/ADT/ArrayRef.h"
 | |
| #include "llvm/ADT/DenseMap.h"
 | |
| #include "llvm/ADT/Optional.h"
 | |
| #include "llvm/ADT/SmallVector.h"
 | |
| #include "llvm/ADT/SparseBitVector.h"
 | |
| #include "llvm/CodeGen/MIRYamlMapping.h"
 | |
| #include "llvm/CodeGen/PseudoSourceValue.h"
 | |
| #include "llvm/CodeGen/TargetInstrInfo.h"
 | |
| #include "llvm/MC/MCRegisterInfo.h"
 | |
| #include "llvm/Support/ErrorHandling.h"
 | |
| #include <array>
 | |
| #include <cassert>
 | |
| #include <utility>
 | |
| #include <vector>
 | |
| 
 | |
| namespace llvm {
 | |
| 
 | |
| class MachineFrameInfo;
 | |
| class MachineFunction;
 | |
| class TargetRegisterClass;
 | |
| 
 | |
| class AMDGPUImagePseudoSourceValue : public PseudoSourceValue {
 | |
| public:
 | |
|   // TODO: Is the img rsrc useful?
 | |
|   explicit AMDGPUImagePseudoSourceValue(const TargetInstrInfo &TII) :
 | |
|     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) {}
 | |
| 
 | |
|   bool isConstant(const MachineFrameInfo *) const override {
 | |
|     // This should probably be true for most images, but we will start by being
 | |
|     // conservative.
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   bool isAliased(const MachineFrameInfo *) const override {
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   bool mayAlias(const MachineFrameInfo *) const override {
 | |
|     return true;
 | |
|   }
 | |
| };
 | |
| 
 | |
| class AMDGPUBufferPseudoSourceValue : public PseudoSourceValue {
 | |
| public:
 | |
|   explicit AMDGPUBufferPseudoSourceValue(const TargetInstrInfo &TII) :
 | |
|     PseudoSourceValue(PseudoSourceValue::TargetCustom, TII) { }
 | |
| 
 | |
|   bool isConstant(const MachineFrameInfo *) const override {
 | |
|     // This should probably be true for most images, but we will start by being
 | |
|     // conservative.
 | |
|     return false;
 | |
|   }
 | |
| 
 | |
|   bool isAliased(const MachineFrameInfo *) const override {
 | |
|     return true;
 | |
|   }
 | |
| 
 | |
|   bool mayAlias(const MachineFrameInfo *) const override {
 | |
|     return true;
 | |
|   }
 | |
| };
 | |
| 
 | |
| namespace yaml {
 | |
| 
 | |
/// Serializable mirror of llvm::SIMachineFunctionInfo for MIR YAML
/// round-tripping. Field defaults here must stay in sync with the defaults
/// used by MappingTraits below so that default-valued fields are omitted
/// from the output.
struct SIMachineFunctionInfo final : public yaml::MachineFunctionInfo {
  uint64_t ExplicitKernArgSize = 0;
  unsigned MaxKernArgAlign = 0;
  unsigned LDSSize = 0;
  bool IsEntryFunction = false;
  bool NoSignedZerosFPMath = false;
  bool MemoryBound = false;
  bool WaveLimiter = false;

  // Register fields are serialized as register names; the defaults below are
  // the printed names of the corresponding placeholder registers.
  StringValue ScratchRSrcReg = "$private_rsrc_reg";
  StringValue ScratchWaveOffsetReg = "$scratch_wave_offset_reg";
  StringValue FrameOffsetReg = "$fp_reg";
  StringValue StackPtrOffsetReg = "$sp_reg";

  SIMachineFunctionInfo() = default;
  // Build the YAML form from the in-memory MFI; defined in the .cpp.
  SIMachineFunctionInfo(const llvm::SIMachineFunctionInfo &,
                        const TargetRegisterInfo &TRI);

  void mappingImpl(yaml::IO &YamlIO) override;
  ~SIMachineFunctionInfo() = default;
};
 | |
| 
 | |
/// YAML mapping for yaml::SIMachineFunctionInfo. The key order here is the
/// order keys appear in serialized MIR, and each default must match the
/// field's in-class initializer so unchanged fields round-trip silently.
template <> struct MappingTraits<SIMachineFunctionInfo> {
  static void mapping(IO &YamlIO, SIMachineFunctionInfo &MFI) {
    YamlIO.mapOptional("explicitKernArgSize", MFI.ExplicitKernArgSize,
                       UINT64_C(0));
    YamlIO.mapOptional("maxKernArgAlign", MFI.MaxKernArgAlign, 0u);
    YamlIO.mapOptional("ldsSize", MFI.LDSSize, 0u);
    YamlIO.mapOptional("isEntryFunction", MFI.IsEntryFunction, false);
    YamlIO.mapOptional("noSignedZerosFPMath", MFI.NoSignedZerosFPMath, false);
    YamlIO.mapOptional("memoryBound", MFI.MemoryBound, false);
    YamlIO.mapOptional("waveLimiter", MFI.WaveLimiter, false);
    YamlIO.mapOptional("scratchRSrcReg", MFI.ScratchRSrcReg,
                       StringValue("$private_rsrc_reg"));
    YamlIO.mapOptional("scratchWaveOffsetReg", MFI.ScratchWaveOffsetReg,
                       StringValue("$scratch_wave_offset_reg"));
    YamlIO.mapOptional("frameOffsetReg", MFI.FrameOffsetReg,
                       StringValue("$fp_reg"));
    YamlIO.mapOptional("stackPtrOffsetReg", MFI.StackPtrOffsetReg,
                       StringValue("$sp_reg"));
  }
};
 | |
| 
 | |
| } // end namespace yaml
 | |
| 
 | |
| /// This class keeps track of the SPI_SP_INPUT_ADDR config register, which
 | |
| /// tells the hardware which interpolation parameters to load.
 | |
class SIMachineFunctionInfo final : public AMDGPUMachineFunction {
  friend class GCNTargetMachine;

  // Register reserved for the computed thread ID; AMDGPU::NoRegister until
  // assigned via setTIDReg() (hasCalculatedTID() tests it against 0).
  unsigned TIDReg = AMDGPU::NoRegister;

  // Registers that may be reserved for spilling purposes. These may be the same
  // as the input registers.
  unsigned ScratchRSrcReg = AMDGPU::PRIVATE_RSRC_REG;
  unsigned ScratchWaveOffsetReg = AMDGPU::SCRATCH_WAVE_OFFSET_REG;

  // This is the current function's incremented size from the kernel's scratch
  // wave offset register. For an entry function, this is exactly the same as
  // the ScratchWaveOffsetReg.
  unsigned FrameOffsetReg = AMDGPU::FP_REG;

  // Top of the stack SGPR offset derived from the ScratchWaveOffsetReg.
  unsigned StackPtrOffsetReg = AMDGPU::SP_REG;

  // Where each preloaded ABI value (dispatch ptr, workgroup IDs, ...) lives.
  AMDGPUFunctionArgInfo ArgInfo;

  // State of MODE register, assumed FP mode.
  AMDGPU::SIModeRegisterDefaults Mode;

  // Graphics info.
  unsigned PSInputAddr = 0;
  unsigned PSInputEnable = 0;

  /// Number of bytes of arguments this function has on the stack. If the callee
  /// is expected to restore the argument stack this should be a multiple of 16,
  /// all usable during a tail call.
  ///
  /// The alternative would forbid tail call optimisation in some cases: if we
  /// want to transfer control from a function with 8-bytes of stack-argument
  /// space to a function with 16-bytes then misalignment of this value would
  /// make a stack adjustment necessary, which could not be undone by the
  /// callee.
  unsigned BytesInStackArgArea = 0;

  bool ReturnsVoid = true;

  // A pair of default/requested minimum/maximum flat work group sizes.
  // Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> FlatWorkGroupSizes = {0, 0};

  // A pair of default/requested minimum/maximum number of waves per execution
  // unit. Minimum - first, maximum - second.
  std::pair<unsigned, unsigned> WavesPerEU = {0, 0};

  // Lazily-created pseudo source values keyed by resource descriptor value;
  // populated by getBufferPSV() / getImagePSV().
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUBufferPseudoSourceValue>> BufferPSVs;
  DenseMap<const Value *,
           std::unique_ptr<const AMDGPUImagePseudoSourceValue>> ImagePSVs;

private:
  unsigned LDSWaveSpillSize = 0;
  unsigned NumUserSGPRs = 0;
  unsigned NumSystemSGPRs = 0;

  bool HasSpilledSGPRs = false;
  bool HasSpilledVGPRs = false;
  bool HasNonSpillStackObjects = false;
  bool IsStackRealigned = false;

  unsigned NumSpilledSGPRs = 0;
  unsigned NumSpilledVGPRs = 0;

  // Feature bits required for inputs passed in user SGPRs.
  // NOTE(review): these bit-fields have no in-class initializers; presumably
  // they are set by the SIMachineFunctionInfo(MF) constructor in the .cpp —
  // confirm before reading them on a hand-constructed instance.
  bool PrivateSegmentBuffer : 1;
  bool DispatchPtr : 1;
  bool QueuePtr : 1;
  bool KernargSegmentPtr : 1;
  bool DispatchID : 1;
  bool FlatScratchInit : 1;

  // Feature bits required for inputs passed in system SGPRs.
  bool WorkGroupIDX : 1; // Always initialized.
  bool WorkGroupIDY : 1;
  bool WorkGroupIDZ : 1;
  bool WorkGroupInfo : 1;
  bool PrivateSegmentWaveByteOffset : 1;

  bool WorkItemIDX : 1; // Always initialized.
  bool WorkItemIDY : 1;
  bool WorkItemIDZ : 1;

  // Private memory buffer
  // Compute directly in sgpr[0:1]
  // Other shaders indirect 64-bits at sgpr[0:1]
  bool ImplicitBufferPtr : 1;

  // Pointer to where the ABI inserts special kernel arguments separate from the
  // user arguments. This is an offset from the KernargSegmentPtr.
  bool ImplicitArgPtr : 1;

  // The hard-wired high half of the address of the global information table
  // for AMDPAL OS type. 0xffffffff represents no hard-wired high half, since
  // current hardware only allows a 16 bit value.
  unsigned GITPtrHigh;

  unsigned HighBitsOf32BitAddress;

  // Current recorded maximum possible occupancy.
  unsigned Occupancy;

  // Next free user/system SGPR; defined in the .cpp.
  MCPhysReg getNextUserSGPR() const;

  MCPhysReg getNextSystemSGPR() const;

public:
  /// One lane of a VGPR holding a spilled SGPR subregister.
  struct SpilledReg {
    unsigned VGPR = 0; // 0 means "no register assigned" (see hasReg()).
    int Lane = -1;     // -1 means "no lane assigned" (see hasLane()).

    SpilledReg() = default;
    SpilledReg(unsigned R, int L) : VGPR (R), Lane (L) {}

    bool hasLane() { return Lane != -1;}
    bool hasReg() { return VGPR != 0;}
  };

  struct SGPRSpillVGPRCSR {
    // VGPR used for SGPR spills
    unsigned VGPR;

    // If the VGPR is a CSR, the stack slot used to save/restore it in the
    // prolog/epilog.
    Optional<int> FI;

    SGPRSpillVGPRCSR(unsigned V, Optional<int> F) : VGPR(V), FI(F) {}
  };

  // Registers reserved for whole-wave-mode (WWM) execution.
  SparseBitVector<> WWMReservedRegs;

  void ReserveWWMRegister(unsigned reg) { WWMReservedRegs.set(reg); }

private:
  // SGPR->VGPR spilling support.
  using SpillRegMask = std::pair<unsigned, unsigned>;

  // Track VGPR + wave index for each subregister of the SGPR spilled to
  // frameindex key.
  DenseMap<int, std::vector<SpilledReg>> SGPRToVGPRSpills;
  unsigned NumVGPRSpillLanes = 0;
  SmallVector<SGPRSpillVGPRCSR, 2> SpillVGPRs;

public:
  SIMachineFunctionInfo(const MachineFunction &MF);

  // Restore state from the serialized YAML form (yaml::SIMachineFunctionInfo).
  bool initializeBaseYamlFields(const yaml::SIMachineFunctionInfo &YamlMFI);

  /// \returns the VGPR lane assignments for the SGPR spilled to
  /// \p FrameIndex, or an empty ArrayRef if no spill was allocated for it.
  ArrayRef<SpilledReg> getSGPRToVGPRSpills(int FrameIndex) const {
    auto I = SGPRToVGPRSpills.find(FrameIndex);
    return (I == SGPRToVGPRSpills.end()) ?
      ArrayRef<SpilledReg>() : makeArrayRef(I->second);
  }

  ArrayRef<SGPRSpillVGPRCSR> getSGPRSpillVGPRs() const {
    return SpillVGPRs;
  }

  AMDGPU::SIModeRegisterDefaults getMode() const {
    return Mode;
  }

  // Reserve VGPR lanes for spilling the SGPR at frame index \p FI; defined in
  // the .cpp.
  bool allocateSGPRSpillToVGPR(MachineFunction &MF, int FI);
  void removeSGPRToVGPRFrameIndices(MachineFrameInfo &MFI);

  bool hasCalculatedTID() const { return TIDReg != 0; };
  unsigned getTIDReg() const { return TIDReg; };
  void setTIDReg(unsigned Reg) { TIDReg = Reg; }

  unsigned getBytesInStackArgArea() const {
    return BytesInStackArgArea;
  }

  void setBytesInStackArgArea(unsigned Bytes) {
    BytesInStackArgArea = Bytes;
  }

  // Add user SGPRs. Each records the allocated register in ArgInfo and
  // returns it; defined in the .cpp.
  unsigned addPrivateSegmentBuffer(const SIRegisterInfo &TRI);
  unsigned addDispatchPtr(const SIRegisterInfo &TRI);
  unsigned addQueuePtr(const SIRegisterInfo &TRI);
  unsigned addKernargSegmentPtr(const SIRegisterInfo &TRI);
  unsigned addDispatchID(const SIRegisterInfo &TRI);
  unsigned addFlatScratchInit(const SIRegisterInfo &TRI);
  unsigned addImplicitBufferPtr(const SIRegisterInfo &TRI);

  // Add system SGPRs. Each allocates the next system SGPR, records it in
  // ArgInfo, and returns it.
  unsigned addWorkGroupIDX() {
    ArgInfo.WorkGroupIDX = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDX.getRegister();
  }

  unsigned addWorkGroupIDY() {
    ArgInfo.WorkGroupIDY = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDY.getRegister();
  }

  unsigned addWorkGroupIDZ() {
    ArgInfo.WorkGroupIDZ = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupIDZ.getRegister();
  }

  unsigned addWorkGroupInfo() {
    ArgInfo.WorkGroupInfo = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.WorkGroupInfo.getRegister();
  }

  // Add special VGPR inputs
  void setWorkItemIDX(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDX = Arg;
  }

  void setWorkItemIDY(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDY = Arg;
  }

  void setWorkItemIDZ(ArgDescriptor Arg) {
    ArgInfo.WorkItemIDZ = Arg;
  }

  unsigned addPrivateSegmentWaveByteOffset() {
    ArgInfo.PrivateSegmentWaveByteOffset
      = ArgDescriptor::createRegister(getNextSystemSGPR());
    NumSystemSGPRs += 1;
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  // Unlike addPrivateSegmentWaveByteOffset(), this does not bump
  // NumSystemSGPRs — it records an externally-chosen register.
  void setPrivateSegmentWaveByteOffset(unsigned Reg) {
    ArgInfo.PrivateSegmentWaveByteOffset = ArgDescriptor::createRegister(Reg);
  }

  // Queries for the feature bits above.
  bool hasPrivateSegmentBuffer() const {
    return PrivateSegmentBuffer;
  }

  bool hasDispatchPtr() const {
    return DispatchPtr;
  }

  bool hasQueuePtr() const {
    return QueuePtr;
  }

  bool hasKernargSegmentPtr() const {
    return KernargSegmentPtr;
  }

  bool hasDispatchID() const {
    return DispatchID;
  }

  bool hasFlatScratchInit() const {
    return FlatScratchInit;
  }

  bool hasWorkGroupIDX() const {
    return WorkGroupIDX;
  }

  bool hasWorkGroupIDY() const {
    return WorkGroupIDY;
  }

  bool hasWorkGroupIDZ() const {
    return WorkGroupIDZ;
  }

  bool hasWorkGroupInfo() const {
    return WorkGroupInfo;
  }

  bool hasPrivateSegmentWaveByteOffset() const {
    return PrivateSegmentWaveByteOffset;
  }

  bool hasWorkItemIDX() const {
    return WorkItemIDX;
  }

  bool hasWorkItemIDY() const {
    return WorkItemIDY;
  }

  bool hasWorkItemIDZ() const {
    return WorkItemIDZ;
  }

  bool hasImplicitArgPtr() const {
    return ImplicitArgPtr;
  }

  bool hasImplicitBufferPtr() const {
    return ImplicitBufferPtr;
  }

  AMDGPUFunctionArgInfo &getArgInfo() {
    return ArgInfo;
  }

  const AMDGPUFunctionArgInfo &getArgInfo() const {
    return ArgInfo;
  }

  std::pair<const ArgDescriptor *, const TargetRegisterClass *>
  getPreloadedValue(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value);
  }

  // NOTE(review): dereferences .first unchecked — callers are expected to
  // request only values that were actually preloaded.
  unsigned getPreloadedReg(AMDGPUFunctionArgInfo::PreloadedValue Value) const {
    return ArgInfo.getPreloadedValue(Value).first->getRegister();
  }

  unsigned getGITPtrHigh() const {
    return GITPtrHigh;
  }

  unsigned get32BitAddressHighBits() const {
    return HighBitsOf32BitAddress;
  }

  unsigned getNumUserSGPRs() const {
    return NumUserSGPRs;
  }

  unsigned getNumPreloadedSGPRs() const {
    return NumUserSGPRs + NumSystemSGPRs;
  }

  unsigned getPrivateSegmentWaveByteOffsetSystemSGPR() const {
    return ArgInfo.PrivateSegmentWaveByteOffset.getRegister();
  }

  /// Returns the physical register reserved for use as the resource
  /// descriptor for scratch accesses.
  unsigned getScratchRSrcReg() const {
    return ScratchRSrcReg;
  }

  void setScratchRSrcReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchRSrcReg = Reg;
  }

  unsigned getScratchWaveOffsetReg() const {
    return ScratchWaveOffsetReg;
  }

  unsigned getFrameOffsetReg() const {
    return FrameOffsetReg;
  }

  void setStackPtrOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    StackPtrOffsetReg = Reg;
  }

  // Note the unset value for this is AMDGPU::SP_REG rather than
  // NoRegister. This is mostly a workaround for MIR tests where state that
  // can't be directly computed from the function is not preserved in serialized
  // MIR.
  unsigned getStackPtrOffsetReg() const {
    return StackPtrOffsetReg;
  }

  // Also updates FrameOffsetReg for entry functions, where the frame offset
  // and the scratch wave offset are the same register.
  void setScratchWaveOffsetReg(unsigned Reg) {
    assert(Reg != 0 && "Should never be unset");
    ScratchWaveOffsetReg = Reg;
    if (isEntryFunction())
      FrameOffsetReg = ScratchWaveOffsetReg;
  }

  unsigned getQueuePtrUserSGPR() const {
    return ArgInfo.QueuePtr.getRegister();
  }

  unsigned getImplicitBufferPtrUserSGPR() const {
    return ArgInfo.ImplicitBufferPtr.getRegister();
  }

  bool hasSpilledSGPRs() const {
    return HasSpilledSGPRs;
  }

  void setHasSpilledSGPRs(bool Spill = true) {
    HasSpilledSGPRs = Spill;
  }

  bool hasSpilledVGPRs() const {
    return HasSpilledVGPRs;
  }

  void setHasSpilledVGPRs(bool Spill = true) {
    HasSpilledVGPRs = Spill;
  }

  bool hasNonSpillStackObjects() const {
    return HasNonSpillStackObjects;
  }

  void setHasNonSpillStackObjects(bool StackObject = true) {
    HasNonSpillStackObjects = StackObject;
  }

  bool isStackRealigned() const {
    return IsStackRealigned;
  }

  void setIsStackRealigned(bool Realigned = true) {
    IsStackRealigned = Realigned;
  }

  unsigned getNumSpilledSGPRs() const {
    return NumSpilledSGPRs;
  }

  unsigned getNumSpilledVGPRs() const {
    return NumSpilledVGPRs;
  }

  void addToSpilledSGPRs(unsigned num) {
    NumSpilledSGPRs += num;
  }

  void addToSpilledVGPRs(unsigned num) {
    NumSpilledVGPRs += num;
  }

  unsigned getPSInputAddr() const {
    return PSInputAddr;
  }

  unsigned getPSInputEnable() const {
    return PSInputEnable;
  }

  // PSInputAddr/PSInputEnable are bitmasks indexed by PS input number.
  bool isPSInputAllocated(unsigned Index) const {
    return PSInputAddr & (1 << Index);
  }

  void markPSInputAllocated(unsigned Index) {
    PSInputAddr |= 1 << Index;
  }

  void markPSInputEnabled(unsigned Index) {
    PSInputEnable |= 1 << Index;
  }

  bool returnsVoid() const {
    return ReturnsVoid;
  }

  void setIfReturnsVoid(bool Value) {
    ReturnsVoid = Value;
  }

  /// \returns A pair of default/requested minimum/maximum flat work group sizes
  /// for this function.
  std::pair<unsigned, unsigned> getFlatWorkGroupSizes() const {
    return FlatWorkGroupSizes;
  }

  /// \returns Default/requested minimum flat work group size for this function.
  unsigned getMinFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.first;
  }

  /// \returns Default/requested maximum flat work group size for this function.
  unsigned getMaxFlatWorkGroupSize() const {
    return FlatWorkGroupSizes.second;
  }

  /// \returns A pair of default/requested minimum/maximum number of waves per
  /// execution unit.
  std::pair<unsigned, unsigned> getWavesPerEU() const {
    return WavesPerEU;
  }

  /// \returns Default/requested minimum number of waves per execution unit.
  unsigned getMinWavesPerEU() const {
    return WavesPerEU.first;
  }

  /// \returns Default/requested maximum number of waves per execution unit.
  unsigned getMaxWavesPerEU() const {
    return WavesPerEU.second;
  }

  /// \returns SGPR used for \p Dim's work group ID.
  unsigned getWorkGroupIDSGPR(unsigned Dim) const {
    switch (Dim) {
    case 0:
      assert(hasWorkGroupIDX());
      return ArgInfo.WorkGroupIDX.getRegister();
    case 1:
      assert(hasWorkGroupIDY());
      return ArgInfo.WorkGroupIDY.getRegister();
    case 2:
      assert(hasWorkGroupIDZ());
      return ArgInfo.WorkGroupIDZ.getRegister();
    }
    llvm_unreachable("unexpected dimension");
  }

  /// \returns VGPR used for \p Dim' work item ID.
  unsigned getWorkItemIDVGPR(unsigned Dim) const;

  unsigned getLDSWaveSpillSize() const {
    return LDSWaveSpillSize;
  }

  /// \returns the cached pseudo source value for \p BufferRsrc, creating it
  /// on first use.
  const AMDGPUBufferPseudoSourceValue *getBufferPSV(const SIInstrInfo &TII,
                                                    const Value *BufferRsrc) {
    assert(BufferRsrc);
    auto PSV = BufferPSVs.try_emplace(
      BufferRsrc,
      llvm::make_unique<AMDGPUBufferPseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  /// \returns the cached pseudo source value for \p ImgRsrc, creating it on
  /// first use.
  const AMDGPUImagePseudoSourceValue *getImagePSV(const SIInstrInfo &TII,
                                                  const Value *ImgRsrc) {
    assert(ImgRsrc);
    auto PSV = ImagePSVs.try_emplace(
      ImgRsrc,
      llvm::make_unique<AMDGPUImagePseudoSourceValue>(TII));
    return PSV.first->second.get();
  }

  unsigned getOccupancy() const {
    return Occupancy;
  }

  /// \returns the occupancy, clamped to 4 waves when the function is memory
  /// bound or has the wave limiter hint.
  unsigned getMinAllowedOccupancy() const {
    if (!isMemoryBound() && !needsWaveLimiter())
      return Occupancy;
    return (Occupancy < 4) ? Occupancy : 4;
  }

  // Recompute the occupancy limit from \p MF; defined in the .cpp.
  void limitOccupancy(const MachineFunction &MF);

  // Occupancy can only ever be lowered by this; raising it goes through
  // increaseOccupancy().
  void limitOccupancy(unsigned Limit) {
    if (Occupancy > Limit)
      Occupancy = Limit;
  }

  // Raise occupancy toward \p Limit, then re-clamp against the function's
  // actual constraints.
  void increaseOccupancy(const MachineFunction &MF, unsigned Limit) {
    if (Occupancy < Limit)
      Occupancy = Limit;
    limitOccupancy(MF);
  }
};
 | |
| 
 | |
| } // end namespace llvm
 | |
| 
 | |
| #endif // LLVM_LIB_TARGET_AMDGPU_SIMACHINEFUNCTIONINFO_H
 |