396 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			396 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			C++
		
	
	
	
//===--- AMDGPUPropagateAttributes.cpp --------------------------*- C++ -*-===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
/// \file
 | 
						|
/// \brief This pass propagates attributes from kernels to the non-entry
 | 
						|
/// functions. Most of the library functions were not compiled for specific ABI,
 | 
						|
/// yet will be correctly compiled if proper attributes are propagated from the
 | 
						|
/// caller.
 | 
						|
///
 | 
						|
/// The pass analyzes call graph and propagates ABI target features through the
 | 
						|
/// call graph.
 | 
						|
///
 | 
						|
/// It can run in two modes: as a function or module pass. A function pass
 | 
						|
/// simply propagates attributes. A module pass clones functions if there are
 | 
						|
/// callers with different ABI. If a function is cloned all call sites will
 | 
						|
/// be updated to use a correct clone.
 | 
						|
///
 | 
						|
/// A function pass is limited in functionality but can run early in the
 | 
						|
/// pipeline. A module pass is more powerful but has to run late, so misses
 | 
						|
/// library folding opportunities.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "AMDGPU.h"
 | 
						|
#include "AMDGPUSubtarget.h"
 | 
						|
#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
 | 
						|
#include "Utils/AMDGPUBaseInfo.h"
 | 
						|
#include "llvm/ADT/SmallSet.h"
 | 
						|
#include "llvm/ADT/SmallVector.h"
 | 
						|
#include "llvm/IR/Function.h"
 | 
						|
#include "llvm/IR/Module.h"
 | 
						|
#include "llvm/Target/TargetMachine.h"
 | 
						|
#include "llvm/Transforms/Utils/Cloning.h"
 | 
						|
#include <string>
 | 
						|
 | 
						|
#define DEBUG_TYPE "amdgpu-propagate-attributes"
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
namespace llvm {
 | 
						|
extern const SubtargetFeatureKV AMDGPUFeatureKV[AMDGPU::NumSubtargetFeatures-1];
 | 
						|
}
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
// Target features to propagate.
 | 
						|
static constexpr const FeatureBitset TargetFeatures = {
 | 
						|
  AMDGPU::FeatureWavefrontSize16,
 | 
						|
  AMDGPU::FeatureWavefrontSize32,
 | 
						|
  AMDGPU::FeatureWavefrontSize64
 | 
						|
};
 | 
						|
 | 
						|
// Names of the function attributes that are carried over from a caller to its
// callees.
static constexpr const char *AttributeNames[] = {"amdgpu-waves-per-eu"};

// Number of entries in AttributeNames.
static constexpr unsigned NumAttr =
    sizeof(AttributeNames) / sizeof(*AttributeNames);
 | 
						|
 | 
						|
class AMDGPUPropagateAttributes {
 | 
						|
 | 
						|
  class FnProperties {
 | 
						|
  private:
 | 
						|
    explicit FnProperties(const FeatureBitset &&FB) : Features(FB) {}
 | 
						|
 | 
						|
  public:
 | 
						|
    explicit FnProperties(const TargetMachine &TM, const Function &F) {
 | 
						|
      Features = TM.getSubtargetImpl(F)->getFeatureBits();
 | 
						|
 | 
						|
      for (unsigned I = 0; I < NumAttr; ++I)
 | 
						|
        if (F.hasFnAttribute(AttributeNames[I]))
 | 
						|
          Attributes[I] = F.getFnAttribute(AttributeNames[I]);
 | 
						|
    }
 | 
						|
 | 
						|
    bool operator == (const FnProperties &Other) const {
 | 
						|
      if ((Features & TargetFeatures) != (Other.Features & TargetFeatures))
 | 
						|
        return false;
 | 
						|
      for (unsigned I = 0; I < NumAttr; ++I)
 | 
						|
        if (Attributes[I] != Other.Attributes[I])
 | 
						|
          return false;
 | 
						|
      return true;
 | 
						|
    }
 | 
						|
 | 
						|
    FnProperties adjustToCaller(const FnProperties &CallerProps) const {
 | 
						|
      FnProperties New((Features & ~TargetFeatures) | CallerProps.Features);
 | 
						|
      for (unsigned I = 0; I < NumAttr; ++I)
 | 
						|
        New.Attributes[I] = CallerProps.Attributes[I];
 | 
						|
      return New;
 | 
						|
    }
 | 
						|
 | 
						|
    FeatureBitset Features;
 | 
						|
    Optional<Attribute> Attributes[NumAttr];
 | 
						|
  };
 | 
						|
 | 
						|
  class Clone {
 | 
						|
  public:
 | 
						|
    Clone(const FnProperties &Props, Function *OrigF, Function *NewF) :
 | 
						|
      Properties(Props), OrigF(OrigF), NewF(NewF) {}
 | 
						|
 | 
						|
    FnProperties Properties;
 | 
						|
    Function *OrigF;
 | 
						|
    Function *NewF;
 | 
						|
  };
 | 
						|
 | 
						|
  const TargetMachine *TM;
 | 
						|
 | 
						|
  // Clone functions as needed or just set attributes.
 | 
						|
  bool AllowClone;
 | 
						|
 | 
						|
  // Option propagation roots.
 | 
						|
  SmallSet<Function *, 32> Roots;
 | 
						|
 | 
						|
  // Clones of functions with their attributes.
 | 
						|
  SmallVector<Clone, 32> Clones;
 | 
						|
 | 
						|
  // Find a clone with required features.
 | 
						|
  Function *findFunction(const FnProperties &PropsNeeded,
 | 
						|
                         Function *OrigF);
 | 
						|
 | 
						|
  // Clone function \p F and set \p NewProps on the clone.
 | 
						|
  // Cole takes the name of original function.
 | 
						|
  Function *cloneWithProperties(Function &F, const FnProperties &NewProps);
 | 
						|
 | 
						|
  // Set new function's features in place.
 | 
						|
  void setFeatures(Function &F, const FeatureBitset &NewFeatures);
 | 
						|
 | 
						|
  // Set new function's attributes in place.
 | 
						|
  void setAttributes(Function &F, const ArrayRef<Optional<Attribute>> NewAttrs);
 | 
						|
 | 
						|
  std::string getFeatureString(const FeatureBitset &Features) const;
 | 
						|
 | 
						|
  // Propagate attributes from Roots.
 | 
						|
  bool process();
 | 
						|
 | 
						|
public:
 | 
						|
  AMDGPUPropagateAttributes(const TargetMachine *TM, bool AllowClone) :
 | 
						|
    TM(TM), AllowClone(AllowClone) {}
 | 
						|
 | 
						|
  // Use F as a root and propagate its attributes.
 | 
						|
  bool process(Function &F);
 | 
						|
 | 
						|
  // Propagate attributes starting from kernel functions.
 | 
						|
  bool process(Module &M);
 | 
						|
};
 | 
						|
 | 
						|
// Allows to propagate attributes early, but no clonning is allowed as it must
 | 
						|
// be a function pass to run before any optimizations.
 | 
						|
// TODO: We shall only need a one instance of module pass, but that needs to be
 | 
						|
// in the linker pipeline which is currently not possible.
 | 
						|
class AMDGPUPropagateAttributesEarly : public FunctionPass {
 | 
						|
  const TargetMachine *TM;
 | 
						|
 | 
						|
public:
 | 
						|
  static char ID; // Pass identification
 | 
						|
 | 
						|
  AMDGPUPropagateAttributesEarly(const TargetMachine *TM = nullptr) :
 | 
						|
    FunctionPass(ID), TM(TM) {
 | 
						|
    initializeAMDGPUPropagateAttributesEarlyPass(
 | 
						|
      *PassRegistry::getPassRegistry());
 | 
						|
  }
 | 
						|
 | 
						|
  bool runOnFunction(Function &F) override;
 | 
						|
};
 | 
						|
 | 
						|
// Allows to propagate attributes with clonning but does that late in the
 | 
						|
// pipeline.
 | 
						|
class AMDGPUPropagateAttributesLate : public ModulePass {
 | 
						|
  const TargetMachine *TM;
 | 
						|
 | 
						|
public:
 | 
						|
  static char ID; // Pass identification
 | 
						|
 | 
						|
  AMDGPUPropagateAttributesLate(const TargetMachine *TM = nullptr) :
 | 
						|
    ModulePass(ID), TM(TM) {
 | 
						|
    initializeAMDGPUPropagateAttributesLatePass(
 | 
						|
      *PassRegistry::getPassRegistry());
 | 
						|
  }
 | 
						|
 | 
						|
  bool runOnModule(Module &M) override;
 | 
						|
};
 | 
						|
 | 
						|
}  // end anonymous namespace.
 | 
						|
 | 
						|
char AMDGPUPropagateAttributesEarly::ID = 0;
 | 
						|
char AMDGPUPropagateAttributesLate::ID = 0;
 | 
						|
 | 
						|
INITIALIZE_PASS(AMDGPUPropagateAttributesEarly,
 | 
						|
                "amdgpu-propagate-attributes-early",
 | 
						|
                "Early propagate attributes from kernels to functions",
 | 
						|
                false, false)
 | 
						|
INITIALIZE_PASS(AMDGPUPropagateAttributesLate,
 | 
						|
                "amdgpu-propagate-attributes-late",
 | 
						|
                "Late propagate attributes from kernels to functions",
 | 
						|
                false, false)
 | 
						|
 | 
						|
// Look through the recorded clones for one that was made from \p OrigF for
// the properties \p PropsNeeded. Returns the clone, or nullptr if no such
// clone has been recorded.
Function *
AMDGPUPropagateAttributes::findFunction(const FnProperties &PropsNeeded,
                                        Function *OrigF) {
  // TODO: search for clone's clones.
  for (const Clone &Candidate : Clones) {
    if (Candidate.OrigF != OrigF)
      continue;
    if (PropsNeeded == Candidate.Properties)
      return Candidate.NewF;
  }

  return nullptr;
}
 | 
						|
 | 
						|
// Module entry point: every function of \p M that uses an entry-function
// calling convention becomes a propagation root. Returns true if the module
// was changed.
bool AMDGPUPropagateAttributes::process(Module &M) {
  for (Function &Fn : M.functions()) {
    if (!AMDGPU::isEntryFunctionCC(Fn.getCallingConv()))
      continue;
    Roots.insert(&Fn);
  }

  return process();
}
 | 
						|
 | 
						|
// Function entry point: \p F is registered as a propagation root and the
// propagation is run. Returns true if anything was changed.
bool AMDGPUPropagateAttributes::process(Function &F) {
  Roots.insert(&F);
  const bool Changed = process();
  return Changed;
}
 | 
						|
 | 
						|
bool AMDGPUPropagateAttributes::process() {
 | 
						|
  bool Changed = false;
 | 
						|
  SmallSet<Function *, 32> NewRoots;
 | 
						|
  SmallSet<Function *, 32> Replaced;
 | 
						|
 | 
						|
  if (Roots.empty())
 | 
						|
    return false;
 | 
						|
  Module &M = *(*Roots.begin())->getParent();
 | 
						|
 | 
						|
  do {
 | 
						|
    Roots.insert(NewRoots.begin(), NewRoots.end());
 | 
						|
    NewRoots.clear();
 | 
						|
 | 
						|
    for (auto &F : M.functions()) {
 | 
						|
      if (F.isDeclaration())
 | 
						|
        continue;
 | 
						|
 | 
						|
      const FnProperties CalleeProps(*TM, F);
 | 
						|
      SmallVector<std::pair<CallBase *, Function *>, 32> ToReplace;
 | 
						|
      SmallSet<CallBase *, 32> Visited;
 | 
						|
 | 
						|
      for (User *U : F.users()) {
 | 
						|
        Instruction *I = dyn_cast<Instruction>(U);
 | 
						|
        if (!I)
 | 
						|
          continue;
 | 
						|
        CallBase *CI = dyn_cast<CallBase>(I);
 | 
						|
        if (!CI)
 | 
						|
          continue;
 | 
						|
        Function *Caller = CI->getCaller();
 | 
						|
        if (!Caller || !Visited.insert(CI).second)
 | 
						|
          continue;
 | 
						|
        if (!Roots.count(Caller) && !NewRoots.count(Caller))
 | 
						|
          continue;
 | 
						|
 | 
						|
        const FnProperties CallerProps(*TM, *Caller);
 | 
						|
 | 
						|
        if (CalleeProps == CallerProps) {
 | 
						|
          if (!Roots.count(&F))
 | 
						|
            NewRoots.insert(&F);
 | 
						|
          continue;
 | 
						|
        }
 | 
						|
 | 
						|
        Function *NewF = findFunction(CallerProps, &F);
 | 
						|
        if (!NewF) {
 | 
						|
          const FnProperties NewProps = CalleeProps.adjustToCaller(CallerProps);
 | 
						|
          if (!AllowClone) {
 | 
						|
            // This may set different features on different iteartions if
 | 
						|
            // there is a contradiction in callers' attributes. In this case
 | 
						|
            // we rely on a second pass running on Module, which is allowed
 | 
						|
            // to clone.
 | 
						|
            setFeatures(F, NewProps.Features);
 | 
						|
            setAttributes(F, NewProps.Attributes);
 | 
						|
            NewRoots.insert(&F);
 | 
						|
            Changed = true;
 | 
						|
            break;
 | 
						|
          }
 | 
						|
 | 
						|
          NewF = cloneWithProperties(F, NewProps);
 | 
						|
          Clones.push_back(Clone(CallerProps, &F, NewF));
 | 
						|
          NewRoots.insert(NewF);
 | 
						|
        }
 | 
						|
 | 
						|
        ToReplace.push_back(std::make_pair(CI, NewF));
 | 
						|
        Replaced.insert(&F);
 | 
						|
 | 
						|
        Changed = true;
 | 
						|
      }
 | 
						|
 | 
						|
      while (!ToReplace.empty()) {
 | 
						|
        auto R = ToReplace.pop_back_val();
 | 
						|
        R.first->setCalledFunction(R.second);
 | 
						|
      }
 | 
						|
    }
 | 
						|
  } while (!NewRoots.empty());
 | 
						|
 | 
						|
  for (Function *F : Replaced) {
 | 
						|
    if (F->use_empty())
 | 
						|
      F->eraseFromParent();
 | 
						|
  }
 | 
						|
 | 
						|
  Roots.clear();
 | 
						|
  Clones.clear();
 | 
						|
 | 
						|
  return Changed;
 | 
						|
}
 | 
						|
 | 
						|
// Create a copy of \p F that carries the features and attributes described
// by \p NewProps. The copy gets default visibility and internal linkage.
// Returns the new function.
Function *
AMDGPUPropagateAttributes::cloneWithProperties(Function &F,
                                               const FnProperties &NewProps) {
  LLVM_DEBUG(dbgs() << "Cloning " << F.getName() << '\n');

  // CloneFunction() needs a value map; its contents are not used here.
  ValueToValueMapTy VMap;
  Function *const Copy = CloneFunction(&F, VMap);

  setFeatures(*Copy, NewProps.Features);
  setAttributes(*Copy, NewProps.Attributes);
  Copy->setVisibility(GlobalValue::DefaultVisibility);
  Copy->setLinkage(GlobalValue::InternalLinkage);

  // Preserve original name for externally visible functions; unnamed
  // functions have no name to hand over either.
  if (!F.hasName() || !F.hasLocalLinkage())
    return Copy;

  // Swap names. If that is the only clone it will retain the name of the now
  // dead value.
  const std::string CloneName = Copy->getName().str();
  Copy->takeName(&F);
  F.setName(CloneName);

  return Copy;
}
 | 
						|
 | 
						|
// Replace the "target-features" attribute of \p F with the string form of
// \p NewFeatures.
void AMDGPUPropagateAttributes::setFeatures(Function &F,
                                            const FeatureBitset &NewFeatures) {
  // The debug output only lists the features this pass propagates.
  LLVM_DEBUG(dbgs() << "Set features "
                    << getFeatureString(NewFeatures & TargetFeatures)
                    << " on " << F.getName() << '\n');

  const std::string FeatureStr = getFeatureString(NewFeatures);
  F.removeFnAttr("target-features");
  F.addFnAttr("target-features", FeatureStr);
}
 | 
						|
 | 
						|
// Make the tracked attributes of \p F match \p NewAttrs, which is indexed in
// parallel with AttributeNames. Each tracked attribute is first removed from
// \p F and then re-added only if \p NewAttrs holds a value for it.
void AMDGPUPropagateAttributes::setAttributes(Function &F,
    const ArrayRef<Optional<Attribute>> NewAttrs) {
  LLVM_DEBUG(dbgs() << "Set attributes on " << F.getName() << ":\n");

  for (unsigned Idx = 0; Idx != NumAttr; ++Idx) {
    F.removeFnAttr(AttributeNames[Idx]);

    const Optional<Attribute> &Attr = NewAttrs[Idx];
    if (!Attr)
      continue;

    LLVM_DEBUG(dbgs() << '\t' << Attr->getAsString() << '\n');
    F.addFnAttr(*Attr);
  }
}
 | 
						|
 | 
						|
std::string
 | 
						|
AMDGPUPropagateAttributes::getFeatureString(const FeatureBitset &Features) const
 | 
						|
{
 | 
						|
  std::string Ret;
 | 
						|
  for (const SubtargetFeatureKV &KV : AMDGPUFeatureKV) {
 | 
						|
    if (Features[KV.Value])
 | 
						|
      Ret += (StringRef("+") + KV.Key + ",").str();
 | 
						|
    else if (TargetFeatures[KV.Value])
 | 
						|
      Ret += (StringRef("-") + KV.Key + ",").str();
 | 
						|
  }
 | 
						|
  Ret.pop_back(); // Remove last comma.
 | 
						|
  return Ret;
 | 
						|
}
 | 
						|
 | 
						|
// Run the propagation, without cloning, using \p F as the only root. Nothing
// is done when no TargetMachine was supplied or when \p F does not use an
// entry-function calling convention.
bool AMDGPUPropagateAttributesEarly::runOnFunction(Function &F) {
  if (!TM)
    return false;
  if (!AMDGPU::isEntryFunctionCC(F.getCallingConv()))
    return false;

  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/false);
  return Propagator.process(F);
}
 | 
						|
 | 
						|
// Run the propagation over \p M with cloning enabled. Nothing is done when
// no TargetMachine was supplied.
bool AMDGPUPropagateAttributesLate::runOnModule(Module &M) {
  if (TM == nullptr)
    return false;

  AMDGPUPropagateAttributes Propagator(TM, /*AllowClone=*/true);
  return Propagator.process(M);
}
 | 
						|
 | 
						|
// Allocate a new instance of the early (function) pass bound to \p TM.
FunctionPass *
llvm::createAMDGPUPropagateAttributesEarlyPass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesEarly(TM);
}
 | 
						|
 | 
						|
// Allocate a new instance of the late (module) pass bound to \p TM.
ModulePass *
llvm::createAMDGPUPropagateAttributesLatePass(const TargetMachine *TM) {
  return new AMDGPUPropagateAttributesLate(TM);
}
 |