forked from OSchip/llvm-project
Introduce a hybrid target to generate code for either the GPU or CPU
Summary: Introduce a "hybrid" `-polly-target` option to optimise code for either the GPU or CPU. When this target is selected, PPCGCodeGeneration will first attempt to optimise a Scop. If the Scop isn't modified, it is then sent to the passes that form the CPU pipeline, i.e. IslScheduleOptimizerPass, IslAstInfoWrapperPass and CodeGeneration. If the Scop is modified, it is marked to be skipped by the subsequent CPU optimisation passes.
Reviewers: grosser, Meinersbur, bollu
Reviewed By: grosser
Subscribers: kbarton, nemanjai, pollydev
Tags: #polly
Differential Revision: https://reviews.llvm.org/D34054
llvm-svn: 306863
This commit is contained in:
parent
9f59da8d27
commit
02ca346e48
|
|
@ -1633,6 +1633,9 @@ private:
|
|||
/// Number of copy statements.
|
||||
unsigned CopyStmtsNum;
|
||||
|
||||
/// Flag to indicate if the Scop is to be skipped.
|
||||
bool SkipScop;
|
||||
|
||||
typedef std::list<ScopStmt> StmtSet;
|
||||
/// The statements in this Scop.
|
||||
StmtSet Stmts;
|
||||
|
|
@ -2366,6 +2369,12 @@ public:
|
|||
/// Check if the SCoP has been optimized by the scheduler.
|
||||
bool isOptimized() const {
  // Read-only query of the IsOptimized flag; nothing is modified here.
  return IsOptimized;
}
|
||||
|
||||
/// Mark the SCoP to be skipped by ScopPass passes.
|
||||
void markAsToBeSkipped() {
  // Raise the SkipScop flag; no code visible here ever clears it again.
  SkipScop = true;
}
|
||||
|
||||
/// Check if the SCoP is to be skipped by ScopPass passes.
|
||||
bool isToBeSkipped() const {
  // Report the SkipScop flag unchanged; true means passes should bail out.
  return SkipScop;
}
|
||||
|
||||
/// Get the name of the entry and exit blocks of this Scop.
|
||||
///
|
||||
/// These along with the function name can uniquely identify a Scop.
|
||||
|
|
|
|||
|
|
@ -3497,8 +3497,8 @@ static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
|
|||
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
|
||||
ScopDetection::DetectionContext &DC)
|
||||
: SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
|
||||
HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
|
||||
MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
|
||||
SkipScop(false), HasSingleExitEdge(R.getExitingBlock()),
|
||||
HasErrorBlock(false), MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
|
||||
IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
|
||||
Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
|
||||
Schedule(nullptr) {
|
||||
|
|
|
|||
|
|
@ -278,6 +278,10 @@ public:
|
|||
|
||||
/// Generate LLVM-IR for the SCoP @p S.
|
||||
bool runOnScop(Scop &S) override {
|
||||
// Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
|
||||
if (S.isToBeSkipped())
|
||||
return false;
|
||||
|
||||
AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
|
||||
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
|
||||
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
|
||||
|
|
|
|||
|
|
@ -624,6 +624,11 @@ PreservedAnalyses IslAstPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
|
|||
void IslAstInfoWrapperPass::releaseMemory() {
  // Release whatever Ast currently holds by calling its reset() member.
  // NOTE(review): Ast's type is not visible here (presumably an owning
  // smart pointer) -- confirm against the class declaration.
  Ast.reset();
}
|
||||
|
||||
bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) {
|
||||
|
||||
// Skip SCoPs in case they're already handled by PPCGCodeGeneration.
|
||||
if (Scop.isToBeSkipped())
|
||||
return false;
|
||||
|
||||
const Dependences &D =
|
||||
getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);
|
||||
|
||||
|
|
|
|||
|
|
@ -2840,8 +2840,10 @@ public:
|
|||
auto PPCGProg = createPPCGProg(PPCGScop);
|
||||
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
|
||||
|
||||
if (PPCGGen->tree)
|
||||
if (PPCGGen->tree) {
|
||||
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
|
||||
CurrentScop.markAsToBeSkipped();
|
||||
}
|
||||
|
||||
freeOptions(PPCGScop);
|
||||
freePPCGGen(PPCGGen);
|
||||
|
|
|
|||
|
|
@ -92,13 +92,15 @@ static cl::opt<CodeGenChoice> CodeGeneration(
|
|||
clEnumValN(CODEGEN_NONE, "none", "No code generation")),
|
||||
cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
|
||||
|
||||
enum TargetChoice { TARGET_CPU, TARGET_GPU };
|
||||
enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID };
|
||||
static cl::opt<TargetChoice>
|
||||
Target("polly-target", cl::desc("The hardware to target"),
|
||||
cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
|
||||
#ifdef GPU_CODEGEN
|
||||
,
|
||||
clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
|
||||
clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
|
||||
clEnumValN(TARGET_HYBRID, "hybrid",
|
||||
"generate GPU code (preferably) or CPU code")
|
||||
#endif
|
||||
),
|
||||
cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
|
||||
|
|
@ -314,9 +316,12 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
|
|||
if (EnablePruneUnprofitable)
|
||||
PM.add(polly::createPruneUnprofitablePass());
|
||||
|
||||
if (Target == TARGET_GPU) {
|
||||
// GPU generation provides its own scheduling optimization strategy.
|
||||
} else {
|
||||
#ifdef GPU_CODEGEN
|
||||
if (Target == TARGET_HYBRID)
|
||||
PM.add(
|
||||
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
|
||||
#endif
|
||||
if (Target == TARGET_CPU || Target == TARGET_HYBRID)
|
||||
switch (Optimizer) {
|
||||
case OPTIMIZER_NONE:
|
||||
break; /* Do nothing */
|
||||
|
|
@ -325,17 +330,11 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
|
|||
PM.add(polly::createIslScheduleOptimizerPass());
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (ExportJScop)
|
||||
PM.add(polly::createJSONExporterPass());
|
||||
|
||||
if (Target == TARGET_GPU) {
|
||||
#ifdef GPU_CODEGEN
|
||||
PM.add(
|
||||
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
|
||||
#endif
|
||||
} else {
|
||||
if (Target == TARGET_CPU || Target == TARGET_HYBRID)
|
||||
switch (CodeGeneration) {
|
||||
case CODEGEN_AST:
|
||||
PM.add(polly::createIslAstInfoWrapperPassPass());
|
||||
|
|
@ -346,7 +345,11 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
|
|||
case CODEGEN_NONE:
|
||||
break;
|
||||
}
|
||||
}
|
||||
#ifdef GPU_CODEGEN
|
||||
else
|
||||
PM.add(
|
||||
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
|
||||
#endif
|
||||
|
||||
// FIXME: This dummy ModulePass keeps some programs from miscompiling,
|
||||
// probably some not correctly preserved analyses. It acts as a barrier to
|
||||
|
|
|
|||
|
|
@ -1443,6 +1443,10 @@ char IslScheduleOptimizer::ID = 0;
|
|||
|
||||
bool IslScheduleOptimizer::runOnScop(Scop &S) {
|
||||
|
||||
// Skip SCoPs in case they're already optimised by PPCGCodeGeneration.
|
||||
if (S.isToBeSkipped())
|
||||
return false;
|
||||
|
||||
// Skip empty SCoPs but still allow code generation as it will delete the
|
||||
// loops present but not needed.
|
||||
if (S.getSize() == 0) {
|
||||
|
|
|
|||
Loading…
Reference in New Issue