Introduce a hybrid target to generate code for either the GPU or CPU

Summary:
Introduce a "hybrid" `-polly-target` option to optimise code for either the GPU or CPU.

When this target is selected, PPCGCodeGeneration will first attempt to optimise a Scop. If the Scop isn't modified, it is then sent to the passes that form the CPU pipeline, i.e. IslScheduleOptimizerPass, IslAstInfoWrapperPass and CodeGeneration.

If the Scop is modified, it is marked so that the subsequent CPU optimisation passes skip it.

Reviewers: grosser, Meinersbur, bollu

Reviewed By: grosser

Subscribers: kbarton, nemanjai, pollydev

Tags: #polly

Differential Revision: https://reviews.llvm.org/D34054

llvm-svn: 306863
This commit is contained in:
Singapuram Sanjay Srivallabh 2017-06-30 19:42:21 +00:00
parent 9f59da8d27
commit 02ca346e48
7 changed files with 43 additions and 16 deletions

View File

@ -1633,6 +1633,9 @@ private:
/// Number of copy statements.
unsigned CopyStmtsNum;
/// Flag to indicate if the Scop is to be skipped.
bool SkipScop;
typedef std::list<ScopStmt> StmtSet;
/// The statements in this Scop.
StmtSet Stmts;
@ -2366,6 +2369,12 @@ public:
/// Check if the SCoP has been optimized by the scheduler.
///
/// @returns The current value of the IsOptimized flag.
bool isOptimized() const { return IsOptimized; }
/// Mark the SCoP to be skipped by ScopPass passes.
///
/// Sets the SkipScop flag to true; the flag can be queried through
/// isToBeSkipped().
void markAsToBeSkipped() { SkipScop = true; }
/// Check if the SCoP is to be skipped by ScopPass passes.
///
/// @returns The current value of the SkipScop flag, which
///          markAsToBeSkipped() sets to true.
bool isToBeSkipped() const { return SkipScop; }
/// Get the name of the entry and exit blocks of this Scop.
///
/// These along with the function name can uniquely identify a Scop.

View File

@ -3497,8 +3497,8 @@ static Loop *getLoopSurroundingScop(Scop &S, LoopInfo &LI) {
Scop::Scop(Region &R, ScalarEvolution &ScalarEvolution, LoopInfo &LI,
ScopDetection::DetectionContext &DC)
: SE(&ScalarEvolution), R(R), name(R.getNameStr()), IsOptimized(false),
HasSingleExitEdge(R.getExitingBlock()), HasErrorBlock(false),
MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
SkipScop(false), HasSingleExitEdge(R.getExitingBlock()),
HasErrorBlock(false), MaxLoopDepth(0), CopyStmtsNum(0), DC(DC),
IslCtx(isl_ctx_alloc(), isl_ctx_free), Context(nullptr),
Affinator(this, LI), AssumedContext(nullptr), InvalidContext(nullptr),
Schedule(nullptr) {

View File

@ -278,6 +278,10 @@ public:
/// Generate LLVM-IR for the SCoP @p S.
bool runOnScop(Scop &S) override {
// Skip SCoPs in case they're already code-generated by PPCGCodeGeneration.
if (S.isToBeSkipped())
return false;
AI = &getAnalysis<IslAstInfoWrapperPass>().getAI();
LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();

View File

@ -624,6 +624,11 @@ PreservedAnalyses IslAstPrinterPass::run(Scop &S, ScopAnalysisManager &SAM,
/// Release memory by calling reset() on the Ast member.
void IslAstInfoWrapperPass::releaseMemory() { Ast.reset(); }
bool IslAstInfoWrapperPass::runOnScop(Scop &Scop) {
// Skip SCoPs in case they're already handled by PPCGCodeGeneration.
if (Scop.isToBeSkipped())
return false;
const Dependences &D =
getAnalysis<DependenceInfo>().getDependences(Dependences::AL_Statement);

View File

@ -2840,8 +2840,10 @@ public:
auto PPCGProg = createPPCGProg(PPCGScop);
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
if (PPCGGen->tree)
if (PPCGGen->tree) {
generateCode(isl_ast_node_copy(PPCGGen->tree), PPCGProg);
CurrentScop.markAsToBeSkipped();
}
freeOptions(PPCGScop);
freePPCGGen(PPCGGen);

View File

@ -92,13 +92,15 @@ static cl::opt<CodeGenChoice> CodeGeneration(
clEnumValN(CODEGEN_NONE, "none", "No code generation")),
cl::Hidden, cl::init(CODEGEN_FULL), cl::ZeroOrMore, cl::cat(PollyCategory));
enum TargetChoice { TARGET_CPU, TARGET_GPU };
enum TargetChoice { TARGET_CPU, TARGET_GPU, TARGET_HYBRID };
static cl::opt<TargetChoice>
Target("polly-target", cl::desc("The hardware to target"),
cl::values(clEnumValN(TARGET_CPU, "cpu", "generate CPU code")
#ifdef GPU_CODEGEN
,
clEnumValN(TARGET_GPU, "gpu", "generate GPU code")
clEnumValN(TARGET_GPU, "gpu", "generate GPU code"),
clEnumValN(TARGET_HYBRID, "hybrid",
"generate GPU code (preferably) or CPU code")
#endif
),
cl::init(TARGET_CPU), cl::ZeroOrMore, cl::cat(PollyCategory));
@ -314,9 +316,12 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
if (EnablePruneUnprofitable)
PM.add(polly::createPruneUnprofitablePass());
if (Target == TARGET_GPU) {
// GPU generation provides its own scheduling optimization strategy.
} else {
#ifdef GPU_CODEGEN
if (Target == TARGET_HYBRID)
PM.add(
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
#endif
if (Target == TARGET_CPU || Target == TARGET_HYBRID)
switch (Optimizer) {
case OPTIMIZER_NONE:
break; /* Do nothing */
@ -325,17 +330,11 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
PM.add(polly::createIslScheduleOptimizerPass());
break;
}
}
if (ExportJScop)
PM.add(polly::createJSONExporterPass());
if (Target == TARGET_GPU) {
#ifdef GPU_CODEGEN
PM.add(
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
#endif
} else {
if (Target == TARGET_CPU || Target == TARGET_HYBRID)
switch (CodeGeneration) {
case CODEGEN_AST:
PM.add(polly::createIslAstInfoWrapperPassPass());
@ -346,7 +345,11 @@ void registerPollyPasses(llvm::legacy::PassManagerBase &PM) {
case CODEGEN_NONE:
break;
}
}
#ifdef GPU_CODEGEN
else
PM.add(
polly::createPPCGCodeGenerationPass(GPUArchChoice, GPURuntimeChoice));
#endif
// FIXME: This dummy ModulePass keeps some programs from miscompiling,
// probably some not correctly preserved analyses. It acts as a barrier to

View File

@ -1443,6 +1443,10 @@ char IslScheduleOptimizer::ID = 0;
bool IslScheduleOptimizer::runOnScop(Scop &S) {
// Skip SCoPs in case they're already optimised by PPCGCodeGeneration
if (S.isToBeSkipped())
return false;
// Skip empty SCoPs but still allow code generation as it will delete the
// loops present but not needed.
if (S.getSize() == 0) {