[SimplifyCFG] don't sink common insts too soon (PR34603)
This should solve https://bugs.llvm.org/show_bug.cgi?id=34603 by preventing SimplifyCFG from altering redundant instructions before early-cse has a chance to run. It changes the default (canonical-forming) behavior of SimplifyCFG, so the sinking transform is now done only later in the optimization pipeline.
Differential Revision: https://reviews.llvm.org/D38566
llvm-svn: 320749
This commit is contained in:
parent
34ccadcea9
commit
0ab0c1a201
|
|
@ -267,7 +267,7 @@ FunctionPass *createJumpThreadingPass(int Threshold = -1);
|
||||||
//
|
//
|
||||||
FunctionPass *createCFGSimplificationPass(
|
FunctionPass *createCFGSimplificationPass(
|
||||||
unsigned Threshold = 1, bool ForwardSwitchCond = false,
|
unsigned Threshold = 1, bool ForwardSwitchCond = false,
|
||||||
bool ConvertSwitch = false, bool KeepLoops = true,
|
bool ConvertSwitch = false, bool KeepLoops = true, bool SinkCommon = false,
|
||||||
std::function<bool(const Function &)> Ftor = nullptr);
|
std::function<bool(const Function &)> Ftor = nullptr);
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,8 @@ public:
|
||||||
: SimplifyCFGPass(SimplifyCFGOptions()
|
: SimplifyCFGPass(SimplifyCFGOptions()
|
||||||
.forwardSwitchCondToPhi(false)
|
.forwardSwitchCondToPhi(false)
|
||||||
.convertSwitchToLookupTable(false)
|
.convertSwitchToLookupTable(false)
|
||||||
.needCanonicalLoops(true)) {}
|
.needCanonicalLoops(true)
|
||||||
|
.sinkCommonInsts(false)) {}
|
||||||
|
|
||||||
|
|
||||||
/// Construct a pass with optional optimizations.
|
/// Construct a pass with optional optimizations.
|
||||||
|
|
|
||||||
|
|
@ -63,16 +63,20 @@ struct SimplifyCFGOptions {
|
||||||
bool ForwardSwitchCondToPhi;
|
bool ForwardSwitchCondToPhi;
|
||||||
bool ConvertSwitchToLookupTable;
|
bool ConvertSwitchToLookupTable;
|
||||||
bool NeedCanonicalLoop;
|
bool NeedCanonicalLoop;
|
||||||
|
bool SinkCommonInsts;
|
||||||
AssumptionCache *AC;
|
AssumptionCache *AC;
|
||||||
|
|
||||||
SimplifyCFGOptions(unsigned BonusThreshold = 1,
|
SimplifyCFGOptions(unsigned BonusThreshold = 1,
|
||||||
bool ForwardSwitchCond = false,
|
bool ForwardSwitchCond = false,
|
||||||
bool SwitchToLookup = false, bool CanonicalLoops = true,
|
bool SwitchToLookup = false, bool CanonicalLoops = true,
|
||||||
|
bool SinkCommon = false,
|
||||||
AssumptionCache *AssumpCache = nullptr)
|
AssumptionCache *AssumpCache = nullptr)
|
||||||
: BonusInstThreshold(BonusThreshold),
|
: BonusInstThreshold(BonusThreshold),
|
||||||
ForwardSwitchCondToPhi(ForwardSwitchCond),
|
ForwardSwitchCondToPhi(ForwardSwitchCond),
|
||||||
ConvertSwitchToLookupTable(SwitchToLookup),
|
ConvertSwitchToLookupTable(SwitchToLookup),
|
||||||
NeedCanonicalLoop(CanonicalLoops), AC(AssumpCache) {}
|
NeedCanonicalLoop(CanonicalLoops),
|
||||||
|
SinkCommonInsts(SinkCommon),
|
||||||
|
AC(AssumpCache) {}
|
||||||
|
|
||||||
// Support 'builder' pattern to set members by name at construction time.
|
// Support 'builder' pattern to set members by name at construction time.
|
||||||
SimplifyCFGOptions &bonusInstThreshold(int I) {
|
SimplifyCFGOptions &bonusInstThreshold(int I) {
|
||||||
|
|
@ -91,6 +95,10 @@ struct SimplifyCFGOptions {
|
||||||
NeedCanonicalLoop = B;
|
NeedCanonicalLoop = B;
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
SimplifyCFGOptions &sinkCommonInsts(bool B) {
|
||||||
|
SinkCommonInsts = B;
|
||||||
|
return *this;
|
||||||
|
}
|
||||||
SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
|
SimplifyCFGOptions &setAssumptionCache(AssumptionCache *Cache) {
|
||||||
AC = Cache;
|
AC = Cache;
|
||||||
return *this;
|
return *this;
|
||||||
|
|
|
||||||
|
|
@ -747,21 +747,24 @@ PassBuilder::buildModuleOptimizationPipeline(OptimizationLevel Level,
|
||||||
// Cleanup after the loop optimization passes.
|
// Cleanup after the loop optimization passes.
|
||||||
OptimizePM.addPass(InstCombinePass());
|
OptimizePM.addPass(InstCombinePass());
|
||||||
|
|
||||||
|
|
||||||
// Now that we've formed fast to execute loop structures, we do further
|
// Now that we've formed fast to execute loop structures, we do further
|
||||||
// optimizations. These are run afterward as they might block doing complex
|
// optimizations. These are run afterward as they might block doing complex
|
||||||
// analyses and transforms such as what are needed for loop vectorization.
|
// analyses and transforms such as what are needed for loop vectorization.
|
||||||
|
|
||||||
// Optimize parallel scalar instruction chains into SIMD instructions.
|
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
|
||||||
OptimizePM.addPass(SLPVectorizerPass());
|
|
||||||
|
|
||||||
// Cleanup after all of the vectorizers. Simplification passes like CVP and
|
|
||||||
// GVN, loop transforms, and others have already run, so it's now better to
|
// GVN, loop transforms, and others have already run, so it's now better to
|
||||||
// convert to more optimized IR using more aggressive simplify CFG options.
|
// convert to more optimized IR using more aggressive simplify CFG options.
|
||||||
|
// The extra sinking transform can create larger basic blocks, so do this
|
||||||
|
// before SLP vectorization.
|
||||||
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
|
OptimizePM.addPass(SimplifyCFGPass(SimplifyCFGOptions().
|
||||||
forwardSwitchCondToPhi(true).
|
forwardSwitchCondToPhi(true).
|
||||||
convertSwitchToLookupTable(true).
|
convertSwitchToLookupTable(true).
|
||||||
needCanonicalLoops(false)));
|
needCanonicalLoops(false).
|
||||||
|
sinkCommonInsts(true)));
|
||||||
|
|
||||||
|
// Optimize parallel scalar instruction chains into SIMD instructions.
|
||||||
|
OptimizePM.addPass(SLPVectorizerPass());
|
||||||
|
|
||||||
OptimizePM.addPass(InstCombinePass());
|
OptimizePM.addPass(InstCombinePass());
|
||||||
|
|
||||||
// Unroll small loops to hide loop backedge latency and saturate any parallel
|
// Unroll small loops to hide loop backedge latency and saturate any parallel
|
||||||
|
|
|
||||||
|
|
@ -365,7 +365,7 @@ void AArch64PassConfig::addIRPasses() {
|
||||||
// determine whether it succeeded. We can exploit existing control-flow in
|
// determine whether it succeeded. We can exploit existing control-flow in
|
||||||
// ldrex/strex loops to simplify this, but it needs tidying up.
|
// ldrex/strex loops to simplify this, but it needs tidying up.
|
||||||
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
|
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
|
||||||
addPass(createCFGSimplificationPass(1, true, true, false));
|
addPass(createCFGSimplificationPass(1, true, true, false, true));
|
||||||
|
|
||||||
// Run LoopDataPrefetch
|
// Run LoopDataPrefetch
|
||||||
//
|
//
|
||||||
|
|
|
||||||
|
|
@ -385,7 +385,7 @@ void ARMPassConfig::addIRPasses() {
|
||||||
// ldrex/strex loops to simplify this, but it needs tidying up.
|
// ldrex/strex loops to simplify this, but it needs tidying up.
|
||||||
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
|
if (TM->getOptLevel() != CodeGenOpt::None && EnableAtomicTidy)
|
||||||
addPass(createCFGSimplificationPass(
|
addPass(createCFGSimplificationPass(
|
||||||
1, false, false, true, [this](const Function &F) {
|
1, false, false, true, true, [this](const Function &F) {
|
||||||
const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
|
const auto &ST = this->TM->getSubtarget<ARMSubtarget>(F);
|
||||||
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
|
return ST.hasAnyDataBarrier() && !ST.isThumb1Only();
|
||||||
}));
|
}));
|
||||||
|
|
|
||||||
|
|
@ -630,6 +630,13 @@ void PassManagerBuilder::populateModulePassManager(
|
||||||
addInstructionCombiningPass(MPM);
|
addInstructionCombiningPass(MPM);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cleanup after loop vectorization, etc. Simplification passes like CVP and
|
||||||
|
// GVN, loop transforms, and others have already run, so it's now better to
|
||||||
|
// convert to more optimized IR using more aggressive simplify CFG options.
|
||||||
|
// The extra sinking transform can create larger basic blocks, so do this
|
||||||
|
// before SLP vectorization.
|
||||||
|
MPM.add(createCFGSimplificationPass(1, true, true, false, true));
|
||||||
|
|
||||||
if (RunSLPAfterLoopVectorization && SLPVectorize) {
|
if (RunSLPAfterLoopVectorization && SLPVectorize) {
|
||||||
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
|
MPM.add(createSLPVectorizerPass()); // Vectorize parallel scalar chains.
|
||||||
if (OptLevel > 1 && ExtraVectorizerPasses) {
|
if (OptLevel > 1 && ExtraVectorizerPasses) {
|
||||||
|
|
@ -638,9 +645,6 @@ void PassManagerBuilder::populateModulePassManager(
|
||||||
}
|
}
|
||||||
|
|
||||||
addExtensionsToPM(EP_Peephole, MPM);
|
addExtensionsToPM(EP_Peephole, MPM);
|
||||||
// Switches to lookup tables and other transforms that may not be considered
|
|
||||||
// canonical by other IR passes.
|
|
||||||
MPM.add(createCFGSimplificationPass(1, true, true, false));
|
|
||||||
addInstructionCombiningPass(MPM);
|
addInstructionCombiningPass(MPM);
|
||||||
|
|
||||||
if (!DisableUnrollLoops) {
|
if (!DisableUnrollLoops) {
|
||||||
|
|
|
||||||
|
|
@ -61,6 +61,11 @@ static cl::opt<bool> UserForwardSwitchCond(
|
||||||
"forward-switch-cond", cl::Hidden, cl::init(false),
|
"forward-switch-cond", cl::Hidden, cl::init(false),
|
||||||
cl::desc("Forward switch condition to phi ops (default = false)"));
|
cl::desc("Forward switch condition to phi ops (default = false)"));
|
||||||
|
|
||||||
|
static cl::opt<bool> UserSinkCommonInsts(
|
||||||
|
"sink-common-insts", cl::Hidden, cl::init(false),
|
||||||
|
cl::desc("Sink common instructions (default = false)"));
|
||||||
|
|
||||||
|
|
||||||
STATISTIC(NumSimpl, "Number of blocks simplified");
|
STATISTIC(NumSimpl, "Number of blocks simplified");
|
||||||
|
|
||||||
/// If we have more than one empty (other than phi node) return blocks,
|
/// If we have more than one empty (other than phi node) return blocks,
|
||||||
|
|
@ -205,6 +210,9 @@ SimplifyCFGPass::SimplifyCFGPass(const SimplifyCFGOptions &Opts) {
|
||||||
Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
|
Options.NeedCanonicalLoop = UserKeepLoops.getNumOccurrences()
|
||||||
? UserKeepLoops
|
? UserKeepLoops
|
||||||
: Opts.NeedCanonicalLoop;
|
: Opts.NeedCanonicalLoop;
|
||||||
|
Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
|
||||||
|
? UserSinkCommonInsts
|
||||||
|
: Opts.SinkCommonInsts;
|
||||||
}
|
}
|
||||||
|
|
||||||
PreservedAnalyses SimplifyCFGPass::run(Function &F,
|
PreservedAnalyses SimplifyCFGPass::run(Function &F,
|
||||||
|
|
@ -226,6 +234,7 @@ struct CFGSimplifyPass : public FunctionPass {
|
||||||
|
|
||||||
CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
|
CFGSimplifyPass(unsigned Threshold = 1, bool ForwardSwitchCond = false,
|
||||||
bool ConvertSwitch = false, bool KeepLoops = true,
|
bool ConvertSwitch = false, bool KeepLoops = true,
|
||||||
|
bool SinkCommon = false,
|
||||||
std::function<bool(const Function &)> Ftor = nullptr)
|
std::function<bool(const Function &)> Ftor = nullptr)
|
||||||
: FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
|
: FunctionPass(ID), PredicateFtor(std::move(Ftor)) {
|
||||||
|
|
||||||
|
|
@ -246,6 +255,10 @@ struct CFGSimplifyPass : public FunctionPass {
|
||||||
|
|
||||||
Options.NeedCanonicalLoop =
|
Options.NeedCanonicalLoop =
|
||||||
UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
|
UserKeepLoops.getNumOccurrences() ? UserKeepLoops : KeepLoops;
|
||||||
|
|
||||||
|
Options.SinkCommonInsts = UserSinkCommonInsts.getNumOccurrences()
|
||||||
|
? UserSinkCommonInsts
|
||||||
|
: SinkCommon;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool runOnFunction(Function &F) override {
|
bool runOnFunction(Function &F) override {
|
||||||
|
|
@ -276,7 +289,8 @@ INITIALIZE_PASS_END(CFGSimplifyPass, "simplifycfg", "Simplify the CFG", false,
|
||||||
FunctionPass *
|
FunctionPass *
|
||||||
llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
|
llvm::createCFGSimplificationPass(unsigned Threshold, bool ForwardSwitchCond,
|
||||||
bool ConvertSwitch, bool KeepLoops,
|
bool ConvertSwitch, bool KeepLoops,
|
||||||
|
bool SinkCommon,
|
||||||
std::function<bool(const Function &)> Ftor) {
|
std::function<bool(const Function &)> Ftor) {
|
||||||
return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
|
return new CFGSimplifyPass(Threshold, ForwardSwitchCond, ConvertSwitch,
|
||||||
KeepLoops, std::move(Ftor));
|
KeepLoops, SinkCommon, std::move(Ftor));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5728,7 +5728,7 @@ bool SimplifyCFGOpt::SimplifyUncondBranch(BranchInst *BI,
|
||||||
BasicBlock *BB = BI->getParent();
|
BasicBlock *BB = BI->getParent();
|
||||||
BasicBlock *Succ = BI->getSuccessor(0);
|
BasicBlock *Succ = BI->getSuccessor(0);
|
||||||
|
|
||||||
if (SinkCommon && SinkThenElseCodeToEnd(BI))
|
if (SinkCommon && Options.SinkCommonInsts && SinkThenElseCodeToEnd(BI))
|
||||||
return true;
|
return true;
|
||||||
|
|
||||||
// If the Terminator is the only non-phi instruction, simplify the block.
|
// If the Terminator is the only non-phi instruction, simplify the block.
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt -simplifycfg -S < %s | FileCheck %s
|
; RUN: opt -simplifycfg -sink-common-insts -S < %s | FileCheck %s
|
||||||
|
|
||||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||||
target triple = "x86_64-unknown-linux-gnu"
|
target triple = "x86_64-unknown-linux-gnu"
|
||||||
|
|
|
||||||
|
|
@ -197,8 +197,8 @@
|
||||||
; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
|
; CHECK-O-NEXT: Running pass: LoopLoadEliminationPass
|
||||||
; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
|
; CHECK-O-NEXT: Running analysis: LoopAccessAnalysis
|
||||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||||
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
|
|
||||||
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
; CHECK-O-NEXT: Running pass: SimplifyCFGPass
|
||||||
|
; CHECK-O-NEXT: Running pass: SLPVectorizerPass
|
||||||
; CHECK-O-NEXT: Running pass: InstCombinePass
|
; CHECK-O-NEXT: Running pass: InstCombinePass
|
||||||
; CHECK-O-NEXT: Running pass: LoopUnrollPass
|
; CHECK-O-NEXT: Running pass: LoopUnrollPass
|
||||||
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
; CHECK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||||
|
|
|
||||||
|
|
@ -185,8 +185,8 @@
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
|
; CHECK-POSTLINK-O-NEXT: Running pass: LoopLoadEliminationPass
|
||||||
; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
|
; CHECK-POSTLINK-O-NEXT: Running analysis: LoopAccessAnalysis
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
|
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
|
; CHECK-POSTLINK-O-NEXT: Running pass: SimplifyCFGPass
|
||||||
|
; CHECK-POSTLINK-O-NEXT: Running pass: SLPVectorizerPass
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
; CHECK-POSTLINK-O-NEXT: Running pass: InstCombinePass
|
||||||
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
|
; CHECK-POSTLINK-O-NEXT: Running pass: LoopUnrollPass
|
||||||
; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
; CHECK-POSTLINK-O-NEXT: Running analysis: OuterAnalysisManagerProxy
|
||||||
|
|
|
||||||
|
|
@ -76,10 +76,8 @@ define double @max_of_loads(double* %x, double* %y, i64 %i) {
|
||||||
; ALL-NEXT: [[XI:%.*]] = load double, double* [[XI_PTR]], align 8
|
; ALL-NEXT: [[XI:%.*]] = load double, double* [[XI_PTR]], align 8
|
||||||
; ALL-NEXT: [[YI:%.*]] = load double, double* [[YI_PTR]], align 8
|
; ALL-NEXT: [[YI:%.*]] = load double, double* [[YI_PTR]], align 8
|
||||||
; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]]
|
; ALL-NEXT: [[CMP:%.*]] = fcmp ogt double [[XI]], [[YI]]
|
||||||
; ALL-NEXT: [[Y_SINK:%.*]] = select i1 [[CMP]], double* [[X]], double* [[Y]]
|
; ALL-NEXT: [[XI_YI:%.*]] = select i1 [[CMP]], double [[XI]], double [[YI]]
|
||||||
; ALL-NEXT: [[YI_PTR_AGAIN:%.*]] = getelementptr double, double* [[Y_SINK]], i64 [[I]]
|
; ALL-NEXT: ret double [[XI_YI]]
|
||||||
; ALL-NEXT: [[YI_AGAIN:%.*]] = load double, double* [[YI_PTR_AGAIN]], align 8
|
|
||||||
; ALL-NEXT: ret double [[YI_AGAIN]]
|
|
||||||
;
|
;
|
||||||
entry:
|
entry:
|
||||||
%xi_ptr = getelementptr double, double* %x, i64 %i
|
%xi_ptr = getelementptr double, double* %x, i64 %i
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt < %s -simplifycfg -S | FileCheck %s
|
; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck %s
|
||||||
|
|
||||||
define i1 @test1(i1 zeroext %flag, i8* %y) #0 {
|
define i1 @test1(i1 zeroext %flag, i8* %y) #0 {
|
||||||
entry:
|
entry:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
; RUN: opt < %s -simplifycfg -S | FileCheck -enable-var-scope %s
|
; RUN: opt < %s -simplifycfg -sink-common-insts -S | FileCheck -enable-var-scope %s
|
||||||
|
|
||||||
define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
|
define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
|
||||||
entry:
|
entry:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue