forked from OSchip/llvm-project
				
			[HotColdSplit] Introduce a cost model to control splitting behavior
The main goal of the model is to avoid *increasing* function size, as
that would eradicate any memory locality benefits from splitting. This
happens when:
  - There are too many inputs or outputs to the cold region. Argument
    materialization and reloads of outputs have a cost.
  - The cold region has too many distinct exit blocks, causing a large
    switch to be formed in the caller.
  - The code size cost of the split code is less than the cost of
    setting up the call to it.
A secondary goal is to prevent excessive overall binary size growth.
With the cost model in place, I experimented to find a splitting
threshold that works well in practice. To make warm & cold code easily
separable for analysis purposes, I moved split functions to a "cold"
section. I experimented with thresholds in the range [0, 4] and set the
default to the threshold which minimized geomean __text size.
Experiment data from building LNT+externals for X86 (N = 639 programs,
all sizes in bytes):
| Configuration | __text geom size | __cold geom size | TEXT geom size |
| ------------- | ---------------- | ---------------- | -------------- |
| **-Os**       | 1736.3           | 0, n=0           | 10961.6        |
| -Os, thresh=0 | 1740.53          | 124.482, n=134   | 11014          |
| -Os, thresh=1 | 1734.79          | 57.8781, n=90    | 10978.6        |
| -Os, thresh=2 | ** 1733.85 **    | 65.6604, n=61    | 10977.6        |
| -Os, thresh=3 | 1733.85          | 65.3071, n=61    | 10977.6        |
| -Os, thresh=4 | 1735.08          | 67.5156, n=54    | 10965.7        |
| **-Oz**       | 1554.4           | 0, n=0           | 10153          |
| -Oz, thresh=2 | ** 1552.2 **     | 65.633, n=61     | 10176          |
| **-O3**       | 2563.37          | 0, n=0           | 13105.4        |
| -O3, thresh=2 | ** 2559.49 **    | 71.1072, n=61    | 13162.4        |
Picking thresh=2 reduces the geomean __text section size by 0.14% at
-Os, -Oz, and -O3 and causes ~0.2% growth in the TEXT segment. Note that
TEXT size is page-aligned, whereas section sizes are byte-aligned.
Experiment data from building LNT+externals for ARM64 (N = 558 programs,
all sizes in bytes):
| Configuration | __text geom size | __cold geom size | TEXT geom size |
| ------------- | ---------------- | ---------------- | -------------- |
| **-Os**       | 1763.96          | 0, n=0           | 42934.9        |
| -Os, thresh=2 | ** 1760.9 **     | 76.6755, n=61    | 42934.9        |
Picking thresh=2 reduces the geomean __text section size by 0.17% at
-Os and causes no growth in the TEXT segment.
Measurements were done with D57082 (r352080) applied.
Differential Revision: https://reviews.llvm.org/D57125
llvm-svn: 352228
			
			
This commit is contained in:
		
							parent
							
								
									13ef84fced
								
							
						
					
					
						commit
						db3f9774ee
					
				| 
						 | 
					@ -80,9 +80,9 @@ static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
 | 
				
			||||||
                              cl::init(true), cl::Hidden);
 | 
					                              cl::init(true), cl::Hidden);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static cl::opt<int>
 | 
					static cl::opt<int>
 | 
				
			||||||
    SplittingThreshold("hotcoldsplit-threshold", cl::init(3), cl::Hidden,
 | 
					    SplittingThreshold("hotcoldsplit-threshold", cl::init(2), cl::Hidden,
 | 
				
			||||||
                       cl::desc("Code size threshold for splitting cold code "
 | 
					                       cl::desc("Base penalty for splitting cold code (as a "
 | 
				
			||||||
                                "(as a multiple of TCC_Basic)"));
 | 
					                                "multiple of TCC_Basic)"));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
namespace {
 | 
					namespace {
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -139,31 +139,6 @@ static bool mayExtractBlock(const BasicBlock &BB) {
 | 
				
			||||||
         !isa<InvokeInst>(BB.getTerminator());
 | 
					         !isa<InvokeInst>(BB.getTerminator());
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Check whether \p Region is profitable to outline.
 | 
					 | 
				
			||||||
static bool isProfitableToOutline(const BlockSequence &Region,
 | 
					 | 
				
			||||||
                                  TargetTransformInfo &TTI) {
 | 
					 | 
				
			||||||
  // If the splitting threshold is set at or below zero, skip the usual
 | 
					 | 
				
			||||||
  // profitability check.
 | 
					 | 
				
			||||||
  if (SplittingThreshold <= 0)
 | 
					 | 
				
			||||||
    return true;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  if (Region.size() > 1)
 | 
					 | 
				
			||||||
    return true;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  int Cost = 0;
 | 
					 | 
				
			||||||
  const BasicBlock &BB = *Region[0];
 | 
					 | 
				
			||||||
  for (const Instruction &I : BB) {
 | 
					 | 
				
			||||||
    if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (Cost >= (SplittingThreshold * TargetTransformInfo::TCC_Basic))
 | 
					 | 
				
			||||||
      return true;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
  return false;
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
 | 
					/// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
 | 
				
			||||||
/// Return true if the function is changed.
 | 
					/// Return true if the function is changed.
 | 
				
			||||||
static bool markFunctionCold(Function &F) {
 | 
					static bool markFunctionCold(Function &F) {
 | 
				
			||||||
| 
						 | 
					@ -247,6 +222,82 @@ bool HotColdSplitting::shouldOutlineFrom(const Function &F) const {
 | 
				
			||||||
  return true;
 | 
					  return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Get the benefit score of outlining \p Region.
 | 
				
			||||||
 | 
					static int getOutliningBenefit(ArrayRef<BasicBlock *> Region,
 | 
				
			||||||
 | 
					                               TargetTransformInfo &TTI) {
 | 
				
			||||||
 | 
					  // Sum up the code size costs of non-terminator instructions. Tight coupling
 | 
				
			||||||
 | 
					  // with \ref getOutliningPenalty is needed to model the costs of terminators.
 | 
				
			||||||
 | 
					  int Benefit = 0;
 | 
				
			||||||
 | 
					  for (BasicBlock *BB : Region)
 | 
				
			||||||
 | 
					    for (Instruction &I : BB->instructionsWithoutDebug())
 | 
				
			||||||
 | 
					      if (&I != BB->getTerminator())
 | 
				
			||||||
 | 
					        Benefit +=
 | 
				
			||||||
 | 
					            TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return Benefit;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// Get the penalty score for outlining \p Region.
 | 
				
			||||||
 | 
					static int getOutliningPenalty(ArrayRef<BasicBlock *> Region,
 | 
				
			||||||
 | 
					                               unsigned NumInputs, unsigned NumOutputs) {
 | 
				
			||||||
 | 
					  int Penalty = SplittingThreshold;
 | 
				
			||||||
 | 
					  LLVM_DEBUG(dbgs() << "Applying penalty for splitting: " << Penalty << "\n");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // If the splitting threshold is set at or below zero, skip the usual
 | 
				
			||||||
 | 
					  // profitability check.
 | 
				
			||||||
 | 
					  if (SplittingThreshold <= 0)
 | 
				
			||||||
 | 
					    return Penalty;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The typical code size cost for materializing an argument for the outlined
 | 
				
			||||||
 | 
					  // call.
 | 
				
			||||||
 | 
					  LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumInputs << " inputs\n");
 | 
				
			||||||
 | 
					  const int CostForArgMaterialization = TargetTransformInfo::TCC_Basic;
 | 
				
			||||||
 | 
					  Penalty += CostForArgMaterialization * NumInputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The typical code size cost for an output alloca, its associated store, and
 | 
				
			||||||
 | 
					  // its associated reload.
 | 
				
			||||||
 | 
					  LLVM_DEBUG(dbgs() << "Applying penalty for: " << NumOutputs << " outputs\n");
 | 
				
			||||||
 | 
					  const int CostForRegionOutput = 3 * TargetTransformInfo::TCC_Basic;
 | 
				
			||||||
 | 
					  Penalty += CostForRegionOutput * NumOutputs;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Find the number of distinct exit blocks for the region. Use a conservative
 | 
				
			||||||
 | 
					  // check to determine whether control returns from the region.
 | 
				
			||||||
 | 
					  bool NoBlocksReturn = true;
 | 
				
			||||||
 | 
					  SmallPtrSet<BasicBlock *, 2> SuccsOutsideRegion;
 | 
				
			||||||
 | 
					  for (BasicBlock *BB : Region) {
 | 
				
			||||||
 | 
					    // If a block has no successors, only assume it does not return if it's
 | 
				
			||||||
 | 
					    // unreachable.
 | 
				
			||||||
 | 
					    if (succ_empty(BB)) {
 | 
				
			||||||
 | 
					      NoBlocksReturn &= isa<UnreachableInst>(BB->getTerminator());
 | 
				
			||||||
 | 
					      continue;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    for (BasicBlock *SuccBB : successors(BB)) {
 | 
				
			||||||
 | 
					      if (find(Region, SuccBB) == Region.end()) {
 | 
				
			||||||
 | 
					        NoBlocksReturn = false;
 | 
				
			||||||
 | 
					        SuccsOutsideRegion.insert(SuccBB);
 | 
				
			||||||
 | 
					      }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Apply a `noreturn` bonus.
 | 
				
			||||||
 | 
					  if (NoBlocksReturn) {
 | 
				
			||||||
 | 
					    LLVM_DEBUG(dbgs() << "Applying bonus for: " << Region.size()
 | 
				
			||||||
 | 
					                      << " non-returning terminators\n");
 | 
				
			||||||
 | 
					    Penalty -= Region.size();
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Apply a penalty for having more than one successor outside of the region.
 | 
				
			||||||
 | 
					  // This penalty accounts for the switch needed in the caller.
 | 
				
			||||||
 | 
					  if (!SuccsOutsideRegion.empty()) {
 | 
				
			||||||
 | 
					    LLVM_DEBUG(dbgs() << "Applying penalty for: " << SuccsOutsideRegion.size()
 | 
				
			||||||
 | 
					                      << " non-region successors\n");
 | 
				
			||||||
 | 
					    Penalty += (SuccsOutsideRegion.size() - 1) * TargetTransformInfo::TCC_Basic;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return Penalty;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
 | 
					Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
 | 
				
			||||||
                                              DominatorTree &DT,
 | 
					                                              DominatorTree &DT,
 | 
				
			||||||
                                              BlockFrequencyInfo *BFI,
 | 
					                                              BlockFrequencyInfo *BFI,
 | 
				
			||||||
| 
						 | 
					@ -261,6 +312,18 @@ Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
 | 
				
			||||||
                   /* AllowAlloca */ false,
 | 
					                   /* AllowAlloca */ false,
 | 
				
			||||||
                   /* Suffix */ "cold." + std::to_string(Count));
 | 
					                   /* Suffix */ "cold." + std::to_string(Count));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Perform a simple cost/benefit analysis to decide whether or not to permit
 | 
				
			||||||
 | 
					  // splitting.
 | 
				
			||||||
 | 
					  SetVector<Value *> Inputs, Outputs, Sinks;
 | 
				
			||||||
 | 
					  CE.findInputsOutputs(Inputs, Outputs, Sinks);
 | 
				
			||||||
 | 
					  int OutliningBenefit = getOutliningBenefit(Region, TTI);
 | 
				
			||||||
 | 
					  int OutliningPenalty =
 | 
				
			||||||
 | 
					      getOutliningPenalty(Region, Inputs.size(), Outputs.size());
 | 
				
			||||||
 | 
					  LLVM_DEBUG(dbgs() << "Split profitability: benefit = " << OutliningBenefit
 | 
				
			||||||
 | 
					                    << ", penalty = " << OutliningPenalty << "\n");
 | 
				
			||||||
 | 
					  if (OutliningBenefit <= OutliningPenalty)
 | 
				
			||||||
 | 
					    return nullptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Function *OrigF = Region[0]->getParent();
 | 
					  Function *OrigF = Region[0]->getParent();
 | 
				
			||||||
  if (Function *OutF = CE.extractCodeRegion()) {
 | 
					  if (Function *OutF = CE.extractCodeRegion()) {
 | 
				
			||||||
    User *U = *OutF->user_begin();
 | 
					    User *U = *OutF->user_begin();
 | 
				
			||||||
| 
						 | 
					@ -556,14 +619,6 @@ bool HotColdSplitting::outlineColdRegions(Function &F, bool HasProfileSummary) {
 | 
				
			||||||
    assert(!Region.empty() && "Empty outlining region in worklist");
 | 
					    assert(!Region.empty() && "Empty outlining region in worklist");
 | 
				
			||||||
    do {
 | 
					    do {
 | 
				
			||||||
      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
 | 
					      BlockSequence SubRegion = Region.takeSingleEntrySubRegion(*DT);
 | 
				
			||||||
      if (!isProfitableToOutline(SubRegion, TTI)) {
 | 
					 | 
				
			||||||
        LLVM_DEBUG({
 | 
					 | 
				
			||||||
          dbgs() << "Skipping outlining; not profitable to outline\n";
 | 
					 | 
				
			||||||
          SubRegion[0]->dump();
 | 
					 | 
				
			||||||
        });
 | 
					 | 
				
			||||||
        continue;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      LLVM_DEBUG({
 | 
					      LLVM_DEBUG({
 | 
				
			||||||
        dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
 | 
					        dbgs() << "Hot/cold splitting attempting to outline these blocks:\n";
 | 
				
			||||||
        for (BasicBlock *BB : SubRegion)
 | 
					        for (BasicBlock *BB : SubRegion)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,63 +0,0 @@
 | 
				
			||||||
; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=1 < %s | FileCheck %s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
					 | 
				
			||||||
target triple = "x86_64-apple-macosx10.14.0"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
; CHECK-LABEL: define {{.*}}@foo(
 | 
					 | 
				
			||||||
; CHECK: call {{.*}}@foo.cold.1(
 | 
					 | 
				
			||||||
; CHECK: unreachable
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
; CHECK-LABEL: define {{.*}}@foo.cold.1(
 | 
					 | 
				
			||||||
; CHECK: switch i32 undef, label %sw.epilog.i
 | 
					 | 
				
			||||||
define void @foo(i32 %QMM) {
 | 
					 | 
				
			||||||
entry:
 | 
					 | 
				
			||||||
  switch i32 %QMM, label %entry.if.end16_crit_edge [
 | 
					 | 
				
			||||||
    i32 1, label %if.then
 | 
					 | 
				
			||||||
  ]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
entry.if.end16_crit_edge:                         ; preds = %entry
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if.then:                                          ; preds = %entry
 | 
					 | 
				
			||||||
  br i1 undef, label %cond.true.i.i, label %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
cond.true.i.i:                                    ; preds = %if.then
 | 
					 | 
				
			||||||
  ret void
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
_ZN10StringView8popFrontEv.exit.i:                ; preds = %if.then
 | 
					 | 
				
			||||||
  switch i32 undef, label %sw.epilog.i [
 | 
					 | 
				
			||||||
    i32 81, label %if.end16
 | 
					 | 
				
			||||||
    i32 82, label %sw.bb4.i
 | 
					 | 
				
			||||||
    i32 83, label %sw.bb8.i
 | 
					 | 
				
			||||||
    i32 84, label %sw.bb12.i
 | 
					 | 
				
			||||||
    i32 65, label %if.end16
 | 
					 | 
				
			||||||
    i32 66, label %sw.bb20.i
 | 
					 | 
				
			||||||
    i32 67, label %sw.bb24.i
 | 
					 | 
				
			||||||
    i32 68, label %sw.bb28.i
 | 
					 | 
				
			||||||
  ]
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb4.i:                                         ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb8.i:                                         ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb12.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb20.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb24.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.bb28.i:                                        ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
sw.epilog.i:                                      ; preds = %_ZN10StringView8popFrontEv.exit.i
 | 
					 | 
				
			||||||
  br label %if.end16
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if.end16:                                         ; preds = %sw.epilog.i, %sw.bb28.i, %sw.bb24.i, %sw.bb20.i, %sw.bb12.i, %sw.bb8.i, %sw.bb4.i, %_ZN10StringView8popFrontEv.exit.i, %_ZN10StringView8popFrontEv.exit.i, %entry.if.end16_crit_edge
 | 
					 | 
				
			||||||
  %0 = phi i8 [ 0, %entry.if.end16_crit_edge ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 0, %_ZN10StringView8popFrontEv.exit.i ], [ 1, %sw.bb4.i ], [ 2, %sw.bb8.i ], [ 3, %sw.bb12.i ], [ 1, %sw.bb20.i ], [ 2, %sw.bb24.i ], [ 3, %sw.bb28.i ], [ 0, %sw.epilog.i ]
 | 
					 | 
				
			||||||
  unreachable
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,25 +0,0 @@
 | 
				
			||||||
; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86).
 | 
					 | 
				
			||||||
; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=6 -S < %s | FileCheck %s
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
; Test that we outline even though there are only two cold instructions. TTI
 | 
					 | 
				
			||||||
; should determine that they are expensive in terms of code size.
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
					 | 
				
			||||||
target triple = "x86_64-apple-macosx10.14.0"
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
; CHECK-LABEL: @fun
 | 
					 | 
				
			||||||
; CHECK: call void @fun.cold.1
 | 
					 | 
				
			||||||
define void @fun(i32 %x) {
 | 
					 | 
				
			||||||
entry:
 | 
					 | 
				
			||||||
  br i1 undef, label %if.then, label %if.else
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if.then:
 | 
					 | 
				
			||||||
  ret void
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
if.else:
 | 
					 | 
				
			||||||
  %y = sdiv i32 %x, 111
 | 
					 | 
				
			||||||
  call void @sink(i32 %y)
 | 
					 | 
				
			||||||
  ret void
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
declare void @sink(i32 %x) cold
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
 | 
					; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
					target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
				
			||||||
target triple = "x86_64-apple-macosx10.14.0"
 | 
					target triple = "x86_64-apple-macosx10.14.0"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,26 @@
 | 
				
			||||||
 | 
					; REQUIRES: asserts
 | 
				
			||||||
 | 
					; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare void @sink() cold
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define void @foo(i32 %arg) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold1, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold1:
 | 
				
			||||||
 | 
					  ; CHECK: Applying bonus for: 4 non-returning terminators
 | 
				
			||||||
 | 
					  call void @sink()
 | 
				
			||||||
 | 
					  br i1 undef, label %cold2, label %cold3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold2:
 | 
				
			||||||
 | 
					  br label %cold4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold3:
 | 
				
			||||||
 | 
					  br label %cold4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold4:
 | 
				
			||||||
 | 
					  unreachable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit:
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,19 @@
 | 
				
			||||||
 | 
					; REQUIRES: asserts
 | 
				
			||||||
 | 
					; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare void @sink(i32*, i32, i32) cold
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@g = global i32 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define void @foo(i32 %arg) {
 | 
				
			||||||
 | 
					  %local = load i32, i32* @g
 | 
				
			||||||
 | 
					  br i1 undef, label %cold, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold:
 | 
				
			||||||
 | 
					  ; CHECK: Applying penalty for: 2 inputs
 | 
				
			||||||
 | 
					  call void @sink(i32* @g, i32 %arg, i32 %local)
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit:
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,22 @@
 | 
				
			||||||
 | 
					; REQUIRES: asserts
 | 
				
			||||||
 | 
					; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare void @sink() cold
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@g = global i32 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define i32 @foo(i32 %arg) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold:
 | 
				
			||||||
 | 
					  ; CHECK: Applying penalty for: 1 output
 | 
				
			||||||
 | 
					  ; CHECK: Applying penalty for: 1 non-region successors
 | 
				
			||||||
 | 
					  %local = load i32, i32* @g
 | 
				
			||||||
 | 
					  call void @sink()
 | 
				
			||||||
 | 
					  br label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit:
 | 
				
			||||||
 | 
					  %p = phi i32 [ %local, %cold ], [ 0, %entry ]
 | 
				
			||||||
 | 
					  ret i32 %p
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,53 @@
 | 
				
			||||||
 | 
					; REQUIRES: asserts
 | 
				
			||||||
 | 
					; RUN: opt -hotcoldsplit -debug-only=hotcoldsplit -S < %s -o /dev/null 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare void @sink() cold
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK-LABEL: Outlining in one_non_region_successor
 | 
				
			||||||
 | 
					define void @one_non_region_successor(i32 %arg) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold1, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold1:
 | 
				
			||||||
 | 
					  ; CHECK: Applying penalty for: 1 non-region successor
 | 
				
			||||||
 | 
					  call void @sink()
 | 
				
			||||||
 | 
					  br i1 undef, label %cold2, label %cold3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold2:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold4, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold3:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold4, label %exit
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold4:
 | 
				
			||||||
 | 
					  unreachable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit:
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK-LABEL: Outlining in two_non_region_successor
 | 
				
			||||||
 | 
					define void @two_non_region_successors(i32 %arg) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold1, label %exit1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold1:
 | 
				
			||||||
 | 
					  ; CHECK: Applying penalty for: 2 non-region successors
 | 
				
			||||||
 | 
					  call void @sink()
 | 
				
			||||||
 | 
					  br i1 undef, label %cold2, label %cold3
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold2:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold4, label %exit1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold3:
 | 
				
			||||||
 | 
					  br i1 undef, label %cold4, label %exit2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					cold4:
 | 
				
			||||||
 | 
					  unreachable
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit1:
 | 
				
			||||||
 | 
					  br label %exit2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					exit2:
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=0 < %s 2>&1 | FileCheck %s
 | 
					; RUN: opt -S -hotcoldsplit -hotcoldsplit-threshold=-1 < %s 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: define {{.*}}@fun
 | 
					; CHECK-LABEL: define {{.*}}@fun
 | 
				
			||||||
; CHECK: call {{.*}}@fun.cold.2(
 | 
					; CHECK: call {{.*}}@fun.cold.2(
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,4 +1,4 @@
 | 
				
			||||||
; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -S < %s | FileCheck %s
 | 
					; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -S < %s | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
					target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 | 
				
			||||||
target triple = "x86_64-apple-macosx10.14.0"
 | 
					target triple = "x86_64-apple-macosx10.14.0"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,5 +1,5 @@
 | 
				
			||||||
; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=0 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
 | 
					; RUN: opt -hotcoldsplit -hotcoldsplit-threshold=-1 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
 | 
				
			||||||
; RUN: opt -hotcoldsplit-threshold=0 -passes=hotcoldsplit -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
 | 
					; RUN: opt -passes=hotcoldsplit -hotcoldsplit-threshold=-1 -pass-remarks=hotcoldsplit -S < %s 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; Make sure this compiles. This test used to fail with an invalid phi node: the
 | 
					; Make sure this compiles. This test used to fail with an invalid phi node: the
 | 
				
			||||||
; two predecessors were outlined and the SSA representation was invalid.
 | 
					; two predecessors were outlined and the SSA representation was invalid.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue