483 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			483 lines
		
	
	
		
			18 KiB
		
	
	
	
		
			C++
		
	
	
	
//===- BypassSlowDivision.cpp - Bypass slow division ----------------------===//
 | 
						|
//
 | 
						|
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 | 
						|
// See https://llvm.org/LICENSE.txt for license information.
 | 
						|
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
//
 | 
						|
// This file contains an optimization for div and rem on architectures that
 | 
						|
// execute short instructions significantly faster than longer instructions.
 | 
						|
// For example, on Intel Atom 32-bit divides are slow enough that during
 | 
						|
// runtime it is profitable to check the value of the operands, and if they are
 | 
						|
// positive and less than 256 use an unsigned 8-bit divide.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
 | 
						|
#include "llvm/Transforms/Utils/BypassSlowDivision.h"
 | 
						|
#include "llvm/ADT/DenseMap.h"
 | 
						|
#include "llvm/ADT/None.h"
 | 
						|
#include "llvm/ADT/Optional.h"
 | 
						|
#include "llvm/ADT/STLExtras.h"
 | 
						|
#include "llvm/ADT/SmallPtrSet.h"
 | 
						|
#include "llvm/Transforms/Utils/Local.h"
 | 
						|
#include "llvm/Analysis/ValueTracking.h"
 | 
						|
#include "llvm/IR/BasicBlock.h"
 | 
						|
#include "llvm/IR/Constants.h"
 | 
						|
#include "llvm/IR/DerivedTypes.h"
 | 
						|
#include "llvm/IR/Function.h"
 | 
						|
#include "llvm/IR/IRBuilder.h"
 | 
						|
#include "llvm/IR/Instruction.h"
 | 
						|
#include "llvm/IR/Instructions.h"
 | 
						|
#include "llvm/IR/Module.h"
 | 
						|
#include "llvm/IR/Type.h"
 | 
						|
#include "llvm/IR/Value.h"
 | 
						|
#include "llvm/Support/Casting.h"
 | 
						|
#include "llvm/Support/KnownBits.h"
 | 
						|
#include <cassert>
 | 
						|
#include <cstdint>
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
#define DEBUG_TYPE "bypass-slow-division"
 | 
						|
 | 
						|
namespace {
 | 
						|
 | 
						|
  struct QuotRemPair {
 | 
						|
    Value *Quotient;
 | 
						|
    Value *Remainder;
 | 
						|
 | 
						|
    QuotRemPair(Value *InQuotient, Value *InRemainder)
 | 
						|
        : Quotient(InQuotient), Remainder(InRemainder) {}
 | 
						|
  };
 | 
						|
 | 
						|
  /// A quotient and remainder, plus a BB from which they logically "originate".
 | 
						|
  /// If you use Quotient or Remainder in a Phi node, you should use BB as its
 | 
						|
  /// corresponding predecessor.
 | 
						|
  struct QuotRemWithBB {
 | 
						|
    BasicBlock *BB = nullptr;
 | 
						|
    Value *Quotient = nullptr;
 | 
						|
    Value *Remainder = nullptr;
 | 
						|
  };
 | 
						|
 | 
						|
using DivCacheTy = DenseMap<DivRemMapKey, QuotRemPair>;
 | 
						|
using BypassWidthsTy = DenseMap<unsigned, unsigned>;
 | 
						|
using VisitedSetTy = SmallPtrSet<Instruction *, 4>;
 | 
						|
 | 
						|
/// Classification of how an operand's value relates to BypassType.
enum ValueRange {
  /// The operand is known to fit into BypassType, so no runtime check has to
  /// be emitted for it.
  VALRNG_KNOWN_SHORT,
  /// Nothing is known about the operand's range; a runtime check is needed.
  VALRNG_UNKNOWN,
  /// The operand most likely does not fit into BypassType, so bypassing
  /// should not be attempted.
  VALRNG_LIKELY_LONG
};
 | 
						|
 | 
						|
class FastDivInsertionTask {
 | 
						|
  bool IsValidTask = false;
 | 
						|
  Instruction *SlowDivOrRem = nullptr;
 | 
						|
  IntegerType *BypassType = nullptr;
 | 
						|
  BasicBlock *MainBB = nullptr;
 | 
						|
 | 
						|
  bool isHashLikeValue(Value *V, VisitedSetTy &Visited);
 | 
						|
  ValueRange getValueRange(Value *Op, VisitedSetTy &Visited);
 | 
						|
  QuotRemWithBB createSlowBB(BasicBlock *Successor);
 | 
						|
  QuotRemWithBB createFastBB(BasicBlock *Successor);
 | 
						|
  QuotRemPair createDivRemPhiNodes(QuotRemWithBB &LHS, QuotRemWithBB &RHS,
 | 
						|
                                   BasicBlock *PhiBB);
 | 
						|
  Value *insertOperandRuntimeCheck(Value *Op1, Value *Op2);
 | 
						|
  Optional<QuotRemPair> insertFastDivAndRem();
 | 
						|
 | 
						|
  bool isSignedOp() {
 | 
						|
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
 | 
						|
           SlowDivOrRem->getOpcode() == Instruction::SRem;
 | 
						|
  }
 | 
						|
 | 
						|
  bool isDivisionOp() {
 | 
						|
    return SlowDivOrRem->getOpcode() == Instruction::SDiv ||
 | 
						|
           SlowDivOrRem->getOpcode() == Instruction::UDiv;
 | 
						|
  }
 | 
						|
 | 
						|
  Type *getSlowType() { return SlowDivOrRem->getType(); }
 | 
						|
 | 
						|
public:
 | 
						|
  FastDivInsertionTask(Instruction *I, const BypassWidthsTy &BypassWidths);
 | 
						|
 | 
						|
  Value *getReplacement(DivCacheTy &Cache);
 | 
						|
};
 | 
						|
 | 
						|
} // end anonymous namespace
 | 
						|
 | 
						|
/// Examines \p I and, if it is a scalar integer div/rem whose bit width has an
/// entry in \p BypassWidths, records everything needed to bypass it and marks
/// the task valid. Otherwise the task is left invalid.
FastDivInsertionTask::FastDivInsertionTask(Instruction *I,
                                           const BypassWidthsTy &BypassWidths) {
  // Only the four integer division/remainder opcodes are candidates.
  const unsigned Opcode = I->getOpcode();
  const bool IsDivOrRem =
      Opcode == Instruction::UDiv || Opcode == Instruction::SDiv ||
      Opcode == Instruction::URem || Opcode == Instruction::SRem;
  if (!IsDivOrRem)
    return;
  SlowDivOrRem = I;

  // Vector div/rem is not handled; the result must be a plain integer.
  auto *SlowType = dyn_cast<IntegerType>(SlowDivOrRem->getType());
  if (!SlowType)
    return;

  // Nothing to do unless a bypass width is configured for this bit width.
  auto WidthIt = BypassWidths.find(SlowType->getBitWidth());
  if (WidthIt == BypassWidths.end())
    return;

  // Integer type with the configured bypass width.
  BypassType = IntegerType::get(I->getContext(), WidthIt->second);

  // Remember the block the instruction currently lives in.
  MainBB = I->getParent();

  // All checks passed.
  IsValidTask = true;
}
 | 
						|
 | 
						|
/// Reuses previously-computed dividend or remainder from the current BB if
 | 
						|
/// operands and operation are identical. Otherwise calls insertFastDivAndRem to
 | 
						|
/// perform the optimization and caches the resulting dividend and remainder.
 | 
						|
/// If no replacement can be generated, nullptr is returned.
 | 
						|
Value *FastDivInsertionTask::getReplacement(DivCacheTy &Cache) {
 | 
						|
  // First, make sure that the task is valid.
 | 
						|
  if (!IsValidTask)
 | 
						|
    return nullptr;
 | 
						|
 | 
						|
  // Then, look for a value in Cache.
 | 
						|
  Value *Dividend = SlowDivOrRem->getOperand(0);
 | 
						|
  Value *Divisor = SlowDivOrRem->getOperand(1);
 | 
						|
  DivRemMapKey Key(isSignedOp(), Dividend, Divisor);
 | 
						|
  auto CacheI = Cache.find(Key);
 | 
						|
 | 
						|
  if (CacheI == Cache.end()) {
 | 
						|
    // If previous instance does not exist, try to insert fast div.
 | 
						|
    Optional<QuotRemPair> OptResult = insertFastDivAndRem();
 | 
						|
    // Bail out if insertFastDivAndRem has failed.
 | 
						|
    if (!OptResult)
 | 
						|
      return nullptr;
 | 
						|
    CacheI = Cache.insert({Key, *OptResult}).first;
 | 
						|
  }
 | 
						|
 | 
						|
  QuotRemPair &Value = CacheI->second;
 | 
						|
  return isDivisionOp() ? Value.Quotient : Value.Remainder;
 | 
						|
}
 | 
						|
 | 
						|
/// Check if a value looks like a hash.
 | 
						|
///
 | 
						|
/// The routine is expected to detect values computed using the most common hash
 | 
						|
/// algorithms. Typically, hash computations end with one of the following
 | 
						|
/// instructions:
 | 
						|
///
 | 
						|
/// 1) MUL with a constant wider than BypassType
 | 
						|
/// 2) XOR instruction
 | 
						|
///
 | 
						|
/// And even if we are wrong and the value is not a hash, it is still quite
 | 
						|
/// unlikely that such values will fit into BypassType.
 | 
						|
///
 | 
						|
/// To detect string hash algorithms like FNV we have to look through PHI-nodes.
 | 
						|
/// It is implemented as a depth-first search for values that look neither long
 | 
						|
/// nor hash-like.
 | 
						|
bool FastDivInsertionTask::isHashLikeValue(Value *V, VisitedSetTy &Visited) {
 | 
						|
  Instruction *I = dyn_cast<Instruction>(V);
 | 
						|
  if (!I)
 | 
						|
    return false;
 | 
						|
 | 
						|
  switch (I->getOpcode()) {
 | 
						|
  case Instruction::Xor:
 | 
						|
    return true;
 | 
						|
  case Instruction::Mul: {
 | 
						|
    // After Constant Hoisting pass, long constants may be represented as
 | 
						|
    // bitcast instructions. As a result, some constants may look like an
 | 
						|
    // instruction at first, and an additional check is necessary to find out if
 | 
						|
    // an operand is actually a constant.
 | 
						|
    Value *Op1 = I->getOperand(1);
 | 
						|
    ConstantInt *C = dyn_cast<ConstantInt>(Op1);
 | 
						|
    if (!C && isa<BitCastInst>(Op1))
 | 
						|
      C = dyn_cast<ConstantInt>(cast<BitCastInst>(Op1)->getOperand(0));
 | 
						|
    return C && C->getValue().getMinSignedBits() > BypassType->getBitWidth();
 | 
						|
  }
 | 
						|
  case Instruction::PHI:
 | 
						|
    // Stop IR traversal in case of a crazy input code. This limits recursion
 | 
						|
    // depth.
 | 
						|
    if (Visited.size() >= 16)
 | 
						|
      return false;
 | 
						|
    // Do not visit nodes that have been visited already. We return true because
 | 
						|
    // it means that we couldn't find any value that doesn't look hash-like.
 | 
						|
    if (!Visited.insert(I).second)
 | 
						|
      return true;
 | 
						|
    return llvm::all_of(cast<PHINode>(I)->incoming_values(), [&](Value *V) {
 | 
						|
      // Ignore undef values as they probably don't affect the division
 | 
						|
      // operands.
 | 
						|
      return getValueRange(V, Visited) == VALRNG_LIKELY_LONG ||
 | 
						|
             isa<UndefValue>(V);
 | 
						|
    });
 | 
						|
  default:
 | 
						|
    return false;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/// Check if an integer value fits into our bypass type.
 | 
						|
ValueRange FastDivInsertionTask::getValueRange(Value *V,
 | 
						|
                                               VisitedSetTy &Visited) {
 | 
						|
  unsigned ShortLen = BypassType->getBitWidth();
 | 
						|
  unsigned LongLen = V->getType()->getIntegerBitWidth();
 | 
						|
 | 
						|
  assert(LongLen > ShortLen && "Value type must be wider than BypassType");
 | 
						|
  unsigned HiBits = LongLen - ShortLen;
 | 
						|
 | 
						|
  const DataLayout &DL = SlowDivOrRem->getModule()->getDataLayout();
 | 
						|
  KnownBits Known(LongLen);
 | 
						|
 | 
						|
  computeKnownBits(V, Known, DL);
 | 
						|
 | 
						|
  if (Known.countMinLeadingZeros() >= HiBits)
 | 
						|
    return VALRNG_KNOWN_SHORT;
 | 
						|
 | 
						|
  if (Known.countMaxLeadingZeros() < HiBits)
 | 
						|
    return VALRNG_LIKELY_LONG;
 | 
						|
 | 
						|
  // Long integer divisions are often used in hashtable implementations. It's
 | 
						|
  // not worth bypassing such divisions because hash values are extremely
 | 
						|
  // unlikely to have enough leading zeros. The call below tries to detect
 | 
						|
  // values that are unlikely to fit BypassType (including hashes).
 | 
						|
  if (isHashLikeValue(V, Visited))
 | 
						|
    return VALRNG_LIKELY_LONG;
 | 
						|
 | 
						|
  return VALRNG_UNKNOWN;
 | 
						|
}
 | 
						|
 | 
						|
/// Add new basic block for slow div and rem operations and put it before
 | 
						|
/// SuccessorBB.
 | 
						|
QuotRemWithBB FastDivInsertionTask::createSlowBB(BasicBlock *SuccessorBB) {
 | 
						|
  QuotRemWithBB DivRemPair;
 | 
						|
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
 | 
						|
                                     MainBB->getParent(), SuccessorBB);
 | 
						|
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
 | 
						|
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
 | 
						|
 | 
						|
  Value *Dividend = SlowDivOrRem->getOperand(0);
 | 
						|
  Value *Divisor = SlowDivOrRem->getOperand(1);
 | 
						|
 | 
						|
  if (isSignedOp()) {
 | 
						|
    DivRemPair.Quotient = Builder.CreateSDiv(Dividend, Divisor);
 | 
						|
    DivRemPair.Remainder = Builder.CreateSRem(Dividend, Divisor);
 | 
						|
  } else {
 | 
						|
    DivRemPair.Quotient = Builder.CreateUDiv(Dividend, Divisor);
 | 
						|
    DivRemPair.Remainder = Builder.CreateURem(Dividend, Divisor);
 | 
						|
  }
 | 
						|
 | 
						|
  Builder.CreateBr(SuccessorBB);
 | 
						|
  return DivRemPair;
 | 
						|
}
 | 
						|
 | 
						|
/// Add new basic block for fast div and rem operations and put it before
 | 
						|
/// SuccessorBB.
 | 
						|
QuotRemWithBB FastDivInsertionTask::createFastBB(BasicBlock *SuccessorBB) {
 | 
						|
  QuotRemWithBB DivRemPair;
 | 
						|
  DivRemPair.BB = BasicBlock::Create(MainBB->getParent()->getContext(), "",
 | 
						|
                                     MainBB->getParent(), SuccessorBB);
 | 
						|
  IRBuilder<> Builder(DivRemPair.BB, DivRemPair.BB->begin());
 | 
						|
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
 | 
						|
 | 
						|
  Value *Dividend = SlowDivOrRem->getOperand(0);
 | 
						|
  Value *Divisor = SlowDivOrRem->getOperand(1);
 | 
						|
  Value *ShortDivisorV =
 | 
						|
      Builder.CreateCast(Instruction::Trunc, Divisor, BypassType);
 | 
						|
  Value *ShortDividendV =
 | 
						|
      Builder.CreateCast(Instruction::Trunc, Dividend, BypassType);
 | 
						|
 | 
						|
  // udiv/urem because this optimization only handles positive numbers.
 | 
						|
  Value *ShortQV = Builder.CreateUDiv(ShortDividendV, ShortDivisorV);
 | 
						|
  Value *ShortRV = Builder.CreateURem(ShortDividendV, ShortDivisorV);
 | 
						|
  DivRemPair.Quotient =
 | 
						|
      Builder.CreateCast(Instruction::ZExt, ShortQV, getSlowType());
 | 
						|
  DivRemPair.Remainder =
 | 
						|
      Builder.CreateCast(Instruction::ZExt, ShortRV, getSlowType());
 | 
						|
  Builder.CreateBr(SuccessorBB);
 | 
						|
 | 
						|
  return DivRemPair;
 | 
						|
}
 | 
						|
 | 
						|
/// Creates Phi nodes for result of Div and Rem.
 | 
						|
QuotRemPair FastDivInsertionTask::createDivRemPhiNodes(QuotRemWithBB &LHS,
 | 
						|
                                                       QuotRemWithBB &RHS,
 | 
						|
                                                       BasicBlock *PhiBB) {
 | 
						|
  IRBuilder<> Builder(PhiBB, PhiBB->begin());
 | 
						|
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
 | 
						|
  PHINode *QuoPhi = Builder.CreatePHI(getSlowType(), 2);
 | 
						|
  QuoPhi->addIncoming(LHS.Quotient, LHS.BB);
 | 
						|
  QuoPhi->addIncoming(RHS.Quotient, RHS.BB);
 | 
						|
  PHINode *RemPhi = Builder.CreatePHI(getSlowType(), 2);
 | 
						|
  RemPhi->addIncoming(LHS.Remainder, LHS.BB);
 | 
						|
  RemPhi->addIncoming(RHS.Remainder, RHS.BB);
 | 
						|
  return QuotRemPair(QuoPhi, RemPhi);
 | 
						|
}
 | 
						|
 | 
						|
/// Creates a runtime check to test whether both the divisor and dividend fit
 | 
						|
/// into BypassType. The check is inserted at the end of MainBB. True return
 | 
						|
/// value means that the operands fit. Either of the operands may be NULL if it
 | 
						|
/// doesn't need a runtime check.
 | 
						|
Value *FastDivInsertionTask::insertOperandRuntimeCheck(Value *Op1, Value *Op2) {
 | 
						|
  assert((Op1 || Op2) && "Nothing to check");
 | 
						|
  IRBuilder<> Builder(MainBB, MainBB->end());
 | 
						|
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
 | 
						|
 | 
						|
  Value *OrV;
 | 
						|
  if (Op1 && Op2)
 | 
						|
    OrV = Builder.CreateOr(Op1, Op2);
 | 
						|
  else
 | 
						|
    OrV = Op1 ? Op1 : Op2;
 | 
						|
 | 
						|
  // BitMask is inverted to check if the operands are
 | 
						|
  // larger than the bypass type
 | 
						|
  uint64_t BitMask = ~BypassType->getBitMask();
 | 
						|
  Value *AndV = Builder.CreateAnd(OrV, BitMask);
 | 
						|
 | 
						|
  // Compare operand values
 | 
						|
  Value *ZeroV = ConstantInt::getSigned(getSlowType(), 0);
 | 
						|
  return Builder.CreateICmpEQ(AndV, ZeroV);
 | 
						|
}
 | 
						|
 | 
						|
/// Substitutes the div/rem instruction with code that checks the value of the
 | 
						|
/// operands and uses a shorter-faster div/rem instruction when possible.
 | 
						|
Optional<QuotRemPair> FastDivInsertionTask::insertFastDivAndRem() {
 | 
						|
  Value *Dividend = SlowDivOrRem->getOperand(0);
 | 
						|
  Value *Divisor = SlowDivOrRem->getOperand(1);
 | 
						|
 | 
						|
  VisitedSetTy SetL;
 | 
						|
  ValueRange DividendRange = getValueRange(Dividend, SetL);
 | 
						|
  if (DividendRange == VALRNG_LIKELY_LONG)
 | 
						|
    return None;
 | 
						|
 | 
						|
  VisitedSetTy SetR;
 | 
						|
  ValueRange DivisorRange = getValueRange(Divisor, SetR);
 | 
						|
  if (DivisorRange == VALRNG_LIKELY_LONG)
 | 
						|
    return None;
 | 
						|
 | 
						|
  bool DividendShort = (DividendRange == VALRNG_KNOWN_SHORT);
 | 
						|
  bool DivisorShort = (DivisorRange == VALRNG_KNOWN_SHORT);
 | 
						|
 | 
						|
  if (DividendShort && DivisorShort) {
 | 
						|
    // If both operands are known to be short then just replace the long
 | 
						|
    // division with a short one in-place.  Since we're not introducing control
 | 
						|
    // flow in this case, narrowing the division is always a win, even if the
 | 
						|
    // divisor is a constant (and will later get replaced by a multiplication).
 | 
						|
 | 
						|
    IRBuilder<> Builder(SlowDivOrRem);
 | 
						|
    Value *TruncDividend = Builder.CreateTrunc(Dividend, BypassType);
 | 
						|
    Value *TruncDivisor = Builder.CreateTrunc(Divisor, BypassType);
 | 
						|
    Value *TruncDiv = Builder.CreateUDiv(TruncDividend, TruncDivisor);
 | 
						|
    Value *TruncRem = Builder.CreateURem(TruncDividend, TruncDivisor);
 | 
						|
    Value *ExtDiv = Builder.CreateZExt(TruncDiv, getSlowType());
 | 
						|
    Value *ExtRem = Builder.CreateZExt(TruncRem, getSlowType());
 | 
						|
    return QuotRemPair(ExtDiv, ExtRem);
 | 
						|
  }
 | 
						|
 | 
						|
  if (isa<ConstantInt>(Divisor)) {
 | 
						|
    // If the divisor is not a constant, DAGCombiner will convert it to a
 | 
						|
    // multiplication by a magic constant.  It isn't clear if it is worth
 | 
						|
    // introducing control flow to get a narrower multiply.
 | 
						|
    return None;
 | 
						|
  }
 | 
						|
 | 
						|
  // After Constant Hoisting pass, long constants may be represented as
 | 
						|
  // bitcast instructions. As a result, some constants may look like an
 | 
						|
  // instruction at first, and an additional check is necessary to find out if
 | 
						|
  // an operand is actually a constant.
 | 
						|
  if (auto *BCI = dyn_cast<BitCastInst>(Divisor))
 | 
						|
    if (BCI->getParent() == SlowDivOrRem->getParent() &&
 | 
						|
        isa<ConstantInt>(BCI->getOperand(0)))
 | 
						|
      return None;
 | 
						|
 | 
						|
  IRBuilder<> Builder(MainBB, MainBB->end());
 | 
						|
  Builder.SetCurrentDebugLocation(SlowDivOrRem->getDebugLoc());
 | 
						|
 | 
						|
  if (DividendShort && !isSignedOp()) {
 | 
						|
    // If the division is unsigned and Dividend is known to be short, then
 | 
						|
    // either
 | 
						|
    // 1) Divisor is less or equal to Dividend, and the result can be computed
 | 
						|
    //    with a short division.
 | 
						|
    // 2) Divisor is greater than Dividend. In this case, no division is needed
 | 
						|
    //    at all: The quotient is 0 and the remainder is equal to Dividend.
 | 
						|
    //
 | 
						|
    // So instead of checking at runtime whether Divisor fits into BypassType,
 | 
						|
    // we emit a runtime check to differentiate between these two cases. This
 | 
						|
    // lets us entirely avoid a long div.
 | 
						|
 | 
						|
    // Split the basic block before the div/rem.
 | 
						|
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
 | 
						|
    // Remove the unconditional branch from MainBB to SuccessorBB.
 | 
						|
    MainBB->getInstList().back().eraseFromParent();
 | 
						|
    QuotRemWithBB Long;
 | 
						|
    Long.BB = MainBB;
 | 
						|
    Long.Quotient = ConstantInt::get(getSlowType(), 0);
 | 
						|
    Long.Remainder = Dividend;
 | 
						|
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
 | 
						|
    QuotRemPair Result = createDivRemPhiNodes(Fast, Long, SuccessorBB);
 | 
						|
    Value *CmpV = Builder.CreateICmpUGE(Dividend, Divisor);
 | 
						|
    Builder.CreateCondBr(CmpV, Fast.BB, SuccessorBB);
 | 
						|
    return Result;
 | 
						|
  } else {
 | 
						|
    // General case. Create both slow and fast div/rem pairs and choose one of
 | 
						|
    // them at runtime.
 | 
						|
 | 
						|
    // Split the basic block before the div/rem.
 | 
						|
    BasicBlock *SuccessorBB = MainBB->splitBasicBlock(SlowDivOrRem);
 | 
						|
    // Remove the unconditional branch from MainBB to SuccessorBB.
 | 
						|
    MainBB->getInstList().back().eraseFromParent();
 | 
						|
    QuotRemWithBB Fast = createFastBB(SuccessorBB);
 | 
						|
    QuotRemWithBB Slow = createSlowBB(SuccessorBB);
 | 
						|
    QuotRemPair Result = createDivRemPhiNodes(Fast, Slow, SuccessorBB);
 | 
						|
    Value *CmpV = insertOperandRuntimeCheck(DividendShort ? nullptr : Dividend,
 | 
						|
                                            DivisorShort ? nullptr : Divisor);
 | 
						|
    Builder.CreateCondBr(CmpV, Fast.BB, Slow.BB);
 | 
						|
    return Result;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
/// This optimization identifies DIV/REM instructions in a BB that can be
 | 
						|
/// profitably bypassed and carried out with a shorter, faster divide.
 | 
						|
bool llvm::bypassSlowDivision(BasicBlock *BB,
 | 
						|
                              const BypassWidthsTy &BypassWidths) {
 | 
						|
  DivCacheTy PerBBDivCache;
 | 
						|
 | 
						|
  bool MadeChange = false;
 | 
						|
  Instruction *Next = &*BB->begin();
 | 
						|
  while (Next != nullptr) {
 | 
						|
    // We may add instructions immediately after I, but we want to skip over
 | 
						|
    // them.
 | 
						|
    Instruction *I = Next;
 | 
						|
    Next = Next->getNextNode();
 | 
						|
 | 
						|
    // Ignore dead code to save time and avoid bugs.
 | 
						|
    if (I->hasNUses(0))
 | 
						|
      continue;
 | 
						|
 | 
						|
    FastDivInsertionTask Task(I, BypassWidths);
 | 
						|
    if (Value *Replacement = Task.getReplacement(PerBBDivCache)) {
 | 
						|
      I->replaceAllUsesWith(Replacement);
 | 
						|
      I->eraseFromParent();
 | 
						|
      MadeChange = true;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Above we eagerly create divs and rems, as pairs, so that we can efficiently
 | 
						|
  // create divrem machine instructions.  Now erase any unused divs / rems so we
 | 
						|
  // don't leave extra instructions sitting around.
 | 
						|
  for (auto &KV : PerBBDivCache)
 | 
						|
    for (Value *V : {KV.second.Quotient, KV.second.Remainder})
 | 
						|
      RecursivelyDeleteTriviallyDeadInstructions(V);
 | 
						|
 | 
						|
  return MadeChange;
 | 
						|
}
 |