853 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C++
		
	
	
	
			
		
		
	
	
			853 lines
		
	
	
		
			28 KiB
		
	
	
	
		
			C++
		
	
	
	
//===- VecUtils.cpp --- Vectorization Utilities ---------------------------===//
 | 
						|
//
 | 
						|
//                     The LLVM Compiler Infrastructure
 | 
						|
//
 | 
						|
// This file is distributed under the University of Illinois Open Source
 | 
						|
// License. See LICENSE.TXT for details.
 | 
						|
//
 | 
						|
//===----------------------------------------------------------------------===//
 | 
						|
#define DEBUG_TYPE "SLP"
 | 
						|
 | 
						|
#include "VecUtils.h"
 | 
						|
#include "llvm/ADT/DenseMap.h"
 | 
						|
#include "llvm/ADT/SmallPtrSet.h"
 | 
						|
#include "llvm/ADT/SmallSet.h"
 | 
						|
#include "llvm/ADT/SmallVector.h"
 | 
						|
#include "llvm/Analysis/AliasAnalysis.h"
 | 
						|
#include "llvm/Analysis/ScalarEvolution.h"
 | 
						|
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
 | 
						|
#include "llvm/Analysis/TargetTransformInfo.h"
 | 
						|
#include "llvm/Analysis/Verifier.h"
 | 
						|
#include "llvm/Analysis/LoopInfo.h"
 | 
						|
#include "llvm/IR/Constants.h"
 | 
						|
#include "llvm/IR/DataLayout.h"
 | 
						|
#include "llvm/IR/Function.h"
 | 
						|
#include "llvm/IR/Instructions.h"
 | 
						|
#include "llvm/IR/Module.h"
 | 
						|
#include "llvm/IR/Type.h"
 | 
						|
#include "llvm/IR/Value.h"
 | 
						|
#include "llvm/Pass.h"
 | 
						|
#include "llvm/Support/CommandLine.h"
 | 
						|
#include "llvm/Support/Debug.h"
 | 
						|
#include "llvm/Support/raw_ostream.h"
 | 
						|
#include "llvm/Target/TargetLibraryInfo.h"
 | 
						|
#include "llvm/Transforms/Scalar.h"
 | 
						|
#include "llvm/Transforms/Utils/Local.h"
 | 
						|
#include <algorithm>
 | 
						|
#include <map>
 | 
						|
 | 
						|
using namespace llvm;
 | 
						|
 | 
						|
static const unsigned MinVecRegSize = 128;
 | 
						|
 | 
						|
static const unsigned RecursionMaxDepth = 6;
 | 
						|
 | 
						|
namespace llvm {
 | 
						|
 | 
						|
BoUpSLP::BoUpSLP(BasicBlock *Bb, ScalarEvolution *S, DataLayout *Dl,
 | 
						|
                 TargetTransformInfo *Tti, AliasAnalysis *Aa, Loop *Lp) :
 | 
						|
  Builder(S->getContext()), BB(Bb), SE(S), DL(Dl), TTI(Tti), AA(Aa), L(Lp) {
 | 
						|
  numberInstructions();
 | 
						|
}
 | 
						|
 | 
						|
void BoUpSLP::numberInstructions() {
 | 
						|
  int Loc = 0;
 | 
						|
  InstrIdx.clear();
 | 
						|
  InstrVec.clear();
 | 
						|
  // Number the instructions in the block.
 | 
						|
  for (BasicBlock::iterator it=BB->begin(), e=BB->end(); it != e; ++it) {
 | 
						|
    InstrIdx[it] = Loc++;
 | 
						|
    InstrVec.push_back(it);
 | 
						|
    assert(InstrVec[InstrIdx[it]] == it && "Invalid allocation");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::getPointerOperand(Value *I) {
 | 
						|
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return LI->getPointerOperand();
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return SI->getPointerOperand();
 | 
						|
  return 0;
 | 
						|
}
 | 
						|
 | 
						|
unsigned BoUpSLP::getAddressSpaceOperand(Value *I) {
 | 
						|
  if (LoadInst *L=dyn_cast<LoadInst>(I)) return L->getPointerAddressSpace();
 | 
						|
  if (StoreInst *S=dyn_cast<StoreInst>(I)) return S->getPointerAddressSpace();
 | 
						|
  return -1;
 | 
						|
}
 | 
						|
 | 
						|
bool BoUpSLP::isConsecutiveAccess(Value *A, Value *B) {
 | 
						|
  Value *PtrA = getPointerOperand(A);
 | 
						|
  Value *PtrB = getPointerOperand(B);
 | 
						|
  unsigned ASA = getAddressSpaceOperand(A);
 | 
						|
  unsigned ASB = getAddressSpaceOperand(B);
 | 
						|
 | 
						|
  // Check that the address spaces match and that the pointers are valid.
 | 
						|
  if (!PtrA || !PtrB || (ASA != ASB)) return false;
 | 
						|
 | 
						|
  // Check that A and B are of the same type.
 | 
						|
  if (PtrA->getType() != PtrB->getType()) return false;
 | 
						|
 | 
						|
  // Calculate the distance.
 | 
						|
  const SCEV *PtrSCEVA = SE->getSCEV(PtrA);
 | 
						|
  const SCEV *PtrSCEVB = SE->getSCEV(PtrB);
 | 
						|
  const SCEV *OffsetSCEV = SE->getMinusSCEV(PtrSCEVA, PtrSCEVB);
 | 
						|
  const SCEVConstant *ConstOffSCEV = dyn_cast<SCEVConstant>(OffsetSCEV);
 | 
						|
 | 
						|
  // Non constant distance.
 | 
						|
  if (!ConstOffSCEV) return false;
 | 
						|
 | 
						|
  int64_t Offset = ConstOffSCEV->getValue()->getSExtValue();
 | 
						|
  Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
 | 
						|
  // The Instructions are connsecutive if the size of the first load/store is
 | 
						|
  // the same as the offset.
 | 
						|
  int64_t Sz = DL->getTypeStoreSize(Ty);
 | 
						|
  return ((-Offset) == Sz);
 | 
						|
}
 | 
						|
 | 
						|
bool BoUpSLP::vectorizeStoreChain(ArrayRef<Value *> Chain, int CostThreshold) {
 | 
						|
  Type *StoreTy = cast<StoreInst>(Chain[0])->getValueOperand()->getType();
 | 
						|
  unsigned Sz = DL->getTypeSizeInBits(StoreTy);
 | 
						|
  unsigned VF = MinVecRegSize / Sz;
 | 
						|
 | 
						|
  if (!isPowerOf2_32(Sz) || VF < 2) return false;
 | 
						|
 | 
						|
  bool Changed = false;
 | 
						|
  // Look for profitable vectorizable trees at all offsets, starting at zero.
 | 
						|
  for (unsigned i = 0, e = Chain.size(); i < e; ++i) {
 | 
						|
    if (i + VF > e) return Changed;
 | 
						|
    DEBUG(dbgs()<<"SLP: Analyzing " << VF << " stores at offset "<< i << "\n");
 | 
						|
    ArrayRef<Value *> Operands = Chain.slice(i, VF);
 | 
						|
 | 
						|
    int Cost = getTreeCost(Operands);
 | 
						|
    DEBUG(dbgs() << "SLP: Found cost=" << Cost << " for VF=" << VF << "\n");
 | 
						|
    if (Cost < CostThreshold) {
 | 
						|
      DEBUG(dbgs() << "SLP: Decided to vectorize cost=" << Cost << "\n");
 | 
						|
      Builder.SetInsertPoint(getInsertionPoint(getLastIndex(Operands,VF)));
 | 
						|
      vectorizeTree(Operands, VF);
 | 
						|
      i += VF - 1;
 | 
						|
      Changed = true;
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  return Changed;
 | 
						|
}
 | 
						|
 | 
						|
bool BoUpSLP::vectorizeStores(ArrayRef<StoreInst *> Stores, int costThreshold) {
 | 
						|
  SetVector<Value*> Heads, Tails;
 | 
						|
  SmallDenseMap<Value*, Value*> ConsecutiveChain;
 | 
						|
 | 
						|
  // We may run into multiple chains that merge into a single chain. We mark the
 | 
						|
  // stores that we vectorized so that we don't visit the same store twice.
 | 
						|
  ValueSet VectorizedStores;
 | 
						|
  bool Changed = false;
 | 
						|
 | 
						|
  // Do a quadratic search on all of the given stores and find
 | 
						|
  // all of the pairs of loads that follow each other.
 | 
						|
  for (unsigned i = 0, e = Stores.size(); i < e; ++i)
 | 
						|
    for (unsigned j = 0; j < e; ++j) {
 | 
						|
      if (i == j) continue;
 | 
						|
      if (isConsecutiveAccess(Stores[i], Stores[j])) {
 | 
						|
        Tails.insert(Stores[j]);
 | 
						|
        Heads.insert(Stores[i]);
 | 
						|
        ConsecutiveChain[Stores[i]] = Stores[j];
 | 
						|
      }
 | 
						|
    }
 | 
						|
 | 
						|
  // For stores that start but don't end a link in the chain:
 | 
						|
  for (SetVector<Value*>::iterator it = Heads.begin(), e = Heads.end();
 | 
						|
       it != e; ++it) {
 | 
						|
    if (Tails.count(*it)) continue;
 | 
						|
 | 
						|
    // We found a store instr that starts a chain. Now follow the chain and try
 | 
						|
    // to vectorize it.
 | 
						|
    ValueList Operands;
 | 
						|
    Value *I = *it;
 | 
						|
    // Collect the chain into a list.
 | 
						|
    while (Tails.count(I) || Heads.count(I)) {
 | 
						|
      if (VectorizedStores.count(I)) break;
 | 
						|
      Operands.push_back(I);
 | 
						|
      // Move to the next value in the chain.
 | 
						|
      I = ConsecutiveChain[I];
 | 
						|
    }
 | 
						|
 | 
						|
    bool Vectorized = vectorizeStoreChain(Operands, costThreshold);
 | 
						|
 | 
						|
    // Mark the vectorized stores so that we don't vectorize them again.
 | 
						|
    if (Vectorized)
 | 
						|
      VectorizedStores.insert(Operands.begin(), Operands.end());
 | 
						|
    Changed |= Vectorized;
 | 
						|
  }
 | 
						|
 | 
						|
  return Changed;
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getScalarizationCost(ArrayRef<Value *> VL) {
 | 
						|
  // Find the type of the operands in VL.
 | 
						|
  Type *ScalarTy = VL[0]->getType();
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    ScalarTy = SI->getValueOperand()->getType();
 | 
						|
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
 | 
						|
  // Find the cost of inserting/extracting values from the vector.
 | 
						|
  return getScalarizationCost(VecTy);
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getScalarizationCost(Type *Ty) {
 | 
						|
  int Cost = 0;
 | 
						|
  for (unsigned i = 0, e = cast<VectorType>(Ty)->getNumElements(); i < e; ++i)
 | 
						|
    Cost += TTI->getVectorInstrCost(Instruction::InsertElement, Ty, i);
 | 
						|
  return Cost;
 | 
						|
}
 | 
						|
 | 
						|
AliasAnalysis::Location BoUpSLP::getLocation(Instruction *I) {
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(I)) return AA->getLocation(SI);
 | 
						|
  if (LoadInst *LI = dyn_cast<LoadInst>(I)) return AA->getLocation(LI);
 | 
						|
  return AliasAnalysis::Location();
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::isUnsafeToSink(Instruction *Src, Instruction *Dst) {
 | 
						|
  assert(Src->getParent() == Dst->getParent() && "Not the same BB");
 | 
						|
  BasicBlock::iterator I = Src, E = Dst;
 | 
						|
  /// Scan all of the instruction from SRC to DST and check if
 | 
						|
  /// the source may alias.
 | 
						|
  for (++I; I != E; ++I) {
 | 
						|
    // Ignore store instructions that are marked as 'ignore'.
 | 
						|
    if (MemBarrierIgnoreList.count(I)) continue;
 | 
						|
    if (Src->mayWriteToMemory()) /* Write */ {
 | 
						|
      if (!I->mayReadOrWriteMemory()) continue;
 | 
						|
    } else /* Read */ {
 | 
						|
      if (!I->mayWriteToMemory()) continue;
 | 
						|
    }
 | 
						|
    AliasAnalysis::Location A = getLocation(&*I);
 | 
						|
    AliasAnalysis::Location B = getLocation(Src);
 | 
						|
 | 
						|
    if (!A.Ptr || !B.Ptr || AA->alias(A, B))
 | 
						|
      return I;
 | 
						|
  }
 | 
						|
  return 0;
 | 
						|
}
 | 
						|
 | 
						|
void BoUpSLP::vectorizeArith(ArrayRef<Value *> Operands) {
 | 
						|
  int LastIdx = getLastIndex(Operands, Operands.size());
 | 
						|
  Instruction *Loc = getInsertionPoint(LastIdx);
 | 
						|
  Builder.SetInsertPoint(Loc);
 | 
						|
 | 
						|
  assert(getFirstUserIndex(Operands, Operands.size()) > LastIdx  &&
 | 
						|
         "Vectorizing with in-tree users");
 | 
						|
 | 
						|
  Value *Vec = vectorizeTree(Operands, Operands.size());
 | 
						|
  // After vectorizing the operands we need to generate extractelement
 | 
						|
  // instructions and replace all of the uses of the scalar values with
 | 
						|
  // the values that we extracted from the vectorized tree.
 | 
						|
  for (unsigned i = 0, e = Operands.size(); i != e; ++i) {
 | 
						|
    Value *S = Builder.CreateExtractElement(Vec, Builder.getInt32(i));
 | 
						|
    Operands[i]->replaceAllUsesWith(S);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getTreeCost(ArrayRef<Value *> VL) {
 | 
						|
  // Get rid of the list of stores that were removed, and from the
 | 
						|
  // lists of instructions with multiple users.
 | 
						|
  MemBarrierIgnoreList.clear();
 | 
						|
  LaneMap.clear();
 | 
						|
  MultiUserVals.clear();
 | 
						|
  MustScalarize.clear();
 | 
						|
  MustExtract.clear();
 | 
						|
 | 
						|
  // Find the location of the last root.
 | 
						|
  int LastRootIndex = getLastIndex(VL, VL.size());
 | 
						|
  int FirstUserIndex = getFirstUserIndex(VL, VL.size());
 | 
						|
 | 
						|
  // Don't vectorize if there are users of the tree roots inside the tree
 | 
						|
  // itself.
 | 
						|
  if (LastRootIndex > FirstUserIndex)
 | 
						|
    return max_cost;
 | 
						|
 | 
						|
  // Scan the tree and find which value is used by which lane, and which values
 | 
						|
  // must be scalarized.
 | 
						|
  getTreeUses_rec(VL, 0);
 | 
						|
 | 
						|
  // Check that instructions with multiple users can be vectorized. Mark unsafe
 | 
						|
  // instructions.
 | 
						|
  for (SetVector<Value*>::iterator it = MultiUserVals.begin(),
 | 
						|
       e = MultiUserVals.end(); it != e; ++it) {
 | 
						|
    // Check that all of the users of this instr are within the tree
 | 
						|
    // and that they are all from the same lane.
 | 
						|
    int Lane = -1;
 | 
						|
    for (Value::use_iterator I = (*it)->use_begin(), E = (*it)->use_end();
 | 
						|
         I != E; ++I) {
 | 
						|
      if (LaneMap.find(*I) == LaneMap.end()) {
 | 
						|
        DEBUG(dbgs()<<"SLP: Instr " << **it << " has multiple users.\n");
 | 
						|
 | 
						|
        // We don't have an ordering problem if the user is not in this basic
 | 
						|
        // block.
 | 
						|
        Instruction *Inst = cast<Instruction>(*I);
 | 
						|
        if (Inst->getParent() != BB) {
 | 
						|
          MustExtract.insert(*it);
 | 
						|
          continue;
 | 
						|
        }
 | 
						|
 | 
						|
        // We don't have an ordering problem if the user is after the last root.
 | 
						|
        int Idx = InstrIdx[Inst];
 | 
						|
        if (Idx < LastRootIndex) {
 | 
						|
          MustScalarize.insert(*it);
 | 
						|
          DEBUG(dbgs()<<"SLP: Adding to MustScalarize "
 | 
						|
                "because of an unsafe out of tree usage.\n");
 | 
						|
          break;
 | 
						|
        }
 | 
						|
 | 
						|
 | 
						|
        DEBUG(dbgs()<<"SLP: Adding to MustExtract "
 | 
						|
              "because of a safe out of tree usage.\n");
 | 
						|
        MustExtract.insert(*it);
 | 
						|
        continue;
 | 
						|
      }
 | 
						|
      if (Lane == -1) Lane = LaneMap[*I];
 | 
						|
      if (Lane != LaneMap[*I]) {
 | 
						|
        MustScalarize.insert(*it);
 | 
						|
        DEBUG(dbgs()<<"SLP: Adding " << **it <<
 | 
						|
              " to MustScalarize because multiple lane use it: "
 | 
						|
              << Lane << " and " << LaneMap[*I] << ".\n");
 | 
						|
        break;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Now calculate the cost of vectorizing the tree.
 | 
						|
  return getTreeCost_rec(VL, 0);
 | 
						|
}
 | 
						|
 | 
						|
void BoUpSLP::getTreeUses_rec(ArrayRef<Value *> VL, unsigned Depth) {
 | 
						|
  if (Depth == RecursionMaxDepth) return;
 | 
						|
 | 
						|
  // Don't handle vectors.
 | 
						|
  if (VL[0]->getType()->isVectorTy()) return;
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    if (SI->getValueOperand()->getType()->isVectorTy()) return;
 | 
						|
 | 
						|
  // Check if all of the operands are constants.
 | 
						|
  bool AllConst = true;
 | 
						|
  bool AllSameScalar = true;
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    AllConst &= isa<Constant>(VL[i]);
 | 
						|
    AllSameScalar &= (VL[0] == VL[i]);
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If one of the instructions is out of this BB, we need to scalarize all.
 | 
						|
    if (I && I->getParent() != BB) return;
 | 
						|
  }
 | 
						|
 | 
						|
  // If all of the operands are identical or constant we have a simple solution.
 | 
						|
  if (AllConst || AllSameScalar) return;
 | 
						|
 | 
						|
  // Scalarize unknown structures.
 | 
						|
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
 | 
						|
  if (!VL0) return;
 | 
						|
 | 
						|
  unsigned Opcode = VL0->getOpcode();
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If not all of the instructions are identical then we have to scalarize.
 | 
						|
    if (!I || Opcode != I->getOpcode()) return;
 | 
						|
  }
 | 
						|
 | 
						|
  for (int i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    // Check that the instruction is only used within
 | 
						|
    // one lane.
 | 
						|
    if (LaneMap.count(VL[i]) && LaneMap[VL[i]] != i) return;
 | 
						|
    // Make this instruction as 'seen' and remember the lane.
 | 
						|
    LaneMap[VL[i]] = i;
 | 
						|
  }
 | 
						|
 | 
						|
  // Mark instructions with multiple users.
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // Remember to check if all of the users of this instr are vectorized
 | 
						|
    // within our tree. At depth zero we have no local users, only external
 | 
						|
    // users that we don't care about.
 | 
						|
    if (Depth && I && I->getNumUses() > 1) {
 | 
						|
      DEBUG(dbgs()<<"SLP: Adding to MultiUserVals "
 | 
						|
            "because it has multiple users:" << *I << " \n");
 | 
						|
      MultiUserVals.insert(I);
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  switch (Opcode) {
 | 
						|
    case Instruction::ZExt:
 | 
						|
    case Instruction::SExt:
 | 
						|
    case Instruction::FPToUI:
 | 
						|
    case Instruction::FPToSI:
 | 
						|
    case Instruction::FPExt:
 | 
						|
    case Instruction::PtrToInt:
 | 
						|
    case Instruction::IntToPtr:
 | 
						|
    case Instruction::SIToFP:
 | 
						|
    case Instruction::UIToFP:
 | 
						|
    case Instruction::Trunc:
 | 
						|
    case Instruction::FPTrunc:
 | 
						|
    case Instruction::BitCast:
 | 
						|
    case Instruction::Add:
 | 
						|
    case Instruction::FAdd:
 | 
						|
    case Instruction::Sub:
 | 
						|
    case Instruction::FSub:
 | 
						|
    case Instruction::Mul:
 | 
						|
    case Instruction::FMul:
 | 
						|
    case Instruction::UDiv:
 | 
						|
    case Instruction::SDiv:
 | 
						|
    case Instruction::FDiv:
 | 
						|
    case Instruction::URem:
 | 
						|
    case Instruction::SRem:
 | 
						|
    case Instruction::FRem:
 | 
						|
    case Instruction::Shl:
 | 
						|
    case Instruction::LShr:
 | 
						|
    case Instruction::AShr:
 | 
						|
    case Instruction::And:
 | 
						|
    case Instruction::Or:
 | 
						|
    case Instruction::Xor: {
 | 
						|
      for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
 | 
						|
        ValueList Operands;
 | 
						|
        // Prepare the operand vector.
 | 
						|
        for (unsigned j = 0; j < VL.size(); ++j)
 | 
						|
          Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
 | 
						|
 | 
						|
        getTreeUses_rec(Operands, Depth+1);
 | 
						|
      }
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    case Instruction::Store: {
 | 
						|
      ValueList Operands;
 | 
						|
      for (unsigned j = 0; j < VL.size(); ++j)
 | 
						|
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
 | 
						|
      getTreeUses_rec(Operands, Depth+1);
 | 
						|
      return;
 | 
						|
    }
 | 
						|
    default:
 | 
						|
    return;
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getTreeCost_rec(ArrayRef<Value *> VL, unsigned Depth) {
 | 
						|
  Type *ScalarTy = VL[0]->getType();
 | 
						|
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    ScalarTy = SI->getValueOperand()->getType();
 | 
						|
 | 
						|
  /// Don't mess with vectors.
 | 
						|
  if (ScalarTy->isVectorTy()) return max_cost;
 | 
						|
  VectorType *VecTy = VectorType::get(ScalarTy, VL.size());
 | 
						|
 | 
						|
  if (Depth == RecursionMaxDepth) return getScalarizationCost(VecTy);
 | 
						|
 | 
						|
  // Check if all of the operands are constants.
 | 
						|
  bool AllConst = true;
 | 
						|
  bool AllSameScalar = true;
 | 
						|
  bool MustScalarizeFlag = false;
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    AllConst &= isa<Constant>(VL[i]);
 | 
						|
    AllSameScalar &= (VL[0] == VL[i]);
 | 
						|
    // Must have a single use.
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    MustScalarizeFlag |= MustScalarize.count(VL[i]);
 | 
						|
    // This instruction is outside the basic block.
 | 
						|
    if (I && I->getParent() != BB)
 | 
						|
      return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Is this a simple vector constant.
 | 
						|
  if (AllConst) return 0;
 | 
						|
 | 
						|
  // If all of the operands are identical we can broadcast them.
 | 
						|
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
 | 
						|
  if (AllSameScalar) {
 | 
						|
    // If we are in a loop, and this is not an instruction (e.g. constant or
 | 
						|
    // argument) or the instruction is defined outside the loop then assume
 | 
						|
    // that the cost is zero.
 | 
						|
    if (L && (!VL0 || !L->contains(VL0)))
 | 
						|
      return 0;
 | 
						|
 | 
						|
    // We need to broadcast the scalar.
 | 
						|
    return TTI->getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy, 0);
 | 
						|
  }
 | 
						|
 | 
						|
  // If this is not a constant, or a scalar from outside the loop then we
 | 
						|
  // need to scalarize it.
 | 
						|
  if (MustScalarizeFlag)
 | 
						|
    return getScalarizationCost(VecTy);
 | 
						|
 | 
						|
  if (!VL0) return getScalarizationCost(VecTy);
 | 
						|
  assert(VL0->getParent() == BB && "Wrong BB");
 | 
						|
 | 
						|
  unsigned Opcode = VL0->getOpcode();
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If not all of the instructions are identical then we have to scalarize.
 | 
						|
    if (!I || Opcode != I->getOpcode()) return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Check if it is safe to sink the loads or the stores.
 | 
						|
  if (Opcode == Instruction::Load || Opcode == Instruction::Store) {
 | 
						|
    int MaxIdx = getLastIndex(VL, VL.size());
 | 
						|
    Instruction *Last = InstrVec[MaxIdx];
 | 
						|
 | 
						|
    for (unsigned i = 0, e = VL.size(); i < e; ++i ) {
 | 
						|
      if (VL[i] == Last) continue;
 | 
						|
      Value *Barrier = isUnsafeToSink(cast<Instruction>(VL[i]), Last);
 | 
						|
      if (Barrier) {
 | 
						|
        DEBUG(dbgs() << "SLP: Can't sink " << *VL[i] << "\n down to " <<
 | 
						|
              *Last << "\n because of " << *Barrier << "\n");
 | 
						|
        return max_cost;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
 | 
						|
  // Calculate the extract cost.
 | 
						|
  unsigned ExternalUserExtractCost = 0;
 | 
						|
  for (unsigned i = 0, e = VL.size(); i < e; ++i)
 | 
						|
    if (MustExtract.count(VL[i]))
 | 
						|
      ExternalUserExtractCost +=
 | 
						|
        TTI->getVectorInstrCost(Instruction::ExtractElement, VecTy, i);
 | 
						|
 | 
						|
  switch (Opcode) {
 | 
						|
  case Instruction::ZExt:
 | 
						|
  case Instruction::SExt:
 | 
						|
  case Instruction::FPToUI:
 | 
						|
  case Instruction::FPToSI:
 | 
						|
  case Instruction::FPExt:
 | 
						|
  case Instruction::PtrToInt:
 | 
						|
  case Instruction::IntToPtr:
 | 
						|
  case Instruction::SIToFP:
 | 
						|
  case Instruction::UIToFP:
 | 
						|
  case Instruction::Trunc:
 | 
						|
  case Instruction::FPTrunc:
 | 
						|
  case Instruction::BitCast: {
 | 
						|
    int Cost = ExternalUserExtractCost;
 | 
						|
    ValueList Operands;
 | 
						|
    Type *SrcTy = VL0->getOperand(0)->getType();
 | 
						|
    // Prepare the operand vector.
 | 
						|
    for (unsigned j = 0; j < VL.size(); ++j) {
 | 
						|
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
 | 
						|
      // Check that the casted type is the same for all users.
 | 
						|
      if (cast<Instruction>(VL[j])->getOperand(0)->getType() != SrcTy)
 | 
						|
        return getScalarizationCost(VecTy);
 | 
						|
    }
 | 
						|
 | 
						|
    Cost += getTreeCost_rec(Operands, Depth+1);
 | 
						|
    if (Cost >= max_cost) return max_cost;
 | 
						|
 | 
						|
    // Calculate the cost of this instruction.
 | 
						|
    int ScalarCost = VL.size() * TTI->getCastInstrCost(VL0->getOpcode(),
 | 
						|
                                                       VL0->getType(), SrcTy);
 | 
						|
 | 
						|
    VectorType *SrcVecTy = VectorType::get(SrcTy, VL.size());
 | 
						|
    int VecCost = TTI->getCastInstrCost(VL0->getOpcode(), VecTy, SrcVecTy);
 | 
						|
    Cost += (VecCost - ScalarCost);
 | 
						|
    return Cost;
 | 
						|
  }
 | 
						|
  case Instruction::Add:
 | 
						|
  case Instruction::FAdd:
 | 
						|
  case Instruction::Sub:
 | 
						|
  case Instruction::FSub:
 | 
						|
  case Instruction::Mul:
 | 
						|
  case Instruction::FMul:
 | 
						|
  case Instruction::UDiv:
 | 
						|
  case Instruction::SDiv:
 | 
						|
  case Instruction::FDiv:
 | 
						|
  case Instruction::URem:
 | 
						|
  case Instruction::SRem:
 | 
						|
  case Instruction::FRem:
 | 
						|
  case Instruction::Shl:
 | 
						|
  case Instruction::LShr:
 | 
						|
  case Instruction::AShr:
 | 
						|
  case Instruction::And:
 | 
						|
  case Instruction::Or:
 | 
						|
  case Instruction::Xor: {
 | 
						|
    int Cost = ExternalUserExtractCost;
 | 
						|
    // Calculate the cost of all of the operands.
 | 
						|
    for (unsigned i = 0, e = VL0->getNumOperands(); i < e; ++i) {
 | 
						|
      ValueList Operands;
 | 
						|
      // Prepare the operand vector.
 | 
						|
      for (unsigned j = 0; j < VL.size(); ++j)
 | 
						|
        Operands.push_back(cast<Instruction>(VL[j])->getOperand(i));
 | 
						|
 | 
						|
      Cost += getTreeCost_rec(Operands, Depth+1);
 | 
						|
      if (Cost >= max_cost) return max_cost;
 | 
						|
    }
 | 
						|
 | 
						|
    // Calculate the cost of this instruction.
 | 
						|
    int ScalarCost = VecTy->getNumElements() *
 | 
						|
      TTI->getArithmeticInstrCost(Opcode, ScalarTy);
 | 
						|
 | 
						|
    int VecCost = TTI->getArithmeticInstrCost(Opcode, VecTy);
 | 
						|
    Cost += (VecCost - ScalarCost);
 | 
						|
    return Cost;
 | 
						|
  }
 | 
						|
  case Instruction::Load: {
 | 
						|
    // If we are scalarize the loads, add the cost of forming the vector.
 | 
						|
    for (unsigned i = 0, e = VL.size()-1; i < e; ++i)
 | 
						|
      if (!isConsecutiveAccess(VL[i], VL[i+1]))
 | 
						|
        return getScalarizationCost(VecTy);
 | 
						|
 | 
						|
    // Cost of wide load - cost of scalar loads.
 | 
						|
    int ScalarLdCost = VecTy->getNumElements() *
 | 
						|
      TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
 | 
						|
    int VecLdCost = TTI->getMemoryOpCost(Instruction::Load, ScalarTy, 1, 0);
 | 
						|
    return VecLdCost - ScalarLdCost + ExternalUserExtractCost;
 | 
						|
  }
 | 
						|
  case Instruction::Store: {
 | 
						|
    // We know that we can merge the stores. Calculate the cost.
 | 
						|
    int ScalarStCost = VecTy->getNumElements() *
 | 
						|
      TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1, 0);
 | 
						|
    int VecStCost = TTI->getMemoryOpCost(Instruction::Store, ScalarTy, 1,0);
 | 
						|
    int StoreCost = VecStCost - ScalarStCost;
 | 
						|
 | 
						|
    ValueList Operands;
 | 
						|
    for (unsigned j = 0; j < VL.size(); ++j) {
 | 
						|
      Operands.push_back(cast<Instruction>(VL[j])->getOperand(0));
 | 
						|
      MemBarrierIgnoreList.insert(VL[j]);
 | 
						|
    }
 | 
						|
 | 
						|
    int TotalCost = StoreCost + getTreeCost_rec(Operands, Depth + 1);
 | 
						|
    return TotalCost + ExternalUserExtractCost;
 | 
						|
  }
 | 
						|
  default:
 | 
						|
    // Unable to vectorize unknown instructions.
 | 
						|
    return getScalarizationCost(VecTy);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getLastIndex(ArrayRef<Value *> VL, unsigned VF) {
 | 
						|
  int MaxIdx = InstrIdx[BB->getFirstNonPHI()];
 | 
						|
  for (unsigned i = 0; i < VF; ++i )
 | 
						|
    MaxIdx = std::max(MaxIdx, InstrIdx[VL[i]]);
 | 
						|
  return MaxIdx;
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getFirstUserIndex(ArrayRef<Value *> VL, unsigned VF) {
 | 
						|
  // Find the first user of the values.
 | 
						|
  int FirstUser = InstrVec.size();
 | 
						|
  for (unsigned i = 0; i < VF; ++i) {
 | 
						|
    for (Value::use_iterator U = VL[i]->use_begin(), UE = VL[i]->use_end();
 | 
						|
         U != UE; ++U) {
 | 
						|
      Instruction *Instr = dyn_cast<Instruction>(*U);
 | 
						|
      if (!Instr || Instr->getParent() != BB)
 | 
						|
        continue;
 | 
						|
 | 
						|
      FirstUser = std::min(FirstUser, InstrIdx[Instr]);
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return FirstUser;
 | 
						|
}
 | 
						|
 | 
						|
int BoUpSLP::getLastIndex(Instruction *I, Instruction *J) {
 | 
						|
  assert(I->getParent() == BB && "Invalid parent for instruction I");
 | 
						|
  assert(J->getParent() == BB && "Invalid parent for instruction J");
 | 
						|
  return std::max(InstrIdx[I],InstrIdx[J]);
 | 
						|
}
 | 
						|
 | 
						|
Instruction *BoUpSLP::getInsertionPoint(unsigned Index) {
 | 
						|
  return InstrVec[Index + 1];
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::Scalarize(ArrayRef<Value *> VL, VectorType *Ty) {
 | 
						|
  Value *Vec = UndefValue::get(Ty);
 | 
						|
  for (unsigned i=0; i < Ty->getNumElements(); ++i) {
 | 
						|
    // Generate the 'InsertElement' instruction.
 | 
						|
    Vec = Builder.CreateInsertElement(Vec, VL[i], Builder.getInt32(i));
 | 
						|
    // Remember that this instruction is used as part of a 'gather' sequence.
 | 
						|
    // The caller of the bottom-up slp vectorizer can try to hoist the sequence
 | 
						|
    // if the users are outside of the basic block.
 | 
						|
    GatherInstructions.push_back(Vec);
 | 
						|
  }
 | 
						|
 | 
						|
  for (unsigned i = 0; i < Ty->getNumElements(); ++i)
 | 
						|
    VectorizedValues[VL[i]] = Vec;
 | 
						|
 | 
						|
  return Vec;
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::vectorizeTree(ArrayRef<Value *> VL, int VF) {
 | 
						|
  Value *V = vectorizeTree_rec(VL, VF);
 | 
						|
 | 
						|
  int LastInstrIdx = getLastIndex(VL, VL.size());
 | 
						|
  for (SetVector<Value*>::iterator it = MustExtract.begin(),
 | 
						|
       e = MustExtract.end(); it != e; ++it) {
 | 
						|
    Instruction *I = cast<Instruction>(*it);
 | 
						|
 | 
						|
    // This is a scalarized value, so we can use the original value.
 | 
						|
    // No need to extract from the vector.
 | 
						|
    if (!LaneMap.count(I))
 | 
						|
      continue;
 | 
						|
 | 
						|
    Value *Vec = VectorizedValues[I];
 | 
						|
    // We decided not to vectorize I because one of its users was not
 | 
						|
    // vectorizerd. This is okay.
 | 
						|
    if (!Vec)
 | 
						|
      continue;
 | 
						|
 | 
						|
    Value *Idx = Builder.getInt32(LaneMap[I]);
 | 
						|
    Value *Extract = Builder.CreateExtractElement(Vec, Idx);
 | 
						|
    bool Replaced = false;
 | 
						|
    for (Value::use_iterator U = I->use_begin(), UE = I->use_end(); U != UE;
 | 
						|
         ++U) {
 | 
						|
      Instruction *UI = cast<Instruction>(*U);
 | 
						|
      if (UI->getParent() != I->getParent() || InstrIdx[UI] > LastInstrIdx)
 | 
						|
        UI->replaceUsesOfWith(I ,Extract);
 | 
						|
      Replaced = true;
 | 
						|
    }
 | 
						|
    assert(Replaced && "Must replace at least one outside user");
 | 
						|
    (void)Replaced;
 | 
						|
  }
 | 
						|
 | 
						|
  // We moved some instructions around. We have to number them again
 | 
						|
  // before we can do any analysis.
 | 
						|
  numberInstructions();
 | 
						|
  MustScalarize.clear();
 | 
						|
  MustExtract.clear();
 | 
						|
  VectorizedValues.clear();
 | 
						|
  return V;
 | 
						|
}
 | 
						|
 | 
						|
Value *BoUpSLP::vectorizeTree_rec(ArrayRef<Value *> VL, int VF) {
 | 
						|
  Type *ScalarTy = VL[0]->getType();
 | 
						|
  if (StoreInst *SI = dyn_cast<StoreInst>(VL[0]))
 | 
						|
    ScalarTy = SI->getValueOperand()->getType();
 | 
						|
  VectorType *VecTy = VectorType::get(ScalarTy, VF);
 | 
						|
 | 
						|
  // Check if all of the operands are constants or identical.
 | 
						|
  bool AllConst = true;
 | 
						|
  bool AllSameScalar = true;
 | 
						|
  for (unsigned i = 0, e = VF; i < e; ++i) {
 | 
						|
    AllConst &= isa<Constant>(VL[i]);
 | 
						|
    AllSameScalar &= (VL[0] == VL[i]);
 | 
						|
    // The instruction must be in the same BB, and it must be vectorizable.
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    if (MustScalarize.count(VL[i]) || (I && I->getParent() != BB))
 | 
						|
      return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  // Check that this is a simple vector constant.
 | 
						|
  if (AllConst || AllSameScalar)
 | 
						|
    return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
  // Scalarize unknown structures.
 | 
						|
  Instruction *VL0 = dyn_cast<Instruction>(VL[0]);
 | 
						|
  if (!VL0)
 | 
						|
    return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
  if (VectorizedValues.count(VL0)) {
 | 
						|
    Value * Vec = VectorizedValues[VL0];
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      VectorizedValues[VL[i]] = Vec;
 | 
						|
    return Vec;
 | 
						|
  }
 | 
						|
 | 
						|
  unsigned Opcode = VL0->getOpcode();
 | 
						|
  for (unsigned i = 0, e = VF; i < e; ++i) {
 | 
						|
    Instruction *I = dyn_cast<Instruction>(VL[i]);
 | 
						|
    // If not all of the instructions are identical then we have to scalarize.
 | 
						|
    if (!I || Opcode != I->getOpcode())
 | 
						|
      return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
 | 
						|
  switch (Opcode) {
 | 
						|
  case Instruction::ZExt:
 | 
						|
  case Instruction::SExt:
 | 
						|
  case Instruction::FPToUI:
 | 
						|
  case Instruction::FPToSI:
 | 
						|
  case Instruction::FPExt:
 | 
						|
  case Instruction::PtrToInt:
 | 
						|
  case Instruction::IntToPtr:
 | 
						|
  case Instruction::SIToFP:
 | 
						|
  case Instruction::UIToFP:
 | 
						|
  case Instruction::Trunc:
 | 
						|
  case Instruction::FPTrunc:
 | 
						|
  case Instruction::BitCast: {
 | 
						|
    ValueList INVL;
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      INVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
 | 
						|
    Value *InVec = vectorizeTree_rec(INVL, VF);
 | 
						|
    CastInst *CI = dyn_cast<CastInst>(VL0);
 | 
						|
    Value *V = Builder.CreateCast(CI->getOpcode(), InVec, VecTy);
 | 
						|
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      VectorizedValues[VL[i]] = V;
 | 
						|
 | 
						|
    return V;
 | 
						|
  }
 | 
						|
  case Instruction::Add:
 | 
						|
  case Instruction::FAdd:
 | 
						|
  case Instruction::Sub:
 | 
						|
  case Instruction::FSub:
 | 
						|
  case Instruction::Mul:
 | 
						|
  case Instruction::FMul:
 | 
						|
  case Instruction::UDiv:
 | 
						|
  case Instruction::SDiv:
 | 
						|
  case Instruction::FDiv:
 | 
						|
  case Instruction::URem:
 | 
						|
  case Instruction::SRem:
 | 
						|
  case Instruction::FRem:
 | 
						|
  case Instruction::Shl:
 | 
						|
  case Instruction::LShr:
 | 
						|
  case Instruction::AShr:
 | 
						|
  case Instruction::And:
 | 
						|
  case Instruction::Or:
 | 
						|
  case Instruction::Xor: {
 | 
						|
    ValueList LHSVL, RHSVL;
 | 
						|
    for (int i = 0; i < VF; ++i) {
 | 
						|
      LHSVL.push_back(cast<Instruction>(VL[i])->getOperand(0));
 | 
						|
      RHSVL.push_back(cast<Instruction>(VL[i])->getOperand(1));
 | 
						|
    }
 | 
						|
 | 
						|
    Value *LHS = vectorizeTree_rec(LHSVL, VF);
 | 
						|
    Value *RHS = vectorizeTree_rec(RHSVL, VF);
 | 
						|
    BinaryOperator *BinOp = cast<BinaryOperator>(VL0);
 | 
						|
    Value *V = Builder.CreateBinOp(BinOp->getOpcode(), LHS,RHS);
 | 
						|
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      VectorizedValues[VL[i]] = V;
 | 
						|
 | 
						|
    return V;
 | 
						|
  }
 | 
						|
  case Instruction::Load: {
 | 
						|
    LoadInst *LI = cast<LoadInst>(VL0);
 | 
						|
    unsigned Alignment = LI->getAlignment();
 | 
						|
 | 
						|
    // Check if all of the loads are consecutive.
 | 
						|
    for (unsigned i = 1, e = VF; i < e; ++i)
 | 
						|
      if (!isConsecutiveAccess(VL[i-1], VL[i]))
 | 
						|
        return Scalarize(VL, VecTy);
 | 
						|
 | 
						|
    // Loads are inserted at the head of the tree because we don't want to sink
 | 
						|
    // them all the way down past store instructions.
 | 
						|
    Instruction *Loc = getInsertionPoint(getLastIndex(VL, VL.size()));
 | 
						|
    IRBuilder<> LoadBuilder(Loc);
 | 
						|
    Value *VecPtr = LoadBuilder.CreateBitCast(LI->getPointerOperand(),
 | 
						|
                                              VecTy->getPointerTo());
 | 
						|
    LI = LoadBuilder.CreateLoad(VecPtr);
 | 
						|
    LI->setAlignment(Alignment);
 | 
						|
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      VectorizedValues[VL[i]] = LI;
 | 
						|
 | 
						|
    return LI;
 | 
						|
  }
 | 
						|
  case Instruction::Store: {
 | 
						|
    StoreInst *SI = cast<StoreInst>(VL0);
 | 
						|
    unsigned Alignment = SI->getAlignment();
 | 
						|
 | 
						|
    ValueList ValueOp;
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      ValueOp.push_back(cast<StoreInst>(VL[i])->getValueOperand());
 | 
						|
 | 
						|
    Value *VecValue = vectorizeTree_rec(ValueOp, VF);
 | 
						|
    Value *VecPtr = Builder.CreateBitCast(SI->getPointerOperand(),
 | 
						|
                                          VecTy->getPointerTo());
 | 
						|
    Builder.CreateStore(VecValue, VecPtr)->setAlignment(Alignment);
 | 
						|
 | 
						|
    for (int i = 0; i < VF; ++i)
 | 
						|
      cast<Instruction>(VL[i])->eraseFromParent();
 | 
						|
    return 0;
 | 
						|
  }
 | 
						|
  default:
 | 
						|
    return Scalarize(VL, VecTy);
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
} // end of namespace
 |