forked from OSchip/llvm-project
				
			Refactor 'vectorizeLoop' no functionality change.
This patch merges LoopVectorize of InnerLoopVectorizer and InnerLoopUnroller by adding checks for VF=1. This helps in erasing the Unroller code that is almost identical to the InnerLoopVectorizer code. llvm-svn: 189391
This commit is contained in:
		
							parent
							
								
									047c61510f
								
							
						
					
					
						commit
						6b41f7cc4c
					
				| 
						 | 
				
			
			@ -354,7 +354,6 @@ public:
 | 
			
		|||
    InnerLoopVectorizer(OrigLoop, SE, LI, DT, DL, TLI, 1, UnrollFactor) { }
 | 
			
		||||
 | 
			
		||||
private:
 | 
			
		||||
  virtual void vectorizeLoop(LoopVectorizationLegality *Legal);
 | 
			
		||||
  virtual void scalarizeInstruction(Instruction *Instr);
 | 
			
		||||
  virtual void vectorizeMemoryInstruction(Instruction *Instr,
 | 
			
		||||
                                          LoopVectorizationLegality *Legal);
 | 
			
		||||
| 
						 | 
				
			
			@ -2049,12 +2048,24 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
 | 
			
		||||
        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
 | 
			
		||||
      // MinMax reductions have the start value as their identity.
 | 
			
		||||
      VectorStart = Identity = Builder.CreateVectorSplat(VF, RdxDesc.StartValue,
 | 
			
		||||
                                                         "minmax.ident");
 | 
			
		||||
      if (VF == 1) {
 | 
			
		||||
        VectorStart = Identity = RdxDesc.StartValue;
 | 
			
		||||
      } else {
 | 
			
		||||
        VectorStart = Identity = Builder.CreateVectorSplat(VF,
 | 
			
		||||
                                                           RdxDesc.StartValue,
 | 
			
		||||
                                                           "minmax.ident");
 | 
			
		||||
      }
 | 
			
		||||
    } else {
 | 
			
		||||
      // Handle other reduction kinds:
 | 
			
		||||
      Constant *Iden =
 | 
			
		||||
      LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
 | 
			
		||||
                                                      VecTy->getScalarType());
 | 
			
		||||
      if (VF == 1) {
 | 
			
		||||
        Identity = Iden;
 | 
			
		||||
        // This vector is the Identity vector where the first element is the
 | 
			
		||||
        // incoming scalar reduction.
 | 
			
		||||
        VectorStart = RdxDesc.StartValue;
 | 
			
		||||
      } else {
 | 
			
		||||
        Identity = ConstantVector::getSplat(VF, Iden);
 | 
			
		||||
 | 
			
		||||
        // This vector is the Identity vector where the first element is the
 | 
			
		||||
| 
						 | 
				
			
			@ -2062,6 +2073,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
        VectorStart = Builder.CreateInsertElement(Identity,
 | 
			
		||||
                                                  RdxDesc.StartValue, Zero);
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Fix the vector-loop phi.
 | 
			
		||||
    // We created the induction variable so we know that the
 | 
			
		||||
| 
						 | 
				
			
			@ -2116,6 +2128,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
                                        ReducedPartRdx, RdxParts[part]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    if (VF > 1) {
 | 
			
		||||
      // VF is a power of 2 so we can emit the reduction using log2(VF) shuffles
 | 
			
		||||
      // and vector ops, reducing the set of values being computed by half each
 | 
			
		||||
      // round.
 | 
			
		||||
| 
						 | 
				
			
			@ -2146,7 +2159,9 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
      }
 | 
			
		||||
 | 
			
		||||
      // The result is in the first element of the vector.
 | 
			
		||||
    Value *Scalar0 = Builder.CreateExtractElement(TmpVec, Builder.getInt32(0));
 | 
			
		||||
      ReducedPartRdx = Builder.CreateExtractElement(TmpVec,
 | 
			
		||||
                                                    Builder.getInt32(0));
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Now, we need to fix the users of the reduction variable
 | 
			
		||||
    // inside and outside of the scalar remainder loop.
 | 
			
		||||
| 
						 | 
				
			
			@ -2165,7 +2180,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
      // incoming bypass edge.
 | 
			
		||||
      if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
 | 
			
		||||
        // Add an edge coming from the bypass.
 | 
			
		||||
        LCSSAPhi->addIncoming(Scalar0, LoopMiddleBlock);
 | 
			
		||||
        LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
    }// end of the LCSSA phi scan.
 | 
			
		||||
| 
						 | 
				
			
			@ -2177,7 +2192,7 @@ InnerLoopVectorizer::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		|||
    assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
 | 
			
		||||
    // Pick the other block.
 | 
			
		||||
    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
 | 
			
		||||
    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, Scalar0);
 | 
			
		||||
    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
 | 
			
		||||
    (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
 | 
			
		||||
  }// end of for each redux variable.
 | 
			
		||||
 
 | 
			
		||||
| 
						 | 
				
			
			@ -4788,155 +4803,6 @@ bool LoopVectorizationCostModel::isConsecutiveLoadOrStore(Instruction *Inst) {
 | 
			
		|||
  return false;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void
 | 
			
		||||
InnerLoopUnroller::vectorizeLoop(LoopVectorizationLegality *Legal) {
 | 
			
		||||
  // In order to support reduction variables we need to be able to unroll
 | 
			
		||||
  // Phi nodes. Phi nodes have cycles, so we need to unroll them in two
 | 
			
		||||
  // stages. See InnerLoopVectorizer::vectorizeLoop for more details.
 | 
			
		||||
  PhiVector RdxPHIsToFix;
 | 
			
		||||
 | 
			
		||||
  // Scan the loop in a topological order to ensure that defs are vectorized
 | 
			
		||||
  // before users.
 | 
			
		||||
  LoopBlocksDFS DFS(OrigLoop);
 | 
			
		||||
  DFS.perform(LI);
 | 
			
		||||
 | 
			
		||||
  // Unroll all of the blocks in the original loop.
 | 
			
		||||
  for (LoopBlocksDFS::RPOIterator bb = DFS.beginRPO(), be = DFS.endRPO();
 | 
			
		||||
       bb != be; ++bb)
 | 
			
		||||
    vectorizeBlockInLoop(Legal, *bb, &RdxPHIsToFix);
 | 
			
		||||
 | 
			
		||||
  // Create the 'reduced' values for each of the induction vars.
 | 
			
		||||
  // The reduced values are the vector values that we scalarize and combine
 | 
			
		||||
  // after the loop is finished.
 | 
			
		||||
  for (PhiVector::iterator it = RdxPHIsToFix.begin(), e = RdxPHIsToFix.end();
 | 
			
		||||
       it != e; ++it) {
 | 
			
		||||
    PHINode *RdxPhi = *it;
 | 
			
		||||
    assert(RdxPhi && "Unable to recover vectorized PHI");
 | 
			
		||||
 | 
			
		||||
    // Find the reduction variable descriptor.
 | 
			
		||||
    assert(Legal->getReductionVars()->count(RdxPhi) &&
 | 
			
		||||
           "Unable to find the reduction variable");
 | 
			
		||||
    LoopVectorizationLegality::ReductionDescriptor RdxDesc =
 | 
			
		||||
    (*Legal->getReductionVars())[RdxPhi];
 | 
			
		||||
 | 
			
		||||
    setDebugLocFromInst(Builder, RdxDesc.StartValue);
 | 
			
		||||
 | 
			
		||||
    // We need to generate a reduction vector from the incoming scalar.
 | 
			
		||||
    // To do so, we need to generate the 'identity' vector and override
 | 
			
		||||
    // one of the elements with the incoming scalar reduction. We need
 | 
			
		||||
    // to do it in the vector-loop preheader.
 | 
			
		||||
    Builder.SetInsertPoint(LoopBypassBlocks.front()->getTerminator());
 | 
			
		||||
 | 
			
		||||
    // This is the vector-clone of the value that leaves the loop.
 | 
			
		||||
    VectorParts &VectorExit = getVectorValue(RdxDesc.LoopExitInstr);
 | 
			
		||||
    Type *VecTy = VectorExit[0]->getType();
 | 
			
		||||
 | 
			
		||||
    // Find the reduction identity variable. Zero for addition, or, xor,
 | 
			
		||||
    // one for multiplication, -1 for And.
 | 
			
		||||
    Value *Identity;
 | 
			
		||||
    Value *VectorStart;
 | 
			
		||||
    if (RdxDesc.Kind == LoopVectorizationLegality::RK_IntegerMinMax ||
 | 
			
		||||
        RdxDesc.Kind == LoopVectorizationLegality::RK_FloatMinMax) {
 | 
			
		||||
      // MinMax reductions have the start value as their identity.
 | 
			
		||||
      VectorStart = Identity = RdxDesc.StartValue;
 | 
			
		||||
 | 
			
		||||
    } else {
 | 
			
		||||
      Identity = LoopVectorizationLegality::getReductionIdentity(RdxDesc.Kind,
 | 
			
		||||
                                                        VecTy->getScalarType());
 | 
			
		||||
 | 
			
		||||
      // This vector is the Identity vector where the first element is the
 | 
			
		||||
      // incoming scalar reduction.
 | 
			
		||||
      VectorStart = RdxDesc.StartValue;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Fix the vector-loop phi.
 | 
			
		||||
    // We created the induction variable so we know that the
 | 
			
		||||
    // preheader is the first entry.
 | 
			
		||||
    BasicBlock *VecPreheader = Induction->getIncomingBlock(0);
 | 
			
		||||
 | 
			
		||||
    // Reductions do not have to start at zero. They can start with
 | 
			
		||||
    // any loop invariant values.
 | 
			
		||||
    VectorParts &VecRdxPhi = WidenMap.get(RdxPhi);
 | 
			
		||||
    BasicBlock *Latch = OrigLoop->getLoopLatch();
 | 
			
		||||
    Value *LoopVal = RdxPhi->getIncomingValueForBlock(Latch);
 | 
			
		||||
    VectorParts &Val = getVectorValue(LoopVal);
 | 
			
		||||
    for (unsigned part = 0; part < UF; ++part) {
 | 
			
		||||
      // Make sure to add the reduction start value only to the
 | 
			
		||||
      // first unroll part.
 | 
			
		||||
      Value *StartVal = (part == 0) ? VectorStart : Identity;
 | 
			
		||||
      cast<PHINode>(VecRdxPhi[part])->addIncoming(StartVal, VecPreheader);
 | 
			
		||||
      cast<PHINode>(VecRdxPhi[part])->addIncoming(Val[part], LoopVectorBody);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Before each round, move the insertion point right between
 | 
			
		||||
    // the PHIs and the values we are going to write.
 | 
			
		||||
    // This allows us to write both PHINodes and the extractelement
 | 
			
		||||
    // instructions.
 | 
			
		||||
    Builder.SetInsertPoint(LoopMiddleBlock->getFirstInsertionPt());
 | 
			
		||||
 | 
			
		||||
    VectorParts RdxParts;
 | 
			
		||||
    setDebugLocFromInst(Builder, RdxDesc.LoopExitInstr);
 | 
			
		||||
    for (unsigned part = 0; part < UF; ++part) {
 | 
			
		||||
      // This PHINode contains the vectorized reduction variable, or
 | 
			
		||||
      // the initial value vector, if we bypass the vector loop.
 | 
			
		||||
      VectorParts &RdxExitVal = getVectorValue(RdxDesc.LoopExitInstr);
 | 
			
		||||
      PHINode *NewPhi = Builder.CreatePHI(VecTy, 2, "rdx.vec.exit.phi");
 | 
			
		||||
      Value *StartVal = (part == 0) ? VectorStart : Identity;
 | 
			
		||||
      for (unsigned I = 0, E = LoopBypassBlocks.size(); I != E; ++I)
 | 
			
		||||
        NewPhi->addIncoming(StartVal, LoopBypassBlocks[I]);
 | 
			
		||||
      NewPhi->addIncoming(RdxExitVal[part], LoopVectorBody);
 | 
			
		||||
      RdxParts.push_back(NewPhi);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Reduce all of the unrolled parts into a single vector.
 | 
			
		||||
    Value *ReducedPartRdx = RdxParts[0];
 | 
			
		||||
    unsigned Op = getReductionBinOp(RdxDesc.Kind);
 | 
			
		||||
    setDebugLocFromInst(Builder, ReducedPartRdx);
 | 
			
		||||
    for (unsigned part = 1; part < UF; ++part) {
 | 
			
		||||
      if (Op != Instruction::ICmp && Op != Instruction::FCmp)
 | 
			
		||||
        ReducedPartRdx = Builder.CreateBinOp((Instruction::BinaryOps)Op,
 | 
			
		||||
                                             RdxParts[part], ReducedPartRdx,
 | 
			
		||||
                                             "bin.rdx");
 | 
			
		||||
      else
 | 
			
		||||
        ReducedPartRdx = createMinMaxOp(Builder, RdxDesc.MinMaxKind,
 | 
			
		||||
                                        ReducedPartRdx, RdxParts[part]);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // Now, we need to fix the users of the reduction variable
 | 
			
		||||
    // inside and outside of the scalar remainder loop.
 | 
			
		||||
    // We know that the loop is in LCSSA form. We need to update the
 | 
			
		||||
    // PHI nodes in the exit blocks.
 | 
			
		||||
    for (BasicBlock::iterator LEI = LoopExitBlock->begin(),
 | 
			
		||||
         LEE = LoopExitBlock->end(); LEI != LEE; ++LEI) {
 | 
			
		||||
      PHINode *LCSSAPhi = dyn_cast<PHINode>(LEI);
 | 
			
		||||
      if (!LCSSAPhi) continue;
 | 
			
		||||
 | 
			
		||||
      // All PHINodes need to have a single entry edge, or two if
 | 
			
		||||
      // we already fixed them.
 | 
			
		||||
      assert(LCSSAPhi->getNumIncomingValues() < 3 && "Invalid LCSSA PHI");
 | 
			
		||||
 | 
			
		||||
      // We found our reduction value exit-PHI. Update it with the
 | 
			
		||||
      // incoming bypass edge.
 | 
			
		||||
      if (LCSSAPhi->getIncomingValue(0) == RdxDesc.LoopExitInstr) {
 | 
			
		||||
        // Add an edge coming from the bypass.
 | 
			
		||||
        LCSSAPhi->addIncoming(ReducedPartRdx, LoopMiddleBlock);
 | 
			
		||||
        break;
 | 
			
		||||
      }
 | 
			
		||||
    }// end of the LCSSA phi scan.
 | 
			
		||||
 | 
			
		||||
    // Fix the scalar loop reduction variable with the incoming reduction sum
 | 
			
		||||
    // from the vector body and from the backedge value.
 | 
			
		||||
    int IncomingEdgeBlockIdx =
 | 
			
		||||
    (RdxPhi)->getBasicBlockIndex(OrigLoop->getLoopLatch());
 | 
			
		||||
    assert(IncomingEdgeBlockIdx >= 0 && "Invalid block index");
 | 
			
		||||
    // Pick the other block.
 | 
			
		||||
    int SelfEdgeBlockIdx = (IncomingEdgeBlockIdx ? 0 : 1);
 | 
			
		||||
    (RdxPhi)->setIncomingValue(SelfEdgeBlockIdx, ReducedPartRdx);
 | 
			
		||||
    (RdxPhi)->setIncomingValue(IncomingEdgeBlockIdx, RdxDesc.LoopExitInstr);
 | 
			
		||||
  }// end of for each redux variable.
 | 
			
		||||
 | 
			
		||||
  fixLCSSAPHIs();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
void InnerLoopUnroller::scalarizeInstruction(Instruction *Instr) {
 | 
			
		||||
  assert(!Instr->getType()->isAggregateType() && "Can't handle vectors");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue