Vectors with different numbers of elements of the same element type can have

the same allocation size but different primitive sizes (e.g., <3 x i32> and <4 x i32>). When ScalarRepl promotes them, it can't use a bit cast but should use a shuffle vector instead. llvm-svn: 129472
2011-04-13 21:40:02 +00:00 · 2011-04-13 21:40:02 +00:00 · 2e5528f0b2
parent b6a37bff21
commit 2e5528f0b2
2 changed files with 110 additions and 6 deletions
--- a/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/llvm/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@ -690,15 +690,45 @@ Value *ConvertToScalarInfo::
 ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
                           uint64_t Offset, IRBuilder<> &Builder) {
  // If the load is of the whole new alloca, no conversion is needed.
-  if (FromVal->getType() == ToType && Offset == 0)
+  const Type *FromType = FromVal->getType();
+  if (FromType == ToType && Offset == 0)
    return FromVal;

  // If the result alloca is a vector type, this is either an element
  // access or a bitcast to another vector type of the same size.
-  if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
+  if (const VectorType *VTy = dyn_cast<VectorType>(FromType)) {
    unsigned ToTypeSize = TD.getTypeAllocSize(ToType);
-    if (ToTypeSize == AllocaSize)
+    if (ToTypeSize == AllocaSize) {
+      if (FromType->getPrimitiveSizeInBits() ==
+          ToType->getPrimitiveSizeInBits())
        return Builder.CreateBitCast(FromVal, ToType, "tmp");
+      else {
+        // Vectors with the same element type can have the same allocation
+        // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>)
+        // In this case, use a shuffle vector instead of a bit cast.
+        const VectorType *ToVTy = dyn_cast<VectorType>(ToType);
+        assert(ToVTy && (ToVTy->getElementType() == VTy->getElementType()) &&
+               "Vectors must have the same element type");
+        LLVMContext &Context = FromVal->getContext();
+        Value *UnV = UndefValue::get(FromType);
+        unsigned numEltsFrom = VTy->getNumElements();
+        unsigned numEltsTo = ToVTy->getNumElements();
+
+        SmallVector<Constant*, 3> Args;
+        unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+        unsigned i;
+        for (i=0; i != minNumElts; ++i)
+          Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
+
+        if (i < numEltsTo) {
+          Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
+          for (; i != numEltsTo; ++i)
+            Args.push_back(UnC);
+        }
+        Constant *Mask = ConstantVector::get(Args);
+        return Builder.CreateShuffleVector(FromVal, UnV, Mask, "tmpV");
+      }
+    }

    if (ToType->isVectorTy()) {
      assert(isPowerOf2_64(AllocaSize / ToTypeSize) &&
@ -837,8 +867,36 @@ ConvertScalar_InsertValue(Value *SV, Value *Old,

    // Changing the whole vector with memset or with an access of a different
    // vector type?
-    if (ValSize == VecSize)
+    if (ValSize == VecSize) {
+      if (VTy->getPrimitiveSizeInBits() ==
+          SV->getType()->getPrimitiveSizeInBits())
        return Builder.CreateBitCast(SV, AllocaType, "tmp");
+      else {
+        // Vectors with the same element type can have the same allocation
+        // size but different primitive sizes (e.g., <3 x i32> and <4 x i32>)
+        // In this case, use a shuffle vector instead of a bit cast.
+        const VectorType *SVVTy = dyn_cast<VectorType>(SV->getType());
+        assert(SVVTy && (SVVTy->getElementType() == VTy->getElementType()) &&
+               "Vectors must have the same element type");
+        Value *UnV = UndefValue::get(SVVTy);
+        unsigned numEltsFrom = SVVTy->getNumElements();
+        unsigned numEltsTo = VTy->getNumElements();
+
+        SmallVector<Constant*, 3> Args;
+        unsigned minNumElts = std::min(numEltsFrom, numEltsTo);
+        unsigned i;
+        for (i=0; i != minNumElts; ++i)
+          Args.push_back(ConstantInt::get(Type::getInt32Ty(Context), i));
+
+        if (i < numEltsTo) {
+          Constant* UnC = UndefValue::get(Type::getInt32Ty(Context));
+          for (; i != numEltsTo; ++i)
+            Args.push_back(UnC);
+        }
+        Constant *Mask = ConstantVector::get(Args);
+        return Builder.CreateShuffleVector(SV, UnV, Mask, "tmpV");
+      }
+    }

    if (SV->getType()->isVectorTy() && isPowerOf2_64(VecSize / ValSize)) {
      assert(Offset == 0 && "Can't insert a value of a smaller vector type at "
--- a/llvm/test/Transforms/ScalarRepl/vector_promote.ll
+++ b/llvm/test/Transforms/ScalarRepl/vector_promote.ll
@ -202,3 +202,49 @@ define float @test13(<4 x float> %x, <2 x i32> %y) {
 ; CHECK-NOT: alloca
 ; CHECK: bitcast <4 x float> %x to i128
 }
+
+define <3 x float> @test14(<3 x float> %x)  {  ; 3-element vector slots written through 4-element pointer casts and read back as 3 elements
+entry:
+  %x.addr = alloca <3 x float>, align 16  ; slot for the float argument
+  %r = alloca <3 x i32>, align 16  ; slot for the integer mask computed below
+  %extractVec = shufflevector <3 x float> %x, <3 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>  ; widen %x to 4 lanes; lane 3 is undef
+  %storetmp = bitcast <3 x float>* %x.addr to <4 x float>*  ; view the 3-element slot as a 4-element vector
+  store <4 x float> %extractVec, <4 x float>* %storetmp, align 16  ; 4-element store into the 3-element alloca
+  %tmp = load <3 x float>* %x.addr, align 16  ; same slot read back as 3 elements
+  %cmp = fcmp une <3 x float> %tmp, zeroinitializer
+  %sext = sext <3 x i1> %cmp to <3 x i32>  ; per-lane result sign-extended to i32
+  %and = and <3 x i32> <i32 1065353216, i32 1065353216, i32 1065353216>, %sext
+  %extractVec1 = shufflevector <3 x i32> %and, <3 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>  ; widen the mask to 4 lanes; lane 3 is undef
+  %storetmp2 = bitcast <3 x i32>* %r to <4 x i32>*  ; view %r as a 4-element vector
+  store <4 x i32> %extractVec1, <4 x i32>* %storetmp2, align 16  ; 4-element store into the 3-element alloca
+  %tmp3 = load <3 x i32>* %r, align 16  ; 3-element load of the value stored as 4 elements
+  %0 = bitcast <3 x i32> %tmp3 to <3 x float>  ; result is not used by the return below
+  %tmp4 = load <3 x float>* %x.addr, align 16
+  ret <3 x float> %tmp4  ; expectations below: no alloca remains, and the 4-to-3 element narrowing of %extractVec1 is a shufflevector
+; CHECK: @test14
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i32> %extractVec1, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+
+define void @test15(<3 x i64>* sret %agg.result, <3 x i64> %x, <3 x i64> %min) {  ; <3 x i64> slots written through <4 x i64> pointer casts, then updated and read as 3 elements
+entry:
+  %x.addr = alloca <3 x i64>, align 32  ; slot for %x
+  %min.addr = alloca <3 x i64>, align 32  ; slot for %min
+  %extractVec = shufflevector <3 x i64> %x, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>  ; widen %x to 4 lanes; lane 3 is undef
+  %storetmp = bitcast <3 x i64>* %x.addr to <4 x i64>*  ; view the 3-element slot as a 4-element vector
+  store <4 x i64> %extractVec, <4 x i64>* %storetmp, align 32  ; 4-element store into the 3-element alloca
+  %extractVec1 = shufflevector <3 x i64> %min, <3 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>  ; widen %min the same way
+  %storetmp2 = bitcast <3 x i64>* %min.addr to <4 x i64>*
+  store <4 x i64> %extractVec1, <4 x i64>* %storetmp2, align 32  ; %min.addr is written here and not read again in this function
+  %tmp = load <3 x i64>* %x.addr  ; 3-element load of the value stored as 4 elements
+  %tmp5 = extractelement <3 x i64> %tmp, i32 0
+  %tmp11 = insertelement <3 x i64> %tmp, i64 %tmp5, i32 0  ; writes element 0 back into the lane it was read from
+  store <3 x i64> %tmp11, <3 x i64>* %x.addr  ; 3-element store back into the same slot
+  %tmp30 = load <3 x i64>* %x.addr, align 32
+  store <3 x i64> %tmp30, <3 x i64>* %agg.result  ; copy the final 3-element value to the sret result
+  ret void  ; expectations below: no alloca remains, and a 4-to-3 element narrowing appears as a shufflevector
+; CHECK: @test15
+; CHECK-NOT: alloca
+; CHECK: shufflevector <4 x i64> %tmpV2, <4 x i64> undef, <3 x i32> <i32 0, i32 1, i32 2>
+}
+