forked from OSchip/llvm-project
[AArch64 NEON] Fix a bug when lowering BUILD_VECTOR.
DAG.getVectorShuffle() doesn't always return a vector_shuffle node. If the mask is exactly the element sequence of one of its operands (for example, operand_0 is v8i8 and the mask is 0, 1, 2, 3, 4, 5, 6, 7), it returns that operand directly. So a check is added here. llvm-svn: 197967
This commit is contained in:
parent
cd5f3153f5
commit
82bd84aadf
|
|
@ -3957,6 +3957,9 @@ bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
|
||||||
if (V1.getNode() && NumElts == V0NumElts &&
|
if (V1.getNode() && NumElts == V0NumElts &&
|
||||||
V0NumElts == V1.getValueType().getVectorNumElements()) {
|
V0NumElts == V1.getValueType().getVectorNumElements()) {
|
||||||
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
|
SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask);
|
||||||
|
if(Shuffle.getOpcode() != ISD::VECTOR_SHUFFLE)
|
||||||
|
Res = Shuffle;
|
||||||
|
else
|
||||||
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
|
Res = LowerVECTOR_SHUFFLE(Shuffle, DAG);
|
||||||
return true;
|
return true;
|
||||||
} else
|
} else
|
||||||
|
|
|
||||||
|
|
@ -704,3 +704,25 @@ define <4 x i32> @testDUP.v1i32(<1 x i32> %a) {
|
||||||
%f = insertelement <4 x i32> %e, i32 %b, i32 3
|
%f = insertelement <4 x i32> %e, i32 %b, i32 3
|
||||||
ret <4 x i32> %f
|
ret <4 x i32> %f
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <8 x i8> @getl(<16 x i8> %x) #0 {
|
||||||
|
; CHECK-LABEL: getl:
|
||||||
|
; CHECK: ret
|
||||||
|
%vecext = extractelement <16 x i8> %x, i32 0
|
||||||
|
%vecinit = insertelement <8 x i8> undef, i8 %vecext, i32 0
|
||||||
|
%vecext1 = extractelement <16 x i8> %x, i32 1
|
||||||
|
%vecinit2 = insertelement <8 x i8> %vecinit, i8 %vecext1, i32 1
|
||||||
|
%vecext3 = extractelement <16 x i8> %x, i32 2
|
||||||
|
%vecinit4 = insertelement <8 x i8> %vecinit2, i8 %vecext3, i32 2
|
||||||
|
%vecext5 = extractelement <16 x i8> %x, i32 3
|
||||||
|
%vecinit6 = insertelement <8 x i8> %vecinit4, i8 %vecext5, i32 3
|
||||||
|
%vecext7 = extractelement <16 x i8> %x, i32 4
|
||||||
|
%vecinit8 = insertelement <8 x i8> %vecinit6, i8 %vecext7, i32 4
|
||||||
|
%vecext9 = extractelement <16 x i8> %x, i32 5
|
||||||
|
%vecinit10 = insertelement <8 x i8> %vecinit8, i8 %vecext9, i32 5
|
||||||
|
%vecext11 = extractelement <16 x i8> %x, i32 6
|
||||||
|
%vecinit12 = insertelement <8 x i8> %vecinit10, i8 %vecext11, i32 6
|
||||||
|
%vecext13 = extractelement <16 x i8> %x, i32 7
|
||||||
|
%vecinit14 = insertelement <8 x i8> %vecinit12, i8 %vecext13, i32 7
|
||||||
|
ret <8 x i8> %vecinit14
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue