Optimize shufflevector that copies an i64/f64 and zeros the rest.
Summary: Also ran clang-format on the function. The code added is the last `else if` block.

Reviewers: nadav, craig.topper, delena

Subscribers: llvm-commits

Differential Revision: http://reviews.llvm.org/D3518

llvm-svn: 208372
This commit is contained in:
parent
2eb151e29f
commit
e4b482b3ed
|
|
@ -6956,6 +6956,9 @@ SDValue RewriteAsNarrowerShuffle(ShuffleVectorSDNode *SVOp,
|
||||||
unsigned Scale;
|
unsigned Scale;
|
||||||
switch (VT.SimpleTy) {
|
switch (VT.SimpleTy) {
|
||||||
default: llvm_unreachable("Unexpected!");
|
default: llvm_unreachable("Unexpected!");
|
||||||
|
case MVT::v2i64:
|
||||||
|
case MVT::v2f64:
|
||||||
|
return SDValue(SVOp, 0);
|
||||||
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
|
case MVT::v4f32: NewVT = MVT::v2f64; Scale = 2; break;
|
||||||
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
|
case MVT::v4i32: NewVT = MVT::v2i64; Scale = 2; break;
|
||||||
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
|
case MVT::v8i16: NewVT = MVT::v4i32; Scale = 2; break;
|
||||||
|
|
@ -7557,9 +7560,8 @@ static SDValue LowerVectorIntExtend(SDValue Op, const X86Subtarget *Subtarget,
|
||||||
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
|
DAG.getNode(X86ISD::VZEXT, DL, NVT, V1));
|
||||||
}
|
}
|
||||||
|
|
||||||
static SDValue
|
static SDValue NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
|
||||||
NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
|
SelectionDAG &DAG) {
|
||||||
SelectionDAG &DAG) {
|
|
||||||
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(Op);
|
||||||
MVT VT = Op.getSimpleValueType();
|
MVT VT = Op.getSimpleValueType();
|
||||||
SDLoc dl(Op);
|
SDLoc dl(Op);
|
||||||
|
|
@ -7584,31 +7586,29 @@ NormalizeVectorShuffle(SDValue Op, const X86Subtarget *Subtarget,
|
||||||
|
|
||||||
// If the shuffle can be profitably rewritten as a narrower shuffle, then
|
// If the shuffle can be profitably rewritten as a narrower shuffle, then
|
||||||
// do it!
|
// do it!
|
||||||
if (VT == MVT::v8i16 || VT == MVT::v16i8 ||
|
if (VT == MVT::v8i16 || VT == MVT::v16i8 || VT == MVT::v16i16 ||
|
||||||
VT == MVT::v16i16 || VT == MVT::v32i8) {
|
VT == MVT::v32i8) {
|
||||||
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
||||||
if (NewOp.getNode())
|
if (NewOp.getNode())
|
||||||
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
|
return DAG.getNode(ISD::BITCAST, dl, VT, NewOp);
|
||||||
} else if ((VT == MVT::v4i32 ||
|
} else if (VT.is128BitVector() && Subtarget->hasSSE2()) {
|
||||||
(VT == MVT::v4f32 && Subtarget->hasSSE2()))) {
|
|
||||||
// FIXME: Figure out a cleaner way to do this.
|
// FIXME: Figure out a cleaner way to do this.
|
||||||
// Try to make use of movq to zero out the top part.
|
|
||||||
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
|
if (ISD::isBuildVectorAllZeros(V2.getNode())) {
|
||||||
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
||||||
if (NewOp.getNode()) {
|
if (NewOp.getNode()) {
|
||||||
MVT NewVT = NewOp.getSimpleValueType();
|
MVT NewVT = NewOp.getSimpleValueType();
|
||||||
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
|
if (isCommutedMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(),
|
||||||
NewVT, true, false))
|
NewVT, true, false))
|
||||||
return getVZextMovL(VT, NewVT, NewOp.getOperand(0),
|
return getVZextMovL(VT, NewVT, NewOp.getOperand(0), DAG, Subtarget,
|
||||||
DAG, Subtarget, dl);
|
dl);
|
||||||
}
|
}
|
||||||
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
|
} else if (ISD::isBuildVectorAllZeros(V1.getNode())) {
|
||||||
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
SDValue NewOp = RewriteAsNarrowerShuffle(SVOp, DAG);
|
||||||
if (NewOp.getNode()) {
|
if (NewOp.getNode()) {
|
||||||
MVT NewVT = NewOp.getSimpleValueType();
|
MVT NewVT = NewOp.getSimpleValueType();
|
||||||
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
|
if (isMOVLMask(cast<ShuffleVectorSDNode>(NewOp)->getMask(), NewVT))
|
||||||
return getVZextMovL(VT, NewVT, NewOp.getOperand(1),
|
return getVZextMovL(VT, NewVT, NewOp.getOperand(1), DAG, Subtarget,
|
||||||
DAG, Subtarget, dl);
|
dl);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -306,3 +306,11 @@ define void @test20() {
|
||||||
store <3 x double> %a1, <3 x double>* undef, align 1
|
store <3 x double> %a1, <3 x double>* undef, align 1
|
||||||
ret void
|
ret void
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
|
||||||
|
; CHECK-LABEL: test_insert_64_zext
|
||||||
|
; CHECK-NOT: xor
|
||||||
|
; CHECK: vmovq
|
||||||
|
%1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
|
||||||
|
ret <2 x i64> %1
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -221,3 +221,11 @@ entry:
|
||||||
%double2float.i = fptrunc <4 x double> %0 to <4 x float>
|
%double2float.i = fptrunc <4 x double> %0 to <4 x float>
|
||||||
ret <4 x float> %double2float.i
|
ret <4 x float> %double2float.i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
define <2 x i64> @test_insert_64_zext(<2 x i64> %i) {
|
||||||
|
; CHECK-LABEL: test_insert_64_zext
|
||||||
|
; CHECK-NOT: xor
|
||||||
|
; CHECK: movq
|
||||||
|
%1 = shufflevector <2 x i64> %i, <2 x i64> <i64 0, i64 undef>, <2 x i32> <i32 0, i32 2>
|
||||||
|
ret <2 x i64> %1
|
||||||
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue