[x86] Hoist the zext-lowering up in the v4i32 lowering routine -- it is
the same speed as pshufd but we can fold loads into the pmovzx
instructions.

This fixes some regressions that came up in the regression test suite
for the new vector shuffle lowering.

llvm-svn: 218733
Chandler Carruth 2014-10-01 02:25:54 +00:00
parent 4f09cd697c
commit 846baf2ca1
2 changed files with 27 additions and 11 deletions
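The load-folding point is the real payoff here: pshufd and pmovzx run at the same speed on register operands, but pmovzx can also take its source straight from memory. A minimal sketch of the kind of case this enables, written in the style of the test file below; the function name and the assembly in the comments are illustrative, not part of this commit:

define <4 x i32> @shuffle_v4i32_0u1u_load(<4 x i32>* %p) {
; With the zext lowering, an SSE4.1 target can fold the load, e.g.:
;   pmovzxdq (%rdi), %xmm0
; while the pshufd lowering needs the value in a register first:
;   movdqa (%rdi), %xmm0
;   pshufd {{.*}} xmm0 = xmm0[0,1,1,3]
  %a = load <4 x i32>* %p
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
  ret <4 x i32> %shuffle
}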


@@ -8214,6 +8214,13 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
   ArrayRef<int> Mask = SVOp->getMask();
   assert(Mask.size() == 4 && "Unexpected mask size for v4 shuffle!");
 
+  // Whenever we can lower this as a zext, that instruction is strictly faster
+  // than any alternative. It also allows us to fold memory operands into the
+  // shuffle in many cases.
+  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
+                                                         Mask, Subtarget, DAG))
+    return ZExt;
+
   int NumV2Elements =
       std::count_if(Mask.begin(), Mask.end(), [](int M) { return M >= 4; });
@@ -8239,12 +8246,6 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
                        getV4X86ShuffleImm8ForMask(Mask, DAG));
   }
 
-  // Whenever we can lower this as a zext, that instruction is strictly faster
-  // than any alternative.
-  if (SDValue ZExt = lowerVectorShuffleAsZeroOrAnyExtend(DL, MVT::v4i32, V1, V2,
-                                                         Mask, Subtarget, DAG))
-    return ZExt;
-
   // Use dedicated unpack instructions for masks that match their pattern.
   if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
     return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
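The hoist matters because the single-input pshufd path earlier in the routine would otherwise claim these masks first, producing the pshufd seen in the old test output below. As for why a shuffle can be a zext at all: with undef lanes free to hold anything, a mask like <0,u,1,u> is exactly a zero- or any-extend of the two low i32 lanes to i64, which pmovzxdq does in one instruction. A sketch of the equivalence on little-endian x86; the value names are illustrative:

; Selecting lanes 0 and 1 into the even positions with zeros between them ...
%zshuf = shufflevector <4 x i32> %a, <4 x i32> zeroinitializer,
                       <4 x i32> <i32 0, i32 4, i32 1, i32 4>
; ... produces the same bits as zero-extending the low half to <2 x i64>:
%lo   = shufflevector <4 x i32> %a, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
%ext  = zext <2 x i32> %lo to <2 x i64>
%cast = bitcast <2 x i64> %ext to <4 x i32>
; For mask <0, undef, 1, undef> the odd lanes are don't-care, so either a
; zero-extend or an any-extend is a valid lowering.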


@@ -937,14 +937,29 @@ define <4 x i32> @shuffle_v4i32_3456(<4 x i32> %a, <4 x i32> %b) {
 }
 
 define <4 x i32> @shuffle_v4i32_0u1u(<4 x i32> %a, <4 x i32> %b) {
-; SSE-LABEL: shuffle_v4i32_0u1u:
-; SSE:       # BB#0:
-; SSE-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
-; SSE-NEXT:    retq
+; SSE2-LABEL: shuffle_v4i32_0u1u:
+; SSE2:       # BB#0:
+; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE2-NEXT:    retq
+;
+; SSE3-LABEL: shuffle_v4i32_0u1u:
+; SSE3:       # BB#0:
+; SSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSE3-NEXT:    retq
+;
+; SSSE3-LABEL: shuffle_v4i32_0u1u:
+; SSSE3:       # BB#0:
+; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[0,1,1,3]
+; SSSE3-NEXT:    retq
+;
+; SSE41-LABEL: shuffle_v4i32_0u1u:
+; SSE41:       # BB#0:
+; SSE41-NEXT:    pmovzxdq %xmm0, %xmm0
+; SSE41-NEXT:    retq
 ;
 ; AVX-LABEL: shuffle_v4i32_0u1u:
 ; AVX:       # BB#0:
-; AVX-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,1]
+; AVX-NEXT:    vpmovzxdq %xmm0, %xmm0
 ; AVX-NEXT:    retq
   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 undef, i32 1, i32 undef>
   ret <4 x i32> %shuffle
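The pre-SSE4.1 output changing from pshufd [0,0,1,1] to [0,1,1,3] is a byproduct of the rerouted lowering, not a correctness change: the IR mask only pins lanes 0 and 2, so both immediates satisfy it. Spelled out lane by lane; the 0xD4 immediate encoding is inferred here, not taken from the commit:

; requested mask:  <0, undef, 1, undef>
; pshufd $0xD4:    xmm0 = xmm0[0,1,1,3]
;   lane 0 <- src[0]   required src[0]: matches
;   lane 1 <- src[1]   undef lane: anything is fine
;   lane 2 <- src[1]   required src[1]: matches
;   lane 3 <- src[3]   undef lane: anything is fine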