[x86] Teach the x86 DAG combiner to form UNPCKLPS and UNPCKHPS

instructions from the relevant shuffle patterns.

This is the last tweak I'm aware of to generate essentially perfect
v4f32 and v2f64 shuffles with the new vector shuffle lowering up through
SSE4.1. I'm sure I've missed some, and it'd be nice to check since v4f32
is amenable to exhaustive exploration, but these are all of the tricks I'm
aware of.

With AVX there is a new trick to use the VPERMILPS instruction; that's
coming up in a subsequent patch.

llvm-svn: 217761
This commit is contained in:
Chandler Carruth 2014-09-15 11:26:25 +00:00
parent 0ffb093931
commit 12d4a70cbd
2 changed files with 28 additions and 0 deletions

View File

@ -19413,6 +19413,20 @@ static bool combineX86ShuffleChain(SDValue Op, SDValue Root, ArrayRef<int> Mask,
/*AddTo*/ true);
return true;
}
// A mask of <0,0,1,1> or <2,2,3,3> interleaves the low or the high half of
// the input with itself, so it is emitted as UNPCKL or UNPCKH with the same
// value passed as both operands.
if (Mask.equals(0, 0, 1, 1) || Mask.equals(2, 2, 3, 3)) {
bool Lo = Mask.equals(0, 0, 1, 1);
unsigned Shuffle = Lo ? X86ISD::UNPCKL : X86ISD::UNPCKH;
MVT ShuffleVT = MVT::v4f32;
// When Depth is 1 and the root already has the chosen opcode, bail out
// without creating any new nodes.
if (Depth == 1 && Root->getOpcode() == Shuffle)
return false; // Nothing to do!
// Reinterpret the input as v4f32 so the unpack operates on four lanes.
Op = DAG.getNode(ISD::BITCAST, DL, ShuffleVT, Input);
DCI.AddToWorklist(Op.getNode());
// Unary unpack: the bitcast input is used for both operands.
Op = DAG.getNode(Shuffle, DL, ShuffleVT, Op, Op);
DCI.AddToWorklist(Op.getNode());
// Cast the result back to the root's type and hand it to the combiner as the
// replacement for Root.
DCI.CombineTo(Root.getNode(), DAG.getNode(ISD::BITCAST, DL, RootVT, Op),
/*AddTo*/ true);
return true;
}
}
// We always canonicalize the 8 x i16 and 16 x i8 shuffles into their UNPCK

View File

@ -119,6 +119,20 @@ define <4 x float> @shuffle_v4f32_3210(<4 x float> %a, <4 x float> %b) {
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
ret <4 x float> %shuffle
}
; Mask <0,0,1,1> reads only from %a and duplicates each of its two low
; elements. The checks below expect a single unpcklps producing
; xmm0[0,0,1,1], immediately followed by the return.
define <4 x float> @shuffle_v4f32_0011(<4 x float> %a, <4 x float> %b) {
; ALL-LABEL: @shuffle_v4f32_0011
; ALL: unpcklps {{.*}} # xmm0 = xmm0[0,0,1,1]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 1, i32 1>
ret <4 x float> %shuffle
}
; Mask <2,2,3,3> reads only from %a and duplicates each of its two high
; elements. The checks below expect a single unpckhps producing
; xmm0[2,2,3,3], immediately followed by the return.
define <4 x float> @shuffle_v4f32_2233(<4 x float> %a, <4 x float> %b) {
; ALL-LABEL: @shuffle_v4f32_2233
; ALL: unpckhps {{.*}} # xmm0 = xmm0[2,2,3,3]
; ALL-NEXT: retq
%shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 2, i32 2, i32 3, i32 3>
ret <4 x float> %shuffle
}
define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
; SSE2-LABEL: @shuffle_v4f32_0022
; SSE2: shufps {{.*}} # xmm0 = xmm0[0,0,2,2]