[x86] Significantly improve the ability of the new vector shuffle lowering to match VZEXT_MOVL patterns.
I hadn't realized that these had sufficient pattern smarts in the backend to lower zext-ing from the low element of a vector without it being a scalar_to_vector node. They do, and this is how to match a bunch of patterns for movq, movss, etc. There is a weird propensity to end up using pshufd to place the element afterward even though it means domain crossing (or rather, to use xorps+movss to zext the element rather than movq), but that's an orthogonal problem with VZEXT_MOVL that someone should probably look at. llvm-svn: 218977
This commit is contained in:
parent
080cab91e1
commit
971a560cb8
|
|
@ -7801,28 +7801,32 @@ static SDValue lowerVectorShuffleAsElementInsertion(
|
||||||
return SDValue(); // Not inserting into a zero vector.
|
return SDValue(); // Not inserting into a zero vector.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
MVT ExtVT = VT;
|
||||||
|
MVT EltVT = VT.getVectorElementType();
|
||||||
|
|
||||||
// Check for a single input from a SCALAR_TO_VECTOR node.
|
// Check for a single input from a SCALAR_TO_VECTOR node.
|
||||||
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
|
// FIXME: All of this should be canonicalized into INSERT_VECTOR_ELT and
|
||||||
// all the smarts here sunk into that routine. However, the current
|
// all the smarts here sunk into that routine. However, the current
|
||||||
// lowering of BUILD_VECTOR makes that nearly impossible until the old
|
// lowering of BUILD_VECTOR makes that nearly impossible until the old
|
||||||
// vector shuffle lowering is dead.
|
// vector shuffle lowering is dead.
|
||||||
SDValue V2S =
|
if (SDValue V2S = getScalarValueForVectorElement(
|
||||||
getScalarValueForVectorElement(V2, Mask[V2Index] - Mask.size(), DAG);
|
V2, Mask[V2Index] - Mask.size(), DAG)) {
|
||||||
if (!V2S)
|
// We need to zext the scalar if it is smaller than an i32.
|
||||||
|
V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
|
||||||
|
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
|
||||||
|
// Zero-extend directly to i32.
|
||||||
|
ExtVT = MVT::v4i32;
|
||||||
|
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
|
||||||
|
}
|
||||||
|
V2 = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S);
|
||||||
|
} else if (Mask[V2Index] != (int)Mask.size() || EltVT == MVT::i8 ||
|
||||||
|
EltVT == MVT::i16) {
|
||||||
|
// Either not inserting from the low element of the input or the input
|
||||||
|
// element size is too small to use VZEXT_MOVL to clear the high bits.
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
// First, we need to zext the scalar if it is smaller than an i32.
|
|
||||||
MVT ExtVT = VT;
|
|
||||||
MVT EltVT = VT.getVectorElementType();
|
|
||||||
V2S = DAG.getNode(ISD::BITCAST, DL, EltVT, V2S);
|
|
||||||
if (EltVT == MVT::i8 || EltVT == MVT::i16) {
|
|
||||||
// Zero-extend directly to i32.
|
|
||||||
ExtVT = MVT::v4i32;
|
|
||||||
V2S = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, V2S);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT,
|
V2 = DAG.getNode(X86ISD::VZEXT_MOVL, DL, ExtVT, V2);
|
||||||
DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, ExtVT, V2S));
|
|
||||||
if (ExtVT != VT)
|
if (ExtVT != VT)
|
||||||
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
|
V2 = DAG.getNode(ISD::BITCAST, DL, VT, V2);
|
||||||
|
|
||||||
|
|
@ -7998,12 +8002,6 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
|
getV4X86ShuffleImm8ForMask(WidenedMask, DAG)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use dedicated unpack instructions for masks that match their pattern.
|
|
||||||
if (isShuffleEquivalent(Mask, 0, 2))
|
|
||||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
|
|
||||||
if (isShuffleEquivalent(Mask, 1, 3))
|
|
||||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
|
|
||||||
|
|
||||||
// If we have a single input from V2 insert that into V1 if we can do so
|
// If we have a single input from V2 insert that into V1 if we can do so
|
||||||
// cheaply.
|
// cheaply.
|
||||||
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1)
|
if ((Mask[0] >= 2) + (Mask[1] >= 2) == 1)
|
||||||
|
|
@ -8011,6 +8009,12 @@ static SDValue lowerV2I64VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
|
MVT::v2i64, DL, V1, V2, Mask, Subtarget, DAG))
|
||||||
return Insertion;
|
return Insertion;
|
||||||
|
|
||||||
|
// Use dedicated unpack instructions for masks that match their pattern.
|
||||||
|
if (isShuffleEquivalent(Mask, 0, 2))
|
||||||
|
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v2i64, V1, V2);
|
||||||
|
if (isShuffleEquivalent(Mask, 1, 3))
|
||||||
|
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v2i64, V1, V2);
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
|
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v2i64, V1, V2, Mask,
|
||||||
Subtarget, DAG))
|
Subtarget, DAG))
|
||||||
|
|
@ -8275,18 +8279,18 @@ static SDValue lowerV4I32VectorShuffle(SDValue Op, SDValue V1, SDValue V2,
|
||||||
getV4X86ShuffleImm8ForMask(Mask, DAG));
|
getV4X86ShuffleImm8ForMask(Mask, DAG));
|
||||||
}
|
}
|
||||||
|
|
||||||
// Use dedicated unpack instructions for masks that match their pattern.
|
|
||||||
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
|
|
||||||
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
|
|
||||||
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
|
|
||||||
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
|
|
||||||
|
|
||||||
// There are special ways we can lower some single-element blends.
|
// There are special ways we can lower some single-element blends.
|
||||||
if (NumV2Elements == 1)
|
if (NumV2Elements == 1)
|
||||||
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
|
if (SDValue V = lowerVectorShuffleAsElementInsertion(MVT::v4i32, DL, V1, V2,
|
||||||
Mask, Subtarget, DAG))
|
Mask, Subtarget, DAG))
|
||||||
return V;
|
return V;
|
||||||
|
|
||||||
|
// Use dedicated unpack instructions for masks that match their pattern.
|
||||||
|
if (isShuffleEquivalent(Mask, 0, 4, 1, 5))
|
||||||
|
return DAG.getNode(X86ISD::UNPCKL, DL, MVT::v4i32, V1, V2);
|
||||||
|
if (isShuffleEquivalent(Mask, 2, 6, 3, 7))
|
||||||
|
return DAG.getNode(X86ISD::UNPCKH, DL, MVT::v4i32, V1, V2);
|
||||||
|
|
||||||
if (Subtarget->hasSSE41())
|
if (Subtarget->hasSSE41())
|
||||||
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
|
if (SDValue Blend = lowerVectorShuffleAsBlend(DL, MVT::v4i32, V1, V2, Mask,
|
||||||
Subtarget, DAG))
|
Subtarget, DAG))
|
||||||
|
|
|
||||||
|
|
@ -652,41 +652,15 @@ define <2 x i64> @shuffle_v2i64_31_copy(<2 x i64> %nonce, <2 x i64> %a, <2 x i64
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
|
define <2 x i64> @shuffle_v2i64_0z(<2 x i64> %a) {
|
||||||
; SSE2-LABEL: shuffle_v2i64_0z:
|
; SSE-LABEL: shuffle_v2i64_0z:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorpd %xmm1, %xmm1
|
; SSE-NEXT: movq %xmm0, %xmm0
|
||||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
; SSE-NEXT: retq
|
||||||
; SSE2-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; SSE3-LABEL: shuffle_v2i64_0z:
|
; AVX-LABEL: shuffle_v2i64_0z:
|
||||||
; SSE3: # BB#0:
|
; AVX: # BB#0:
|
||||||
; SSE3-NEXT: xorpd %xmm1, %xmm1
|
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
; AVX-NEXT: retq
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v2i64_0z:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorpd %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v2i64_0z:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
|
||||||
; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX1-LABEL: shuffle_v2i64_0z:
|
|
||||||
; AVX1: # BB#0:
|
|
||||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5,6,7]
|
|
||||||
; AVX1-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX2-LABEL: shuffle_v2i64_0z:
|
|
||||||
; AVX2: # BB#0:
|
|
||||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
|
|
||||||
; AVX2-NEXT: retq
|
|
||||||
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
ret <2 x i64> %shuffle
|
ret <2 x i64> %shuffle
|
||||||
}
|
}
|
||||||
|
|
@ -710,15 +684,14 @@ define <2 x i64> @shuffle_v2i64_1z(<2 x i64> %a) {
|
||||||
define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
|
define <2 x i64> @shuffle_v2i64_z0(<2 x i64> %a) {
|
||||||
; SSE-LABEL: shuffle_v2i64_z0:
|
; SSE-LABEL: shuffle_v2i64_z0:
|
||||||
; SSE: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE-NEXT: pxor %xmm1, %xmm1
|
; SSE-NEXT: movq %xmm0, %xmm0
|
||||||
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm0[0]
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||||
; SSE-NEXT: movdqa %xmm1, %xmm0
|
|
||||||
; SSE-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v2i64_z0:
|
; AVX-LABEL: shuffle_v2i64_z0:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
|
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
|
%shuffle = shufflevector <2 x i64> %a, <2 x i64> zeroinitializer, <2 x i32> <i32 2, i32 0>
|
||||||
ret <2 x i64> %shuffle
|
ret <2 x i64> %shuffle
|
||||||
|
|
@ -769,34 +742,14 @@ define <2 x i64> @shuffle_v2i64_z1(<2 x i64> %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
|
define <2 x double> @shuffle_v2f64_0z(<2 x double> %a) {
|
||||||
; SSE2-LABEL: shuffle_v2f64_0z:
|
; SSE-LABEL: shuffle_v2f64_0z:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorpd %xmm1, %xmm1
|
; SSE-NEXT: movq %xmm0, %xmm0
|
||||||
; SSE2-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
; SSE-NEXT: retq
|
||||||
; SSE2-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE3-LABEL: shuffle_v2f64_0z:
|
|
||||||
; SSE3: # BB#0:
|
|
||||||
; SSE3-NEXT: xorpd %xmm1, %xmm1
|
|
||||||
; SSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v2f64_0z:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorpd %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v2f64_0z:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: xorpd %xmm1, %xmm1
|
|
||||||
; SSE41-NEXT: blendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v2f64_0z:
|
; AVX-LABEL: shuffle_v2f64_0z:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vxorpd %xmm1, %xmm1, %xmm1
|
; AVX-NEXT: vmovq %xmm0, %xmm0
|
||||||
; AVX-NEXT: vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
|
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
%shuffle = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 0, i32 3>
|
||||||
ret <2 x double> %shuffle
|
ret <2 x double> %shuffle
|
||||||
|
|
|
||||||
|
|
@ -438,38 +438,17 @@ define <4 x i32> @shuffle_v4i32_4015(<4 x i32> %a, <4 x i32> %b) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
|
define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
|
||||||
; SSE2-LABEL: shuffle_v4f32_4zzz:
|
; SSE-LABEL: shuffle_v4f32_4zzz:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
; SSE-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||||
; SSE2-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
|
||||||
; SSE3-LABEL: shuffle_v4f32_4zzz:
|
|
||||||
; SSE3: # BB#0:
|
|
||||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v4f32_4zzz:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v4f32_4zzz:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSE41-NEXT: blendps {{.*#+}} xmm1 = xmm0[0],xmm1[1,2,3]
|
|
||||||
; SSE41-NEXT: movaps %xmm1, %xmm0
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v4f32_4zzz:
|
; AVX-LABEL: shuffle_v4f32_4zzz:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
||||||
ret <4 x float> %shuffle
|
ret <4 x float> %shuffle
|
||||||
|
|
@ -660,152 +639,71 @@ define <4 x float> @shuffle_v4f32_z6zz(<4 x float> %a) {
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
|
define <4 x i32> @shuffle_v4i32_4zzz(<4 x i32> %a) {
|
||||||
; SSE2-LABEL: shuffle_v4i32_4zzz:
|
; SSE-LABEL: shuffle_v4i32_4zzz:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
; SSE-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
; SSE-NEXT: movaps %xmm1, %xmm0
|
||||||
; SSE2-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
;
|
||||||
; SSE3-LABEL: shuffle_v4i32_4zzz:
|
; AVX-LABEL: shuffle_v4i32_4zzz:
|
||||||
; SSE3: # BB#0:
|
; AVX: # BB#0:
|
||||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
; AVX-NEXT: retq
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v4i32_4zzz:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v4i32_4zzz:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: pxor %xmm1, %xmm1
|
|
||||||
; SSE41-NEXT: pblendw {{.*#+}} xmm1 = xmm0[0,1],xmm1[2,3,4,5,6,7]
|
|
||||||
; SSE41-NEXT: movdqa %xmm1, %xmm0
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX1-LABEL: shuffle_v4i32_4zzz:
|
|
||||||
; AVX1: # BB#0:
|
|
||||||
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3,4,5,6,7]
|
|
||||||
; AVX1-NEXT: retq
|
|
||||||
;
|
|
||||||
; AVX2-LABEL: shuffle_v4i32_4zzz:
|
|
||||||
; AVX2: # BB#0:
|
|
||||||
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
|
|
||||||
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
|
|
||||||
; AVX2-NEXT: retq
|
|
||||||
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
||||||
ret <4 x i32> %shuffle
|
ret <4 x i32> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
|
define <4 x i32> @shuffle_v4i32_z4zz(<4 x i32> %a) {
|
||||||
; SSE2-LABEL: shuffle_v4i32_z4zz:
|
; SSE-LABEL: shuffle_v4i32_z4zz:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
|
; SSE-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,0,1,1]
|
||||||
; SSE2-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
|
||||||
; SSE3-LABEL: shuffle_v4i32_z4zz:
|
|
||||||
; SSE3: # BB#0:
|
|
||||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
|
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v4i32_z4zz:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[2,0]
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[2,0],xmm1[3,0]
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v4i32_z4zz:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v4i32_z4zz:
|
; AVX-LABEL: shuffle_v4i32_z4zz:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,xmm0[0],zero,zero
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,0,1,1]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
|
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 2, i32 4, i32 3, i32 0>
|
||||||
ret <4 x i32> %shuffle
|
ret <4 x i32> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
|
define <4 x i32> @shuffle_v4i32_zz4z(<4 x i32> %a) {
|
||||||
; SSE2-LABEL: shuffle_v4i32_zz4z:
|
; SSE-LABEL: shuffle_v4i32_zz4z:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
|
; SSE-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,0,1]
|
||||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
; SSE-NEXT: retq
|
||||||
; SSE2-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE3-LABEL: shuffle_v4i32_zz4z:
|
|
||||||
; SSE3: # BB#0:
|
|
||||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
|
|
||||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v4i32_zz4z:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,0],xmm0[0,2]
|
|
||||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v4i32_zz4z:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v4i32_zz4z:
|
; AVX-LABEL: shuffle_v4i32_zz4z:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,xmm0[0],zero
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,0,1]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
|
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 0, i32 4, i32 0>
|
||||||
ret <4 x i32> %shuffle
|
ret <4 x i32> %shuffle
|
||||||
}
|
}
|
||||||
|
|
||||||
define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
|
define <4 x i32> @shuffle_v4i32_zuu4(<4 x i32> %a) {
|
||||||
; SSE2-LABEL: shuffle_v4i32_zuu4:
|
; SSE-LABEL: shuffle_v4i32_zuu4:
|
||||||
; SSE2: # BB#0:
|
; SSE: # BB#0:
|
||||||
; SSE2-NEXT: xorps %xmm1, %xmm1
|
; SSE-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE2-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
|
; SSE-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE2-NEXT: movaps %xmm1, %xmm0
|
; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm1[1,1,1,0]
|
||||||
; SSE2-NEXT: retq
|
; SSE-NEXT: retq
|
||||||
;
|
|
||||||
; SSE3-LABEL: shuffle_v4i32_zuu4:
|
|
||||||
; SSE3: # BB#0:
|
|
||||||
; SSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
|
|
||||||
; SSE3-NEXT: movaps %xmm1, %xmm0
|
|
||||||
; SSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSSE3-LABEL: shuffle_v4i32_zuu4:
|
|
||||||
; SSSE3: # BB#0:
|
|
||||||
; SSSE3-NEXT: xorps %xmm1, %xmm1
|
|
||||||
; SSSE3-NEXT: shufps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2,0]
|
|
||||||
; SSSE3-NEXT: movaps %xmm1, %xmm0
|
|
||||||
; SSSE3-NEXT: retq
|
|
||||||
;
|
|
||||||
; SSE41-LABEL: shuffle_v4i32_zuu4:
|
|
||||||
; SSE41: # BB#0:
|
|
||||||
; SSE41-NEXT: insertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
|
|
||||||
; SSE41-NEXT: retq
|
|
||||||
;
|
;
|
||||||
; AVX-LABEL: shuffle_v4i32_zuu4:
|
; AVX-LABEL: shuffle_v4i32_zuu4:
|
||||||
; AVX: # BB#0:
|
; AVX: # BB#0:
|
||||||
; AVX-NEXT: vinsertps {{.*#+}} xmm0 = zero,zero,zero,xmm0[0]
|
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
|
; AVX-NEXT: vmovss %xmm0, %xmm1, %xmm0
|
||||||
|
; AVX-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,0]
|
||||||
; AVX-NEXT: retq
|
; AVX-NEXT: retq
|
||||||
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
|
%shuffle = shufflevector <4 x i32> zeroinitializer, <4 x i32> %a, <4 x i32> <i32 0, i32 undef, i32 undef, i32 4>
|
||||||
ret <4 x i32> %shuffle
|
ret <4 x i32> %shuffle
|
||||||
|
|
|
||||||
|
|
@ -678,8 +678,8 @@ define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
|
||||||
; AVX1-LABEL: insert_reg_and_zero_v4i64:
|
; AVX1-LABEL: insert_reg_and_zero_v4i64:
|
||||||
; AVX1: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX1-NEXT: vmovq %rdi, %xmm0
|
; AVX1-NEXT: vmovq %rdi, %xmm0
|
||||||
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
; AVX1-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_reg_and_zero_v4i64:
|
; AVX2-LABEL: insert_reg_and_zero_v4i64:
|
||||||
|
|
@ -697,8 +697,8 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
||||||
; AVX1-LABEL: insert_mem_and_zero_v4i64:
|
; AVX1-LABEL: insert_mem_and_zero_v4i64:
|
||||||
; AVX1: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX1-NEXT: vmovq (%rdi), %xmm0
|
; AVX1-NEXT: vmovq (%rdi), %xmm0
|
||||||
; AVX1-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; AVX1-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
; AVX1-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||||
; AVX1-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_mem_and_zero_v4i64:
|
; AVX2-LABEL: insert_mem_and_zero_v4i64:
|
||||||
|
|
@ -716,9 +716,8 @@ define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
|
||||||
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
|
||||||
; ALL-LABEL: insert_reg_and_zero_v4f64:
|
; ALL-LABEL: insert_reg_and_zero_v4f64:
|
||||||
; ALL: # BB#0:
|
; ALL: # BB#0:
|
||||||
; ALL-NEXT: # kill: XMM0<def> XMM0<kill> YMM0<def>
|
; ALL-NEXT: vxorps %xmm1, %xmm1, %xmm1
|
||||||
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
; ALL-NEXT: vmovsd %xmm0, %xmm1, %xmm0
|
||||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||||
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
%shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
|
||||||
|
|
@ -729,8 +728,6 @@ define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
|
||||||
; ALL-LABEL: insert_mem_and_zero_v4f64:
|
; ALL-LABEL: insert_mem_and_zero_v4f64:
|
||||||
; ALL: # BB#0:
|
; ALL: # BB#0:
|
||||||
; ALL-NEXT: vmovsd (%rdi), %xmm0
|
; ALL-NEXT: vmovsd (%rdi), %xmm0
|
||||||
; ALL-NEXT: vxorpd %ymm1, %ymm1, %ymm1
|
|
||||||
; ALL-NEXT: vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
|
|
||||||
; ALL-NEXT: retq
|
; ALL-NEXT: retq
|
||||||
%a = load double* %ptr
|
%a = load double* %ptr
|
||||||
%v = insertelement <4 x double> undef, double %a, i32 0
|
%v = insertelement <4 x double> undef, double %a, i32 0
|
||||||
|
|
|
||||||
|
|
@ -95,8 +95,8 @@ define <4 x float> @shuffle_v4f32_4zzz(<4 x float> %a) {
|
||||||
; SSE1-LABEL: shuffle_v4f32_4zzz:
|
; SSE1-LABEL: shuffle_v4f32_4zzz:
|
||||||
; SSE1: # BB#0:
|
; SSE1: # BB#0:
|
||||||
; SSE1-NEXT: xorps %xmm1, %xmm1
|
; SSE1-NEXT: xorps %xmm1, %xmm1
|
||||||
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[1,0]
|
; SSE1-NEXT: movss %xmm0, %xmm1
|
||||||
; SSE1-NEXT: shufps {{.*#+}} xmm0 = xmm0[0,2],xmm1[2,3]
|
; SSE1-NEXT: movaps %xmm1, %xmm0
|
||||||
; SSE1-NEXT: retq
|
; SSE1-NEXT: retq
|
||||||
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
%shuffle = shufflevector <4 x float> zeroinitializer, <4 x float> %a, <4 x i32> <i32 4, i32 1, i32 2, i32 3>
|
||||||
ret <4 x float> %shuffle
|
ret <4 x float> %shuffle
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue