[X86] SRL non-LSB extracts when folding to truncating broadcasts.
Now that we recognize extracts of non-least-significant bits, we can support them (by shifting the scalar right before truncating) instead of bailing out.
That is, we can fold:
(v8i16 (shufflevector
(v8i16 (bitcast (v4i32 (build_vector X, Y, ...)))),
<1,1,...,1>))
into:
(v8i16 (vbroadcast (i16 (trunc (srl X, 16)))))
llvm-svn: 252362
This commit is contained in:
parent
68614a36d1
commit
05a0514b12
|
|
@ -7896,15 +7896,20 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0,
|
||||||
const unsigned Scale = V0EltSize / EltSize;
|
const unsigned Scale = V0EltSize / EltSize;
|
||||||
const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
|
const unsigned V0BroadcastIdx = BroadcastIdx / Scale;
|
||||||
|
|
||||||
// If we're extracting non-least-significant bits, this isn't a truncation.
|
|
||||||
if (BroadcastIdx % Scale)
|
|
||||||
return SDValue();
|
|
||||||
|
|
||||||
if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
|
if ((V0Opc != ISD::SCALAR_TO_VECTOR || V0BroadcastIdx != 0) &&
|
||||||
V0Opc != ISD::BUILD_VECTOR)
|
V0Opc != ISD::BUILD_VECTOR)
|
||||||
return SDValue();
|
return SDValue();
|
||||||
|
|
||||||
SDValue Scalar = V0.getOperand(V0BroadcastIdx);
|
SDValue Scalar = V0.getOperand(V0BroadcastIdx);
|
||||||
|
|
||||||
|
// If we're extracting non-least-significant bits, shift so we can truncate.
|
||||||
|
// Hopefully, we can fold away the trunc/srl/load into the broadcast.
|
||||||
|
// Even if we can't (and !isShuffleFoldableLoad(Scalar)), prefer
|
||||||
|
// vpbroadcast+vmovd+shr to vpshufb(m)+vmovd.
|
||||||
|
if (const int OffsetIdx = BroadcastIdx % Scale)
|
||||||
|
Scalar = DAG.getNode(ISD::SRL, DL, Scalar.getValueType(), Scalar,
|
||||||
|
DAG.getConstant(OffsetIdx * EltSize, DL, Scalar.getValueType()));
|
||||||
|
|
||||||
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
|
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
|
||||||
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
|
DAG.getNode(ISD::TRUNCATE, DL, EltVT, Scalar));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1482,11 +1482,16 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_i32(i32* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt1_mem_v16i8_i32:
|
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_i32:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_i32:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpbroadcastb 1(%rdi), %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
||||||
|
|
@ -1516,11 +1521,16 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_i32(i32* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt2_mem_v16i8_i32:
|
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_i32:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_i32:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: vpbroadcastb 2(%rdi), %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
%tmp2 = bitcast <4 x i32> %tmp1 to <16 x i8>
|
||||||
|
|
@ -1553,12 +1563,20 @@ define <16 x i8> @insert_dup_elt1_mem_v16i8_sext_i8(i8* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
|
; AVX1-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: movsbl (%rdi), %eax
|
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||||
; AVX-NEXT: vmovd %eax, %xmm0
|
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt1_mem_v16i8_sext_i8:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movsbl (%rdi), %eax
|
||||||
|
; AVX2-NEXT: shrl $8, %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i8, i8* %ptr, align 1
|
%tmp = load i8, i8* %ptr, align 1
|
||||||
%tmp1 = sext i8 %tmp to i32
|
%tmp1 = sext i8 %tmp to i32
|
||||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||||
|
|
@ -1592,12 +1610,20 @@ define <16 x i8> @insert_dup_elt2_mem_v16i8_sext_i8(i8* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
|
; AVX1-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: movsbl (%rdi), %eax
|
; AVX1-NEXT: movsbl (%rdi), %eax
|
||||||
; AVX-NEXT: vmovd %eax, %xmm0
|
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt2_mem_v16i8_sext_i8:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movsbl (%rdi), %eax
|
||||||
|
; AVX2-NEXT: shrl $16, %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastb %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i8, i8* %ptr, align 1
|
%tmp = load i8, i8* %ptr, align 1
|
||||||
%tmp1 = sext i8 %tmp to i32
|
%tmp1 = sext i8 %tmp to i32
|
||||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||||
|
|
|
||||||
|
|
@ -2250,11 +2250,18 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt1_mem_v8i16_i32:
|
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_i32:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
||||||
|
|
@ -2283,11 +2290,18 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt3_mem_v8i16_i32:
|
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_i32:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
||||||
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
%tmp2 = bitcast <4 x i32> %tmp1 to <8 x i16>
|
||||||
|
|
@ -2319,12 +2333,20 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_sext_i16(i16* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
|
; AVX1-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: movswl (%rdi), %eax
|
; AVX1-NEXT: movswl (%rdi), %eax
|
||||||
; AVX-NEXT: vmovd %eax, %xmm0
|
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_sext_i16:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movswl (%rdi), %eax
|
||||||
|
; AVX2-NEXT: shrl $16, %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i16, i16* %ptr, align 2
|
%tmp = load i16, i16* %ptr, align 2
|
||||||
%tmp1 = sext i16 %tmp to i32
|
%tmp1 = sext i16 %tmp to i32
|
||||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 0
|
||||||
|
|
@ -2357,12 +2379,20 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_sext_i16(i16* %ptr) {
|
||||||
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; SSE41-NEXT: retq
|
; SSE41-NEXT: retq
|
||||||
;
|
;
|
||||||
; AVX-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
|
; AVX1-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
|
||||||
; AVX: # BB#0:
|
; AVX1: # BB#0:
|
||||||
; AVX-NEXT: movswl (%rdi), %eax
|
; AVX1-NEXT: movswl (%rdi), %eax
|
||||||
; AVX-NEXT: vmovd %eax, %xmm0
|
; AVX1-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
||||||
; AVX-NEXT: retq
|
; AVX1-NEXT: retq
|
||||||
|
;
|
||||||
|
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_sext_i16:
|
||||||
|
; AVX2: # BB#0:
|
||||||
|
; AVX2-NEXT: movswl (%rdi), %eax
|
||||||
|
; AVX2-NEXT: shrl $16, %eax
|
||||||
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
|
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||||
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i16, i16* %ptr, align 2
|
%tmp = load i16, i16* %ptr, align 2
|
||||||
%tmp1 = sext i16 %tmp to i32
|
%tmp1 = sext i16 %tmp to i32
|
||||||
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
|
%tmp2 = insertelement <4 x i32> zeroinitializer, i32 %tmp1, i32 1
|
||||||
|
|
|
||||||
|
|
@ -3342,9 +3342,9 @@ define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
|
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
|
|
@ -3363,9 +3363,9 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
|
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[2,3,2,3,2,3,2,3,2,3,2,3,2,3,2,3]
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
||||||
|
|
|
||||||
|
|
@ -2029,9 +2029,7 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_i32(i32* %ptr) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_dup_elt1_mem_v32i8_i32:
|
; AVX2-LABEL: insert_dup_elt1_mem_v32i8_i32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX2-NEXT: vpbroadcastb 1(%rdi), %ymm0
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
|
|
@ -2050,9 +2048,7 @@ define <32 x i8> @insert_dup_elt3_mem_v32i8_i32(i32* %ptr) {
|
||||||
;
|
;
|
||||||
; AVX2-LABEL: insert_dup_elt3_mem_v32i8_i32:
|
; AVX2-LABEL: insert_dup_elt3_mem_v32i8_i32:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
|
; AVX2-NEXT: vpbroadcastb 3(%rdi), %ymm0
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3]
|
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i32, i32* %ptr, align 4
|
%tmp = load i32, i32* %ptr, align 4
|
||||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||||
|
|
@ -2073,9 +2069,9 @@ define <32 x i8> @insert_dup_elt1_mem_v32i8_sext_i8(i8* %ptr) {
|
||||||
; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
|
; AVX2-LABEL: insert_dup_elt1_mem_v32i8_sext_i8:
|
||||||
; AVX2: # BB#0:
|
; AVX2: # BB#0:
|
||||||
; AVX2-NEXT: movsbl (%rdi), %eax
|
; AVX2-NEXT: movsbl (%rdi), %eax
|
||||||
|
; AVX2-NEXT: shrl $8, %eax
|
||||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||||
; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
|
; AVX2-NEXT: vpbroadcastb %xmm0, %ymm0
|
||||||
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm0, %ymm0
|
|
||||||
; AVX2-NEXT: retq
|
; AVX2-NEXT: retq
|
||||||
%tmp = load i8, i8* %ptr, align 1
|
%tmp = load i8, i8* %ptr, align 1
|
||||||
%tmp1 = sext i8 %tmp to i32
|
%tmp1 = sext i8 %tmp to i32
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue