[X86] Add a DAG combine to fix (v4i1 (bitcast (i4))) before type legalization sees the i4 and changes to load/store.

Same for i2 and v2i1.

llvm-svn: 321601
Craig Topper 2017-12-31 08:25:50 +00:00
parent edb61167e5
commit 7f39623533
5 changed files with 83 additions and 385 deletions
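As a rough orientation before the diffs: the pattern this commit targets is an illegal small integer (i4 or i2) bitcast directly to a one-bit-per-element mask vector. A minimal LLVM IR sketch of that pattern follows; the function is hypothetical and only models the intrinsics tests updated below, which build the mask the same way via trunc + bitcast.

; Hypothetical reproducer (sketch), assuming an AVX-512VL target: an i4 mask
; bitcast to <4 x i1> and used as a select mask. Before this combine, type
; legalization turned the illegal i4 into a byte store plus a kmov reload from
; the stack; afterwards the mask moves straight from a GPR into a k register,
; as the kmovw/kmovd lines in the updated CHECK output below show.
define <4 x i32> @select_with_i4_mask(i4 %m, <4 x i32> %a, <4 x i32> %b) {
  %mask = bitcast i4 %m to <4 x i1>
  %res = select <4 x i1> %mask, <4 x i32> %a, <4 x i32> %b
  ret <4 x i32> %res
}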

View File

@ -30412,9 +30412,22 @@ static SDValue combineBitcast(SDNode *N, SelectionDAG &DAG,
  // (i16 movmsk (16i8 sext (v16i1 x)))
  // before the setcc result is scalarized on subtargets that don't have legal
  // vxi1 types.
  if (DCI.isBeforeLegalize())
  if (DCI.isBeforeLegalize()) {
    if (SDValue V = combineBitcastvxi1(DAG, SDValue(N, 0), Subtarget))
      return V;
    // If this is a bitcast between a MVT::v4i1/v2i1 and an illegal integer
    // type, widen both sides to avoid a trip through memory.
    if ((VT == MVT::v4i1 || VT == MVT::v2i1) && SrcVT.isScalarInteger() &&
        Subtarget.hasVLX()) {
      SDLoc dl(N);
      N0 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i8, N0);
      N0 = DAG.getBitcast(MVT::v8i1, N0);
      return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, N0,
                         DAG.getIntPtrConstant(0, dl));
    }
  }
  // Since MMX types are special and don't usually play with other vector types,
  // it's better to handle them early to be sure we emit efficient code by
  // avoiding store-load conversions.
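In DAG terms, the new code rewrites (v4i1 (bitcast (i4 x))) into (extract_subvector (v8i1 (bitcast (i8 (any_extend x)))), 0), and likewise for v2i1/i2, so the value stays in registers instead of taking a trip through a stack slot; the Subtarget.hasVLX() guard restricts this to targets where v4i1/v2i1 are legal mask types. A corresponding IR-level sketch for the i2 case mentioned in the commit message (hypothetical function; the real coverage is the _epi64/_pd tests below):

; Sketch only: with the combine, the i2 mask is any-extended to i8, bitcast to
; v8i1, and its low two lanes extracted, so it reaches %k1 via a plain kmov.
define <2 x i64> @select_with_i2_mask(i2 %m, <2 x i64> %a, <2 x i64> %b) {
  %mask = bitcast i2 %m to <2 x i1>
  %res = select <2 x i1> %mask, <2 x i64> %a, <2 x i64> %b
  ret <2 x i64> %res
}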

View File

@ -871,23 +871,14 @@ define <2 x i64> @test_mm_broadcastd_epi32(<2 x i64> %a0) {
define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastd_epi32:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastd_epi32:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -903,23 +894,14 @@ define <2 x i64> @test_mm_mask_broadcastd_epi32(<2 x i64> %a0, i8 %a1, <2 x i64>
define <2 x i64> @test_mm_maskz_broadcastd_epi32(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastd_epi32:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastd_epi32:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastd %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1007,23 +989,14 @@ define <2 x i64> @test_mm_broadcastq_epi64(<2 x i64> %a0) {
define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm_mask_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i2
@ -1036,23 +1009,14 @@ define <2 x i64> @test_mm_mask_broadcastq_epi64(<2 x i64> %a0, i8 %a1, <2 x i64>
define <2 x i64> @test_mm_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm_maskz_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i2
@ -1079,23 +1043,14 @@ define <4 x i64> @test_mm256_broadcastq_epi64(<2 x i64> %a0) {
define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X32-LABEL: test_mm256_mask_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1108,23 +1063,14 @@ define <4 x i64> @test_mm256_mask_broadcastq_epi64(<4 x i64> %a0, i8 %a1, <2 x i
define <4 x i64> @test_mm256_maskz_broadcastq_epi64(i8 %a0, <2 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastq_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastq_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpbroadcastq %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1151,23 +1097,14 @@ define <2 x double> @test_mm_broadcastsd_pd(<2 x double> %a0) {
define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_mask_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i2
@ -1180,23 +1117,14 @@ define <2 x double> @test_mm_mask_broadcastsd_pd(<2 x double> %a0, i8 %a1, <2 x
define <2 x double> @test_mm_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i2
@ -1223,23 +1151,14 @@ define <4 x double> @test_mm256_broadcastsd_pd(<2 x double> %a0) {
define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm256_mask_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastsd %xmm1, %ymm0 {%k1}
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1252,23 +1171,14 @@ define <4 x double> @test_mm256_mask_broadcastsd_pd(<4 x double> %a0, i8 %a1, <2
define <4 x double> @test_mm256_maskz_broadcastsd_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm256_maskz_broadcastsd_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_broadcastsd_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1295,23 +1205,14 @@ define <4 x float> @test_mm_broadcastss_ps(<4 x float> %a0) {
define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_broadcastss_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_broadcastss_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastss %xmm1, %xmm0 {%k1}
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1324,23 +1225,14 @@ define <4 x float> @test_mm_mask_broadcastss_ps(<4 x float> %a0, i8 %a1, <4 x fl
define <4 x float> @test_mm_maskz_broadcastss_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_broadcastss_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_broadcastss_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1419,23 +1311,14 @@ define <2 x double> @test_mm_movddup_pd(<2 x double> %a0) {
define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_mask_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = xmm1[0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i2
@ -1448,23 +1331,14 @@ define <2 x double> @test_mm_mask_movddup_pd(<2 x double> %a0, i8 %a1, <2 x doub
define <2 x double> @test_mm_maskz_movddup_pd(i8 %a0, <2 x double> %a1) {
; X32-LABEL: test_mm_maskz_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i2
@ -1491,23 +1365,14 @@ define <4 x double> @test_mm256_movddup_pd(<4 x double> %a0) {
define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_mask_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = ymm1[0,0,2,2]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1520,23 +1385,14 @@ define <4 x double> @test_mm256_mask_movddup_pd(<4 x double> %a0, i8 %a1, <4 x d
define <4 x double> @test_mm256_maskz_movddup_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_movddup_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_movddup_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i4
@ -1563,23 +1419,14 @@ define <4 x float> @test_mm_movehdup_ps(<4 x float> %a0) {
define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_movehdup_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_movehdup_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = xmm1[1,1,3,3]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1592,23 +1439,14 @@ define <4 x float> @test_mm_mask_movehdup_ps(<4 x float> %a0, i8 %a1, <4 x float
define <4 x float> @test_mm_maskz_movehdup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_movehdup_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_movehdup_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1687,23 +1525,14 @@ define <4 x float> @test_mm_moveldup_ps(<4 x float> %a0) {
define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_mask_moveldup_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_moveldup_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = xmm1[0,0,2,2]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1716,23 +1545,14 @@ define <4 x float> @test_mm_mask_moveldup_ps(<4 x float> %a0, i8 %a1, <4 x float
define <4 x float> @test_mm_maskz_moveldup_ps(i8 %a0, <4 x float> %a1) {
; X32-LABEL: test_mm_maskz_moveldup_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_moveldup_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4
@ -1811,23 +1631,14 @@ define <4 x i64> @test_mm256_permutex_epi64(<4 x i64> %a0) {
define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64> %a2) {
; X32-LABEL: test_mm256_mask_permutex_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_permutex_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1840,23 +1651,14 @@ define <4 x i64> @test_mm256_mask_permutex_epi64(<4 x i64> %a0, i8 %a1, <4 x i64
define <4 x i64> @test_mm256_maskz_permutex_epi64(i8 %a0, <4 x i64> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_epi64:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_permutex_epi64:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i4
@ -1883,23 +1685,14 @@ define <4 x double> @test_mm256_permutex_pd(<4 x double> %a0) {
define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_mask_permutex_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_permutex_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = ymm1[1,0,0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -1912,23 +1705,14 @@ define <4 x double> @test_mm256_mask_permutex_pd(<4 x double> %a0, i8 %a1, <4 x
define <4 x double> @test_mm256_maskz_permutex_pd(i8 %a0, <4 x double> %a1) {
; X32-LABEL: test_mm256_maskz_permutex_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_permutex_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i4
@ -1955,23 +1739,14 @@ define <2 x double> @test_mm_shuffle_pd(<2 x double> %a0, <2 x double> %a1) {
define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x double> %a2, <2 x double> %a3) {
; X32-LABEL: test_mm_mask_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} = xmm1[1],xmm2[1]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i2
@ -1984,23 +1759,14 @@ define <2 x double> @test_mm_mask_shuffle_pd(<2 x double> %a0, i8 %a1, <2 x doub
define <2 x double> @test_mm_maskz_shuffle_pd(i8 %a0, <2 x double> %a1, <2 x double> %a2) {
; X32-LABEL: test_mm_maskz_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $3, %al
; X32-NEXT: movb %al, {{[0-9]+}}(%esp)
; X32-NEXT: movzbl {{[0-9]+}}(%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: andb $3, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i2
@ -2027,23 +1793,14 @@ define <4 x double> @test_mm256_shuffle_pd(<4 x double> %a0, <4 x double> %a1) {
define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x double> %a2, <4 x double> %a3) {
; X32-LABEL: test_mm256_mask_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_mask_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} = ymm1[1],ymm2[1],ymm1[2],ymm2[2]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -2056,23 +1813,14 @@ define <4 x double> @test_mm256_mask_shuffle_pd(<4 x double> %a0, i8 %a1, <4 x d
define <4 x double> @test_mm256_maskz_shuffle_pd(i8 %a0, <4 x double> %a1, <4 x double> %a2) {
; X32-LABEL: test_mm256_maskz_shuffle_pd:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm256_maskz_shuffle_pd:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[2],ymm1[2]
; X64-NEXT: retq
%trn1 = trunc i8 %a0 to i4
@ -2099,23 +1847,14 @@ define <4 x float> @test_mm_shuffle_ps(<4 x float> %a0, <4 x float> %a1) {
define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float> %a2, <4 x float> %a3) {
; X32-LABEL: test_mm_mask_shuffle_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_mask_shuffle_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} = xmm1[0,1],xmm2[0,0]
; X64-NEXT: retq
%trn1 = trunc i8 %a1 to i4
@ -2128,23 +1867,14 @@ define <4 x float> @test_mm_mask_shuffle_ps(<4 x float> %a0, i8 %a1, <4 x float>
define <4 x float> @test_mm_maskz_shuffle_ps(i8 %a0, <4 x float> %a1, <4 x float> %a2) {
; X32-LABEL: test_mm_maskz_shuffle_ps:
; X32: # %bb.0:
; X32-NEXT: pushl %eax
; X32-NEXT: .cfi_def_cfa_offset 8
; X32-NEXT: movb {{[0-9]+}}(%esp), %al
; X32-NEXT: andb $15, %al
; X32-NEXT: movb %al, (%esp)
; X32-NEXT: movzbl (%esp), %eax
; X32-NEXT: kmovw %eax, %k1
; X32-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
; X32-NEXT: popl %eax
; X32-NEXT: retl
;
; X64-LABEL: test_mm_maskz_shuffle_ps:
; X64: # %bb.0:
; X64-NEXT: andb $15, %dil
; X64-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; X64-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; X64-NEXT: kmovw %eax, %k1
; X64-NEXT: kmovw %edi, %k1
; X64-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1],xmm1[0,0]
; X64-NEXT: retq
%trn0 = trunc i8 %a0 to i4

View File
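The masked-compare tests that follow exercise the same bitcasts on the incoming mask operand of vcmpps/vcmppd. A simplified sketch of their shape (assumed; the checked-in functions also bitcast the <2 x i64> arguments and come in _mem and _mem_b variants):

; Assumed shape of the v4i1 masked-compare tests: the i4 mask is combined with
; the fcmp result via a bitcast and an 'and', then widened to <8 x i1> and
; returned as i8. With the combine, the mask arrives via kmovd %edi, %k1
; instead of a movb/kmovb round trip through the stack, per the diffs below.
define zeroext i8 @masked_vcmpoeqps_sketch(i4 zeroext %__u, <4 x float> %a, <4 x float> %b) {
entry:
  %cmp  = fcmp oeq <4 x float> %a, %b
  %mask = bitcast i4 %__u to <4 x i1>
  %and  = and <4 x i1> %cmp, %mask
  %wide = shufflevector <4 x i1> %and, <4 x i1> zeroinitializer,
                        <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %res  = bitcast <8 x i1> %wide to i8
  ret i8 %res
}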

@ -37177,8 +37177,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -37236,8 +37235,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -37296,8 +37294,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -37517,8 +37514,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -37576,8 +37572,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -37636,8 +37631,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -37827,8 +37821,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -37875,8 +37868,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -37924,8 +37916,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -38122,8 +38113,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask(i4 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -38176,8 +38166,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem(i4 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -38231,8 +38220,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <2 x i64> %__a, float* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqps (%rsi){1to4}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -40054,8 +40042,7 @@ entry:
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@ -40089,8 +40076,7 @@ entry:
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@ -40125,8 +40111,7 @@ entry:
define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovb %k0, -{{[0-9]+}}(%rsp)
; VLX-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
@ -40280,8 +40265,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -40325,8 +40309,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -40371,8 +40354,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -40536,8 +40518,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -40581,8 +40562,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -40627,8 +40607,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -40804,8 +40783,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -40852,8 +40830,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -40901,8 +40878,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: retq
@ -41099,8 +41075,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask(i2 zeroext %__u, <2 x i64> %__a, <2 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %xmm1, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -41153,8 +41128,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem(i2 zeroext %__u, <2 x i64> %__a, <2 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -41208,8 +41182,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u, <2 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to2}, %xmm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: retq
@ -41433,8 +41406,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -41494,8 +41466,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -41556,8 +41527,7 @@ entry:
define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %al killed %al killed %eax
@ -41788,8 +41758,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -41849,8 +41818,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -41911,8 +41879,7 @@ entry:
define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: # kill: def %ax killed %ax killed %eax
@ -42110,8 +42077,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
@ -42160,8 +42126,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
@ -42211,8 +42176,7 @@ entry:
define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovd %k0, %eax
; VLX-NEXT: vzeroupper
@ -42417,8 +42381,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask(i4 zeroext %__u, <4 x i64> %__a, <4 x i64> %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd %ymm1, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
@ -42473,8 +42436,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem(i4 zeroext %__u, <4 x i64> %__a, <4 x i64>* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi), %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper
@ -42530,8 +42492,7 @@ entry:
define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u, <4 x i64> %__a, double* %__b) local_unnamed_addr {
; VLX-LABEL: test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; VLX: # %bb.0: # %entry
; VLX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; VLX-NEXT: kmovb -{{[0-9]+}}(%rsp), %k1
; VLX-NEXT: kmovd %edi, %k1
; VLX-NEXT: vcmpeqpd (%rsi){1to4}, %ymm0, %k0 {%k1}
; VLX-NEXT: kmovq %k0, %rax
; VLX-NEXT: vzeroupper

View File

@ -44,7 +44,6 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
;
; AVX512-LABEL: ext_i2_2i64:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $3, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa64 %xmm0, %xmm0 {%k1} {z}
@ -84,7 +83,6 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
;
; AVX512-LABEL: ext_i4_4i32:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %xmm0, %xmm0, %xmm0
; AVX512-NEXT: vmovdqa32 %xmm0, %xmm0 {%k1} {z}
@ -235,7 +233,6 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
;
; AVX512-LABEL: ext_i4_4i64:
; AVX512: # %bb.0:
; AVX512-NEXT: andb $15, %dil
; AVX512-NEXT: kmovd %edi, %k1
; AVX512-NEXT: vpcmpeqd %ymm0, %ymm0, %ymm0
; AVX512-NEXT: vmovdqa64 %ymm0, %ymm0 {%k1} {z}

View File

@ -57,7 +57,6 @@ define <2 x i64> @ext_i2_2i64(i2 %a0) {
;
; AVX512VLBW-LABEL: ext_i2_2i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: andb $3, %dil
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vmovdqa64 {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
@ -108,7 +107,6 @@ define <4 x i32> @ext_i4_4i32(i4 %a0) {
;
; AVX512VLBW-LABEL: ext_i4_4i32:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: andb $15, %dil
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastd {{.*}}(%rip), %xmm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
@ -299,7 +297,6 @@ define <4 x i64> @ext_i4_4i64(i4 %a0) {
;
; AVX512VLBW-LABEL: ext_i4_4i64:
; AVX512VLBW: # %bb.0:
; AVX512VLBW-NEXT: andb $15, %dil
; AVX512VLBW-NEXT: kmovd %edi, %k1
; AVX512VLBW-NEXT: vpbroadcastq {{.*}}(%rip), %ymm0 {%k1} {z}
; AVX512VLBW-NEXT: retq
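The last two files cover the bitcast without any masked operation: the small integer is bitcast straight to a bool vector and then sign- or zero-extended. Roughly, and only as a sketch of the assumed test shape:

; Assumed shape of the ext_i2_2i64-style tests above: bitcast i2 to <2 x i1>,
; then extend to a full vector. Because the combine any-extends the i2 and only
; the low two lanes of the widened v8i1 mask are consumed, the 'andb $3, %dil'
; that previously sanitized the upper bits is no longer needed, which is why it
; disappears from the CHECK lines.
define <2 x i64> @ext_i2_2i64_sketch(i2 %a0) {
  %m = bitcast i2 %a0 to <2 x i1>
  %r = sext <2 x i1> %m to <2 x i64>
  ret <2 x i64> %r
}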