[AVX-512] Add more VPTERNLOG patterns to enable folding of broadcast loads that aren't in operand 2.
llvm-svn: 295634
This commit is contained in:
parent
528d874583
commit
5b4e36aafa
|
@ -8992,6 +8992,45 @@ multiclass avx512_ternlog<bits<8> opc, string OpcodeStr, SDNode OpNode,
|
|||
_.RC:$src1)),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
||||
// Additional patterns for matching broadcasts in operand 0 or 1 (imm8 remapped).
|
||||
def : Pat<(_.VT (OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 imm:$src4))),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmbi) _.RC:$src1, _.RC:$src2,
|
||||
addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
||||
// Additional patterns for matching zero masking with broadcasts in other
|
||||
// positions.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode (X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, _.RC:$src1, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
|
||||
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
|
||||
(VPTERNLOG321_imm8 imm:$src4))>;
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.ImmAllZerosV)),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmbikz) _.RC:$src1,
|
||||
_.KRCWM:$mask, _.RC:$src2, addr:$src3,
|
||||
(VPTERNLOG132_imm8 imm:$src4))>;
|
||||
|
||||
// Additional patterns for matching masked broadcasts with different
|
||||
// operand orders.
|
||||
def : Pat<(_.VT (vselect _.KRCWM:$mask,
|
||||
(OpNode _.RC:$src1,
|
||||
(X86VBroadcast (_.ScalarLdFrag addr:$src3)),
|
||||
_.RC:$src2, (i8 imm:$src4)),
|
||||
_.RC:$src1)),
|
||||
(!cast<Instruction>(NAME#_.ZSuffix#rmbik) _.RC:$src1, _.KRCWM:$mask,
|
||||
_.RC:$src2, addr:$src3, (VPTERNLOG132_imm8 imm:$src4))>;
|
||||
}
|
||||
|
||||
multiclass avx512_common_ternlog<string OpcodeStr, AVX512VLVectorVTInfo _>{
|
||||
|
|
|
@ -521,8 +521,7 @@ define <16 x i32> @vpternlog_v16i32_021_load2_maskz(<16 x i32> %x0, <16 x i32> %
|
|||
define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x0_scalar = load i32, i32* %ptr_x0
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
|
||||
|
@ -534,8 +533,7 @@ define <16 x i32> @vpternlog_v16i32_012_broadcast0(i32* %ptr_x0, <16 x i32> %x1,
|
|||
define <16 x i32> @vpternlog_v16i32_012_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1_scalar = load i32, i32* %ptr_x1
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
|
||||
|
@ -559,8 +557,7 @@ define <16 x i32> @vpternlog_v16i32_012_broadcast2(<16 x i32> %x0, <16 x i32> %x
|
|||
define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x0_scalar = load i32, i32* %ptr_x0
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0_scalar, i32 0
|
||||
|
@ -572,8 +569,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast0(i32* %ptr_x0, <16 x i32> %x1,
|
|||
define <16 x i32> @vpternlog_v16i32_102_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm1, %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1_scalar = load i32, i32* %ptr_x1
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
|
||||
|
@ -609,9 +605,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast0(i32* %ptr_x0, <16 x i32> %x1,
|
|||
define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1, <16 x i32> %x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm0, %zmm1, %zmm2
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1_scalar = load i32, i32* %ptr_x1
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1_scalar, i32 0
|
||||
|
@ -623,8 +617,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast1(<16 x i32> %x0, i32* %ptr_x1,
|
|||
define <16 x i32> @vpternlog_v16i32_210_broadcast2(<16 x i32> %x0, <16 x i32> %x1, i32* %ptr_x2) {
|
||||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x2_scalar = load i32, i32* %ptr_x2
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2_scalar, i32 0
|
||||
|
@ -652,8 +645,7 @@ define <16 x i32> @vpternlog_v16i32_012_broadcast1_mask(<16 x i32> %x0, i32* %x1
|
|||
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%x1scalar = load i32, i32* %x1ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
|
||||
|
@ -679,8 +671,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast0_mask(i32* %x0ptr, <16 x i32>
|
|||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%x0scalar = load i32, i32* %x0ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
|
||||
|
@ -736,8 +727,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast1_mask(<16 x i32> %x0, i32* %x1
|
|||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm0, %zmm2, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm0, %zmm1 {%k1}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
|
||||
; CHECK-NEXT: retq
|
||||
%x1scalar = load i32, i32* %x1ptr
|
||||
|
@ -794,8 +784,7 @@ define <16 x i32> @vpternlog_v16i32_021_broadcast2_mask(<16 x i32> %x0, <16 x i3
|
|||
; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_mask:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1}
|
||||
; CHECK-NEXT: retq
|
||||
%x2scalar = load i32, i32* %x2ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
|
||||
|
@ -808,8 +797,7 @@ define <16 x i32> @vpternlog_v16i32_012_broadcast0_maskz(i32* %x0ptr, <16 x i32>
|
|||
; CHECK-LABEL: vpternlog_v16i32_012_broadcast0_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x0scalar = load i32, i32* %x0ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
|
||||
|
@ -822,8 +810,7 @@ define <16 x i32> @vpternlog_v16i32_012_broadcast1_maskz(<16 x i32> %x0, i32* %x
|
|||
; CHECK-LABEL: vpternlog_v16i32_012_broadcast1_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x1scalar = load i32, i32* %x1ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
|
||||
|
@ -849,8 +836,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast0_maskz(i32* %x0ptr, <16 x i32>
|
|||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast0_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x0scalar = load i32, i32* %x0ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
|
||||
|
@ -863,8 +849,7 @@ define <16 x i32> @vpternlog_v16i32_102_broadcast1_maskz(<16 x i32> %x0, i32* %x
|
|||
; CHECK-LABEL: vpternlog_v16i32_102_broadcast1_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $9, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x1scalar = load i32, i32* %x1ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
|
||||
|
@ -903,9 +888,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast1_maskz(<16 x i32> %x0, i32* %x
|
|||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast1_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $9, %zmm0, %zmm1, %zmm2 {%k1} {z}
|
||||
; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x1scalar = load i32, i32* %x1ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x1scalar, i32 0
|
||||
|
@ -918,8 +901,7 @@ define <16 x i32> @vpternlog_v16i32_210_broadcast2_maskz(<16 x i32> %x0, <16 x i
|
|||
; CHECK-LABEL: vpternlog_v16i32_210_broadcast2_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x2scalar = load i32, i32* %x2ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
|
||||
|
@ -932,8 +914,7 @@ define <16 x i32> @vpternlog_v16i32_021_broadcast0_maskz(i32* %x0ptr, <16 x i32>
|
|||
; CHECK-LABEL: vpternlog_v16i32_021_broadcast0_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $33, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x0scalar = load i32, i32* %x0ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x0scalar, i32 0
|
||||
|
@ -959,8 +940,7 @@ define <16 x i32> @vpternlog_v16i32_021_broadcast2_maskz(<16 x i32> %x0, <16 x i
|
|||
; CHECK-LABEL: vpternlog_v16i32_021_broadcast2_maskz:
|
||||
; CHECK: ## BB#0:
|
||||
; CHECK-NEXT: kmovw %esi, %k1
|
||||
; CHECK-NEXT: vpbroadcastd (%rdi), %zmm2
|
||||
; CHECK-NEXT: vpternlogd $33, %zmm1, %zmm2, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: vpternlogd $65, (%rdi){1to16}, %zmm1, %zmm0 {%k1} {z}
|
||||
; CHECK-NEXT: retq
|
||||
%x2scalar = load i32, i32* %x2ptr
|
||||
%vecinit.i = insertelement <16 x i32> undef, i32 %x2scalar, i32 0
|
||||
|
|
Loading…
Reference in New Issue