[AArch64][SVE] Avoid using ptrue for unpredicated predicate AND.

Reviewed By: david-arm

Differential Revision: https://reviews.llvm.org/D118146
This commit is contained in:
Sander de Smalen 2022-01-27 12:15:12 +00:00
parent b70366c9c4
commit dafd1f29da
5 changed files with 26 additions and 21 deletions

View File

@ -734,7 +734,7 @@ let Predicates = [HasSVEorStreamingSVE] in {
defm PFIRST : sve_int_pfirst<0b00000, "pfirst", int_aarch64_sve_pfirst>;
defm PNEXT : sve_int_pnext<0b00110, "pnext", int_aarch64_sve_pnext>;
defm AND_PPzPP : sve_int_pred_log<0b0000, "and", int_aarch64_sve_and_z, and>;
defm AND_PPzPP : sve_int_pred_log_and<0b0000, "and", int_aarch64_sve_and_z>;
defm BIC_PPzPP : sve_int_pred_log<0b0001, "bic", int_aarch64_sve_bic_z>;
defm EOR_PPzPP : sve_int_pred_log<0b0010, "eor", int_aarch64_sve_eor_z, xor>;
defm SEL_PPPP : sve_int_pred_log<0b0011, "sel", vselect>;

View File

@ -1633,6 +1633,18 @@ multiclass sve_int_pred_log<bits<4> opc, string asm, SDPatternOperator op,
!cast<Instruction>(NAME), PTRUE_D>;
}
// Predicate logical AND, extended with selection patterns for the plain
// (unpredicated) `and` node on scalable predicate types.
//
// Everything from sve_int_pred_log<opc, asm, op> is inherited unchanged. On
// top of that, an `and` of two predicate values is selected to this
// instruction with $Op1 passed twice: once as the instruction's first operand
// and once as its first source operand. Because Op1 & Op1 & Op2 == Op1 & Op2,
// the result is the same as using an all-true first operand, but no separate
// ptrue has to be materialised.
multiclass sve_int_pred_log_and<bits<4> opc, string asm, SDPatternOperator op>
    : sve_int_pred_log<opc, asm, op> {
  // One anonymous pattern per predicate element count, widest first.
  foreach PredTy = [nxv16i1, nxv8i1, nxv4i1, nxv2i1] in
    def : Pat<(PredTy (and PredTy:$Op1, PredTy:$Op2)),
              (!cast<Instruction>(NAME) $Op1, $Op1, $Op2)>;
}
//===----------------------------------------------------------------------===//
// SVE Logical Mask Immediate Group
//===----------------------------------------------------------------------===//

View File

@ -49,8 +49,7 @@ define <vscale x 16 x i8> @and_b_zero(<vscale x 16 x i8> %a) {
define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b) {
; CHECK-LABEL: and_pred_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.d
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 2 x i1> %a, %b
ret <vscale x 2 x i1> %res
@ -59,8 +58,7 @@ define <vscale x 2 x i1> @and_pred_d(<vscale x 2 x i1> %a, <vscale x 2 x i1> %b)
define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b) {
; CHECK-LABEL: and_pred_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.s
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 4 x i1> %a, %b
ret <vscale x 4 x i1> %res
@ -69,8 +67,7 @@ define <vscale x 4 x i1> @and_pred_s(<vscale x 4 x i1> %a, <vscale x 4 x i1> %b)
define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b) {
; CHECK-LABEL: and_pred_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.h
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 8 x i1> %a, %b
ret <vscale x 8 x i1> %res
@ -79,8 +76,7 @@ define <vscale x 8 x i1> @and_pred_h(<vscale x 8 x i1> %a, <vscale x 8 x i1> %b)
define <vscale x 16 x i1> @and_pred_b(<vscale x 16 x i1> %a, <vscale x 16 x i1> %b) {
; CHECK-LABEL: and_pred_b:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%res = and <vscale x 16 x i1> %a, %b
ret <vscale x 16 x i1> %res

View File

@ -17,8 +17,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_h(<vscale x 8 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_h:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.h
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv8i1(<vscale x 8 x i1> %pg)
ret <vscale x 16 x i1> %out
@ -28,8 +27,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_s(<vscale x 4 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_s:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.s
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv4i1(<vscale x 4 x i1> %pg)
ret <vscale x 16 x i1> %out
@ -39,8 +37,7 @@ define <vscale x 16 x i1> @reinterpret_bool_from_d(<vscale x 2 x i1> %pg) {
; CHECK-LABEL: reinterpret_bool_from_d:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p1.d
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: ret
%out = call <vscale x 16 x i1> @llvm.aarch64.sve.convert.to.svbool.nxv2i1(<vscale x 2 x i1> %pg)
ret <vscale x 16 x i1> %out

View File

@ -7,7 +7,7 @@ define i1 @andv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: andv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
@ -24,10 +24,10 @@ define i1 @andv_nxv64i1(<vscale x 64 x i1> %a) {
; CHECK-NEXT: str p4, [sp, #7, mul vl] // 2-byte Folded Spill
; CHECK-NEXT: .cfi_escape 0x0f, 0x0c, 0x8f, 0x00, 0x11, 0x10, 0x22, 0x11, 0x08, 0x92, 0x2e, 0x00, 0x1e, 0x22 // sp + 16 + 8 * VG
; CHECK-NEXT: .cfi_offset w29, -16
; CHECK-NEXT: and p1.b, p1/z, p1.b, p3.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p2.b
; CHECK-NEXT: ptrue p4.b
; CHECK-NEXT: and p1.b, p4/z, p1.b, p3.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p2.b
; CHECK-NEXT: and p0.b, p4/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p4/z, p0.b
; CHECK-NEXT: ptest p4, p0.b
; CHECK-NEXT: ldr p4, [sp, #7, mul vl] // 2-byte Folded Reload
@ -73,7 +73,7 @@ define i1 @smaxv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: smaxv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq
@ -116,7 +116,7 @@ define i1 @uminv_nxv32i1(<vscale x 32 x i1> %a) {
; CHECK-LABEL: uminv_nxv32i1:
; CHECK: // %bb.0:
; CHECK-NEXT: ptrue p2.b
; CHECK-NEXT: and p0.b, p2/z, p0.b, p1.b
; CHECK-NEXT: and p0.b, p0/z, p0.b, p1.b
; CHECK-NEXT: not p0.b, p2/z, p0.b
; CHECK-NEXT: ptest p2, p0.b
; CHECK-NEXT: cset w0, eq