From 5e4b45361f5da80f42a502e120bd3352a0f3d68a Mon Sep 17 00:00:00 2001
From: Craig Topper
Date: Sat, 27 Jan 2018 23:49:14 +0000
Subject: [PATCH] [X86] Add patterns for using masked vptestnmd for 256-bit
 vectors without VLX.

We can widen the mask and extract it back down.

llvm-svn: 323610
---
 llvm/lib/Target/X86/X86InstrAVX512.td     | 37 +++++++++++++++--------
 llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll |  5 ++-
 2 files changed, 26 insertions(+), 16 deletions(-)

diff --git a/llvm/lib/Target/X86/X86InstrAVX512.td b/llvm/lib/Target/X86/X86InstrAVX512.td
index b4e68f20c119..e095011d8757 100644
--- a/llvm/lib/Target/X86/X86InstrAVX512.td
+++ b/llvm/lib/Target/X86/X86InstrAVX512.td
@@ -2958,19 +2958,19 @@ defm KSHIFTR : avx512_mask_shiftop_w<0x30, 0x31, "kshiftr", X86kshiftr, SSE_PSHU
 multiclass axv512_icmp_packed_no_vlx_lowering<PatFrag Frag, string InstStr,
                                               X86VectorVTInfo Narrow,
                                               X86VectorVTInfo Wide> {
-def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
+  def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1),
                               (Narrow.VT Narrow.RC:$src2))),
             (COPY_TO_REGCLASS
-             (!cast<Instruction>(InstStr##Zrr)
+             (!cast<Instruction>(InstStr#"Zrr")
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
              Narrow.KRC)>;
-def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
-                       (Frag (Narrow.VT Narrow.RC:$src1),
+  def : Pat<(Narrow.KVT (and Narrow.KRC:$mask,
+                         (Frag (Narrow.VT Narrow.RC:$src1),
                                (Narrow.VT Narrow.RC:$src2)))),
             (COPY_TO_REGCLASS
-             (!cast<Instruction>(InstStr##Zrrk)
+             (!cast<Instruction>(InstStr#"Zrrk")
              (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)),
              (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx))),
@@ -5232,14 +5232,25 @@ multiclass avx512_vptest_mb<bits<8> opc, string OpcodeStr, SDNode OpNode,
 // Use 512bit version to implement 128/256 bit in case NoVLX.
 multiclass avx512_vptest_lowering<SDNode OpNode, X86VectorVTInfo ExtendInfo,
                                   X86VectorVTInfo _, string Suffix> {
-  def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
-              (_.KVT (COPY_TO_REGCLASS
-                      (!cast<Instruction>(NAME # Suffix # "Zrr")
-                      (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                     _.RC:$src1, _.SubRegIdx),
-                      (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
-                                     _.RC:$src2, _.SubRegIdx)),
-              _.KRC))>;
+  def : Pat<(_.KVT (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2))),
+            (_.KVT (COPY_TO_REGCLASS
+                     (!cast<Instruction>(NAME # Suffix # "Zrr")
+                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                                      _.RC:$src1, _.SubRegIdx),
+                       (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                                      _.RC:$src2, _.SubRegIdx)),
+                   _.KRC))>;
+
+  def : Pat<(_.KVT (and _.KRC:$mask,
+                    (OpNode (_.VT _.RC:$src1), (_.VT _.RC:$src2)))),
+            (COPY_TO_REGCLASS
+             (!cast<Instruction>(NAME # Suffix # "Zrrk")
+              (COPY_TO_REGCLASS _.KRC:$mask, ExtendInfo.KRC),
+              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                             _.RC:$src1, _.SubRegIdx),
+              (INSERT_SUBREG (ExtendInfo.VT (IMPLICIT_DEF)),
+                             _.RC:$src2, _.SubRegIdx)),
+             _.KRC)>;
 }
 
 multiclass avx512_vptest_dq_sizes<bits<8> opc, string OpcodeStr, SDNode OpNode,
diff --git a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
index 82c1054a9802..e75e6ea0ad53 100644
--- a/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
+++ b/llvm/test/CodeGen/X86/avx512vl-vec-cmp.ll
@@ -1134,9 +1134,8 @@ define <8 x i32> @testnm_and(<8 x i32> %a, <8 x i32> %b, <8 x i32> %x, <8 x i32>
 ; NoVLX-NEXT:    # kill: def %ymm2 killed %ymm2 def %zmm2
 ; NoVLX-NEXT:    # kill: def %ymm1 killed %ymm1 def %zmm1
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 def %zmm0
-; NoVLX-NEXT:    vptestnmd %zmm0, %zmm0, %k0
-; NoVLX-NEXT:    vptestnmd %zmm1, %zmm1, %k1
-; NoVLX-NEXT:    kandw %k1, %k0, %k1
+; NoVLX-NEXT:    vptestnmd %zmm0, %zmm0, %k1
+; NoVLX-NEXT:    vptestnmd %zmm1, %zmm1, %k1 {%k1}
 ; NoVLX-NEXT:    vpblendmd %zmm2, %zmm3, %zmm0 {%k1}
 ; NoVLX-NEXT:    # kill: def %ymm0 killed %ymm0 killed %zmm0
 ; NoVLX-NEXT:    retq
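
For reference, the IR shape this lowering targets is sketched below. It is a hypothetical reduction in the spirit of the testnm_and test updated above; the function name, parameter names, and body are illustrative assumptions, not text copied from avx512vl-vec-cmp.ll. Without VLX, each icmp-eq-against-zero compare is widened to 512 bits and selected as vptestnmd; the new Zrrk pattern folds the and of the two k-masks into the write-mask of the second vptestnmd, replacing the kandw seen in the old CHECK lines.

; Hypothetical reduction (illustrative only) of the pattern exercised above.
define <8 x i32> @testnm_and_sketch(<8 x i32> %a, <8 x i32> %b,
                                    <8 x i32> %x, <8 x i32> %y) {
  ; Each compare against zero maps to vptestnmd: a mask bit is set when the
  ; bitwise AND of the two sources is zero, so testing a value against itself
  ; tests it for zero. Without VLX the 256-bit inputs are widened to zmm.
  %m1 = icmp eq <8 x i32> %a, zeroinitializer
  %m2 = icmp eq <8 x i32> %b, zeroinitializer
  ; With the added Zrrk pattern, this AND of the two k-masks becomes the {%k1}
  ; write-mask on the second vptestnmd instead of a separate kandw.
  %m = and <8 x i1> %m1, %m2
  %r = select <8 x i1> %m, <8 x i32> %x, <8 x i32> %y
  ret <8 x i32> %r
}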