From 9f63dc3265748bf63d7475ba4013dc1b0c728f6c Mon Sep 17 00:00:00 2001 From: Paul Walker Date: Thu, 13 Aug 2020 15:52:00 +0100 Subject: [PATCH] [SVE] Fix shift-by-imm patterns used by asr, lsl & lsr intrinsics. Right shift patterns will no longer incorrectly accept a shift amount of zero. At the same time they will allow larger shift amounts that are now saturated to their upper bound. Patterns have been extended to enable immediate forms for shifts taking an arbitrary predicate. This patch also unifies the code path for immediate parsing so the i64 based shifts are no longer treated specially. Differential Revision: https://reviews.llvm.org/D86084 --- .../Target/AArch64/AArch64ISelDAGToDAG.cpp | 39 +- .../lib/Target/AArch64/AArch64SVEInstrInfo.td | 18 +- llvm/lib/Target/AArch64/SVEInstrFormats.td | 93 ++- .../AArch64/sve-intrinsics-int-arith-imm.ll | 669 ++++++++++++++---- 4 files changed, 633 insertions(+), 186 deletions(-) diff --git a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7799ebfbd68e..184458607c3c 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -190,9 +190,9 @@ public: return SelectSVELogicalImm(N, VT, Imm); } - template - bool SelectSVEShiftImm64(SDValue N, SDValue &Imm) { - return SelectSVEShiftImm64(N, Low, High, Imm); + template + bool SelectSVEShiftImm(SDValue N, SDValue &Imm) { + return SelectSVEShiftImm(N, Low, High, AllowSaturation, Imm); } // Returns a suitable CNT/INC/DEC/RDVL multiplier to calculate VSCALE*N. @@ -323,8 +323,8 @@ private: bool SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm); bool SelectSVESignedArithImm(SDValue N, SDValue &Imm); - bool SelectSVEShiftImm64(SDValue N, uint64_t Low, uint64_t High, - SDValue &Imm); + bool SelectSVEShiftImm(SDValue N, uint64_t Low, uint64_t High, + bool AllowSaturation, SDValue &Imm); bool SelectSVEArithImm(SDValue N, SDValue &Imm); bool SelectSVERegRegAddrMode(SDValue N, unsigned Scale, SDValue &Base, @@ -3177,19 +3177,30 @@ bool AArch64DAGToDAGISel::SelectSVELogicalImm(SDValue N, MVT VT, SDValue &Imm) { return false; } -// This method is only needed to "cast" i64s into i32s when the value -// is a valid shift which has been splatted into a vector with i64 elements. -// Every other type is fine in tablegen. -bool AArch64DAGToDAGISel::SelectSVEShiftImm64(SDValue N, uint64_t Low, - uint64_t High, SDValue &Imm) { +// SVE shift intrinsics allow shift amounts larger than the element's bitwidth. +// Rather than attempt to normalise everything we can sometimes saturate the +// shift amount during selection. This function also allows for consistent +// isel patterns by ensuring the resulting "Imm" node is of the i32 type +// required by the instructions. +bool AArch64DAGToDAGISel::SelectSVEShiftImm(SDValue N, uint64_t Low, + uint64_t High, bool AllowSaturation, + SDValue &Imm) { if (auto *CN = dyn_cast(N)) { uint64_t ImmVal = CN->getZExtValue(); - SDLoc DL(N); - if (ImmVal >= Low && ImmVal <= High) { - Imm = CurDAG->getTargetConstant(ImmVal, DL, MVT::i32); - return true; + // Reject shift amounts that are too small. + if (ImmVal < Low) + return false; + + // Reject or saturate shift amounts that are too big. 
+ if (ImmVal > High) { + if (!AllowSaturation) + return false; + ImmVal = High; } + + Imm = CurDAG->getTargetConstant(ImmVal, SDLoc(N), MVT::i32); + return true; } return false; diff --git a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td index 19a701d647a5..ea4c6cab5c35 100644 --- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td @@ -1343,10 +1343,10 @@ multiclass sve_prefetch; // Predicated shifts - defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0000, "asr", "ASR_ZPZI">; - defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right<0b0001, "lsr", "LSR_ZPZI">; - defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0011, "lsl">; - defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; + defm ASR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0000, "asr", "ASR_ZPZI", int_aarch64_sve_asr>; + defm LSR_ZPmI : sve_int_bin_pred_shift_imm_right_dup<0b0001, "lsr", "LSR_ZPZI", int_aarch64_sve_lsr>; + defm LSL_ZPmI : sve_int_bin_pred_shift_imm_left_dup< 0b0011, "lsl", "LSL_ZPZI", int_aarch64_sve_lsl>; + defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right< 0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>; let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in { defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd; @@ -2385,11 +2385,11 @@ let Predicates = [HasSVE2] in { } // SVE2 predicated shifts - defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; - defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; - defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; - defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; - defm SQSHLU_ZPmI : sve2_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; + defm SQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0110, "sqshl", "SQSHL_ZPZI">; + defm UQSHL_ZPmI : sve_int_bin_pred_shift_imm_left< 0b0111, "uqshl", "UQSHL_ZPZI">; + defm SRSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1100, "srshr", "SRSHR_ZPZI", int_aarch64_sve_srshr>; + defm URSHR_ZPmI : sve_int_bin_pred_shift_imm_right<0b1101, "urshr", "URSHR_ZPZI", int_aarch64_sve_urshr>; + defm SQSHLU_ZPmI : sve_int_bin_pred_shift_imm_left< 0b1111, "sqshlu", "SQSHLU_ZPZI", int_aarch64_sve_sqshlu>; // SVE2 integer add/subtract long defm SADDLB_ZZZ : sve2_wide_int_arith_long<0b00000, "saddlb", int_aarch64_sve_saddlb>; diff --git a/llvm/lib/Target/AArch64/SVEInstrFormats.td b/llvm/lib/Target/AArch64/SVEInstrFormats.td index b517d11de753..5eb811b9c78e 100644 --- a/llvm/lib/Target/AArch64/SVEInstrFormats.td +++ b/llvm/lib/Target/AArch64/SVEInstrFormats.td @@ -209,7 +209,14 @@ def SVE8BitLslImm : ComplexPattern; def SVEArithUImmPat : ComplexPattern; def SVEArithSImmPat : ComplexPattern; -def SVEShiftImm64 : ComplexPattern", []>; +def SVEShiftImmL8 : ComplexPattern", []>; +def SVEShiftImmL16 : ComplexPattern", []>; +def SVEShiftImmL32 : ComplexPattern", []>; +def SVEShiftImmL64 : ComplexPattern", []>; +def SVEShiftImmR8 : ComplexPattern", []>; +def SVEShiftImmR16 : ComplexPattern", []>; +def SVEShiftImmR32 : ComplexPattern", []>; +def SVEShiftImmR64 : ComplexPattern", []>; class SVEExactFPImm : AsmOperandClass { let Name = "SVEExactFPImmOperand" # Suffix; @@ -315,11 +322,6 @@ class SVE_1_Op_Imm_OptLsl_Pat; -class SVE_1_Op_Imm_Shift_Pred_Pat - : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), 
(vt (AArch64dup (ImmTy:$imm))))), - (inst $Op1, ImmTy:$imm)>; - class SVE_1_Op_Imm_Arith_Pred_Pat : Pat<(vt (op (pt (AArch64ptrue 31)), (vt zprty:$Op1), (vt (AArch64dup (it (cpx i32:$imm)))))), @@ -409,6 +411,18 @@ class SVE_InReg_Extend; +class SVE_Shift_DupImm_Pred_Pat +: Pat<(vt (op pt:$Pg, vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), + (inst $Pg, $Rn, i32:$imm)>; + +class SVE_Shift_DupImm_All_Active_Pat +: Pat<(vt (op (pt (AArch64ptrue 31)), vt:$Rn, (vt (AArch64dup (it (cast i32:$imm)))))), + (inst $Rn, i32:$imm)>; + // // Pseudo -> Instruction mappings // @@ -4761,38 +4775,19 @@ class sve_int_bin_pred_shift_imm tsz8_64, bits<4> opc, string asm, let ElementSize = zprty.ElementSize; } -multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string psName=""> { - def _B : SVEPseudo2Instr, +multiclass sve_int_bin_pred_shift_imm_left opc, string asm, string Ps, + SDPatternOperator op = null_frag> { + def _B : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : SVEPseudo2Instr, + def _H : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { let Inst{8} = imm{3}; } - def _S : SVEPseudo2Instr, + def _S : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { let Inst{9-8} = imm{4-3}; } - def _D : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { - let Inst{22} = imm{5}; - let Inst{9-8} = imm{4-3}; - } -} - -multiclass sve2_int_bin_pred_shift_imm_left opc, string asm, - string psName, - SDPatternOperator op> { - - def _B : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{0,0,0,1}, opc, asm, ZPR8, vecshiftL8>; - def _H : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,0,1,?}, opc, asm, ZPR16, vecshiftL16> { - let Inst{8} = imm{3}; - } - def _S : SVEPseudo2Instr, - sve_int_bin_pred_shift_imm<{0,1,?,?}, opc, asm, ZPR32, vecshiftL32> { - let Inst{9-8} = imm{4-3}; - } - def _D : SVEPseudo2Instr, + def _D : SVEPseudo2Instr, sve_int_bin_pred_shift_imm<{1,?,?,?}, opc, asm, ZPR64, vecshiftL64> { let Inst{22} = imm{5}; let Inst{9-8} = imm{4-3}; @@ -4804,6 +4799,16 @@ multiclass sve2_int_bin_pred_shift_imm_left opc, string asm, def : SVE_3_Op_Imm_Pat(NAME # _D)>; } +// As above but shift amount takes the form of a "vector immediate". +multiclass sve_int_bin_pred_shift_imm_left_dup opc, string asm, + string Ps, SDPatternOperator op> +: sve_int_bin_pred_shift_imm_left { + def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_left_zeroing_bhsd { def _ZERO_B : PredTwoOpImmPseudo; def _ZERO_H : PredTwoOpImmPseudo; @@ -4840,6 +4845,16 @@ multiclass sve_int_bin_pred_shift_imm_right opc, string asm, string Ps, def : SVE_3_Op_Imm_Pat(NAME # _D)>; } +// As above but shift amount takes the form of a "vector immediate". 
+multiclass sve_int_bin_pred_shift_imm_right_dup opc, string asm, + string Ps, SDPatternOperator op> +: sve_int_bin_pred_shift_imm_right { + def : SVE_Shift_DupImm_Pred_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_Pred_Pat(NAME # _D)>; +} + multiclass sve_int_bin_pred_shift_imm_right_zeroing_bhsd { def _ZERO_B : PredTwoOpImmPseudo; def _ZERO_H : PredTwoOpImmPseudo; @@ -4980,10 +4995,10 @@ multiclass sve_int_bin_cons_shift_imm_left opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _D)>; } multiclass sve_int_bin_cons_shift_imm_right opc, string asm, @@ -5000,10 +5015,10 @@ multiclass sve_int_bin_cons_shift_imm_right opc, string asm, let Inst{20-19} = imm{4-3}; } - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _B)>; - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _H)>; - def : SVE_1_Op_Imm_Shift_Pred_Pat(NAME # _S)>; - def : SVE_1_Op_Imm_Arith_Pred_Pat(NAME # _D)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _B)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _H)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _S)>; + def : SVE_Shift_DupImm_All_Active_Pat(NAME # _D)>; } //===----------------------------------------------------------------------===// // SVE Memory - Store Group diff --git a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll index 0a6842921cbe..c70686d3447c 100644 --- a/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll +++ b/llvm/test/CodeGen/AArch64/sve-intrinsics-int-arith-imm.ll @@ -1,3 +1,4 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py ; RUN: llc -mtriple=aarch64-linux-gnu -mattr=+sve < %s 2>%t | FileCheck %s ; RUN: FileCheck --check-prefix=WARN --allow-empty %s <%t @@ -8,8 +9,9 @@ define @smax_i8( %a) { ; CHECK-LABEL: smax_i8: -; CHECK: smax z0.b, z0.b, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.b, z0.b, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -21,8 +23,9 @@ define @smax_i8( %a) { define @smax_i16( %a) { ; CHECK-LABEL: smax_i16: -; CHECK: smax z0.h, z0.h, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.h, z0.h, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -34,8 +37,9 @@ define @smax_i16( %a) { define @smax_i32( %a) { ; CHECK-LABEL: smax_i32: -; CHECK: smax z0.s, z0.s, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.s, z0.s, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -47,8 +51,9 @@ define @smax_i32( %a) { define @smax_i64( %a) { ; CHECK-LABEL: smax_i64: -; CHECK: smax z0.d, z0.d, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smax z0.d, z0.d, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = 
insertelement undef, i64 127, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -62,8 +67,9 @@ define @smax_i64( %a) { define @smin_i8( %a) { ; CHECK-LABEL: smin_i8: -; CHECK: smin z0.b, z0.b, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.b, z0.b, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -75,8 +81,9 @@ define @smin_i8( %a) { define @smin_i16( %a) { ; CHECK-LABEL: smin_i16: -; CHECK: smin z0.h, z0.h, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.h, z0.h, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 -128, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -88,8 +95,9 @@ define @smin_i16( %a) { define @smin_i32( %a) { ; CHECK-LABEL: smin_i32: -; CHECK: smin z0.s, z0.s, #127 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.s, z0.s, #127 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 127, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -101,8 +109,9 @@ define @smin_i32( %a) { define @smin_i64( %a) { ; CHECK-LABEL: smin_i64: -; CHECK: smin z0.d, z0.d, #-128 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: smin z0.d, z0.d, #-128 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 -128, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -116,8 +125,9 @@ define @smin_i64( %a) { define @umax_i8( %a) { ; CHECK-LABEL: umax_i8: -; CHECK: umax z0.b, z0.b, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.b, z0.b, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -129,8 +139,9 @@ define @umax_i8( %a) { define @umax_i16( %a) { ; CHECK-LABEL: umax_i16: -; CHECK: umax z0.h, z0.h, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.h, z0.h, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -142,8 +153,9 @@ define @umax_i16( %a) { define @umax_i32( %a) { ; CHECK-LABEL: umax_i32: -; CHECK: umax z0.s, z0.s, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.s, z0.s, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -155,8 +167,9 @@ define @umax_i32( %a) { define @umax_i64( %a) { ; CHECK-LABEL: umax_i64: -; CHECK: umax z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umax z0.d, z0.d, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 255, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -170,8 +183,9 @@ define @umax_i64( %a) { define @umin_i8( %a) { ; CHECK-LABEL: umin_i8: -; CHECK: umin z0.b, z0.b, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.b, z0.b, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -183,8 +197,9 @@ define @umin_i8( %a) { define @umin_i16( %a) { ; CHECK-LABEL: umin_i16: -; CHECK: umin z0.h, z0.h, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.h, z0.h, #0 +; 
CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 0, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -196,8 +211,9 @@ define @umin_i16( %a) { define @umin_i32( %a) { ; CHECK-LABEL: umin_i32: -; CHECK: umin z0.s, z0.s, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.s, z0.s, #255 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -209,8 +225,9 @@ define @umin_i32( %a) { define @umin_i64( %a) { ; CHECK-LABEL: umin_i64: -; CHECK: umin z0.d, z0.d, #0 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: umin z0.d, z0.d, #0 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 0, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -224,8 +241,9 @@ define @umin_i64( %a) { define @sqadd_b_lowimm( %a) { ; CHECK-LABEL: sqadd_b_lowimm: -; CHECK: sqadd z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv16i8( %a, @@ -235,8 +253,9 @@ define @sqadd_b_lowimm( %a) { define @sqadd_h_lowimm( %a) { ; CHECK-LABEL: sqadd_h_lowimm: -; CHECK: sqadd z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv8i16( %a, @@ -246,8 +265,9 @@ define @sqadd_h_lowimm( %a) { define @sqadd_h_highimm( %a) { ; CHECK-LABEL: sqadd_h_highimm: -; CHECK: sqadd z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv8i16( %a, @@ -257,8 +277,9 @@ define @sqadd_h_highimm( %a) { define @sqadd_s_lowimm( %a) { ; CHECK-LABEL: sqadd_s_lowimm: -; CHECK: sqadd z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv4i32( %a, @@ -268,8 +289,9 @@ define @sqadd_s_lowimm( %a) { define @sqadd_s_highimm( %a) { ; CHECK-LABEL: sqadd_s_highimm: -; CHECK: sqadd z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv4i32( %a, @@ -279,8 +301,9 @@ define @sqadd_s_highimm( %a) { define @sqadd_d_lowimm( %a) { ; CHECK-LABEL: sqadd_d_lowimm: -; CHECK: sqadd z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv2i64( %a, @@ -290,8 +313,9 @@ define @sqadd_d_lowimm( %a) { define @sqadd_d_highimm( %a) { ; CHECK-LABEL: sqadd_d_highimm: -; CHECK: sqadd z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqadd z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 
%splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqadd.x.nxv2i64( %a, @@ -303,8 +327,9 @@ define @sqadd_d_highimm( %a) { define @sqsub_b_lowimm( %a) { ; CHECK-LABEL: sqsub_b_lowimm: -; CHECK: sqsub z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv16i8( %a, @@ -314,8 +339,9 @@ define @sqsub_b_lowimm( %a) { define @sqsub_h_lowimm( %a) { ; CHECK-LABEL: sqsub_h_lowimm: -; CHECK: sqsub z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv8i16( %a, @@ -325,8 +351,9 @@ define @sqsub_h_lowimm( %a) { define @sqsub_h_highimm( %a) { ; CHECK-LABEL: sqsub_h_highimm: -; CHECK: sqsub z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv8i16( %a, @@ -336,8 +363,9 @@ define @sqsub_h_highimm( %a) { define @sqsub_s_lowimm( %a) { ; CHECK-LABEL: sqsub_s_lowimm: -; CHECK: sqsub z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv4i32( %a, @@ -347,8 +375,9 @@ define @sqsub_s_lowimm( %a) { define @sqsub_s_highimm( %a) { ; CHECK-LABEL: sqsub_s_highimm: -; CHECK: sqsub z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv4i32( %a, @@ -358,8 +387,9 @@ define @sqsub_s_highimm( %a) { define @sqsub_d_lowimm( %a) { ; CHECK-LABEL: sqsub_d_lowimm: -; CHECK: sqsub z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv2i64( %a, @@ -369,8 +399,9 @@ define @sqsub_d_lowimm( %a) { define @sqsub_d_highimm( %a) { ; CHECK-LABEL: sqsub_d_highimm: -; CHECK: sqsub z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: sqsub z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.sqsub.x.nxv2i64( %a, @@ -382,8 +413,9 @@ define @sqsub_d_highimm( %a) { define @uqadd_b_lowimm( %a) { ; CHECK-LABEL: uqadd_b_lowimm: -; CHECK: uqadd z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv16i8( %a, @@ -393,8 +425,9 @@ define @uqadd_b_lowimm( %a) { define @uqadd_h_lowimm( %a) { ; CHECK-LABEL: uqadd_h_lowimm: -; CHECK: uqadd z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, 
i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv8i16( %a, @@ -404,8 +437,9 @@ define @uqadd_h_lowimm( %a) { define @uqadd_h_highimm( %a) { ; CHECK-LABEL: uqadd_h_highimm: -; CHECK: uqadd z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv8i16( %a, @@ -415,8 +449,9 @@ define @uqadd_h_highimm( %a) { define @uqadd_s_lowimm( %a) { ; CHECK-LABEL: uqadd_s_lowimm: -; CHECK: uqadd z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv4i32( %a, @@ -428,8 +463,9 @@ define @uqadd_s_lowimm( %a) { define @uqsub_b_lowimm( %a) { ; CHECK-LABEL: uqsub_b_lowimm: -; CHECK: uqsub z0.b, z0.b, #27 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.b, z0.b, #27 // =0x1b +; CHECK-NEXT: ret %elt = insertelement undef, i8 27, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv16i8( %a, @@ -439,8 +475,9 @@ define @uqsub_b_lowimm( %a) { define @uqsub_h_lowimm( %a) { ; CHECK-LABEL: uqsub_h_lowimm: -; CHECK: uqsub z0.h, z0.h, #43 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, #43 // =0x2b +; CHECK-NEXT: ret %elt = insertelement undef, i16 43, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv8i16( %a, @@ -450,8 +487,9 @@ define @uqsub_h_lowimm( %a) { define @uqsub_h_highimm( %a) { ; CHECK-LABEL: uqsub_h_highimm: -; CHECK: uqsub z0.h, z0.h, #2048 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.h, z0.h, #2048 // =0x800 +; CHECK-NEXT: ret %elt = insertelement undef, i16 2048, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv8i16( %a, @@ -461,8 +499,9 @@ define @uqsub_h_highimm( %a) { define @uqsub_s_lowimm( %a) { ; CHECK-LABEL: uqsub_s_lowimm: -; CHECK: uqsub z0.s, z0.s, #1 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, #1 // =0x1 +; CHECK-NEXT: ret %elt = insertelement undef, i32 1, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv4i32( %a, @@ -472,8 +511,9 @@ define @uqsub_s_lowimm( %a) { define @uqsub_s_highimm( %a) { ; CHECK-LABEL: uqsub_s_highimm: -; CHECK: uqsub z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv4i32( %a, @@ -483,8 +523,9 @@ define @uqsub_s_highimm( %a) { define @uqsub_d_lowimm( %a) { ; CHECK-LABEL: uqsub_d_lowimm: -; CHECK: uqsub z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv2i64( %a, @@ -494,8 +535,9 @@ define @uqsub_d_lowimm( %a) { define @uqsub_d_highimm( %a) { ; CHECK-LABEL: uqsub_d_highimm: -; CHECK: uqsub z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqsub z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, 
i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqsub.x.nxv2i64( %a, @@ -506,8 +548,9 @@ define @uqsub_d_highimm( %a) { define @uqadd_s_highimm( %a) { ; CHECK-LABEL: uqadd_s_highimm: -; CHECK: uqadd z0.s, z0.s, #8192 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.s, z0.s, #8192 // =0x2000 +; CHECK-NEXT: ret %elt = insertelement undef, i32 8192, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv4i32( %a, @@ -517,8 +560,9 @@ define @uqadd_s_highimm( %a) { define @uqadd_d_lowimm( %a) { ; CHECK-LABEL: uqadd_d_lowimm: -; CHECK: uqadd z0.d, z0.d, #255 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, #255 // =0xff +; CHECK-NEXT: ret %elt = insertelement undef, i64 255, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv2i64( %a, @@ -528,8 +572,9 @@ define @uqadd_d_lowimm( %a) { define @uqadd_d_highimm( %a) { ; CHECK-LABEL: uqadd_d_highimm: -; CHECK: uqadd z0.d, z0.d, #65280 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: uqadd z0.d, z0.d, #65280 // =0xff00 +; CHECK-NEXT: ret %elt = insertelement undef, i64 65280, i32 0 %splat = shufflevector %elt, undef, zeroinitializer %out = call @llvm.aarch64.sve.uqadd.x.nxv2i64( %a, @@ -539,10 +584,24 @@ define @uqadd_d_highimm( %a) { ; ASR -define @asr_i8( %a) { +define @asr_i8( %pg, %a) { ; CHECK-LABEL: asr_i8: -; CHECK: asr z0.b, z0.b, #8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 9, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @asr_i8_all_active( %a) { +; CHECK-LABEL: asr_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.b, z0.b, #8 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -552,10 +611,37 @@ define @asr_i8( %a) { ret %out } -define @asr_i16( %a) { +; Ensure we don't match a right shift by zero to the immediate form. +define @asr_i8_too_small( %pg, %a) { +; CHECK-LABEL: asr_i8_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #0 // =0x0 +; CHECK-NEXT: asr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i16( %pg, %a) { ; CHECK-LABEL: asr_i16: -; CHECK: asr z0.h, z0.h, #16 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, p0/m, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 17, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @asr_i16_all_active( %a) { +; CHECK-LABEL: asr_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.h, z0.h, #16 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -565,10 +651,37 @@ define @asr_i16( %a) { ret %out } -define @asr_i32( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @asr_i16_too_small( %pg, %a) { +; CHECK-LABEL: asr_i16_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #0 // =0x0 +; CHECK-NEXT: asr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i32( %pg, %a) { ; CHECK-LABEL: asr_i32: -; CHECK: asr z0.s, z0.s, #32 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, p0/m, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 33, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @asr_i32_all_active( %a) { +; CHECK-LABEL: asr_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.s, z0.s, #32 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -578,10 +691,37 @@ define @asr_i32( %a) { ret %out } -define @asr_i64( %a) { +; Ensure we don't match a right shift by zero to the immediate form. +define @asr_i32_too_small( %pg, %a) { +; CHECK-LABEL: asr_i32_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #0 // =0x0 +; CHECK-NEXT: asr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @asr_i64( %pg, %a) { ; CHECK-LABEL: asr_i64: -; CHECK: asr z0.d, z0.d, #64 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, p0/m, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @asr_i64_all_active( %a) { +; CHECK-LABEL: asr_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: asr z0.d, z0.d, #64 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -591,12 +731,39 @@ define @asr_i64( %a) { ret %out } +; Ensure we don't match a right shift by zero to the immediate form. +define @asr_i64_too_small( %pg, %a) { +; CHECK-LABEL: asr_i64_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: asr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.asr.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + ; LSL -define @lsl_i8( %a) { +define @lsl_i8( %pg, %a) { ; CHECK-LABEL: lsl_i8: -; CHECK: lsl z0.b, z0.b, #7 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #7 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 7, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i8_all_active( %a) { +; CHECK-LABEL: lsl_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, z0.b, #7 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 7, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -606,10 +773,50 @@ define @lsl_i8( %a) { ret %out } -define @lsl_i16( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i8_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i8_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #8 // =0x8 +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %elt = insertelement undef, i8 8, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i8_zero( %pg, %a) { +; CHECK-LABEL: lsl_i8_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.b, p0/m, z0.b, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i16( %pg, %a) { ; CHECK-LABEL: lsl_i16: -; CHECK: lsl z0.h, z0.h, #15 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #15 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 15, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i16_all_active( %a) { +; CHECK-LABEL: lsl_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, z0.h, #15 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 15, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -619,10 +826,50 @@ define @lsl_i16( %a) { ret %out } -define @lsl_i32( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. +define @lsl_i16_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i16_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #16 // =0x10 +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %elt = insertelement undef, i16 16, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i16_zero( %pg, %a) { +; CHECK-LABEL: lsl_i16_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.h, p0/m, z0.h, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i32( %pg, %a) { ; CHECK-LABEL: lsl_i32: -; CHECK: lsl z0.s, z0.s, #31 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #31 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 31, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i32_all_active( %a) { +; CHECK-LABEL: lsl_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, z0.s, #31 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 31, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -632,10 +879,50 @@ define @lsl_i32( %a) { ret %out } -define @lsl_i64( %a) { +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. 
+define @lsl_i32_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i32_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #32 // =0x20 +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %elt = insertelement undef, i32 32, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i32_zero( %pg, %a) { +; CHECK-LABEL: lsl_i32_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.s, p0/m, z0.s, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsl_i64( %pg, %a) { ; CHECK-LABEL: lsl_i64: -; CHECK: lsl z0.d, z0.d, #63 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #63 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 63, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i64_all_active( %a) { +; CHECK-LABEL: lsl_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, z0.d, #63 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 63, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -645,12 +932,52 @@ define @lsl_i64( %a) { ret %out } +; Ensure we don't match a left shift bigger than its bitwidth to the immediate form. +define @lsl_i64_too_big( %pg, %a) { +; CHECK-LABEL: lsl_i64_too_big: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #64 // =0x40 +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %elt = insertelement undef, i64 64, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsl_i64_zero( %pg, %a) { +; CHECK-LABEL: lsl_i64_zero: +; CHECK: // %bb.0: +; CHECK-NEXT: lsl z0.d, p0/m, z0.d, #0 +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsl.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + ; LSR -define @lsr_i8( %a) { +define @lsr_i8( %pg, %a) { ; CHECK-LABEL: lsr_i8: -; CHECK: lsr z0.b, z0.b, #8 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, #8 +; CHECK-NEXT: ret + %elt = insertelement undef, i8 9, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i8_all_active( %a) { +; CHECK-LABEL: lsr_i8_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.b, z0.b, #8 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv16i1(i32 31) %elt = insertelement undef, i8 8, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -660,10 +987,37 @@ define @lsr_i8( %a) { ret %out } -define @lsr_i16( %a) { +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i8_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i8_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.b, #0 // =0x0 +; CHECK-NEXT: lsr z0.b, p0/m, z0.b, z1.b +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv16i8( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i16( %pg, %a) { ; CHECK-LABEL: lsr_i16: -; CHECK: lsr z0.h, z0.h, #16 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, #16 +; CHECK-NEXT: ret + %elt = insertelement undef, i16 17, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i16_all_active( %a) { +; CHECK-LABEL: lsr_i16_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.h, z0.h, #16 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv8i1(i32 31) %elt = insertelement undef, i16 16, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -673,10 +1027,37 @@ define @lsr_i16( %a) { ret %out } -define @lsr_i32( %a) { +; Ensure we don't match a right shift by zero to the immediate form. +define @lsr_i16_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i16_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.h, #0 // =0x0 +; CHECK-NEXT: lsr z0.h, p0/m, z0.h, z1.h +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv8i16( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i32( %pg, %a) { ; CHECK-LABEL: lsr_i32: -; CHECK: lsr z0.s, z0.s, #32 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, #32 +; CHECK-NEXT: ret + %elt = insertelement undef, i32 33, i32 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i32_all_active( %a) { +; CHECK-LABEL: lsr_i32_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.s, z0.s, #32 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv4i1(i32 31) %elt = insertelement undef, i32 32, i32 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -686,10 +1067,37 @@ define @lsr_i32( %a) { ret %out } -define @lsr_i64( %a) { +; Ensure we don't match a right shift by zero to the immediate form. +define @lsr_i32_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i32_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.s, #0 // =0x0 +; CHECK-NEXT: lsr z0.s, p0/m, z0.s, z1.s +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv4i32( %pg, + %a, + zeroinitializer) + ret %out +} + +define @lsr_i64( %pg, %a) { ; CHECK-LABEL: lsr_i64: -; CHECK: lsr z0.d, z0.d, #64 -; CHECK-NEXT: ret +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, #64 +; CHECK-NEXT: ret + %elt = insertelement undef, i64 65, i64 0 + %splat = shufflevector %elt, undef, zeroinitializer + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + %splat) + ret %out +} + +define @lsr_i64_all_active( %a) { +; CHECK-LABEL: lsr_i64_all_active: +; CHECK: // %bb.0: +; CHECK-NEXT: lsr z0.d, z0.d, #64 +; CHECK-NEXT: ret %pg = call @llvm.aarch64.sve.ptrue.nxv2i1(i32 31) %elt = insertelement undef, i64 64, i64 0 %splat = shufflevector %elt, undef, zeroinitializer @@ -699,6 +1107,19 @@ define @lsr_i64( %a) { ret %out } +; Ensure we don't match a right shift by zero to the immediate form. 
+define @lsr_i64_too_small( %pg, %a) { +; CHECK-LABEL: lsr_i64_too_small: +; CHECK: // %bb.0: +; CHECK-NEXT: mov z1.d, #0 // =0x0 +; CHECK-NEXT: lsr z0.d, p0/m, z0.d, z1.d +; CHECK-NEXT: ret + %out = call @llvm.aarch64.sve.lsr.nxv2i64( %pg, + %a, + zeroinitializer) + ret %out +} + declare @llvm.aarch64.sve.sqadd.x.nxv16i8(, ) declare @llvm.aarch64.sve.sqadd.x.nxv8i16(, ) declare @llvm.aarch64.sve.sqadd.x.nxv4i32(, )
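; Illustrative usage sketch (not part of the patch above): assuming an
; SVE-enabled llc invocation as in the RUN line, a predicated right shift
; whose splatted amount exceeds the element width is expected to saturate to
; the immediate form's upper bound (#8 for byte elements) rather than fall
; back to a MOV + ASR register pair, mirroring the asr_i8 test above. The
; function name and shift amount here are illustrative only.
define <vscale x 16 x i8> @asr_i8_saturated_example(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a) {
  ; i8 100 is well above the byte element width, so selection clamps it to #8.
  %elt = insertelement <vscale x 16 x i8> undef, i8 100, i32 0
  %splat = shufflevector <vscale x 16 x i8> %elt, <vscale x 16 x i8> undef, <vscale x 16 x i32> zeroinitializer
  %out = call <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1> %pg, <vscale x 16 x i8> %a, <vscale x 16 x i8> %splat)
  ret <vscale x 16 x i8> %out
}
declare <vscale x 16 x i8> @llvm.aarch64.sve.asr.nxv16i8(<vscale x 16 x i1>, <vscale x 16 x i8>, <vscale x 16 x i8>)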