[SVE] Replace remaining _MERGE_OP1 nodes with _PRED variants.
This is the final bit of work to relax the register allocation requirements when code generating normal LLVM IR, which rarely cares about the results of inactive lanes. By using _PRED nodes we can make better use of SVE's reversed instructions. This also removes a redundant parameter from the min/max tests.

Differential Revision: https://reviews.llvm.org/D85142
This commit is contained in:
parent e734e8286b
commit 4be13b15d6
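As a quick illustration of the reversed-instruction win (a minimal sketch mirroring the asrr/lslr/lsrr tests added at the end of this patch), IR such as

  %shr = ashr <vscale x 16 x i8> %b, %a

previously forced a register copy of %b into the destination register before the destructive ASR, because the _MERGE_OP1 node pinned the first source to the destination; with SRA_PRED, isel is free to pick the reversed form and keep both inputs where they already are:

  ptrue p0.b
  asrr z0.b, p0/m, z0.b, z1.b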
@@ -1397,14 +1397,14 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::TLSDESC_CALLSEQ)
     MAKE_CASE(AArch64ISD::ADD_PRED)
     MAKE_CASE(AArch64ISD::SDIV_PRED)
+    MAKE_CASE(AArch64ISD::SHL_PRED)
+    MAKE_CASE(AArch64ISD::SMAX_PRED)
+    MAKE_CASE(AArch64ISD::SMIN_PRED)
+    MAKE_CASE(AArch64ISD::SRA_PRED)
+    MAKE_CASE(AArch64ISD::SRL_PRED)
     MAKE_CASE(AArch64ISD::UDIV_PRED)
-    MAKE_CASE(AArch64ISD::SMIN_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::UMIN_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SMAX_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::UMAX_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SHL_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SRL_MERGE_OP1)
-    MAKE_CASE(AArch64ISD::SRA_MERGE_OP1)
+    MAKE_CASE(AArch64ISD::UMAX_PRED)
+    MAKE_CASE(AArch64ISD::UMIN_PRED)
     MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::ADC)
     MAKE_CASE(AArch64ISD::SBC)
@@ -3540,13 +3540,13 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
   case ISD::UDIV:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::UDIV_PRED);
   case ISD::SMIN:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMIN_PRED);
   case ISD::UMIN:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMIN_PRED);
   case ISD::SMAX:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::SMAX_PRED);
   case ISD::UMAX:
-    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_MERGE_OP1);
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::UMAX_PRED);
   case ISD::SRA:
   case ISD::SRL:
   case ISD::SHL:
@@ -8914,7 +8914,7 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
 
   case ISD::SHL:
     if (VT.isScalableVector())
-      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_MERGE_OP1);
+      return LowerToPredicatedOp(Op, DAG, AArch64ISD::SHL_PRED);
 
     if (isVShiftLImm(Op.getOperand(1), VT, false, Cnt) && Cnt < EltSize)
       return DAG.getNode(AArch64ISD::VSHL, DL, VT, Op.getOperand(0),
@@ -8926,8 +8926,8 @@ SDValue AArch64TargetLowering::LowerVectorSRA_SRL_SHL(SDValue Op,
   case ISD::SRA:
   case ISD::SRL:
     if (VT.isScalableVector()) {
-      unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_MERGE_OP1
-                                                : AArch64ISD::SRL_MERGE_OP1;
+      unsigned Opc = Op.getOpcode() == ISD::SRA ? AArch64ISD::SRA_PRED
+                                                : AArch64ISD::SRL_PRED;
       return LowerToPredicatedOp(Op, DAG, Opc);
     }
 
@@ -11940,6 +11940,25 @@ static SDValue combineSVEReductionOrderedFP(SDNode *N, unsigned Opc,
                      Zero);
 }
 
+// If a merged operation has no inactive lanes we can relax it to a predicated
+// or unpredicated operation, which potentially allows better isel (perhaps
+// using immediate forms) or relaxing register reuse requirements.
+static SDValue convertMergedOpToPredOp(SDNode *N, unsigned PredOpc,
+                                       SelectionDAG &DAG) {
+  assert(N->getOpcode() == ISD::INTRINSIC_WO_CHAIN && "Expected intrinsic!");
+  assert(N->getNumOperands() == 4 && "Expected 3 operand intrinsic!");
+  SDValue Pg = N->getOperand(1);
+
+  // ISD way to specify an all active predicate.
+  if ((Pg.getOpcode() == AArch64ISD::PTRUE) &&
+      (Pg.getConstantOperandVal(0) == AArch64SVEPredPattern::all))
+    return DAG.getNode(PredOpc, SDLoc(N), N->getValueType(0), Pg,
+                       N->getOperand(2), N->getOperand(3));
+
+  // FUTURE: SplatVector(true)
+  return SDValue();
+}
+
 static SDValue performIntrinsicCombine(SDNode *N,
                                        TargetLowering::DAGCombinerInfo &DCI,
                                        const AArch64Subtarget *Subtarget) {
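For illustration, the new helper fires on intrinsic calls whose governing predicate is an all-active ptrue (a sketch; pattern 31 is the "all" pattern taken by @llvm.aarch64.sve.ptrue):

  %pg = call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %r = call <vscale x 4 x i32> @llvm.aarch64.sve.smin.nxv4i32(<vscale x 4 x i1> %pg, <vscale x 4 x i32> %a, <vscale x 4 x i32> %b)

With every lane active there are no inactive lanes to merge into, so the merging semantics are irrelevant and the node can be rewritten to SMIN_PRED, which may later match an immediate form such as SMIN_ZI.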
@@ -12018,26 +12037,19 @@ static SDValue performIntrinsicCombine(SDNode *N,
   case Intrinsic::aarch64_sve_ext:
     return LowerSVEIntrinsicEXT(N, DAG);
   case Intrinsic::aarch64_sve_smin:
-    return DAG.getNode(AArch64ISD::SMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_umin:
-    return DAG.getNode(AArch64ISD::UMIN_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::UMIN_PRED, DAG);
   case Intrinsic::aarch64_sve_smax:
-    return DAG.getNode(AArch64ISD::SMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_umax:
-    return DAG.getNode(AArch64ISD::UMAX_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::UMAX_PRED, DAG);
   case Intrinsic::aarch64_sve_lsl:
-    return DAG.getNode(AArch64ISD::SHL_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SHL_PRED, DAG);
   case Intrinsic::aarch64_sve_lsr:
-    return DAG.getNode(AArch64ISD::SRL_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SRL_PRED, DAG);
   case Intrinsic::aarch64_sve_asr:
-    return DAG.getNode(AArch64ISD::SRA_MERGE_OP1, SDLoc(N), N->getValueType(0),
-                       N->getOperand(1), N->getOperand(2), N->getOperand(3));
+    return convertMergedOpToPredOp(N, AArch64ISD::SRA_PRED, DAG);
   case Intrinsic::aarch64_sve_cmphs:
     if (!N->getOperand(2).getValueType().isFloatingPoint())
       return DAG.getNode(AArch64ISD::SETCC_MERGE_ZERO, SDLoc(N),
@@ -72,7 +72,7 @@ enum NodeType : unsigned {
   ADC,
   SBC, // adc, sbc instructions
 
-  // Arithmetic instructions
+  // Predicated instructions where inactive lanes produce undefined results.
   ADD_PRED,
   FADD_PRED,
   FDIV_PRED,
@@ -80,14 +80,14 @@ enum NodeType : unsigned {
   FMUL_PRED,
   FSUB_PRED,
   SDIV_PRED,
+  SHL_PRED,
+  SMAX_PRED,
+  SMIN_PRED,
+  SRA_PRED,
+  SRL_PRED,
   UDIV_PRED,
-  SMIN_MERGE_OP1,
-  UMIN_MERGE_OP1,
-  SMAX_MERGE_OP1,
-  UMAX_MERGE_OP1,
-  SHL_MERGE_OP1,
-  SRL_MERGE_OP1,
-  SRA_MERGE_OP1,
+  UMAX_PRED,
+  UMIN_PRED,
 
   SETCC_MERGE_ZERO,
@@ -174,22 +174,20 @@ def SDT_AArch64FMA : SDTypeProfile<1, 4, [
 
 // Predicated operations with the result of inactive lanes being unspecified.
 def AArch64add_p : SDNode<"AArch64ISD::ADD_PRED", SDT_AArch64Arith>;
+def AArch64asr_p : SDNode<"AArch64ISD::SRA_PRED", SDT_AArch64Arith>;
 def AArch64fadd_p : SDNode<"AArch64ISD::FADD_PRED", SDT_AArch64Arith>;
 def AArch64fdiv_p : SDNode<"AArch64ISD::FDIV_PRED", SDT_AArch64Arith>;
 def AArch64fma_p : SDNode<"AArch64ISD::FMA_PRED", SDT_AArch64FMA>;
 def AArch64fmul_p : SDNode<"AArch64ISD::FMUL_PRED", SDT_AArch64Arith>;
 def AArch64fsub_p : SDNode<"AArch64ISD::FSUB_PRED", SDT_AArch64Arith>;
+def AArch64lsl_p : SDNode<"AArch64ISD::SHL_PRED", SDT_AArch64Arith>;
+def AArch64lsr_p : SDNode<"AArch64ISD::SRL_PRED", SDT_AArch64Arith>;
 def AArch64sdiv_p : SDNode<"AArch64ISD::SDIV_PRED", SDT_AArch64Arith>;
+def AArch64smax_p : SDNode<"AArch64ISD::SMAX_PRED", SDT_AArch64Arith>;
+def AArch64smin_p : SDNode<"AArch64ISD::SMIN_PRED", SDT_AArch64Arith>;
 def AArch64udiv_p : SDNode<"AArch64ISD::UDIV_PRED", SDT_AArch64Arith>;
+def AArch64umax_p : SDNode<"AArch64ISD::UMAX_PRED", SDT_AArch64Arith>;
+def AArch64umin_p : SDNode<"AArch64ISD::UMIN_PRED", SDT_AArch64Arith>;
 
-// Merging op1 into the inactive lanes.
-def AArch64smin_m1 : SDNode<"AArch64ISD::SMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umin_m1 : SDNode<"AArch64ISD::UMIN_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64smax_m1 : SDNode<"AArch64ISD::SMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64umax_m1 : SDNode<"AArch64ISD::UMAX_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64lsl_m1 : SDNode<"AArch64ISD::SHL_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64lsr_m1 : SDNode<"AArch64ISD::SRL_MERGE_OP1", SDT_AArch64Arith>;
-def AArch64asr_m1 : SDNode<"AArch64ISD::SRA_MERGE_OP1", SDT_AArch64Arith>;
-
 def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
 def AArch64clasta_n : SDNode<"AArch64ISD::CLASTA_N", SDT_AArch64ReduceWithInit>;
@@ -287,10 +285,10 @@ let Predicates = [HasSVE] in {
   defm EOR_ZI : sve_int_log_imm<0b01, "eor", "eon", xor>;
   defm AND_ZI : sve_int_log_imm<0b10, "and", "bic", and>;
 
-  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_m1>;
-  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_m1>;
-  defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_m1>;
-  defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_m1>;
+  defm SMAX_ZI : sve_int_arith_imm1<0b00, "smax", AArch64smax_p>;
+  defm SMIN_ZI : sve_int_arith_imm1<0b10, "smin", AArch64smin_p>;
+  defm UMAX_ZI : sve_int_arith_imm1_unsigned<0b01, "umax", AArch64umax_p>;
+  defm UMIN_ZI : sve_int_arith_imm1_unsigned<0b11, "umin", AArch64umin_p>;
 
   defm MUL_ZI : sve_int_arith_imm2<"mul", mul>;
   defm MUL_ZPmZ : sve_int_bin_pred_arit_2<0b000, "mul", int_aarch64_sve_mul>;
@@ -343,12 +341,17 @@ let Predicates = [HasSVE] in {
   defm FABS_ZPmZ : sve_int_un_pred_arit_1_fp<0b100, "fabs", int_aarch64_sve_fabs>;
   defm FNEG_ZPmZ : sve_int_un_pred_arit_1_fp<0b101, "fneg", int_aarch64_sve_fneg>;
 
-  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", AArch64smax_m1>;
-  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", AArch64umax_m1>;
-  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", AArch64smin_m1>;
-  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", AArch64umin_m1>;
-  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", int_aarch64_sve_sabd>;
-  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", int_aarch64_sve_uabd>;
+  defm SMAX_ZPmZ : sve_int_bin_pred_arit_1<0b000, "smax", "SMAX_ZPZZ", int_aarch64_sve_smax, DestructiveBinaryComm>;
+  defm UMAX_ZPmZ : sve_int_bin_pred_arit_1<0b001, "umax", "UMAX_ZPZZ", int_aarch64_sve_umax, DestructiveBinaryComm>;
+  defm SMIN_ZPmZ : sve_int_bin_pred_arit_1<0b010, "smin", "SMIN_ZPZZ", int_aarch64_sve_smin, DestructiveBinaryComm>;
+  defm UMIN_ZPmZ : sve_int_bin_pred_arit_1<0b011, "umin", "UMIN_ZPZZ", int_aarch64_sve_umin, DestructiveBinaryComm>;
+  defm SABD_ZPmZ : sve_int_bin_pred_arit_1<0b100, "sabd", "SABD_ZPZZ", int_aarch64_sve_sabd, DestructiveBinaryComm>;
+  defm UABD_ZPmZ : sve_int_bin_pred_arit_1<0b101, "uabd", "UABD_ZPZZ", int_aarch64_sve_uabd, DestructiveBinaryComm>;
 
+  defm SMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64smax_p>;
+  defm UMAX_ZPZZ : sve_int_bin_pred_bhsd<AArch64umax_p>;
+  defm SMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64smin_p>;
+  defm UMIN_ZPZZ : sve_int_bin_pred_bhsd<AArch64umin_p>;
+
   defm FRECPE_ZZ : sve_fp_2op_u_zd<0b110, "frecpe", int_aarch64_sve_frecpe_x>;
   defm FRSQRTE_ZZ : sve_fp_2op_u_zd<0b111, "frsqrte", int_aarch64_sve_frsqrte_x>;
@@ -1313,9 +1316,9 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm INDEX_II : sve_int_index_ii<"index", index_vector>;
 
   // Unpredicated shifts
-  defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_m1>;
-  defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_m1>;
-  defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_m1>;
+  defm ASR_ZZI : sve_int_bin_cons_shift_imm_right<0b00, "asr", AArch64asr_p>;
+  defm LSR_ZZI : sve_int_bin_cons_shift_imm_right<0b01, "lsr", AArch64lsr_p>;
+  defm LSL_ZZI : sve_int_bin_cons_shift_imm_left< 0b11, "lsl", AArch64lsl_p>;
 
   defm ASR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b00, "asr">;
   defm LSR_WIDE_ZZZ : sve_int_bin_cons_shift_wide<0b01, "lsr">;
@@ -1328,19 +1331,23 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm ASRD_ZPmI : sve_int_bin_pred_shift_imm_right<0b0100, "asrd", "ASRD_ZPZI", int_aarch64_sve_asrd>;
 
   let Predicates = [HasSVE, UseExperimentalZeroingPseudos] in {
-    defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64asr_m1>;
-    defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsr_m1>;
-    defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<AArch64lsl_m1>;
+    defm ASR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_asr>;
+    defm LSR_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsr>;
+    defm LSL_ZPZZ : sve_int_bin_pred_zeroing_bhsd<int_aarch64_sve_lsl>;
     defm ASRD_ZPZI : sve_int_bin_pred_shift_imm_right_zeroing_bhsd<int_aarch64_sve_asrd>;
   }
 
-  defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", AArch64asr_m1, "ASRR_ZPmZ">;
-  defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", AArch64lsr_m1, "LSRR_ZPmZ">;
-  defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", AArch64lsl_m1, "LSLR_ZPmZ">;
+  defm ASR_ZPmZ : sve_int_bin_pred_shift<0b000, "asr", "ASR_ZPZZ", int_aarch64_sve_asr, "ASRR_ZPmZ">;
+  defm LSR_ZPmZ : sve_int_bin_pred_shift<0b001, "lsr", "LSR_ZPZZ", int_aarch64_sve_lsr, "LSRR_ZPmZ">;
+  defm LSL_ZPmZ : sve_int_bin_pred_shift<0b011, "lsl", "LSL_ZPZZ", int_aarch64_sve_lsl, "LSLR_ZPmZ">;
   defm ASRR_ZPmZ : sve_int_bin_pred_shift<0b100, "asrr", "ASRR_ZPZZ", null_frag, "ASR_ZPmZ", /*isReverseInstr*/ 1>;
   defm LSRR_ZPmZ : sve_int_bin_pred_shift<0b101, "lsrr", "LSRR_ZPZZ", null_frag, "LSR_ZPmZ", /*isReverseInstr*/ 1>;
   defm LSLR_ZPmZ : sve_int_bin_pred_shift<0b111, "lslr", "LSLR_ZPZZ", null_frag, "LSL_ZPmZ", /*isReverseInstr*/ 1>;
 
+  defm ASR_ZPZZ : sve_int_bin_pred_bhsd<AArch64asr_p>;
+  defm LSR_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsr_p>;
+  defm LSL_ZPZZ : sve_int_bin_pred_bhsd<AArch64lsl_p>;
+
   defm ASR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b000, "asr", int_aarch64_sve_asr_wide>;
   defm LSR_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b001, "lsr", int_aarch64_sve_lsr_wide>;
   defm LSL_WIDE_ZPmZ : sve_int_bin_pred_shift_wide<0b011, "lsl", int_aarch64_sve_lsl_wide>;
@@ -2382,11 +2382,19 @@ multiclass sve_int_bin_pred_arit_0<bits<3> opc, string asm, string Ps,
   def : SVE_3_Op_Pat<nxv2i64, op, nxv2i1, nxv2i64, nxv2i64, !cast<Instruction>(NAME # _D)>;
 }
 
-multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, SDPatternOperator op> {
-  def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>;
-  def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>;
-  def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>;
-  def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>;
+multiclass sve_int_bin_pred_arit_1<bits<3> opc, string asm, string Ps,
+                                   SDPatternOperator op,
+                                   DestructiveInstTypeEnum flags> {
+  let DestructiveInstType = flags in {
+  def _B : sve_int_bin_pred_arit_log<0b00, 0b01, opc, asm, ZPR8>,
+           SVEPseudo2Instr<Ps # _B, 1>;
+  def _H : sve_int_bin_pred_arit_log<0b01, 0b01, opc, asm, ZPR16>,
+           SVEPseudo2Instr<Ps # _H, 1>;
+  def _S : sve_int_bin_pred_arit_log<0b10, 0b01, opc, asm, ZPR32>,
+           SVEPseudo2Instr<Ps # _S, 1>;
+  def _D : sve_int_bin_pred_arit_log<0b11, 0b01, opc, asm, ZPR64>,
+           SVEPseudo2Instr<Ps # _D, 1>;
+  }
 
   def : SVE_3_Op_Pat<nxv16i8, op, nxv16i1, nxv16i8, nxv16i8, !cast<Instruction>(NAME # _B)>;
   def : SVE_3_Op_Pat<nxv8i16, op, nxv8i1, nxv8i16, nxv8i16, !cast<Instruction>(NAME # _H)>;
@@ -182,7 +182,7 @@ define <vscale x 2 x i64> @urem_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
 ; SMIN
 ;
 
-define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smin_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
@@ -193,7 +193,7 @@ define <vscale x 16 x i8> @smin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
   ret <vscale x 16 x i8> %min
 }
 
-define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smin_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -204,7 +204,7 @@ define <vscale x 8 x i16> @smin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
   ret <vscale x 8 x i16> %min
 }
 
-define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smin_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -215,7 +215,7 @@ define <vscale x 4 x i32> @smin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %min
 }
 
-define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smin_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -226,7 +226,7 @@ define <vscale x 2 x i64> @smin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %min
 }
 
-define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b, <vscale x 32 x i8> %c) {
+define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i8> %b) {
 ; CHECK-LABEL: smin_split_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
@@ -238,7 +238,7 @@ define <vscale x 32 x i8> @smin_split_i8(<vscale x 32 x i8> %a, <vscale x 32 x i
   ret <vscale x 32 x i8> %min
 }
 
-define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b, <vscale x 32 x i16> %c) {
+define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32 x i16> %b) {
 ; CHECK-LABEL: smin_split_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -252,7 +252,7 @@ define <vscale x 32 x i16> @smin_split_i16(<vscale x 32 x i16> %a, <vscale x 32
   ret <vscale x 32 x i16> %min
 }
 
-define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
 ; CHECK-LABEL: smin_split_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -264,7 +264,7 @@ define <vscale x 8 x i32> @smin_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i
   ret <vscale x 8 x i32> %min
 }
 
-define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
 ; CHECK-LABEL: smin_split_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -276,7 +276,7 @@ define <vscale x 4 x i64> @smin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
   ret <vscale x 4 x i64> %min
 }
 
-define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: smin_promote_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -289,7 +289,7 @@ define <vscale x 8 x i8> @smin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8
   ret <vscale x 8 x i8> %min
 }
 
-define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
 ; CHECK-LABEL: smin_promote_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -302,7 +302,7 @@ define <vscale x 4 x i16> @smin_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x
   ret <vscale x 4 x i16> %min
 }
 
-define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: smin_promote_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -319,7 +319,7 @@ define <vscale x 2 x i32> @smin_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x
 ; UMIN
 ;
 
-define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umin_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
@@ -330,7 +330,7 @@ define <vscale x 16 x i8> @umin_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
   ret <vscale x 16 x i8> %min
 }
 
-define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umin_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -341,7 +341,7 @@ define <vscale x 8 x i16> @umin_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
   ret <vscale x 8 x i16> %min
 }
 
-define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umin_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -352,7 +352,7 @@ define <vscale x 4 x i32> @umin_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %min
 }
 
-define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umin_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -363,7 +363,7 @@ define <vscale x 2 x i64> @umin_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %min
 }
 
-define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b, <vscale x 4 x i64> %c) {
+define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i64> %b) {
 ; CHECK-LABEL: umin_split_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -375,7 +375,7 @@ define <vscale x 4 x i64> @umin_split_i64(<vscale x 4 x i64> %a, <vscale x 4 x i
   ret <vscale x 4 x i64> %min
 }
 
-define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b, <vscale x 8 x i8> %c) {
+define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8> %b) {
 ; CHECK-LABEL: umin_promote_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -392,7 +392,7 @@ define <vscale x 8 x i8> @umin_promote_i8(<vscale x 8 x i8> %a, <vscale x 8 x i8
 ; SMAX
 ;
 
-define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: smax_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
@@ -403,7 +403,7 @@ define <vscale x 16 x i8> @smax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
   ret <vscale x 16 x i8> %max
 }
 
-define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: smax_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -414,7 +414,7 @@ define <vscale x 8 x i16> @smax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
   ret <vscale x 8 x i16> %max
 }
 
-define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: smax_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -425,7 +425,7 @@ define <vscale x 4 x i32> @smax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %max
 }
 
-define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: smax_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -436,7 +436,7 @@ define <vscale x 2 x i64> @smax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %max
 }
 
-define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b, <vscale x 8 x i32> %c) {
+define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i32> %b) {
 ; CHECK-LABEL: smax_split_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -448,7 +448,7 @@ define <vscale x 8 x i32> @smax_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i
   ret <vscale x 8 x i32> %max
 }
 
-define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b, <vscale x 4 x i16> %c) {
+define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x i16> %b) {
 ; CHECK-LABEL: smax_promote_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -465,7 +465,7 @@ define <vscale x 4 x i16> @smax_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x
 ; UMAX
 ;
 
-define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b, <vscale x 16 x i8> %c) {
+define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b) {
 ; CHECK-LABEL: umax_i8:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.b
@@ -476,7 +476,7 @@ define <vscale x 16 x i8> @umax_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b,
   ret <vscale x 16 x i8> %max
 }
 
-define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b, <vscale x 8 x i16> %c) {
+define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b) {
 ; CHECK-LABEL: umax_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -487,7 +487,7 @@ define <vscale x 8 x i16> @umax_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b
   ret <vscale x 8 x i16> %max
 }
 
-define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b, <vscale x 4 x i32> %c) {
+define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b) {
 ; CHECK-LABEL: umax_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.s
@@ -498,7 +498,7 @@ define <vscale x 4 x i32> @umax_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b
   ret <vscale x 4 x i32> %max
 }
 
-define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b, <vscale x 2 x i64> %c) {
+define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b) {
 ; CHECK-LABEL: umax_i64:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -509,7 +509,7 @@ define <vscale x 2 x i64> @umax_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b
   ret <vscale x 2 x i64> %max
 }
 
-define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b, <vscale x 16 x i16> %c) {
+define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16 x i16> %b) {
 ; CHECK-LABEL: umax_split_i16:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.h
@@ -521,7 +521,7 @@ define <vscale x 16 x i16> @umax_split_i16(<vscale x 16 x i16> %a, <vscale x 16
   ret <vscale x 16 x i16> %max
 }
 
-define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b, <vscale x 2 x i32> %c) {
+define <vscale x 2 x i32> @umax_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x i32> %b) {
 ; CHECK-LABEL: umax_promote_i32:
 ; CHECK: // %bb.0:
 ; CHECK-NEXT: ptrue p0.d
@@ -601,6 +601,50 @@ define <vscale x 2 x i32> @asr_promote_i32(<vscale x 2 x i32> %a, <vscale x 2 x
   ret <vscale x 2 x i32> %shr
 }
 
+;
+; ASRR
+;
+
+define <vscale x 16 x i8> @asrr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: asrr_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: asrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @asrr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: asrr_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: asrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @asrr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: asrr_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: asrr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @asrr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: asrr_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: asrr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shr = ashr <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shr
+}
+
 ;
 ; LSL
 ;
@@ -667,6 +711,50 @@ define <vscale x 4 x i16> @lsl_promote_i16(<vscale x 4 x i16> %a, <vscale x 4 x
   ret <vscale x 4 x i16> %shl
 }
 
+;
+; LSLR
+;
+
+define <vscale x 16 x i8> @lslr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: lslr_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: lslr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shl
+}
+
+define <vscale x 8 x i16> @lslr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: lslr_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: lslr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shl
+}
+
+define <vscale x 4 x i32> @lslr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: lslr_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: lslr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shl
+}
+
+define <vscale x 2 x i64> @lslr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: lslr_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lslr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shl = shl <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shl
+}
+
 ;
 ; LSR
 ;
@@ -734,6 +822,50 @@ define <vscale x 8 x i32> @lsr_split_i32(<vscale x 8 x i32> %a, <vscale x 8 x i3
   ret <vscale x 8 x i32> %shr
 }
 
+;
+; LSRR
+;
+
+define <vscale x 16 x i8> @lsrr_i8(<vscale x 16 x i8> %a, <vscale x 16 x i8> %b){
+; CHECK-LABEL: lsrr_i8:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.b
+; CHECK-NEXT: lsrr z0.b, p0/m, z0.b, z1.b
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 16 x i8> %b, %a
+  ret <vscale x 16 x i8> %shr
+}
+
+define <vscale x 8 x i16> @lsrr_i16(<vscale x 8 x i16> %a, <vscale x 8 x i16> %b){
+; CHECK-LABEL: lsrr_i16:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.h
+; CHECK-NEXT: lsrr z0.h, p0/m, z0.h, z1.h
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 8 x i16> %b, %a
+  ret <vscale x 8 x i16> %shr
+}
+
+define <vscale x 4 x i32> @lsrr_i32(<vscale x 4 x i32> %a, <vscale x 4 x i32> %b){
+; CHECK-LABEL: lsrr_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.s
+; CHECK-NEXT: lsrr z0.s, p0/m, z0.s, z1.s
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 4 x i32> %b, %a
+  ret <vscale x 4 x i32> %shr
+}
+
+define <vscale x 2 x i64> @lsrr_i64(<vscale x 2 x i64> %a, <vscale x 2 x i64> %b){
+; CHECK-LABEL: lsrr_i64:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ptrue p0.d
+; CHECK-NEXT: lsrr z0.d, p0/m, z0.d, z1.d
+; CHECK-NEXT: ret
+  %shr = lshr <vscale x 2 x i64> %b, %a
+  ret <vscale x 2 x i64> %shr
+}
+
 ;
 ; CMP
 ;