[AArch64][SVE] Add lowering for llvm fceil
Add the functionality to lower fceil for the merge-passthru variant.

Reviewed By: paulwalker-arm

Differential Revision: https://reviews.llvm.org/D84548
commit fd536eeed9
parent d1e6103a79
@@ -138,6 +138,7 @@ static bool isMergePassthruOpcode(unsigned Opc) {
   case AArch64ISD::FNEG_MERGE_PASSTHRU:
   case AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU:
   case AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU:
+  case AArch64ISD::FCEIL_MERGE_PASSTHRU:
     return true;
   }
 }
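FCEIL_MERGE_PASSTHRU joins the opcodes whose lowering carries an explicit operand for the inactive lanes. The shared LowerToPredicatedOp path appends that extra operand only for opcodes listed in isMergePassthruOpcode, which is why the new case is needed. A minimal sketch of the assumed shape of that helper (not code from this patch; the sketch name is illustrative):

static SDValue lowerToPredicatedOpSketch(SDValue Op, SelectionDAG &DAG,
                                         unsigned NewOp, SDValue Pg) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();
  SmallVector<SDValue, 4> Operands = {Pg};   // governing predicate goes first
  for (const SDValue &V : Op->op_values())   // then the original operands
    Operands.push_back(V);
  if (isMergePassthruOpcode(NewOp))          // merge-passthru nodes take the
    Operands.push_back(DAG.getUNDEF(VT));    // inactive-lane value last
  return DAG.getNode(NewOp, DL, VT, Operands);
}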
@@ -972,6 +973,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
         setOperationAction(ISD::FMUL, VT, Custom);
         setOperationAction(ISD::FNEG, VT, Custom);
         setOperationAction(ISD::FSUB, VT, Custom);
+        setOperationAction(ISD::FCEIL, VT, Custom);
       }
     }
 
@@ -1476,6 +1478,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const {
     MAKE_CASE(AArch64ISD::FNEG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU)
+    MAKE_CASE(AArch64ISD::FCEIL_MERGE_PASSTHRU)
     MAKE_CASE(AArch64ISD::SETCC_MERGE_ZERO)
     MAKE_CASE(AArch64ISD::ADC)
     MAKE_CASE(AArch64ISD::SBC)
@@ -3337,6 +3340,9 @@ SDValue AArch64TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op,
   case Intrinsic::aarch64_sve_fneg:
     return DAG.getNode(AArch64ISD::FNEG_MERGE_PASSTHRU, dl, Op.getValueType(),
                        Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
+  case Intrinsic::aarch64_sve_frintp:
+    return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
+                       Op.getOperand(2), Op.getOperand(3), Op.getOperand(1));
   case Intrinsic::aarch64_sve_convert_to_svbool: {
     EVT OutVT = Op.getValueType();
     EVT InVT = Op.getOperand(1).getValueType();
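The operand shuffle is the subtle part of the new case: the merging SVE intrinsic supplies (inactive, pg, op) at the IR level, while the MERGE_PASSTHRU nodes expect the inactive-lane value last, as the NodeType comment later in this diff states. The same case, restated with purely illustrative comments:

case Intrinsic::aarch64_sve_frintp:
  // Intrinsic operands: 0 = intrinsic ID, 1 = inactive lanes (passthru),
  // 2 = governing predicate, 3 = source vector.
  // Node operand order: predicate, source, passthru.
  return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, dl, Op.getValueType(),
                     Op.getOperand(2),   // pg
                     Op.getOperand(3),   // source
                     Op.getOperand(1));  // passthru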
@@ -3633,6 +3639,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FDIV_PRED);
   case ISD::FNEG:
     return LowerToPredicatedOp(Op, DAG, AArch64ISD::FNEG_MERGE_PASSTHRU);
+  case ISD::FCEIL:
+    return LowerToPredicatedOp(Op, DAG, AArch64ISD::FCEIL_MERGE_PASSTHRU);
   case ISD::FP_ROUND:
   case ISD::STRICT_FP_ROUND:
     return LowerFP_ROUND(Op, DAG);
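An IR-level ceil carries neither a predicate nor a passthru, so the predicated lowering has to supply both. A rough sketch of the node the ISD::FCEIL case is expected to build for, say, nxv4f32 (the getPredicateForVector helper name and the sketch function are assumptions, not code from this patch); instruction selection then produces the frintp forms checked by the tests at the end of this diff:

static SDValue lowerCeilSketch(SDValue Op, SelectionDAG &DAG) {
  SDLoc DL(Op);
  EVT VT = Op.getValueType();                      // e.g. nxv4f32
  SDValue Pg = getPredicateForVector(DAG, DL, VT); // all-active predicate
  return DAG.getNode(AArch64ISD::FCEIL_MERGE_PASSTHRU, DL, VT,
                     Pg, Op.getOperand(0), DAG.getUNDEF(VT));
}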
@@ -95,6 +95,7 @@ enum NodeType : unsigned {
 
   // Predicated instructions with the result of inactive lanes provided by the
   // last operand.
+  FCEIL_MERGE_PASSTHRU,
   FNEG_MERGE_PASSTHRU,
   SIGN_EXTEND_INREG_MERGE_PASSTHRU,
   ZERO_EXTEND_INREG_MERGE_PASSTHRU,
@@ -202,6 +202,7 @@ def SDT_AArch64IntExtend : SDTypeProfile<1, 4, [
 def AArch64fneg_mt : SDNode<"AArch64ISD::FNEG_MERGE_PASSTHRU", SDT_AArch64Arith>;
 def AArch64sxt_mt  : SDNode<"AArch64ISD::SIGN_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
 def AArch64uxt_mt  : SDNode<"AArch64ISD::ZERO_EXTEND_INREG_MERGE_PASSTHRU", SDT_AArch64IntExtend>;
+def AArch64frintp_mt : SDNode<"AArch64ISD::FCEIL_MERGE_PASSTHRU", SDT_AArch64Arith>;
 
 def SDT_AArch64ReduceWithInit : SDTypeProfile<1, 3, [SDTCisVec<1>, SDTCisVec<3>]>;
 def AArch64clasta_n   : SDNode<"AArch64ISD::CLASTA_N",   SDT_AArch64ReduceWithInit>;
@@ -1416,7 +1417,7 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
   defm FCVTZU_ZPmZ_DtoD : sve_fp_2op_p_zd<0b1111111, "fcvtzu", ZPR64, ZPR64, int_aarch64_sve_fcvtzu,         nxv2i64, nxv2i1, nxv2f64, ElementSizeD>;
 
   defm FRINTN_ZPmZ : sve_fp_2op_p_zd_HSD<0b00000, "frintn", int_aarch64_sve_frintn>;
-  defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp>;
+  defm FRINTP_ZPmZ : sve_fp_2op_p_zd_HSD<0b00001, "frintp", int_aarch64_sve_frintp, AArch64frintp_mt>;
   defm FRINTM_ZPmZ : sve_fp_2op_p_zd_HSD<0b00010, "frintm", int_aarch64_sve_frintm>;
   defm FRINTZ_ZPmZ : sve_fp_2op_p_zd_HSD<0b00011, "frintz", int_aarch64_sve_frintz>;
   defm FRINTA_ZPmZ : sve_fp_2op_p_zd_HSD<0b00100, "frinta", int_aarch64_sve_frinta>;
@@ -2286,14 +2286,22 @@ multiclass sve_fp_2op_p_zd<bits<7> opc, string asm,
   def : SVE_3_Op_Pat<vt1, op, vt1, vt2, vt3, !cast<Instruction>(NAME)>;
 }
 
-multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op> {
+multiclass sve_fp_2op_p_zd_HSD<bits<5> opc, string asm, SDPatternOperator op_merge,
+                                                SDPatternOperator op_pt = null_frag> {
   def _H : sve_fp_2op_p_zd<{ 0b01, opc }, asm, ZPR16, ZPR16, ElementSizeH>;
   def _S : sve_fp_2op_p_zd<{ 0b10, opc }, asm, ZPR32, ZPR32, ElementSizeS>;
   def _D : sve_fp_2op_p_zd<{ 0b11, opc }, asm, ZPR64, ZPR64, ElementSizeD>;
 
-  def : SVE_3_Op_Pat<nxv8f16, op, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
-  def : SVE_3_Op_Pat<nxv4f32, op, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
-  def : SVE_3_Op_Pat<nxv2f64, op, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+  def : SVE_3_Op_Pat<nxv8f16, op_merge, nxv8f16, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_3_Op_Pat<nxv4f32, op_merge, nxv4f32, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_3_Op_Pat<nxv2f64, op_merge, nxv2f64, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
+
+  def : SVE_1_Op_Passthru_Pat<nxv8f16, op_pt, nxv8i1, nxv8f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv4f16, op_pt, nxv4i1, nxv4f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f16, op_pt, nxv2i1, nxv2f16, !cast<Instruction>(NAME # _H)>;
+  def : SVE_1_Op_Passthru_Pat<nxv4f32, op_pt, nxv4i1, nxv4f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f32, op_pt, nxv2i1, nxv2f32, !cast<Instruction>(NAME # _S)>;
+  def : SVE_1_Op_Passthru_Pat<nxv2f64, op_pt, nxv2i1, nxv2f64, !cast<Instruction>(NAME # _D)>;
 }
 
 multiclass sve2_fp_flogb<string asm, SDPatternOperator op> {
@@ -480,6 +480,62 @@ define void @float_copy(<vscale x 4 x float>* %P1, <vscale x 4 x float>* %P2) {
   ret void
 }
 
+; FCEIL
+
+define <vscale x 8 x half> @frintp_nxv8f16(<vscale x 8 x half> %a) {
+; CHECK-LABEL: frintp_nxv8f16:
+; CHECK: ptrue p0.h
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 8 x half> @llvm.ceil.nxv8f16(<vscale x 8 x half> %a)
+  ret <vscale x 8 x half> %res
+}
+
+define <vscale x 4 x half> @frintp_nxv4f16(<vscale x 4 x half> %a) {
+; CHECK-LABEL: frintp_nxv4f16:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x half> @llvm.ceil.nxv4f16(<vscale x 4 x half> %a)
+  ret <vscale x 4 x half> %res
+}
+
+define <vscale x 2 x half> @frintp_nxv2f16(<vscale x 2 x half> %a) {
+; CHECK-LABEL: frintp_nxv2f16:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: frintp z0.h, p0/m, z0.h
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x half> @llvm.ceil.nxv2f16(<vscale x 2 x half> %a)
+  ret <vscale x 2 x half> %res
+}
+
+define <vscale x 4 x float> @frintp_nxv4f32(<vscale x 4 x float> %a) {
+; CHECK-LABEL: frintp_nxv4f32:
+; CHECK: ptrue p0.s
+; CHECK-NEXT: frintp z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float> %a)
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 2 x float> @frintp_nxv2f32(<vscale x 2 x float> %a) {
+; CHECK-LABEL: frintp_nxv2f32:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: frintp z0.s, p0/m, z0.s
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float> %a)
+  ret <vscale x 2 x float> %res
+}
+
+define <vscale x 2 x double> @frintp_nxv2f64(<vscale x 2 x double> %a) {
+; CHECK-LABEL: frintp_nxv2f64:
+; CHECK: ptrue p0.d
+; CHECK-NEXT: frintp z0.d, p0/m, z0.d
+; CHECK-NEXT: ret
+  %res = call <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double> %a)
+  ret <vscale x 2 x double> %res
+}
+
 declare <vscale x 8 x half> @llvm.aarch64.sve.frecps.x.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
 declare <vscale x 4 x float>  @llvm.aarch64.sve.frecps.x.nxv4f32(<vscale x 4 x float> , <vscale x 4 x float>)
 declare <vscale x 2 x double> @llvm.aarch64.sve.frecps.x.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
@@ -495,5 +551,12 @@ declare <vscale x 8 x half> @llvm.fma.nxv8f16(<vscale x 8 x half>, <vscale x 8 x
 declare <vscale x 4 x half> @llvm.fma.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>, <vscale x 4 x half>)
 declare <vscale x 2 x half> @llvm.fma.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>, <vscale x 2 x half>)
 
+declare <vscale x 8 x half> @llvm.ceil.nxv8f16( <vscale x 8 x half>)
+declare <vscale x 4 x half> @llvm.ceil.nxv4f16( <vscale x 4 x half>)
+declare <vscale x 2 x half> @llvm.ceil.nxv2f16( <vscale x 2 x half>)
+declare <vscale x 4 x float> @llvm.ceil.nxv4f32(<vscale x 4 x float>)
+declare <vscale x 2 x float> @llvm.ceil.nxv2f32(<vscale x 2 x float>)
+declare <vscale x 2 x double> @llvm.ceil.nxv2f64(<vscale x 2 x double>)
+
 ; Function Attrs: nounwind readnone
 declare double @llvm.aarch64.sve.faddv.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>) #2