[SVE][CodeGen] Legalisation of integer -> floating point conversions
Splitting the operand of a scalable [S|U]INT_TO_FP results in a concat_vectors operation whose operands are unpacked FP scalable vectors (e.g. nxv2f32). This patch adds custom lowering of concat_vectors, which keeps the node only when it has exactly two operands of a legal type, and adds isel patterns that match concat_vectors of scalable FP types with uzp1. Reviewed By: efriedma, paulwalker-arm. Differential Revision: https://reviews.llvm.org/D88033
This commit is contained in:
		
							parent
							
								
									8931c3d682
								
							
						
					
					
						commit
						75db7cf78a
					
				| 
						 | 
				
			
			@ -990,7 +990,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 | 
			
		|||
    // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
 | 
			
		||||
    // splat of 0 or undef) once vector selects supported in SVE codegen. See
 | 
			
		||||
    // D68877 for more details.
 | 
			
		||||
 | 
			
		||||
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
 | 
			
		||||
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
 | 
			
		||||
| 
						 | 
				
			
			@ -1018,7 +1017,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 | 
			
		|||
    }
 | 
			
		||||
 | 
			
		||||
    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
 | 
			
		||||
      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
 | 
			
		||||
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::SELECT, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::SETCC, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
 | 
			
		||||
| 
						 | 
				
			
			@ -1035,6 +1034,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
 | 
			
		|||
 | 
			
		||||
    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
 | 
			
		||||
                    MVT::nxv4f32, MVT::nxv2f64}) {
 | 
			
		||||
      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
 | 
			
		||||
      setOperationAction(ISD::SELECT, VT, Custom);
 | 
			
		||||
| 
						 | 
				
			
			@ -3835,6 +3835,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
 | 
			
		|||
    return LowerRETURNADDR(Op, DAG);
 | 
			
		||||
  case ISD::ADDROFRETURNADDR:
 | 
			
		||||
    return LowerADDROFRETURNADDR(Op, DAG);
 | 
			
		||||
  case ISD::CONCAT_VECTORS:
 | 
			
		||||
    return LowerCONCAT_VECTORS(Op, DAG);
 | 
			
		||||
  case ISD::INSERT_VECTOR_ELT:
 | 
			
		||||
    return LowerINSERT_VECTOR_ELT(Op, DAG);
 | 
			
		||||
  case ISD::EXTRACT_VECTOR_ELT:
 | 
			
		||||
| 
						 | 
				
			
			@ -9150,6 +9152,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
 | 
			
		|||
  return SDValue();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// Custom lowering hook for ISD::CONCAT_VECTORS nodes.
///
/// Asserts that the result type of \p Op is a legal scalable vector type.
/// Returns \p Op unchanged when its first operand has a legal type and the
/// node has exactly two operands; otherwise returns an empty SDValue.
/// \p DAG is not used by this function.
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
 | 
			
		||||
                                                   SelectionDAG &DAG) const {
 | 
			
		||||
  assert(Op.getValueType().isScalableVector() &&
 | 
			
		||||
         isTypeLegal(Op.getValueType()) &&
 | 
			
		||||
         "Expected legal scalable vector type!");
 | 
			
		||||
 | 
			
		||||
  // Keep the node as-is only for the two-operand form with a legal operand
  // type. Only operand 0 is inspected here.
  if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
 | 
			
		||||
    return Op;
 | 
			
		||||
 | 
			
		||||
  // NOTE(review): an empty SDValue presumably tells the caller that no custom
  // lowering applies, so the default handling is used -- confirm.
  return SDValue();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
 | 
			
		||||
                                                      SelectionDAG &DAG) const {
 | 
			
		||||
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1195,6 +1195,14 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
 | 
			
		|||
  def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
 | 
			
		||||
            (UZP1_PPP_B $p1, $p2)>;
 | 
			
		||||
 | 
			
		||||
  // Concatenate two unpacked floating point vectors into a single vector with
  // twice as many elements, using UZP1.
 | 
			
		||||
  def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
 | 
			
		||||
            (UZP1_ZZZ_S $v1, $v2)>;
 | 
			
		||||
  def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
 | 
			
		||||
            (UZP1_ZZZ_H $v1, $v2)>;
 | 
			
		||||
  def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
 | 
			
		||||
            (UZP1_ZZZ_S $v1, $v2)>;
 | 
			
		||||
 | 
			
		||||
  defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
 | 
			
		||||
  defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
 | 
			
		||||
  defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -95,3 +95,144 @@ define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
 | 
			
		|||
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
 | 
			
		||||
  ret <vscale x 4 x i64> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; SINT_TO_FP
 | 
			
		||||
 | 
			
		||||
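; Split operand: the integer source type is wider than one legal vector, so
; each part is converted separately and the results are combined with uzp1.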
 | 
			
		||||
define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
 | 
			
		||||
; CHECK-LABEL: scvtf_s_nxv4i64:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
 | 
			
		||||
  ret <vscale x 4 x float> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
 | 
			
		||||
; CHECK-LABEL: scvtf_h_nxv8i64:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
 | 
			
		||||
  ret <vscale x 8 x half> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
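; Split result: the FP result type is wider than one legal vector, so the
; input is split into parts and each part is converted separately.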
 | 
			
		||||
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
 | 
			
		||||
; CHECK-LABEL: scvtf_s_nxv16i8:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    sunpklo z1.h, z0.b
 | 
			
		||||
; CHECK-NEXT:    sunpkhi z0.h, z0.b
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.s
 | 
			
		||||
; CHECK-NEXT:    sunpklo z2.s, z1.h
 | 
			
		||||
; CHECK-NEXT:    sunpkhi z1.s, z1.h
 | 
			
		||||
; CHECK-NEXT:    sunpklo z3.s, z0.h
 | 
			
		||||
; CHECK-NEXT:    sunpkhi z4.s, z0.h
 | 
			
		||||
; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
 | 
			
		||||
; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
 | 
			
		||||
; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
 | 
			
		||||
; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
 | 
			
		||||
  ret <vscale x 16 x float> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
 | 
			
		||||
; CHECK-LABEL: scvtf_d_nxv4i32:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    sunpklo z1.d, z0.s
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    sunpkhi z2.d, z0.s
 | 
			
		||||
; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
 | 
			
		||||
  ret <vscale x 4 x double> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
 | 
			
		||||
; CHECK-LABEL: scvtf_d_nxv4i1:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    pfalse p1.b
 | 
			
		||||
; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
 | 
			
		||||
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
 | 
			
		||||
; CHECK-NEXT:    ptrue p2.d
 | 
			
		||||
; CHECK-NEXT:    mov z0.d, p3/z, #-1 // =0xffffffffffffffff
 | 
			
		||||
; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
 | 
			
		||||
; CHECK-NEXT:    scvtf z0.d, p2/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    scvtf z1.d, p2/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
 | 
			
		||||
  ret <vscale x 4 x double> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
; UINT_TO_FP
 | 
			
		||||
 | 
			
		||||
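; Split operand: the integer source type is wider than one legal vector, so
; each part is converted separately and the results are combined with uzp1.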
 | 
			
		||||
define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
 | 
			
		||||
; CHECK-LABEL: ucvtf_s_nxv4i64:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
 | 
			
		||||
  ret <vscale x 4 x float> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
 | 
			
		||||
; CHECK-LABEL: ucvtf_h_nxv8i64:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
 | 
			
		||||
; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
 | 
			
		||||
  ret <vscale x 8 x half> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
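; Split result: the FP result type is wider than one legal vector, so the
; input is split into parts and each part is converted separately.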
 | 
			
		||||
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
 | 
			
		||||
; CHECK-LABEL: ucvtf_d_nxv4i32:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    uunpklo z1.d, z0.s
 | 
			
		||||
; CHECK-NEXT:    ptrue p0.d
 | 
			
		||||
; CHECK-NEXT:    uunpkhi z2.d, z0.s
 | 
			
		||||
; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
 | 
			
		||||
  ret <vscale x 4 x double> %res
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
 | 
			
		||||
; CHECK-LABEL: ucvtf_d_nxv4i1:
 | 
			
		||||
; CHECK:       // %bb.0:
 | 
			
		||||
; CHECK-NEXT:    pfalse p1.b
 | 
			
		||||
; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
 | 
			
		||||
; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
 | 
			
		||||
; CHECK-NEXT:    ptrue p2.d
 | 
			
		||||
; CHECK-NEXT:    mov z0.d, p3/z, #1 // =0x1
 | 
			
		||||
; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
 | 
			
		||||
; CHECK-NEXT:    ucvtf z0.d, p2/m, z0.d
 | 
			
		||||
; CHECK-NEXT:    ucvtf z1.d, p2/m, z1.d
 | 
			
		||||
; CHECK-NEXT:    ret
 | 
			
		||||
  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
 | 
			
		||||
  ret <vscale x 4 x double> %res
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue