[SVE][CodeGen] Legalisation of integer -> floating point conversions

Splitting the operand of a scalable [S|U]INT_TO_FP results in a concat_vectors operation where the operands are unpacked FP scalable vectors (e.g. nxv2f32). This patch adds custom lowering of concat_vectors which checks that the number of operands is 2, and isel patterns to match concat_vectors of scalable FP types with uzp1. Reviewed By: efriedma, paulwalker-arm Differential Revision: https://reviews.llvm.org/D88033
2020-09-30 10:55:51 +01:00 · 2020-09-30 10:55:51 +01:00 · 75db7cf78a
parent 8931c3d682
commit 75db7cf78a
3 changed files with 165 additions and 2 deletions
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@ -990,7 +990,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
    // FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
    // splat of 0 or undef) once vector selects supported in SVE codegen. See
    // D68877 for more details.
-
    for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::UINT_TO_FP, VT, Custom);
@ -1018,7 +1017,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
    }

    for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
-      setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::SELECT, VT, Custom);
      setOperationAction(ISD::SETCC, VT, Custom);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
@ -1035,6 +1034,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,

    for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
                    MVT::nxv4f32, MVT::nxv2f64}) {
+      setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
      setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
      setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
      setOperationAction(ISD::SELECT, VT, Custom);
@ -3835,6 +3835,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
    return LowerRETURNADDR(Op, DAG);
  case ISD::ADDROFRETURNADDR:
    return LowerADDROFRETURNADDR(Op, DAG);
+  case ISD::CONCAT_VECTORS:
+    return LowerCONCAT_VECTORS(Op, DAG);
  case ISD::INSERT_VECTOR_ELT:
    return LowerINSERT_VECTOR_ELT(Op, DAG);
  case ISD::EXTRACT_VECTOR_ELT:
@ -9150,6 +9152,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
  return SDValue();
 }

+SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
+                                                   SelectionDAG &DAG) const {
+  assert(Op.getValueType().isScalableVector() &&
+         isTypeLegal(Op.getValueType()) &&
+         "Expected legal scalable vector type!");
+
+  if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
+    return Op;
+
+  return SDValue();
+}
+
 SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
                                                      SelectionDAG &DAG) const {
  assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
--- a/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
+++ b/llvm/lib/Target/AArch64/AArch64SVEInstrInfo.td
@ -1195,6 +1195,14 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
  def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
            (UZP1_PPP_B $p1, $p2)>;

+  // Concatenate two floating point vectors.
+  def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+  def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
+            (UZP1_ZZZ_H $v1, $v2)>;
+  def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
+            (UZP1_ZZZ_S $v1, $v2)>;
+
  defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
  defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
  defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
--- a/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
+++ b/llvm/test/CodeGen/AArch64/sve-split-fcvt.ll
@ -95,3 +95,144 @@ define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
  %res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
  ret <vscale x 4 x i64> %res
 }
+
+; SINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: scvtf_s_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z1.s, p0/m, z1.d
+; CHECK-NEXT:    scvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: scvtf_h_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    scvtf z3.h, p0/m, z3.d
+; CHECK-NEXT:    scvtf z2.h, p0/m, z2.d
+; CHECK-NEXT:    scvtf z1.h, p0/m, z1.d
+; CHECK-NEXT:    scvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
+; CHECK-LABEL: scvtf_s_nxv16i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z1.h, z0.b
+; CHECK-NEXT:    sunpkhi z0.h, z0.b
+; CHECK-NEXT:    ptrue p0.s
+; CHECK-NEXT:    sunpklo z2.s, z1.h
+; CHECK-NEXT:    sunpkhi z1.s, z1.h
+; CHECK-NEXT:    sunpklo z3.s, z0.h
+; CHECK-NEXT:    sunpkhi z4.s, z0.h
+; CHECK-NEXT:    scvtf z0.s, p0/m, z2.s
+; CHECK-NEXT:    scvtf z1.s, p0/m, z1.s
+; CHECK-NEXT:    scvtf z2.s, p0/m, z3.s
+; CHECK-NEXT:    scvtf z3.s, p0/m, z4.s
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
+  ret <vscale x 16 x float> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    sunpkhi z2.d, z0.s
+; CHECK-NEXT:    scvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    scvtf z1.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: scvtf_d_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p1.b
+; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ptrue p2.d
+; CHECK-NEXT:    mov z0.d, p3/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    mov z1.d, p0/z, #-1 // =0xffffffffffffffff
+; CHECK-NEXT:    scvtf z0.d, p2/m, z0.d
+; CHECK-NEXT:    scvtf z1.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+  %res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+; UINT_TO_FP
+
+; Split operand
+define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
+; CHECK-LABEL: ucvtf_s_nxv4i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z1.s, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z0.s, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
+  ret <vscale x 4 x float> %res
+}
+
+define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
+; CHECK-LABEL: ucvtf_h_nxv8i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    ucvtf z3.h, p0/m, z3.d
+; CHECK-NEXT:    ucvtf z2.h, p0/m, z2.d
+; CHECK-NEXT:    ucvtf z1.h, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z0.h, p0/m, z0.d
+; CHECK-NEXT:    uzp1 z2.s, z2.s, z3.s
+; CHECK-NEXT:    uzp1 z0.s, z0.s, z1.s
+; CHECK-NEXT:    uzp1 z0.h, z0.h, z2.h
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
+  ret <vscale x 8 x half> %res
+}
+
+; Split result
+define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    uunpklo z1.d, z0.s
+; CHECK-NEXT:    ptrue p0.d
+; CHECK-NEXT:    uunpkhi z2.d, z0.s
+; CHECK-NEXT:    ucvtf z0.d, p0/m, z1.d
+; CHECK-NEXT:    ucvtf z1.d, p0/m, z2.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}
+
+define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
+; CHECK-LABEL: ucvtf_d_nxv4i1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    pfalse p1.b
+; CHECK-NEXT:    zip1 p3.s, p0.s, p1.s
+; CHECK-NEXT:    zip2 p0.s, p0.s, p1.s
+; CHECK-NEXT:    ptrue p2.d
+; CHECK-NEXT:    mov z0.d, p3/z, #1 // =0x1
+; CHECK-NEXT:    mov z1.d, p0/z, #1 // =0x1
+; CHECK-NEXT:    ucvtf z0.d, p2/m, z0.d
+; CHECK-NEXT:    ucvtf z1.d, p2/m, z1.d
+; CHECK-NEXT:    ret
+  %res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
+  ret <vscale x 4 x double> %res
+}