[SVE][CodeGen] Legalisation of integer -> floating point conversions
Splitting the operand of a scalable [S|U]INT_TO_FP results in a concat_vectors operation whose operands are unpacked scalable FP vectors (e.g. nxv2f32). This patch adds custom lowering of concat_vectors, which checks that the number of operands is 2, as well as isel patterns that match concat_vectors of scalable FP types with uzp1. Reviewed By: efriedma, paulwalker-arm. Differential Revision: https://reviews.llvm.org/D88033
This commit is contained in:
parent
8931c3d682
commit
75db7cf78a
|
|
@ -990,7 +990,6 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
// FIXME: Add custom lowering of MLOAD to handle different passthrus (not a
|
||||
// splat of 0 or undef) once vector selects supported in SVE codegen. See
|
||||
// D68877 for more details.
|
||||
|
||||
for (auto VT : {MVT::nxv16i8, MVT::nxv8i16, MVT::nxv4i32, MVT::nxv2i64}) {
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||
setOperationAction(ISD::UINT_TO_FP, VT, Custom);
|
||||
|
|
@ -1018,7 +1017,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
}
|
||||
|
||||
for (auto VT : {MVT::nxv16i1, MVT::nxv8i1, MVT::nxv4i1, MVT::nxv2i1}) {
|
||||
setOperationAction(ISD::CONCAT_VECTORS, VT, Legal);
|
||||
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
setOperationAction(ISD::SETCC, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
|
|
@ -1035,6 +1034,7 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM,
|
|||
|
||||
for (auto VT : {MVT::nxv2f16, MVT::nxv4f16, MVT::nxv8f16, MVT::nxv2f32,
|
||||
MVT::nxv4f32, MVT::nxv2f64}) {
|
||||
setOperationAction(ISD::CONCAT_VECTORS, VT, Custom);
|
||||
setOperationAction(ISD::INSERT_SUBVECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SPLAT_VECTOR, VT, Custom);
|
||||
setOperationAction(ISD::SELECT, VT, Custom);
|
||||
|
|
@ -3835,6 +3835,8 @@ SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
|
|||
return LowerRETURNADDR(Op, DAG);
|
||||
case ISD::ADDROFRETURNADDR:
|
||||
return LowerADDROFRETURNADDR(Op, DAG);
|
||||
case ISD::CONCAT_VECTORS:
|
||||
return LowerCONCAT_VECTORS(Op, DAG);
|
||||
case ISD::INSERT_VECTOR_ELT:
|
||||
return LowerINSERT_VECTOR_ELT(Op, DAG);
|
||||
case ISD::EXTRACT_VECTOR_ELT:
|
||||
|
|
@ -9150,6 +9152,18 @@ SDValue AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op,
|
|||
return SDValue();
|
||||
}
|
||||
|
||||
// Custom lowering hook for ISD::CONCAT_VECTORS nodes (dispatched from
// LowerOperation).
//
// Op is the CONCAT_VECTORS node; its result type must already be a legal
// scalable vector type, which is asserted below. DAG is not used here.
//
// Returns Op unchanged when it has exactly two operands and the type of its
// first operand is legal; otherwise returns a default-constructed SDValue.
// NOTE(review): the empty SDValue presumably tells the caller to fall back to
// its default handling of the node - confirm against the legalizer.
SDValue AArch64TargetLowering::LowerCONCAT_VECTORS(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(Op.getValueType().isScalableVector() &&
|
||||
isTypeLegal(Op.getValueType()) &&
|
||||
"Expected legal scalable vector type!");
|
||||
|
||||
// Only the type of operand 0 is inspected; the node is kept as-is only for
// the two-operand form.
if (isTypeLegal(Op.getOperand(0).getValueType()) && Op.getNumOperands() == 2)
|
||||
return Op;
|
||||
|
||||
// Any other shape (more than two operands, or a non-legal operand type) is
// not handled here.
return SDValue();
|
||||
}
|
||||
|
||||
SDValue AArch64TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op,
|
||||
SelectionDAG &DAG) const {
|
||||
assert(Op.getOpcode() == ISD::INSERT_VECTOR_ELT && "Unknown opcode!");
|
||||
|
|
|
|||
|
|
@ -1195,6 +1195,14 @@ multiclass sve_prefetch<SDPatternOperator prefetch, ValueType PredTy, Instructio
|
|||
def : Pat<(nxv16i1 (concat_vectors nxv8i1:$p1, nxv8i1:$p2)),
|
||||
(UZP1_PPP_B $p1, $p2)>;
|
||||
|
||||
// Concatenate two floating point vectors.
// Each pattern takes two scalable FP operands with half as many elements as
// the result and selects a single UZP1 on the two source registers.
// Note that the nxv4f16 result is formed with the _S variant (like nxv4f32),
// while only the nxv8f16 result uses the _H variant.
|
||||
def : Pat<(nxv4f16 (concat_vectors nxv2f16:$v1, nxv2f16:$v2)),
|
||||
(UZP1_ZZZ_S $v1, $v2)>;
|
||||
def : Pat<(nxv8f16 (concat_vectors nxv4f16:$v1, nxv4f16:$v2)),
|
||||
(UZP1_ZZZ_H $v1, $v2)>;
|
||||
def : Pat<(nxv4f32 (concat_vectors nxv2f32:$v1, nxv2f32:$v2)),
|
||||
(UZP1_ZZZ_S $v1, $v2)>;
|
||||
|
||||
defm CMPHS_PPzZZ : sve_int_cmp_0<0b000, "cmphs", SETUGE, SETULE>;
|
||||
defm CMPHI_PPzZZ : sve_int_cmp_0<0b001, "cmphi", SETUGT, SETULT>;
|
||||
defm CMPGE_PPzZZ : sve_int_cmp_0<0b100, "cmpge", SETGE, SETLE>;
|
||||
|
|
|
|||
|
|
@ -95,3 +95,144 @@ define <vscale x 4 x i64> @fcvtzu_d_nxv4f32(<vscale x 4 x float> %a) {
|
|||
%res = fptoui <vscale x 4 x float> %a to <vscale x 4 x i64>
|
||||
ret <vscale x 4 x i64> %res
|
||||
}
|
||||
|
||||
; SINT_TO_FP
|
||||
|
||||
; Split operand
|
||||
; sitofp nxv4i64 -> nxv4f32: the operand arrives in two registers (z0, z1);
; each is converted with scvtf under an all-true .d predicate and the two
; results are combined with a single uzp1 on .s elements.
define <vscale x 4 x float> @scvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
|
||||
; CHECK-LABEL: scvtf_s_nxv4i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: scvtf z1.s, p0/m, z1.d
|
||||
; CHECK-NEXT: scvtf z0.s, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
; sitofp nxv8i64 -> nxv8f16: the operand arrives in four registers (z0-z3);
; each is converted with scvtf, then the results are merged pairwise with two
; uzp1 on .s elements followed by one uzp1 on .h elements.
define <vscale x 8 x half> @scvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
|
||||
; CHECK-LABEL: scvtf_h_nxv8i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: scvtf z3.h, p0/m, z3.d
|
||||
; CHECK-NEXT: scvtf z2.h, p0/m, z2.d
|
||||
; CHECK-NEXT: scvtf z1.h, p0/m, z1.d
|
||||
; CHECK-NEXT: scvtf z0.h, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
; Split result
|
||||
; sitofp nxv16i8 -> nxv16f32: the single input register is sign-extended in
; two sunpklo/sunpkhi stages (.b -> .h -> .s), giving four .s vectors that are
; each converted with scvtf into z0-z3. No uzp1 is expected here.
define <vscale x 16 x float> @scvtf_s_nxv16i8(<vscale x 16 x i8> %a) {
|
||||
; CHECK-LABEL: scvtf_s_nxv16i8:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sunpklo z1.h, z0.b
|
||||
; CHECK-NEXT: sunpkhi z0.h, z0.b
|
||||
; CHECK-NEXT: ptrue p0.s
|
||||
; CHECK-NEXT: sunpklo z2.s, z1.h
|
||||
; CHECK-NEXT: sunpkhi z1.s, z1.h
|
||||
; CHECK-NEXT: sunpklo z3.s, z0.h
|
||||
; CHECK-NEXT: sunpkhi z4.s, z0.h
|
||||
; CHECK-NEXT: scvtf z0.s, p0/m, z2.s
|
||||
; CHECK-NEXT: scvtf z1.s, p0/m, z1.s
|
||||
; CHECK-NEXT: scvtf z2.s, p0/m, z3.s
|
||||
; CHECK-NEXT: scvtf z3.s, p0/m, z4.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 16 x i8> %a to <vscale x 16 x float>
|
||||
ret <vscale x 16 x float> %res
|
||||
}
|
||||
|
||||
; sitofp nxv4i32 -> nxv4f64: the input is sign-extended with sunpklo/sunpkhi
; into two .d vectors, each converted with scvtf into z0 and z1.
define <vscale x 4 x double> @scvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
|
||||
; CHECK-LABEL: scvtf_d_nxv4i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: sunpklo z1.d, z0.s
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: sunpkhi z2.d, z0.s
|
||||
; CHECK-NEXT: scvtf z0.d, p0/m, z1.d
|
||||
; CHECK-NEXT: scvtf z1.d, p0/m, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
}
|
||||
|
||||
; sitofp nxv4i1 -> nxv4f64: the input predicate p0 is split into two halves
; with zip1/zip2 against an all-false predicate, each half is turned into a .d
; vector via a zeroing move of #-1, and both vectors are converted with scvtf.
define <vscale x 4 x double> @scvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
|
||||
; CHECK-LABEL: scvtf_d_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: zip1 p3.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: ptrue p2.d
|
||||
; CHECK-NEXT: mov z0.d, p3/z, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: mov z1.d, p0/z, #-1 // =0xffffffffffffffff
|
||||
; CHECK-NEXT: scvtf z0.d, p2/m, z0.d
|
||||
; CHECK-NEXT: scvtf z1.d, p2/m, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = sitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
}
|
||||
|
||||
; UINT_TO_FP
|
||||
|
||||
; Split operand
|
||||
; uitofp nxv4i64 -> nxv4f32: the operand arrives in two registers (z0, z1);
; each is converted with ucvtf under an all-true .d predicate and the two
; results are combined with a single uzp1 on .s elements.
define <vscale x 4 x float> @ucvtf_s_nxv4i64(<vscale x 4 x i64> %a) {
|
||||
; CHECK-LABEL: ucvtf_s_nxv4i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ucvtf z1.s, p0/m, z1.d
|
||||
; CHECK-NEXT: ucvtf z0.s, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: ret
|
||||
%res = uitofp <vscale x 4 x i64> %a to <vscale x 4 x float>
|
||||
ret <vscale x 4 x float> %res
|
||||
}
|
||||
|
||||
; uitofp nxv8i64 -> nxv8f16: the operand arrives in four registers (z0-z3);
; each is converted with ucvtf, then the results are merged pairwise with two
; uzp1 on .s elements followed by one uzp1 on .h elements.
define <vscale x 8 x half> @ucvtf_h_nxv8i64(<vscale x 8 x i64> %a) {
|
||||
; CHECK-LABEL: ucvtf_h_nxv8i64:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: ucvtf z3.h, p0/m, z3.d
|
||||
; CHECK-NEXT: ucvtf z2.h, p0/m, z2.d
|
||||
; CHECK-NEXT: ucvtf z1.h, p0/m, z1.d
|
||||
; CHECK-NEXT: ucvtf z0.h, p0/m, z0.d
|
||||
; CHECK-NEXT: uzp1 z2.s, z2.s, z3.s
|
||||
; CHECK-NEXT: uzp1 z0.s, z0.s, z1.s
|
||||
; CHECK-NEXT: uzp1 z0.h, z0.h, z2.h
|
||||
; CHECK-NEXT: ret
|
||||
%res = uitofp <vscale x 8 x i64> %a to <vscale x 8 x half>
|
||||
ret <vscale x 8 x half> %res
|
||||
}
|
||||
|
||||
; Split result
|
||||
; uitofp nxv4i32 -> nxv4f64: the input is zero-extended with uunpklo/uunpkhi
; into two .d vectors, each converted with ucvtf into z0 and z1.
define <vscale x 4 x double> @ucvtf_d_nxv4i32(<vscale x 4 x i32> %a) {
|
||||
; CHECK-LABEL: ucvtf_d_nxv4i32:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: uunpklo z1.d, z0.s
|
||||
; CHECK-NEXT: ptrue p0.d
|
||||
; CHECK-NEXT: uunpkhi z2.d, z0.s
|
||||
; CHECK-NEXT: ucvtf z0.d, p0/m, z1.d
|
||||
; CHECK-NEXT: ucvtf z1.d, p0/m, z2.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = uitofp <vscale x 4 x i32> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
}
|
||||
|
||||
; uitofp nxv4i1 -> nxv4f64: the input predicate p0 is split into two halves
; with zip1/zip2 against an all-false predicate, each half is turned into a .d
; vector via a zeroing move of #1, and both vectors are converted with ucvtf.
define <vscale x 4 x double> @ucvtf_d_nxv4i1(<vscale x 4 x i1> %a) {
|
||||
; CHECK-LABEL: ucvtf_d_nxv4i1:
|
||||
; CHECK: // %bb.0:
|
||||
; CHECK-NEXT: pfalse p1.b
|
||||
; CHECK-NEXT: zip1 p3.s, p0.s, p1.s
|
||||
; CHECK-NEXT: zip2 p0.s, p0.s, p1.s
|
||||
; CHECK-NEXT: ptrue p2.d
|
||||
; CHECK-NEXT: mov z0.d, p3/z, #1 // =0x1
|
||||
; CHECK-NEXT: mov z1.d, p0/z, #1 // =0x1
|
||||
; CHECK-NEXT: ucvtf z0.d, p2/m, z0.d
|
||||
; CHECK-NEXT: ucvtf z1.d, p2/m, z1.d
|
||||
; CHECK-NEXT: ret
|
||||
%res = uitofp <vscale x 4 x i1> %a to <vscale x 4 x double>
|
||||
ret <vscale x 4 x double> %res
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue