[AArch64] Add lowering pattern for scalar fp16 facge and facgt

Summary: The fp16 scalar versions of facge and facgt require custom pattern matching, as the result type is not the same width as the operands.

Reviewers: olista01, javed.absar, pbarrio

Reviewed By: javed.absar

Subscribers: kristof.beyls, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D60212

llvm-svn: 358083
This commit is contained in:
Diogo N. Sampaio 2019-04-10 13:34:18 +00:00
parent 71660b0321
commit aae424a2d2
2 changed files with 34 additions and 0 deletions

View File

@ -5355,6 +5355,16 @@ def : Pat<(i64 (int_aarch64_neon_vcvtfp2fxu (f16 FPR16:$Rn), vecshiftR64:$imm)),
(i64 (IMPLICIT_DEF)),
(FCVTZUh FPR16:$Rn, vecshiftR64:$imm),
hsub))>;
// Scalar fp16 FACGE: the intrinsic returns an i32 while both operands are f16
// values in FPR16, so the result type is wider than the operands and an
// explicit pattern is needed. FACGE16 is emitted on the two FPR16 operands and
// its result is inserted at the hsub subregister index of an i32 value whose
// other bits come from IMPLICIT_DEF.
def : Pat<(i32 (int_aarch64_neon_facge (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FACGE16 FPR16:$Rn, FPR16:$Rm),
hsub))>;
// Scalar fp16 FACGT: the intrinsic returns an i32 while both operands are f16
// values in FPR16, so the result type is wider than the operands and an
// explicit pattern is needed. FACGT16 is emitted on the two FPR16 operands and
// its result is inserted at the hsub subregister index of an i32 value whose
// other bits come from IMPLICIT_DEF.
def : Pat<(i32 (int_aarch64_neon_facgt (f16 FPR16:$Rn), (f16 FPR16:$Rm))),
(i32 (INSERT_SUBREG
(i32 (IMPLICIT_DEF)),
(FACGT16 FPR16:$Rn, FPR16:$Rm),
hsub))>;
defm SHL : SIMDScalarLShiftD< 0, 0b01010, "shl", AArch64vshl>;
defm SLI : SIMDScalarLShiftDTied<1, 0b01010, "sli">;

View File

@ -7,6 +7,8 @@ declare half @llvm.aarch64.neon.frsqrts.f16(half, half)
declare half @llvm.aarch64.neon.frecps.f16(half, half)
declare half @llvm.aarch64.neon.fmulx.f16(half, half)
declare half @llvm.fabs.f16(half)
declare i32 @llvm.aarch64.neon.facge.i32.f16(half, half)
declare i32 @llvm.aarch64.neon.facgt.i32.f16(half, half)
define dso_local half @t_vabdh_f16(half %a, half %b) {
; CHECK-LABEL: t_vabdh_f16:
@ -318,3 +320,25 @@ entry:
%vcvth_n_u32_f16 = tail call i32 @llvm.aarch64.neon.vcvtfp2fxu.i32.f16(half %a, i32 16)
ret i32 %vcvth_n_u32_f16
}
; Scalar fp16 facge selection test. The IR calls the i32-returning
; llvm.aarch64.neon.facge.i32.f16 intrinsic on two half arguments and truncates
; the result to i16. The expected assembly is one facge on h registers, then an
; fmov of s0 into w0, then ret.
define dso_local i16 @vcageh_f16_test(half %a, half %b) {
; CHECK-LABEL: vcageh_f16_test:
; CHECK: facge h0, h0, h1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
%facg = tail call i32 @llvm.aarch64.neon.facge.i32.f16(half %a, half %b)
%0 = trunc i32 %facg to i16
ret i16 %0
}
; Scalar fp16 facgt selection test. The IR calls the i32-returning
; llvm.aarch64.neon.facgt.i32.f16 intrinsic on two half arguments and truncates
; the result to i16. The expected assembly is one facgt on h registers, then an
; fmov of s0 into w0, then ret.
define dso_local i16 @vcagth_f16_test(half %a, half %b) {
; CHECK-LABEL: vcagth_f16_test:
; CHECK: facgt h0, h0, h1
; CHECK-NEXT: fmov w0, s0
; CHECK-NEXT: ret
entry:
%facg = tail call i32 @llvm.aarch64.neon.facgt.i32.f16(half %a, half %b)
%0 = trunc i32 %facg to i16
ret i16 %0
}