[AArch64][GlobalISel] Emit G_ASSERT_SEXT for SExt parameters in CallLowering

Similar to how we emit G_ASSERT_ZEXT when we have CCValAssign::LocInfo::ZExt.

This will allow us to combine away some redundant sign extends.

Example: https://godbolt.org/z/cTbKvr

Differential Revision: https://reviews.llvm.org/D96915
This commit is contained in:
Jessica Paquette 2021-02-17 11:43:09 -08:00
parent 7d1397f7ad
commit 95d13c01ec
3 changed files with 161 additions and 18 deletions

View File

@ -83,7 +83,15 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::SExt:
case CCValAssign::LocInfo::SExt: {
auto WideTy = LLT{VA.getLocVT()};
auto NarrowTy = MRI.getType(ValVReg);
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertSExt(
WideTy, MIRBuilder.buildCopy(WideTy, PhysReg),
NarrowTy.getSizeInBits()));
break;
}
case CCValAssign::LocInfo::AExt: {
auto Copy = MIRBuilder.buildCopy(LLT{VA.getLocVT()}, PhysReg);
MIRBuilder.buildTrunc(ValVReg, Copy);
@ -104,16 +112,28 @@ struct IncomingArgHandler : public CallLowering::IncomingValueHandler {
MPO, MachineMemOperand::MOLoad | MachineMemOperand::MOInvariant,
MemSize, inferAlignFromPtrInfo(MF, MPO));
const LLT LocVT = LLT{VA.getLocVT()};
if (VA.getLocInfo() == CCValAssign::LocInfo::ZExt &&
RegTy.getScalarSizeInBits() < LocVT.getScalarSizeInBits()) {
// We know the parameter is zero-extended. Perform a load into LocVT, and
// use G_ASSERT_ZEXT to communicate that this was zero-extended from the
// parameter type. Move down to the parameter type using G_TRUNC.
MIRBuilder.buildTrunc(ValVReg,
MIRBuilder.buildAssertZExt(
LocVT, MIRBuilder.buildLoad(LocVT, Addr, *MMO),
RegTy.getScalarSizeInBits()));
return;
if (RegTy.getScalarSizeInBits() < LocVT.getScalarSizeInBits()) {
auto LocInfo = VA.getLocInfo();
if (LocInfo == CCValAssign::LocInfo::ZExt) {
// We know the parameter is zero-extended. Perform a load into LocVT,
// and use G_ASSERT_ZEXT to communicate that this was zero-extended from
// the parameter type. Move down to the parameter type using G_TRUNC.
MIRBuilder.buildTrunc(
ValVReg, MIRBuilder.buildAssertZExt(
LocVT, MIRBuilder.buildLoad(LocVT, Addr, *MMO),
RegTy.getScalarSizeInBits()));
return;
}
if (LocInfo == CCValAssign::LocInfo::SExt) {
// Same as the ZExt case, but use G_ASSERT_SEXT instead.
MIRBuilder.buildTrunc(
ValVReg, MIRBuilder.buildAssertSExt(
LocVT, MIRBuilder.buildLoad(LocVT, Addr, *MMO),
RegTy.getScalarSizeInBits()));
return;
}
}
// No extension information, or no extension necessary. Load into the

View File

@ -0,0 +1,118 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
; RUN: llc -mtriple=aarch64 -global-isel -stop-after=irtranslator -verify-machineinstrs -o - %s | FileCheck %s
; Verify that we generate G_ASSERT_SEXT for signext parameters.
; The s8 parameter is widened to s32 in $w0, so the signext attribute should
; produce a G_ASSERT_SEXT of 8 bits on the copy, followed by a G_TRUNC back
; down to the parameter type.
define i8 @signext_param_i8(i8 signext %x) {
; CHECK-LABEL: name: signext_param_i8
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[COPY]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
ret i8 %x
}
; Negative test: without the signext attribute there is no extension
; information, so no G_ASSERT_SEXT should be emitted — just a plain trunc of
; the copy.
define i8 @no_signext_param(i8 %x) {
; CHECK-LABEL: name: no_signext_param
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[COPY]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
ret i8 %x
}
; Don't need G_ASSERT_SEXT here. The sizes match.
; The parameter is already s32-sized, so no widening happens and no
; G_ASSERT_SEXT (or trunc) is expected — the copy is returned directly.
define i32 @signext_param_i32(i32 signext %x) {
; CHECK-LABEL: name: signext_param_i32
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $w0
; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $w0
; CHECK: $w0 = COPY [[COPY]](s32)
; CHECK: RET_ReallyLR implicit $w0
ret i32 %x
}
; Signext param is passed on the stack. We should still get a G_ASSERT_SEXT.
; The i1 parameter %j lands in a stack slot after the eight register args.
; It is loaded as an s32 from the fixed stack object, asserted sign-extended
; from 1 bit via G_ASSERT_SEXT, then truncated to s1.
define i32 @signext_param_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e, i64 %f,
; CHECK-LABEL: name: signext_param_stack
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 1
; CHECK: [[TRUNC:%[0-9]+]]:_(s1) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[ZEXT:%[0-9]+]]:_(s32) = G_ZEXT [[TRUNC]](s1)
; CHECK: $w0 = COPY [[ZEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
i64 %g, i64 %h, i64 %i, i1 signext %j) {
%v = zext i1 %j to i32
ret i32 %v
}
; The signext parameter is a s32, so there's no extension required.
; The stack-passed signext parameter is already s32-sized, so no extension —
; and therefore no G_ASSERT_SEXT — is required; the load is used directly.
; Renamed from dont_need_assert_zext_stack: this test exercises signext, not
; zeroext, so the old name was misleading.
define i32 @dont_need_assert_sext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
; CHECK-LABEL: name: dont_need_assert_sext_stack
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 4 from %fixed-stack.0, align 8)
; CHECK: $w0 = COPY [[LOAD1]](s32)
; CHECK: RET_ReallyLR implicit $w0
i64 %f, i64 %g, i64 %h, i64 %i,
i32 signext %j) {
ret i32 %j
}
; s8 requires extension to s32, so we should get a G_ASSERT_SEXT here.
; The stack-passed s8 is loaded as s32 from its fixed stack slot, asserted
; sign-extended from 8 bits, then truncated back to s8.
; Renamed from s8_assert_zext_stack: this test exercises signext /
; G_ASSERT_SEXT, not zeroext, so the old name was misleading.
define i8 @s8_assert_sext_stack(i64 %a, i64 %b, i64 %c, i64 %d, i64 %e,
; CHECK-LABEL: name: s8_assert_sext_stack
; CHECK: bb.1 (%ir-block.0):
; CHECK: liveins: $x0, $x1, $x2, $x3, $x4, $x5, $x6, $x7
; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $x0
; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $x1
; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $x2
; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $x3
; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $x4
; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $x5
; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $x6
; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $x7
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD:%[0-9]+]]:_(s64) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 8 from %fixed-stack.1, align 16)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0, align 8)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[ANYEXT:%[0-9]+]]:_(s32) = G_ANYEXT [[TRUNC]](s8)
; CHECK: $w0 = COPY [[ANYEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
i64 %f, i64 %g, i64 %h, i64 %i,
i8 signext %j) {
ret i8 %j
}

View File

@ -5,13 +5,18 @@
; CHECK: fixedStack:
; CHECK-DAG: - { id: [[STACK0:[0-9]+]], type: default, offset: 0, size: 1,
; CHECK-DAG: - { id: [[STACK8:[0-9]+]], type: default, offset: 1, size: 1,
; CHECK: [[LHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK0]]
; CHECK: [[LHS:%[0-9]+]]:_(s8) = G_LOAD [[LHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK0]], align 16)
; CHECK: [[RHS_ADDR:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.[[STACK8]]
; CHECK: [[RHS:%[0-9]+]]:_(s8) = G_LOAD [[RHS_ADDR]](p0) :: (invariant load 1 from %fixed-stack.[[STACK8]])
; CHECK: [[SUM:%[0-9]+]]:_(s8) = G_ADD [[LHS]], [[RHS]]
; CHECK: [[SUM32:%[0-9]+]]:_(s32) = G_SEXT [[SUM]](s8)
; CHECK: $w0 = COPY [[SUM32]](s32)
; CHECK: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.1
; CHECK: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (invariant load 1 from %fixed-stack.1, align 16)
; CHECK: [[ASSERT_SEXT:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD]], 8
; CHECK: [[TRUNC:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT]](s32)
; CHECK: [[FRAME_INDEX1:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; CHECK: [[LOAD1:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX1]](p0) :: (invariant load 1 from %fixed-stack.0)
; CHECK: [[ASSERT_SEXT1:%[0-9]+]]:_(s32) = G_ASSERT_SEXT [[LOAD1]], 8
; CHECK: [[TRUNC1:%[0-9]+]]:_(s8) = G_TRUNC [[ASSERT_SEXT1]](s32)
; CHECK: [[ADD:%[0-9]+]]:_(s8) = G_ADD [[TRUNC]], [[TRUNC1]]
; CHECK: [[SEXT:%[0-9]+]]:_(s32) = G_SEXT [[ADD]](s8)
; CHECK: $w0 = COPY [[SEXT]](s32)
; CHECK: RET_ReallyLR implicit $w0
define signext i8 @test_stack_slots([8 x i64], i8 signext %lhs, i8 signext %rhs) {
%sum = add i8 %lhs, %rhs
ret i8 %sum