forked from OSchip/llvm-project
[GlobalISel] Add combine for (x & mask) -> x when (x & mask) == x
If we have a mask, and a value x, where (x & mask) == x, we can drop the AND and just use x.

This is about a 0.4% geomean code size improvement on CTMark at -O3 for AArch64.

In AArch64, this is most useful post-legalization. Patterns like this often show up when legalizing s1s, which must be extended to larger types, e.g.

```
%cmp:_(s32) = G_ICMP ...
%and:_(s32) = G_AND %cmp, 1
```

Since G_ICMP only produces a single bit, there's no reason to mask it with the G_AND.

Differential Revision: https://reviews.llvm.org/D85463
This commit is contained in:
parent
bcaa806a47
commit
d25b12bdc3
|
|
@ -263,6 +263,9 @@ public:
|
|||
/// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
|
||||
bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
|
||||
|
||||
/// Delete \p MI and replace all of its uses with \p Replacement.
|
||||
bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
|
||||
|
||||
/// Return true if \p MOP1 and \p MOP2 are register operands defined by
|
||||
/// equivalent instructions.
|
||||
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
|
||||
|
|
@ -303,6 +306,13 @@ public:
|
|||
std::tuple<Register, int64_t> &MatchInfo);
|
||||
bool applyAshShlToSextInreg(MachineInstr &MI,
|
||||
std::tuple<Register, int64_t> &MatchInfo);
|
||||
/// \return true if \p MI is a G_AND instruction whose RHS is a mask where
|
||||
/// LHS & mask == LHS. (E.g., an all-ones value.)
|
||||
///
|
||||
/// \param [in] MI - The G_AND instruction.
|
||||
/// \param [out] Replacement - A register the G_AND should be replaced with on
|
||||
/// success.
|
||||
bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
|
||||
|
||||
/// Try to transform \p MI by using all of the above
|
||||
/// combine functions. Returns true if changed.
|
||||
|
|
|
|||
|
|
@ -299,6 +299,14 @@ def shl_ashr_to_sext_inreg : GICombineRule<
|
|||
[{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
|
||||
(apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
|
||||
>;
|
||||
// Fold (x & mask) -> x when (x & mask) is known to equal x.
// The match step reports the register that should replace the G_AND through
// the matchinfo; the apply step deletes the G_AND and substitutes that
// register for its def.
def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
def and_trivial_mask : GICombineRule<
  (defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
  (match (wip_match_opcode G_AND):$root,
         [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
|
||||
|
||||
// FIXME: These should use the custom predicate feature once it lands.
|
||||
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
|
||||
|
|
|
|||
|
|
@ -1766,6 +1766,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Delete \p MI and redirect its single explicit def to \p Replacement.
///
/// \param MI - Instruction with exactly one explicit def; it is erased.
/// \param Replacement - Register substituted for the def of \p MI.
/// \return always true.
bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
                                                 Register Replacement) {
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");

  // Read the def register up front: MI is erased before the substitution
  // below and must not be touched afterwards.
  const Register DefReg = MI.getOperand(0).getReg();
  assert(canReplaceReg(DefReg, Replacement, MRI) && "Cannot replace register?");

  MI.eraseFromParent();
  replaceRegWith(MRI, DefReg, Replacement);
  return true;
}
|
||||
|
||||
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
|
||||
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
|
||||
// Match (cond ? x : x)
|
||||
|
|
@ -1979,6 +1989,52 @@ bool CombinerHelper::applyAshShlToSextInreg(
|
|||
return true;
|
||||
}
|
||||
|
||||
/// Match a G_AND whose RHS is a constant of the form 000...0111...1 that
/// cannot clear any bit of the LHS, i.e. (x & mask) == x.
///
/// \param [in] MI - The G_AND instruction.
/// \param [out] Replacement - Set to the LHS of the G_AND when the match
/// succeeds; left unmodified when it fails.
/// \return true if the G_AND can be replaced by its LHS.
bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
                                             Register &Replacement) {
  // Given
  //
  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
  // %x:_(sN) = G_SOMETHING
  // %y:_(sN) = G_AND %x, %mask
  //
  // Eliminate the G_AND when it is known that x & mask == x.
  //
  // Patterns like this can appear as a result of legalization. E.g.
  //
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
  // %one:_(s32) = G_CONSTANT i32 1
  // %and:_(s32) = G_AND %cmp, %one
  //
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
  assert(MI.getOpcode() == TargetOpcode::G_AND);

  // The final check queries known bits; without the analysis nothing can be
  // proven.
  if (!KB)
    return false;

  // LHS = %x, AndDst = %y. Check that we can replace AndDst with the LHS of
  // the G_AND.
  Register AndDst = MI.getOperand(0).getReg();
  Register LHS = MI.getOperand(1).getReg();
  LLT DstTy = MRI.getType(AndDst);

  // FIXME: This should be removed once GISelKnownBits supports vectors.
  if (DstTy.isVector())
    return false;
  if (!canReplaceReg(AndDst, LHS, MRI))
    return false;

  // Check that we have a constant on the RHS of the G_AND, which is of the
  // form 000...0111...1.
  int64_t Cst;
  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
    return false;

  // Cst is a signed 64-bit value (an all-ones mask arrives as -1), so build
  // the APInt as signed: the value is then interpreted at the destination
  // width instead of depending on truncation of a 64-bit unsigned pattern.
  APInt Mask(DstTy.getSizeInBits(), Cst, /*isSigned=*/true);
  if (!Mask.isMask())
    return false;

  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
  if (!KB->maskedValueIsZero(LHS, ~Mask))
    return false;

  // Only publish the replacement register once the match has succeeded.
  Replacement = LHS;
  return true;
}
|
||||
|
||||
bool CombinerHelper::tryCombine(MachineInstr &MI) {
|
||||
if (tryCombineCopy(MI))
|
||||
return true;
|
||||
|
|
|
|||
|
|
@ -80,6 +80,7 @@ def AArch64PostLegalizerCombinerHelper
|
|||
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
|
||||
[copy_prop, erase_undef_store, combines_for_extload,
|
||||
sext_trunc_sextload, shuffle_vector_pseudos,
|
||||
hoist_logic_op_with_same_opcode_hands]> {
|
||||
hoist_logic_op_with_same_opcode_hands,
|
||||
and_trivial_mask]> {
|
||||
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -0,0 +1,222 @@
|
|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
|
||||
#
|
||||
# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x.
|
||||
#
|
||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
|
||||
|
||||
name: remove_and_with_one_bit
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
; G_ICMP produces a single bit. The mask is 1.
|
||||
;
|
||||
; cmp = 000...0?
|
||||
; mask = 000...01
|
||||
; cmp & mask = 000...0?
|
||||
;
|
||||
; Remove the G_AND.
|
||||
;
|
||||
; CHECK-LABEL: name: remove_and_with_one_bit
|
||||
; CHECK: liveins: $w0, $w1
|
||||
; CHECK: %x:_(s32) = COPY $w0
|
||||
; CHECK: %y:_(s32) = COPY $w1
|
||||
; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
|
||||
; CHECK: $w0 = COPY %cmp(s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
|
||||
%mask:_(s32) = G_CONSTANT i32 1
|
||||
%and:_(s32) = G_AND %cmp(s32), %mask
|
||||
$w0 = COPY %and(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: remove_and_all_ones_mask
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; -1 is all ones. Therefore z & -1 = z. Remove the G_AND.
|
||||
;
|
||||
; CHECK-LABEL: name: remove_and_all_ones_mask
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = COPY $w2
|
||||
; CHECK: $w0 = COPY %z(s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = COPY $w2
|
||||
%mask:_(s32) = G_CONSTANT i32 -1
|
||||
%and:_(s32) = G_AND %z(s32), %mask
|
||||
$w0 = COPY %and(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: remove_and_all_ones_zext
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; %z is a s32, so it can be at most the all-ones value on 32 bits.
|
||||
; In decimal this is 4294967295. Any zero-extension of %z is at most this
|
||||
; value.
|
||||
;
|
||||
; Therefore, zext(z) & 4294967295 == z. Remove the G_AND.
|
||||
;
|
||||
; CHECK-LABEL: name: remove_and_all_ones_zext
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = COPY $w2
|
||||
; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
|
||||
; CHECK: $x0 = COPY %ext(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = COPY $w2
|
||||
%ext:_(s64) = G_ZEXT %z
|
||||
%mask:_(s64) = G_CONSTANT i64 4294967295
|
||||
%and:_(s64) = G_AND %ext(s64), %mask
|
||||
$x0 = COPY %and(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: remove_and_all_ones_anyext
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; This uses G_ZEXT, so it is the same as the zext case (the test name notwithstanding).
|
||||
;
|
||||
; CHECK-LABEL: name: remove_and_all_ones_anyext
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = COPY $w2
|
||||
; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
|
||||
; CHECK: $x0 = COPY %ext(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = COPY $w2
|
||||
%ext:_(s64) = G_ZEXT %z
|
||||
%mask:_(s64) = G_CONSTANT i64 4294967295
|
||||
%and:_(s64) = G_AND %ext(s64), %mask
|
||||
$x0 = COPY %and(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: dont_remove_all_ones_sext
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; We don't know if the sign bit is set on %z. So, the value in %ext may have
|
||||
; higher bits set than 4294967295.
|
||||
;
|
||||
; CHECK-LABEL: name: dont_remove_all_ones_sext
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = COPY $w2
|
||||
; CHECK: %ext:_(s64) = G_SEXT %z(s32)
|
||||
; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295
|
||||
; CHECK: %and:_(s64) = G_AND %ext, %mask
|
||||
; CHECK: $x0 = COPY %and(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = COPY $w2
|
||||
%ext:_(s64) = G_SEXT %z
|
||||
%mask:_(s64) = G_CONSTANT i64 4294967295
|
||||
%and:_(s64) = G_AND %ext(s64), %mask
|
||||
$x0 = COPY %and(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: remove_and_positive_constant_sext
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; We know the sign bit is not set on %z. Therefore,
|
||||
;
|
||||
; z = ext = 42 = 000...0101010
|
||||
; mask = 0000...0111111
|
||||
;
|
||||
; So z & mask == z
|
||||
; CHECK-LABEL: name: remove_and_positive_constant_sext
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = G_CONSTANT i32 42
|
||||
; CHECK: %ext:_(s64) = G_SEXT %z(s32)
|
||||
; CHECK: $x0 = COPY %ext(s64)
|
||||
; CHECK: RET_ReallyLR implicit $x0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = G_CONSTANT i32 42
|
||||
%ext:_(s64) = G_SEXT %z
|
||||
%mask:_(s64) = G_CONSTANT i64 63
|
||||
%and:_(s64) = G_AND %ext(s64), %mask
|
||||
$x0 = COPY %and(s64)
|
||||
RET_ReallyLR implicit $x0
|
||||
|
||||
...
|
||||
---
|
||||
name: not_a_mask
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1
|
||||
; 6 is not a mask, so we should still have the G_AND.
|
||||
;
|
||||
; CHECK-LABEL: name: not_a_mask
|
||||
; CHECK: liveins: $w0, $w1
|
||||
; CHECK: %x:_(s32) = COPY $w0
|
||||
; CHECK: %y:_(s32) = COPY $w1
|
||||
; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
|
||||
; CHECK: %mask:_(s32) = G_CONSTANT i32 6
|
||||
; CHECK: %and:_(s32) = G_AND %cmp, %mask
|
||||
; CHECK: $w0 = COPY %and(s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
|
||||
%mask:_(s32) = G_CONSTANT i32 6
|
||||
%and:_(s32) = G_AND %cmp(s32), %mask
|
||||
$w0 = COPY %and(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
|
||||
...
|
||||
---
|
||||
name: unknown_val
|
||||
legalized: true
|
||||
tracksRegLiveness: true
|
||||
body: |
|
||||
bb.0:
|
||||
liveins: $w0, $w1, $w2
|
||||
; We don't know what's in $w2, so we can't remove the G_AND without a mask
|
||||
; that fills every bit in the type.
|
||||
;
|
||||
; CHECK-LABEL: name: unknown_val
|
||||
; CHECK: liveins: $w0, $w1, $w2
|
||||
; CHECK: %z:_(s32) = COPY $w2
|
||||
; CHECK: %one:_(s32) = G_CONSTANT i32 32
|
||||
; CHECK: %and:_(s32) = G_AND %z, %one
|
||||
; CHECK: $w0 = COPY %and(s32)
|
||||
; CHECK: RET_ReallyLR implicit $w0
|
||||
%x:_(s32) = COPY $w0
|
||||
%y:_(s32) = COPY $w1
|
||||
%z:_(s32) = COPY $w2
|
||||
%one:_(s32) = G_CONSTANT i32 32
|
||||
%and:_(s32) = G_AND %z(s32), %one
|
||||
$w0 = COPY %and(s32)
|
||||
RET_ReallyLR implicit $w0
|
||||
...
|
||||
Loading…
Reference in New Issue