[GlobalISel] Add combine for (x & mask) -> x when (x & mask) == x

If we have a mask, and a value x, where (x & mask) == x, we can drop the AND
and just use x.

This is about a 0.4% geomean code size improvement on CTMark at -O3 for AArch64.

In AArch64, this is most useful post-legalization. Patterns like this often
show up when legalizing s1s, which must be extended to larger types.

e.g.

```
%cmp:_(s32) = G_ICMP ...
%and:_(s32) = G_AND %cmp, 1
```

Since G_ICMP only produces a single bit, there's no reason to mask it with the
G_AND.

Differential Revision: https://reviews.llvm.org/D85463
This commit is contained in:
Jessica Paquette 2020-08-06 10:40:46 -07:00
parent bcaa806a47
commit d25b12bdc3
5 changed files with 298 additions and 1 deletions

View File

@ -263,6 +263,9 @@ public:
/// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
/// Delete \p MI and replace all of its uses with \p Replacement.
bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
/// Return true if \p MOP1 and \p MOP2 are register operands are defined by
/// equivalent instructions.
bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
@ -303,6 +306,13 @@ public:
std::tuple<Register, int64_t> &MatchInfo);
bool applyAshShlToSextInreg(MachineInstr &MI,
std::tuple<Register, int64_t> &MatchInfo);
/// \return true if \p MI is a G_AND instruction whose RHS is a mask where
/// LHS & mask == LHS. (E.g., an all-ones value.)
///
/// \param [in] MI - The G_AND instruction.
/// \param [out] Reg - A register the G_AND should be replaced with on
/// success.
bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
/// Try to transform \p MI by using all of the above
/// combine functions. Returns true if changed.

View File

@ -299,6 +299,14 @@ def shl_ashr_to_sext_inreg : GICombineRule<
[{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
(apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
>;
// Fold (x & mask) -> x when (x & mask) is known to equal x.
def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
def and_trivial_mask: GICombineRule <
(defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
(match (wip_match_opcode G_AND):$root,
[{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
(apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
>;
// FIXME: These should use the custom predicate feature once it lands.
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,

View File

@ -1766,6 +1766,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
return true;
}
bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
Register Replacement) {
assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
Register OldReg = MI.getOperand(0).getReg();
assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
MI.eraseFromParent();
replaceRegWith(MRI, OldReg, Replacement);
return true;
}
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
assert(MI.getOpcode() == TargetOpcode::G_SELECT);
// Match (cond ? x : x)
@ -1979,6 +1989,52 @@ bool CombinerHelper::applyAshShlToSextInreg(
return true;
}
bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
Register &Replacement) {
// Given
//
// %mask:_(sN) = G_CONSTANT iN 000...0111...1
// %x:_(sN) = G_SOMETHING
// %y:_(sN) = G_AND %x, %mask
//
// Eliminate the G_AND when it is known that x & mask == x.
//
// Patterns like this can appear as a result of legalization. E.g.
//
// %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
// %one:_(s32) = G_CONSTANT i32 1
// %and:_(s32) = G_AND %cmp, %one
//
// In this case, G_ICMP only produces a single bit, so x & 1 == x.
assert(MI.getOpcode() == TargetOpcode::G_AND);
if (!KB)
return false;
// Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
// LHS of the G_AND.
Replacement = MI.getOperand(1).getReg();
Register AndDst = MI.getOperand(0).getReg();
LLT DstTy = MRI.getType(AndDst);
// FIXME: This should be removed once GISelKnownBits supports vectors.
if (DstTy.isVector())
return false;
if (!canReplaceReg(AndDst, Replacement, MRI))
return false;
// Check that we have a constant on the RHS of the G_AND, which is of the form
// 000...0111...1.
int64_t Cst;
if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
return false;
APInt Mask(DstTy.getSizeInBits(), Cst);
if (!Mask.isMask())
return false;
// Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
return KB->maskedValueIsZero(Replacement, ~Mask);
}
bool CombinerHelper::tryCombine(MachineInstr &MI) {
if (tryCombineCopy(MI))
return true;

View File

@ -80,6 +80,7 @@ def AArch64PostLegalizerCombinerHelper
: GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
[copy_prop, erase_undef_store, combines_for_extload,
sext_trunc_sextload, shuffle_vector_pseudos,
hoist_logic_op_with_same_opcode_hands]> {
hoist_logic_op_with_same_opcode_hands,
and_trivial_mask]> {
let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
}

View File

@ -0,0 +1,222 @@
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
#
# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x.
#
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
name: remove_and_with_one_bit
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
; G_ICMP produces a single bit. The mask is 1.
;
; cmp = 000...0?
; mask = 000...01
; cmp & mask = 000...0?
;
; Remove the G_AND.
;
; CHECK-LABEL: name: remove_and_with_one_bit
; CHECK: liveins: $w0, $w1
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %y:_(s32) = COPY $w1
; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
; CHECK: $w0 = COPY %cmp(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
%mask:_(s32) = G_CONSTANT i32 1
%and:_(s32) = G_AND %cmp(s32), %mask
$w0 = COPY %and(s32)
RET_ReallyLR implicit $w0
...
---
name: remove_and_all_ones_mask
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; -1 is all ones. Therefore z & -1 = z. Remove the G_AND.
;
; CHECK-LABEL: name: remove_and_all_ones_mask
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = COPY $w2
; CHECK: $w0 = COPY %z(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = COPY $w2
%mask:_(s32) = G_CONSTANT i32 -1
%and:_(s32) = G_AND %z(s32), %mask
$w0 = COPY %and(s32)
RET_ReallyLR implicit $w0
...
---
name: remove_and_all_ones_zext
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; %z is a s32, so it can be at most the all-ones value on 32 bits.
; In decimal this is 4294967295. Any zero-extension of %z is at most this
; value.
;
; Therefore, zext(z) & 4294967295 == z. Remove the G_AND.
;
; CHECK-LABEL: name: remove_and_all_ones_zext
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = COPY $w2
; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
; CHECK: $x0 = COPY %ext(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = COPY $w2
%ext:_(s64) = G_ZEXT %z
%mask:_(s64) = G_CONSTANT i64 4294967295
%and:_(s64) = G_AND %ext(s64), %mask
$x0 = COPY %and(s64)
RET_ReallyLR implicit $x0
...
---
name: remove_and_all_ones_anyext
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; This is the same as the zext case.
;
; CHECK-LABEL: name: remove_and_all_ones_anyext
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = COPY $w2
; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
; CHECK: $x0 = COPY %ext(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = COPY $w2
%ext:_(s64) = G_ZEXT %z
%mask:_(s64) = G_CONSTANT i64 4294967295
%and:_(s64) = G_AND %ext(s64), %mask
$x0 = COPY %and(s64)
RET_ReallyLR implicit $x0
...
---
name: dont_remove_all_ones_sext
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; We don't know if the sign bit is set on %z. So, the value in %ext may have
; higher bits set than 4294967295.
;
; CHECK-LABEL: name: dont_remove_all_ones_sext
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = COPY $w2
; CHECK: %ext:_(s64) = G_SEXT %z(s32)
; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295
; CHECK: %and:_(s64) = G_AND %ext, %mask
; CHECK: $x0 = COPY %and(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = COPY $w2
%ext:_(s64) = G_SEXT %z
%mask:_(s64) = G_CONSTANT i64 4294967295
%and:_(s64) = G_AND %ext(s64), %mask
$x0 = COPY %and(s64)
RET_ReallyLR implicit $x0
...
---
name: remove_and_positive_constant_sext
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; We know the sign bit is not set on %z. Therefore,
;
; z = ext = 42 = 000...0101010
; mask = 0000...0111111
;
; So z & mask == z
; CHECK-LABEL: name: remove_and_positive_constant_sext
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = G_CONSTANT i32 42
; CHECK: %ext:_(s64) = G_SEXT %z(s32)
; CHECK: $x0 = COPY %ext(s64)
; CHECK: RET_ReallyLR implicit $x0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = G_CONSTANT i32 42
%ext:_(s64) = G_SEXT %z
%mask:_(s64) = G_CONSTANT i64 63
%and:_(s64) = G_AND %ext(s64), %mask
$x0 = COPY %and(s64)
RET_ReallyLR implicit $x0
...
---
name: not_a_mask
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1
; 6 is not a mask, so we should still have the G_AND.
;
; CHECK-LABEL: name: not_a_mask
; CHECK: liveins: $w0, $w1
; CHECK: %x:_(s32) = COPY $w0
; CHECK: %y:_(s32) = COPY $w1
; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
; CHECK: %mask:_(s32) = G_CONSTANT i32 6
; CHECK: %and:_(s32) = G_AND %cmp, %mask
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
%mask:_(s32) = G_CONSTANT i32 6
%and:_(s32) = G_AND %cmp(s32), %mask
$w0 = COPY %and(s32)
RET_ReallyLR implicit $w0
...
---
name: unknown_val
legalized: true
tracksRegLiveness: true
body: |
bb.0:
liveins: $w0, $w1, $w2
; We don't know what's in $w2, so we can't remove the G_AND without a mask
; that fills every bit in the type.
;
; CHECK-LABEL: name: unknown_val
; CHECK: liveins: $w0, $w1, $w2
; CHECK: %z:_(s32) = COPY $w2
; CHECK: %one:_(s32) = G_CONSTANT i32 32
; CHECK: %and:_(s32) = G_AND %z, %one
; CHECK: $w0 = COPY %and(s32)
; CHECK: RET_ReallyLR implicit $w0
%x:_(s32) = COPY $w0
%y:_(s32) = COPY $w1
%z:_(s32) = COPY $w2
%one:_(s32) = G_CONSTANT i32 32
%and:_(s32) = G_AND %z(s32), %one
$w0 = COPY %and(s32)
RET_ReallyLR implicit $w0
...