forked from OSchip/llvm-project
				
			[GlobalISel] Add combine for (x & mask) -> x when (x & mask) == x
If we have a mask, and a value x, where (x & mask) == x, we can drop the AND and just use x. This is about a 0.4% geomean code size improvement on CTMark at -O3 for AArch64. In AArch64, this is most useful post-legalization. Patterns like this often show up when legalizing s1s, which must be extended to larger types. e.g. ``` %cmp:_(s32) = G_ICMP ... %and:_(s32) = G_AND %cmp, 1 ``` Since G_ICMP only produces a single bit, there's no reason to mask it with the G_AND. Differential Revision: https://reviews.llvm.org/D85463
This commit is contained in:
		
							parent
							
								
									bcaa806a47
								
							
						
					
					
						commit
						d25b12bdc3
					
				| 
						 | 
				
			
			@ -263,6 +263,9 @@ public:
 | 
			
		|||
  /// Delete \p MI and replace all of its uses with its \p OpIdx-th operand.
 | 
			
		||||
  bool replaceSingleDefInstWithOperand(MachineInstr &MI, unsigned OpIdx);
 | 
			
		||||
 | 
			
		||||
  /// Delete \p MI and replace all of its uses with \p Replacement.
 | 
			
		||||
  bool replaceSingleDefInstWithReg(MachineInstr &MI, Register Replacement);
 | 
			
		||||
 | 
			
		||||
  /// Return true if \p MOP1 and \p MOP2 are register operands are defined by
 | 
			
		||||
  /// equivalent instructions.
 | 
			
		||||
  bool matchEqualDefs(const MachineOperand &MOP1, const MachineOperand &MOP2);
 | 
			
		||||
| 
						 | 
				
			
			@ -303,6 +306,13 @@ public:
 | 
			
		|||
                               std::tuple<Register, int64_t> &MatchInfo);
 | 
			
		||||
  bool applyAshShlToSextInreg(MachineInstr &MI,
 | 
			
		||||
                              std::tuple<Register, int64_t> &MatchInfo);
 | 
			
		||||
  /// \return true if \p MI is a G_AND instruction whose RHS is a mask where
 | 
			
		||||
  /// LHS & mask == LHS. (E.g., an all-ones value.)
 | 
			
		||||
  ///
 | 
			
		||||
  /// \param [in] MI - The G_AND instruction.
 | 
			
		||||
  /// \param [out] Reg - A register the G_AND should be replaced with on
 | 
			
		||||
  /// success.
 | 
			
		||||
  bool matchAndWithTrivialMask(MachineInstr &MI, Register &Replacement);
 | 
			
		||||
 | 
			
		||||
  /// Try to transform \p MI by using all of the above
 | 
			
		||||
  /// combine functions. Returns true if changed.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -299,6 +299,14 @@ def shl_ashr_to_sext_inreg : GICombineRule<
 | 
			
		|||
    [{ return Helper.matchAshrShlToSextInreg(*${root}, ${info}); }]),
 | 
			
		||||
  (apply [{ return Helper.applyAshShlToSextInreg(*${root}, ${info});}])
 | 
			
		||||
>;
 | 
			
		||||
// Fold (x & mask) -> x when (x & mask) is known to equal x.
 | 
			
		||||
def and_trivial_mask_matchinfo : GIDefMatchData<"Register">;
 | 
			
		||||
def and_trivial_mask: GICombineRule <
 | 
			
		||||
  (defs root:$root, and_trivial_mask_matchinfo:$matchinfo),
 | 
			
		||||
  (match (wip_match_opcode G_AND):$root,
 | 
			
		||||
         [{ return Helper.matchAndWithTrivialMask(*${root}, ${matchinfo}); }]),
 | 
			
		||||
  (apply [{ return Helper.replaceSingleDefInstWithReg(*${root}, ${matchinfo}); }])
 | 
			
		||||
>;
 | 
			
		||||
 | 
			
		||||
// FIXME: These should use the custom predicate feature once it lands.
 | 
			
		||||
def undef_combines : GICombineGroup<[undef_to_fp_zero, undef_to_int_zero,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1766,6 +1766,16 @@ bool CombinerHelper::replaceSingleDefInstWithOperand(MachineInstr &MI,
 | 
			
		|||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool CombinerHelper::replaceSingleDefInstWithReg(MachineInstr &MI,
 | 
			
		||||
                                                 Register Replacement) {
 | 
			
		||||
  assert(MI.getNumExplicitDefs() == 1 && "Expected one explicit def?");
 | 
			
		||||
  Register OldReg = MI.getOperand(0).getReg();
 | 
			
		||||
  assert(canReplaceReg(OldReg, Replacement, MRI) && "Cannot replace register?");
 | 
			
		||||
  MI.eraseFromParent();
 | 
			
		||||
  replaceRegWith(MRI, OldReg, Replacement);
 | 
			
		||||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool CombinerHelper::matchSelectSameVal(MachineInstr &MI) {
 | 
			
		||||
  assert(MI.getOpcode() == TargetOpcode::G_SELECT);
 | 
			
		||||
  // Match (cond ? x : x)
 | 
			
		||||
| 
						 | 
				
			
			@ -1979,6 +1989,52 @@ bool CombinerHelper::applyAshShlToSextInreg(
 | 
			
		|||
  return true;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool CombinerHelper::matchAndWithTrivialMask(MachineInstr &MI,
 | 
			
		||||
                                             Register &Replacement) {
 | 
			
		||||
  // Given
 | 
			
		||||
  //
 | 
			
		||||
  // %mask:_(sN) = G_CONSTANT iN 000...0111...1
 | 
			
		||||
  // %x:_(sN) = G_SOMETHING
 | 
			
		||||
  // %y:_(sN) = G_AND %x, %mask
 | 
			
		||||
  //
 | 
			
		||||
  // Eliminate the G_AND when it is known that x & mask == x.
 | 
			
		||||
  //
 | 
			
		||||
  // Patterns like this can appear as a result of legalization. E.g.
 | 
			
		||||
  //
 | 
			
		||||
  // %cmp:_(s32) = G_ICMP intpred(pred), %x(s32), %y
 | 
			
		||||
  // %one:_(s32) = G_CONSTANT i32 1
 | 
			
		||||
  // %and:_(s32) = G_AND %cmp, %one
 | 
			
		||||
  //
 | 
			
		||||
  // In this case, G_ICMP only produces a single bit, so x & 1 == x.
 | 
			
		||||
  assert(MI.getOpcode() == TargetOpcode::G_AND);
 | 
			
		||||
  if (!KB)
 | 
			
		||||
    return false;
 | 
			
		||||
 | 
			
		||||
  // Replacement = %x, AndDst = %y. Check that we can replace AndDst with the
 | 
			
		||||
  // LHS of the G_AND.
 | 
			
		||||
  Replacement = MI.getOperand(1).getReg();
 | 
			
		||||
  Register AndDst = MI.getOperand(0).getReg();
 | 
			
		||||
  LLT DstTy = MRI.getType(AndDst);
 | 
			
		||||
 | 
			
		||||
  // FIXME: This should be removed once GISelKnownBits supports vectors.
 | 
			
		||||
  if (DstTy.isVector())
 | 
			
		||||
    return false;
 | 
			
		||||
  if (!canReplaceReg(AndDst, Replacement, MRI))
 | 
			
		||||
    return false;
 | 
			
		||||
 | 
			
		||||
  // Check that we have a constant on the RHS of the G_AND, which is of the form
 | 
			
		||||
  // 000...0111...1.
 | 
			
		||||
  int64_t Cst;
 | 
			
		||||
  if (!mi_match(MI.getOperand(2).getReg(), MRI, m_ICst(Cst)))
 | 
			
		||||
    return false;
 | 
			
		||||
  APInt Mask(DstTy.getSizeInBits(), Cst);
 | 
			
		||||
  if (!Mask.isMask())
 | 
			
		||||
    return false;
 | 
			
		||||
 | 
			
		||||
  // Now, let's check that x & Mask == x. If this is true, then x & ~Mask == 0.
 | 
			
		||||
  return KB->maskedValueIsZero(Replacement, ~Mask);
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
bool CombinerHelper::tryCombine(MachineInstr &MI) {
 | 
			
		||||
  if (tryCombineCopy(MI))
 | 
			
		||||
    return true;
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -80,6 +80,7 @@ def AArch64PostLegalizerCombinerHelper
 | 
			
		|||
    : GICombinerHelper<"AArch64GenPostLegalizerCombinerHelper",
 | 
			
		||||
                       [copy_prop, erase_undef_store, combines_for_extload,
 | 
			
		||||
                        sext_trunc_sextload, shuffle_vector_pseudos,
 | 
			
		||||
                        hoist_logic_op_with_same_opcode_hands]> {
 | 
			
		||||
                        hoist_logic_op_with_same_opcode_hands,
 | 
			
		||||
                        and_trivial_mask]> {
 | 
			
		||||
  let DisableRuleOption = "aarch64postlegalizercombiner-disable-rule";
 | 
			
		||||
}
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -0,0 +1,222 @@
 | 
			
		|||
# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
 | 
			
		||||
#
 | 
			
		||||
# Check that we can fold (x & mask) -> x when (x & mask) is known to equal x.
 | 
			
		||||
#
 | 
			
		||||
# RUN: llc -mtriple aarch64 -run-pass=aarch64-postlegalizer-combiner -verify-machineinstrs %s -o - | FileCheck %s
 | 
			
		||||
 | 
			
		||||
name:            remove_and_with_one_bit
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1
 | 
			
		||||
    ; G_ICMP produces a single bit. The mask is 1.
 | 
			
		||||
    ;
 | 
			
		||||
    ; cmp = 000...0?
 | 
			
		||||
    ; mask = 000...01
 | 
			
		||||
    ; cmp & mask = 000...0?
 | 
			
		||||
    ;
 | 
			
		||||
    ; Remove the G_AND.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_with_one_bit
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1
 | 
			
		||||
    ; CHECK: %x:_(s32) = COPY $w0
 | 
			
		||||
    ; CHECK: %y:_(s32) = COPY $w1
 | 
			
		||||
    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
 | 
			
		||||
    ; CHECK: $w0 = COPY %cmp(s32)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $w0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
 | 
			
		||||
    %mask:_(s32) = G_CONSTANT i32 1
 | 
			
		||||
    %and:_(s32) = G_AND %cmp(s32), %mask
 | 
			
		||||
    $w0 = COPY %and(s32)
 | 
			
		||||
    RET_ReallyLR implicit $w0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            remove_and_all_ones_mask
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; -1 is all ones. Therefore z & -1 = z. Remove the G_AND.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_all_ones_mask
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = COPY $w2
 | 
			
		||||
    ; CHECK: $w0 = COPY %z(s32)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $w0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = COPY $w2
 | 
			
		||||
    %mask:_(s32) = G_CONSTANT i32 -1
 | 
			
		||||
    %and:_(s32) = G_AND %z(s32), %mask
 | 
			
		||||
    $w0 = COPY %and(s32)
 | 
			
		||||
    RET_ReallyLR implicit $w0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            remove_and_all_ones_zext
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; %z is a s32, so it can be at most the all-ones value on 32 bits.
 | 
			
		||||
    ; In decimal this is 4294967295. Any zero-extension of %z is at most this
 | 
			
		||||
    ; value.
 | 
			
		||||
    ;
 | 
			
		||||
    ; Therefore, zext(z) & 4294967295 == z. Remove the G_AND.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_all_ones_zext
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = COPY $w2
 | 
			
		||||
    ; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
 | 
			
		||||
    ; CHECK: $x0 = COPY %ext(s64)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $x0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = COPY $w2
 | 
			
		||||
    %ext:_(s64) = G_ZEXT %z
 | 
			
		||||
    %mask:_(s64) = G_CONSTANT i64 4294967295
 | 
			
		||||
    %and:_(s64) = G_AND %ext(s64), %mask
 | 
			
		||||
    $x0 = COPY %and(s64)
 | 
			
		||||
    RET_ReallyLR implicit $x0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            remove_and_all_ones_anyext
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; This is the same as the zext case.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_all_ones_anyext
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = COPY $w2
 | 
			
		||||
    ; CHECK: %ext:_(s64) = G_ZEXT %z(s32)
 | 
			
		||||
    ; CHECK: $x0 = COPY %ext(s64)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $x0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = COPY $w2
 | 
			
		||||
    %ext:_(s64) = G_ZEXT %z
 | 
			
		||||
    %mask:_(s64) = G_CONSTANT i64 4294967295
 | 
			
		||||
    %and:_(s64) = G_AND %ext(s64), %mask
 | 
			
		||||
    $x0 = COPY %and(s64)
 | 
			
		||||
    RET_ReallyLR implicit $x0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            dont_remove_all_ones_sext
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; We don't know if the sign bit is set on %z. So, the value in %ext may have
 | 
			
		||||
    ; higher bits set than 4294967295.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: dont_remove_all_ones_sext
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = COPY $w2
 | 
			
		||||
    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
 | 
			
		||||
    ; CHECK: %mask:_(s64) = G_CONSTANT i64 4294967295
 | 
			
		||||
    ; CHECK: %and:_(s64) = G_AND %ext, %mask
 | 
			
		||||
    ; CHECK: $x0 = COPY %and(s64)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $x0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = COPY $w2
 | 
			
		||||
    %ext:_(s64) = G_SEXT %z
 | 
			
		||||
    %mask:_(s64) = G_CONSTANT i64 4294967295
 | 
			
		||||
    %and:_(s64) = G_AND %ext(s64), %mask
 | 
			
		||||
    $x0 = COPY %and(s64)
 | 
			
		||||
    RET_ReallyLR implicit $x0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            remove_and_positive_constant_sext
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; We know the sign bit is not set on %z. Therefore,
 | 
			
		||||
    ;
 | 
			
		||||
    ; z = ext = 42 = 000...0101010
 | 
			
		||||
    ; mask = 0000...0111111
 | 
			
		||||
    ;
 | 
			
		||||
    ; So z & mask == z
 | 
			
		||||
    ; CHECK-LABEL: name: remove_and_positive_constant_sext
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = G_CONSTANT i32 42
 | 
			
		||||
    ; CHECK: %ext:_(s64) = G_SEXT %z(s32)
 | 
			
		||||
    ; CHECK: $x0 = COPY %ext(s64)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $x0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = G_CONSTANT i32 42
 | 
			
		||||
    %ext:_(s64) = G_SEXT %z
 | 
			
		||||
    %mask:_(s64) = G_CONSTANT i64 63
 | 
			
		||||
    %and:_(s64) = G_AND %ext(s64), %mask
 | 
			
		||||
    $x0 = COPY %and(s64)
 | 
			
		||||
    RET_ReallyLR implicit $x0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            not_a_mask
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1
 | 
			
		||||
    ; 6 is not a mask, so we should still have the G_AND.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: not_a_mask
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1
 | 
			
		||||
    ; CHECK: %x:_(s32) = COPY $w0
 | 
			
		||||
    ; CHECK: %y:_(s32) = COPY $w1
 | 
			
		||||
    ; CHECK: %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
 | 
			
		||||
    ; CHECK: %mask:_(s32) = G_CONSTANT i32 6
 | 
			
		||||
    ; CHECK: %and:_(s32) = G_AND %cmp, %mask
 | 
			
		||||
    ; CHECK: $w0 = COPY %and(s32)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $w0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %cmp:_(s32) = G_ICMP intpred(eq), %x(s32), %y
 | 
			
		||||
    %mask:_(s32) = G_CONSTANT i32 6
 | 
			
		||||
    %and:_(s32) = G_AND %cmp(s32), %mask
 | 
			
		||||
    $w0 = COPY %and(s32)
 | 
			
		||||
    RET_ReallyLR implicit $w0
 | 
			
		||||
 | 
			
		||||
...
 | 
			
		||||
---
 | 
			
		||||
name:            unknown_val
 | 
			
		||||
legalized:       true
 | 
			
		||||
tracksRegLiveness: true
 | 
			
		||||
body:             |
 | 
			
		||||
  bb.0:
 | 
			
		||||
    liveins: $w0, $w1, $w2
 | 
			
		||||
    ; We don't know what's in $w2, so we can't remove the G_AND without a mask
 | 
			
		||||
    ; that fills every bit in the type.
 | 
			
		||||
    ;
 | 
			
		||||
    ; CHECK-LABEL: name: unknown_val
 | 
			
		||||
    ; CHECK: liveins: $w0, $w1, $w2
 | 
			
		||||
    ; CHECK: %z:_(s32) = COPY $w2
 | 
			
		||||
    ; CHECK: %one:_(s32) = G_CONSTANT i32 32
 | 
			
		||||
    ; CHECK: %and:_(s32) = G_AND %z, %one
 | 
			
		||||
    ; CHECK: $w0 = COPY %and(s32)
 | 
			
		||||
    ; CHECK: RET_ReallyLR implicit $w0
 | 
			
		||||
    %x:_(s32) = COPY $w0
 | 
			
		||||
    %y:_(s32) = COPY $w1
 | 
			
		||||
    %z:_(s32) = COPY $w2
 | 
			
		||||
    %one:_(s32) = G_CONSTANT i32 32
 | 
			
		||||
    %and:_(s32) = G_AND %z(s32), %one
 | 
			
		||||
    $w0 = COPY %and(s32)
 | 
			
		||||
    RET_ReallyLR implicit $w0
 | 
			
		||||
...
 | 
			
		||||
		Loading…
	
		Reference in New Issue