[NFC][DAGCombine][X86][AArch64] Tests for 'A - (A & (B - 1))' pattern (PR44448)

https://rise4fun.com/Alive/ZVdp

Name: ptr - (ptr & (alignment-1))  ->  ptr & (0 - alignment)
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
=>
  %highbitmask = sub i64 0, %alignment
  %r = and i64 %ptr, %highbitmask

The main motivating pattern involves pointer-typed values,
so this transform can't really be done in the middle-end.

See
  https://bugs.llvm.org/show_bug.cgi?id=44448
  https://reviews.llvm.org/D71499
This commit is contained in:
Roman Lebedev 2020-01-03 12:46:52 +03:00
parent 2e03324441
commit c0cbe3fbb7
No known key found for this signature in database
GPG Key ID: 083C3EBB4A1689E0
2 changed files with 424 additions and 0 deletions

View File

@ -0,0 +1,153 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
; Fold
; ptr - (ptr & (alignment-1))
; To
; ptr & (0 - alignment)
;
; This needs to be a backend-level fold because only at this point are
; pointers just registers; in middle-end IR this can only be done via the
; @llvm.ptrmask() intrinsic, which is not sufficiently widespread yet.
;
; https://bugs.llvm.org/show_bug.cgi?id=44448
; The basic positive tests
; Basic i32 form of the pattern: %ptr - (%ptr & (%alignment - 1)).
; The assertions below record the present output, in which the
; sub / and / sub sequence is emitted exactly as written in the IR.
define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
; CHECK-LABEL: t0_32:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1 ; %alignment - 1
%bias = and i32 %ptr, %mask ; bits of %ptr selected by %mask
%r = sub i32 %ptr, %bias ; %ptr with those bits subtracted out
ret i32 %r
}
; Same pattern as the i32 basic test, but on i64 operands; the expected
; output uses the 64-bit x registers instead of the 32-bit w registers.
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
; CHECK-LABEL: t1_64:
; CHECK: // %bb.0:
; CHECK-NEXT: sub x8, x1, #1 // =1
; CHECK-NEXT: and x8, x0, x8
; CHECK-NEXT: sub x0, x0, x8
; CHECK-NEXT: ret
%mask = add i64 %alignment, -1 ; %alignment - 1
%bias = and i64 %ptr, %mask ; bits of %ptr selected by %mask
%r = sub i64 %ptr, %bias ; %ptr with those bits subtracted out
ret i64 %r
}
; Variant of the basic i32 test with the operands of the 'and' swapped
; (%mask first, %ptr second), covering the commuted form of the pattern.
define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
; CHECK-LABEL: t2_commutative:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: and w8, w8, w0
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
%bias = and i32 %mask, %ptr ; swapped
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra use tests
; Extra-use variant: %mask is additionally stored through %mask_storage,
; so it has a second user besides the 'and'.
; NOTE(review): the 't' name prefix (as opposed to 'n' on the two tests
; that follow) suggests this case is still meant to be foldable - confirm
; against the transform's one-use requirements.
define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
; CHECK-LABEL: t3_extrause0:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: and w9, w0, w8
; CHECK-NEXT: sub w0, w0, w9
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
store i32 %mask, i32* %mask_storage ; extra use of %mask
%bias = and i32 %ptr, %mask
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra-use variant: %bias (the result of the 'and') is additionally
; stored through %bias_storage, so it must stay live as a separate value.
define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
; CHECK-LABEL: n4_extrause1:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
%bias = and i32 %ptr, %mask ; has extra uses, can't fold
store i32 %bias, i32* %bias_storage
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra-use variant combining both previous cases: %mask is stored through
; %mask_storage and %bias is stored through %bias_storage.
define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
; CHECK-LABEL: n5_extrause2:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: str w8, [x2]
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: str w8, [x3]
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
store i32 %mask, i32* %mask_storage ; extra use of %mask
%bias = and i32 %ptr, %mask ; has extra uses, can't fold
store i32 %bias, i32* %bias_storage
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Negative tests
; Negative test: the value that is masked (%ptr1) is not the value that is
; subtracted from (%ptr0), so this is not the 'A - (A & (B - 1))' shape.
define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; CHECK-LABEL: n6_different_ptrs:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w2, #1 // =1
; CHECK-NEXT: and w8, w1, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
%bias = and i32 %ptr1, %mask ; not %ptr0
%r = sub i32 %ptr0, %bias ; not %ptr1
ret i32 %r
}
; Negative test: same mismatch between the masked value (%ptr1) and the
; minuend (%ptr0) as the preceding test, with the 'and' operands swapped.
define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; CHECK-LABEL: n7_different_ptrs_commutative:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w2, #1 // =1
; CHECK-NEXT: and w8, w8, w1
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
%bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
%r = sub i32 %ptr0, %bias ; not %ptr1
ret i32 %r
}
; Negative test: the mask is computed as %alignment + 1 rather than
; %alignment - 1, so it is not the low-bit mask the pattern requires.
define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
; CHECK-LABEL: n8_not_lowbit_mask:
; CHECK: // %bb.0:
; CHECK-NEXT: add w8, w1, #1 // =1
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w0, w8
; CHECK-NEXT: ret
%mask = add i32 %alignment, 1 ; not -1
%bias = and i32 %ptr, %mask
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Negative test: the final subtraction has its operands reversed, computing
; %bias - %ptr instead of %ptr - %bias.
define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
; CHECK-LABEL: n9_sub_is_not_commutative:
; CHECK: // %bb.0:
; CHECK-NEXT: sub w8, w1, #1 // =1
; CHECK-NEXT: and w8, w0, w8
; CHECK-NEXT: sub w0, w8, w0
; CHECK-NEXT: ret
%mask = add i32 %alignment, -1
%bias = and i32 %ptr, %mask
%r = sub i32 %bias, %ptr ; wrong order
ret i32 %r
}

View File

@ -0,0 +1,271 @@
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
; Fold
; ptr - (ptr & (alignment-1))
; To
; ptr & (0 - alignment)
;
; This needs to be a backend-level fold because only at this point are
; pointers just registers; in middle-end IR this can only be done via the
; @llvm.ptrmask() intrinsic, which is not sufficiently widespread yet.
;
; https://bugs.llvm.org/show_bug.cgi?id=44448
; The basic positive tests
; Basic i32 form of the pattern: %ptr - (%ptr & (%alignment - 1)).
; The assertions below record the present output for both run lines, in
; which a decrement / and / subtract sequence is emitted as written.
define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t0_32:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: t0_32:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: andl %edi, %esi
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1 ; %alignment - 1
%bias = and i32 %ptr, %mask ; bits of %ptr selected by %mask
%r = sub i32 %ptr, %bias ; %ptr with those bits subtracted out
ret i32 %r
}
; Same pattern as the i32 basic test, but on i64 operands. For the i686
; run line the expected output handles each value as two 32-bit halves
; (addl/adcl, two andl, subl/sbbl); the x86_64 run line uses 64-bit
; registers directly.
define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
; X86-LABEL: t1_64:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: addl $-1, %ecx
; X86-NEXT: adcl $-1, %esi
; X86-NEXT: andl %edx, %esi
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: sbbl %esi, %edx
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: t1_64:
; X64: # %bb.0:
; X64-NEXT: movq %rdi, %rax
; X64-NEXT: decq %rsi
; X64-NEXT: andq %rdi, %rsi
; X64-NEXT: subq %rsi, %rax
; X64-NEXT: retq
%mask = add i64 %alignment, -1 ; %alignment - 1
%bias = and i64 %ptr, %mask ; bits of %ptr selected by %mask
%r = sub i64 %ptr, %bias ; %ptr with those bits subtracted out
ret i64 %r
}
; Variant of the basic i32 test with the operands of the 'and' swapped
; (%mask first, %ptr second), covering the commuted form of the pattern.
define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: t2_commutative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: t2_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: andl %edi, %esi
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
%bias = and i32 %mask, %ptr ; swapped
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra use tests
; Extra-use variant: %mask is additionally stored through %mask_storage,
; so it has a second user besides the 'and'.
; NOTE(review): the 't' name prefix (as opposed to 'n' on the two tests
; that follow) suggests this case is still meant to be foldable - confirm
; against the transform's one-use requirements.
define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
; X86-LABEL: t3_extrause0:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: decl %edx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: andl %eax, %edx
; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: t3_extrause0:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: movl %esi, (%rdx)
; X64-NEXT: andl %edi, %esi
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
store i32 %mask, i32* %mask_storage ; extra use of %mask
%bias = and i32 %ptr, %mask
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra-use variant: %bias (the result of the 'and') is additionally
; stored through %bias_storage, so it must stay live as a separate value.
define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
; X86-LABEL: n4_extrause1:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: decl %edx
; X86-NEXT: andl %eax, %edx
; X86-NEXT: movl %edx, (%ecx)
; X86-NEXT: subl %edx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n4_extrause1:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, (%rdx)
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
%bias = and i32 %ptr, %mask ; has extra uses, can't fold
store i32 %bias, i32* %bias_storage
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Extra-use variant combining both previous cases: %mask is stored through
; %mask_storage and %bias is stored through %bias_storage.
define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
; X86-LABEL: n5_extrause2:
; X86: # %bb.0:
; X86-NEXT: pushl %esi
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %edx
; X86-NEXT: movl {{[0-9]+}}(%esp), %esi
; X86-NEXT: decl %esi
; X86-NEXT: movl %esi, (%edx)
; X86-NEXT: andl %eax, %esi
; X86-NEXT: movl %esi, (%ecx)
; X86-NEXT: subl %esi, %eax
; X86-NEXT: popl %esi
; X86-NEXT: retl
;
; X64-LABEL: n5_extrause2:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %esi
; X64-NEXT: movl %esi, (%rdx)
; X64-NEXT: andl %edi, %esi
; X64-NEXT: movl %esi, (%rcx)
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
store i32 %mask, i32* %mask_storage ; extra use of %mask
%bias = and i32 %ptr, %mask ; has extra uses, can't fold
store i32 %bias, i32* %bias_storage
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Negative tests
; Negative test: the value that is masked (%ptr1) is not the value that is
; subtracted from (%ptr0), so this is not the 'A - (A & (B - 1))' shape.
define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n6_different_ptrs:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n6_different_ptrs:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %edx
; X64-NEXT: andl %esi, %edx
; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
%bias = and i32 %ptr1, %mask ; not %ptr0
%r = sub i32 %ptr0, %bias ; not %ptr1
ret i32 %r
}
; Negative test: same mismatch between the masked value (%ptr1) and the
; minuend (%ptr0) as the preceding test, with the 'and' operands swapped.
define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
; X86-LABEL: n7_different_ptrs_commutative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: decl %ecx
; X86-NEXT: andl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n7_different_ptrs_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: decl %edx
; X64-NEXT: andl %esi, %edx
; X64-NEXT: subl %edx, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
%bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
%r = sub i32 %ptr0, %bias ; not %ptr1
ret i32 %r
}
; Negative test: the mask is computed as %alignment + 1 rather than
; %alignment - 1, so it is not the low-bit mask the pattern requires.
define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: n8_not_lowbit_mask:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: incl %ecx
; X86-NEXT: andl %eax, %ecx
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n8_not_lowbit_mask:
; X64: # %bb.0:
; X64-NEXT: movl %edi, %eax
; X64-NEXT: incl %esi
; X64-NEXT: andl %edi, %esi
; X64-NEXT: subl %esi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, 1 ; not -1
%bias = and i32 %ptr, %mask
%r = sub i32 %ptr, %bias
ret i32 %r
}
; Negative test: the final subtraction has its operands reversed, computing
; %bias - %ptr instead of %ptr - %bias.
define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
; X86-LABEL: n9_sub_is_not_commutative:
; X86: # %bb.0:
; X86-NEXT: movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT: movl {{[0-9]+}}(%esp), %eax
; X86-NEXT: decl %eax
; X86-NEXT: andl %ecx, %eax
; X86-NEXT: subl %ecx, %eax
; X86-NEXT: retl
;
; X64-LABEL: n9_sub_is_not_commutative:
; X64: # %bb.0:
; X64-NEXT: movl %esi, %eax
; X64-NEXT: decl %eax
; X64-NEXT: andl %edi, %eax
; X64-NEXT: subl %edi, %eax
; X64-NEXT: retq
%mask = add i32 %alignment, -1
%bias = and i32 %ptr, %mask
%r = sub i32 %bias, %ptr ; wrong order
ret i32 %r
}