From c0cbe3fbb7d6b3f197b05a47a2cc25ccb7977629 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Fri, 3 Jan 2020 12:46:52 +0300
Subject: [PATCH] [NFC][DAGCombine][X86][AArch64] Tests for 'A - (A & (B - 1))'
 pattern (PR44448)

https://rise4fun.com/Alive/ZVdp

Name: ptr - (ptr & (alignment-1))  ->  ptr & (0 - alignment)
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
=>
  %highbitmask = sub i64 0, %alignment
  %r = and i64 %ptr, %highbitmask

The main motivational pattern involves pointer-typed values,
so this transform can't really be done in the middle-end.

See
  https://bugs.llvm.org/show_bug.cgi?id=44448
  https://reviews.llvm.org/D71499
---
 llvm/test/CodeGen/AArch64/align-down.ll | 153 +++++++++++++
 llvm/test/CodeGen/X86/align-down.ll     | 271 ++++++++++++++++++++++++
 2 files changed, 424 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/align-down.ll
 create mode 100644 llvm/test/CodeGen/X86/align-down.ll

diff --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll
new file mode 100644
index 000000000000..23ff194908cb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/align-down.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; Fold
+;   ptr - (ptr & (alignment-1))
+; To
+;   ptr & (0 - alignment)
+;
+; This needs to be a backend-level fold because only by this point pointers
+; are just registers; in middle-end IR this can only be done via the
+; @llvm.ptrmask() intrinsic, which is not yet sufficiently widespread.
+;
+; https://bugs.llvm.org/show_bug.cgi?id=44448
+
+; The basic positive tests
+
+define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: t0_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
+; CHECK-LABEL: t1_64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x1, #1 // =1
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+  %mask = add i64 %alignment, -1
+  %bias = and i64 %ptr, %mask
+  %r = sub i64 %ptr, %bias
+  ret i64 %r
+}
+
+define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: t2_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr ; swapped
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Extra use tests
+
+define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
+; CHECK-LABEL: t3_extrause0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w9, w0, w8
+; CHECK-NEXT:    sub w0, w0, w9
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
+; CHECK-LABEL: n4_extrause1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
+; CHECK-LABEL: n5_extrause2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    str w8, [x3]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Negative tests
+
+define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; CHECK-LABEL: n6_different_ptrs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, #1 // =1
+; CHECK-NEXT:    and w8, w1, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr1, %mask ; not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; CHECK-LABEL: n7_different_ptrs_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, #1 // =1
+; CHECK-NEXT:    and w8, w8, w1
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+
+define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: n8_not_lowbit_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, 1 ; not -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: n9_sub_is_not_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %bias, %ptr ; wrong order
+  ret i32 %r
+}
diff --git a/llvm/test/CodeGen/X86/align-down.ll b/llvm/test/CodeGen/X86/align-down.ll
new file mode 100644
index 000000000000..261740fee5f8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-down.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
+
+; Fold
+;   ptr - (ptr & (alignment-1))
+; To
+;   ptr & (0 - alignment)
+;
+; This needs to be a backend-level fold because only by this point pointers
+; are just registers; in middle-end IR this can only be done via the
+; @llvm.ptrmask() intrinsic, which is not yet sufficiently widespread.
+;
+; https://bugs.llvm.org/show_bug.cgi?id=44448
+
+; The basic positive tests
+
+define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: t0_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t0_32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
+; X86-LABEL: t1_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    addl $-1, %ecx
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: t1_64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    decq %rsi
+; X64-NEXT:    andq %rdi, %rsi
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    retq
+  %mask = add i64 %alignment, -1
+  %bias = and i64 %ptr, %mask
+  %r = sub i64 %ptr, %bias
+  ret i64 %r
+}
+
+define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: t2_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t2_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr ; swapped
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Extra use tests
+
+define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
+; X86-LABEL: t3_extrause0:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    decl %edx
+; X86-NEXT:    movl %edx, (%ecx)
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t3_extrause0:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
+; X86-LABEL: n4_extrause1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    decl %edx
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    movl %edx, (%ecx)
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n4_extrause1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
+; X86-LABEL: n5_extrause2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    decl %esi
+; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: n5_extrause2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, (%rcx)
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Negative tests
+
+define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; X86-LABEL: n6_different_ptrs:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n6_different_ptrs:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %edx
+; X64-NEXT:    andl %esi, %edx
+; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr1, %mask ; not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; X86-LABEL: n7_different_ptrs_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n7_different_ptrs_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %edx
+; X64-NEXT:    andl %esi, %edx
+; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+
+define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: n8_not_lowbit_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    incl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n8_not_lowbit_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    incl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, 1 ; not -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: n9_sub_is_not_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    decl %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n9_sub_is_not_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    decl %eax
+; X64-NEXT:    andl %edi, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %bias, %ptr ; wrong order
+  ret i32 %r
+}
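
For intuition about why the transform is sound for any alignment, not just
powers of two: for any mask m, x == (x & m) + (x & ~m), hence
ptr - (ptr & (alignment-1)) == ptr & ~(alignment-1), and in two's complement
~(alignment-1) == 0 - alignment, which is exactly what the Alive proof above
states. The following minimal standalone C sketch, with illustrative helper
names that are not part of the patch, verifies the identity exhaustively over
8-bit values:

  #include <assert.h>
  #include <stdint.h>

  /* The form the tests start from: ptr - (ptr & (alignment-1)). */
  static uint8_t align_down_sub(uint8_t ptr, uint8_t alignment) {
    uint8_t mask = (uint8_t)(alignment - 1);
    uint8_t bias = (uint8_t)(ptr & mask);
    return (uint8_t)(ptr - bias);
  }

  /* The form the fold should produce: ptr & (0 - alignment). */
  static uint8_t align_down_and(uint8_t ptr, uint8_t alignment) {
    return (uint8_t)(ptr & (uint8_t)(0 - alignment));
  }

  int main(void) {
    /* Check every (ptr, alignment) pair; the identity is unconditional,
       matching the Alive proof, even though align-down is only meaningful
       when alignment is a power of two. */
    for (unsigned p = 0; p < 256; ++p)
      for (unsigned a = 0; a < 256; ++a)
        assert(align_down_sub((uint8_t)p, (uint8_t)a) ==
               align_down_and((uint8_t)p, (uint8_t)a));
    return 0;
  }

Compiled with any C99 compiler (e.g. cc check.c && ./a.out), it exits silently
when the identity holds.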