From c0cbe3fbb7d6b3f197b05a47a2cc25ccb7977629 Mon Sep 17 00:00:00 2001
From: Roman Lebedev
Date: Fri, 3 Jan 2020 12:46:52 +0300
Subject: [PATCH] [NFC][DAGCombine][X86][AArch64] Tests for 'A - (A & (B - 1))'
 pattern (PR44448)

https://rise4fun.com/Alive/ZVdp

Name: ptr - (ptr & (alignment-1))  ->  ptr & (0 - alignment)
  %mask = add i64 %alignment, -1
  %bias = and i64 %ptr, %mask
  %r = sub i64 %ptr, %bias
=>
  %highbitmask = sub i64 0, %alignment
  %r = and i64 %ptr, %highbitmask

The main motivational pattern involves pointer-typed values,
so this transform can't really be done in the middle-end.

See
  https://bugs.llvm.org/show_bug.cgi?id=44448
  https://reviews.llvm.org/D71499
---
 llvm/test/CodeGen/AArch64/align-down.ll | 153 +++++++++++++
 llvm/test/CodeGen/X86/align-down.ll     | 271 ++++++++++++++++++++++++
 2 files changed, 424 insertions(+)
 create mode 100644 llvm/test/CodeGen/AArch64/align-down.ll
 create mode 100644 llvm/test/CodeGen/X86/align-down.ll

diff --git a/llvm/test/CodeGen/AArch64/align-down.ll b/llvm/test/CodeGen/AArch64/align-down.ll
new file mode 100644
index 000000000000..23ff194908cb
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/align-down.ll
@@ -0,0 +1,153 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=aarch64-unknown-linux-gnu < %s | FileCheck %s
+
+; Fold
+;   ptr - (ptr & (alignment-1))
+; To
+;   ptr & (0 - alignment)
+;
+; This needs to be a backend-level fold because only by this point pointers
+; are just registers; in middle-end IR this can only be done via the
+; @llvm.ptrmask() intrinsic, which is not yet sufficiently widespread.
+;
+; https://bugs.llvm.org/show_bug.cgi?id=44448
+
+; The basic positive tests
+
+define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: t0_32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
+; CHECK-LABEL: t1_64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub x8, x1, #1 // =1
+; CHECK-NEXT:    and x8, x0, x8
+; CHECK-NEXT:    sub x0, x0, x8
+; CHECK-NEXT:    ret
+  %mask = add i64 %alignment, -1
+  %bias = and i64 %ptr, %mask
+  %r = sub i64 %ptr, %bias
+  ret i64 %r
+}
+
+define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: t2_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w8, w0
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr ; swapped
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Extra use tests
+
+define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
+; CHECK-LABEL: t3_extrause0:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w9, w0, w8
+; CHECK-NEXT:    sub w0, w0, w9
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
+; CHECK-LABEL: n4_extrause1:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
+; CHECK-LABEL: n5_extrause2:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    str w8, [x2]
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    str w8, [x3]
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Negative tests
+
+define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; CHECK-LABEL: n6_different_ptrs:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, #1 // =1
+; CHECK-NEXT:    and w8, w1, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr1, %mask ; not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; CHECK-LABEL: n7_different_ptrs_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w2, #1 // =1
+; CHECK-NEXT:    and w8, w8, w1
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+
+define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: n8_not_lowbit_mask:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w0, w8
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, 1 ; not -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
+; CHECK-LABEL: n9_sub_is_not_commutative:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    sub w8, w1, #1 // =1
+; CHECK-NEXT:    and w8, w0, w8
+; CHECK-NEXT:    sub w0, w8, w0
+; CHECK-NEXT:    ret
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %bias, %ptr ; wrong order
+  ret i32 %r
+}
diff --git a/llvm/test/CodeGen/X86/align-down.ll b/llvm/test/CodeGen/X86/align-down.ll
new file mode 100644
index 000000000000..261740fee5f8
--- /dev/null
+++ b/llvm/test/CodeGen/X86/align-down.ll
@@ -0,0 +1,271 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=i686-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X86
+; RUN: llc -mtriple=x86_64-unknown-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK,X64
+
+; Fold
+;   ptr - (ptr & (alignment-1))
+; To
+;   ptr & (0 - alignment)
+;
+; This needs to be a backend-level fold because only by this point pointers
+; are just registers; in middle-end IR this can only be done via the
+; @llvm.ptrmask() intrinsic, which is not yet sufficiently widespread.
+;
+; https://bugs.llvm.org/show_bug.cgi?id=44448
+
+; The basic positive tests
+
+define i32 @t0_32(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: t0_32:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t0_32:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i64 @t1_64(i64 %ptr, i64 %alignment) nounwind {
+; X86-LABEL: t1_64:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    addl $-1, %ecx
+; X86-NEXT:    adcl $-1, %esi
+; X86-NEXT:    andl %edx, %esi
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    sbbl %esi, %edx
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: t1_64:
+; X64:       # %bb.0:
+; X64-NEXT:    movq %rdi, %rax
+; X64-NEXT:    decq %rsi
+; X64-NEXT:    andq %rdi, %rsi
+; X64-NEXT:    subq %rsi, %rax
+; X64-NEXT:    retq
+  %mask = add i64 %alignment, -1
+  %bias = and i64 %ptr, %mask
+  %r = sub i64 %ptr, %bias
+  ret i64 %r
+}
+
+define i32 @t2_commutative(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: t2_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t2_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr ; swapped
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Extra use tests
+
+define i32 @t3_extrause0(i32 %ptr, i32 %alignment, i32* %mask_storage) nounwind {
+; X86-LABEL: t3_extrause0:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    decl %edx
+; X86-NEXT:    movl %edx, (%ecx)
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: t3_extrause0:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n4_extrause1(i32 %ptr, i32 %alignment, i32* %bias_storage) nounwind {
+; X86-LABEL: n4_extrause1:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    decl %edx
+; X86-NEXT:    andl %eax, %edx
+; X86-NEXT:    movl %edx, (%ecx)
+; X86-NEXT:    subl %edx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n4_extrause1:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+define i32 @n5_extrause2(i32 %ptr, i32 %alignment, i32* %mask_storage, i32* %bias_storage) nounwind {
+; X86-LABEL: n5_extrause2:
+; X86:       # %bb.0:
+; X86-NEXT:    pushl %esi
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
+; X86-NEXT:    decl %esi
+; X86-NEXT:    movl %esi, (%edx)
+; X86-NEXT:    andl %eax, %esi
+; X86-NEXT:    movl %esi, (%ecx)
+; X86-NEXT:    subl %esi, %eax
+; X86-NEXT:    popl %esi
+; X86-NEXT:    retl
+;
+; X64-LABEL: n5_extrause2:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %esi
+; X64-NEXT:    movl %esi, (%rdx)
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    movl %esi, (%rcx)
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  store i32 %mask, i32* %mask_storage
+  %bias = and i32 %ptr, %mask ; has extra uses, can't fold
+  store i32 %bias, i32* %bias_storage
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+; Negative tests
+
+define i32 @n6_different_ptrs(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; X86-LABEL: n6_different_ptrs:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n6_different_ptrs:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %edx
+; X64-NEXT:    andl %esi, %edx
+; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr1, %mask ; not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+define i32 @n7_different_ptrs_commutative(i32 %ptr0, i32 %ptr1, i32 %alignment) nounwind {
+; X86-LABEL: n7_different_ptrs_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    decl %ecx
+; X86-NEXT:    andl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n7_different_ptrs_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    decl %edx
+; X64-NEXT:    andl %esi, %edx
+; X64-NEXT:    subl %edx, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %mask, %ptr1 ; swapped, not %ptr0
+  %r = sub i32 %ptr0, %bias ; not %ptr1
+  ret i32 %r
+}
+
+define i32 @n8_not_lowbit_mask(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: n8_not_lowbit_mask:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    incl %ecx
+; X86-NEXT:    andl %eax, %ecx
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n8_not_lowbit_mask:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %edi, %eax
+; X64-NEXT:    incl %esi
+; X64-NEXT:    andl %edi, %esi
+; X64-NEXT:    subl %esi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, 1 ; not -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %ptr, %bias
+  ret i32 %r
+}
+
+define i32 @n9_sub_is_not_commutative(i32 %ptr, i32 %alignment) nounwind {
+; X86-LABEL: n9_sub_is_not_commutative:
+; X86:       # %bb.0:
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
+; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
+; X86-NEXT:    decl %eax
+; X86-NEXT:    andl %ecx, %eax
+; X86-NEXT:    subl %ecx, %eax
+; X86-NEXT:    retl
+;
+; X64-LABEL: n9_sub_is_not_commutative:
+; X64:       # %bb.0:
+; X64-NEXT:    movl %esi, %eax
+; X64-NEXT:    decl %eax
+; X64-NEXT:    andl %edi, %eax
+; X64-NEXT:    subl %edi, %eax
+; X64-NEXT:    retq
+  %mask = add i32 %alignment, -1
+  %bias = and i32 %ptr, %mask
+  %r = sub i32 %bias, %ptr ; wrong order
+  ret i32 %r
+}
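
For intuition about why the transform is sound for any alignment, not just
powers of two: for any mask m, x == (x & m) + (x & ~m), hence
ptr - (ptr & (alignment-1)) == ptr & ~(alignment-1), and in two's complement
~(alignment-1) == 0 - alignment, which is exactly what the Alive proof above
states. The following minimal standalone C sketch, with illustrative helper
names that are not part of the patch, verifies the identity exhaustively over
8-bit values:

  #include <assert.h>
  #include <stdint.h>

  /* The form the tests start from: ptr - (ptr & (alignment-1)). */
  static uint8_t align_down_sub(uint8_t ptr, uint8_t alignment) {
    uint8_t mask = (uint8_t)(alignment - 1);
    uint8_t bias = (uint8_t)(ptr & mask);
    return (uint8_t)(ptr - bias);
  }

  /* The form the fold should produce: ptr & (0 - alignment). */
  static uint8_t align_down_and(uint8_t ptr, uint8_t alignment) {
    return (uint8_t)(ptr & (uint8_t)(0 - alignment));
  }

  int main(void) {
    /* Check every (ptr, alignment) pair; the identity is unconditional,
       matching the Alive proof, even though align-down is only meaningful
       when alignment is a power of two. */
    for (unsigned p = 0; p < 256; ++p)
      for (unsigned a = 0; a < 256; ++a)
        assert(align_down_sub((uint8_t)p, (uint8_t)a) ==
               align_down_and((uint8_t)p, (uint8_t)a));
    return 0;
  }

Compiled with any C99 compiler (e.g. cc check.c && ./a.out), it exits silently
when the identity holds.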