; llvm-project/llvm/test/CodeGen/X86/load-combine.ll
;
; 734 lines
; 22 KiB
; LLVM

; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown | FileCheck %s
; RUN: llc < %s -mtriple=x86_64-unknown | FileCheck %s --check-prefix=CHECK64
; i8* p;
; (i32) p[0] | ((i32) p[1] << 8) | ((i32) p[2] << 16) | ((i32) p[3] << 24)
define i32 @load_i32_by_i8(i32*) {
; CHECK-LABEL: load_i32_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
; Four adjacent i8 loads, zero-extended and OR'd together in little-endian
; byte order; per the CHECK lines above, the combine folds them into a
; single i32 load on both targets.
%2 = bitcast i32* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0]
%4 = zext i8 %3 to i32
%5 = getelementptr inbounds i8, i8* %2, i32 1
%6 = load i8, i8* %5, align 1         ; p[1]
%7 = zext i8 %6 to i32
%8 = shl nuw nsw i32 %7, 8
%9 = or i32 %8, %4
%10 = getelementptr inbounds i8, i8* %2, i32 2
%11 = load i8, i8* %10, align 1       ; p[2]
%12 = zext i8 %11 to i32
%13 = shl nuw nsw i32 %12, 16
%14 = or i32 %9, %13
%15 = getelementptr inbounds i8, i8* %2, i32 3
%16 = load i8, i8* %15, align 1       ; p[3]
%17 = zext i8 %16 to i32
%18 = shl nuw nsw i32 %17, 24
%19 = or i32 %14, %18
ret i32 %19
}
; i8* p;
; ((i32) p[0] << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap(i32*) {
; CHECK-LABEL: load_i32_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
; Same four adjacent byte loads as load_i32_by_i8, but combined in
; big-endian order; per the CHECK lines this folds to a single i32 load
; followed by a byte swap.
%2 = bitcast i32* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0] -> bits 24..31
%4 = zext i8 %3 to i32
%5 = shl nuw nsw i32 %4, 24
%6 = getelementptr inbounds i8, i8* %2, i32 1
%7 = load i8, i8* %6, align 1         ; p[1] -> bits 16..23
%8 = zext i8 %7 to i32
%9 = shl nuw nsw i32 %8, 16
%10 = or i32 %9, %5
%11 = getelementptr inbounds i8, i8* %2, i32 2
%12 = load i8, i8* %11, align 1       ; p[2] -> bits 8..15
%13 = zext i8 %12 to i32
%14 = shl nuw nsw i32 %13, 8
%15 = or i32 %10, %14
%16 = getelementptr inbounds i8, i8* %2, i32 3
%17 = load i8, i8* %16, align 1       ; p[3] -> bits 0..7
%18 = zext i8 %17 to i32
%19 = or i32 %15, %18
ret i32 %19
}
; i16* p;
; (i32) p[0] | ((i32) p[1] << 16)
define i32 @load_i32_by_i16(i32*) {
; CHECK-LABEL: load_i32_by_i16:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
; Two adjacent i16 loads combined little-endian; per the CHECK lines the
; combine folds them into one i32 load.
%2 = bitcast i32* %0 to i16*
%3 = load i16, i16* %2, align 1       ; p[0] -> low half
%4 = zext i16 %3 to i32
%5 = getelementptr inbounds i16, i16* %2, i32 1
%6 = load i16, i16* %5, align 1       ; p[1] -> high half
%7 = zext i16 %6 to i32
%8 = shl nuw nsw i32 %7, 16
%9 = or i32 %8, %4
ret i32 %9
}
; i16* p_16;
; i8* p_8 = (i8*) p_16;
; (i32) p_16[0] | ((i32) p_8[2] << 16) | ((i32) p_8[3] << 24)
define i32 @load_i32_by_i16_i8(i32*) {
; CHECK-LABEL: load_i32_by_i16_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
; Mixed-width pattern: one i16 load for bytes 0-1 plus two i8 loads for
; bytes 2 and 3, combined little-endian; per the CHECK lines this still
; folds to a single i32 load.
%2 = bitcast i32* %0 to i16*
%3 = bitcast i32* %0 to i8*
%4 = load i16, i16* %2, align 1       ; bytes 0-1
%5 = zext i16 %4 to i32
%6 = getelementptr inbounds i8, i8* %3, i32 2
%7 = load i8, i8* %6, align 1         ; byte 2
%8 = zext i8 %7 to i32
%9 = shl nuw nsw i32 %8, 16
%10 = getelementptr inbounds i8, i8* %3, i32 3
%11 = load i8, i8* %10, align 1       ; byte 3
%12 = zext i8 %11 to i32
%13 = shl nuw nsw i32 %12, 24
%14 = or i32 %9, %13
%15 = or i32 %14, %5
ret i32 %15
}
; i8* p;
; (i32) ((i16) p[0] | ((i16) p[1] << 8)) | ((i32) ((i16) p[2] | ((i16) p[3] << 8)) << 16)
define i32 @load_i32_by_i16_by_i8(i32*) {
; CHECK-LABEL: load_i32_by_i16_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: retq
; Nested pattern: bytes 0-1 and bytes 2-3 are each assembled into an i16
; (little-endian), then the two i16 halves are assembled into an i32; per
; the CHECK lines the whole tree folds to one i32 load.
%2 = bitcast i32* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0]
%4 = zext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %2, i32 1
%6 = load i8, i8* %5, align 1         ; p[1]
%7 = zext i8 %6 to i16
%8 = shl nuw nsw i16 %7, 8
%9 = or i16 %8, %4
%10 = getelementptr inbounds i8, i8* %2, i32 2
%11 = load i8, i8* %10, align 1       ; p[2]
%12 = zext i8 %11 to i16
%13 = getelementptr inbounds i8, i8* %2, i32 3
%14 = load i8, i8* %13, align 1       ; p[3]
%15 = zext i8 %14 to i16
%16 = shl nuw nsw i16 %15, 8
%17 = or i16 %16, %12
%18 = zext i16 %9 to i32
%19 = zext i16 %17 to i32
%20 = shl nuw nsw i32 %19, 16
%21 = or i32 %20, %18
ret i32 %21
}
; i8* p;
; ((i32) (((i16) p[0] << 8) | (i16) p[1]) << 16) | (i32) (((i16) p[2] << 8) | (i16) p[3])
define i32 @load_i32_by_i16_by_i8_bswap(i32*) {
; CHECK-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl (%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i16_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movl (%rdi), %eax
; CHECK64-NEXT: bswapl %eax
; CHECK64-NEXT: retq
; Nested big-endian pattern: each byte pair forms an i16 with the earlier
; byte in the high half, and the first pair lands in the high half of the
; i32; per the CHECK lines this folds to a load plus a byte swap.
%2 = bitcast i32* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0]
%4 = zext i8 %3 to i16
%5 = getelementptr inbounds i8, i8* %2, i32 1
%6 = load i8, i8* %5, align 1         ; p[1]
%7 = zext i8 %6 to i16
%8 = shl nuw nsw i16 %4, 8
%9 = or i16 %8, %7
%10 = getelementptr inbounds i8, i8* %2, i32 2
%11 = load i8, i8* %10, align 1       ; p[2]
%12 = zext i8 %11 to i16
%13 = getelementptr inbounds i8, i8* %2, i32 3
%14 = load i8, i8* %13, align 1       ; p[3]
%15 = zext i8 %14 to i16
%16 = shl nuw nsw i16 %12, 8
%17 = or i16 %16, %15
%18 = zext i16 %9 to i32
%19 = zext i16 %17 to i32
%20 = shl nuw nsw i32 %18, 16
%21 = or i32 %20, %19
ret i32 %21
}
; i8* p;
; (i64) p[0] | ((i64) p[1] << 8) | ((i64) p[2] << 16) | ((i64) p[3] << 24) | ((i64) p[4] << 32) | ((i64) p[5] << 40) | ((i64) p[6] << 48) | ((i64) p[7] << 56)
define i64 @load_i64_by_i8(i64*) {
; CHECK-LABEL: load_i64_by_i8:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %edi
; CHECK-NEXT: .Lcfi0:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi1:
; CHECK-NEXT: .cfi_def_cfa_offset 12
; CHECK-NEXT: .Lcfi2:
; CHECK-NEXT: .cfi_offset %esi, -12
; CHECK-NEXT: .Lcfi3:
; CHECK-NEXT: .cfi_offset %edi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %eax
; CHECK-NEXT: movzbl 1(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 2(%ecx), %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %esi, %eax
; CHECK-NEXT: movzbl 4(%ecx), %edx
; CHECK-NEXT: movzbl 5(%ecx), %esi
; CHECK-NEXT: shll $8, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 6(%ecx), %edi
; CHECK-NEXT: shll $16, %edi
; CHECK-NEXT: orl %esi, %edi
; CHECK-NEXT: movzbl 7(%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: orl %edi, %edx
; CHECK-NEXT: popl %esi
; CHECK-NEXT: popl %edi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: retq
; Eight adjacent i8 loads combined little-endian into an i64. Per the
; CHECK64 lines, x86-64 folds this into a single movq; per the CHECK
; lines, i686 keeps the per-byte loads/shifts/ors (no single 64-bit load
; is available there).
%2 = bitcast i64* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0]
%4 = zext i8 %3 to i64
%5 = getelementptr inbounds i8, i8* %2, i64 1
%6 = load i8, i8* %5, align 1         ; p[1]
%7 = zext i8 %6 to i64
%8 = shl nuw nsw i64 %7, 8
%9 = or i64 %8, %4
%10 = getelementptr inbounds i8, i8* %2, i64 2
%11 = load i8, i8* %10, align 1       ; p[2]
%12 = zext i8 %11 to i64
%13 = shl nuw nsw i64 %12, 16
%14 = or i64 %9, %13
%15 = getelementptr inbounds i8, i8* %2, i64 3
%16 = load i8, i8* %15, align 1       ; p[3]
%17 = zext i8 %16 to i64
%18 = shl nuw nsw i64 %17, 24
%19 = or i64 %14, %18
%20 = getelementptr inbounds i8, i8* %2, i64 4
%21 = load i8, i8* %20, align 1       ; p[4]
%22 = zext i8 %21 to i64
%23 = shl nuw nsw i64 %22, 32
%24 = or i64 %19, %23
%25 = getelementptr inbounds i8, i8* %2, i64 5
%26 = load i8, i8* %25, align 1       ; p[5]
%27 = zext i8 %26 to i64
%28 = shl nuw nsw i64 %27, 40
%29 = or i64 %24, %28
%30 = getelementptr inbounds i8, i8* %2, i64 6
%31 = load i8, i8* %30, align 1       ; p[6]
%32 = zext i8 %31 to i64
%33 = shl nuw nsw i64 %32, 48
%34 = or i64 %29, %33
%35 = getelementptr inbounds i8, i8* %2, i64 7
%36 = load i8, i8* %35, align 1       ; p[7]
%37 = zext i8 %36 to i64
%38 = shl nuw i64 %37, 56
%39 = or i64 %34, %38
ret i64 %39
}
; i8* p;
; ((i64) p[0] << 56) | ((i64) p[1] << 48) | ((i64) p[2] << 40) | ((i64) p[3] << 32) | ((i64) p[4] << 24) | ((i64) p[5] << 16) | ((i64) p[6] << 8) | (i64) p[7]
define i64 @load_i64_by_i8_bswap(i64*) {
; CHECK-LABEL: load_i64_by_i8_bswap:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi4:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi5:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 4(%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 5(%eax), %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 6(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 7(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i64_by_i8_bswap:
; CHECK64: # BB#0:
; CHECK64-NEXT: movq (%rdi), %rax
; CHECK64-NEXT: bswapq %rax
; CHECK64-NEXT: retq
; Eight adjacent i8 loads combined big-endian into an i64. Per the
; CHECK64 lines this folds to movq + bswapq on x86-64; per the CHECK
; lines, i686 keeps the per-byte sequence.
%2 = bitcast i64* %0 to i8*
%3 = load i8, i8* %2, align 1         ; p[0] -> bits 56..63
%4 = zext i8 %3 to i64
%5 = shl nuw i64 %4, 56
%6 = getelementptr inbounds i8, i8* %2, i64 1
%7 = load i8, i8* %6, align 1         ; p[1]
%8 = zext i8 %7 to i64
%9 = shl nuw nsw i64 %8, 48
%10 = or i64 %9, %5
%11 = getelementptr inbounds i8, i8* %2, i64 2
%12 = load i8, i8* %11, align 1       ; p[2]
%13 = zext i8 %12 to i64
%14 = shl nuw nsw i64 %13, 40
%15 = or i64 %10, %14
%16 = getelementptr inbounds i8, i8* %2, i64 3
%17 = load i8, i8* %16, align 1       ; p[3]
%18 = zext i8 %17 to i64
%19 = shl nuw nsw i64 %18, 32
%20 = or i64 %15, %19
%21 = getelementptr inbounds i8, i8* %2, i64 4
%22 = load i8, i8* %21, align 1       ; p[4]
%23 = zext i8 %22 to i64
%24 = shl nuw nsw i64 %23, 24
%25 = or i64 %20, %24
%26 = getelementptr inbounds i8, i8* %2, i64 5
%27 = load i8, i8* %26, align 1       ; p[5]
%28 = zext i8 %27 to i64
%29 = shl nuw nsw i64 %28, 16
%30 = or i64 %25, %29
%31 = getelementptr inbounds i8, i8* %2, i64 6
%32 = load i8, i8* %31, align 1       ; p[6]
%33 = zext i8 %32 to i64
%34 = shl nuw nsw i64 %33, 8
%35 = or i64 %30, %34
%36 = getelementptr inbounds i8, i8* %2, i64 7
%37 = load i8, i8* %36, align 1       ; p[7] -> bits 0..7
%38 = zext i8 %37 to i64
%39 = or i64 %35, %38
ret i64 %39
}
; Part of the load by bytes pattern is used outside of the pattern
; i8* p;
; i32 x = (i32) p[1]
; res = ((i32) p[0] << 24) | (x << 16) | ((i32) p[2] << 8) | (i32) p[3]
; x | res
define i32 @load_i32_by_i8_bswap_uses(i32*) {
; CHECK-LABEL: load_i32_by_i8_bswap_uses:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi6:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi7:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: movl %edx, %esi
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %ecx, %esi
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %esi, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_uses:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl %ecx, %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %eax, %edx
; CHECK64-NEXT: movzbl 2(%rdi), %esi
; CHECK64-NEXT: shll $8, %esi
; CHECK64-NEXT: orl %edx, %esi
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %esi, %eax
; CHECK64-NEXT: retq
; Negative test: the big-endian byte-combine pattern is present, but %8
; (the zero-extended p[1]) has an extra use at %20 outside the pattern.
; Per the CHECK lines no wide load/bswap is formed on either target.
%2 = bitcast i32* %0 to i8*
%3 = load i8, i8* %2, align 1
%4 = zext i8 %3 to i32
%5 = shl nuw nsw i32 %4, 24
%6 = getelementptr inbounds i8, i8* %2, i32 1
%7 = load i8, i8* %6, align 1
%8 = zext i8 %7 to i32
%9 = shl nuw nsw i32 %8, 16
%10 = or i32 %9, %5
%11 = getelementptr inbounds i8, i8* %2, i32 2
%12 = load i8, i8* %11, align 1
%13 = zext i8 %12 to i32
%14 = shl nuw nsw i32 %13, 8
%15 = or i32 %10, %14
%16 = getelementptr inbounds i8, i8* %2, i32 3
%17 = load i8, i8* %16, align 1
%18 = zext i8 %17 to i32
%19 = or i32 %15, %18
; Use individual part of the pattern outside of the pattern
%20 = or i32 %8, %19
ret i32 %20
}
; One of the loads is volatile
; i8* p;
; p0 = volatile *p;
; ((i32) p0 << 24) | ((i32) p[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_volatile(i32*) {
; CHECK-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: shll $24, %ecx
; CHECK-NEXT: movzbl 1(%eax), %edx
; CHECK-NEXT: shll $16, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 2(%eax), %ecx
; CHECK-NEXT: shll $8, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 3(%eax), %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_volatile:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
; Negative test: the first byte load is volatile, so it must not be
; merged with the others. Per the CHECK lines no wide load is formed.
%2 = bitcast i32* %0 to i8*
%3 = load volatile i8, i8* %2, align 1  ; volatile blocks the combine
%4 = zext i8 %3 to i32
%5 = shl nuw nsw i32 %4, 24
%6 = getelementptr inbounds i8, i8* %2, i32 1
%7 = load i8, i8* %6, align 1
%8 = zext i8 %7 to i32
%9 = shl nuw nsw i32 %8, 16
%10 = or i32 %9, %5
%11 = getelementptr inbounds i8, i8* %2, i32 2
%12 = load i8, i8* %11, align 1
%13 = zext i8 %12 to i32
%14 = shl nuw nsw i32 %13, 8
%15 = or i32 %10, %14
%16 = getelementptr inbounds i8, i8* %2, i32 3
%17 = load i8, i8* %16, align 1
%18 = zext i8 %17 to i32
%19 = or i32 %15, %18
ret i32 %19
}
; There is a store in between individual loads
; i8* p, q;
; res1 = ((i32) p[0] << 24) | ((i32) p[1] << 16)
; *q = 0;
; res2 = ((i32) p[2] << 8) | (i32) p[3]
; res1 | res2
define i32 @load_i32_by_i8_bswap_store_in_between(i32*, i32*) {
; CHECK-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK: # BB#0:
; CHECK-NEXT: pushl %esi
; CHECK-NEXT: .Lcfi8:
; CHECK-NEXT: .cfi_def_cfa_offset 8
; CHECK-NEXT: .Lcfi9:
; CHECK-NEXT: .cfi_offset %esi, -8
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%ecx), %esi
; CHECK-NEXT: movl $0, (%eax)
; CHECK-NEXT: shll $16, %esi
; CHECK-NEXT: orl %edx, %esi
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %esi, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: popl %esi
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_store_in_between:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rdi), %ecx
; CHECK64-NEXT: movl $0, (%rsi)
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
; Negative test: a store through a second, possibly-aliasing pointer sits
; between the byte loads. Per the CHECK lines no wide load is formed.
%3 = bitcast i32* %0 to i8*
%4 = load i8, i8* %3, align 1
%5 = zext i8 %4 to i32
%6 = shl nuw nsw i32 %5, 24
%7 = getelementptr inbounds i8, i8* %3, i32 1
%8 = load i8, i8* %7, align 1
; This store will prevent folding of the pattern
store i32 0, i32* %1
%9 = zext i8 %8 to i32
%10 = shl nuw nsw i32 %9, 16
%11 = or i32 %10, %6
%12 = getelementptr inbounds i8, i8* %3, i32 2
%13 = load i8, i8* %12, align 1
%14 = zext i8 %13 to i32
%15 = shl nuw nsw i32 %14, 8
%16 = or i32 %11, %15
%17 = getelementptr inbounds i8, i8* %3, i32 3
%18 = load i8, i8* %17, align 1
%19 = zext i8 %18 to i32
%20 = or i32 %16, %19
ret i32 %20
}
; One of the loads is from an unrelated location
; i8* p, q;
; ((i32) p[0] << 24) | ((i32) q[1] << 16) | ((i32) p[2] << 8) | (i32) p[3]
define i32 @load_i32_by_i8_bswap_unrelated_load(i32*, i32*) {
; CHECK-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movzbl (%ecx), %edx
; CHECK-NEXT: shll $24, %edx
; CHECK-NEXT: movzbl 1(%eax), %eax
; CHECK-NEXT: shll $16, %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: movzbl 2(%ecx), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %eax, %edx
; CHECK-NEXT: movzbl 3(%ecx), %eax
; CHECK-NEXT: orl %edx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_unrelated_load:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: movzbl 1(%rsi), %ecx
; CHECK64-NEXT: shll $16, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 2(%rdi), %edx
; CHECK64-NEXT: shll $8, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 3(%rdi), %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
; Negative test: the second byte comes from %1 (q), not from %0 (p), so
; the four bytes are not adjacent in memory. Per the CHECK lines no wide
; load is formed.
%3 = bitcast i32* %0 to i8*
%4 = bitcast i32* %1 to i8*
%5 = load i8, i8* %3, align 1
%6 = zext i8 %5 to i32
%7 = shl nuw nsw i32 %6, 24
; Load from an unrelated address
%8 = getelementptr inbounds i8, i8* %4, i32 1
%9 = load i8, i8* %8, align 1
%10 = zext i8 %9 to i32
%11 = shl nuw nsw i32 %10, 16
%12 = or i32 %11, %7
%13 = getelementptr inbounds i8, i8* %3, i32 2
%14 = load i8, i8* %13, align 1
%15 = zext i8 %14 to i32
%16 = shl nuw nsw i32 %15, 8
%17 = or i32 %12, %16
%18 = getelementptr inbounds i8, i8* %3, i32 3
%19 = load i8, i8* %18, align 1
%20 = zext i8 %19 to i32
%21 = or i32 %17, %20
ret i32 %21
}
; Non-zero offsets are not supported for now
; i8* p;
; (i32) p[1] | ((i32) p[2] << 8) | ((i32) p[3] << 16) | ((i32) p[4] << 24)
define i32 @load_i32_by_i8_unsupported_offset(i32*) {
; CHECK-LABEL: load_i32_by_i8_unsupported_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movzbl (%eax), %ecx
; CHECK-NEXT: movzbl 2(%eax), %edx
; CHECK-NEXT: shll $8, %edx
; CHECK-NEXT: orl %ecx, %edx
; CHECK-NEXT: movzbl 3(%eax), %ecx
; CHECK-NEXT: shll $16, %ecx
; CHECK-NEXT: orl %edx, %ecx
; CHECK-NEXT: movzbl 4(%eax), %eax
; CHECK-NEXT: shll $24, %eax
; CHECK-NEXT: orl %ecx, %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_unsupported_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movzbl (%rdi), %eax
; CHECK64-NEXT: movzbl 2(%rdi), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %eax, %ecx
; CHECK64-NEXT: movzbl 3(%rdi), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 4(%rdi), %eax
; CHECK64-NEXT: shll $24, %eax
; CHECK64-NEXT: orl %edx, %eax
; CHECK64-NEXT: retq
; Negative test: per the CHECK lines no wide load is formed.
; NOTE(review): %3 (p + 1) is computed but never used — the first load
; reads from %2 (offset 0), so the bytes actually combined are offsets
; 0, 2, 3, 4, which disagrees with the pseudo-C comment above. The CHECK
; lines match the IR as written (movzbl (%eax) is offset 0); confirm
; against upstream whether the first load was meant to use %3.
%2 = bitcast i32* %0 to i8*
%3 = getelementptr inbounds i8, i8* %2, i32 1
%4 = load i8, i8* %2, align 1
%5 = zext i8 %4 to i32
%6 = getelementptr inbounds i8, i8* %2, i32 2
%7 = load i8, i8* %6, align 1
%8 = zext i8 %7 to i32
%9 = shl nuw nsw i32 %8, 8
%10 = or i32 %9, %5
%11 = getelementptr inbounds i8, i8* %2, i32 3
%12 = load i8, i8* %11, align 1
%13 = zext i8 %12 to i32
%14 = shl nuw nsw i32 %13, 16
%15 = or i32 %10, %14
%16 = getelementptr inbounds i8, i8* %2, i32 4
%17 = load i8, i8* %16, align 1
%18 = zext i8 %17 to i32
%19 = shl nuw nsw i32 %18, 24
%20 = or i32 %15, %19
ret i32 %20
}
; i8* p; i32 i;
; ((i32) p[i] << 24) | ((i32) p[i + 1] << 16) | ((i32) p[i + 2] << 8) | (i32) p[i + 3]
define i32 @load_i32_by_i8_bswap_base_index_offset(i32*, i32) {
; CHECK-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK: # BB#0:
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-NEXT: movl (%ecx,%eax), %eax
; CHECK-NEXT: bswapl %eax
; CHECK-NEXT: retl
;
; CHECK64-LABEL: load_i32_by_i8_bswap_base_index_offset:
; CHECK64: # BB#0:
; CHECK64-NEXT: movslq %esi, %rax
; CHECK64-NEXT: movzbl (%rdi,%rax), %ecx
; CHECK64-NEXT: shll $24, %ecx
; CHECK64-NEXT: movzbl 1(%rdi,%rax), %edx
; CHECK64-NEXT: shll $16, %edx
; CHECK64-NEXT: orl %ecx, %edx
; CHECK64-NEXT: movzbl 2(%rdi,%rax), %ecx
; CHECK64-NEXT: shll $8, %ecx
; CHECK64-NEXT: orl %edx, %ecx
; CHECK64-NEXT: movzbl 3(%rdi,%rax), %eax
; CHECK64-NEXT: orl %ecx, %eax
; CHECK64-NEXT: retq
; Currently we don't fold the pattern for x86-64 target because we don't see
; that the loads are adjacent. It happens because BaseIndexOffset doesn't look
; through zexts.
; Big-endian byte combine where the byte addresses are p + (i + k) for
; k = 0..3. Per the CHECK lines, i686 folds this to a single indexed load
; plus bswapl; x86-64 does not fold (see the note above).
%3 = bitcast i32* %0 to i8*
%4 = getelementptr inbounds i8, i8* %3, i32 %1
%5 = load i8, i8* %4, align 1         ; p[i]
%6 = zext i8 %5 to i32
%7 = shl nuw nsw i32 %6, 24
%8 = add nuw nsw i32 %1, 1
%9 = getelementptr inbounds i8, i8* %3, i32 %8
%10 = load i8, i8* %9, align 1        ; p[i + 1]
%11 = zext i8 %10 to i32
%12 = shl nuw nsw i32 %11, 16
%13 = or i32 %12, %7
%14 = add nuw nsw i32 %1, 2
%15 = getelementptr inbounds i8, i8* %3, i32 %14
%16 = load i8, i8* %15, align 1       ; p[i + 2]
%17 = zext i8 %16 to i32
%18 = shl nuw nsw i32 %17, 8
%19 = or i32 %13, %18
%20 = add nuw nsw i32 %1, 3
%21 = getelementptr inbounds i8, i8* %3, i32 %20
%22 = load i8, i8* %21, align 1       ; p[i + 3]
%23 = zext i8 %22 to i32
%24 = or i32 %19, %23
ret i32 %24
}