218 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
			
		
		
	
	
			218 lines
		
	
	
		
			6.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 | 
						|
; RUN: llc < %s -mcpu cortex-a53 -mtriple=aarch64-eabi | FileCheck %s --check-prefix=A53
 | 
						|
 | 
						|
; PR26827 - Merge stores causes wrong dependency.
 | 
						|
%struct1 = type { %struct1*, %struct1*, i32, i32, i16, i16, void (i32, i32, i8*)*, i8* }
 | 
						|
@gv0 = internal unnamed_addr global i32 0, align 4
 | 
						|
@gv1 = internal unnamed_addr global %struct1** null, align 8
 | 
						|
 | 
						|
define void @test(%struct1* %fde, i32 %fd, void (i32, i32, i8*)* %func, i8* %arg) uwtable {
 | 
						|
;CHECK-LABEL: test
 | 
						|
; A53-LABEL: test:
 | 
						|
; A53:       // %bb.0: // %entry
 | 
						|
; A53-NEXT:    stp x30, x19, [sp, #-16]! // 16-byte Folded Spill
 | 
						|
; A53-NEXT:    .cfi_def_cfa_offset 16
 | 
						|
; A53-NEXT:    .cfi_offset w19, -8
 | 
						|
; A53-NEXT:    .cfi_offset w30, -16
 | 
						|
; A53-NEXT:    .cfi_remember_state
 | 
						|
; A53-NEXT:    movi v0.2d, #0000000000000000
 | 
						|
; A53-NEXT:    mov x8, x0
 | 
						|
; A53-NEXT:    mov x19, x8
 | 
						|
; A53-NEXT:    mov w0, w1
 | 
						|
; A53-NEXT:    mov w9, #256
 | 
						|
; A53-NEXT:    stp x2, x3, [x8, #32]
 | 
						|
; A53-NEXT:    mov x2, x8
 | 
						|
; A53-NEXT:    str q0, [x19, #16]!
 | 
						|
; A53-NEXT:    str w1, [x19]
 | 
						|
; A53-NEXT:    mov w1, #4
 | 
						|
; A53-NEXT:    str q0, [x8]
 | 
						|
; A53-NEXT:    strh w9, [x8, #24]
 | 
						|
; A53-NEXT:    str wzr, [x8, #20]
 | 
						|
; A53-NEXT:    bl fcntl
 | 
						|
; A53-NEXT:    adrp x9, gv0
 | 
						|
; A53-NEXT:    add x9, x9, :lo12:gv0
 | 
						|
; A53-NEXT:    cmp x19, x9
 | 
						|
; A53-NEXT:    b.eq .LBB0_4
 | 
						|
; A53-NEXT:  // %bb.1:
 | 
						|
; A53-NEXT:    ldr w8, [x19]
 | 
						|
; A53-NEXT:    ldr w9, [x9]
 | 
						|
; A53-NEXT:    .p2align 4, 0x0, 8
 | 
						|
; A53-NEXT:  .LBB0_2: // %while.body.i.split.ver.us
 | 
						|
; A53-NEXT:    // =>This Inner Loop Header: Depth=1
 | 
						|
; A53-NEXT:    lsl w9, w9, #1
 | 
						|
; A53-NEXT:    cmp w9, w8
 | 
						|
; A53-NEXT:    b.le .LBB0_2
 | 
						|
; A53-NEXT:  // %bb.3: // %while.end.i
 | 
						|
; A53-NEXT:    bl foo
 | 
						|
; A53-NEXT:    adrp x8, gv1
 | 
						|
; A53-NEXT:    str x0, [x8, :lo12:gv1]
 | 
						|
; A53-NEXT:    ldp x30, x19, [sp], #16 // 16-byte Folded Reload
 | 
						|
; A53-NEXT:    .cfi_def_cfa_offset 0
 | 
						|
; A53-NEXT:    .cfi_restore w19
 | 
						|
; A53-NEXT:    .cfi_restore w30
 | 
						|
; A53-NEXT:    ret
 | 
						|
; A53-NEXT:    .p2align 4, 0x0, 8
 | 
						|
; A53-NEXT:  .LBB0_4: // %while.body.i.split
 | 
						|
; A53-NEXT:    // =>This Inner Loop Header: Depth=1
 | 
						|
; A53-NEXT:    .cfi_restore_state
 | 
						|
; A53-NEXT:    b .LBB0_4
 | 
						|
entry:
 | 
						|
  %0 = bitcast %struct1* %fde to i8*
 | 
						|
  tail call void @llvm.memset.p0i8.i64(i8* align 8 %0, i8 0, i64 40, i1 false)
 | 
						|
  %state = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 4
 | 
						|
  store i16 256, i16* %state, align 8
 | 
						|
  %fd1 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 2
 | 
						|
  store i32 %fd, i32* %fd1, align 8
 | 
						|
  %force_eof = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 3
 | 
						|
  store i32 0, i32* %force_eof, align 4
 | 
						|
  %func2 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 6
 | 
						|
  store void (i32, i32, i8*)* %func, void (i32, i32, i8*)** %func2, align 8
 | 
						|
  %arg3 = getelementptr inbounds %struct1, %struct1* %fde, i64 0, i32 7
 | 
						|
  store i8* %arg, i8** %arg3, align 8
 | 
						|
  %call = tail call i32 (i32, i32, ...) @fcntl(i32 %fd, i32 4, i8* %0) #6
 | 
						|
  %1 = load i32, i32* %fd1, align 8
 | 
						|
  %cmp.i = icmp slt i32 %1, 0
 | 
						|
  br i1 %cmp.i, label %if.then.i, label %while.body.i.preheader
 | 
						|
if.then.i:
 | 
						|
  unreachable
 | 
						|
 | 
						|
while.body.i.preheader:
 | 
						|
  %2 = load i32, i32* @gv0, align 4
 | 
						|
  %3 = icmp eq i32* %fd1, @gv0
 | 
						|
  br i1 %3, label %while.body.i.split, label %while.body.i.split.ver.us.preheader
 | 
						|
 | 
						|
while.body.i.split.ver.us.preheader:
 | 
						|
  br label %while.body.i.split.ver.us
 | 
						|
 | 
						|
while.body.i.split.ver.us:
 | 
						|
  %.reg2mem21.0 = phi i32 [ %mul.i.ver.us, %while.body.i.split.ver.us ], [ %2, %while.body.i.split.ver.us.preheader ]
 | 
						|
  %mul.i.ver.us = shl nsw i32 %.reg2mem21.0, 1
 | 
						|
  %4 = icmp sgt i32 %mul.i.ver.us, %1
 | 
						|
  br i1 %4, label %while.end.i, label %while.body.i.split.ver.us
 | 
						|
 | 
						|
while.body.i.split:
 | 
						|
  br label %while.body.i.split
 | 
						|
 | 
						|
while.end.i:
 | 
						|
  %call.i = tail call i8* @foo()
 | 
						|
  store i8* %call.i, i8** bitcast (%struct1*** @gv1 to i8**), align 8
 | 
						|
  br label %exit
 | 
						|
 | 
						|
exit:
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
; TODO: rev16?
 | 
						|
 | 
						|
define void @rotate16_in_place(i8* %p) {
 | 
						|
; A53-LABEL: rotate16_in_place:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldrb w8, [x0, #1]
 | 
						|
; A53-NEXT:    ldrb w9, [x0]
 | 
						|
; A53-NEXT:    strb w8, [x0]
 | 
						|
; A53-NEXT:    strb w9, [x0, #1]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i8, i8* %p, i64 0
 | 
						|
  %p1 = getelementptr i8, i8* %p, i64 1
 | 
						|
  %i0 = load i8, i8* %p0, align 1
 | 
						|
  %i1 = load i8, i8* %p1, align 1
 | 
						|
  store i8 %i1, i8* %p0, align 1
 | 
						|
  store i8 %i0, i8* %p1, align 1
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
; TODO: rev16?
 | 
						|
 | 
						|
define void @rotate16(i8* %p, i8* %q) {
 | 
						|
; A53-LABEL: rotate16:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldrb w8, [x0, #1]
 | 
						|
; A53-NEXT:    ldrb w9, [x0]
 | 
						|
; A53-NEXT:    strb w8, [x1]
 | 
						|
; A53-NEXT:    strb w9, [x1, #1]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i8, i8* %p, i64 0
 | 
						|
  %p1 = getelementptr i8, i8* %p, i64 1
 | 
						|
  %q0 = getelementptr i8, i8* %q, i64 0
 | 
						|
  %q1 = getelementptr i8, i8* %q, i64 1
 | 
						|
  %i0 = load i8, i8* %p0, align 1
 | 
						|
  %i1 = load i8, i8* %p1, align 1
 | 
						|
  store i8 %i1, i8* %q0, align 1
 | 
						|
  store i8 %i0, i8* %q1, align 1
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
define void @rotate32_in_place(i16* %p) {
 | 
						|
; A53-LABEL: rotate32_in_place:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldr w8, [x0]
 | 
						|
; A53-NEXT:    ror w8, w8, #16
 | 
						|
; A53-NEXT:    str w8, [x0]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i16, i16* %p, i64 0
 | 
						|
  %p1 = getelementptr i16, i16* %p, i64 1
 | 
						|
  %i0 = load i16, i16* %p0, align 2
 | 
						|
  %i1 = load i16, i16* %p1, align 2
 | 
						|
  store i16 %i1, i16* %p0, align 2
 | 
						|
  store i16 %i0, i16* %p1, align 2
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
define void @rotate32(i16* %p) {
 | 
						|
; A53-LABEL: rotate32:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldr w8, [x0]
 | 
						|
; A53-NEXT:    ror w8, w8, #16
 | 
						|
; A53-NEXT:    str w8, [x0, #84]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i16, i16* %p, i64 0
 | 
						|
  %p1 = getelementptr i16, i16* %p, i64 1
 | 
						|
  %p42 = getelementptr i16, i16* %p, i64 42
 | 
						|
  %p43 = getelementptr i16, i16* %p, i64 43
 | 
						|
  %i0 = load i16, i16* %p0, align 2
 | 
						|
  %i1 = load i16, i16* %p1, align 2
 | 
						|
  store i16 %i1, i16* %p42, align 2
 | 
						|
  store i16 %i0, i16* %p43, align 2
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
; Prefer paired memops over rotate.
 | 
						|
 | 
						|
define void @rotate64_in_place(i32* %p) {
 | 
						|
; A53-LABEL: rotate64_in_place:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldp w9, w8, [x0]
 | 
						|
; A53-NEXT:    stp w8, w9, [x0]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i32, i32* %p, i64 0
 | 
						|
  %p1 = getelementptr i32, i32* %p, i64 1
 | 
						|
  %i0 = load i32, i32* %p0, align 4
 | 
						|
  %i1 = load i32, i32* %p1, align 4
 | 
						|
  store i32 %i1, i32* %p0, align 4
 | 
						|
  store i32 %i0, i32* %p1, align 4
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
; Prefer paired memops over rotate.
 | 
						|
 | 
						|
define void @rotate64(i32* %p) {
 | 
						|
; A53-LABEL: rotate64:
 | 
						|
; A53:       // %bb.0:
 | 
						|
; A53-NEXT:    ldp w9, w8, [x0]
 | 
						|
; A53-NEXT:    stp w8, w9, [x0, #8]
 | 
						|
; A53-NEXT:    ret
 | 
						|
  %p0 = getelementptr i32, i32* %p, i64 0
 | 
						|
  %p1 = getelementptr i32, i32* %p, i64 1
 | 
						|
  %p2 = getelementptr i32, i32* %p, i64 2
 | 
						|
  %p3 = getelementptr i32, i32* %p, i64 3
 | 
						|
  %i0 = load i32, i32* %p0, align 4
 | 
						|
  %i1 = load i32, i32* %p1, align 4
 | 
						|
  store i32 %i1, i32* %p2, align 4
 | 
						|
  store i32 %i0, i32* %p3, align 4
 | 
						|
  ret void
 | 
						|
}
 | 
						|
 | 
						|
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i1)
 | 
						|
declare i32 @fcntl(i32, i32, ...)
 | 
						|
declare noalias i8* @foo()
 |