210 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			LLVM
		
	
	
	
			
		
		
	
	
			210 lines
		
	
	
		
			8.9 KiB
		
	
	
	
		
			LLVM
		
	
	
	
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 | |
| ; RUN: llc -verify-machineinstrs -stack-symbol-ordering=0 -mcpu=nehalem -debug-only=stackmaps < %s | FileCheck %s
 | |
| ; REQUIRES: asserts
 | |
| 
 | |
| target triple = "x86_64-pc-linux-gnu"
 | |
| 
 | |
| ; Can we lower a single vector?
 | |
| define <2 x i8 addrspace(1)*> @test(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
 | |
| ; CHECK-LABEL: test:
 | |
| ; CHECK:       # %bb.0: # %entry
 | |
| ; CHECK-NEXT:    subq $24, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 | |
| ; CHECK-NEXT:    movaps %xmm0, (%rsp)
 | |
| ; CHECK-NEXT:    callq do_safepoint@PLT
 | |
| ; CHECK-NEXT:  .Ltmp0:
 | |
| ; CHECK-NEXT:    movaps (%rsp), %xmm0
 | |
| ; CHECK-NEXT:    addq $24, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 | |
| ; CHECK-NEXT:    retq
 | |
| entry:
 | |
|   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x i8 addrspace(1)*> %obj)]
 | |
|   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 0, i32 0) ; (%obj, %obj)
 | |
|   ret <2 x i8 addrspace(1)*> %obj.relocated
 | |
| }
 | |
| 
 | |
| ; Can we lower the base, derived pairs if both are vectors?
 | |
| define <2 x i8 addrspace(1)*> @test2(<2 x i8 addrspace(1)*> %obj, i64 %offset) gc "statepoint-example" {
 | |
| ; CHECK-LABEL: test2:
 | |
| ; CHECK:       # %bb.0: # %entry
 | |
| ; CHECK-NEXT:    subq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 | |
| ; CHECK-NEXT:    movq %rdi, %xmm1
 | |
| ; CHECK-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[0,1,0,1]
 | |
| ; CHECK-NEXT:    paddq %xmm0, %xmm1
 | |
| ; CHECK-NEXT:    movdqa %xmm0, (%rsp)
 | |
| ; CHECK-NEXT:    movdqa %xmm1, {{[0-9]+}}(%rsp)
 | |
| ; CHECK-NEXT:    callq do_safepoint@PLT
 | |
| ; CHECK-NEXT:  .Ltmp1:
 | |
| ; CHECK-NEXT:    movaps {{[0-9]+}}(%rsp), %xmm0
 | |
| ; CHECK-NEXT:    addq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 | |
| ; CHECK-NEXT:    retq
 | |
| entry:
 | |
|   %derived = getelementptr i8, <2 x i8 addrspace(1)*> %obj, i64 %offset
 | |
|   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x i8 addrspace(1)*> %obj, <2 x i8 addrspace(1)*> %derived)]
 | |
|   %derived.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 0, i32 1) ; (%obj, %derived)
 | |
|   ret <2 x i8 addrspace(1)*> %derived.relocated
 | |
| }
 | |
| 
 | |
| ; Originally, this was just a variant of @test2 above, but it ends up
 | |
| ; covering a bunch of interesting missed optimizations.  Specifically:
 | |
| ; - We waste a stack slot for a value that a backend transform pass
 | |
| ;   CSEd to another spilled one.
 | |
| ; - We don't remove the testb even though it serves no purpose
 | |
| ; - We could in principal reuse the argument memory (%rsi) and do away
 | |
| ;   with stack slots entirely.
 | |
| define <2 x i64 addrspace(1)*> @test3(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
 | |
| ; CHECK-LABEL: test3:
 | |
| ; CHECK:       # %bb.0: # %entry
 | |
| ; CHECK-NEXT:    subq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 | |
| ; CHECK-NEXT:    testb $1, %dil
 | |
| ; CHECK-NEXT:    movaps (%rsi), %xmm0
 | |
| ; CHECK-NEXT:    movaps %xmm0, (%rsp)
 | |
| ; CHECK-NEXT:    movaps %xmm0, {{[0-9]+}}(%rsp)
 | |
| ; CHECK-NEXT:    callq do_safepoint@PLT
 | |
| ; CHECK-NEXT:  .Ltmp2:
 | |
| ; CHECK-NEXT:    movaps (%rsp), %xmm0
 | |
| ; CHECK-NEXT:    addq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 | |
| ; CHECK-NEXT:    retq
 | |
| entry:
 | |
|   br i1 %cnd, label %taken, label %untaken
 | |
| 
 | |
| taken:                                            ; preds = %entry
 | |
|   %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
 | |
|   br label %merge
 | |
| 
 | |
| untaken:                                          ; preds = %entry
 | |
|   %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
 | |
|   br label %merge
 | |
| 
 | |
| merge:                                            ; preds = %untaken, %taken
 | |
|   %obj.base = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
 | |
|   %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
 | |
|   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x i64 addrspace(1)*> %obj, <2 x i64 addrspace(1)*> %obj.base)]
 | |
|   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 1, i32 0) ; (%obj.base, %obj)
 | |
|   %obj.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.relocated to <2 x i64 addrspace(1)*>
 | |
|   %obj.base.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 1, i32 1) ; (%obj.base, %obj.base)
 | |
|   %obj.base.relocated.casted = bitcast <2 x i8 addrspace(1)*> %obj.base.relocated to <2 x i64 addrspace(1)*>
 | |
|   ret <2 x i64 addrspace(1)*> %obj.relocated.casted
 | |
| }
 | |
| 
 | |
| ; Can we handle vector constants?  At the moment, we don't appear to actually
 | |
| ; get selection dag nodes for these.
 | |
| define <2 x i8 addrspace(1)*> @test4() gc "statepoint-example" {
 | |
| ; CHECK-LABEL: test4:
 | |
| ; CHECK:       # %bb.0: # %entry
 | |
| ; CHECK-NEXT:    subq $24, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 32
 | |
| ; CHECK-NEXT:    xorps %xmm0, %xmm0
 | |
| ; CHECK-NEXT:    movaps %xmm0, (%rsp)
 | |
| ; CHECK-NEXT:    callq do_safepoint@PLT
 | |
| ; CHECK-NEXT:  .Ltmp3:
 | |
| ; CHECK-NEXT:    movaps (%rsp), %xmm0
 | |
| ; CHECK-NEXT:    addq $24, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 | |
| ; CHECK-NEXT:    retq
 | |
| entry:
 | |
|   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["gc-live" (<2 x i8 addrspace(1)*> zeroinitializer)]
 | |
|   %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 0, i32 0)
 | |
|   ret <2 x i8 addrspace(1)*> %obj.relocated
 | |
| }
 | |
| 
 | |
| ; Check that we can lower a constant typed as i128 correctly.  We don't have
 | |
| ; a representation of larger than 64 bit constant in the StackMap format. At
 | |
| ; the moment, this simply means spilling them, but there's a potential
 | |
| ; optimization for values representable as sext(Con64).
 | |
| define void @test5() gc "statepoint-example" {
 | |
| ; CHECK-LABEL: test5:
 | |
| ; CHECK:       # %bb.0: # %entry
 | |
| ; CHECK-NEXT:    subq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 48
 | |
| ; CHECK-NEXT:    xorps %xmm0, %xmm0
 | |
| ; CHECK-NEXT:    movups %xmm0, {{[0-9]+}}(%rsp)
 | |
| ; CHECK-NEXT:    movq $-1, {{[0-9]+}}(%rsp)
 | |
| ; CHECK-NEXT:    movq $-1, {{[0-9]+}}(%rsp)
 | |
| ; CHECK-NEXT:    callq do_safepoint@PLT
 | |
| ; CHECK-NEXT:  .Ltmp4:
 | |
| ; CHECK-NEXT:    addq $40, %rsp
 | |
| ; CHECK-NEXT:    .cfi_def_cfa_offset 8
 | |
| ; CHECK-NEXT:    retq
 | |
| entry:
 | |
|   %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* elementtype(void ()) @do_safepoint, i32 0, i32 0, i32 0, i32 0) ["deopt" (i128 0, i128 -1)]
 | |
|   ret void
 | |
| }
 | |
| 
 | |
| ; CHECK: __LLVM_StackMaps:
 | |
| 
 | |
| ; CHECK: .Ltmp0-test
 | |
| ; Check for the two spill slots
 | |
| ; Stack Maps: 		Loc 3: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 | |
| ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	0
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	0
 | |
| 
 | |
| ; CHECK: .Ltmp1-test2
 | |
| ; Check for the two spill slots
 | |
| ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
 | |
| ; Stack Maps: 		Loc 4: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	0
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	16
 | |
| 
 | |
| ; CHECK: .Ltmp2-test3
 | |
| ; Check for the four spill slots
 | |
| ; Stack Maps: 		Loc 3: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
 | |
| ; Stack Maps: 		Loc 4: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
 | |
| ; Stack Maps: 		Loc 5: Indirect 7+16	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 16]
 | |
| ; Stack Maps: 		Loc 6: Indirect 7+0	[encoding: .byte 3, .byte 0, .short 16, .short 7, .short 0, .int 0]
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	16
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	 0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	16
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	 0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	16
 | |
| ; CHECK: .byte	3
 | |
| ; CHECK: .byte	 0
 | |
| ; CHECK: .short 16
 | |
| ; CHECK: .short	7
 | |
| ; CHECK: .short	0
 | |
| ; CHECK: .long	0
 | |
| 
 | |
| declare void @do_safepoint()
 | |
| 
 | |
| declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
 | |
| declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
 | |
| declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
 |