101 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
			
		
		
	
	
			101 lines
		
	
	
		
			2.8 KiB
		
	
	
	
		
			LLVM
		
	
	
	
| ; RUN: llc -march=amdgcn -mcpu=gfx1030 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1030 %s
 | |
| ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs -amdgpu-s-branch-bits=7 < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX1010 %s
 | |
| 
 | |
| ; For gfx1010, overestimate the branch size in case we need to insert
 | |
| ; a nop for the buggy offset.
 | |
| 
 | |
| ; GCN-LABEL: long_forward_scc_branch_3f_offset_bug:
 | |
| ; GFX1030: s_cmp_lg_u32
 | |
| ; GFX1030-NEXT: s_cbranch_scc1  [[ENDBB:BB[0-9]+_[0-9]+]]
 | |
| 
 | |
| ; GFX1010: s_cmp_lg_u32
 | |
| ; GFX1010-NEXT: s_cbranch_scc0  [[RELAX_BB:BB[0-9]+_[0-9]+]]
 | |
| ; GFX1010: s_getpc_b64
 | |
| ; GFX1010-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
 | |
| ; GFX1010-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
 | |
| ; GFX1010: [[RELAX_BB]]:
 | |
| 
 | |
| ; GCN: v_nop
 | |
| ; GCN: s_sleep
 | |
| ; GCN: s_cbranch_scc1
 | |
| 
 | |
| ; GCN: [[ENDBB]]:
 | |
| ; GCN: global_store_dword
 | |
| define amdgpu_kernel void @long_forward_scc_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
 | |
| bb0:
 | |
|   %cmp0 = icmp eq i32 %cnd0, 0
 | |
|   br i1 %cmp0, label %bb2, label %bb3
 | |
| 
 | |
| bb2:
 | |
|   %val = call i32 asm sideeffect
 | |
|    "s_mov_b32 $0, 0
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64", "=s"()   ; 20 * 12 = 240
 | |
|   call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
 | |
|   %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
 | |
|   br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)
 | |
| 
 | |
| bb3:
 | |
|   store volatile i32 %cnd0, i32 addrspace(1)* %arg
 | |
|   ret void
 | |
| }
 | |
| 
 | |
| ; GCN-LABEL: {{^}}long_forward_exec_branch_3f_offset_bug:
 | |
| ; GFX1030: v_cmp_eq_u32
 | |
| ; GFX1030: s_and_saveexec_b32
 | |
| ; GFX1030-NEXT: s_cbranch_execnz [[RELAX_BB:BB[0-9]+_[0-9]+]]
 | |
| 
 | |
| ; GFX1010: v_cmp_eq_u32
 | |
| ; GFX1010: s_and_saveexec_b32
 | |
| ; GFX1010-NEXT: s_cbranch_execnz  [[RELAX_BB:BB[0-9]+_[0-9]+]]
 | |
| 
 | |
| ; GCN: s_getpc_b64
 | |
| ; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, [[ENDBB:BB[0-9]+_[0-9]+]]-(BB
 | |
| ; GCN-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}
 | |
| ; GCN: [[RELAX_BB]]:
 | |
| 
 | |
| ; GCN: v_nop
 | |
| ; GCN: s_sleep
 | |
| ; GCN: s_cbranch_execz
 | |
| 
 | |
| ; GCN: [[ENDBB]]:
 | |
| ; GCN: global_store_dword
 | |
| define void @long_forward_exec_branch_3f_offset_bug(i32 addrspace(1)* %arg, i32 %cnd0) #0 {
 | |
| bb0:
 | |
|   %cmp0 = icmp eq i32 %cnd0, 0
 | |
|   br i1 %cmp0, label %bb2, label %bb3
 | |
| 
 | |
| bb2:
 | |
|   %val = call i32 asm sideeffect
 | |
|    "v_mov_b32 $0, 0
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64
 | |
|     v_nop_e64", "=v"()   ; 20 * 12 = 240
 | |
|   call void @llvm.amdgcn.s.sleep(i32 0) ; +4 = 244
 | |
|   %cmp1 = icmp eq i32 %val, 0           ; +4 = 248
 | |
|   br i1 %cmp1, label %bb2, label %bb3   ; +4 (gfx1030), +8 with workaround (gfx1010)
 | |
| 
 | |
| bb3:
 | |
|   store volatile i32 %cnd0, i32 addrspace(1)* %arg
 | |
|   ret void
 | |
| }
 | |
| 
 | |
| declare void @llvm.amdgcn.s.sleep(i32 immarg)
 |