forked from OSchip/llvm-project
				
			R600: Set the noduplicate attribute on barrier() intrinsics
This will prevent LLVM optimization passes from creating illegal uses of the barrier() intrinsic (e.g. calling barrier() from a conditional that is not executed by all threads).

llvm-svn: 193753
This commit is contained in:
		
							parent
							
								
									74e1d0a0a0
								
							
						
					
					
						commit
						d2e83929a9
					
				| 
						 | 
					@ -8,4 +8,3 @@ workitem/get_global_size.ll
 | 
				
			||||||
synchronization/barrier.cl
 | 
					synchronization/barrier.cl
 | 
				
			||||||
synchronization/barrier_impl.ll
 | 
					synchronization/barrier_impl.ll
 | 
				
			||||||
shared/vload.cl
 | 
					shared/vload.cl
 | 
				
			||||||
shared/vstore.cl
 | 
					 | 
				
			||||||
| 
						 | 
					@ -1,15 +1,10 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include <clc/clc.h>
 | 
					#include <clc/clc.h>
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void barrier_local(void);
 | 
					_CLC_DEF int __clc_clk_local_mem_fence() {
 | 
				
			||||||
void barrier_global(void);
 | 
					  return CLK_LOCAL_MEM_FENCE;
 | 
				
			||||||
 | 
					 | 
				
			||||||
void barrier(cl_mem_fence_flags flags) {
 | 
					 | 
				
			||||||
  if (flags & CLK_LOCAL_MEM_FENCE) {
 | 
					 | 
				
			||||||
    barrier_local();
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  if (flags & CLK_GLOBAL_MEM_FENCE) {
 | 
					_CLC_DEF int __clc_clk_global_mem_fence() {
 | 
				
			||||||
    barrier_global();
 | 
					  return CLK_GLOBAL_MEM_FENCE;
 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1,12 +1,29 @@
 | 
				
			||||||
declare void @llvm.AMDGPU.barrier.local() nounwind
 | 
					declare i32 @__clc_clk_local_mem_fence() nounwind alwaysinline
 | 
				
			||||||
declare void @llvm.AMDGPU.barrier.global() nounwind
 | 
					declare i32 @__clc_clk_global_mem_fence() nounwind alwaysinline
 | 
				
			||||||
 | 
					declare void @llvm.AMDGPU.barrier.local() nounwind noduplicate
 | 
				
			||||||
 | 
					declare void @llvm.AMDGPU.barrier.global() nounwind noduplicate
 | 
				
			||||||
 | 
					
 | 
				
			||||||
define void @barrier_local() nounwind alwaysinline {
 | 
					define void @barrier(i32 %flags) nounwind noduplicate alwaysinline {
 | 
				
			||||||
  call void @llvm.AMDGPU.barrier.local()
 | 
					barrier_local_test:
 | 
				
			||||||
  ret void
 | 
					  %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
 | 
				
			||||||
}
 | 
					  %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
 | 
				
			||||||
 | 
					  %1 = icmp ne i32 %0, 0
 | 
				
			||||||
define void @barrier_global() nounwind alwaysinline {
 | 
					  br i1 %1, label %barrier_local, label %barrier_global_test
 | 
				
			||||||
  call void @llvm.AMDGPU.barrier.global()
 | 
					
 | 
				
			||||||
 | 
					barrier_local:
 | 
				
			||||||
 | 
					  call void @llvm.AMDGPU.barrier.local() noduplicate
 | 
				
			||||||
 | 
					  br label %barrier_global_test
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					barrier_global_test:
 | 
				
			||||||
 | 
					  %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
 | 
				
			||||||
 | 
					  %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
 | 
				
			||||||
 | 
					  %3 = icmp ne i32 %2, 0
 | 
				
			||||||
 | 
					  br i1 %3, label %barrier_global, label %done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					barrier_global:
 | 
				
			||||||
 | 
					  call void @llvm.AMDGPU.barrier.global() noduplicate
 | 
				
			||||||
 | 
					  br label %done
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					done:
 | 
				
			||||||
  ret void
 | 
					  ret void
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue