forked from OSchip/llvm-project
Prefix the name of the calling host function in the name of callee GPU kernel
Summary: Provide more context to the name of a GPU kernel by prefixing its name with the host function that calls it. E.g., the first kernel called by `gemm` would be `FUNC_gemm_KERNEL_0`. Kernels currently follow the "kernel_#" (# = 0,1,2,3,...) nomenclature. This patch makes it easier to map host caller and device callee, especially when there are many kernels produced by Polly-ACC. Reviewers: grosser, Meinersbur, bollu, philip.pfaffe, kbarton! Reviewed By: grosser Subscribers: nemanjai, pollydev Tags: #polly Differential Revision: https://reviews.llvm.org/D33985 llvm-svn: 307173
This commit is contained in:
parent
7538b35cef
commit
79f13b9a80
|
|
@ -299,6 +299,9 @@ public:
|
|||
/// The maximal number of loops surrounding a parallel kernel.
|
||||
unsigned DeepestParallel = 0;
|
||||
|
||||
/// Return the name to set for the ptx_kernel.
|
||||
std::string getKernelFuncName(int Kernel_id);
|
||||
|
||||
private:
|
||||
/// A vector of array base pointers for which a new ScopArrayInfo was created.
|
||||
///
|
||||
|
|
@ -662,6 +665,11 @@ private:
|
|||
Value *Parameters);
|
||||
};
|
||||
|
||||
std::string GPUNodeBuilder::getKernelFuncName(int Kernel_id) {
|
||||
return "FUNC_" + S.getFunction().getName().str() + "_KERNEL_" +
|
||||
std::to_string(Kernel_id);
|
||||
}
|
||||
|
||||
void GPUNodeBuilder::initializeAfterRTH() {
|
||||
BasicBlock *NewBB = SplitBlock(Builder.GetInsertBlock(),
|
||||
&*Builder.GetInsertPoint(), &DT, &LI);
|
||||
|
|
@ -1621,7 +1629,7 @@ void GPUNodeBuilder::createKernel(__isl_take isl_ast_node *KernelStmt) {
|
|||
Builder.SetInsertPoint(&HostInsertPoint);
|
||||
Value *Parameters = createLaunchParameters(Kernel, F, SubtreeValues);
|
||||
|
||||
std::string Name = "kernel_" + std::to_string(Kernel->id);
|
||||
std::string Name = getKernelFuncName(Kernel->id);
|
||||
Value *KernelString = Builder.CreateGlobalStringPtr(ASMString, Name);
|
||||
Value *NameString = Builder.CreateGlobalStringPtr(Name, Name + "_name");
|
||||
Value *GPUKernel = createCallGetKernel(KernelString, NameString);
|
||||
|
|
@ -1662,7 +1670,7 @@ Function *
|
|||
GPUNodeBuilder::createKernelFunctionDecl(ppcg_kernel *Kernel,
|
||||
SetVector<Value *> &SubtreeValues) {
|
||||
std::vector<Type *> Args;
|
||||
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
|
||||
std::string Identifier = getKernelFuncName(Kernel->id);
|
||||
|
||||
for (long i = 0; i < Prog->n_array; i++) {
|
||||
if (!ppcg_kernel_requires_array_argument(Kernel, i))
|
||||
|
|
@ -1926,7 +1934,7 @@ void GPUNodeBuilder::createKernelVariables(ppcg_kernel *Kernel, Function *FN) {
|
|||
void GPUNodeBuilder::createKernelFunction(
|
||||
ppcg_kernel *Kernel, SetVector<Value *> &SubtreeValues,
|
||||
SetVector<Function *> &SubtreeFunctions) {
|
||||
std::string Identifier = "kernel_" + std::to_string(Kernel->id);
|
||||
std::string Identifier = getKernelFuncName(Kernel->id);
|
||||
GPUModule.reset(new Module(Identifier, Builder.getContext()));
|
||||
|
||||
switch (Arch) {
|
||||
|
|
|
|||
|
|
@ -4,11 +4,11 @@
|
|||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {
|
||||
; KERNEL: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n) #0 {
|
||||
|
||||
; KERNEL: !nvvm.annotations = !{!0}
|
||||
|
||||
; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @kernel_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
|
||||
; KERNEL: !0 = !{void (i8 addrspace(1)*, i64)* @FUNC_foo_KERNEL_0, !"maxntidx", i32 32, !"maxntidy", i32 1, !"maxntidz", i32 1}
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
|
|||
|
|
@ -54,7 +54,7 @@
|
|||
; CHECK-NEXT: %22 = getelementptr [4 x i8*], [4 x i8*]* %polly_launch_0_params, i64 0, i64 3
|
||||
; CHECK-NEXT: %23 = bitcast i32* %polly_launch_0_param_size_1 to i8*
|
||||
; CHECK-NEXT: store i8* %23, i8** %22
|
||||
; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([750 x i8], [750 x i8]* @kernel_0, i32 0, i32 0), i8* getelementptr inbounds ([9 x i8], [9 x i8]* @kernel_0_name, i32 0, i32 0))
|
||||
; CHECK-NEXT: %24 = call i8* @polly_getKernel(i8* getelementptr inbounds ([810 x i8], [810 x i8]* @FUNC_copy_KERNEL_0, i32 0, i32 0), i8* getelementptr inbounds ([19 x i8], [19 x i8]* @FUNC_copy_KERNEL_0_name, i32 0, i32 0))
|
||||
; CHECK-NEXT: call void @polly_launchKernel(i8* %24, i32 2, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
|
||||
; CHECK-NEXT: call void @polly_freeKernel(i8* %24)
|
||||
; CHECK-NEXT: call void @polly_synchronizeDevice()
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@
|
|||
; IR-NEXT: %polly.loop_cond = icmp sle i64 %polly.indvar_next, 99
|
||||
; IR-NEXT: br i1 %polly.loop_cond, label %polly.loop_header, label %polly.loop_exit
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
|
||||
; KERNEL-IR: define ptx_kernel void @FUNC_foo_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %c0)
|
||||
; KERNEL-IR-LABEL: entry:
|
||||
; KERNEL-IR-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-IR-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@
|
|||
; HOST-IR: call void @polly_launchKernel(i8* %215, i32 %221, i32 1, i32 32, i32 1, i32 1, i8* %polly_launch_0_params_i8ptr)
|
||||
; HOST-IR-NEXT: call void @polly_freeKernel(i8* %215)
|
||||
;
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
|
||||
; KERNEL-IR: define ptx_kernel void @FUNC_f_KERNEL_0(i8 addrspace(1)* %MemRef_B, i8 addrspace(1)* %MemRef_A, i32 %n, i32 %tmp12) #0 {
|
||||
;
|
||||
; Check that we generate correct GPU code in case of invariant load hoisting.
|
||||
;
|
||||
|
|
|
|||
|
|
@ -16,12 +16,12 @@
|
|||
; B[i] += 42;
|
||||
; }
|
||||
|
||||
; KERNEL: ; ModuleID = 'kernel_0'
|
||||
; KERNEL-NEXT: source_filename = "kernel_0"
|
||||
; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_0'
|
||||
; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_0"
|
||||
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A)
|
||||
; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_0(i8 addrspace(1)* %MemRef_A)
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
|
@ -31,12 +31,12 @@
|
|||
; KERNEL: ret void
|
||||
; KERNEL-NEXT: }
|
||||
|
||||
; KERNEL: ; ModuleID = 'kernel_1'
|
||||
; KERNEL-NEXT: source_filename = "kernel_1"
|
||||
; KERNEL: ; ModuleID = 'FUNC_kernel_params_only_some_arrays_KERNEL_1'
|
||||
; KERNEL-NEXT: source_filename = "FUNC_kernel_params_only_some_arrays_KERNEL_1"
|
||||
; KERNEL-NEXT: target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64"
|
||||
; KERNEL-NEXT: target triple = "nvptx64-nvidia-cuda"
|
||||
|
||||
; KERNEL: define ptx_kernel void @kernel_1(i8 addrspace(1)* %MemRef_B)
|
||||
; KERNEL: define ptx_kernel void @FUNC_kernel_params_only_some_arrays_KERNEL_1(i8 addrspace(1)* %MemRef_B)
|
||||
; KERNEL-NEXT: entry:
|
||||
; KERNEL-NEXT: %0 = call i32 @llvm.nvvm.read.ptx.sreg.ctaid.x()
|
||||
; KERNEL-NEXT: %b0 = zext i32 %0 to i64
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@
|
|||
; A[i] += 42;
|
||||
; }
|
||||
|
||||
; KERNEL-IR: define ptx_kernel void @kernel_0(i8 addrspace(1)* %MemRef_A, i64 %n)
|
||||
; KERNEL-IR: define ptx_kernel void @FUNC_kernel_params_scop_parameter_KERNEL_0(i8 addrspace(1)* %MemRef_A, i64 %n)
|
||||
|
||||
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue