forked from OSchip/llvm-project
[Polly] [PPCGCodeGeneration] Skip Scops which contain function pointers.
In `PPCGCodeGeneration`, we try to take the references of every `Value` that is used within a Scop to offload to the kernel. This occurs in `GPUNodeBuilder::createLaunchParameters`. This breaks if one of the values is a function pointer, since one of these cases will trigger: 1. We try to take the reference of an intrinsic function, and this breaks at `verifyModule`, since it is illegal to take the reference of an intrinsic. 2. We manage to take the reference to a function, but this fails at `verifyModule` since the function will not be present in the module that is created in the kernel. 3. Even if `verifyModule` succeeds (which should not occur), we would then try to call a *host function* from the *device*, which is illegal runtime behaviour. So, we disable this entire range of possibilities by simply not allowing function references within a `Scop` which corresponds to a kernel. However, note that this is too conservative. We *can* allow intrinsics within kernels if the backend can lower the intrinsic correctly. For example, an intrinsic like `llvm.powi.*` can actually be lowered by the `NVPTX` backend. We will now gradually whitelist intrinsics which are known to be safe. Differential Revision: https://reviews.llvm.org/D33414 llvm-svn: 305185
This commit is contained in:
parent
b079c8b35b
commit
bccaea57c0
|
|
@ -2611,6 +2611,36 @@ public:
|
|||
return isl_ast_expr_ge(Iterations, MinComputeExpr);
|
||||
}
|
||||
|
||||
/// Check whether the Block contains any Function value.
|
||||
bool ContainsFnPtrValInBlock(const BasicBlock *BB) {
|
||||
for (const Instruction &Inst : *BB)
|
||||
for (Value *SrcVal : Inst.operands()) {
|
||||
PointerType *p = dyn_cast<PointerType>(SrcVal->getType());
|
||||
if (!p)
|
||||
continue;
|
||||
if (isa<FunctionType>(p->getElementType()))
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Return whether the Scop S has functions.
|
||||
bool ContainsFnPtr(const Scop &S) {
|
||||
for (auto &Stmt : S) {
|
||||
if (Stmt.isBlockStmt()) {
|
||||
if (ContainsFnPtrValInBlock(Stmt.getBasicBlock()))
|
||||
return true;
|
||||
} else {
|
||||
assert(Stmt.isRegionStmt() &&
|
||||
"Stmt was neither block nor region statement");
|
||||
for (const BasicBlock *BB : Stmt.getRegion()->blocks())
|
||||
if (ContainsFnPtrValInBlock(BB))
|
||||
return true;
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
/// Generate code for a given GPU AST described by @p Root.
|
||||
///
|
||||
/// @param Root An isl_ast_node pointing to the root of the GPU AST.
|
||||
|
|
@ -2681,6 +2711,14 @@ public:
|
|||
if (S->hasInvariantAccesses())
|
||||
return false;
|
||||
|
||||
// We currently do not support functions inside kernels, as code
|
||||
// generation will need to offload function calls to the kernel.
|
||||
// This may lead to a kernel trying to call a function on the host.
|
||||
// This also allows us to prevent codegen from trying to take the
|
||||
// address of an intrinsic function to send to the kernel.
|
||||
if (ContainsFnPtr(CurrentScop))
|
||||
return false;
|
||||
|
||||
auto PPCGScop = createPPCGScop();
|
||||
auto PPCGProg = createPPCGProg(PPCGScop);
|
||||
auto PPCGGen = generateGPU(PPCGScop, PPCGProg);
|
||||
|
|
|
|||
|
|
@ -0,0 +1,82 @@
|
|||
; RUN: opt %loadPolly -analyze -polly-scops < %s | FileCheck %s --check-prefix=SCOP
|
||||
; RUN: opt %loadPolly -S -polly-codegen-ppcg < %s | FileCheck %s
|
||||
|
||||
; Check that we do not create a kernel if there is an
|
||||
; unknown function call in a candidate kernel.
|
||||
|
||||
; Check that we model the kernel as a scop.
|
||||
; SCOP: Function: f
|
||||
; SCOP-NEXT: Region: %entry.split---%for.end13
|
||||
|
||||
; If a kernel were generated, then this code would have been part of the kernel
|
||||
; and not the `.ll` file that is generated.
|
||||
; CHECK: %conv = fpext float %0 to double
|
||||
; CHECK-NEXT: %1 = tail call double @extern.fn(double %conv)
|
||||
; CHECK-NEXT: %conv6 = fptrunc double %1 to float
|
||||
|
||||
; REQUIRES: pollyacc
|
||||
|
||||
; static const int N = 1000;
|
||||
; void f(float A[N][N], int n, float B[N][N]) {
|
||||
; for(int i = 0; i < n; i++) {
|
||||
; for(int j = 0; j < n; j++) {
|
||||
; B[i][j] = extern_fn(A[i][j], 3);
|
||||
; }
|
||||
;
|
||||
; }
|
||||
; }
|
||||
|
||||
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
|
||||
target triple = "x86_64-apple-macosx10.11.0"
|
||||
|
||||
define void @f([1000 x float]* %A, i32 %n, [1000 x float]* %B) {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
%cmp3 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp3, label %for.cond1.preheader.lr.ph, label %for.end13
|
||||
|
||||
for.cond1.preheader.lr.ph: ; preds = %entry.split
|
||||
br label %for.cond1.preheader
|
||||
|
||||
for.cond1.preheader: ; preds = %for.cond1.preheader.lr.ph, %for.inc11
|
||||
%indvars.iv5 = phi i64 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next6, %for.inc11 ]
|
||||
%cmp21 = icmp sgt i32 %n, 0
|
||||
br i1 %cmp21, label %for.body3.lr.ph, label %for.inc11
|
||||
|
||||
for.body3.lr.ph: ; preds = %for.cond1.preheader
|
||||
br label %for.body3
|
||||
|
||||
for.body3: ; preds = %for.body3.lr.ph, %for.body3
|
||||
%indvars.iv = phi i64 [ 0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
|
||||
%arrayidx5 = getelementptr inbounds [1000 x float], [1000 x float]* %A, i64 %indvars.iv5, i64 %indvars.iv
|
||||
%0 = load float, float* %arrayidx5, align 4
|
||||
%conv = fpext float %0 to double
|
||||
%1 = tail call double @extern.fn(double %conv)
|
||||
%conv6 = fptrunc double %1 to float
|
||||
%arrayidx10 = getelementptr inbounds [1000 x float], [1000 x float]* %B, i64 %indvars.iv5, i64 %indvars.iv
|
||||
store float %conv6, float* %arrayidx10, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%wide.trip.count = zext i32 %n to i64
|
||||
%exitcond = icmp ne i64 %indvars.iv.next, %wide.trip.count
|
||||
br i1 %exitcond, label %for.body3, label %for.cond1.for.inc11_crit_edge
|
||||
|
||||
for.cond1.for.inc11_crit_edge: ; preds = %for.body3
|
||||
br label %for.inc11
|
||||
|
||||
for.inc11: ; preds = %for.cond1.for.inc11_crit_edge, %for.cond1.preheader
|
||||
%indvars.iv.next6 = add nuw nsw i64 %indvars.iv5, 1
|
||||
%wide.trip.count7 = zext i32 %n to i64
|
||||
%exitcond8 = icmp ne i64 %indvars.iv.next6, %wide.trip.count7
|
||||
br i1 %exitcond8, label %for.cond1.preheader, label %for.cond.for.end13_crit_edge
|
||||
|
||||
for.cond.for.end13_crit_edge: ; preds = %for.inc11
|
||||
br label %for.end13
|
||||
|
||||
for.end13: ; preds = %for.cond.for.end13_crit_edge, %entry.split
|
||||
ret void
|
||||
}
|
||||
|
||||
declare double @extern.fn(double) #0
|
||||
attributes #0 = { readnone }
|
||||
Loading…
Reference in New Issue