forked from OSchip/llvm-project
Fix assertion due to buildMemoryAccess.
For llvm the memory accesses from nonaffine loops should be visible, however for polly those nonaffine loops should be invisible/boxed. This fixes llvm.org/PR28245 Cointributed-by: Huihui Zhang <huihuiz@codeaurora.org> Differential Revision: http://reviews.llvm.org/D21591 llvm-svn: 274842
This commit is contained in:
parent
eeb827447e
commit
586e579fe8
|
|
@ -29,6 +29,17 @@ using namespace polly;
|
|||
STATISTIC(ScopFound, "Number of valid Scops");
|
||||
STATISTIC(RichScopFound, "Number of Scops containing a loop");
|
||||
|
||||
// If the loop is nonaffine/boxed, return the first non-boxed surrounding loop
|
||||
// for Polly. If the loop is affine, return the loop itself. Do not call
|
||||
// `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need
|
||||
// to analyze the memory accesses of the nonaffine/boxed loops.
|
||||
static Loop *getFirstNonBoxedLoopFor(Loop *L, LoopInfo &LI,
|
||||
const BoxedLoopsSetTy &BoxedLoops) {
|
||||
while (BoxedLoops.count(L))
|
||||
L = L->getParentLoop();
|
||||
return L;
|
||||
}
|
||||
|
||||
static cl::opt<bool> ModelReadOnlyScalars(
|
||||
"polly-analyze-read-only-scalars",
|
||||
cl::desc("Model read-only scalar values in the scop description"),
|
||||
|
|
@ -150,9 +161,12 @@ bool ScopBuilder::buildAccessMultiDimFixed(MemAccInst Inst, Loop *L) {
|
|||
std::vector<const SCEV *> SizesSCEV;
|
||||
|
||||
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
|
||||
|
||||
Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops());
|
||||
for (auto *Subscript : Subscripts) {
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
if (!isAffineExpr(&scop->getRegion(), L, Subscript, SE, &AccessILS))
|
||||
if (!isAffineExpr(&scop->getRegion(), SurroundingLoop, Subscript, SE,
|
||||
&AccessILS))
|
||||
return false;
|
||||
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
|
|
@ -226,8 +240,10 @@ bool ScopBuilder::buildAccessMemIntrinsic(MemAccInst Inst, Loop *L) {
|
|||
// Check if the length val is actually affine or if we overapproximate it
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
|
||||
bool LengthIsAffine =
|
||||
isAffineExpr(&scop->getRegion(), L, LengthVal, SE, &AccessILS);
|
||||
|
||||
Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, scop->getBoxedLoops());
|
||||
bool LengthIsAffine = isAffineExpr(&scop->getRegion(), SurroundingLoop,
|
||||
LengthVal, SE, &AccessILS);
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
if (!ScopRIL.count(LInst))
|
||||
LengthIsAffine = false;
|
||||
|
|
@ -345,9 +361,11 @@ void ScopBuilder::buildAccessSingleDim(MemAccInst Inst, Loop *L) {
|
|||
isVariantInNonAffineLoop = true;
|
||||
|
||||
InvariantLoadsSetTy AccessILS;
|
||||
bool IsAffine =
|
||||
!isVariantInNonAffineLoop &&
|
||||
isAffineExpr(&scop->getRegion(), L, AccessFunction, SE, &AccessILS);
|
||||
|
||||
Loop *SurroundingLoop = getFirstNonBoxedLoopFor(L, LI, BoxedLoops);
|
||||
bool IsAffine = !isVariantInNonAffineLoop &&
|
||||
isAffineExpr(&scop->getRegion(), SurroundingLoop,
|
||||
AccessFunction, SE, &AccessILS);
|
||||
|
||||
const InvariantLoadsSetTy &ScopRIL = scop->getRequiredInvariantLoads();
|
||||
for (LoadInst *LInst : AccessILS)
|
||||
|
|
|
|||
|
|
@ -2246,6 +2246,10 @@ bool Scop::buildDomains(Region *R, DominatorTree &DT, LoopInfo &LI) {
|
|||
return true;
|
||||
}
|
||||
|
||||
// If the loop is nonaffine/boxed, return the first non-boxed surrounding loop
|
||||
// for Polly. If the loop is affine, return the loop itself. Do not call
|
||||
// `getSCEVAtScope()` on the result of `getFirstNonBoxedLoopFor()`, as we need
|
||||
// to analyze the memory accesses of the nonaffine/boxed loops.
|
||||
static Loop *getFirstNonBoxedLoopFor(BasicBlock *BB, LoopInfo &LI,
|
||||
const BoxedLoopsSetTy &BoxedLoops) {
|
||||
auto *L = LI.getLoopFor(BB);
|
||||
|
|
|
|||
|
|
@ -214,8 +214,9 @@ public:
|
|||
|
||||
auto *L = Expr->getLoop();
|
||||
if (R->contains(L) && (!Scope || !L->contains(Scope))) {
|
||||
DEBUG(dbgs() << "INVALID: AddRec out of a loop whose exit value is not "
|
||||
"synthesizable");
|
||||
DEBUG(dbgs() << "INVALID: Loop of AddRec expression boxed in an a "
|
||||
"non-affine subregion or has a non-synthesizable exit "
|
||||
"value.");
|
||||
return ValidatorResult(SCEVType::INVALID);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -0,0 +1,74 @@
|
|||
; RUN: opt %loadPolly -polly-codegen -polly-allow-nonaffine-loops -polly-allow-nonaffine -debug-only=polly-dependence < %s 2>&1 | FileCheck %s
|
||||
; REQUIRES: asserts
|
||||
|
||||
; CHECK: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_A[o0] : 0 <= o0 <= 699 };
|
||||
; CHECK-NEXT: MayWriteAccess := [Reduction Type: NONE] [Scalar: 0]
|
||||
; CHECK-NEXT: { Stmt_for_body__TO__for_inc11[i0] -> MemRef_B[700] };
|
||||
|
||||
; The if condition C[i] is a non-affine condition, which make the nested loop boxed. The memory access for A should be a range A[0...699]. The memory access for B should be simplified to B[700].
|
||||
;
|
||||
; int A[1000], B[1000], C[1000];
|
||||
;
|
||||
; void foo(int n, int m, int N) {
|
||||
; for (int i = 0; i < 500; i+=1) { /* affine loop */
|
||||
; C[i] += i;
|
||||
; if (C[i]) { /* non-affine subregion */
|
||||
; int j;
|
||||
; for (j = 0; j < 700; j+=1) { /* boxed loop */
|
||||
; A[j] = 1;
|
||||
; }
|
||||
; B[j] = 2;
|
||||
; }
|
||||
; }
|
||||
; }
|
||||
|
||||
|
||||
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
|
||||
|
||||
@C = common global [1000 x i32] zeroinitializer, align 4
|
||||
@A = common global [1000 x i32] zeroinitializer, align 4
|
||||
@B = common global [1000 x i32] zeroinitializer, align 4
|
||||
|
||||
; Function Attrs: norecurse nounwind
|
||||
define void @foo(i32 %n, i32 %m, i32 %N) #0 {
|
||||
entry:
|
||||
br label %entry.split
|
||||
|
||||
entry.split: ; preds = %entry
|
||||
br label %for.body
|
||||
|
||||
for.cond.cleanup: ; preds = %for.inc11
|
||||
ret void
|
||||
|
||||
for.body: ; preds = %for.inc11, %entry.split
|
||||
%indvars.iv25 = phi i64 [ 0, %entry.split ], [ %indvars.iv.next26, %for.inc11 ]
|
||||
%arrayidx = getelementptr inbounds [1000 x i32], [1000 x i32]* @C, i64 0, i64 %indvars.iv25
|
||||
%0 = load i32, i32* %arrayidx, align 4
|
||||
%1 = trunc i64 %indvars.iv25 to i32
|
||||
%add = add nsw i32 %0, %1
|
||||
store i32 %add, i32* %arrayidx, align 4
|
||||
%tobool = icmp eq i32 %add, 0
|
||||
br i1 %tobool, label %for.inc11, label %for.body5.preheader
|
||||
|
||||
for.body5.preheader: ; preds = %for.body
|
||||
br label %for.body5
|
||||
|
||||
for.body5: ; preds = %for.body5.preheader, %for.body5
|
||||
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body5 ], [ 0, %for.body5.preheader ]
|
||||
%arrayidx7 = getelementptr inbounds [1000 x i32], [1000 x i32]* @A, i64 0, i64 %indvars.iv
|
||||
store i32 1, i32* %arrayidx7, align 4
|
||||
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
|
||||
%exitcond = icmp eq i64 %indvars.iv, 699
|
||||
br i1 %exitcond, label %for.end, label %for.body5
|
||||
|
||||
for.end: ; preds = %for.body5
|
||||
store i32 2, i32* getelementptr inbounds ([1000 x i32], [1000 x i32]* @B, i64 0, i64 700), align 4
|
||||
br label %for.inc11
|
||||
|
||||
for.inc11: ; preds = %for.body, %for.end
|
||||
%indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
|
||||
%exitcond27 = icmp eq i64 %indvars.iv25, 499
|
||||
br i1 %exitcond27, label %for.cond.cleanup, label %for.body
|
||||
}
|
||||
|
||||
|
|
@ -0,0 +1,45 @@
|
|||
; RUN: opt %loadPolly -polly-allow-nonaffine-loops -polly-scops -analyze < %s | FileCheck %s
|
||||
;
|
||||
; CHECK: Domain :=
|
||||
; CHECK-NEXT: { Stmt_while_cond_i__TO__while_end_i[] };
|
||||
;
|
||||
define i32 @func(i32 %param0, i32 %param1, i64* %param2) #3 {
|
||||
|
||||
entry:
|
||||
%var0 = alloca i32
|
||||
%var1 = alloca i32
|
||||
br label %while.cond.i
|
||||
|
||||
while.cond.i: ; preds = %while.cond.i.backedge, %entry
|
||||
%var2 = phi i32 [ %param0, %entry ], [ %var3, %while.cond.i.backedge ]
|
||||
%var3 = add nsw i32 %var2, 1
|
||||
%var4 = icmp slt i32 %var2, -1
|
||||
br i1 %var4, label %while.cond.i.backedge, label %if.end.i1.i
|
||||
|
||||
if.end.i1.i: ; preds = %while.cond.i
|
||||
%var5 = sdiv i32 %var3, 64
|
||||
%var6 = icmp sgt i32 %param1, %var5
|
||||
br i1 %var6, label %exit1.i, label %while.cond.i.backedge
|
||||
|
||||
exit1.i: ; preds = %if.end.i1.i
|
||||
%var7 = srem i32 %var3, 64
|
||||
%var8 = sext i32 %var5 to i64
|
||||
%var9 = getelementptr inbounds i64, i64* %param2, i64 %var8
|
||||
%var10 = load i64, i64* %var9, align 8
|
||||
%var11 = zext i32 %var7 to i64
|
||||
%var12 = shl i64 1, %var11
|
||||
%var13 = and i64 %var10, %var12
|
||||
%var14 = icmp eq i64 %var13, 0
|
||||
store i32 %var2, i32* %var1
|
||||
store i32 %var3, i32* %var0
|
||||
br i1 %var14, label %while.cond.i.backedge, label %while.end.i
|
||||
|
||||
while.cond.i.backedge: ; preds = %exit1.i, %while.cond.i, %if.end.i1.i
|
||||
br label %while.cond.i
|
||||
|
||||
while.end.i:
|
||||
%var15 = load i32, i32* %var0
|
||||
%var16 = load i32, i32* %var1
|
||||
%var17 = add i32 %var15, %var16
|
||||
ret i32 %var17
|
||||
}
|
||||
Loading…
Reference in New Issue