AArch64: avoid creating cycle in DAG for post-increment NEON ops.

Inserting a value into Visited has the effect of terminating a search for
predecessors if that node is seen. This is legitimate for the base address, and
acts as a slight performance optimization, but the vector-building node can be
part of a legitimate cycle so we shouldn't stop searching there.

PR43056.

llvm-svn: 370036
This commit is contained in:
Tim Northover 2019-08-27 10:21:11 +00:00
parent bccbd74c62
commit a7f226f9db
2 changed files with 20 additions and 1 deletions

View File

@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N,
// are predecessors to each other or the Vector.
SmallPtrSet<const SDNode *, 32> Visited;
SmallVector<const SDNode *, 16> Worklist;
Visited.insert(N);
Visited.insert(Addr.getNode());
Worklist.push_back(User);
Worklist.push_back(LD);
Worklist.push_back(Vector.getNode());

View File

@ -6319,3 +6319,22 @@ define void @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8*
store <8 x i8> %sub, <8 x i8>* %p
ret void
}
; Regression test for PR43056. The pointer increment used below is extracted
; from the very vector that the lane insert produces, so the new address
; depends on the loaded value. The expected ld1 line must end right after
; "[x0]" (the {{$}} anchor), i.e. a plain lane load with no post-increment
; operand following the base register.
define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
; CHECK-LABEL: test_inc_cycle:
; CHECK: ld1.s { v0 }[0], [x0]{{$}}
%elt = load i32, i32* %in
%newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
; %inc cannot be %elt directly because we check that the load is only
; used by the insert before trying to form post-inc.
; Instead, reinterpret the updated vector as <2 x i64> and take lane 0, which
; routes the dependency on the load through %newvec.
%inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
%inc = extractelement <2 x i64> %inc.vec, i32 0
%newaddr = getelementptr i32, i32* %in, i64 %inc
; Storing %newaddr to @var gives the incremented address a use.
store i32* %newaddr, i32** @var
ret <4 x i32> %newvec
}
; Global pointer slot written by test_inc_cycle above.
@var = global i32* null