forked from OSchip/llvm-project
				
			AArch64: avoid creating cycle in DAG for post-increment NEON ops.
Inserting a value into Visited has the effect of terminating a search for predecessors if that node is seen. This is legitimate for the base address, and acts as a slight performance optimization, but the vector-building node can be part of a legitimate cycle, so we shouldn't stop searching there. PR43056. llvm-svn: 370036
This commit is contained in:
		
							parent
							
								
									bccbd74c62
								
							
						
					
					
						commit
						a7f226f9db
					
				| 
						 | 
				
			
			@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N,
 | 
			
		|||
    // are predecessors to each other or the Vector.
 | 
			
		||||
    SmallPtrSet<const SDNode *, 32> Visited;
 | 
			
		||||
    SmallVector<const SDNode *, 16> Worklist;
 | 
			
		||||
    Visited.insert(N);
 | 
			
		||||
    Visited.insert(Addr.getNode());
 | 
			
		||||
    Worklist.push_back(User);
 | 
			
		||||
    Worklist.push_back(LD);
 | 
			
		||||
    Worklist.push_back(Vector.getNode());
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -6319,3 +6319,22 @@ define void  @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8*
 | 
			
		|||
  store <8 x i8> %sub, <8 x i8>* %p
 | 
			
		||||
  ret void
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
 | 
			
		||||
; CHECK-LABEL: test_inc_cycle:
 | 
			
		||||
; CHECK: ld1.s { v0 }[0], [x0]{{$}}
 | 
			
		||||
 | 
			
		||||
  %elt = load i32, i32* %in
 | 
			
		||||
  %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
 | 
			
		||||
 | 
			
		||||
  ; %inc cannot be %elt directly because we check that the load is only
 | 
			
		||||
  ; used by the insert before trying to form post-inc.
 | 
			
		||||
  %inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
 | 
			
		||||
  %inc = extractelement <2 x i64> %inc.vec, i32 0
 | 
			
		||||
  %newaddr = getelementptr i32, i32* %in, i64 %inc
 | 
			
		||||
  store i32* %newaddr, i32** @var
 | 
			
		||||
 | 
			
		||||
  ret <4 x i32> %newvec
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@var = global i32* null
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue