forked from OSchip/llvm-project
				
			AArch64: avoid creating cycle in DAG for post-increment NEON ops.
Inserting a value into Visited has the effect of terminating a search for predecessors if that node is seen. This is legitimate for the base address, and acts as a slight performance optimization, but the vector-building node can be part of a legitimate cycle so we shouldn't stop searching there. PR43056. llvm-svn: 370036
This commit is contained in:
		
							parent
							
								
									bccbd74c62
								
							
						
					
					
						commit
						a7f226f9db
					
				| 
						 | 
					@ -10694,7 +10694,7 @@ static SDValue performPostLD1Combine(SDNode *N,
 | 
				
			||||||
    // are predecessors to each other or the Vector.
 | 
					    // are predecessors to each other or the Vector.
 | 
				
			||||||
    SmallPtrSet<const SDNode *, 32> Visited;
 | 
					    SmallPtrSet<const SDNode *, 32> Visited;
 | 
				
			||||||
    SmallVector<const SDNode *, 16> Worklist;
 | 
					    SmallVector<const SDNode *, 16> Worklist;
 | 
				
			||||||
    Visited.insert(N);
 | 
					    Visited.insert(Addr.getNode());
 | 
				
			||||||
    Worklist.push_back(User);
 | 
					    Worklist.push_back(User);
 | 
				
			||||||
    Worklist.push_back(LD);
 | 
					    Worklist.push_back(LD);
 | 
				
			||||||
    Worklist.push_back(Vector.getNode());
 | 
					    Worklist.push_back(Vector.getNode());
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6319,3 +6319,22 @@ define void  @test_ld1lane_build_i8(i8* %a, i8* %b, i8* %c, i8* %d, i8* %e, i8*
 | 
				
			||||||
  store <8 x i8> %sub, <8 x i8>* %p
 | 
					  store <8 x i8> %sub, <8 x i8>* %p
 | 
				
			||||||
  ret void
 | 
					  ret void
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					define <4 x i32> @test_inc_cycle(<4 x i32> %vec, i32* %in) {
 | 
				
			||||||
 | 
					; CHECK-LABEL: test_inc_cycle:
 | 
				
			||||||
 | 
					; CHECK: ld1.s { v0 }[0], [x0]{{$}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %elt = load i32, i32* %in
 | 
				
			||||||
 | 
					  %newvec = insertelement <4 x i32> %vec, i32 %elt, i32 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ; %inc cannot be %elt directly because we check that the load is only
 | 
				
			||||||
 | 
					  ; used by the insert before trying to form post-inc.
 | 
				
			||||||
 | 
					  %inc.vec = bitcast <4 x i32> %newvec to <2 x i64>
 | 
				
			||||||
 | 
					  %inc = extractelement <2 x i64> %inc.vec, i32 0
 | 
				
			||||||
 | 
					  %newaddr = getelementptr i32, i32* %in, i64 %inc
 | 
				
			||||||
 | 
					  store i32* %newaddr, i32** @var
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  ret <4 x i32> %newvec
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@var = global i32* null
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue