125 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			LLVM
		
	
	
	
			
		
		
	
	
			125 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			LLVM
		
	
	
	
| ; REQUIRES: asserts
 | |
| ; RUN: opt -mcpu=thunderx2t99 -loop-unroll --debug-only=loop-unroll -S -unroll-allow-partial < %s 2>&1 | FileCheck %s
 | |
| 
 | |
| target triple = "aarch64-unknown-linux-gnu"
 | |
| 
 | |
| ; CHECK: Loop Unroll: F[foo] Loop %loop.2.header
 | |
| ; CHECK: Loop Size = 19
 | |
| ; CHECK: Trip Count = 512
 | |
| ; CHECK: Trip Multiple = 512
 | |
| ; CHECK: UNROLLING loop %loop.2.header by 4 with a breakout at trip 0
 | |
| ; CHECK: Merging:
 | |
| ; CHECK: Loop Unroll: F[foo] Loop %loop.header
 | |
| ; CHECK:   Loop Size = 18
 | |
| ; CHECK:   Trip Count = 512
 | |
| ; CHECK:   Trip Multiple = 512
 | |
| ; CHECK: UNROLLING loop %loop.header by 4 with a breakout at trip 0
 | |
| ; CHECK: Merging:
 | |
| ; CHECK: %counter = phi i32 [ 0, %entry ], [ %inc.3, %loop.inc.3 ]
 | |
| ; CHECK: %val = add nuw nsw i32 %counter, 5
 | |
| ; CHECK: %val1 = add nuw nsw i32 %counter, 6
 | |
| ; CHECK: %val2 = add nuw nsw i32 %counter, 7
 | |
| ; CHECK: %val3 = add nuw nsw i32 %counter, 8
 | |
| ; CHECK: %val4 = add nuw nsw i32 %counter, 9
 | |
| ; CHECK: %val5 = add nuw nsw i32 %counter, 10
 | |
| ; CHECK-NOT: %val = add i32 %counter, 5
 | |
| ; CHECK-NOT: %val1 = add i32 %counter, 6
 | |
| ; CHECK-NOT: %val2 = add i32 %counter, 7
 | |
| ; CHECK-NOT: %val3 = add i32 %counter, 8
 | |
| ; CHECK-NOT: %val4 = add i32 %counter, 9
 | |
| ; CHECK-NOT: %val5 = add i32 %counter, 10
 | |
| ; CHECK: %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2.3, %loop.2.inc.3 ]
 | |
| 
 | |
; @foo - test input consisting of two consecutive counted loops that fill
; stack arrays. Value and block names are referenced by the CHECK lines and
; must not be renamed.
;
;   %out  pointer to i32. Element 0 receives %0[5] after the first loop,
;         element 1 receives %0[6] after the second loop.
define void @foo(i32* %out) {
entry:
  ; Eight 1024-element i32 stack arrays. %0 and %x0..%x05 are written by both
  ; loops; %x06 is written only by the second loop.
  %0 = alloca [1024 x i32]
  %x0 = alloca [1024 x i32]
  %x01 = alloca [1024 x i32]
  %x02 = alloca [1024 x i32]
  %x03 = alloca [1024 x i32]
  %x04 = alloca [1024 x i32]
  %x05 = alloca [1024 x i32]
  %x06 = alloca [1024 x i32]
  br label %loop.header

; ---- First loop: %counter = 0, 2, 4, ... -----------------------------------
loop.header:
  %counter = phi i32 [ 0, %entry ], [ %inc, %loop.inc ]
  br label %loop.body

loop.body:
  ; %0[counter] = counter
  %ptr = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter
  store i32 %counter, i32* %ptr
  ; %x0[counter] = counter + 5
  %val = add i32 %counter, 5
  %xptr = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter
  store i32 %val, i32* %xptr
  ; %x01[counter] = counter + 6
  %val1 = add i32 %counter, 6
  %xptr1 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter
  store i32 %val1, i32* %xptr1
  ; %x02[counter] = counter + 7
  %val2 = add i32 %counter, 7
  %xptr2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter
  store i32 %val2, i32* %xptr2
  ; %x03[counter] = counter + 8
  %val3 = add i32 %counter, 8
  %xptr3 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter
  store i32 %val3, i32* %xptr3
  ; %x04[counter] = counter + 9
  %val4 = add i32 %counter, 9
  %xptr4 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter
  store i32 %val4, i32* %xptr4
  ; %x05[counter] = counter + 10
  %val5 = add i32 %counter, 10
  %xptr5 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter
  store i32 %val5, i32* %xptr5
  br label %loop.inc

loop.inc:
  ; Step by 2 and leave once the next counter is >= 1023 (signed compare);
  ; %inc runs 2, 4, ..., 1024, which gives the 512 iterations checked above.
  %inc = add i32 %counter, 2
  %1 = icmp sge i32 %inc, 1023
  br i1 %1, label %exit.0, label %loop.header

exit.0:
  ; out[0] = %0[5]
  %2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 5
  %3 = load i32, i32* %2
  store i32 %3, i32* %out
  br label %loop.2.header

; ---- Second loop: same shape as the first, plus one extra store to %x06 ----
loop.2.header:
  %counter.2 = phi i32 [ 0, %exit.0 ], [ %inc.2, %loop.2.inc ]
  br label %loop.2.body

loop.2.body:
  ; %0[counter.2] = counter.2
  %ptr.2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 %counter.2
  store i32 %counter.2, i32* %ptr.2
  ; %x0[counter.2] = counter.2 + 5
  %val.2 = add i32 %counter.2, 5
  %xptr.2 = getelementptr [1024 x i32], [1024 x i32]* %x0, i32 0, i32 %counter.2
  store i32 %val.2, i32* %xptr.2
  ; %x01[counter.2] = counter.2 + 6 (stores this loop's own %val1.2)
  %val1.2 = add i32 %counter.2, 6
  %xptr1.2 = getelementptr [1024 x i32], [1024 x i32]* %x01, i32 0, i32 %counter.2
  store i32 %val1.2, i32* %xptr1.2
  ; %x02[counter.2] = counter.2 + 7 (stores this loop's own %val2.2)
  %val2.2 = add i32 %counter.2, 7
  %xptr2.2 = getelementptr [1024 x i32], [1024 x i32]* %x02, i32 0, i32 %counter.2
  store i32 %val2.2, i32* %xptr2.2
  ; %x03[counter.2] = counter.2 + 8
  %val3.2 = add i32 %counter.2, 8
  %xptr3.2 = getelementptr [1024 x i32], [1024 x i32]* %x03, i32 0, i32 %counter.2
  store i32 %val3.2, i32* %xptr3.2
  ; %x04[counter.2] = counter.2 + 9
  %val4.2 = add i32 %counter.2, 9
  %xptr4.2 = getelementptr [1024 x i32], [1024 x i32]* %x04, i32 0, i32 %counter.2
  store i32 %val4.2, i32* %xptr4.2
  ; %x05[counter.2] = counter.2 + 10
  %val5.2 = add i32 %counter.2, 10
  %xptr5.2 = getelementptr [1024 x i32], [1024 x i32]* %x05, i32 0, i32 %counter.2
  store i32 %val5.2, i32* %xptr5.2
  ; %x06[counter.2] = counter.2 + 10 (reuses %val5.2; the extra store that
  ; the first loop does not have)
  %xptr6.2 = getelementptr [1024 x i32], [1024 x i32]* %x06, i32 0, i32 %counter.2
  store i32 %val5.2, i32* %xptr6.2
  br label %loop.2.inc

loop.2.inc:
  ; Same step and exit condition as the first loop.
  %inc.2 = add i32 %counter.2, 2
  %4 = icmp sge i32 %inc.2, 1023
  br i1 %4, label %exit.2, label %loop.2.header

exit.2:
  ; out[1] = %0[6]
  %x2 = getelementptr [1024 x i32], [1024 x i32]* %0, i32 0, i32 6
  %x3 = load i32, i32* %x2
  %out2 = getelementptr i32, i32* %out, i32 1
  store i32 %x3, i32* %out2
  ret void
}
 |