146 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			LLVM
		
	
	
	
			
		
		
	
	
			146 lines
		
	
	
		
			4.1 KiB
		
	
	
	
		
			LLVM
		
	
	
	
| ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
 | |
| ; RUN: llc < %s -mtriple=aarch64-unknown-unknown | FileCheck %s
 | |
| 
 | |
| ; Check that under certain conditions we can factor out a rotate
 | |
| ; from the following idioms:
 | |
| ;   (a*c0) >> s1 | (a*c1)
 | |
| ;   (a/c0) << s1 | (a/c1)
 | |
| ; This targets cases where instcombine has already folded a shl/srl/mul/udiv
 | |
| ; into one of the two shifts of the rotate idiom, leaving only the other shift explicit.
 | |
| 
 | |
| define i64 @ror_extract_shl(i64 %i) nounwind {
 | |
| ; CHECK-LABEL: ror_extract_shl:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    lsl x8, x0, #3
 | |
| ; CHECK-NEXT:    ror x0, x8, #57
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_mul = shl i64 %i, 3 ; x = i << 3, the value that gets rotated
 | |
|   %rhs_mul = shl i64 %i, 10 ; i << 10 == x << 7 (a shl by 7 folded into the shl by 3)
 | |
|   %lhs_shift = lshr i64 %lhs_mul, 57 ; x >> 57; 57 + 7 == 64, the i64 bit width
 | |
|   %out = or i64 %lhs_shift, %rhs_mul ; (x >> 57) | (x << 7) == x rotated right by 57
 | |
|   ret i64 %out
 | |
| }
 | |
| 
 | |
| define i32 @ror_extract_shrl(i32 %i) nounwind {
 | |
| ; CHECK-LABEL: ror_extract_shrl:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    lsr w8, w0, #3
 | |
| ; CHECK-NEXT:    ror w0, w8, #4
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_div = lshr i32 %i, 7 ; i >> 7 == x >> 4, with x = i >> 3 (lshr by 4 folded into the lshr by 3)
 | |
|   %rhs_div = lshr i32 %i, 3 ; x = i >> 3, the value that gets rotated
 | |
|   %rhs_shift = shl i32 %rhs_div, 28 ; x << 28; 28 + 4 == 32, the i32 bit width
 | |
|   %out = or i32 %lhs_div, %rhs_shift ; (x >> 4) | (x << 28) == x rotated right by 4
 | |
|   ret i32 %out
 | |
| }
 | |
| 
 | |
| define i32 @ror_extract_mul(i32 %i) nounwind {
 | |
| ; CHECK-LABEL: ror_extract_mul:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    add w8, w0, w0, lsl #3
 | |
| ; CHECK-NEXT:    ror w0, w8, #25
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_mul = mul i32 %i, 9 ; x = i * 9, the value that gets rotated
 | |
|   %rhs_mul = mul i32 %i, 1152 ; 1152 == 9 * 128, so i * 1152 == x << 7
 | |
|   %lhs_shift = lshr i32 %lhs_mul, 25 ; x >> 25; 25 + 7 == 32, the i32 bit width
 | |
|   %out = or i32 %lhs_shift, %rhs_mul ; (x >> 25) | (x << 7) == x rotated right by 25
 | |
|   ret i32 %out
 | |
| }
 | |
| 
 | |
| define i64 @ror_extract_udiv(i64 %i) nounwind {
 | |
| ; CHECK-LABEL: ror_extract_udiv:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    mov x8, #-6148914691236517206
 | |
| ; CHECK-NEXT:    movk x8, #43691
 | |
| ; CHECK-NEXT:    umulh x8, x0, x8
 | |
| ; CHECK-NEXT:    lsr x8, x8, #1
 | |
| ; CHECK-NEXT:    ror x0, x8, #4
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_div = udiv i64 %i, 3 ; x = i / 3, the value that gets rotated
 | |
|   %rhs_div = udiv i64 %i, 48 ; 48 == 3 * 16, so i / 48 == x >> 4
 | |
|   %lhs_shift = shl i64 %lhs_div, 60 ; x << 60; 60 + 4 == 64, the i64 bit width
 | |
|   %out = or i64 %lhs_shift, %rhs_div ; (x << 60) | (x >> 4) == x rotated right by 4; expected output has a single umulh
 | |
|   ret i64 %out
 | |
| }
 | |
| 
 | |
| define i64 @ror_extract_mul_with_mask(i64 %i) nounwind {
 | |
| ; CHECK-LABEL: ror_extract_mul_with_mask:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    add x8, x0, x0, lsl #3
 | |
| ; CHECK-NEXT:    ror x8, x8, #57
 | |
| ; CHECK-NEXT:    and x0, x8, #0xff
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_mul = mul i64 %i, 1152 ; 1152 == 9 * 128, so this is x << 7 with x = i * 9
 | |
|   %rhs_mul = mul i64 %i, 9 ; x = i * 9, the value that gets rotated
 | |
|   %lhs_and = and i64 %lhs_mul, 160 ; 160 == 0xa0; the low 7 bits of x << 7 are zero, so only bit 7 can survive
 | |
|   %rhs_shift = lshr i64 %rhs_mul, 57 ; x >> 57 occupies bits 0-6 only; 57 + 7 == 64
 | |
|   %out = or i64 %lhs_and, %rhs_shift ; equals (x rotated right by 57) & 0xff, since 0xa0 | 0x7f == 0xff
 | |
|   ret i64 %out
 | |
| }
 | |
| 
 | |
| ; Result would undershift: the shl partner is only x << 5, but x >> 57 needs x << 7 to form a rotate.
 | |
| define i64 @no_extract_shl(i64 %i) nounwind {
 | |
| ; CHECK-LABEL: no_extract_shl:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    lsl x8, x0, #10
 | |
| ; CHECK-NEXT:    bfxil x8, x0, #52, #7
 | |
| ; CHECK-NEXT:    mov x0, x8
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_mul = shl i64 %i, 5 ; x = i << 5
 | |
|   %rhs_mul = shl i64 %i, 10 ; i << 10 == x << 5
 | |
|   %lhs_shift = lshr i64 %lhs_mul, 57 ; x >> 57; 57 + 5 != 64, so the two shifts do not pair up
 | |
|   %out = or i64 %lhs_shift, %rhs_mul ; negative test: expected output contains no ror
 | |
|   ret i64 %out
 | |
| }
 | |
| 
 | |
| ; Result would overshift: the lshr partner is x >> 6, but x << 28 needs x >> 4 to form a rotate.
 | |
| define i32 @no_extract_shrl(i32 %i) nounwind {
 | |
| ; CHECK-LABEL: no_extract_shrl:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    lsr w8, w0, #3
 | |
| ; CHECK-NEXT:    lsr w0, w0, #9
 | |
| ; CHECK-NEXT:    bfi w0, w8, #28, #4
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_div = lshr i32 %i, 3 ; x = i >> 3
 | |
|   %rhs_div = lshr i32 %i, 9 ; i >> 9 == x >> 6
 | |
|   %lhs_shift = shl i32 %lhs_div, 28 ; x << 28; 28 + 6 != 32, so the two shifts do not pair up
 | |
|   %out = or i32 %lhs_shift, %rhs_div ; negative test: expected output contains no ror
 | |
|   ret i32 %out
 | |
| }
 | |
| 
 | |
| ; Can factor 128 from 2304, but result is 18 instead of 9, so it is not (i * 9) << 7.
 | |
| define i64 @no_extract_mul(i64 %i) nounwind {
 | |
| ; CHECK-LABEL: no_extract_mul:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    add x8, x0, x0, lsl #3
 | |
| ; CHECK-NEXT:    lsr x0, x8, #57
 | |
| ; CHECK-NEXT:    bfi x0, x8, #8, #56
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_mul = mul i64 %i, 2304 ; 2304 == 18 * 128 == (i * 18) << 7
 | |
|   %rhs_mul = mul i64 %i, 9 ; x = i * 9
 | |
|   %rhs_shift = lshr i64 %rhs_mul, 57 ; x >> 57 would need x << 7 == i * 1152 as its rotate partner
 | |
|   %out = or i64 %lhs_mul, %rhs_shift ; negative test: expected output contains no ror
 | |
|   ret i64 %out
 | |
| }
 | |
| 
 | |
| ; Can't evenly factor 16 from 49, so i / 49 is not (i / 3) >> 4 (that would be i / 48).
 | |
| define i32 @no_extract_udiv(i32 %i) nounwind {
 | |
| ; CHECK-LABEL: no_extract_udiv:
 | |
| ; CHECK:       // %bb.0:
 | |
| ; CHECK-NEXT:    mov w8, #43691
 | |
| ; CHECK-NEXT:    mov w9, #33437
 | |
| ; CHECK-NEXT:    movk w8, #43690, lsl #16
 | |
| ; CHECK-NEXT:    movk w9, #21399, lsl #16
 | |
| ; CHECK-NEXT:    umull x8, w0, w8
 | |
| ; CHECK-NEXT:    umull x9, w0, w9
 | |
| ; CHECK-NEXT:    lsr x8, x8, #33
 | |
| ; CHECK-NEXT:    lsr x9, x9, #32
 | |
| ; CHECK-NEXT:    extr w0, w8, w9, #4
 | |
| ; CHECK-NEXT:    ret
 | |
|   %lhs_div = udiv i32 %i, 3 ; x = i / 3
 | |
|   %rhs_div = udiv i32 %i, 49 ; 49 is not 3 * 16, so this is unrelated to x >> 4
 | |
|   %lhs_shift = shl i32 %lhs_div, 28 ; x << 28 would need x >> 4 as its rotate partner (28 + 4 == 32)
 | |
|   %out = or i32 %lhs_shift, %rhs_div ; negative test: expected output has two separate umull and no ror
 | |
|   ret i32 %out
 | |
| }
 |