; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i386-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+sse2 | FileCheck %s --check-prefix=X64

; t: loads an i32 from %ref_frame_ptr at byte offset
; (%idxY * %ref_frame_stride + %idxX) and an i16 located 4 bytes past it,
; packs both into one i64 (i32 in bits 0-31, i16 in bits 32-47), and then
; routes that value through double -> <2 x double> -> <8 x i16> -> shuffle ->
; <4 x i32> before returning lane 0 of the <4 x i32>.
;
; The shuffle mask leaves i16 lanes 0 and 1 in place, so lane 0 of the final
; <4 x i32> is just the low 32 bits of the packed value, i.e. the first i32
; load. The autogenerated assertions below accordingly expect only the address
; arithmetic followed by a single 32-bit load on both targets.
define i32 @t(ptr %ref_frame_ptr, i32 %ref_frame_stride, i32 %idxX, i32 %idxY) nounwind  {
; X86-LABEL: t:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    imull {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    addl {{[0-9]+}}(%esp), %ecx
; X86-NEXT:    movl (%eax,%ecx), %eax
; X86-NEXT:    retl
;
; X64-LABEL: t:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    imull %ecx, %esi
; X64-NEXT:    addl %edx, %esi
; X64-NEXT:    movslq %esi, %rax
; X64-NEXT:    movl (%rdi,%rax), %eax
; X64-NEXT:    retq
entry:
	; Row offset (idxY * stride); shared by both address computations below.
	%tmp7 = mul i32 %idxY, %ref_frame_stride		; <i32> [#uses=2]
	; First load: i32 at ptr + (row offset + idxX).
	%tmp9 = add i32 %tmp7, %idxX		; <i32> [#uses=1]
	%tmp11 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp9		; <ptr> [#uses=1]
	%tmp13 = load i32, ptr %tmp11, align 4		; <i32> [#uses=1]
	; Second load: i16 at ptr + (idxX + 4 + row offset), 4 bytes past the first.
	%tmp18 = add i32 %idxX, 4		; <i32> [#uses=1]
	%tmp20.sum = add i32 %tmp18, %tmp7		; <i32> [#uses=1]
	%tmp21 = getelementptr i8, ptr %ref_frame_ptr, i32 %tmp20.sum		; <ptr> [#uses=1]
	%tmp23 = load i16, ptr %tmp21, align 2		; <i16> [#uses=1]
	; Pack: i16 goes to bits 32-47, the i32 occupies bits 0-31.
	%tmp2425 = zext i16 %tmp23 to i64		; <i64> [#uses=1]
	%tmp26 = shl i64 %tmp2425, 32		; <i64> [#uses=1]
	%tmp2728 = zext i32 %tmp13 to i64		; <i64> [#uses=1]
	%tmp29 = or i64 %tmp26, %tmp2728		; <i64> [#uses=1]
	; Reinterpret the packed bits as lane 0 of a <2 x double>; lane 1 is 0.0.
	%tmp3454 = bitcast i64 %tmp29 to double		; <double> [#uses=1]
	%tmp35 = insertelement <2 x double> undef, double %tmp3454, i32 0		; <<2 x double>> [#uses=1]
	%tmp36 = insertelement <2 x double> %tmp35, double 0.000000e+00, i32 1		; <<2 x double>> [#uses=1]
	%tmp42 = bitcast <2 x double> %tmp36 to <8 x i16>		; <<8 x i16>> [#uses=1]
	; Mask keeps i16 lanes 0 and 1 unchanged; only lanes 2 and 3 are rearranged.
	%tmp43 = shufflevector <8 x i16> %tmp42, <8 x i16> undef, <8 x i32> < i32 0, i32 1, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7 >		; <<8 x i16>> [#uses=1]
	%tmp47 = bitcast <8 x i16> %tmp43 to <4 x i32>		; <<4 x i32>> [#uses=1]
	; Lane 0 of the <4 x i32> is formed from i16 lanes 0 and 1.
	%tmp48 = extractelement <4 x i32> %tmp47, i32 0		; <i32> [#uses=1]
	ret i32 %tmp48
}

; Test CSE for SelectionDAG (SDAG) nodes with multiple results (UMUL_LOHI).
; square_high: returns the upper 96 bits of the 192-bit square of %x.
; %x is zero-extended to i192, multiplied by itself, shifted right by 96 and
; truncated back to i96. The multiply carries nuw: the square of a 96-bit
; value always fits in 192 bits.
;
; Both multiply operands are the same value, so the expansion into
; register-sized multiplies contains repeated partial products. In the
; expected output below, the low and high halves of a single widening
; multiply are each consumed by more than one add/adc (presumably the reuse
; this function is meant to exercise).
define i96 @square_high(i96 %x) nounwind {
; X86-LABEL: square_high:
; X86:       ## %bb.0: ## %entry
; X86-NEXT:    pushl %ebp
; X86-NEXT:    pushl %ebx
; X86-NEXT:    pushl %edi
; X86-NEXT:    pushl %esi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %edi, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    movl %edx, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    addl %eax, %ebx
; X86-NEXT:    movl %edx, %ebp
; X86-NEXT:    adcl $0, %ebp
; X86-NEXT:    addl %eax, %ebx
; X86-NEXT:    adcl %edx, %ebp
; X86-NEXT:    setb %al
; X86-NEXT:    movzbl %al, %ecx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %esi
; X86-NEXT:    movl %eax, %ebx
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull %edi
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    movl %eax, %ebp
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    mull {{[0-9]+}}(%esp)
; X86-NEXT:    movl %eax, %esi
; X86-NEXT:    addl %edi, %esi
; X86-NEXT:    adcl $0, %edx
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %esi, %ecx
; X86-NEXT:    setb %al
; X86-NEXT:    movl %edx, %edi
; X86-NEXT:    adcl $0, %edi
; X86-NEXT:    addl %ebp, %ebx
; X86-NEXT:    adcl %ecx, %esi
; X86-NEXT:    setb %cl
; X86-NEXT:    adcl %edx, %edi
; X86-NEXT:    addb $255, %cl
; X86-NEXT:    movl %edx, %ecx
; X86-NEXT:    adcl $0, %ecx
; X86-NEXT:    setb %ah
; X86-NEXT:    addb $255, %al
; X86-NEXT:    adcl %edx, %ecx
; X86-NEXT:    movzbl %ah, %ebx
; X86-NEXT:    adcl $0, %ebx
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    mull %eax
; X86-NEXT:    addl %eax, %edi
; X86-NEXT:    adcl %edx, %ebx
; X86-NEXT:    movl %esi, %eax
; X86-NEXT:    movl %edi, %edx
; X86-NEXT:    movl %ebx, %ecx
; X86-NEXT:    popl %esi
; X86-NEXT:    popl %edi
; X86-NEXT:    popl %ebx
; X86-NEXT:    popl %ebp
; X86-NEXT:    retl
;
; X64-LABEL: square_high:
; X64:       ## %bb.0: ## %entry
; X64-NEXT:    movl %esi, %ecx
; X64-NEXT:    movq %rcx, %rax
; X64-NEXT:    mulq %rdi
; X64-NEXT:    movq %rdx, %r8
; X64-NEXT:    movq %rax, %rsi
; X64-NEXT:    movq %rdi, %rax
; X64-NEXT:    mulq %rdi
; X64-NEXT:    addq %rsi, %rdx
; X64-NEXT:    movq %r8, %rax
; X64-NEXT:    adcq $0, %rax
; X64-NEXT:    addq %rdx, %rsi
; X64-NEXT:    adcq %r8, %rax
; X64-NEXT:    imulq %rcx, %rcx
; X64-NEXT:    addq %rax, %rcx
; X64-NEXT:    shrdq $32, %rcx, %rsi
; X64-NEXT:    shrq $32, %rcx
; X64-NEXT:    movq %rsi, %rax
; X64-NEXT:    movq %rcx, %rdx
; X64-NEXT:    retq
entry:
  ; Widen to twice the input width so the full square is representable.
  %conv = zext i96 %x to i192
  %mul = mul nuw i192 %conv, %conv
  ; Keep only the upper half (bits 96-191) of the product.
  %shr = lshr i192 %mul, 96
  %conv2 = trunc i192 %shr to i96
  ret i96 %conv2
}