; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X64
; RUN: llc < %s -mtriple=i686-- -mattr=+sse2 -verify-machineinstrs | FileCheck %s --check-prefix=CHECK --check-prefix=X32

; On x86, an atomic rmw operation that does not modify the value in memory
; (such as atomic add 0) can be replaced by an mfence followed by a mov.
; This is explained (with the motivation for such an optimization) in
; http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf
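
; As a concrete illustration of the lowering checked below (a sketch, not part
; of the autogenerated assertions): a source-level no-op RMW such as the C
; builtin __atomic_fetch_add(p, 0, __ATOMIC_SEQ_CST) reaches the backend as
;   %old = atomicrmw add i32* %p, i32 0 seq_cst
; and, because the stored value is unchanged, x86-64 can lower it to
;   mfence
;   movl (%rdi), %eax
; i.e. a fence that preserves the ordering guarantees followed by a plain load.
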
define i8 @add8(i8* %p) {
; X64-LABEL: add8:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movb (%rdi), %al
; X64-NEXT:    retq
;
; X32-LABEL: add8:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    mfence
; X32-NEXT:    movb (%eax), %al
; X32-NEXT:    retl
  %1 = atomicrmw add i8* %p, i8 0 monotonic
  ret i8 %1
}

define i16 @or16(i16* %p) {
; X64-LABEL: or16:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movzwl (%rdi), %eax
; X64-NEXT:    retq
;
; X32-LABEL: or16:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    mfence
; X32-NEXT:    movzwl (%eax), %eax
; X32-NEXT:    retl
  %1 = atomicrmw or i16* %p, i16 0 acquire
  ret i16 %1
}

define i32 @xor32(i32* %p) {
; X64-LABEL: xor32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X32-LABEL: xor32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    mfence
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    retl
  %1 = atomicrmw xor i32* %p, i32 0 release
  ret i32 %1
}

define i64 @sub64(i64* %p) {
; X64-LABEL: sub64:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movq (%rdi), %rax
; X64-NEXT:    retq
;
; X32-LABEL: sub64:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    pushl %esi
; X32-NEXT:    .cfi_def_cfa_offset 12
; X32-NEXT:    .cfi_offset %esi, -12
; X32-NEXT:    .cfi_offset %ebx, -8
; X32-NEXT:    movl {{[0-9]+}}(%esp), %esi
; X32-NEXT:    movl (%esi), %eax
; X32-NEXT:    movl 4(%esi), %edx
; X32-NEXT:    .p2align 4, 0x90
; X32-NEXT:  .LBB3_1: # %atomicrmw.start
; X32-NEXT:    # =>This Inner Loop Header: Depth=1
; X32-NEXT:    movl %edx, %ecx
; X32-NEXT:    movl %eax, %ebx
; X32-NEXT:    lock cmpxchg8b (%esi)
; X32-NEXT:    jne .LBB3_1
; X32-NEXT:  # %bb.2: # %atomicrmw.end
; X32-NEXT:    popl %esi
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    popl %ebx
; X32-NEXT:    .cfi_def_cfa_offset 4
; X32-NEXT:    retl
  %1 = atomicrmw sub i64* %p, i64 0 seq_cst
  ret i64 %1
}

define i128 @or128(i128* %p) {
; X64-LABEL: or128:
; X64:       # %bb.0:
; X64-NEXT:    pushq %rax
; X64-NEXT:    .cfi_def_cfa_offset 16
; X64-NEXT:    xorl %esi, %esi
; X64-NEXT:    xorl %edx, %edx
; X64-NEXT:    callq __sync_fetch_and_or_16
; X64-NEXT:    popq %rcx
; X64-NEXT:    .cfi_def_cfa_offset 8
; X64-NEXT:    retq
;
; X32-LABEL: or128:
; X32:       # %bb.0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    .cfi_def_cfa_offset 8
; X32-NEXT:    .cfi_offset %ebp, -8
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    .cfi_def_cfa_register %ebp
; X32-NEXT:    pushl %edi
; X32-NEXT:    pushl %esi
; X32-NEXT:    andl $-8, %esp
; X32-NEXT:    subl $16, %esp
; X32-NEXT:    .cfi_offset %esi, -16
; X32-NEXT:    .cfi_offset %edi, -12
; X32-NEXT:    movl 8(%ebp), %esi
; X32-NEXT:    movl %esp, %eax
; X32-NEXT:    pushl $0
; X32-NEXT:    pushl $0
; X32-NEXT:    pushl $0
; X32-NEXT:    pushl $0
; X32-NEXT:    pushl 12(%ebp)
; X32-NEXT:    pushl %eax
; X32-NEXT:    calll __sync_fetch_and_or_16
; X32-NEXT:    addl $20, %esp
; X32-NEXT:    movl (%esp), %eax
; X32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X32-NEXT:    movl {{[0-9]+}}(%esp), %edi
; X32-NEXT:    movl %edi, 12(%esi)
; X32-NEXT:    movl %edx, 8(%esi)
; X32-NEXT:    movl %ecx, 4(%esi)
; X32-NEXT:    movl %eax, (%esi)
; X32-NEXT:    movl %esi, %eax
; X32-NEXT:    leal -8(%ebp), %esp
; X32-NEXT:    popl %esi
; X32-NEXT:    popl %edi
; X32-NEXT:    popl %ebp
; X32-NEXT:    .cfi_def_cfa %esp, 4
; X32-NEXT:    retl $4
  %1 = atomicrmw or i128* %p, i128 0 monotonic
  ret i128 %1
}

; For 'and', the idempotent value is (-1)
define i32 @and32 (i32* %p) {
; X64-LABEL: and32:
; X64:       # %bb.0:
; X64-NEXT:    mfence
; X64-NEXT:    movl (%rdi), %eax
; X64-NEXT:    retq
;
; X32-LABEL: and32:
; X32:       # %bb.0:
; X32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X32-NEXT:    mfence
; X32-NEXT:    movl (%eax), %eax
; X32-NEXT:    retl
  %1 = atomicrmw and i32* %p, i32 -1 acq_rel
  ret i32 %1
}