; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -O3 -disable-peephole -mtriple=x86_64-unknown-unknown -mattr=+sse4.2,+aes,+crc32,+pclmul < %s | FileCheck %s

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"

; Stack reload folding tests.
;
; By including a nop call with sideeffects we can force a partial register spill of the
; relevant registers and check that the reload is correctly folded into the instruction.

; The inline asm clobbers xmm2-xmm15, forcing %a1 (xmm1) to be spilled; the
; CHECK lines verify the reload is folded into aesdec's memory operand.
define <2 x i64> @stack_fold_aesdec(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesdec:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aesdec {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdec(<2 x i64>, <2 x i64>) nounwind readnone

; Same spill/reload pattern as stack_fold_aesdec, checking the aesdeclast fold.
define <2 x i64> @stack_fold_aesdeclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesdeclast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aesdeclast {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesdeclast(<2 x i64>, <2 x i64>) nounwind readnone

; Same spill/reload pattern, checking the aesenc fold.
define <2 x i64> @stack_fold_aesenc(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesenc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aesenc {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenc(<2 x i64>, <2 x i64>) nounwind readnone

; Same spill/reload pattern, checking the aesenclast fold.
define <2 x i64> @stack_fold_aesenclast(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_aesenclast:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aesenclast {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesenclast(<2 x i64>, <2 x i64>) nounwind readnone

; Unary op: the asm also clobbers xmm1, so %a0 (xmm0) itself must be spilled
; and the reload folded into aesimc.
define <2 x i64> @stack_fold_aesimc(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_aesimc:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aesimc {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64> %a0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aesimc(<2 x i64>) nounwind readnone

; Unary op with an immediate (round constant 7); checks the memory-operand fold
; for aeskeygenassist.
define <2 x i64> @stack_fold_aeskeygenassist(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_aeskeygenassist:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    aeskeygenassist $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64> %a0, i8 7)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.aesni.aeskeygenassist(<2 x i64>, i8) nounwind readnone

; GPR variant: the asm clobbers all GPRs, forcing %a1 (esi) to be spilled and
; reloaded as crc32b's 1-byte memory operand.
define i32 @stack_fold_crc32_32_8(i32 %a0, i8 %a1) {
; CHECK-LABEL: stack_fold_crc32_32_8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    crc32b {{[-0-9]+}}(%r{{[sb]}}p), %eax # 1-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call i32 @llvm.x86.sse42.crc32.32.8(i32 %a0, i8 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.crc32.32.8(i32, i8) nounwind

; As stack_fold_crc32_32_8, but folding a 2-byte reload into crc32w.
define i32 @stack_fold_crc32_32_16(i32 %a0, i16 %a1) {
; CHECK-LABEL: stack_fold_crc32_32_16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    crc32w {{[-0-9]+}}(%r{{[sb]}}p), %eax # 2-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call i32 @llvm.x86.sse42.crc32.32.16(i32 %a0, i16 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.crc32.32.16(i32, i16) nounwind

; As stack_fold_crc32_32_8, but folding a 4-byte reload into crc32l.
define i32 @stack_fold_crc32_32_32(i32 %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_crc32_32_32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %esi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    movl %edi, %eax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    crc32l {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call i32 @llvm.x86.sse42.crc32.32.32(i32 %a0, i32 %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.crc32.32.32(i32, i32) nounwind

; 64-bit variant: folds an 8-byte reload into crc32q.
define i64 @stack_fold_crc32_64_64(i64 %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_crc32_64_64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    movq %rdi, %rax
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    crc32q {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = call i64 @llvm.x86.sse42.crc32.64.64(i64 %a0, i64 %a1)
  ret i64 %2
}
declare i64 @llvm.x86.sse42.crc32.64.64(i64, i64) nounwind

; Checks a GPR->XMM movd load is folded from the 4-byte spill slot; the add
; forces an integer-domain instruction after the load.
define <4 x i32> @stack_fold_movd_load(i32 %a0) {
; CHECK-LABEL: stack_fold_movd_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubd %xmm1, %xmm0
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <4 x i32> zeroinitializer, i32 %a0, i32 0
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}

; Checks an XMM->GPR extract is stored with movd (folded spill) and later
; reloaded into a GPR with movl.
define i32 @stack_fold_movd_store(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_movd_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    paddd %xmm1, %xmm0
; CHECK-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; add forces execution domain
  %1 = add <4 x i32> %a0, %a1
  %2 = extractelement <4 x i32> %1, i32 0
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

; Checks a low-half movq load is folded from the 16-byte spill slot; the add
; forces an integer-domain instruction after the load.
define <2 x i64> @stack_fold_movq_load(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_movq_load:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubq %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> zeroinitializer, <2 x i32> <i32 0, i32 2>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}

; Checks an XMM->GPR extract is stored with movq (folded spill) and later
; reloaded into a GPR with movq.
define i64 @stack_fold_movq_store(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_movq_store:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    paddq %xmm1, %xmm0
; CHECK-NEXT:    movq %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; add forces execution domain
  %1 = add <2 x i64> %a0, %a1
  %2 = extractelement <2 x i64> %1, i32 0
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %2
}

; Binary op with an immediate; checks the memory-operand fold for mpsadbw.
define <8 x i16> @stack_fold_mpsadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_mpsadbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    mpsadbw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.mpsadbw(<16 x i8>, <16 x i8>, i8) nounwind readnone

; abs expressed as icmp/sub/select; checks it lowers to pabsb with a folded
; memory operand.
define <16 x i8> @stack_fold_pabsb(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pabsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, zeroinitializer
  %3 = sub <16 x i8> zeroinitializer, %a0
  %4 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %3
  ret <16 x i8> %4
}

; As stack_fold_pabsb, for the 32-bit element pabsd form.
define <4 x i32> @stack_fold_pabsd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pabsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, zeroinitializer
  %3 = sub <4 x i32> zeroinitializer, %a0
  %4 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %3
  ret <4 x i32> %4
}

; As stack_fold_pabsb, for the 16-bit element pabsw form.
define <8 x i16> @stack_fold_pabsw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pabsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pabsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, zeroinitializer
  %3 = sub <8 x i16> zeroinitializer, %a0
  %4 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %3
  ret <8 x i16> %4
}

; Checks the memory-operand fold for packssdw.
define <8 x i16> @stack_fold_packssdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_packssdw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packssdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.packssdw.128(<4 x i32>, <4 x i32>) nounwind readnone

; Checks the memory-operand fold for packsswb.
define <16 x i8> @stack_fold_packsswb(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_packsswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packsswb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packsswb.128(<8 x i16>, <8 x i16>) nounwind readnone

; Checks the memory-operand fold for packusdw.
define <8 x i16> @stack_fold_packusdw(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_packusdw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packusdw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32> %a0, <4 x i32> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.packusdw(<4 x i32>, <4 x i32>) nounwind readnone

; Checks the memory-operand fold for packuswb.
define <16 x i8> @stack_fold_packuswb(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_packuswb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    packuswb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse2.packuswb.128(<8 x i16>, <8 x i16>) nounwind readnone

; Generic IR add on <16 x i8>; checks it lowers to paddb with a folded
; memory operand.
define <16 x i8> @stack_fold_paddb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <16 x i8> %a0, %a1
  ret <16 x i8> %2
}

; As stack_fold_paddb, for the 32-bit element paddd form.
define <4 x i32> @stack_fold_paddd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_paddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

; Verify paddq folds its spilled operand directly from the stack slot.
define <2 x i64> @stack_fold_paddq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_paddq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <2 x i64> %a0, %a1
  ret <2 x i64> %2
}

; Verify paddsb (signed saturating add via llvm.sadd.sat) folds its spilled operand.
define <16 x i8> @stack_fold_paddsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.sadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

; Verify paddsw (signed saturating add via llvm.sadd.sat) folds its spilled operand.
define <8 x i16> @stack_fold_paddsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.sadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

; Verify paddusb (unsigned saturating add via llvm.uadd.sat) folds its spilled operand.
define <16 x i8> @stack_fold_paddusb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_paddusb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddusb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.uadd.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

; Verify paddusw (unsigned saturating add via llvm.uadd.sat) folds its spilled operand.
define <8 x i16> @stack_fold_paddusw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddusw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddusw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.uadd.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

; Verify paddw folds its spilled operand directly from the stack slot.
define <8 x i16> @stack_fold_paddw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_paddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    paddw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = add <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

; Verify a byte-rotate shufflevector lowers to palignr with a folded reload.
define <16 x i8> @stack_fold_palignr(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_palignr:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    palignr $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a1, <16 x i8> %a0, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
  ret <16 x i8> %2
}

; Verify pand folds its spilled operand; the trailing add pins the integer domain.
define <16 x i8> @stack_fold_pand(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pand:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pand {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = and <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}

; Verify pandn (not-then-and) folds its spilled operand; the trailing add pins the integer domain.
define <16 x i8> @stack_fold_pandn(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pandn:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pandn {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <16 x i8> %a0, <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  %3 = and <16 x i8> %2, %a1
  ; add forces execution domain
  %4 = add <16 x i8> %3, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %4
}

; Verify the zext/add/+1/lshr/trunc average pattern lowers to pavgb with a folded reload.
define <16 x i8> @stack_fold_pavgb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pavgb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pavgb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <16 x i8> %a0 to <16 x i16>
  %3 = zext <16 x i8> %a1 to <16 x i16>
  %4 = add <16 x i16> %2, %3
  %5 = add <16 x i16> %4, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %6 = lshr <16 x i16> %5, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %7 = trunc <16 x i16> %6 to <16 x i8>
  ret <16 x i8> %7
}

; Verify the zext/add/+1/lshr/trunc average pattern lowers to pavgw with a folded reload.
define <8 x i16> @stack_fold_pavgw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pavgw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pavgw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = zext <8 x i16> %a0 to <8 x i32>
  %3 = zext <8 x i16> %a1 to <8 x i32>
  %4 = add <8 x i32> %2, %3
  %5 = add <8 x i32> %4, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %6 = lshr <8 x i32> %5, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  %7 = trunc <8 x i32> %6 to <8 x i16>
  ret <8 x i16> %7
}

; Verify pblendvb folds a spilled operand (xmm0 is the implicit mask, so only xmm2 is spilled).
define <16 x i8> @stack_fold_pblendvb(<16 x i8> %a0, <16 x i8> %a1, <16 x i8> %c) {
; CHECK-LABEL: stack_fold_pblendvb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm2, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    movdqa %xmm1, %xmm2
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pblendvb %xmm0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm2 # 16-byte Folded Reload
; CHECK-NEXT:    movdqa %xmm2, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %a1, <16 x i8> %c, <16 x i8> %a0)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) nounwind readnone

; Verify a word-blend shufflevector lowers to pblendw with a folded reload.
define <8 x i16> @stack_fold_pblendw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pblendw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pblendw $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0,1,2],xmm0[3,4,5,6,7]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 8, i32 9, i32 10, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; Verify pclmulqdq (carry-less multiply) folds its spilled operand.
define <2 x i64> @stack_fold_pclmulqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pclmulqdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pclmulqdq $0, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.pclmulqdq(<2 x i64> %a0, <2 x i64> %a1, i8 0)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.pclmulqdq(<2 x i64>, <2 x i64>, i8) nounwind readnone

; Verify pcmpeqb folds its spilled operand directly from the stack slot.
define <16 x i8> @stack_fold_pcmpeqb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

; Verify pcmpeqd folds its spilled operand directly from the stack slot.
define <4 x i32> @stack_fold_pcmpeqd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Verify pcmpeqq (SSE4.1) folds its spilled operand directly from the stack slot.
define <2 x i64> @stack_fold_pcmpeqq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Verify pcmpeqw folds its spilled operand directly from the stack slot.
define <8 x i16> @stack_fold_pcmpeqw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpeqw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpeqw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp eq <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

; Verify pcmpestri folds its spilled operand (rax is clobbered since the insn uses eax/edx/ecx).
define i32 @stack_fold_pcmpestri(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpestri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    pcmpestri $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpestri128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpestri128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Verify pcmpestrm folds its spilled operand (rax is clobbered since the insn uses eax/edx).
define <16 x i8> @stack_fold_pcmpestrm(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpestrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl $7, %eax
; CHECK-NEXT:    movl $7, %edx
; CHECK-NEXT:    pcmpestrm $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{rax},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8> %a0, i32 7, <16 x i8> %a1, i32 7, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpestrm128(<16 x i8>, i32, <16 x i8>, i32, i8) nounwind readnone

; Verify pcmpgtb folds its spilled operand directly from the stack slot.
define <16 x i8> @stack_fold_pcmpgtb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, %a1
  %3 = sext <16 x i1> %2 to <16 x i8>
  ret <16 x i8> %3
}

; Verify pcmpgtd folds its spilled operand directly from the stack slot.
define <4 x i32> @stack_fold_pcmpgtd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, %a1
  %3 = sext <4 x i1> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Verify pcmpgtq (SSE4.2) folds its spilled operand directly from the stack slot.
define <2 x i64> @stack_fold_pcmpgtq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <2 x i64> %a0, %a1
  %3 = sext <2 x i1> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Verify pcmpgtw folds its spilled operand directly from the stack slot.
define <8 x i16> @stack_fold_pcmpgtw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pcmpgtw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpgtw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, %a1
  %3 = sext <8 x i1> %2 to <8 x i16>
  ret <8 x i16> %3
}

; Verify pcmpistri folds its spilled operand; the index result comes back in ecx.
define i32 @stack_fold_pcmpistri(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpistri:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpistri $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    movl %ecx, %eax
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse42.pcmpistri128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret i32 %2
}
declare i32 @llvm.x86.sse42.pcmpistri128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; Verify pcmpistrm folds its spilled operand directly from the stack slot.
define <16 x i8> @stack_fold_pcmpistrm(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pcmpistrm:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pcmpistrm $7, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8> %a0, <16 x i8> %a1, i8 7)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.sse42.pcmpistrm128(<16 x i8>, <16 x i8>, i8) nounwind readnone

; TODO stack_fold_pextrb

; We can't naively fold pextrw as it only writes to a 16-bit memory location
; even though it can store to a 32-bit register.
; Extract before the GPR-clobbering asm, spill the scalar, and reload it after.
define i16 @stack_fold_pextrw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pextrw:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    pextrw $1, %xmm0, %eax
; CHECK-NEXT:    addl $2, %eax
; CHECK-NEXT:    movl %eax, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
entry:
; add forces execution domain
  %add = add <8 x i16> %a0, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  %extract = extractelement <8 x i16> %add, i32 1
  %asm = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i16 %extract
}

; Verify pextrd can spill an extracted lane directly to the stack across the asm.
define i32 @stack_fold_pextrd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pextrd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    paddd %xmm1, %xmm0
; CHECK-NEXT:    pextrd $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movl {{[-0-9]+}}(%r{{[sb]}}p), %eax # 4-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  ; add forces execution domain
  %1 = add <4 x i32> %a0, %a1
  %2 = extractelement <4 x i32> %1, i32 1
  %3 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i32 %2
}

; Verify pextrq can spill an extracted lane directly to the stack across the asm.
define i64 @stack_fold_pextrq(<2 x i64> %a0) {
; CHECK-LABEL: stack_fold_pextrq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    pextrq $1, %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Folded Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    movq {{[-0-9]+}}(%r{{[sb]}}p), %rax # 8-byte Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = extractelement <2 x i64> %a0, i32 1
  %2 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  ret i64 %1
}

; Verify phaddd (horizontal add) folds its spilled operand.
define <4 x i32> @stack_fold_phaddd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_phaddd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.phadd.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Spill %a1 across the inline asm and check that phaddsw folds the reload.
define <8 x i16> @stack_fold_phaddsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Spill %a1 across the inline asm and check that phaddw folds the reload.
define <8 x i16> @stack_fold_phaddw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phaddw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phaddw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phadd.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Unary op: the asm clobbers xmm1-xmm15 too, forcing %a0 itself to spill;
; check that phminposuw folds the reload.
define <8 x i16> @stack_fold_phminposuw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_phminposuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phminposuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16> %a0)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse41.phminposuw(<8 x i16>) nounwind readnone

; Spill %a1 across the inline asm and check that phsubd folds the reload.
define <4 x i32> @stack_fold_phsubd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_phsubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.phsub.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Spill %a1 across the inline asm and check that phsubsw folds the reload.
define <8 x i16> @stack_fold_phsubsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Spill %a1 across the inline asm and check that phsubw folds the reload.
define <8 x i16> @stack_fold_phsubw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_phsubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    phsubw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.phsub.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; The asm clobbers every GPR, forcing the i8 argument (in edi) to spill;
; check that pinsrb folds the 4-byte reload from the stack slot.
define <16 x i8> @stack_fold_pinsrb(<16 x i8> %a0, i8 %a1) {
; CHECK-LABEL: stack_fold_pinsrb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pinsrb $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <16 x i8> %a0, i8 %a1, i32 1
  ret <16 x i8> %2
}

; The asm clobbers every GPR, forcing the i32 argument (in edi) to spill;
; check that pinsrd folds the 4-byte reload from the stack slot.
define <4 x i32> @stack_fold_pinsrd(<4 x i32> %a0, i32 %a1) {
; CHECK-LABEL: stack_fold_pinsrd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pinsrd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <4 x i32> %a0, i32 %a1, i32 1
  ret <4 x i32> %2
}

; The asm clobbers every GPR, forcing the i64 argument (in rdi) to spill;
; check that pinsrq folds the 8-byte reload from the stack slot.
define <2 x i64> @stack_fold_pinsrq(<2 x i64> %a0, i64 %a1) {
; CHECK-LABEL: stack_fold_pinsrq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pinsrq $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 8-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <2 x i64> %a0, i64 %a1, i32 1
  ret <2 x i64> %2
}

; The asm clobbers every GPR, forcing the i16 argument (spilled as 4 bytes
; from edi) to the stack; check that pinsrw folds the reload.
define <8 x i16> @stack_fold_pinsrw(<8 x i16> %a0, i16 %a1) {
; CHECK-LABEL: stack_fold_pinsrw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    pushq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    pushq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    pushq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    pushq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    pushq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    pushq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 56
; CHECK-NEXT:    .cfi_offset %rbx, -56
; CHECK-NEXT:    .cfi_offset %r12, -48
; CHECK-NEXT:    .cfi_offset %r13, -40
; CHECK-NEXT:    .cfi_offset %r14, -32
; CHECK-NEXT:    .cfi_offset %r15, -24
; CHECK-NEXT:    .cfi_offset %rbp, -16
; CHECK-NEXT:    movl %edi, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pinsrw $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-NEXT:    popq %rbx
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    popq %r12
; CHECK-NEXT:    .cfi_def_cfa_offset 40
; CHECK-NEXT:    popq %r13
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    popq %r14
; CHECK-NEXT:    .cfi_def_cfa_offset 24
; CHECK-NEXT:    popq %r15
; CHECK-NEXT:    .cfi_def_cfa_offset 16
; CHECK-NEXT:    popq %rbp
; CHECK-NEXT:    .cfi_def_cfa_offset 8
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{rax},~{rbx},~{rcx},~{rdx},~{rsi},~{rdi},~{rbp},~{r8},~{r9},~{r10},~{r11},~{r12},~{r13},~{r14},~{r15}"()
  %2 = insertelement <8 x i16> %a0, i16 %a1, i32 1
  ret <8 x i16> %2
}

; Spill %a1 across the inline asm and check that pmaddubsw folds the reload.
define <8 x i16> @stack_fold_pmaddubsw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaddubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaddubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmadd.ub.sw.128(<16 x i8>, <16 x i8>) nounwind readnone

; Spill %a1 across the inline asm and check that pmaddwd folds the reload.
define <4 x i32> @stack_fold_pmaddwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaddwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaddwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16> %a0, <8 x i16> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.pmadd.wd(<8 x i16>, <8 x i16>) nounwind readnone

; Signed-max icmp+select pattern should lower to pmaxsb with a folded reload.
define <16 x i8> @stack_fold_pmaxsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <16 x i8> %a0, %a1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
  ret <16 x i8> %3
}

; Signed-max icmp+select pattern should lower to pmaxsd with a folded reload.
define <4 x i32> @stack_fold_pmaxsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <4 x i32> %a0, %a1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
  ret <4 x i32> %3
}

; Signed-max icmp+select pattern should lower to pmaxsw with a folded reload.
define <8 x i16> @stack_fold_pmaxsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp sgt <8 x i16> %a0, %a1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
  ret <8 x i16> %3
}

; Unsigned-max icmp+select pattern should lower to pmaxub with a folded reload.
define <16 x i8> @stack_fold_pmaxub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pmaxub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxub {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ugt <16 x i8> %a0, %a1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
  ret <16 x i8> %3
}

; Unsigned-max icmp+select pattern should lower to pmaxud with a folded reload.
define <4 x i32> @stack_fold_pmaxud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmaxud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxud {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ugt <4 x i32> %a0, %a1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
  ret <4 x i32> %3
}

; Unsigned-max icmp+select pattern should lower to pmaxuw with a folded reload.
define <8 x i16> @stack_fold_pmaxuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmaxuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmaxuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ugt <8 x i16> %a0, %a1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
  ret <8 x i16> %3
}

; Signed-min icmp+select pattern should lower to pminsb with a folded reload.
define <16 x i8> @stack_fold_pminsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp slt <16 x i8> %a0, %a1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
  ret <16 x i8> %3
}

; Signed-min icmp+select pattern should lower to pminsd with a folded reload.
define <4 x i32> @stack_fold_pminsd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminsd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp slt <4 x i32> %a0, %a1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
  ret <4 x i32> %3
}

; Signed-min icmp+select pattern should lower to pminsw with a folded reload.
define <8 x i16> @stack_fold_pminsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp slt <8 x i16> %a0, %a1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
  ret <8 x i16> %3
}

; Unsigned-min icmp+select pattern should lower to pminub with a folded reload.
define <16 x i8> @stack_fold_pminub(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pminub:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminub {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ult <16 x i8> %a0, %a1
  %3 = select <16 x i1> %2, <16 x i8> %a0, <16 x i8> %a1
  ret <16 x i8> %3
}

; Unsigned-min icmp+select pattern should lower to pminud with a folded reload.
define <4 x i32> @stack_fold_pminud(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pminud:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminud {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ult <4 x i32> %a0, %a1
  %3 = select <4 x i1> %2, <4 x i32> %a0, <4 x i32> %a1
  ret <4 x i32> %3
}

; Unsigned-min icmp+select pattern should lower to pminuw with a folded reload.
define <8 x i16> @stack_fold_pminuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pminuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pminuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = icmp ult <8 x i16> %a0, %a1
  %3 = select <8 x i1> %2, <8 x i16> %a0, <8 x i16> %a1
  ret <8 x i16> %3
}

; Sign-extend of the low 4 bytes; the asm clobbers xmm1-xmm15 so %a0 spills,
; and pmovsxbd should fold the reload.
define <4 x i32> @stack_fold_pmovsxbd(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxbd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i8> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Sign-extend of the low 2 bytes; check pmovsxbq folds the spilled reload.
define <2 x i64> @stack_fold_pmovsxbq(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxbq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Sign-extend of the low 8 bytes; check pmovsxbw folds the spilled reload.
define <8 x i16> @stack_fold_pmovsxbw(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovsxbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %3 = sext <8 x i8> %2 to <8 x i16>
  ret <8 x i16> %3
}

; Sign-extend of the low 2 dwords; check pmovsxdq folds the spilled reload.
define <2 x i64> @stack_fold_pmovsxdq(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pmovsxdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Sign-extend of the low 4 words; check pmovsxwd folds the spilled reload.
define <4 x i32> @stack_fold_pmovsxwd(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovsxwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %3 = sext <4 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Sign-extend of the low 2 words; check pmovsxwq folds the spilled reload.
define <2 x i64> @stack_fold_pmovsxwq(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovsxwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovsxwq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %3 = sext <2 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Zero-extend expressed as an interleaving shuffle with zeros; should lower
; to pmovzxbd with a folded reload of the spilled %a0.
define <4 x i32> @stack_fold_pmovzxbd(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxbd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 1, i32 19, i32 20, i32 21, i32 2, i32 22, i32 23, i32 24, i32 3, i32 25, i32 26, i32 27>
  %3 = bitcast <16 x i8> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Reload of the spilled vector must fold into pmovzxbq (zero-extend i8 -> i64).
define <2 x i64> @stack_fold_pmovzxbq(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxbq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 1, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28>
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Reload of the spilled vector must fold into pmovzxbw (zero-extend i8 -> i16).
define <8 x i16> @stack_fold_pmovzxbw(<16 x i8> %a0) {
; CHECK-LABEL: stack_fold_pmovzxbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> zeroinitializer, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %3 = bitcast <16 x i8> %2 to <8 x i16>
  ret <8 x i16> %3
}

; Reload of the spilled vector must fold into pmovzxdq (zero-extend i32 -> i64).
define <2 x i64> @stack_fold_pmovzxdq(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pmovzxdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,mem[1],zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> zeroinitializer, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %3 = bitcast <4 x i32> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Reload of the spilled vector must fold into pmovzxwd (zero-extend i16 -> i32).
define <4 x i32> @stack_fold_pmovzxwd(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovzxwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %3 = bitcast <8 x i16> %2 to <4 x i32>
  ret <4 x i32> %3
}

; Reload of the spilled vector must fold into pmovzxwq (zero-extend i16 -> i64).
define <2 x i64> @stack_fold_pmovzxwq(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pmovzxwq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmovzxwq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> zeroinitializer, <8 x i32> <i32 0, i32 8, i32 9, i32 10, i32 1, i32 11, i32 12, i32 13>
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

; Reload of the spilled second operand must fold into pmuldq
; (signed multiply of the low 32 bits of each 64-bit lane).
define <2 x i64> @stack_fold_pmuldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuldq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmuldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x i32> %a0 to <2 x i64>
  %3 = bitcast <4 x i32> %a1 to <2 x i64>
  %4 = shl <2 x i64> %2, <i64 32, i64 32>
  %5 = ashr <2 x i64> %4, <i64 32, i64 32>
  %6 = shl <2 x i64> %3, <i64 32, i64 32>
  %7 = ashr <2 x i64> %6, <i64 32, i64 32>
  %8 = mul <2 x i64> %5, %7
  ret <2 x i64> %8
}

; Reload of the spilled second operand must fold into pmulhrsw.
define <8 x i16> @stack_fold_pmulhrsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhrsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhrsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.pmul.hr.sw.128(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled second operand must fold into pmulhuw.
define <8 x i16> @stack_fold_pmulhuw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhuw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhuw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulhu.w(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled second operand must fold into pmulhw.
define <8 x i16> @stack_fold_pmulhw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmulhw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulhw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.pmulh.w(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled second operand must fold into pmulld.
define <4 x i32> @stack_fold_pmulld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmulld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmulld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = mul <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

; Reload of the spilled second operand must fold into pmullw.
define <8 x i16> @stack_fold_pmullw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_pmullw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmullw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = mul <8 x i16> %a0, %a1
  ret <8 x i16> %2
}

; Reload of the spilled second operand must fold into pmuludq
; (unsigned multiply of the low 32 bits of each 64-bit lane).
define <2 x i64> @stack_fold_pmuludq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pmuludq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pmuludq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = bitcast <4 x i32> %a0 to <2 x i64>
  %3 = bitcast <4 x i32> %a1 to <2 x i64>
  %4 = and <2 x i64> %2, <i64 4294967295, i64 4294967295>
  %5 = and <2 x i64> %3, <i64 4294967295, i64 4294967295>
  %6 = mul <2 x i64> %4, %5
  ret <2 x i64> %6
}

; Reload of the spilled second operand must fold into por.
define <16 x i8> @stack_fold_por(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_por:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    por {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = or <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}

; Reload of the spilled second operand must fold into psadbw.
define <2 x i64> @stack_fold_psadbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psadbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psadbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8> %a0, <16 x i8> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psad.bw(<16 x i8>, <16 x i8>) nounwind readnone

; Reload of the spilled shuffle mask must fold into pshufb.
define <16 x i8> @stack_fold_pshufb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pshufb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pshufb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; Reload of the spilled source must fold into pshufd (reverse dwords, imm 27 = 0b00011011).
define <4 x i32> @stack_fold_pshufd(<4 x i32> %a0) {
; CHECK-LABEL: stack_fold_pshufd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pshufd $27, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,2,1,0]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i32> %2
}

; Reload of the spilled source must fold into pshufhw (shuffles the high four words).
define <8 x i16> @stack_fold_pshufhw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pshufhw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pshufhw $11, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[0,1,2,3,7,6,4,4]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 4, i32 4>
  ret <8 x i16> %2
}

; Reload of the spilled source must fold into pshuflw (shuffles the low four words).
define <8 x i16> @stack_fold_pshuflw(<8 x i16> %a0) {
; CHECK-LABEL: stack_fold_pshuflw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pshuflw $27, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = mem[3,2,1,0,4,5,6,7]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm1},~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %2
}

; Reload of the spilled second operand must fold into psignb.
define <16 x i8> @stack_fold_psignb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psignb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psignb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.x86.ssse3.psign.b.128(<16 x i8>, <16 x i8>) nounwind readnone

; Reload of the spilled second operand must fold into psignd.
define <4 x i32> @stack_fold_psignd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psignd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psignd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.ssse3.psign.d.128(<4 x i32>, <4 x i32>) nounwind readnone

; Reload of the spilled second operand must fold into psignw.
define <8 x i16> @stack_fold_psignw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psignw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psignw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.ssse3.psign.w.128(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled shift-count operand must fold into pslld.
define <4 x i32> @stack_fold_pslld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_pslld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pslld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32>, <4 x i32>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psllq.
define <2 x i64> @stack_fold_psllq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psllq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psllq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64>, <2 x i64>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psllw.
define <8 x i16> @stack_fold_psllw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psllw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psllw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psrad.
define <4 x i32> @stack_fold_psrad(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrad:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrad {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psraw.
define <8 x i16> @stack_fold_psraw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psraw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psraw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psrld.
define <4 x i32> @stack_fold_psrld(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psrld:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrld {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %a0, <4 x i32> %a1)
  ret <4 x i32> %2
}
declare <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32>, <4 x i32>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psrlq.
define <2 x i64> @stack_fold_psrlq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psrlq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrlq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %a0, <2 x i64> %a1)
  ret <2 x i64> %2
}
declare <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64>, <2 x i64>) nounwind readnone

; Reload of the spilled shift-count operand must fold into psrlw.
define <8 x i16> @stack_fold_psrlw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psrlw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psrlw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled second operand must fold into psubb.
define <16 x i8> @stack_fold_psubb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <16 x i8> %a0, %a1
  ret <16 x i8> %2
}

; Reload of the spilled second operand must fold into psubd.
define <4 x i32> @stack_fold_psubd(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_psubd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <4 x i32> %a0, %a1
  ret <4 x i32> %2
}

; Reload of the spilled second operand must fold into psubq.
define <2 x i64> @stack_fold_psubq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_psubq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <2 x i64> %a0, %a1
  ret <2 x i64> %2
}

; Reload of the spilled second operand must fold into psubsb (signed saturating subtract).
define <16 x i8> @stack_fold_psubsb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubsb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubsb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.ssub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

; Reload of the spilled second operand must fold into psubsw (signed saturating subtract).
define <8 x i16> @stack_fold_psubsw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubsw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubsw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.ssub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone

; Reload of the spilled second operand must fold into psubusb (unsigned saturating subtract).
define <16 x i8> @stack_fold_psubusb(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_psubusb:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubusb {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <16 x i8> @llvm.usub.sat.v16i8(<16 x i8> %a0, <16 x i8> %a1)
  ret <16 x i8> %2
}
declare <16 x i8> @llvm.usub.sat.v16i8(<16 x i8>, <16 x i8>) nounwind readnone

; Verify that the reload of the spilled operand is folded into psubusw
; (llvm.usub.sat.v8i16).
define <8 x i16> @stack_fold_psubusw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubusw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubusw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call <8 x i16> @llvm.usub.sat.v8i16(<8 x i16> %a0, <8 x i16> %a1)
  ret <8 x i16> %2
}
declare <8 x i16> @llvm.usub.sat.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
; Verify that the reload of the spilled operand is folded into psubw
; (vector i16 sub).
define <8 x i16> @stack_fold_psubw(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_psubw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    psubw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = sub <8 x i16> %a0, %a1
  ret <8 x i16> %2
}
; Verify that the reload of the spilled operand is folded into ptest
; (llvm.x86.sse41.ptestc, returning the carry flag via setb).
define i32 @stack_fold_ptest(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_ptest:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    xorl %eax, %eax
; CHECK-NEXT:    ptest {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    setb %al
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = call i32 @llvm.x86.sse41.ptestc(<2 x i64> %a0, <2 x i64> %a1)
  ret i32 %2
}
declare i32 @llvm.x86.sse41.ptestc(<2 x i64>, <2 x i64>) nounwind readnone
; Verify that the reload of the spilled operand is folded into punpckhbw
; (high-half byte interleave shuffle).
define <16 x i8> @stack_fold_punpckhbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpckhbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[8],mem[8],xmm0[9],mem[9],xmm0[10],mem[10],xmm0[11],mem[11],xmm0[12],mem[12],xmm0[13],mem[13],xmm0[14],mem[14],xmm0[15],mem[15]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <16 x i8> %2
}
; Verify that the reload of the spilled operand is folded into punpckhdq
; (high-half dword interleave shuffle).
define <4 x i32> @stack_fold_punpckhdq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckhdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}
; Verify that the reload of the spilled operand is folded into punpckhqdq
; (high-half qword interleave shuffle).
define <2 x i64> @stack_fold_punpckhqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpckhqdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[1],mem[1]
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubq %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 1, i32 3>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}
; Verify that the reload of the spilled operand is folded into punpckhwd
; (high-half word interleave shuffle).
define <8 x i16> @stack_fold_punpckhwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpckhwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckhwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <8 x i16> %2
}
; Verify that the reload of the spilled operand is folded into punpcklbw
; (low-half byte interleave shuffle).
define <16 x i8> @stack_fold_punpcklbw(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_punpcklbw:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpcklbw {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3],xmm0[4],mem[4],xmm0[5],mem[5],xmm0[6],mem[6],xmm0[7],mem[7]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <16 x i8> %a0, <16 x i8> %a1, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  ret <16 x i8> %2
}
; Verify that the reload of the spilled operand is folded into punpckldq
; (low-half dword interleave shuffle).
define <4 x i32> @stack_fold_punpckldq(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: stack_fold_punpckldq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubd %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <4 x i32> %a0, <4 x i32> %a1, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ; add forces execution domain
  %3 = add <4 x i32> %2, <i32 1, i32 1, i32 1, i32 1>
  ret <4 x i32> %3
}
; Verify that the reload of the spilled operand is folded into punpcklqdq
; (low-half qword interleave shuffle).
define <2 x i64> @stack_fold_punpcklqdq(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: stack_fold_punpcklqdq:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0]
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubq %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <2 x i64> %a0, <2 x i64> %a1, <2 x i32> <i32 0, i32 2>
  ; add forces execution domain
  %3 = add <2 x i64> %2, <i64 1, i64 1>
  ret <2 x i64> %3
}
; Verify that the reload of the spilled operand is folded into punpcklwd
; (low-half word interleave shuffle).
define <8 x i16> @stack_fold_punpcklwd(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: stack_fold_punpcklwd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    punpcklwd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1],xmm0[2],mem[2],xmm0[3],mem[3]
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = shufflevector <8 x i16> %a0, <8 x i16> %a1, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i16> %2
}
; Verify that the reload of the spilled operand is folded into pxor
; (vector xor).
define <16 x i8> @stack_fold_pxor(<16 x i8> %a0, <16 x i8> %a1) {
; CHECK-LABEL: stack_fold_pxor:
; CHECK:       # %bb.0:
; CHECK-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-NEXT:    #APP
; CHECK-NEXT:    nop
; CHECK-NEXT:    #NO_APP
; CHECK-NEXT:    pxor {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-NEXT:    pcmpeqd %xmm1, %xmm1
; CHECK-NEXT:    psubb %xmm1, %xmm0
; CHECK-NEXT:    retq
  %1 = tail call <2 x i64> asm sideeffect "nop", "=x,~{xmm2},~{xmm3},~{xmm4},~{xmm5},~{xmm6},~{xmm7},~{xmm8},~{xmm9},~{xmm10},~{xmm11},~{xmm12},~{xmm13},~{xmm14},~{xmm15},~{flags}"()
  %2 = xor <16 x i8> %a0, %a1
  ; add forces execution domain
  %3 = add <16 x i8> %2, <i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1, i8 1>
  ret <16 x i8> %3
}