; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_64
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512f < %s | FileCheck %s --check-prefix=ALL --check-prefix=KNL_32
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_SMALL
; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq -code-model=large < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX --check-prefix=SKX_LARGE
; RUN: llc -mtriple=i386-unknown-linux-gnu -mattr=+avx512vl -mattr=+avx512dq < %s | FileCheck %s --check-prefix=ALL --check-prefix=SKX_32
; RUN: opt -mtriple=x86_64-apple-darwin -scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: opt -mtriple=x86_64-apple-darwin -passes=scalarize-masked-mem-intrin -mcpu=corei7-avx -S < %s | FileCheck %s -check-prefix=SCALAR
; RUN: llc -O0 -mtriple=x86_64-unknown-linux-gnu -mcpu=skx < %s -o /dev/null

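; Prefix summary (as configured by the RUN lines above): KNL_64/KNL_32 check
; plain AVX-512F codegen on 64- and 32-bit triples, SKX/SKX_SMALL/SKX_LARGE
; check AVX512VL+AVX512DQ under the small and large code models, SKX_32 is
; the 32-bit VL+DQ run, and SCALAR checks the IR emitted by the
; scalarize-masked-mem-intrin pass. The final -O0 run is a compile-only
; sanity check whose output is discarded.
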
@glob_array = internal unnamed_addr constant [16 x i32] [i32 1, i32 1, i32 2, i32 3, i32 5, i32 8, i32 13, i32 21, i32 34, i32 55, i32 89, i32 144, i32 233, i32 377, i32 610, i32 987], align 16

; SCALAR-LABEL: test1
; SCALAR:      extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: extractelement <16 x float*>
; SCALAR-NEXT: load float

define <16 x float> @test1(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test1:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test1:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test1:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test1:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

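; test1: with a splat base pointer and sign-extended 32-bit indices the whole
; gather folds into a single vgatherdps using base+index*4 addressing on every
; target; the all-true mask is materialized with kxnorw.
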
declare <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*>, i32, <16 x i1>, <16 x i32>)
declare <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*>, i32, <16 x i1>, <16 x float>)
declare <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*>, i32, <8 x i1>, <8 x i32>)

; SCALAR-LABEL: test2
; SCALAR:      extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: br label %else
; SCALAR: else:
; SCALAR-NEXT:  %res.phi.else = phi
; SCALAR-NEXT:  and i16 %{{.*}}, 2
; SCALAR-NEXT:  icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT:  br i1 %{{.*}}, label %cond.load1, label %else2

define <16 x float> @test2(float* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test2:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test2:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test2:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test2:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind
  %imask = bitcast i16 %mask to <16 x i1>
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> %imask, <16 x float> undef)
  ret <16 x float> %res
}

define <16 x i32> @test3(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test3:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovdqa64 %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test3:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test3:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test3:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i64> %sext_ind
  %imask = bitcast i16 %mask to <16 x i1>
  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32> undef)
  ret <16 x i32> %res
}

define <16 x i32> @test4(i32* %base, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test4:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    kmovw %k1, %k2
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
; KNL_64-NEXT:    vmovdqa64 %zmm1, %zmm2
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test4:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT:    kmovw %k1, %k2
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test4:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kmovw %k1, %k2
; SKX-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
; SKX-NEXT:    vmovdqa64 %zmm1, %zmm2
; SKX-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; SKX-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test4:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    kmovw %k1, %k2
; SKX_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm2
; SKX_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; SKX_32-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
  %imask = bitcast i16 %mask to <16 x i1>
  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32> undef)
  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep.random, i32 4, <16 x i1> %imask, <16 x i32> %gt1)
  %res = add <16 x i32> %gt1, %gt2
  ret <16 x i32> %res
}

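; test4: the same mask feeds two gathers. An x86 gather clobbers its mask
; register (bits are cleared as elements complete), so the mask is copied to
; a second k-register (kmovw %k1, %k2) before the first gather. The same
; copy shows up in test5 below, where one mask feeds two scatters.
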
; SCALAR-LABEL: test5
; SCALAR:        and i16 %scalar_mask, 1
; SCALAR-NEXT:   icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT:   br i1 %{{.*}}, label %cond.store, label %else
; SCALAR: cond.store:
; SCALAR-NEXT:  %Elt0 = extractelement <16 x i32> %val, i64 0
; SCALAR-NEXT:  %Ptr0 = extractelement <16 x i32*> %gep.random, i64 0
; SCALAR-NEXT:  store i32 %Elt0, i32* %Ptr0, align 4
; SCALAR-NEXT:  br label %else
; SCALAR: else:
; SCALAR-NEXT:   and i16 %scalar_mask, 2
; SCALAR-NEXT:   icmp ne i16 %{{.*}}, 0
; SCALAR-NEXT:  br i1 %{{.*}}, label %cond.store1, label %else2

define void @test5(i32* %base, <16 x i32> %ind, i16 %mask, <16 x i32> %val) {
; KNL_64-LABEL: test5:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    kmovw %k1, %k2
; KNL_64-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; KNL_64-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test5:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT:    kmovw %k1, %k2
; KNL_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
; KNL_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test5:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kmovw %k1, %k2
; SKX-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k2}
; SKX-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test5:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    kmovw %k1, %k2
; SKX_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k2}
; SKX_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <16 x i32*> %broadcast.splatinsert, <16 x i32*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr i32, <16 x i32*> %broadcast.splat, <16 x i32> %ind
  %imask = bitcast i16 %mask to <16 x i1>
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %val, <16 x i32*> %gep.random, i32 4, <16 x i1> %imask)
  ret void
}

declare void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32>, <8 x i32*>, i32, <8 x i1>)
declare void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32>, <16 x i32*>, i32, <16 x i1>)

; SCALAR-LABEL: test6
; SCALAR:        store i32 %Elt0, i32* %Ptr01, align 4
; SCALAR-NEXT:   %Elt1 = extractelement <8 x i32> %a1, i64 1
; SCALAR-NEXT:   %Ptr12 = extractelement <8 x i32*> %ptr, i64 1
; SCALAR-NEXT:   store i32 %Elt1, i32* %Ptr12, align 4
; SCALAR-NEXT:   %Elt2 = extractelement <8 x i32> %a1, i64 2
; SCALAR-NEXT:   %Ptr23 = extractelement <8 x i32*> %ptr, i64 2
; SCALAR-NEXT:   store i32 %Elt2, i32* %Ptr23, align 4

define <8 x i32> @test6(<8 x i32> %a1, <8 x i32*> %ptr) {
; KNL_64-LABEL: test6:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT:    vmovdqa %ymm2, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test6:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_32-NEXT:    movw $255, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kmovw %k1, %k2
; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm2 {%k2}
; KNL_32-NEXT:    vpscatterdd %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT:    vmovdqa %ymm2, %ymm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test6:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kxnorw %k0, %k0, %k2
; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
; SKX-NEXT:    vmovdqa %ymm2, %ymm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test6:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k2
; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm2 {%k2}
; SKX_32-NEXT:    vpscatterdd %ymm0, (,%ymm1) {%k1}
; SKX_32-NEXT:    vmovdqa %ymm2, %ymm0
; SKX_32-NEXT:    retl

  %a = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)

  call void @llvm.masked.scatter.v8i32.v8p0i32(<8 x i32> %a1, <8 x i32*> %ptr, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
  ret <8 x i32> %a
}

define <8 x i32> @test7(i32* %base, <8 x i32> %ind, i8 %mask) {
;
; KNL_64-LABEL: test7:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_64-NEXT:    kmovw %esi, %k0
; KNL_64-NEXT:    kshiftlw $8, %k0, %k0
; KNL_64-NEXT:    kshiftrw $8, %k0, %k1
; KNL_64-NEXT:    kmovw %k1, %k2
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k2}
; KNL_64-NEXT:    vmovdqa64 %zmm1, %zmm2
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test7:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT:    kmovw %ecx, %k0
; KNL_32-NEXT:    kshiftlw $8, %k0, %k0
; KNL_32-NEXT:    kshiftrw $8, %k0, %k1
; KNL_32-NEXT:    kmovw %k1, %k2
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k2}
; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test7:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kmovw %k1, %k2
; SKX-NEXT:    vpgatherdd (%rdi,%ymm0,4), %ymm1 {%k2}
; SKX-NEXT:    vmovdqa %ymm1, %ymm2
; SKX-NEXT:    vpgatherdd (%rdi,%ymm0,4), %ymm2 {%k1}
; SKX-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test7:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kmovb {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    kmovw %k1, %k2
; SKX_32-NEXT:    vpgatherdd (%eax,%ymm0,4), %ymm1 {%k2}
; SKX_32-NEXT:    vmovdqa %ymm1, %ymm2
; SKX_32-NEXT:    vpgatherdd (%eax,%ymm0,4), %ymm2 {%k1}
; SKX_32-NEXT:    vpaddd %ymm2, %ymm1, %ymm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <8 x i32*> undef, i32* %base, i32 0
  %broadcast.splat = shufflevector <8 x i32*> %broadcast.splatinsert, <8 x i32*> undef, <8 x i32> zeroinitializer

  %gep.random = getelementptr i32, <8 x i32*> %broadcast.splat, <8 x i32> %ind
  %imask = bitcast i8 %mask to <8 x i1>
  %gt1 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32> undef)
  %gt2 = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %gep.random, i32 4, <8 x i1> %imask, <8 x i32> %gt1)
  %res = add <8 x i32> %gt1, %gt2
  ret <8 x i32> %res
}

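; test7: KNL has no AVX512VL, so the <8 x i32> gather is performed with the
; 512-bit instruction; the i8 mask is moved into a k-register and its upper
; eight bits are cleared with the kshiftlw/kshiftrw pair. SKX can use the
; 256-bit form directly.
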
; No uniform base in this case; the <8 x i64> index vectors contain the
; addresses themselves, so each gather call will be split into two.
define <16 x i32> @test8(<16 x i32*> %ptr.random, <16 x i32> %ind, i16 %mask) {
; KNL_64-LABEL: test8:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kmovw %edi, %k1
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
; KNL_64-NEXT:    kmovw %k2, %k3
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k3}
; KNL_64-NEXT:    kmovw %k1, %k3
; KNL_64-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k3}
; KNL_64-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm4
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test8:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; KNL_32-NEXT:    kmovw %k1, %k2
; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k2}
; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm2
; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test8:
; SKX:       # %bb.0:
; SKX-NEXT:    kmovw %edi, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    kmovw %k2, %k3
; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k3}
; SKX-NEXT:    kmovw %k1, %k3
; SKX-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k3}
; SKX-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm4
; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k1}
; SKX-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT:    vpaddd %zmm0, %zmm4, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test8:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; SKX_32-NEXT:    kmovw %k1, %k2
; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k2}
; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm2
; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT:    vpaddd %zmm2, %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %imask = bitcast i16 %mask to <16 x i1>
  %gt1 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32> undef)
  %gt2 = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptr.random, i32 4, <16 x i1> %imask, <16 x i32> %gt1)
  %res = add <16 x i32> %gt1, %gt2
  ret <16 x i32> %res
}

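; test8: the split only happens where pointers are 64 bits wide and the
; sixteen pointers occupy two zmm registers; on the 32-bit targets they fit
; in a single zmm, so one vpgatherdd per gather suffices.
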
%struct.RT = type { i8, [10 x [20 x i32]], i8 }
%struct.ST = type { i32, double, %struct.RT }

; Masked gather for aggregate types
; Test9 and Test10 should give the same result (scalar and vector indices in GEP)

define <8 x i32> @test9(%struct.ST* %base, <8 x i64> %ind1, <8 x i32> %ind5) {
; KNL_64-LABEL: test9:
; KNL_64:       # %bb.0: # %entry
; KNL_64-NEXT:    vpbroadcastq %rdi, %zmm2
; KNL_64-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824]
; KNL_64-NEXT:    vpmuludq %zmm3, %zmm0, %zmm4
; KNL_64-NEXT:    vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT:    vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT:    vpsllq $32, %zmm0, %zmm0
; KNL_64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; KNL_64-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; KNL_64-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test9:
; KNL_32:       # %bb.0: # %entry
; KNL_32-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm2
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80]
; KNL_32-NEXT:    vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT:    vpmovqd %zmm0, %ymm0
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
; KNL_32-NEXT:    vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT:    vpaddd %ymm0, %ymm2, %ymm1
; KNL_32-NEXT:    movw $255, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm0 {%k1}
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: test9:
; SKX_SMALL:       # %bb.0: # %entry
; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: test9:
; SKX_LARGE:       # %bb.0: # %entry
; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
; SKX_LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: test9:
; SKX_32:       # %bb.0: # %entry
; SKX_32-NEXT:    vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
; SKX_32-NEXT:    vpmovqd %zmm0, %ymm0
; SKX_32-NEXT:    vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm0 {%k1}
; SKX_32-NEXT:    retl
entry:
  %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
  %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer

  %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %ind1, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, <8 x i32> %ind5, <8 x i64> <i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13, i64 13>
  %res = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %res
}

define <8 x i32> @test10(%struct.ST* %base, <8 x i64> %i1, <8 x i32> %ind5) {
; KNL_64-LABEL: test10:
; KNL_64:       # %bb.0: # %entry
; KNL_64-NEXT:    vpbroadcastq %rdi, %zmm2
; KNL_64-NEXT:    vpbroadcastq {{.*#+}} zmm3 = [824,824,824,824,824,824,824,824]
; KNL_64-NEXT:    vpmuludq %zmm3, %zmm0, %zmm4
; KNL_64-NEXT:    vpsrlq $32, %zmm0, %zmm0
; KNL_64-NEXT:    vpmuludq %zmm3, %zmm0, %zmm0
; KNL_64-NEXT:    vpsllq $32, %zmm0, %zmm0
; KNL_64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0
; KNL_64-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; KNL_64-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; KNL_64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; KNL_64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0
; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test10:
; KNL_32:       # %bb.0: # %entry
; KNL_32-NEXT:    vpbroadcastd {{[0-9]+}}(%esp), %ymm2
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [80,80,80,80,80,80,80,80]
; KNL_32-NEXT:    vpmulld %ymm3, %ymm1, %ymm1
; KNL_32-NEXT:    vpmovqd %zmm0, %ymm0
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm3 = [820,820,820,820,820,820,820,820]
; KNL_32-NEXT:    vpmulld %ymm3, %ymm0, %ymm0
; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [68,68,68,68,68,68,68,68]
; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; KNL_32-NEXT:    vpaddd %ymm0, %ymm2, %ymm1
; KNL_32-NEXT:    movw $255, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    vpgatherdd (,%zmm1), %zmm0 {%k1}
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: test10:
; SKX_SMALL:       # %bb.0: # %entry
; SKX_SMALL-NEXT:    vpbroadcastq %rdi, %zmm2
; SKX_SMALL-NEXT:    vpmullq {{.*}}(%rip){1to8}, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; SKX_SMALL-NEXT:    vpmuldq {{.*}}(%rip){1to8}, %zmm1, %zmm1
; SKX_SMALL-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: test10:
; SKX_LARGE:       # %bb.0: # %entry
; SKX_LARGE-NEXT:    vpbroadcastq %rdi, %zmm2
; SKX_LARGE-NEXT:    vpmovzxdq {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpmuldq (%rax){1to8}, %zmm1, %zmm1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpmullq (%rax){1to8}, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpaddq %zmm1, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpaddq %zmm0, %zmm2, %zmm0
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vpgatherqd (,%zmm1), %ymm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: test10:
; SKX_32:       # %bb.0: # %entry
; SKX_32-NEXT:    vpmulld {{\.LCPI.*}}{1to8}, %ymm1, %ymm1
; SKX_32-NEXT:    vpmovqd %zmm0, %ymm0
; SKX_32-NEXT:    vpmulld {{\.LCPI.*}}{1to8}, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd {{[0-9]+}}(%esp){1to8}, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; SKX_32-NEXT:    vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherdd (,%ymm1), %ymm0 {%k1}
; SKX_32-NEXT:    retl
entry:
  %broadcast.splatinsert = insertelement <8 x %struct.ST*> undef, %struct.ST* %base, i32 0
  %broadcast.splat = shufflevector <8 x %struct.ST*> %broadcast.splatinsert, <8 x %struct.ST*> undef, <8 x i32> zeroinitializer

  %arrayidx = getelementptr %struct.ST, <8 x %struct.ST*> %broadcast.splat, <8 x i64> %i1, i32 2, i32 1, <8 x i32> %ind5, i64 13
  %res = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %arrayidx, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %res
}

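; test9/test10: the vector GEP over %struct.ST is lowered to plain integer
; arithmetic, and the magic constants are struct-layout math. On x86-64,
; sizeof(%struct.ST) = 824 scales %ind1; on i386 (4-byte double alignment)
; the size is 820, the [20 x i32] row stride is 80, and the fixed
; displacement 68 folds the field offsets together with the trailing index
; (12 + 4 + 13*4).
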
; Splat index in GEP, requires broadcast
define <16 x float> @test11(float* %base, i32 %ind) {
; KNL_64-LABEL: test11:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    movslq %esi, %rax
; KNL_64-NEXT:    leaq (%rdi,%rax,4), %rax
; KNL_64-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test11:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    shll $2, %eax
; KNL_32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test11:
; SKX:       # %bb.0:
; SKX-NEXT:    movslq %esi, %rax
; SKX-NEXT:    leaq (%rdi,%rax,4), %rax
; SKX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rax,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test11:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    shll $2, %eax
; SKX_32-NEXT:    addl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

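; test11: a scalar splat index needs no vector arithmetic; it is folded into
; the scalar base address (movslq+leaq, or shll+addl on 32-bit), and the
; gather then runs with an all-zero index vector.
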
; We are checking the uniform base here; it is taken directly from the input
; to vgatherdps.
define <16 x float> @test12(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test12:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test12:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test12:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test12:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

; The same as the previous, but the mask is undefined
define <16 x float> @test13(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test13:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test13:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test13:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test13:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, float* %base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

; The base pointer is not a splat, so no uniform base can be found
define <16 x float> @test14(float* %base, i32 %ind, <16 x float*> %vec) {
; KNL_64-LABEL: test14:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vmovq %xmm0, %rax
; KNL_64-NEXT:    vmovd %esi, %xmm0
; KNL_64-NEXT:    vpbroadcastd %xmm0, %ymm0
; KNL_64-NEXT:    vpmovsxdq %ymm0, %zmm0
; KNL_64-NEXT:    vpsllq $2, %zmm0, %zmm0
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherqps (%rax,%zmm0), %ymm1 {%k1}
; KNL_64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test14:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vmovd %xmm0, %eax
; KNL_32-NEXT:    vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test14:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovq %xmm0, %rax
; SKX-NEXT:    vpbroadcastd %esi, %ymm0
; SKX-NEXT:    vpmovsxdq %ymm0, %zmm0
; SKX-NEXT:    vpsllq $2, %zmm0, %zmm0
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherqps (%rax,%zmm0), %ymm1 {%k1}
; SKX-NEXT:    vinsertf64x4 $1, %ymm1, %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test14:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vmovd %xmm0, %eax
; SKX_32-NEXT:    vpslld $2, {{[0-9]+}}(%esp){1to16}, %zmm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm1), %zmm0 {%k1}
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x float*> %vec, float* %base, i32 1
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, i32 %ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float> %res
}

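; test14: %base is inserted at element 1, but the splat shuffle broadcasts
; element 0 of %vec, so the IR offers no uniform base pointer; codegen
; instead extracts lane 0 (vmovq/vmovd) and uses it as the scalar base.
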
declare <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*>, i32, <4 x i1>, <4 x float>)
declare <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*>, i32, <4 x i1>, <4 x double>)
declare <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*>, i32, <2 x i1>, <2 x double>)

; Gather smaller than existing instruction
define <4 x float> @test15(float* %base, <4 x i32> %ind, <4 x i1> %mask) {
; KNL_64-LABEL: test15:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test15:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test15:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test15:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT:    vmovaps %xmm1, %xmm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.random = getelementptr float, float* %base, <4 x i64> %sext_ind
  %res = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> %gep.random, i32 4, <4 x i1> %mask, <4 x float> undef)
  ret <4 x float> %res
}

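; test15: without AVX512VL (KNL) the <4 x float> gather is widened to the
; 512-bit instruction, and the 4-bit mask is zero-extended into the
; k-register with the kshiftlw/kshiftrw pair; with VL (SKX) a 128-bit
; vgatherdps is used directly.
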
; Gather smaller than existing instruction
define <4 x double> @test16(double* %base, <4 x i32> %ind, <4 x i1> %mask, <4 x double> %src0) {
; KNL_64-LABEL: test16:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1}
; KNL_64-NEXT:    vmovapd %ymm2, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test16:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_32-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k1}
; KNL_32-NEXT:    vmovapd %ymm2, %ymm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test16:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vgatherdpd (%rdi,%xmm0,8), %ymm2 {%k1}
; SKX-NEXT:    vmovapd %ymm2, %ymm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test16:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vgatherdpd (%eax,%xmm0,8), %ymm2 {%k1}
; SKX_32-NEXT:    vmovapd %ymm2, %ymm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <4 x i32> %ind to <4 x i64>
  %gep.random = getelementptr double, double* %base, <4 x i64> %sext_ind
  %res = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %gep.random, i32 4, <4 x i1> %mask, <4 x double> %src0)
  ret <4 x double> %res
}

define <2 x double> @test17(double* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x double> %src0) {
; KNL_64-LABEL: test17:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k1}
; KNL_64-NEXT:    vmovapd %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test17:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k1}
; KNL_32-NEXT:    vmovapd %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test17:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vgatherdpd (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT:    vmovapd %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test17:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vgatherdpd (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT:    vmovapd %xmm2, %xmm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr double, double* %base, <2 x i64> %sext_ind
  %res = call <2 x double> @llvm.masked.gather.v2f64.v2p0f64(<2 x double*> %gep.random, i32 4, <2 x i1> %mask, <2 x double> %src0)
  ret <2 x double> %res
}

declare void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32>, <4 x i32*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double>, <4 x double*>, i32, <4 x i1>)
declare void @llvm.masked.scatter.v2i64.v2p0i64(<2 x i64>, <2 x i64*>, i32, <2 x i1>)
declare void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32>, <2 x i32*>, i32, <2 x i1>)
declare void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float>, <2 x float*>, i32, <2 x i1>)

define void @test18(<4 x i32> %a1, <4 x i32*> %ptr, <4 x i1> %mask) {
; KNL_64-LABEL: test18:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $ymm1 killed $ymm1 def $zmm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpslld $31, %xmm2, %xmm2
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test18:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpslld $31, %xmm2, %xmm2
; KNL_32-NEXT:    vptestmd %zmm2, %zmm2, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    vpscatterdd %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test18:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm2, %xmm2
; SKX-NEXT:    vpmovd2m %xmm2, %k1
; SKX-NEXT:    vpscatterqd %xmm0, (,%ymm1) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test18:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm2, %xmm2
; SKX_32-NEXT:    vpmovd2m %xmm2, %k1
; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %a1, <4 x i32*> %ptr, i32 4, <4 x i1> %mask)
  ret void
}

define void @test19(<4 x double> %a1, double* %ptr, <4 x i1> %mask, <4 x i64> %ind) {
; KNL_64-LABEL: test19:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; KNL_64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_64-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vscatterqpd %zmm0, (%rdi,%zmm2,8) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test19:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $ymm2 killed $ymm2 def $zmm2
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_32-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vscatterqpd %zmm0, (%eax,%zmm2,8) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test19:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vscatterqpd %ymm0, (%rdi,%ymm2,8) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test19:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vscatterqpd %ymm0, (%eax,%ymm2,8) {%k1}
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  %gep = getelementptr double, double* %ptr, <4 x i64> %ind
  call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %a1, <4 x double*> %gep, i32 8, <4 x i1> %mask)
  ret void
}

; Data type requires widening
define void @test20(<2 x float> %a1, <2 x float*> %ptr, <2 x i1> %mask) {
; KNL_64-LABEL: test20:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vscatterqps %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test20:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    vscatterdps %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test20:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX-NEXT:    vpmovq2m %xmm2, %k1
; SKX-NEXT:    vscatterqps %xmm0, (,%xmm1) {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test20:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
; SKX_32-NEXT:    vscatterdps %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v2f32.v2p0f32(<2 x float> %a1, <2 x float*> %ptr, i32 4, <2 x i1> %mask)
  ret void
}

; Data type requires promotion
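; <2 x i32> is likewise not legal and has to be legalized before
; selection; as in the widened float case, only the low two mask bits
; survive the kshift pair, keeping the extra lanes inactive.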
define void @test21(<2 x i32>%a1, <2 x i32*> %ptr, <2 x i1>%mask) {
; KNL_64-LABEL: test21:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test21:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    vpscatterdd %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test21:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX-NEXT:    vpmovq2m %xmm2, %k1
; SKX-NEXT:    vpscatterqd %xmm0, (,%xmm1) {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test21:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> %mask)
  ret void
}

; The result type requires widening
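; For gathers the widened lanes also need a pass-through value, so the
; <2 x float> result below is produced in a wider register (the passthru
; operand of the gather) and the original two lanes are copied out
; afterwards with a plain vector move.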
declare <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*>, i32, <2 x i1>, <2 x float>)

define <2 x float> @test22(float* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: test22:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT:    vmovaps %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test22:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT:    vmovaps %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test22:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vgatherdps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test22:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vgatherdps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT:    vmovaps %xmm2, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float>%res
}

define <2 x float> @test22a(float* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: test22a:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT:    vmovaps %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test22a:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vgatherqps (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT:    vmovaps %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test22a:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vgatherqps (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT:    vmovaps %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test22a:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vgatherqps (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT:    vmovaps %xmm2, %xmm0
; SKX_32-NEXT:    retl
  %gep.random = getelementptr float, float* %base, <2 x i64> %ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float>%res
}

declare <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*>, i32, <2 x i1>, <2 x i32>)
declare <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*>, i32, <2 x i1>, <2 x i64>)

define <2 x i32> @test23(i32* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: test23:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm2 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test23:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm2 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test23:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vpgatherdd (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT:    vmovdqa %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test23:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpgatherdd (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm2, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32>%res
}

define <2 x i32> @test23b(i32* %base, <2 x i64> %ind, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: test23b:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test23b:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test23b:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vpgatherqd (%rdi,%xmm0,4), %xmm2 {%k1}
; SKX-NEXT:    vmovdqa %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test23b:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpgatherqd (%eax,%xmm0,4), %xmm2 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm2, %xmm0
; SKX_32-NEXT:    retl
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> %mask, <2 x i32> %src0)
  ret <2 x i32>%res
}

define <2 x i32> @test24(i32* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test24:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    movw $3, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test24:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movw $3, %cx
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test24:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $3, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT:    vmovdqa %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test24:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movb $3, %cl
; SKX_32-NEXT:    kmovw %ecx, %k1
; SKX_32-NEXT:    vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm1, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i32, i32* %base, <2 x i64> %sext_ind
  %res = call <2 x i32> @llvm.masked.gather.v2i32.v2p0i32(<2 x i32*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x i32> undef)
  ret <2 x i32>%res
}

define <2 x i64> @test25(i64* %base, <2 x i32> %ind, <2 x i1> %mask, <2 x i64> %src0) {
; KNL_64-LABEL: test25:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vpgatherdq (%rdi,%ymm0,8), %zmm2 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test25:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmq %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpgatherdq (%eax,%ymm0,8), %zmm2 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test25:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX-NEXT:    vpmovq2m %xmm1, %k1
; SKX-NEXT:    vpgatherdq (%rdi,%xmm0,8), %xmm2 {%k1}
; SKX-NEXT:    vmovdqa %xmm2, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test25:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovq2m %xmm1, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpgatherdq (%eax,%xmm0,8), %xmm2 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm2, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> %mask, <2 x i64> %src0)
  ret <2 x i64>%res
}

define <2 x i64> @test26(i64* %base, <2 x i32> %ind, <2 x i64> %src0) {
; KNL_64-LABEL: test26:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    movb $3, %al
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vpgatherdq (%rdi,%ymm0,8), %zmm1 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test26:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movb $3, %cl
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vpgatherdq (%eax,%ymm0,8), %zmm1 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test26:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vpgatherdq (%rdi,%xmm0,8), %xmm1 {%k1}
; SKX-NEXT:    vmovdqa %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test26:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherdq (%eax,%xmm0,8), %xmm1 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm1, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr i64, i64* %base, <2 x i64> %sext_ind
  %res = call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep.random, i32 8, <2 x i1> <i1 true, i1 true>, <2 x i64> %src0)
  ret <2 x i64>%res
}

; Result type requires widening; all-ones mask
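; A constant all-ones mask needs no runtime test: the two active bits are
; materialized directly into a mask register (movw $3/movb $3 plus kmovw),
; or with kxnorw when every consumed mask bit is set.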
define <2 x float> @test27(float* %base, <2 x i32> %ind) {
; KNL_64-LABEL: test27:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    movw $3, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test27:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movw $3, %cx
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test27:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $3, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vgatherdps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test27:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movb $3, %cl
; SKX_32-NEXT:    kmovw %ecx, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT:    vmovaps %xmm1, %xmm0
; SKX_32-NEXT:    retl
  %sext_ind = sext <2 x i32> %ind to <2 x i64>
  %gep.random = getelementptr float, float* %base, <2 x i64> %sext_ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> <i1 true, i1 true>, <2 x float> undef)
  ret <2 x float>%res
}

; Data type requires promotion, mask is all-ones
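; Same <2 x i32> legalization as test21, but the constant mask removes
; the vpsllq/vptestmq (or vpmovq2m) sequence entirely.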
define void @test28(<2 x i32>%a1, <2 x i32*> %ptr) {
; KNL_64-LABEL: test28:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_64-NEXT:    movb $3, %al
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test28:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    movw $3, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    vpscatterdd %zmm0, (,%zmm1) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test28:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vpscatterqd %xmm0, (,%xmm1) {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test28:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movb $3, %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    vpscatterdd %xmm0, (,%xmm1) {%k1}
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %a1, <2 x i32*> %ptr, i32 4, <2 x i1> <i1 true, i1 true>)
  ret void
}

; SCALAR-LABEL: test29
; SCALAR:      extractelement <16 x float*>
; SCALAR-NEXT: load float
; SCALAR-NEXT: insertelement <16 x float>
; SCALAR-NEXT: extractelement <16 x float*>
; SCALAR-NEXT: load float

define <16 x float> @test29(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: test29:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    movw $44, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovaps %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test29:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movw $44, %cx
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovaps %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test29:
; SKX:       # %bb.0:
; SKX-NEXT:    movw $44, %ax
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vmovaps %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test29:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movw $44, %cx
; SKX_32-NEXT:    kmovw %ecx, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vmovaps %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 false, i1 false, i1 true, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x float> undef)
  ret <16 x float>%res
}

declare <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*>, i32, <3 x i1>, <3 x i32>)
define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT:    movw $-3, %ax
; KNL_64-NEXT:    kmovw %eax, %k0
; KNL_64-NEXT:    andl $1, %edi
; KNL_64-NEXT:    kmovw %edi, %k1
; KNL_64-NEXT:    kandw %k0, %k1, %k0
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    kshiftlw $15, %k1, %k1
; KNL_64-NEXT:    kshiftrw $14, %k1, %k1
; KNL_64-NEXT:    korw %k1, %k0, %k0
; KNL_64-NEXT:    movw $-5, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    kandw %k1, %k0, %k0
; KNL_64-NEXT:    kmovw %edx, %k1
; KNL_64-NEXT:    kshiftlw $15, %k1, %k1
; KNL_64-NEXT:    kshiftrw $13, %k1, %k1
; KNL_64-NEXT:    korw %k1, %k0, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT:    vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT:    vpgatherqd (,%zmm0), %ymm2 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test30:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    movw $-3, %ax
; KNL_32-NEXT:    kmovw %eax, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    andl $1, %eax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kandw %k0, %k1, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_32-NEXT:    kshiftrw $14, %k1, %k1
; KNL_32-NEXT:    korw %k1, %k0, %k0
; KNL_32-NEXT:    movw $-5, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kandw %k1, %k0, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_32-NEXT:    kshiftrw $13, %k1, %k1
; KNL_32-NEXT:    korw %k1, %k0, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm2, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test30:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $-3, %al
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    kmovw %edi, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    kandw %k0, %k1, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $6, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    movb $-5, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    kandw %k1, %k0, %k0
; SKX-NEXT:    kmovw %edx, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k1
; SKX-NEXT:    vpmovsxdq %xmm1, %ymm1
; SKX-NEXT:    vpsllq $2, %ymm1, %ymm1
; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpgatherqd (,%ymm0), %xmm2 {%k1}
; SKX-NEXT:    vmovdqa %xmm2, %xmm0
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test30:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movb $-3, %al
; SKX_32-NEXT:    kmovw %eax, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $7, %k1, %k1
; SKX_32-NEXT:    kandw %k0, %k1, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $6, %k1, %k1
; SKX_32-NEXT:    korw %k1, %k0, %k0
; SKX_32-NEXT:    movb $-5, %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kandw %k1, %k0, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $5, %k1, %k1
; SKX_32-NEXT:    korw %k1, %k0, %k1
; SKX_32-NEXT:    vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT:    vpgatherdd (,%xmm0), %xmm2 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm2, %xmm0
; SKX_32-NEXT:    retl

  %sext_ind = sext <3 x i32> %ind to <3 x i64>
  %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
  %res = call <3 x i32> @llvm.masked.gather.v3i32.v3p0i32(<3 x i32*> %gep.random, i32 4, <3 x i1> %mask, <3 x i32> %src0)
  ret <3 x i32>%res
}

; Non-power of 2 scatter
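; A <3 x i1> mask arrives as individual scalar arguments, so each bit is
; spliced into the k-register separately (kmovw, kshift, kand, kor)
; before the operation runs as a widened four-lane scatter.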
declare void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32>, <3 x i32*>, i32, <3 x i1>)
define void @test30b(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x i32> %src0) {
; KNL_64-LABEL: test30b:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm2 killed $xmm2 def $ymm2
; KNL_64-NEXT:    movw $-3, %ax
; KNL_64-NEXT:    kmovw %eax, %k0
; KNL_64-NEXT:    andl $1, %edi
; KNL_64-NEXT:    kmovw %edi, %k1
; KNL_64-NEXT:    kandw %k0, %k1, %k0
; KNL_64-NEXT:    kmovw %esi, %k1
; KNL_64-NEXT:    kshiftlw $15, %k1, %k1
; KNL_64-NEXT:    kshiftrw $14, %k1, %k1
; KNL_64-NEXT:    korw %k1, %k0, %k0
; KNL_64-NEXT:    movw $-5, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    kandw %k1, %k0, %k0
; KNL_64-NEXT:    kmovw %edx, %k1
; KNL_64-NEXT:    kshiftlw $15, %k1, %k1
; KNL_64-NEXT:    kshiftrw $13, %k1, %k1
; KNL_64-NEXT:    korw %k1, %k0, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpmovsxdq %xmm1, %ymm1
; KNL_64-NEXT:    vpsllq $2, %ymm1, %ymm1
; KNL_64-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; KNL_64-NEXT:    vpscatterqd %ymm2, (,%zmm0) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test30b:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm2 killed $xmm2 def $zmm2
; KNL_32-NEXT:    movw $-3, %ax
; KNL_32-NEXT:    kmovw %eax, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    andl $1, %eax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kandw %k0, %k1, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_32-NEXT:    kshiftrw $14, %k1, %k1
; KNL_32-NEXT:    korw %k1, %k0, %k0
; KNL_32-NEXT:    movw $-5, %ax
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kandw %k1, %k0, %k0
; KNL_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; KNL_32-NEXT:    kmovw %eax, %k1
; KNL_32-NEXT:    kshiftlw $15, %k1, %k1
; KNL_32-NEXT:    kshiftrw $13, %k1, %k1
; KNL_32-NEXT:    korw %k1, %k0, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    vpslld $2, %xmm1, %xmm1
; KNL_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; KNL_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test30b:
; SKX:       # %bb.0:
; SKX-NEXT:    movb $-3, %al
; SKX-NEXT:    kmovw %eax, %k0
; SKX-NEXT:    kmovw %edi, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $7, %k1, %k1
; SKX-NEXT:    kandw %k0, %k1, %k0
; SKX-NEXT:    kmovw %esi, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $6, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k0
; SKX-NEXT:    movb $-5, %al
; SKX-NEXT:    kmovw %eax, %k1
; SKX-NEXT:    kandw %k1, %k0, %k0
; SKX-NEXT:    kmovw %edx, %k1
; SKX-NEXT:    kshiftlb $7, %k1, %k1
; SKX-NEXT:    kshiftrb $5, %k1, %k1
; SKX-NEXT:    korw %k1, %k0, %k1
; SKX-NEXT:    vpmovsxdq %xmm1, %ymm1
; SKX-NEXT:    vpsllq $2, %ymm1, %ymm1
; SKX-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; SKX-NEXT:    vpscatterqd %xmm2, (,%ymm0) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test30b:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movb $-3, %al
; SKX_32-NEXT:    kmovw %eax, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $7, %k1, %k1
; SKX_32-NEXT:    kandw %k0, %k1, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $6, %k1, %k1
; SKX_32-NEXT:    korw %k1, %k0, %k0
; SKX_32-NEXT:    movb $-5, %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kandw %k1, %k0, %k0
; SKX_32-NEXT:    movb {{[0-9]+}}(%esp), %al
; SKX_32-NEXT:    kmovw %eax, %k1
; SKX_32-NEXT:    kshiftlb $7, %k1, %k1
; SKX_32-NEXT:    kshiftrb $5, %k1, %k1
; SKX_32-NEXT:    korw %k1, %k0, %k1
; SKX_32-NEXT:    vpslld $2, %xmm1, %xmm1
; SKX_32-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; SKX_32-NEXT:    vpscatterdd %xmm2, (,%xmm0) {%k1}
; SKX_32-NEXT:    retl
  %sext_ind = sext <3 x i32> %ind to <3 x i64>
  %gep.random = getelementptr i32, <3 x i32*> %base, <3 x i64> %sext_ind
  call void @llvm.masked.scatter.v3i32.v3p0i32(<3 x i32> %src0, <3 x i32*> %gep.random, i32 4, <3 x i1> %mask)
  ret void
}

declare <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**>, i32, <16 x i1>, <16 x float*>)
define <16 x float*> @test31(<16 x float**> %ptrs) {
; KNL_64-LABEL: test31:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k2
; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
; KNL_64-NEXT:    vmovdqa64 %zmm2, %zmm0
; KNL_64-NEXT:    vmovdqa64 %zmm3, %zmm1
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test31:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
; KNL_32-NEXT:    vmovdqa64 %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test31:
; SKX:       # %bb.0:
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    kxnorw %k0, %k0, %k2
; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm2 {%k2}
; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm3 {%k1}
; SKX-NEXT:    vmovdqa64 %zmm2, %zmm0
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm1
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test31:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm1 {%k1}
; SKX_32-NEXT:    vmovdqa64 %zmm1, %zmm0
; SKX_32-NEXT:    retl

  %res = call <16 x float*> @llvm.masked.gather.v16p0f32.v16p0p0f32(<16 x float**> %ptrs, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float*> undef)
  ret <16 x float*>%res
}

define <16 x i32> @test_gather_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0)  {
; KNL_64-LABEL: test_gather_16i32:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
; KNL_64-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; KNL_64-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k1}
; KNL_64-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_gather_16i32:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
; KNL_32-NEXT:    vmovdqa64 %zmm2, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_gather_16i32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
; SKX-NEXT:    vpmovd2m %zmm2, %k1
; SKX-NEXT:    vextracti64x4 $1, %zmm3, %ymm2
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    vpgatherqd (,%zmm1), %ymm2 {%k2}
; SKX-NEXT:    vpgatherqd (,%zmm0), %ymm3 {%k1}
; SKX-NEXT:    vinserti64x4 $1, %ymm2, %zmm3, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_gather_16i32:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
; SKX_32-NEXT:    vpgatherdd (,%zmm0), %zmm2 {%k1}
; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
; SKX_32-NEXT:    retl
  %res = call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %ptrs, i32 4, <16 x i1> %mask, <16 x i32> %src0)
  ret <16 x i32> %res
}
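; With 64-bit elements or 64-bit pointers a single 512-bit operation only
; covers eight lanes, so the 16-element tests below split into two
; gathers/scatters; kshiftrw $8 extracts the mask bits for the upper half.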
 | 
						|
define <16 x i64> @test_gather_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
 | 
						|
; KNL_64-LABEL: test_gather_16i64:
 | 
						|
; KNL_64:       # %bb.0:
 | 
						|
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 | 
						|
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm3 {%k1}
 | 
						|
; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm4 {%k2}
 | 
						|
; KNL_64-NEXT:    vmovdqa64 %zmm3, %zmm0
 | 
						|
; KNL_64-NEXT:    vmovdqa64 %zmm4, %zmm1
 | 
						|
; KNL_64-NEXT:    retq
 | 
						|
;
 | 
						|
; KNL_32-LABEL: test_gather_16i64:
 | 
						|
; KNL_32:       # %bb.0:
 | 
						|
; KNL_32-NEXT:    pushl %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
 | 
						|
; KNL_32-NEXT:    .cfi_offset %ebp, -8
 | 
						|
; KNL_32-NEXT:    movl %esp, %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 | 
						|
; KNL_32-NEXT:    andl $-64, %esp
 | 
						|
; KNL_32-NEXT:    subl $64, %esp
 | 
						|
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
 | 
						|
; KNL_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
 | 
						|
; KNL_32-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_32-NEXT:    vpgatherdq (,%ymm0), %zmm2 {%k1}
 | 
						|
; KNL_32-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
 | 
						|
; KNL_32-NEXT:    vpgatherdq (,%ymm0), %zmm1 {%k2}
 | 
						|
; KNL_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 | 
						|
; KNL_32-NEXT:    movl %ebp, %esp
 | 
						|
; KNL_32-NEXT:    popl %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 | 
						|
; KNL_32-NEXT:    retl
 | 
						|
;
 | 
						|
; SKX-LABEL: test_gather_16i64:
 | 
						|
; SKX:       # %bb.0:
 | 
						|
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpmovd2m %zmm2, %k1
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX-NEXT:    vpgatherqq (,%zmm0), %zmm3 {%k1}
 | 
						|
; SKX-NEXT:    vpgatherqq (,%zmm1), %zmm4 {%k2}
 | 
						|
; SKX-NEXT:    vmovdqa64 %zmm3, %zmm0
 | 
						|
; SKX-NEXT:    vmovdqa64 %zmm4, %zmm1
 | 
						|
; SKX-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX_32-LABEL: test_gather_16i64:
 | 
						|
; SKX_32:       # %bb.0:
 | 
						|
; SKX_32-NEXT:    pushl %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
 | 
						|
; SKX_32-NEXT:    .cfi_offset %ebp, -8
 | 
						|
; SKX_32-NEXT:    movl %esp, %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
 | 
						|
; SKX_32-NEXT:    andl $-64, %esp
 | 
						|
; SKX_32-NEXT:    subl $64, %esp
 | 
						|
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 | 
						|
; SKX_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
 | 
						|
; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX_32-NEXT:    vpgatherdq (,%ymm0), %zmm2 {%k1}
 | 
						|
; SKX_32-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
 | 
						|
; SKX_32-NEXT:    vpgatherdq (,%ymm0), %zmm1 {%k2}
 | 
						|
; SKX_32-NEXT:    vmovdqa64 %zmm2, %zmm0
 | 
						|
; SKX_32-NEXT:    movl %ebp, %esp
 | 
						|
; SKX_32-NEXT:    popl %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 | 
						|
; SKX_32-NEXT:    retl
 | 
						|
  %res = call <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32 4, <16 x i1> %mask, <16 x i64> %src0)
 | 
						|
  ret <16 x i64> %res
 | 
						|
}
 | 
						|
declare <16 x i64> @llvm.masked.gather.v16i64.v16p0i64(<16 x i64*> %ptrs, i32, <16 x i1> %mask, <16 x i64> %src0)
 | 
						|
define <16 x float> @test_gather_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
 | 
						|
; KNL_64-LABEL: test_gather_16f32:
 | 
						|
; KNL_64:       # %bb.0:
 | 
						|
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 | 
						|
; KNL_64-NEXT:    vextractf64x4 $1, %zmm3, %ymm2
 | 
						|
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_64-NEXT:    vgatherqps (,%zmm1), %ymm2 {%k2}
 | 
						|
; KNL_64-NEXT:    vgatherqps (,%zmm0), %ymm3 {%k1}
 | 
						|
; KNL_64-NEXT:    vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
 | 
						|
; KNL_64-NEXT:    retq
 | 
						|
;
 | 
						|
; KNL_32-LABEL: test_gather_16f32:
 | 
						|
; KNL_32:       # %bb.0:
 | 
						|
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
 | 
						|
; KNL_32-NEXT:    vgatherdps (,%zmm0), %zmm2 {%k1}
 | 
						|
; KNL_32-NEXT:    vmovaps %zmm2, %zmm0
 | 
						|
; KNL_32-NEXT:    retl
 | 
						|
;
 | 
						|
; SKX-LABEL: test_gather_16f32:
 | 
						|
; SKX:       # %bb.0:
 | 
						|
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpmovd2m %zmm2, %k1
 | 
						|
; SKX-NEXT:    vextractf64x4 $1, %zmm3, %ymm2
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX-NEXT:    vgatherqps (,%zmm1), %ymm2 {%k2}
 | 
						|
; SKX-NEXT:    vgatherqps (,%zmm0), %ymm3 {%k1}
 | 
						|
; SKX-NEXT:    vinsertf64x4 $1, %ymm2, %zmm3, %zmm0
 | 
						|
; SKX-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX_32-LABEL: test_gather_16f32:
 | 
						|
; SKX_32:       # %bb.0:
 | 
						|
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 | 
						|
; SKX_32-NEXT:    vgatherdps (,%zmm0), %zmm2 {%k1}
 | 
						|
; SKX_32-NEXT:    vmovaps %zmm2, %zmm0
 | 
						|
; SKX_32-NEXT:    retl
 | 
						|
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %ptrs, i32 4, <16 x i1> %mask, <16 x float> %src0)
 | 
						|
  ret <16 x float> %res
 | 
						|
}
 | 
						|
define <16 x double> @test_gather_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
 | 
						|
; KNL_64-LABEL: test_gather_16f64:
 | 
						|
; KNL_64:       # %bb.0:
 | 
						|
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 | 
						|
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_64-NEXT:    vgatherqpd (,%zmm0), %zmm3 {%k1}
 | 
						|
; KNL_64-NEXT:    vgatherqpd (,%zmm1), %zmm4 {%k2}
 | 
						|
; KNL_64-NEXT:    vmovapd %zmm3, %zmm0
 | 
						|
; KNL_64-NEXT:    vmovapd %zmm4, %zmm1
 | 
						|
; KNL_64-NEXT:    retq
 | 
						|
;
 | 
						|
; KNL_32-LABEL: test_gather_16f64:
 | 
						|
; KNL_32:       # %bb.0:
 | 
						|
; KNL_32-NEXT:    pushl %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
 | 
						|
; KNL_32-NEXT:    .cfi_offset %ebp, -8
 | 
						|
; KNL_32-NEXT:    movl %esp, %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
 | 
						|
; KNL_32-NEXT:    andl $-64, %esp
 | 
						|
; KNL_32-NEXT:    subl $64, %esp
 | 
						|
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
 | 
						|
; KNL_32-NEXT:    vmovapd 8(%ebp), %zmm1
 | 
						|
; KNL_32-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_32-NEXT:    vgatherdpd (,%ymm0), %zmm2 {%k1}
 | 
						|
; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
 | 
						|
; KNL_32-NEXT:    vgatherdpd (,%ymm0), %zmm1 {%k2}
 | 
						|
; KNL_32-NEXT:    vmovapd %zmm2, %zmm0
 | 
						|
; KNL_32-NEXT:    movl %ebp, %esp
 | 
						|
; KNL_32-NEXT:    popl %ebp
 | 
						|
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
 | 
						|
; KNL_32-NEXT:    retl
 | 
						|
;
 | 
						|
; SKX-LABEL: test_gather_16f64:
 | 
						|
; SKX:       # %bb.0:
 | 
						|
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpmovd2m %zmm2, %k1
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX-NEXT:    vgatherqpd (,%zmm0), %zmm3 {%k1}
 | 
						|
; SKX-NEXT:    vgatherqpd (,%zmm1), %zmm4 {%k2}
 | 
						|
; SKX-NEXT:    vmovapd %zmm3, %zmm0
 | 
						|
; SKX-NEXT:    vmovapd %zmm4, %zmm1
 | 
						|
; SKX-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX_32-LABEL: test_gather_16f64:
 | 
						|
; SKX_32:       # %bb.0:
 | 
						|
; SKX_32-NEXT:    pushl %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
 | 
						|
; SKX_32-NEXT:    .cfi_offset %ebp, -8
 | 
						|
; SKX_32-NEXT:    movl %esp, %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
 | 
						|
; SKX_32-NEXT:    andl $-64, %esp
 | 
						|
; SKX_32-NEXT:    subl $64, %esp
 | 
						|
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 | 
						|
; SKX_32-NEXT:    vmovapd 8(%ebp), %zmm1
 | 
						|
; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX_32-NEXT:    vgatherdpd (,%ymm0), %zmm2 {%k1}
 | 
						|
; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
 | 
						|
; SKX_32-NEXT:    vgatherdpd (,%ymm0), %zmm1 {%k2}
 | 
						|
; SKX_32-NEXT:    vmovapd %zmm2, %zmm0
 | 
						|
; SKX_32-NEXT:    movl %ebp, %esp
 | 
						|
; SKX_32-NEXT:    popl %ebp
 | 
						|
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
 | 
						|
; SKX_32-NEXT:    retl
 | 
						|
  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32 4, <16 x i1> %mask, <16 x double> %src0)
 | 
						|
  ret <16 x double> %res
 | 
						|
}
 | 
						|
declare <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %ptrs, i32, <16 x i1> %mask, <16 x double> %src0)
 | 
						|
define void @test_scatter_16i32(<16 x i32*> %ptrs, <16 x i1> %mask, <16 x i32> %src0)  {
 | 
						|
; KNL_64-LABEL: test_scatter_16i32:
 | 
						|
; KNL_64:       # %bb.0:
 | 
						|
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
 | 
						|
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; KNL_64-NEXT:    vpscatterqd %ymm3, (,%zmm0) {%k1}
 | 
						|
; KNL_64-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
 | 
						|
; KNL_64-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k2}
 | 
						|
; KNL_64-NEXT:    vzeroupper
 | 
						|
; KNL_64-NEXT:    retq
 | 
						|
;
 | 
						|
; KNL_32-LABEL: test_scatter_16i32:
 | 
						|
; KNL_32:       # %bb.0:
 | 
						|
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
 | 
						|
; KNL_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
 | 
						|
; KNL_32-NEXT:    vzeroupper
 | 
						|
; KNL_32-NEXT:    retl
 | 
						|
;
 | 
						|
; SKX-LABEL: test_scatter_16i32:
 | 
						|
; SKX:       # %bb.0:
 | 
						|
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
 | 
						|
; SKX-NEXT:    vpmovd2m %zmm2, %k1
 | 
						|
; SKX-NEXT:    kshiftrw $8, %k1, %k2
 | 
						|
; SKX-NEXT:    vpscatterqd %ymm3, (,%zmm0) {%k1}
 | 
						|
; SKX-NEXT:    vextracti64x4 $1, %zmm3, %ymm0
 | 
						|
; SKX-NEXT:    vpscatterqd %ymm0, (,%zmm1) {%k2}
 | 
						|
; SKX-NEXT:    vzeroupper
 | 
						|
; SKX-NEXT:    retq
 | 
						|
;
 | 
						|
; SKX_32-LABEL: test_scatter_16i32:
 | 
						|
; SKX_32:       # %bb.0:
 | 
						|
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
 | 
						|
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
 | 
						|
; SKX_32-NEXT:    vpscatterdd %zmm2, (,%zmm0) {%k1}
 | 
						|
; SKX_32-NEXT:    vzeroupper
 | 
						|
; SKX_32-NEXT:    retl
 | 
						|
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %ptrs, i32 4, <16 x i1> %mask)
 | 
						|
  ret void
 | 
						|
}
 | 
						|
define void @test_scatter_16i64(<16 x i64*> %ptrs, <16 x i1> %mask, <16 x i64> %src0)  {
; KNL_64-LABEL: test_scatter_16i64:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
; KNL_64-NEXT:    vpscatterqq %zmm3, (,%zmm0) {%k1}
; KNL_64-NEXT:    vpscatterqq %zmm4, (,%zmm1) {%k2}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_scatter_16i64:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    pushl %ebp
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
; KNL_32-NEXT:    .cfi_offset %ebp, -8
; KNL_32-NEXT:    movl %esp, %ebp
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_32-NEXT:    andl $-64, %esp
; KNL_32-NEXT:    subl $64, %esp
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
; KNL_32-NEXT:    kshiftrw $8, %k1, %k2
; KNL_32-NEXT:    vpscatterdq %zmm2, (,%ymm0) {%k1}
; KNL_32-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT:    vpscatterdq %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT:    movl %ebp, %esp
; KNL_32-NEXT:    popl %ebp
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_scatter_16i64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
; SKX-NEXT:    vpmovd2m %zmm2, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    vpscatterqq %zmm3, (,%zmm0) {%k1}
; SKX-NEXT:    vpscatterqq %zmm4, (,%zmm1) {%k2}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_scatter_16i64:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    pushl %ebp
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
; SKX_32-NEXT:    .cfi_offset %ebp, -8
; SKX_32-NEXT:    movl %esp, %ebp
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
; SKX_32-NEXT:    andl $-64, %esp
; SKX_32-NEXT:    subl $64, %esp
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
; SKX_32-NEXT:    vmovdqa64 8(%ebp), %zmm1
; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
; SKX_32-NEXT:    vpscatterdq %zmm2, (,%ymm0) {%k1}
; SKX_32-NEXT:    vextracti64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT:    vpscatterdq %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT:    movl %ebp, %esp
; SKX_32-NEXT:    popl %ebp
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v16i64.v16p0i64(<16 x i64> %src0, <16 x i64*> %ptrs, i32, <16 x i1> %mask)
define void @test_scatter_16f32(<16 x float*> %ptrs, <16 x i1> %mask, <16 x float> %src0)  {
; KNL_64-LABEL: test_scatter_16f32:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
; KNL_64-NEXT:    vscatterqps %ymm3, (,%zmm0) {%k1}
; KNL_64-NEXT:    vextractf64x4 $1, %zmm3, %ymm0
; KNL_64-NEXT:    vscatterqps %ymm0, (,%zmm1) {%k2}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_scatter_16f32:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT:    vscatterdps %zmm2, (,%zmm0) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_scatter_16f32:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
; SKX-NEXT:    vpmovd2m %zmm2, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    vscatterqps %ymm3, (,%zmm0) {%k1}
; SKX-NEXT:    vextractf64x4 $1, %zmm3, %ymm0
; SKX-NEXT:    vscatterqps %ymm0, (,%zmm1) {%k2}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_scatter_16f32:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
; SKX_32-NEXT:    vscatterdps %zmm2, (,%zmm0) {%k1}
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v16f32.v16p0f32(<16 x float> %src0, <16 x float*> %ptrs, i32, <16 x i1> %mask)
define void @test_scatter_16f64(<16 x double*> %ptrs, <16 x i1> %mask, <16 x double> %src0)  {
; KNL_64-LABEL: test_scatter_16f64:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm2, %zmm2
; KNL_64-NEXT:    vpslld $31, %zmm2, %zmm2
; KNL_64-NEXT:    vptestmd %zmm2, %zmm2, %k1
; KNL_64-NEXT:    kshiftrw $8, %k1, %k2
; KNL_64-NEXT:    vscatterqpd %zmm3, (,%zmm0) {%k1}
; KNL_64-NEXT:    vscatterqpd %zmm4, (,%zmm1) {%k2}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_scatter_16f64:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    pushl %ebp
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
; KNL_32-NEXT:    .cfi_offset %ebp, -8
; KNL_32-NEXT:    movl %esp, %ebp
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_32-NEXT:    andl $-64, %esp
; KNL_32-NEXT:    subl $64, %esp
; KNL_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; KNL_32-NEXT:    vpslld $31, %zmm1, %zmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k1
; KNL_32-NEXT:    vmovapd 8(%ebp), %zmm1
; KNL_32-NEXT:    kshiftrw $8, %k1, %k2
; KNL_32-NEXT:    vscatterdpd %zmm2, (,%ymm0) {%k1}
; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT:    vscatterdpd %zmm1, (,%ymm0) {%k2}
; KNL_32-NEXT:    movl %ebp, %esp
; KNL_32-NEXT:    popl %ebp
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_scatter_16f64:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm2, %zmm2
; SKX-NEXT:    vpslld $31, %zmm2, %zmm2
; SKX-NEXT:    vpmovd2m %zmm2, %k1
; SKX-NEXT:    kshiftrw $8, %k1, %k2
; SKX-NEXT:    vscatterqpd %zmm3, (,%zmm0) {%k1}
; SKX-NEXT:    vscatterqpd %zmm4, (,%zmm1) {%k2}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_scatter_16f64:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    pushl %ebp
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
; SKX_32-NEXT:    .cfi_offset %ebp, -8
; SKX_32-NEXT:    movl %esp, %ebp
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
; SKX_32-NEXT:    andl $-64, %esp
; SKX_32-NEXT:    subl $64, %esp
; SKX_32-NEXT:    vpmovsxbd %xmm1, %zmm1
; SKX_32-NEXT:    vpslld $31, %zmm1, %zmm1
; SKX_32-NEXT:    vpmovd2m %zmm1, %k1
; SKX_32-NEXT:    vmovapd 8(%ebp), %zmm1
; SKX_32-NEXT:    kshiftrw $8, %k1, %k2
; SKX_32-NEXT:    vscatterdpd %zmm2, (,%ymm0) {%k1}
; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT:    vscatterdpd %zmm1, (,%ymm0) {%k2}
; SKX_32-NEXT:    movl %ebp, %esp
; SKX_32-NEXT:    popl %ebp
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32 4, <16 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %ptrs, i32, <16 x i1> %mask)

define <4 x i64> @test_pr28312(<4 x i64*> %p1, <4 x i1> %k, <4 x i1> %k2, <4 x i64> %d) {
; KNL_64-LABEL: test_pr28312:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $ymm0 killed $ymm0 def $zmm0
; KNL_64-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_64-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpgatherqq (,%zmm0), %zmm1 {%k1}
; KNL_64-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
; KNL_64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_pr28312:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    pushl %ebp
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
; KNL_32-NEXT:    .cfi_offset %ebp, -8
; KNL_32-NEXT:    movl %esp, %ebp
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_32-NEXT:    andl $-32, %esp
; KNL_32-NEXT:    subl $32, %esp
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; KNL_32-NEXT:    vpslld $31, %xmm1, %xmm1
; KNL_32-NEXT:    vptestmd %zmm1, %zmm1, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    vpgatherdq (,%ymm0), %zmm1 {%k1}
; KNL_32-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
; KNL_32-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; KNL_32-NEXT:    movl %ebp, %esp
; KNL_32-NEXT:    popl %ebp
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_pr28312:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX-NEXT:    vpmovd2m %xmm1, %k1
; SKX-NEXT:    vpgatherqq (,%ymm0), %ymm1 {%k1}
; SKX-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
; SKX-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_pr28312:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    pushl %ebp
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
; SKX_32-NEXT:    .cfi_offset %ebp, -8
; SKX_32-NEXT:    movl %esp, %ebp
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
; SKX_32-NEXT:    andl $-32, %esp
; SKX_32-NEXT:    subl $32, %esp
; SKX_32-NEXT:    vpslld $31, %xmm1, %xmm1
; SKX_32-NEXT:    vpmovd2m %xmm1, %k1
; SKX_32-NEXT:    vpgatherdq (,%xmm0), %ymm1 {%k1}
; SKX_32-NEXT:    vpaddq %ymm1, %ymm1, %ymm0
; SKX_32-NEXT:    vpaddq %ymm0, %ymm1, %ymm0
; SKX_32-NEXT:    movl %ebp, %esp
; SKX_32-NEXT:    popl %ebp
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
; SKX_32-NEXT:    retl
  %g1 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %g2 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %g3 = call <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*> %p1, i32 8, <4 x i1> %k, <4 x i64> undef)
  %a = add <4 x i64> %g1, %g2
  %b = add <4 x i64> %a, %g3
  ret <4 x i64> %b
}
declare <4 x i64> @llvm.masked.gather.v4i64.v4p0i64(<4 x i64*>, i32, <4 x i1>, <4 x i64>)

define <8 x i32> @test_global_array(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT:    vmovdqa %ymm1, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_global_array:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT:    vmovdqa %ymm1, %ymm0
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: test_global_array:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_SMALL-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: test_global_array:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    movabsq $glob_array, %rax
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
; SKX_LARGE-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: test_global_array:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_32-NEXT:    retl
  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, i64 0, <8 x i64> %indxs
  %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %g
}

define <8 x i32> @test_global_array_zeroinitializer_index(<8 x i64> %indxs) {
; KNL_64-LABEL: test_global_array_zeroinitializer_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT:    vmovdqa %ymm1, %ymm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_global_array_zeroinitializer_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT:    vmovdqa %ymm1, %ymm0
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: test_global_array_zeroinitializer_index:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_SMALL-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: test_global_array_zeroinitializer_index:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    movabsq $glob_array, %rax
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vpgatherqd (%rax,%zmm0,4), %ymm1 {%k1}
; SKX_LARGE-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: test_global_array_zeroinitializer_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherqd glob_array(,%zmm0,4), %ymm1 {%k1}
; SKX_32-NEXT:    vmovdqa %ymm1, %ymm0
; SKX_32-NEXT:    retl
  %p = getelementptr inbounds [16 x i32], [16 x i32]* @glob_array, <8 x i64> zeroinitializer, <8 x i64> %indxs
  %g = call <8 x i32> @llvm.masked.gather.v8i32.v8p0i32(<8 x i32*> %p, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
  ret <8 x i32> %g
}

define void @v1_scatter(<1 x i32>%a1, <1 x i32*> %ptr, <1 x i1> %mask) {
; KNL_64-LABEL: v1_scatter:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    testb $1, %dl
; KNL_64-NEXT:    je .LBB45_2
; KNL_64-NEXT:  # %bb.1: # %cond.store
; KNL_64-NEXT:    movl %edi, (%rsi)
; KNL_64-NEXT:  .LBB45_2: # %else
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: v1_scatter:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; KNL_32-NEXT:    je .LBB45_2
; KNL_32-NEXT:  # %bb.1: # %cond.store
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT:    movl %ecx, (%eax)
; KNL_32-NEXT:  .LBB45_2: # %else
; KNL_32-NEXT:    retl
;
; SKX-LABEL: v1_scatter:
; SKX:       # %bb.0:
; SKX-NEXT:    testb $1, %dl
; SKX-NEXT:    je .LBB45_2
; SKX-NEXT:  # %bb.1: # %cond.store
; SKX-NEXT:    movl %edi, (%rsi)
; SKX-NEXT:  .LBB45_2: # %else
; SKX-NEXT:    retq
;
; SKX_32-LABEL: v1_scatter:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    testb $1, {{[0-9]+}}(%esp)
; SKX_32-NEXT:    je .LBB45_2
; SKX_32-NEXT:  # %bb.1: # %cond.store
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SKX_32-NEXT:    movl %ecx, (%eax)
; SKX_32-NEXT:  .LBB45_2: # %else
; SKX_32-NEXT:    retl
  call void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32> %a1, <1 x i32*> %ptr, i32 4, <1 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v1i32.v1p0i32(<1 x i32>, <1 x i32*>, i32, <1 x i1>)

define <1 x i32> @v1_gather(<1 x i32*> %ptr, <1 x i1> %mask, <1 x i32> %src0) {
; KNL_64-LABEL: v1_gather:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    movl (%rdi), %eax
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: v1_gather:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movl (%eax), %eax
; KNL_32-NEXT:    retl
;
; SKX-LABEL: v1_gather:
; SKX:       # %bb.0:
; SKX-NEXT:    movl (%rdi), %eax
; SKX-NEXT:    retq
;
; SKX_32-LABEL: v1_gather:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movl (%eax), %eax
; SKX_32-NEXT:    retl
  %res = call <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*> %ptr, i32 4, <1 x i1> <i1 true>, <1 x i32> %src0)
  ret <1 x i32>%res
}
declare <1 x i32> @llvm.masked.gather.v1i32.v1p0i32(<1 x i32*>, i32, <1 x i1>, <1 x i32>)

; Make sure we don't crash when the index element type is larger than i64 and we need to widen the result
; This experienced a bad interaction when we widened and then tried to split.
define <2 x float> @large_index(float* %base, <2 x i128> %ind, <2 x i1> %mask, <2 x float> %src0) {
; KNL_64-LABEL: large_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_64-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vmovq %rcx, %xmm0
; KNL_64-NEXT:    vmovq %rsi, %xmm2
; KNL_64-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; KNL_64-NEXT:    vgatherqps (%rdi,%zmm0,4), %ymm1 {%k1}
; KNL_64-NEXT:    vmovaps %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: large_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; KNL_32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL_32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL_32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; KNL_32-NEXT:    vgatherqps (%eax,%zmm0,4), %ymm1 {%k1}
; KNL_32-NEXT:    vmovaps %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: large_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX-NEXT:    vpmovq2m %xmm0, %k1
; SKX-NEXT:    vmovq %rcx, %xmm0
; SKX-NEXT:    vmovq %rsi, %xmm2
; SKX-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm2[0],xmm0[0]
; SKX-NEXT:    vgatherqps (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT:    vmovaps %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: large_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT:    vpmovq2m %xmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SKX_32-NEXT:    vpinsrd $1, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT:    vpinsrd $2, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT:    vpinsrd $3, {{[0-9]+}}(%esp), %xmm0, %xmm0
; SKX_32-NEXT:    vgatherqps (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT:    vmovaps %xmm1, %xmm0
; SKX_32-NEXT:    retl
  %gep.random = getelementptr float, float* %base, <2 x i128> %ind
  %res = call <2 x float> @llvm.masked.gather.v2f32.v2p0f32(<2 x float*> %gep.random, i32 4, <2 x i1> %mask, <2 x float> %src0)
  ret <2 x float>%res
}

; Make sure we allow index to be sign extended from a smaller than i32 element size.
define <16 x float> @sext_i8_index(float* %base, <16 x i8> %ind) {
; KNL_64-LABEL: sext_i8_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm0, %zmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: sext_i8_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpmovsxbd %xmm0, %zmm1
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX-LABEL: sext_i8_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm0, %zmm1
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: sext_i8_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpmovsxbd %xmm0, %zmm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT:    retl

  %sext_ind = sext <16 x i8> %ind to <16 x i64>
  %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Make sure we allow index to be sign extended from a smaller than i32 element size.
define <8 x float> @sext_v8i8_index(float* %base, <8 x i8> %ind) {
; KNL_64-LABEL: sext_v8i8_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm0, %ymm1
; KNL_64-NEXT:    movw $255, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: sext_v8i8_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpmovsxbd %xmm0, %ymm1
; KNL_32-NEXT:    movw $255, %cx
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: sext_v8i8_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovsxbd %xmm0, %ymm1
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: sext_v8i8_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpmovsxbd %xmm0, %ymm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%ymm1,4), %ymm0 {%k1}
; SKX_32-NEXT:    retl

  %sext_ind = sext <8 x i8> %ind to <8 x i64>
  %gep.random = getelementptr float, float *%base, <8 x i64> %sext_ind

  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
  ret <8 x float>%res
}
declare <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*>, i32, <8 x i1>, <8 x float>)

; Make sure we also allow index to be zero extended from a smaller than i32 element size.
define <16 x float> @zext_i8_index(float* %base, <16 x i8> %ind) {
; KNL_64-LABEL: zext_i8_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: zext_i8_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX-LABEL: zext_i8_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: zext_i8_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpmovzxbd {{.*#+}} zmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT:    retl

  %zext_ind = zext <16 x i8> %ind to <16 x i64>
  %gep.random = getelementptr float, float *%base, <16 x i64> %zext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

; Make sure we also allow index to be zero extended from a smaller than i32 element size.
define <8 x float> @zext_v8i8_index(float* %base, <8 x i8> %ind) {
; KNL_64-LABEL: zext_v8i8_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL_64-NEXT:    movw $255, %ax
; KNL_64-NEXT:    kmovw %eax, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: zext_v8i8_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; KNL_32-NEXT:    movw $255, %cx
; KNL_32-NEXT:    kmovw %ecx, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: zext_v8i8_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%ymm1,4), %ymm0 {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: zext_v8i8_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpmovzxbd {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%ymm1,4), %ymm0 {%k1}
; SKX_32-NEXT:    retl

  %zext_ind = zext <8 x i8> %ind to <8 x i64>
  %gep.random = getelementptr float, float *%base, <8 x i64> %zext_ind

  %res = call <8 x float> @llvm.masked.gather.v8f32.v8p0f32(<8 x float*> %gep.random, i32 4, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x float> undef)
  ret <8 x float>%res
}

; Index requires promotion
define void @test_scatter_2i32_index(<2 x double> %a1, double* %base, <2 x i32> %ind, <2 x i1> %mask) {
; KNL_64-LABEL: test_scatter_2i32_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_64-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_64-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vscatterdpd %zmm0, (%rdi,%ymm1,8) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_scatter_2i32_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $ymm1
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 def $zmm0
; KNL_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; KNL_32-NEXT:    vptestmq %zmm2, %zmm2, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vscatterdpd %zmm0, (%eax,%ymm1,8) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_scatter_2i32_index:
; SKX:       # %bb.0:
; SKX-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX-NEXT:    vpmovq2m %xmm2, %k1
; SKX-NEXT:    vscatterdpd %xmm0, (%rdi,%xmm1,8) {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_scatter_2i32_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm2, %xmm2
; SKX_32-NEXT:    vpmovq2m %xmm2, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vscatterdpd %xmm0, (%eax,%xmm1,8) {%k1}
; SKX_32-NEXT:    retl
  %gep = getelementptr double, double *%base, <2 x i32> %ind
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %gep, i32 4, <2 x i1> %mask)
  ret void
}
declare void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double>, <2 x double*>, i32, <2 x i1>)

define <16 x float> @zext_index(float* %base, <16 x i32> %ind) {
; KNL_64-LABEL: zext_index:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpandd {{.*}}(%rip){1to16}, %zmm0, %zmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: zext_index:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpandd {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: zext_index:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    vandps {{.*}}(%rip){1to16}, %zmm0, %zmm1
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: zext_index:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vandps (%rax){1to16}, %zmm0, %zmm1
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vgatherdps (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: zext_index:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vandps {{\.LCPI.*}}{1to16}, %zmm0, %zmm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT:    retl
  %ind_masked = and <16 x i32> %ind, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %sext_ind = zext <16 x i32> %ind_masked to <16 x i64>
  %gep.random = getelementptr float, float *%base, <16 x i64> %sext_ind

  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  ret <16 x float>%res
}

define <16 x double> @test_gather_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %passthru) {
; KNL_64-LABEL: test_gather_setcc_split:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL_64-NEXT:    vptestnmd %zmm4, %zmm4, %k1
; KNL_64-NEXT:    vptestnmd %zmm1, %zmm1, %k2
; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
; KNL_64-NEXT:    vmovapd %zmm2, %zmm0
; KNL_64-NEXT:    vmovapd %zmm3, %zmm1
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_gather_setcc_split:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    pushl %ebp
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
; KNL_32-NEXT:    .cfi_offset %ebp, -8
; KNL_32-NEXT:    movl %esp, %ebp
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_32-NEXT:    andl $-64, %esp
; KNL_32-NEXT:    subl $64, %esp
; KNL_32-NEXT:    vmovapd 72(%ebp), %zmm3
; KNL_32-NEXT:    movl 8(%ebp), %eax
; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL_32-NEXT:    vptestnmd %zmm4, %zmm4, %k1
; KNL_32-NEXT:    vptestnmd %zmm1, %zmm1, %k2
; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
; KNL_32-NEXT:    vmovapd %zmm2, %zmm0
; KNL_32-NEXT:    vmovapd %zmm3, %zmm1
; KNL_32-NEXT:    movl %ebp, %esp
; KNL_32-NEXT:    popl %ebp
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_gather_setcc_split:
; SKX:       # %bb.0:
; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; SKX-NEXT:    vptestnmd %ymm4, %ymm4, %k1
; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k2
; SKX-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm2 {%k2}
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    vgatherdpd (%rdi,%ymm0,8), %zmm3 {%k1}
; SKX-NEXT:    vmovapd %zmm2, %zmm0
; SKX-NEXT:    vmovapd %zmm3, %zmm1
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_gather_setcc_split:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    pushl %ebp
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
; SKX_32-NEXT:    .cfi_offset %ebp, -8
; SKX_32-NEXT:    movl %esp, %ebp
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
; SKX_32-NEXT:    andl $-64, %esp
; SKX_32-NEXT:    subl $64, %esp
; SKX_32-NEXT:    vmovapd 72(%ebp), %zmm3
; SKX_32-NEXT:    movl 8(%ebp), %eax
; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; SKX_32-NEXT:    vptestnmd %ymm4, %ymm4, %k1
; SKX_32-NEXT:    vptestnmd %ymm1, %ymm1, %k2
; SKX_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm2 {%k2}
; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT:    vgatherdpd (%eax,%ymm0,8), %zmm3 {%k1}
; SKX_32-NEXT:    vmovapd %zmm2, %zmm0
; SKX_32-NEXT:    vmovapd %zmm3, %zmm1
; SKX_32-NEXT:    movl %ebp, %esp
; SKX_32-NEXT:    popl %ebp
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
; SKX_32-NEXT:    retl
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr double, double *%base, <16 x i64> %sext_ind

  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  %res = call <16 x double> @llvm.masked.gather.v16f64.v16p0f64(<16 x double*> %gep.random, i32 4, <16 x i1> %mask, <16 x double> %passthru)
  ret <16 x double>%res
}

define void @test_scatter_setcc_split(double* %base, <16 x i32> %ind, <16 x i32> %cmp, <16 x double> %src0)  {
; KNL_64-LABEL: test_scatter_setcc_split:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL_64-NEXT:    vptestnmd %zmm4, %zmm4, %k1
; KNL_64-NEXT:    vptestnmd %zmm1, %zmm1, %k2
; KNL_64-NEXT:    vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; KNL_64-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; KNL_64-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_scatter_setcc_split:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    pushl %ebp
; KNL_32-NEXT:    .cfi_def_cfa_offset 8
; KNL_32-NEXT:    .cfi_offset %ebp, -8
; KNL_32-NEXT:    movl %esp, %ebp
; KNL_32-NEXT:    .cfi_def_cfa_register %ebp
; KNL_32-NEXT:    andl $-64, %esp
; KNL_32-NEXT:    subl $64, %esp
; KNL_32-NEXT:    vmovapd 72(%ebp), %zmm3
; KNL_32-NEXT:    movl 8(%ebp), %eax
; KNL_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; KNL_32-NEXT:    vptestnmd %zmm4, %zmm4, %k1
; KNL_32-NEXT:    vptestnmd %zmm1, %zmm1, %k2
; KNL_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; KNL_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; KNL_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
; KNL_32-NEXT:    movl %ebp, %esp
; KNL_32-NEXT:    popl %ebp
; KNL_32-NEXT:    .cfi_def_cfa %esp, 4
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_scatter_setcc_split:
; SKX:       # %bb.0:
; SKX-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; SKX-NEXT:    vptestnmd %ymm4, %ymm4, %k1
; SKX-NEXT:    vptestnmd %ymm1, %ymm1, %k2
; SKX-NEXT:    vscatterdpd %zmm2, (%rdi,%ymm0,8) {%k2}
; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX-NEXT:    vscatterdpd %zmm3, (%rdi,%ymm0,8) {%k1}
; SKX-NEXT:    vzeroupper
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_scatter_setcc_split:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    pushl %ebp
; SKX_32-NEXT:    .cfi_def_cfa_offset 8
; SKX_32-NEXT:    .cfi_offset %ebp, -8
; SKX_32-NEXT:    movl %esp, %ebp
; SKX_32-NEXT:    .cfi_def_cfa_register %ebp
; SKX_32-NEXT:    andl $-64, %esp
; SKX_32-NEXT:    subl $64, %esp
; SKX_32-NEXT:    vmovapd 72(%ebp), %zmm3
; SKX_32-NEXT:    movl 8(%ebp), %eax
; SKX_32-NEXT:    vextracti64x4 $1, %zmm1, %ymm4
; SKX_32-NEXT:    vptestnmd %ymm4, %ymm4, %k1
; SKX_32-NEXT:    vptestnmd %ymm1, %ymm1, %k2
; SKX_32-NEXT:    vscatterdpd %zmm2, (%eax,%ymm0,8) {%k2}
; SKX_32-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; SKX_32-NEXT:    vscatterdpd %zmm3, (%eax,%ymm0,8) {%k1}
; SKX_32-NEXT:    movl %ebp, %esp
; SKX_32-NEXT:    popl %ebp
; SKX_32-NEXT:    .cfi_def_cfa %esp, 4
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr double, double *%base, <16 x i64> %sext_ind

  %mask = icmp eq <16 x i32> %cmp, zeroinitializer
  call void @llvm.masked.scatter.v16f64.v16p0f64(<16 x double> %src0, <16 x double*> %gep.random, i32 4, <16 x i1> %mask)
  ret void
}

; This test case previously triggered an infinite loop when the two gathers became identical after DAG combine removed the sign extend.
define <16 x float> @test_sext_cse(float* %base, <16 x i32> %ind, <16 x i32>* %foo) {
; KNL_64-LABEL: test_sext_cse:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vmovaps %zmm0, (%rsi)
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vaddps %zmm1, %zmm1, %zmm0
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: test_sext_cse:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; KNL_32-NEXT:    vmovaps %zmm0, (%ecx)
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vaddps %zmm1, %zmm1, %zmm0
; KNL_32-NEXT:    retl
;
; SKX-LABEL: test_sext_cse:
; SKX:       # %bb.0:
; SKX-NEXT:    vmovaps %zmm0, (%rsi)
; SKX-NEXT:    kxnorw %k0, %k0, %k1
; SKX-NEXT:    vgatherdps (%rdi,%zmm0,4), %zmm1 {%k1}
; SKX-NEXT:    vaddps %zmm1, %zmm1, %zmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: test_sext_cse:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %ecx
; SKX_32-NEXT:    vmovaps %zmm0, (%ecx)
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vgatherdps (%eax,%zmm0,4), %zmm1 {%k1}
; SKX_32-NEXT:    vaddps %zmm1, %zmm1, %zmm0
; SKX_32-NEXT:    retl
  %broadcast.splatinsert = insertelement <16 x float*> undef, float* %base, i32 0
  %broadcast.splat = shufflevector <16 x float*> %broadcast.splatinsert, <16 x float*> undef, <16 x i32> zeroinitializer

  %sext_ind = sext <16 x i32> %ind to <16 x i64>
  %gep.random = getelementptr float, <16 x float*> %broadcast.splat, <16 x i64> %sext_ind

  store <16 x i32> %ind, <16 x i32>* %foo
  %res = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  %gep.random2 = getelementptr float, <16 x float*> %broadcast.splat, <16 x i32> %ind
  %res2 = call <16 x float> @llvm.masked.gather.v16f32.v16p0f32(<16 x float*> %gep.random2, i32 4, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <16 x float> undef)
  %res3 = fadd <16 x float> %res2, %res
  ret <16 x float>%res3
}

define void @zero_mask(<2 x double>%a1, <2 x double*> %ptr) {
; ALL-LABEL: zero_mask:
; ALL:       # %bb.0:
; ALL-NEXT:    ret{{[l|q]}}
  call void @llvm.masked.scatter.v2f64.v2p0f64(<2 x double> %a1, <2 x double*> %ptr, i32 4, <2 x i1> zeroinitializer)
  ret void
}

define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; KNL_64-LABEL: gather_2i64_constant_indices:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT:    vpgatherdq (%rdi,%ymm1,8), %zmm0 {%k1}
; KNL_64-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: gather_2i64_constant_indices:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT:    vpgatherdq (%eax,%ymm1,8), %zmm0 {%k1}
; KNL_32-NEXT:    # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: gather_2i64_constant_indices:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT:    vpmovq2m %xmm0, %k1
; SKX_SMALL-NEXT:    vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_SMALL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_SMALL-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: gather_2i64_constant_indices:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_LARGE-NEXT:    vpmovq2m %xmm0, %k1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vmovdqa (%rax), %xmm1
; SKX_LARGE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_LARGE-NEXT:    vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: gather_2i64_constant_indices:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT:    vpmovq2m %xmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT:    vpgatherdq (%eax,%xmm1,8), %xmm0 {%k1}
; SKX_32-NEXT:    retl
  %gep = getelementptr i64, i64* %ptr, <2 x i64> <i64 0, i64 -2>
  %res = tail call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep, i32 8, <2 x i1> %mask, <2 x i64> zeroinitializer) #1
  ret <2 x i64> %res
}

define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; KNL_64-LABEL: gather_16i64_constant_indices:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: gather_16i64_constant_indices:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL_32-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: gather_16i64_constant_indices:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpmovd2m %zmm0, %k1
; SKX_SMALL-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_SMALL-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_SMALL-NEXT:    vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: gather_16i64_constant_indices:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_LARGE-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vmovdqa64 (%rax), %zmm1
; SKX_LARGE-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_LARGE-NEXT:    vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: gather_16i64_constant_indices:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_32-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT:    vpmovd2m %zmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT:    vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT:    retl
  %gep = getelementptr i32, i32* %ptr, <16 x i64> <i64 0, i64 -2, i64 1, i64 -8, i64 10, i64 20, i64 50, i64 65536, i64 16777215, i64 2147483647, i64 100, i64 -2000, i64 -2147483648, i64 76897723, i64 7, i64 -67897687>
  %res = tail call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep, i32 4, <16 x i1> %mask, <16 x i32> zeroinitializer) #1
  ret <16 x i32> %res
}

define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: scatter_2i64_constant_indices:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT:    kshiftlw $14, %k0, %k0
; KNL_64-NEXT:    kshiftrw $14, %k0, %k1
; KNL_64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; KNL_64-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: scatter_2i64_constant_indices:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT:    vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT:    kshiftlw $14, %k0, %k0
; KNL_32-NEXT:    kshiftrw $14, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
; KNL_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: scatter_2i64_constant_indices:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT:    vpmovq2m %xmm0, %k1
; SKX_SMALL-NEXT:    vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
; SKX_SMALL-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: scatter_2i64_constant_indices:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_LARGE-NEXT:    vpmovq2m %xmm0, %k1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vmovdqa (%rax), %xmm0
; SKX_LARGE-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: scatter_2i64_constant_indices:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT:    vpmovq2m %xmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
; SKX_32-NEXT:    vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
; SKX_32-NEXT:    retl
  %gep = getelementptr i32, i32* %ptr, <2 x i64> <i64 0, i64 -2>
  call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %src0, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
  ret void
}

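; Same as above with a full 16-element constant index vector. With the large
; code model the constant-pool address does not fit in a 32-bit displacement,
; so SKX_LARGE has to load it through movabsq first.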
define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i32> %src0) {
; KNL_64-LABEL: scatter_16i64_constant_indices:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL_64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_64-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: scatter_16i64_constant_indices:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    vpmovsxbd %xmm0, %zmm0
; KNL_32-NEXT:    vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT:    vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; KNL_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: scatter_16i64_constant_indices:
; SKX_SMALL:       # %bb.0:
; SKX_SMALL-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT:    vpmovd2m %zmm0, %k1
; SKX_SMALL-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_SMALL-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_SMALL-NEXT:    vzeroupper
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: scatter_16i64_constant_indices:
; SKX_LARGE:       # %bb.0:
; SKX_LARGE-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_LARGE-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_LARGE-NEXT:    vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vmovdqa64 (%rax), %zmm0
; SKX_LARGE-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_LARGE-NEXT:    vzeroupper
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: scatter_16i64_constant_indices:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpmovsxbd %xmm0, %zmm0
; SKX_32-NEXT:    vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT:    vpmovd2m %zmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
; SKX_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; SKX_32-NEXT:    vzeroupper
; SKX_32-NEXT:    retl
  %gep = getelementptr i32, i32* %ptr, <16 x i64> <i64 0, i64 -2, i64 1, i64 -8, i64 10, i64 20, i64 50, i64 65536, i64 16777215, i64 2147483647, i64 100, i64 -2000, i64 -2147483648, i64 76897723, i64 7, i64 -67897687>
  call void @llvm.masked.scatter.v16i32.v16p0i32(<16 x i32> %src0, <16 x i32*> %gep, i32 4, <16 x i1> %mask)
  ret void
}

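; A gather whose address vector is a splat of one scalar pointer should use
; that pointer as the base register with an all-zero index vector.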
define <4 x i32> @splat_ptr_gather(i32* %ptr, <4 x i1> %mask, <4 x i32> %passthru) {
; KNL_64-LABEL: splat_ptr_gather:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_64-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT:    vpgatherdd (%rdi,%zmm0,4), %zmm1 {%k1}
; KNL_64-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: splat_ptr_gather:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_32-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT:    vpgatherdd (%eax,%zmm0,4), %zmm1 {%k1}
; KNL_32-NEXT:    vmovdqa %xmm1, %xmm0
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: splat_ptr_gather:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vpmovd2m %xmm0, %k1
; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX-NEXT:    vpgatherdd (%rdi,%xmm0,4), %xmm1 {%k1}
; SKX-NEXT:    vmovdqa %xmm1, %xmm0
; SKX-NEXT:    retq
;
; SKX_32-LABEL: splat_ptr_gather:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX_32-NEXT:    vpmovd2m %xmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT:    vpgatherdd (%eax,%xmm0,4), %xmm1 {%k1}
; SKX_32-NEXT:    vmovdqa %xmm1, %xmm0
; SKX_32-NEXT:    retl
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  %3 = call <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*> %2, i32 4, <4 x i1> %mask, <4 x i32> %passthru)
  ret <4 x i32> %3
}
declare <4 x i32> @llvm.masked.gather.v4i32.v4p0i32(<4 x i32*>, i32, <4 x i1>, <4 x i32>)

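; Same splat-pointer pattern for a scatter: the base comes from the scalar
; pointer and the index vector is zero.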
define void @splat_ptr_scatter(i32* %ptr, <4 x i1> %mask, <4 x i32> %val) {
; KNL_64-LABEL: splat_ptr_scatter:
; KNL_64:       # %bb.0:
; KNL_64-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_64-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL_64-NEXT:    kshiftlw $12, %k0, %k0
; KNL_64-NEXT:    kshiftrw $12, %k0, %k1
; KNL_64-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_64-NEXT:    vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT:    vzeroupper
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: splat_ptr_scatter:
; KNL_32:       # %bb.0:
; KNL_32-NEXT:    # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT:    vpslld $31, %xmm0, %xmm0
; KNL_32-NEXT:    vptestmd %zmm0, %zmm0, %k0
; KNL_32-NEXT:    kshiftlw $12, %k0, %k0
; KNL_32-NEXT:    kshiftrw $12, %k0, %k1
; KNL_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; KNL_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; KNL_32-NEXT:    vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT:    vzeroupper
; KNL_32-NEXT:    retl
;
; SKX-LABEL: splat_ptr_scatter:
; SKX:       # %bb.0:
; SKX-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX-NEXT:    vpmovd2m %xmm0, %k1
; SKX-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX-NEXT:    vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX-NEXT:    retq
;
; SKX_32-LABEL: splat_ptr_scatter:
; SKX_32:       # %bb.0:
; SKX_32-NEXT:    vpslld $31, %xmm0, %xmm0
; SKX_32-NEXT:    vpmovd2m %xmm0, %k1
; SKX_32-NEXT:    movl {{[0-9]+}}(%esp), %eax
; SKX_32-NEXT:    vpxor %xmm0, %xmm0, %xmm0
; SKX_32-NEXT:    vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
; SKX_32-NEXT:    retl
  %1 = insertelement <4 x i32*> undef, i32* %ptr, i32 0
  %2 = shufflevector <4 x i32*> %1, <4 x i32*> undef, <4 x i32> zeroinitializer
  call void @llvm.masked.scatter.v4i32.v4p0i32(<4 x i32> %val, <4 x i32*> %2, i32 4, <4 x i1> %mask)
  ret void
}

%struct.foo = type { i8*, i64, i16, i16, i32 }

; This used to cause fast-isel to generate bad copy instructions that would
; cause an error in copyPhysReg.
define <8 x i64> @pr45906(<8 x %struct.foo*> %ptr) {
; KNL_64-LABEL: pr45906:
; KNL_64:       # %bb.0: # %bb
; KNL_64-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; KNL_64-NEXT:    kxnorw %k0, %k0, %k1
; KNL_64-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
; KNL_64-NEXT:    retq
;
; KNL_32-LABEL: pr45906:
; KNL_32:       # %bb.0: # %bb
; KNL_32-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [4,4,4,4,4,4,4,4]
; KNL_32-NEXT:    vpaddd %ymm1, %ymm0, %ymm1
; KNL_32-NEXT:    kxnorw %k0, %k0, %k1
; KNL_32-NEXT:    vpgatherdq (,%ymm1), %zmm0 {%k1}
; KNL_32-NEXT:    retl
;
; SKX_SMALL-LABEL: pr45906:
; SKX_SMALL:       # %bb.0: # %bb
; SKX_SMALL-NEXT:    vpaddq {{.*}}(%rip){1to8}, %zmm0, %zmm1
; SKX_SMALL-NEXT:    kxnorw %k0, %k0, %k1
; SKX_SMALL-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
; SKX_SMALL-NEXT:    retq
;
; SKX_LARGE-LABEL: pr45906:
; SKX_LARGE:       # %bb.0: # %bb
; SKX_LARGE-NEXT:    movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT:    vpaddq (%rax){1to8}, %zmm0, %zmm1
; SKX_LARGE-NEXT:    kxnorw %k0, %k0, %k1
; SKX_LARGE-NEXT:    vpgatherqq (,%zmm1), %zmm0 {%k1}
; SKX_LARGE-NEXT:    retq
;
; SKX_32-LABEL: pr45906:
; SKX_32:       # %bb.0: # %bb
; SKX_32-NEXT:    vpaddd {{\.LCPI.*}}{1to8}, %ymm0, %ymm1
; SKX_32-NEXT:    kxnorw %k0, %k0, %k1
; SKX_32-NEXT:    vpgatherdq (,%ymm1), %zmm0 {%k1}
; SKX_32-NEXT:    retl
bb:
  %tmp = getelementptr inbounds %struct.foo, <8 x %struct.foo*> %ptr, i64 0, i32 1
  %tmp1 = call <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*> %tmp, i32 8, <8 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i64> undef)
  ret <8 x i64> %tmp1
}
declare <8 x i64> @llvm.masked.gather.v8i64.v8p0i64(<8 x i64*>, i32, <8 x i1>, <8 x i64>)