[AVX512] Add patterns for VEXTRACT v16i16->v8i16 and v32i8->v16i8. Disable AVX2 versions of vector extract when AVX512VL is enabled.

llvm-svn: 270318
This commit is contained in:
Craig Topper 2016-05-21 07:08:56 +00:00
parent 22ae353207
commit 02626c076b
6 changed files with 34 additions and 20 deletions

View File

@ -770,6 +770,12 @@ defm : vextract_for_size_lowering<"VEXTRACTF32x4Z256", v4f64x_info, v2f64x_info,
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v4i64x_info, v2i64x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX, NoDQI]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v16i16x_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z256", v32i8x_info, v16i8x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasVLX]>;
// Codegen pattern with the alternative types extract VEC128 from VEC512
defm : vextract_for_size_lowering<"VEXTRACTI32x4Z", v32i16_info, v8i16x_info,
vextract128_extract, EXTRACT_get_vextract128_imm, [HasAVX512]>;

View File

@ -8617,7 +8617,7 @@ def VEXTRACTI128mr : AVX2AIi8<0x39, MRMDestMem, (outs),
"vextracti128\t{$src2, $src1, $dst|$dst, $src1, $src2}", []>,
Sched<[WriteStore]>, VEX, VEX_L;
let Predicates = [HasAVX2] in {
let Predicates = [HasAVX2, NoVLX] in {
def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v2i64 (VEXTRACTI128rr
(v4i64 VR256:$src1),
@ -8634,7 +8634,9 @@ def : Pat<(vextract128_extract:$ext VR256:$src1, (iPTR imm)),
(v16i8 (VEXTRACTI128rr
(v32i8 VR256:$src1),
(EXTRACT_get_vextract128_imm VR128:$ext)))>;
}
let Predicates = [HasAVX2] in {
def : Pat<(store (v2i64 (vextract128_extract:$ext (v4i64 VR256:$src1),
(iPTR imm))), addr:$dst),
(VEXTRACTI128mr addr:$dst, VR256:$src1,

View File

@ -246,7 +246,7 @@ define i64 @extract_v4i64(<4 x i64> %x, i64* %dst) {
; SKX-LABEL: extract_v4i64:
; SKX: ## BB#0:
; SKX-NEXT: vpextrq $1, %xmm0, %rax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrq $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <4 x i64> %x, i32 1
@ -284,7 +284,7 @@ define i32 @extract_v8i32(<8 x i32> %x, i32* %dst) {
; SKX-LABEL: extract_v8i32:
; SKX: ## BB#0:
; SKX-NEXT: vpextrd $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrd $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <8 x i32> %x, i32 1
@ -322,7 +322,7 @@ define i16 @extract_v16i16(<16 x i16> %x, i16* %dst) {
; SKX-LABEL: extract_v16i16:
; SKX: ## BB#0:
; SKX-NEXT: vpextrw $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrw $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <16 x i16> %x, i32 1
@ -360,7 +360,7 @@ define i8 @extract_v32i8(<32 x i8> %x, i8* %dst) {
; SKX-LABEL: extract_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vpextrb $1, %xmm0, %eax
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm0
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vpextrb $1, %xmm0, (%rdi)
; SKX-NEXT: retq
%r1 = extractelement <32 x i8> %x, i32 1
@ -401,7 +401,7 @@ define <4 x i64> @insert_v4i64(<4 x i64> %x, i64 %y , i64* %ptr) {
; SKX: ## BB#0:
; SKX-NEXT: vpinsrq $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrq $1, %rdi, %xmm1, %xmm1
; SKX-NEXT: vinserti64x2 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
@ -452,7 +452,7 @@ define <8 x i32> @insert_v8i32(<8 x i32> %x, i32 %y, i32* %ptr) {
; SKX: ## BB#0:
; SKX-NEXT: vpinsrd $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrd $1, %edi, %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
@ -518,7 +518,7 @@ define <16 x i16> @insert_v16i16(<16 x i16> %x, i16 %y, i16* %ptr) {
; SKX: ## BB#0:
; SKX-NEXT: vpinsrw $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrw $1, %edi, %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
@ -575,7 +575,7 @@ define <32 x i8> @insert_v32i8(<32 x i8> %x, i8 %y, i8* %ptr) {
; SKX: ## BB#0:
; SKX-NEXT: vpinsrb $1, (%rsi), %xmm0, %xmm1
; SKX-NEXT: vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrb $1, %edi, %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
@ -677,7 +677,7 @@ define <16 x i16> @test_insert_128_v16i16(<16 x i16> %x, i16 %y) {
;
; SKX-LABEL: test_insert_128_v16i16:
; SKX: ## BB#0:
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrw $2, %edi, %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq
@ -695,7 +695,7 @@ define <32 x i8> @test_insert_128_v32i8(<32 x i8> %x, i8 %y) {
;
; SKX-LABEL: test_insert_128_v32i8:
; SKX: ## BB#0:
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq

View File

@ -1486,7 +1486,7 @@ define <3 x i32> @test30(<3 x i32*> %base, <3 x i32> %ind, <3 x i1> %mask, <3 x
; SKX-NEXT: testb %al, %al
; SKX-NEXT: je .LBB29_6
; SKX-NEXT: # BB#5: # %cond.load4
; SKX-NEXT: vextracti128 $1, %ymm1, %xmm1
; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm1
; SKX-NEXT: vmovq %xmm1, %rax
; SKX-NEXT: vpinsrd $2, (%rax), %xmm0, %xmm0
; SKX-NEXT: .LBB29_6: # %else5

View File

@ -1249,11 +1249,17 @@ define void @one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
; AVX-NEXT: vzeroupper
; AVX-NEXT: retq
;
; AVX512-LABEL: one_mask_bit_set3:
; AVX512: ## BB#0:
; AVX512-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512-NEXT: vmovq %xmm0, 16(%rdi)
; AVX512-NEXT: retq
; AVX512F-LABEL: one_mask_bit_set3:
; AVX512F: ## BB#0:
; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vmovq %xmm0, 16(%rdi)
; AVX512F-NEXT: retq
;
; SKX-LABEL: one_mask_bit_set3:
; SKX: ## BB#0:
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0
; SKX-NEXT: vmovq %xmm0, 16(%rdi)
; SKX-NEXT: retq
call void @llvm.masked.store.v4i64(<4 x i64> %val, <4 x i64>* %addr, i32 4, <4 x i1><i1 false, i1 false, i1 true, i1 false>)
ret void
}
@ -1354,7 +1360,7 @@ define <4 x i64> @load_one_mask_bit_set3(<4 x i64>* %addr, <4 x i64> %val) {
;
; SKX-LABEL: load_one_mask_bit_set3:
; SKX: ## BB#0:
; SKX-NEXT: vextracti128 $1, %ymm0, %xmm1
; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; SKX-NEXT: vpinsrq $0, 16(%rdi), %xmm1, %xmm1
; SKX-NEXT: vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; SKX-NEXT: retq

View File

@ -477,7 +477,7 @@ define <32 x i8> @testv32i8(<32 x i8> %in) nounwind {
;
; AVX512VLCD-LABEL: testv32i8:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1
@ -551,7 +551,7 @@ define <32 x i8> @testv32i8u(<32 x i8> %in) nounwind {
;
; AVX512VLCD-LABEL: testv32i8u:
; AVX512VLCD: ## BB#0:
; AVX512VLCD-NEXT: vextractf128 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT: vextracti32x4 $1, %ymm0, %xmm1
; AVX512VLCD-NEXT: vpmovzxbd {{.*#+}} zmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero,xmm1[4],zero,zero,zero,xmm1[5],zero,zero,zero,xmm1[6],zero,zero,zero,xmm1[7],zero,zero,zero,xmm1[8],zero,zero,zero,xmm1[9],zero,zero,zero,xmm1[10],zero,zero,zero,xmm1[11],zero,zero,zero,xmm1[12],zero,zero,zero,xmm1[13],zero,zero,zero,xmm1[14],zero,zero,zero,xmm1[15],zero,zero,zero
; AVX512VLCD-NEXT: vplzcntd %zmm1, %zmm1
; AVX512VLCD-NEXT: vpmovdb %zmm1, %xmm1