[X86] Fold (trunc (i32 (zextload i16))) into vbroadcast.
When matching non-LSB-extracting truncating broadcasts, we now insert the necessary SRL. If the scalar came from a load, the SRL is folded into that load, producing a narrower load at an offset. However, i16 loads aren't Desirable, so we get i16->i32 zextloads instead. We already catch i16 aextloads; catch these zextloads as well. llvm-svn: 252363
This commit is contained in:
parent
05a0514b12
commit
b49eb3ab4b
|
|
@ -8365,6 +8365,12 @@ let Predicates = [HasAVX2] in {
|
|||
(VPBROADCASTWrm addr:$src)>;
|
||||
// Select the memory-operand form VPBROADCASTWYrm for a v16i16 broadcast whose
// i16 scalar is the truncation of an i32 `load` from addr:$src; the broadcast
// instruction reads from the same address.
def : Pat<(v16i16 (X86VBroadcast (i16 (trunc (i32 (load addr:$src)))))),
|
||||
(VPBROADCASTWYrm addr:$src)>;
|
||||
// v8i16 broadcast of an i16 obtained by truncating an i32 that was produced by
// a zero-extending 16-bit load (zextloadi16). The truncation discards the
// extended bits, so the value is just the 16 bits at addr:$src and the
// broadcast can read them directly with VPBROADCASTWrm.
// Per the commit message, these i16->i32 zextloads show up because i16 loads
// are not considered desirable.
def : Pat<(v8i16 (X86VBroadcast
|
||||
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
|
||||
(VPBROADCASTWrm addr:$src)>;
|
||||
// v16i16 counterpart of the zextloadi16 broadcast pattern: the i16 scalar is
// the truncation of an i32 produced by a zero-extending 16-bit load, so only
// the 16 bits at addr:$src matter and VPBROADCASTWYrm can load them directly.
def : Pat<(v16i16 (X86VBroadcast
|
||||
(i16 (trunc (i32 (zextloadi16 addr:$src)))))),
|
||||
(VPBROADCASTWYrm addr:$src)>;
|
||||
|
||||
// Provide aliases for broadcast from the same register class that
|
||||
// automatically does the extract.
|
||||
|
|
|
|||
|
|
@ -2258,9 +2258,7 @@ define <8 x i16> @insert_dup_elt1_mem_v8i16_i32(i32* %ptr) {
|
|||
;
|
||||
; AVX2-LABEL: insert_dup_elt1_mem_v8i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
|
|
@ -2298,9 +2296,7 @@ define <8 x i16> @insert_dup_elt3_mem_v8i16_i32(i32* %ptr) {
|
|||
;
|
||||
; AVX2-LABEL: insert_dup_elt3_mem_v8i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw 2(%rdi), %xmm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
||||
|
|
|
|||
|
|
@ -3342,9 +3342,7 @@ define <16 x i16> @insert_dup_elt1_mem_v16i16_i32(i32* %ptr) #0 {
|
|||
;
|
||||
; AVX2-LABEL: insert_dup_elt1_mem_v16i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
|
||||
|
|
@ -3363,9 +3361,7 @@ define <16 x i16> @insert_dup_elt3_mem_v16i16_i32(i32* %ptr) #0 {
|
|||
;
|
||||
; AVX2-LABEL: insert_dup_elt3_mem_v16i16_i32:
|
||||
; AVX2: # BB#0:
|
||||
; AVX2-NEXT: movzwl 2(%rdi), %eax
|
||||
; AVX2-NEXT: vmovd %eax, %xmm0
|
||||
; AVX2-NEXT: vpbroadcastw %xmm0, %ymm0
|
||||
; AVX2-NEXT: vpbroadcastw 2(%rdi), %ymm0
|
||||
; AVX2-NEXT: retq
|
||||
%tmp = load i32, i32* %ptr, align 4
|
||||
%tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 1
|
||||
|
|
|
|||
Loading…
Reference in New Issue