forked from OSchip/llvm-project
[X86][AVX] Fold broadcast(bitcast(src)) -> bitcast(broadcast(src))
llvm-svn: 352751
This commit is contained in:
parent
140f75f625
commit
63f3383ece
|
@ -31938,6 +31938,14 @@ static SDValue combineTargetShuffle(SDValue N, SelectionDAG &DAG,
|
||||||
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
|
return DAG.getNode(X86ISD::VBROADCAST, DL, VT,
|
||||||
DAG.getBitcast(SrcVT, Res));
|
DAG.getBitcast(SrcVT, Res));
|
||||||
}
|
}
|
||||||
|
// broadcast(bitcast(src)) -> bitcast(broadcast(src))
|
||||||
|
// 32-bit targets have to bitcast i64 to f64, so better to bitcast upward.
|
||||||
|
if (Src.getOpcode() == ISD::BITCAST &&
|
||||||
|
SrcVT.getScalarSizeInBits() == BCVT.getScalarSizeInBits()) {
|
||||||
|
EVT NewVT = EVT::getVectorVT(*DAG.getContext(), BCVT.getScalarType(),
|
||||||
|
VT.getVectorNumElements());
|
||||||
|
return DAG.getBitcast(VT, DAG.getNode(X86ISD::VBROADCAST, DL, NewVT, BC));
|
||||||
|
}
|
||||||
return SDValue();
|
return SDValue();
|
||||||
}
|
}
|
||||||
case X86ISD::PSHUFD:
|
case X86ISD::PSHUFD:
|
||||||
|
|
|
@ -45,9 +45,9 @@ define <4 x i64> @A2(i64* %ptr, i64* %ptr2) nounwind uwtable readnone ssp {
|
||||||
; X64-LABEL: A2:
|
; X64-LABEL: A2:
|
||||||
; X64: ## %bb.0: ## %entry
|
; X64: ## %bb.0: ## %entry
|
||||||
; X64-NEXT: movq (%rdi), %rax
|
; X64-NEXT: movq (%rdi), %rax
|
||||||
; X64-NEXT: vmovq %rax, %xmm0
|
|
||||||
; X64-NEXT: movq %rax, (%rsi)
|
; X64-NEXT: movq %rax, (%rsi)
|
||||||
; X64-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
|
; X64-NEXT: vmovq %rax, %xmm0
|
||||||
|
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,1]
|
||||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
entry:
|
entry:
|
||||||
|
@ -110,8 +110,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %ecx
|
||||||
; X32-NEXT: movl (%ecx), %ecx
|
; X32-NEXT: movl (%ecx), %ecx
|
||||||
; X32-NEXT: vmovd %ecx, %xmm0
|
|
||||||
; X32-NEXT: movl %ecx, (%eax)
|
; X32-NEXT: movl %ecx, (%eax)
|
||||||
|
; X32-NEXT: vmovd %ecx, %xmm0
|
||||||
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
; X32-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||||
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
; X32-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
|
@ -119,8 +119,8 @@ define <8 x i32> @B3(i32* %ptr, i32* %ptr2) nounwind uwtable readnone ssp {
|
||||||
; X64-LABEL: B3:
|
; X64-LABEL: B3:
|
||||||
; X64: ## %bb.0: ## %entry
|
; X64: ## %bb.0: ## %entry
|
||||||
; X64-NEXT: movl (%rdi), %eax
|
; X64-NEXT: movl (%rdi), %eax
|
||||||
; X64-NEXT: vmovd %eax, %xmm0
|
|
||||||
; X64-NEXT: movl %eax, (%rsi)
|
; X64-NEXT: movl %eax, (%rsi)
|
||||||
|
; X64-NEXT: vmovd %eax, %xmm0
|
||||||
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
; X64-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
|
||||||
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
; X64-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||||
; X64-NEXT: retq
|
; X64-NEXT: retq
|
||||||
|
|
|
@ -244,8 +244,7 @@ define <16 x i16> @broadcast_mem_v4i16_v16i16(<4 x i16>* %ptr) {
|
||||||
; X32-LABEL: broadcast_mem_v4i16_v16i16:
|
; X32-LABEL: broadcast_mem_v4i16_v16i16:
|
||||||
; X32: ## %bb.0:
|
; X32: ## %bb.0:
|
||||||
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; X32-NEXT: vbroadcastsd (%eax), %ymm0
|
||||||
; X32-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
||||||
; X32-NEXT: retl
|
; X32-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-LABEL: broadcast_mem_v4i16_v16i16:
|
; X64-LABEL: broadcast_mem_v4i16_v16i16:
|
||||||
|
|
|
@ -1592,15 +1592,13 @@ define <8 x i32> @test_2xi32_to_8xi32_mem(<2 x i32>* %vp) {
|
||||||
; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem:
|
; X32-AVX2-LABEL: test_2xi32_to_8xi32_mem:
|
||||||
; X32-AVX2: # %bb.0:
|
; X32-AVX2: # %bb.0:
|
||||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
|
||||||
; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
||||||
; X32-AVX2-NEXT: retl
|
; X32-AVX2-NEXT: retl
|
||||||
;
|
;
|
||||||
; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem:
|
; X32-AVX512-LABEL: test_2xi32_to_8xi32_mem:
|
||||||
; X32-AVX512: # %bb.0:
|
; X32-AVX512: # %bb.0:
|
||||||
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-AVX512-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-AVX512-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; X32-AVX512-NEXT: vbroadcastsd (%eax), %ymm0
|
||||||
; X32-AVX512-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
||||||
; X32-AVX512-NEXT: retl
|
; X32-AVX512-NEXT: retl
|
||||||
;
|
;
|
||||||
; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem:
|
; X64-AVX1-LABEL: test_2xi32_to_8xi32_mem:
|
||||||
|
@ -1635,8 +1633,7 @@ define <16 x i32> @test_2xi32_to_16xi32_mem(<2 x i32>* %vp) {
|
||||||
; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem:
|
; X32-AVX2-LABEL: test_2xi32_to_16xi32_mem:
|
||||||
; X32-AVX2: # %bb.0:
|
; X32-AVX2: # %bb.0:
|
||||||
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
; X32-AVX2-NEXT: movl {{[0-9]+}}(%esp), %eax
|
||||||
; X32-AVX2-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
|
; X32-AVX2-NEXT: vbroadcastsd (%eax), %ymm0
|
||||||
; X32-AVX2-NEXT: vbroadcastsd %xmm0, %ymm0
|
|
||||||
; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1
|
; X32-AVX2-NEXT: vmovaps %ymm0, %ymm1
|
||||||
; X32-AVX2-NEXT: retl
|
; X32-AVX2-NEXT: retl
|
||||||
;
|
;
|
||||||
|
|
Loading…
Reference in New Issue