[X86] Remove patterns for X86VSintToFP/X86VUintToFP+loadv4i32 to v2f64.
These patterns can incorrectly narrow a volatile load from 128 bits to 64 bits, similar to PR42079.

Switch to using (v4i32 (bitcast (v2i64 (scalar_to_vector (loadi64))))) as the load pattern used in the instructions.

This probably still has issues in 32-bit mode, where loadi64 isn't legal. Maybe we should use VZMOVL for widened loads even when we don't need the upper bits to be zeroes?

llvm-svn: 362203
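For reference, a minimal IR sketch of the case this protects against, modeled on the sitofp_volatile_load_4i32_to_2f64 test updated at the end of this diff (the sitofp/ret tail is filled in here for illustration and is not copied from the test):

define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32>* %a) {
  ; The volatile load must stay 128 bits wide; folding it into
  ; cvtdq2pd's 64-bit memory form would narrow it.
  %ld = load volatile <4 x i32>, <4 x i32>* %a
  %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %cvt = sitofp <2 x i32> %b to <2 x double>
  ret <2 x double> %cvt
}

After this change the SSE/AVX test updates below show the load staying as a full-width movaps/vmovaps followed by a register-form cvtdq2pd instead of being folded into the conversion's 64-bit memory operand.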
parent cded573710
commit 31d00d80a2
@@ -7608,7 +7608,8 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                           X86FoldableSchedWrite sched,
                           string Broadcast = _.BroadcastStr,
                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                          RegisterClass MaskRC = _.KRCWM> {
+                          RegisterClass MaskRC = _.KRCWM,
+                          dag LdDAG = (_.VT (OpNode (_Src.VT (_Src.LdFrag addr:$src))))> {
 
   defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
                          (ins _Src.RC:$src),
@@ -7627,8 +7628,7 @@ multiclass avx512_vcvt_fp<bits<8> opc, string OpcodeStr, X86VectorVTInfo _,
                          (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
                          (ins MaskRC:$mask, MemOp:$src),
                          OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT
-                                        (_Src.LdFrag addr:$src)))),
+                         LdDAG,
                          (vselect MaskRC:$mask,
                                   (_.VT (OpNode (_Src.VT
                                                  (_Src.LdFrag addr:$src)))),
@@ -7683,53 +7683,10 @@ multiclass avx512_vcvt_fpextend<bits<8> opc, string OpcodeStr, X86VectorVTInfo _
                           X86FoldableSchedWrite sched,
                           string Broadcast = _.BroadcastStr,
                           string Alias = "", X86MemOperand MemOp = _Src.MemOp,
-                          RegisterClass MaskRC = _.KRCWM,
-                          PatFrag LdFrag = !cast<PatFrag>("extload"#_Src.VTName)> {
-
-  defm rr : AVX512_maskable_common<opc, MRMSrcReg, _, (outs _.RC:$dst),
-                         (ins _Src.RC:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.RC:$src),
-                         (ins MaskRC:$mask, _Src.RC:$src),
-                         OpcodeStr, "$src", "$src",
-                         (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT _Src.RC:$src))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched]>;
-
-  defm rm : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins MemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, MemOp:$src),
-                         (ins MaskRC:$mask, MemOp:$src),
-                         OpcodeStr#Alias, "$src", "$src",
-                         (_.VT (LdFrag addr:$src)),
-                         (vselect MaskRC:$mask,
-                                  (_.VT (OpNode (_Src.VT
-                                                 (_Src.LdFrag addr:$src)))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, Sched<[sched.Folded]>;
-
-  defm rmb : AVX512_maskable_common<opc, MRMSrcMem, _, (outs _.RC:$dst),
-                         (ins _Src.ScalarMemOp:$src),
-                         (ins _.RC:$src0, MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         (ins MaskRC:$mask, _Src.ScalarMemOp:$src),
-                         OpcodeStr,
-                         "${src}"##Broadcast, "${src}"##Broadcast,
-                         (_.VT (OpNode (_Src.VT
-                                        (X86VBroadcast (_Src.ScalarLdFrag addr:$src)))
-                                )),
-                         (vselect MaskRC:$mask,
-                                  (_.VT
-                                   (OpNode
-                                    (_Src.VT
-                                     (X86VBroadcast
-                                      (_Src.ScalarLdFrag addr:$src))))),
-                                  _.RC:$src0),
-                         vselect, "$src0 = $dst">,
-                         EVEX, EVEX_B, Sched<[sched.Folded]>;
-}
+                          RegisterClass MaskRC = _.KRCWM>
+  : avx512_vcvt_fp<opc, OpcodeStr, _, _Src, OpNode, sched, Broadcast, Alias,
+                   MemOp, MaskRC,
+                   (_.VT (!cast<PatFrag>("extload"#_Src.VTName) addr:$src))>;
 
 // Extend Float to Double
 multiclass avx512_cvtps2pd<bits<8> opc, string OpcodeStr,
@@ -7910,7 +7867,11 @@ multiclass avx512_cvtdq2pd<bits<8> opc, string OpcodeStr, SDNode OpNode,
 
   let Predicates = [HasVLX] in {
     defm Z128 : avx512_vcvt_fp<opc, OpcodeStr, v2f64x_info, v4i32x_info,
-                               OpNode128, sched.XMM, "{1to2}", "", i64mem>, EVEX_V128;
+                               OpNode128, sched.XMM, "{1to2}", "", i64mem, VK2WM,
+                               (v2f64 (OpNode128 (bc_v4i32
+                                (v2i64
+                                 (scalar_to_vector (loadi64 addr:$src))))))>,
+                               EVEX_V128;
     defm Z256 : avx512_vcvt_fp<opc, OpcodeStr, v4f64x_info, v4i32x_info, OpNode,
                                sched.YMM>, EVEX_V256;
   }
@@ -1655,7 +1655,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
                        [(set VR128:$dst,
-                         (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                         (v2f64 (X86VSintToFP
+                                 (bc_v4i32
+                                  (v2i64 (scalar_to_vector
+                                          (loadi64 addr:$src)))))))]>,
                        VEX, Sched<[WriteCvtI2PDLd]>, VEX_WIG;
 def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                        "vcvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1679,7 +1682,10 @@ let hasSideEffects = 0, mayLoad = 1 in
 def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
                       [(set VR128:$dst,
-                        (v2f64 (X86VSintToFP (loadv4i32 addr:$src))))]>,
+                        (v2f64 (X86VSintToFP
+                                (bc_v4i32
+                                 (v2i64 (scalar_to_vector
+                                         (loadi64 addr:$src)))))))]>,
                       Sched<[WriteCvtI2PDLd]>;
 def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
                       "cvtdq2pd\t{$src, $dst|$dst, $src}",
@@ -1689,16 +1695,12 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
 
 // AVX register conversion intrinsics
 let Predicates = [HasAVX, NoVLX] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (VCVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (VCVTDQ2PDrm addr:$src)>;
 } // Predicates = [HasAVX, NoVLX]
 
 // SSE2 register conversion intrinsics
 let Predicates = [UseSSE2] in {
-  def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
-            (CVTDQ2PDrm addr:$src)>;
   def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
             (CVTDQ2PDrm addr:$src)>;
 } // Predicates = [UseSSE2]
@@ -2558,16 +2558,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ: # %bb.0:
 ; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT: retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ: # %bb.0:
 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT: retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2611,16 +2609,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ: # %bb.0:
 ; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT: retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ: # %bb.0:
 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT: retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
@@ -2533,16 +2533,14 @@ define <2 x double> @test_sito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_sito2f64_mask_load:
 ; VLDQ: # %bb.0:
 ; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT: retq
 ;
 ; VLNODQ-LABEL: test_sito2f64_mask_load:
 ; VLNODQ: # %bb.0:
 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtdq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtdq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT: retq
 ;
 ; DQNOVL-LABEL: test_sito2f64_mask_load:
@@ -2586,16 +2584,14 @@ define <2 x double> @test_uito2f64_mask_load(<2 x i32> *%a, <2 x i64> %c) {
 ; VLDQ-LABEL: test_uito2f64_mask_load:
 ; VLDQ: # %bb.0:
 ; VLDQ-NEXT: vpmovq2m %xmm0, %k1
-; VLDQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLDQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLDQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLDQ-NEXT: retq
 ;
 ; VLNODQ-LABEL: test_uito2f64_mask_load:
 ; VLNODQ: # %bb.0:
 ; VLNODQ-NEXT: vpxor %xmm1, %xmm1, %xmm1
 ; VLNODQ-NEXT: vpcmpgtq %xmm0, %xmm1, %k1
-; VLNODQ-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
-; VLNODQ-NEXT: vcvtudq2pd %xmm0, %xmm0 {%k1} {z}
+; VLNODQ-NEXT: vcvtudq2pd (%rdi), %xmm0 {%k1} {z}
 ; VLNODQ-NEXT: retq
 ;
 ; DQNOVL-LABEL: test_uito2f64_mask_load:
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT: retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
@@ -3158,12 +3158,14 @@ define <2 x double> @sitofp_load_2i32_to_2f64(<2 x i32> *%a) {
 define <2 x double> @sitofp_volatile_load_4i32_to_2f64(<4 x i32> *%a) {
 ; SSE-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; SSE: # %bb.0:
-; SSE-NEXT: cvtdq2pd (%rdi), %xmm0
+; SSE-NEXT: movaps (%rdi), %xmm0
+; SSE-NEXT: cvtdq2pd %xmm0, %xmm0
 ; SSE-NEXT: retq
 ;
 ; AVX-LABEL: sitofp_volatile_load_4i32_to_2f64:
 ; AVX: # %bb.0:
-; AVX-NEXT: vcvtdq2pd (%rdi), %xmm0
+; AVX-NEXT: vmovaps (%rdi), %xmm0
+; AVX-NEXT: vcvtdq2pd %xmm0, %xmm0
 ; AVX-NEXT: retq
   %ld = load volatile <4 x i32>, <4 x i32> *%a
   %b = shufflevector <4 x i32> %ld, <4 x i32> undef, <2 x i32> <i32 0, i32 1>