[X86] Add an additional isel pattern to CVTDQ2PDrm/VCVTDQ2PDrm to enable load folding without the peephole pass.
This pattern is already used in the AVX512VL versions of these instructions, though the AVX512VL versions are still missing other patterns. llvm-svn: 315794
This commit is contained in:
parent
e55d9a1403
commit
53b0cb7fa9
|
|
@ -1955,7 +1955,7 @@ let hasSideEffects = 0, mayLoad = 1 in
|
||||||
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
def VCVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||||
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
|
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))]>,
|
(v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))]>,
|
||||||
VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG;
|
VEX, Sched<[WriteCvtI2FLd]>, VEX_WIG;
|
||||||
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def VCVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
|
"vcvtdq2pd\t{$src, $dst|$dst, $src}",
|
||||||
|
|
@ -1978,7 +1978,7 @@ let hasSideEffects = 0, mayLoad = 1 in
|
||||||
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
def CVTDQ2PDrm : S2SI<0xE6, MRMSrcMem, (outs VR128:$dst), (ins i64mem:$src),
|
||||||
"cvtdq2pd\t{$src, $dst|$dst, $src}",
|
"cvtdq2pd\t{$src, $dst|$dst, $src}",
|
||||||
[(set VR128:$dst,
|
[(set VR128:$dst,
|
||||||
(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))))],
|
(v2f64 (X86VSintToFP (bc_v4i32 (loadv2i64 addr:$src)))))],
|
||||||
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
|
IIC_SSE_CVT_PD_RR>, Sched<[WriteCvtI2FLd]>;
|
||||||
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
"cvtdq2pd\t{$src, $dst|$dst, $src}",
|
"cvtdq2pd\t{$src, $dst|$dst, $src}",
|
||||||
|
|
@ -1990,12 +1990,16 @@ def CVTDQ2PDrr : S2SI<0xE6, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
|
||||||
let Predicates = [HasAVX, NoVLX] in {
|
let Predicates = [HasAVX, NoVLX] in {
|
||||||
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||||
(VCVTDQ2PDrm addr:$src)>;
|
(VCVTDQ2PDrm addr:$src)>;
|
||||||
|
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
|
||||||
|
(VCVTDQ2PDrm addr:$src)>;
|
||||||
} // Predicates = [HasAVX, NoVLX]
|
} // Predicates = [HasAVX, NoVLX]
|
||||||
|
|
||||||
// SSE2 register conversion intrinsics
|
// SSE2 register conversion intrinsics
|
||||||
let Predicates = [UseSSE2] in {
|
let Predicates = [UseSSE2] in {
|
||||||
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (scalar_to_vector (loadi64 addr:$src)))))),
|
||||||
(CVTDQ2PDrm addr:$src)>;
|
(CVTDQ2PDrm addr:$src)>;
|
||||||
|
def : Pat<(v2f64 (X86VSintToFP (bc_v4i32 (v2i64 (X86vzload addr:$src))))),
|
||||||
|
(CVTDQ2PDrm addr:$src)>;
|
||||||
} // Predicates = [UseSSE2]
|
} // Predicates = [UseSSE2]
|
||||||
|
|
||||||
// Convert packed double to packed single
|
// Convert packed double to packed single
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue