Add 256-bit variant vmovss and vmovsd patterns. rdar://10538417
llvm-svn: 146196
This commit is contained in:
parent
83dcd7bc66
commit
b96bca81e7
|
|
@ -645,6 +645,16 @@ let Predicates = [HasAVX] in {
|
||||||
(VMOVSSrr (v4f32 VR128:$src1),
|
(VMOVSSrr (v4f32 VR128:$src1),
|
||||||
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
|
(EXTRACT_SUBREG (v4f32 VR128:$src2), sub_ss))>;
|
||||||
|
|
||||||
|
// 256-bit variants
|
||||||
|
// 256-bit integer (v8i32) form of the VMOVSS shuffle.
// The matched node must be X86Movss: the selected VMOVSSrr only moves the low
// 32-bit element (sub_ss), so matching X86Movsd here would select a 32-bit
// move for a shuffle that replaces the low 64 bits.
// The 128-bit result is re-wrapped as a 256-bit value through sub_xmm.
def : Pat<(v8i32 (X86Movss VR256:$src1, VR256:$src2)),
          (SUBREG_TO_REG (i32 0),
            (VMOVSSrr (EXTRACT_SUBREG (v8i32 VR256:$src1), sub_ss),
                      (EXTRACT_SUBREG (v8i32 VR256:$src2), sub_ss)), sub_xmm)>;
|
||||||
|
// 256-bit floating-point (v8f32) form of the VMOVSS shuffle.
// The matched node must be X86Movss: the selected VMOVSSrr only moves the low
// 32-bit element (sub_ss), so matching X86Movsd here would select a 32-bit
// move for a shuffle that replaces the low 64 bits.
// The 128-bit result is re-wrapped as a 256-bit value through sub_xmm.
def : Pat<(v8f32 (X86Movss VR256:$src1, VR256:$src2)),
          (SUBREG_TO_REG (i32 0),
            (VMOVSSrr (EXTRACT_SUBREG (v8f32 VR256:$src1), sub_ss),
                      (EXTRACT_SUBREG (v8f32 VR256:$src2), sub_ss)), sub_xmm)>;
|
||||||
|
|
||||||
// Shuffle with VMOVSD
|
// Shuffle with VMOVSD
|
||||||
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
|
def : Pat<(v2f64 (X86Movsd VR128:$src1, (scalar_to_vector FR64:$src2))),
|
||||||
(VMOVSDrr VR128:$src1, FR64:$src2)>;
|
(VMOVSDrr VR128:$src1, FR64:$src2)>;
|
||||||
|
|
@ -661,6 +671,17 @@ let Predicates = [HasAVX] in {
|
||||||
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
|
(VMOVSDrr VR128:$src1, (EXTRACT_SUBREG (v4i32 VR128:$src2),
|
||||||
sub_sd))>;
|
sub_sd))>;
|
||||||
|
|
||||||
|
// 256-bit variants
|
||||||
|
// 256-bit integer (v4i64) form of the VMOVSD shuffle: select VMOVSDrr on the
// sub_sd subregisters of both operands, then re-wrap the result as a 256-bit
// value through sub_xmm.
def : Pat<
  (v4i64 (X86Movsd VR256:$src1, VR256:$src2)),
  (SUBREG_TO_REG
    (i32 0),
    (VMOVSDrr (EXTRACT_SUBREG (v4i64 VR256:$src1), sub_sd),
              (EXTRACT_SUBREG (v4i64 VR256:$src2), sub_sd)),
    sub_xmm)>;
|
||||||
|
// 256-bit floating-point (v4f64) form of the VMOVSD shuffle: select VMOVSDrr
// on the sub_sd subregisters of both operands, then re-wrap the result as a
// 256-bit value through sub_xmm.
def : Pat<
  (v4f64 (X86Movsd VR256:$src1, VR256:$src2)),
  (SUBREG_TO_REG
    (i32 0),
    (VMOVSDrr (EXTRACT_SUBREG (v4f64 VR256:$src1), sub_sd),
              (EXTRACT_SUBREG (v4f64 VR256:$src2), sub_sd)),
    sub_xmm)>;
|
||||||
|
|
||||||
|
|
||||||
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
|
// FIXME: Instead of a X86Movlps there should be a X86Movsd here, the problem
|
||||||
// is during lowering, where it's not possible to recognize the fold cause
|
// is during lowering, where it's not possible to recognize the fold cause
|
||||||
// it has two uses through a bitcast. One use disappears at isel time and the
|
// it has two uses through a bitcast. One use disappears at isel time and the
|
||||||
|
|
|
||||||
|
|
@ -8,3 +8,13 @@ define <4 x float> @test1(<4 x float> %a) nounwind {
|
||||||
; CHECK: vshufps
|
; CHECK: vshufps
|
||||||
; CHECK: vpshufd
|
; CHECK: vpshufd
|
||||||
}
|
}
|
||||||
|
|
||||||
|
; rdar://10538417
|
||||||
|
; Builds a <3 x i64> whose first two lanes come from a widened undef vector
; and whose last lane comes from zeroinitializer. The expected output is a
; vxorps (the zero vector) followed by a vmovsd.
define <3 x i64> @test2(<3 x i64> %v) nounwind readnone {
; CHECK: test2:
; CHECK: vxorps
; CHECK: vmovsd
  ; Widen a 2-element undef vector to 3 elements.
  %widened = shufflevector <2 x i64> undef, <2 x i64> undef, <3 x i32> <i32 0, i32 1, i32 undef>
  ; Lanes 0-1 take elements 0-1 of %widened (mask 3, 4); lane 2 stays zero.
  %merged = shufflevector <3 x i64> zeroinitializer, <3 x i64> %widened, <3 x i32> <i32 3, i32 4, i32 2>
  ret <3 x i64> %merged
}
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue