[X86] Add isel patterns to be able to fold loads into VPERM2F128 even when the load is on the first input to the SDNode.
We just need to toggle bits 1 and 5 of the immediate and swap the sources. The peephole pass could trigger commuting/folding for this later, but it's easy enough to fix in isel. Disable the peephole pass on the main vperm2x128 test so we know we're doing this through isel. llvm-svn: 313455
This commit is contained in:
parent
0d1b519f78
commit
23f78c1662
|
|
@ -7670,12 +7670,28 @@ def VPERM2F128rm : AVXAIi8<0x06, MRMSrcMem, (outs VR256:$dst),
|
||||||
Sched<[WriteFShuffleLd, ReadAfterLd]>;
|
Sched<[WriteFShuffleLd, ReadAfterLd]>;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Immediate transform to help with commuting: XOR with 0x22 toggles bits 1 and 5 of the immediate to compensate for swapping the two sources.
|
||||||
|
def Perm2XCommuteImm : SDNodeXForm<imm, [{
|
||||||
|
return getI8Imm(N->getZExtValue() ^ 0x22, SDLoc(N));
|
||||||
|
}]>;
|
||||||
|
|
||||||
|
let Predicates = [HasAVX] in {
|
||||||
|
// Pattern with the load in the first operand: swap the sources and commute the immediate so the load can still be folded.
|
||||||
|
def : Pat<(v4f64 (X86VPerm2x128 (loadv4f64 addr:$src2),
|
||||||
|
VR256:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||||
|
}
|
||||||
|
|
||||||
let Predicates = [HasAVX1Only] in {
|
let Predicates = [HasAVX1Only] in {
|
||||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1, VR256:$src2, (i8 imm:$imm))),
|
||||||
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
(VPERM2F128rr VR256:$src1, VR256:$src2, imm:$imm)>;
|
||||||
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
|
def : Pat<(v4i64 (X86VPerm2x128 VR256:$src1,
|
||||||
(loadv4i64 addr:$src2), (i8 imm:$imm))),
|
(loadv4i64 addr:$src2), (i8 imm:$imm))),
|
||||||
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
(VPERM2F128rm VR256:$src1, addr:$src2, imm:$imm)>;
|
||||||
|
// Pattern with the load in the first operand: swap the sources and commute the immediate so the load can still be folded.
|
||||||
|
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
|
||||||
|
VR256:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERM2F128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||||
}
|
}
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
@ -8083,6 +8099,11 @@ def VPERM2I128rm : AVX2AIi8<0x46, MRMSrcMem, (outs VR256:$dst),
|
||||||
(i8 imm:$src3)))]>,
|
(i8 imm:$src3)))]>,
|
||||||
Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
|
Sched<[WriteShuffle256Ld, ReadAfterLd]>, VEX_4V, VEX_L;
|
||||||
|
|
||||||
|
let Predicates = [HasAVX2] in
|
||||||
|
def : Pat<(v4i64 (X86VPerm2x128 (loadv4i64 addr:$src2),
|
||||||
|
VR256:$src1, (i8 imm:$imm))),
|
||||||
|
(VPERM2I128rm VR256:$src1, addr:$src2, (Perm2XCommuteImm imm:$imm))>;
|
||||||
|
|
||||||
|
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
// VINSERTI128 - Insert packed integer values
|
// VINSERTI128 - Insert packed integer values
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
|
||||||
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
|
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 -disable-peephole | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
|
||||||
|
|
||||||
define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
|
define <8 x float> @shuffle_v8f32_45670123(<8 x float> %a, <8 x float> %b) nounwind uwtable readnone ssp {
|
||||||
; ALL-LABEL: shuffle_v8f32_45670123:
|
; ALL-LABEL: shuffle_v8f32_45670123:
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue