forked from OSchip/llvm-project
Re-apply 40504, but with a fix for the segfault it caused in oggenc:
Make the alignedload and alignedstore patterns always require 16-byte alignment. This way when they are used in the "Fs" instructions, in which a vector instruction is used for a scalar purpose, they can still require the full vector alignment. And add a regression test for this. llvm-svn: 40555
This commit is contained in:
parent
5d49bcdce0
commit
4788552deb
|
|
@ -3367,14 +3367,10 @@ SDOperand X86TargetLowering::LowerFABS(SDOperand Op, SelectionDAG &DAG) {
|
||||||
CV.push_back(C);
|
CV.push_back(C);
|
||||||
CV.push_back(C);
|
CV.push_back(C);
|
||||||
}
|
}
|
||||||
Constant *CS = ConstantStruct::get(CV);
|
Constant *C = ConstantVector::get(CV);
|
||||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
|
||||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
|
||||||
SmallVector<SDOperand, 3> Ops;
|
false, 16);
|
||||||
Ops.push_back(DAG.getEntryNode());
|
|
||||||
Ops.push_back(CPIdx);
|
|
||||||
Ops.push_back(DAG.getSrcValue(NULL));
|
|
||||||
SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
|
|
||||||
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
|
return DAG.getNode(X86ISD::FAND, VT, Op.getOperand(0), Mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -3399,21 +3395,16 @@ SDOperand X86TargetLowering::LowerFNEG(SDOperand Op, SelectionDAG &DAG) {
|
||||||
CV.push_back(C);
|
CV.push_back(C);
|
||||||
CV.push_back(C);
|
CV.push_back(C);
|
||||||
}
|
}
|
||||||
Constant *CS = ConstantStruct::get(CV);
|
Constant *C = ConstantVector::get(CV);
|
||||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
|
||||||
|
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
|
||||||
|
false, 16);
|
||||||
if (MVT::isVector(VT)) {
|
if (MVT::isVector(VT)) {
|
||||||
SDOperand Mask = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0);
|
|
||||||
return DAG.getNode(ISD::BIT_CONVERT, VT,
|
return DAG.getNode(ISD::BIT_CONVERT, VT,
|
||||||
DAG.getNode(ISD::XOR, MVT::v2i64,
|
DAG.getNode(ISD::XOR, MVT::v2i64,
|
||||||
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
|
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Op.getOperand(0)),
|
||||||
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
|
DAG.getNode(ISD::BIT_CONVERT, MVT::v2i64, Mask)));
|
||||||
} else {
|
} else {
|
||||||
SDVTList Tys = DAG.getVTList(VT, MVT::Other);
|
|
||||||
SmallVector<SDOperand, 3> Ops;
|
|
||||||
Ops.push_back(DAG.getEntryNode());
|
|
||||||
Ops.push_back(CPIdx);
|
|
||||||
Ops.push_back(DAG.getSrcValue(NULL));
|
|
||||||
SDOperand Mask = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
|
|
||||||
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
|
return DAG.getNode(X86ISD::FXOR, VT, Op.getOperand(0), Mask);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -3442,14 +3433,10 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
|
||||||
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
||||||
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
||||||
}
|
}
|
||||||
Constant *CS = ConstantStruct::get(CV);
|
Constant *C = ConstantVector::get(CV);
|
||||||
SDOperand CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
SDOperand CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
|
||||||
SDVTList Tys = DAG.getVTList(SrcVT, MVT::Other);
|
SDOperand Mask1 = DAG.getLoad(SrcVT, DAG.getEntryNode(), CPIdx, NULL, 0,
|
||||||
SmallVector<SDOperand, 3> Ops;
|
false, 16);
|
||||||
Ops.push_back(DAG.getEntryNode());
|
|
||||||
Ops.push_back(CPIdx);
|
|
||||||
Ops.push_back(DAG.getSrcValue(NULL));
|
|
||||||
SDOperand Mask1 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
|
|
||||||
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
|
SDOperand SignBit = DAG.getNode(X86ISD::FAND, SrcVT, Op1, Mask1);
|
||||||
|
|
||||||
// Shift sign bit right or left if the two operands have different types.
|
// Shift sign bit right or left if the two operands have different types.
|
||||||
|
|
@ -3474,14 +3461,10 @@ SDOperand X86TargetLowering::LowerFCOPYSIGN(SDOperand Op, SelectionDAG &DAG) {
|
||||||
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
||||||
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
CV.push_back(ConstantFP::get(SrcTy, 0.0));
|
||||||
}
|
}
|
||||||
CS = ConstantStruct::get(CV);
|
C = ConstantVector::get(CV);
|
||||||
CPIdx = DAG.getConstantPool(CS, getPointerTy(), 4);
|
CPIdx = DAG.getConstantPool(C, getPointerTy(), 4);
|
||||||
Tys = DAG.getVTList(VT, MVT::Other);
|
SDOperand Mask2 = DAG.getLoad(VT, DAG.getEntryNode(), CPIdx, NULL, 0,
|
||||||
Ops.clear();
|
false, 16);
|
||||||
Ops.push_back(DAG.getEntryNode());
|
|
||||||
Ops.push_back(CPIdx);
|
|
||||||
Ops.push_back(DAG.getSrcValue(NULL));
|
|
||||||
SDOperand Mask2 = DAG.getNode(X86ISD::LOAD_PACK, Tys, &Ops[0], Ops.size());
|
|
||||||
SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
|
SDOperand Val = DAG.getNode(X86ISD::FAND, VT, Op0, Mask2);
|
||||||
|
|
||||||
// Or the value with the sign bit.
|
// Or the value with the sign bit.
|
||||||
|
|
@ -4357,8 +4340,6 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
|
||||||
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
|
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
|
||||||
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
|
case X86ISD::REP_STOS: return "X86ISD::REP_STOS";
|
||||||
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
|
case X86ISD::REP_MOVS: return "X86ISD::REP_MOVS";
|
||||||
case X86ISD::LOAD_PACK: return "X86ISD::LOAD_PACK";
|
|
||||||
case X86ISD::LOAD_UA: return "X86ISD::LOAD_UA";
|
|
||||||
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
|
case X86ISD::GlobalBaseReg: return "X86ISD::GlobalBaseReg";
|
||||||
case X86ISD::Wrapper: return "X86ISD::Wrapper";
|
case X86ISD::Wrapper: return "X86ISD::Wrapper";
|
||||||
case X86ISD::S2VEC: return "X86ISD::S2VEC";
|
case X86ISD::S2VEC: return "X86ISD::S2VEC";
|
||||||
|
|
@ -4756,19 +4737,14 @@ static SDOperand PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
|
||||||
}
|
}
|
||||||
|
|
||||||
bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
|
bool isAlign16 = isBaseAlignment16(Base->getOperand(1).Val, MFI, Subtarget);
|
||||||
|
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
||||||
if (isAlign16) {
|
if (isAlign16) {
|
||||||
LoadSDNode *LD = cast<LoadSDNode>(Base);
|
|
||||||
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
|
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
|
||||||
LD->getSrcValueOffset());
|
LD->getSrcValueOffset(), LD->isVolatile());
|
||||||
} else {
|
} else {
|
||||||
// Just use movups, it's shorter.
|
return DAG.getLoad(VT, LD->getChain(), LD->getBasePtr(), LD->getSrcValue(),
|
||||||
SDVTList Tys = DAG.getVTList(MVT::v4f32, MVT::Other);
|
LD->getSrcValueOffset(), LD->isVolatile(),
|
||||||
SmallVector<SDOperand, 3> Ops;
|
LD->getAlignment());
|
||||||
Ops.push_back(Base->getOperand(0));
|
|
||||||
Ops.push_back(Base->getOperand(1));
|
|
||||||
Ops.push_back(Base->getOperand(2));
|
|
||||||
return DAG.getNode(ISD::BIT_CONVERT, VT,
|
|
||||||
DAG.getNode(X86ISD::LOAD_UA, Tys, &Ops[0], Ops.size()));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -143,14 +143,6 @@ namespace llvm {
|
||||||
/// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
|
/// REP_MOVS - Repeat move, corresponds to X86::REP_MOVSx.
|
||||||
REP_MOVS,
|
REP_MOVS,
|
||||||
|
|
||||||
/// LOAD_PACK Load a 128-bit packed float / double value. It has the same
|
|
||||||
/// operands as a normal load.
|
|
||||||
LOAD_PACK,
|
|
||||||
|
|
||||||
/// LOAD_UA Load an unaligned 128-bit value. It has the same operands as
|
|
||||||
/// a normal load.
|
|
||||||
LOAD_UA,
|
|
||||||
|
|
||||||
/// GlobalBaseReg - On Darwin, this node represents the result of the popl
|
/// GlobalBaseReg - On Darwin, this node represents the result of the popl
|
||||||
/// at function entry, used for PIC code.
|
/// at function entry, used for PIC code.
|
||||||
GlobalBaseReg,
|
GlobalBaseReg,
|
||||||
|
|
|
||||||
|
|
@ -21,8 +21,6 @@
|
||||||
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
|
def SDTX86FPShiftOp : SDTypeProfile<1, 2, [ SDTCisSameAs<0, 1>,
|
||||||
SDTCisFP<0>, SDTCisInt<2> ]>;
|
SDTCisFP<0>, SDTCisInt<2> ]>;
|
||||||
|
|
||||||
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
|
|
||||||
def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>;
|
|
||||||
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
|
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
|
||||||
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
|
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
|
||||||
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
|
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
|
||||||
|
|
@ -82,33 +80,32 @@ def sdmem : Operand<v2f64> {
|
||||||
// SSE pattern fragments
|
// SSE pattern fragments
|
||||||
//===----------------------------------------------------------------------===//
|
//===----------------------------------------------------------------------===//
|
||||||
|
|
||||||
def X86loadpf32 : PatFrag<(ops node:$ptr), (f32 (X86loadp node:$ptr))>;
|
|
||||||
def X86loadpf64 : PatFrag<(ops node:$ptr), (f64 (X86loadp node:$ptr))>;
|
|
||||||
|
|
||||||
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
|
def loadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (load node:$ptr))>;
|
||||||
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
|
def loadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (load node:$ptr))>;
|
||||||
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
|
def loadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (load node:$ptr))>;
|
||||||
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
|
def loadv2i64 : PatFrag<(ops node:$ptr), (v2i64 (load node:$ptr))>;
|
||||||
|
|
||||||
// Like 'store', but always requires natural alignment.
|
// Like 'store', but always requires vector alignment.
|
||||||
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
|
def alignedstore : PatFrag<(ops node:$val, node:$ptr),
|
||||||
(st node:$val, node:$ptr), [{
|
(st node:$val, node:$ptr), [{
|
||||||
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
|
if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
|
||||||
return !ST->isTruncatingStore() &&
|
return !ST->isTruncatingStore() &&
|
||||||
ST->getAddressingMode() == ISD::UNINDEXED &&
|
ST->getAddressingMode() == ISD::UNINDEXED &&
|
||||||
ST->getAlignment() * 8 >= MVT::getSizeInBits(ST->getStoredVT());
|
ST->getAlignment() >= 16;
|
||||||
return false;
|
return false;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
// Like 'load', but always requires natural alignment.
|
// Like 'load', but always requires vector alignment.
|
||||||
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
|
def alignedload : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
|
||||||
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
|
||||||
return LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
return LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
||||||
LD->getAddressingMode() == ISD::UNINDEXED &&
|
LD->getAddressingMode() == ISD::UNINDEXED &&
|
||||||
LD->getAlignment() * 8 >= MVT::getSizeInBits(LD->getLoadedVT());
|
LD->getAlignment() >= 16;
|
||||||
return false;
|
return false;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
def alignedloadfsf32 : PatFrag<(ops node:$ptr), (f32 (alignedload node:$ptr))>;
|
||||||
|
def alignedloadfsf64 : PatFrag<(ops node:$ptr), (f64 (alignedload node:$ptr))>;
|
||||||
def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
|
def alignedloadv4f32 : PatFrag<(ops node:$ptr), (v4f32 (alignedload node:$ptr))>;
|
||||||
def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
|
def alignedloadv2f64 : PatFrag<(ops node:$ptr), (v2f64 (alignedload node:$ptr))>;
|
||||||
def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
|
def alignedloadv4i32 : PatFrag<(ops node:$ptr), (v4i32 (alignedload node:$ptr))>;
|
||||||
|
|
@ -123,10 +120,12 @@ def memop : PatFrag<(ops node:$ptr), (ld node:$ptr), [{
|
||||||
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
|
if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N))
|
||||||
return LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
return LD->getExtensionType() == ISD::NON_EXTLOAD &&
|
||||||
LD->getAddressingMode() == ISD::UNINDEXED &&
|
LD->getAddressingMode() == ISD::UNINDEXED &&
|
||||||
LD->getAlignment() * 8 >= MVT::getSizeInBits(LD->getLoadedVT());
|
LD->getAlignment() >= 16;
|
||||||
return false;
|
return false;
|
||||||
}]>;
|
}]>;
|
||||||
|
|
||||||
|
def memopfsf32 : PatFrag<(ops node:$ptr), (f32 (memop node:$ptr))>;
|
||||||
|
def memopfsf64 : PatFrag<(ops node:$ptr), (f64 (memop node:$ptr))>;
|
||||||
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
|
def memopv4f32 : PatFrag<(ops node:$ptr), (v4f32 (memop node:$ptr))>;
|
||||||
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
|
def memopv2f64 : PatFrag<(ops node:$ptr), (v2f64 (memop node:$ptr))>;
|
||||||
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
|
def memopv4i32 : PatFrag<(ops node:$ptr), (v4i32 (memop node:$ptr))>;
|
||||||
|
|
@ -410,7 +409,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
|
||||||
// disregarded.
|
// disregarded.
|
||||||
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
|
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
|
||||||
"movaps {$src, $dst|$dst, $src}",
|
"movaps {$src, $dst|$dst, $src}",
|
||||||
[(set FR32:$dst, (X86loadpf32 addr:$src))]>;
|
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
|
||||||
|
|
||||||
// Alias bitwise logical operations using SSE logical ops on packed FP values.
|
// Alias bitwise logical operations using SSE logical ops on packed FP values.
|
||||||
let isTwoAddress = 1 in {
|
let isTwoAddress = 1 in {
|
||||||
|
|
@ -429,15 +428,15 @@ let isCommutable = 1 in {
|
||||||
def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
def FsANDPSrm : PSI<0x54, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
||||||
"andps {$src2, $dst|$dst, $src2}",
|
"andps {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR32:$dst, (X86fand FR32:$src1,
|
[(set FR32:$dst, (X86fand FR32:$src1,
|
||||||
(X86loadpf32 addr:$src2)))]>;
|
(memopfsf32 addr:$src2)))]>;
|
||||||
def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
def FsORPSrm : PSI<0x56, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
||||||
"orps {$src2, $dst|$dst, $src2}",
|
"orps {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR32:$dst, (X86for FR32:$src1,
|
[(set FR32:$dst, (X86for FR32:$src1,
|
||||||
(X86loadpf32 addr:$src2)))]>;
|
(memopfsf32 addr:$src2)))]>;
|
||||||
def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
def FsXORPSrm : PSI<0x57, MRMSrcMem, (outs FR32:$dst), (ins FR32:$src1, f128mem:$src2),
|
||||||
"xorps {$src2, $dst|$dst, $src2}",
|
"xorps {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR32:$dst, (X86fxor FR32:$src1,
|
[(set FR32:$dst, (X86fxor FR32:$src1,
|
||||||
(X86loadpf32 addr:$src2)))]>;
|
(memopfsf32 addr:$src2)))]>;
|
||||||
|
|
||||||
def FsANDNPSrr : PSI<0x55, MRMSrcReg,
|
def FsANDNPSrr : PSI<0x55, MRMSrcReg,
|
||||||
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
|
(outs FR32:$dst), (ins FR32:$src1, FR32:$src2),
|
||||||
|
|
@ -1083,7 +1082,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
|
||||||
// disregarded.
|
// disregarded.
|
||||||
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
|
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
|
||||||
"movapd {$src, $dst|$dst, $src}",
|
"movapd {$src, $dst|$dst, $src}",
|
||||||
[(set FR64:$dst, (X86loadpf64 addr:$src))]>;
|
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
|
||||||
|
|
||||||
// Alias bitwise logical operations using SSE logical ops on packed FP values.
|
// Alias bitwise logical operations using SSE logical ops on packed FP values.
|
||||||
let isTwoAddress = 1 in {
|
let isTwoAddress = 1 in {
|
||||||
|
|
@ -1102,15 +1101,15 @@ let isCommutable = 1 in {
|
||||||
def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
def FsANDPDrm : PDI<0x54, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
||||||
"andpd {$src2, $dst|$dst, $src2}",
|
"andpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR64:$dst, (X86fand FR64:$src1,
|
[(set FR64:$dst, (X86fand FR64:$src1,
|
||||||
(X86loadpf64 addr:$src2)))]>;
|
(memopfsf64 addr:$src2)))]>;
|
||||||
def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
def FsORPDrm : PDI<0x56, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
||||||
"orpd {$src2, $dst|$dst, $src2}",
|
"orpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR64:$dst, (X86for FR64:$src1,
|
[(set FR64:$dst, (X86for FR64:$src1,
|
||||||
(X86loadpf64 addr:$src2)))]>;
|
(memopfsf64 addr:$src2)))]>;
|
||||||
def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
def FsXORPDrm : PDI<0x57, MRMSrcMem, (outs FR64:$dst), (ins FR64:$src1, f128mem:$src2),
|
||||||
"xorpd {$src2, $dst|$dst, $src2}",
|
"xorpd {$src2, $dst|$dst, $src2}",
|
||||||
[(set FR64:$dst, (X86fxor FR64:$src1,
|
[(set FR64:$dst, (X86fxor FR64:$src1,
|
||||||
(X86loadpf64 addr:$src2)))]>;
|
(memopfsf64 addr:$src2)))]>;
|
||||||
|
|
||||||
def FsANDNPDrr : PDI<0x55, MRMSrcReg,
|
def FsANDNPDrr : PDI<0x55, MRMSrcReg,
|
||||||
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
|
(outs FR64:$dst), (ins FR64:$src1, FR64:$src2),
|
||||||
|
|
@ -2630,11 +2629,11 @@ def : Pat<(v2i64 (and (xor VR128:$src1, (bc_v2i64 (v16i8 immAllOnesV))),
|
||||||
(load addr:$src2))),
|
(load addr:$src2))),
|
||||||
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
(PANDNrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
|
||||||
|
|
||||||
// Unaligned load
|
|
||||||
def : Pat<(v4f32 (X86loadu addr:$src)), (MOVUPSrm addr:$src)>,
|
|
||||||
Requires<[HasSSE1]>;
|
|
||||||
|
|
||||||
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
// Use movaps / movups for SSE integer load / store (one byte shorter).
|
||||||
|
def : Pat<(alignedloadv4i32 addr:$src),
|
||||||
|
(MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
|
||||||
|
def : Pat<(loadv4i32 addr:$src),
|
||||||
|
(MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
|
||||||
def : Pat<(alignedloadv2i64 addr:$src),
|
def : Pat<(alignedloadv2i64 addr:$src),
|
||||||
(MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
|
(MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
|
||||||
def : Pat<(loadv2i64 addr:$src),
|
def : Pat<(loadv2i64 addr:$src),
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,14 @@
|
||||||
|
; RUN: llvm-as < %s | llc -march=x86 -mattr=+sse -enable-unsafe-fp-math | \
|
||||||
|
; RUN: grep -v sp | grep xorps | wc -l | grep 2
|
||||||
|
|
||||||
|
; Don't fold the incoming stack arguments into the xorps instructions used
|
||||||
|
; to do floating-point negations, because the arguments aren't vectors
|
||||||
|
; and aren't vector-aligned.
|
||||||
|
|
||||||
|
define void @foo(float* %p, float* %q, float %s, float %y) {
|
||||||
|
%ss = sub float -0.0, %s
|
||||||
|
%yy = sub float -0.0, %y
|
||||||
|
store float %ss, float* %p
|
||||||
|
store float %yy, float* %q
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
|
; RUN: llvm-upgrade < %s | llvm-as | llc -march=x86 -mattr=+sse2 -o %t -f
|
||||||
; RUN: grep shufp %t | wc -l | grep 1
|
; RUN: grep shufp %t | wc -l | grep 1
|
||||||
; RUN: grep movups %t | wc -l | grep 1
|
; RUN: grep movupd %t | wc -l | grep 1
|
||||||
; RUN: grep pshufhw %t | wc -l | grep 1
|
; RUN: grep pshufhw %t | wc -l | grep 1
|
||||||
|
|
||||||
void %test_v4sf(<4 x float>* %P, float %X, float %Y) {
|
void %test_v4sf(<4 x float>* %P, float %X, float %Y) {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue