Fixed x86 code generation of multiply for v2i64. It was incorrect for SSE4.1.
llvm-svn: 61211
This commit is contained in:
parent
6e5f4bc1e7
commit
998fd29ce1
|
|
@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
|
setOperationAction(ISD::ADD, MVT::v8i16, Legal);
|
||||||
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
|
setOperationAction(ISD::ADD, MVT::v4i32, Legal);
|
||||||
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
|
setOperationAction(ISD::ADD, MVT::v2i64, Legal);
|
||||||
|
setOperationAction(ISD::MUL, MVT::v2i64, Custom);
|
||||||
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
|
setOperationAction(ISD::SUB, MVT::v16i8, Legal);
|
||||||
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
|
setOperationAction(ISD::SUB, MVT::v8i16, Legal);
|
||||||
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
|
setOperationAction(ISD::SUB, MVT::v4i32, Legal);
|
||||||
|
|
@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
|
||||||
if (Subtarget->hasSSE41()) {
|
if (Subtarget->hasSSE41()) {
|
||||||
// FIXME: Do we need to handle scalar-to-vector here?
|
// FIXME: Do we need to handle scalar-to-vector here?
|
||||||
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
setOperationAction(ISD::MUL, MVT::v4i32, Legal);
|
||||||
setOperationAction(ISD::MUL, MVT::v2i64, Legal);
|
|
||||||
|
|
||||||
// i8 and i16 vectors are custom , because the source register and source
|
// i8 and i16 vectors are custom , because the source register and source
|
||||||
// source memory operand types are not the same width. f32 vectors are
|
// source memory operand types are not the same width. f32 vectors are
|
||||||
|
|
@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
|
||||||
return Op;
|
return Op;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// LowerMUL_V2I64 - Expand a v2i64 ISD::MUL node into SSE2 intrinsic nodes.
///
/// Each 64-bit lane product is assembled from three PMULUDQ partial products
/// (lo*lo, lo*hi and hi*lo). The two cross terms are shifted left by 32 and
/// all three are summed with v2i64 ADDs. The hi*hi term is never formed:
/// it would be scaled by 2^64 and so cannot affect the low 64 bits of a lane.
///
/// \param Op   the multiply node; its value type must be MVT::v2i64 (asserted).
/// \param DAG  the SelectionDAG in which the replacement nodes are created.
/// \returns the final v2i64 ADD node that produces the product.
SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
|
||||||

MVT VT = Op.getValueType();
|
||||||

assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");
|
||||||

|
||||||

// ulong2 Ahi = __builtin_ia32_psrlqi128( a, 32);
|
||||||

// ulong2 Bhi = __builtin_ia32_psrlqi128( b, 32);
|
||||||

// ulong2 AloBlo = __builtin_ia32_pmuludq128( a, b );
|
||||||

// ulong2 AloBhi = __builtin_ia32_pmuludq128( a, Bhi );
|
||||||

// ulong2 AhiBlo = __builtin_ia32_pmuludq128( Ahi, b );
|
||||||

//
|
||||||

// AloBhi = __builtin_ia32_psllqi128( AloBhi, 32 );
|
||||||

// AhiBlo = __builtin_ia32_psllqi128( AhiBlo, 32 );
|
||||||

// return AloBlo + AloBhi + AhiBlo;
|
||||||

|
||||||

SDValue A = Op.getOperand(0);
|
||||||

SDValue B = Op.getOperand(1);
|
||||||

|
||||||

// Shift each 64-bit lane right by 32 so its upper half sits in the lower half.
SDValue Ahi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
|
||||||

A, DAG.getConstant(32, MVT::i32));
|
||||||

SDValue Bhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_psrli_q, MVT::i32),
|
||||||

B, DAG.getConstant(32, MVT::i32));
|
||||||

// Partial products. NOTE(review): this relies on pmuludq reading only the
// low 32 bits of each 64-bit lane of its operands -- confirm against the ISA.
SDValue AloBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
|
||||||

A, B);
|
||||||

SDValue AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
|
||||||

A, Bhi);
|
||||||

SDValue AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_pmulu_dq, MVT::i32),
|
||||||

Ahi, B);
|
||||||

// Scale both cross terms by 2^32 before they are added to the lo*lo term.
AloBhi = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
|
||||||

AloBhi, DAG.getConstant(32, MVT::i32));
|
||||||

AhiBlo = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
|
||||||

DAG.getConstant(Intrinsic::x86_sse2_pslli_q, MVT::i32),
|
||||||

AhiBlo, DAG.getConstant(32, MVT::i32));
|
||||||

// Sum the three terms with ordinary v2i64 ADD nodes.
SDValue Res = DAG.getNode(ISD::ADD, VT, AloBlo, AloBhi);
|
||||||

Res = DAG.getNode(ISD::ADD, VT, Res, AhiBlo);
|
||||||

return Res;
|
||||||

}
|
||||||
|
|
||||||
|
|
||||||
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
|
||||||
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
|
// Lower the "add/sub/mul with overflow" instruction into a regular ins plus
|
||||||
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
|
// a "setcc" instruction that checks the overflow flag. The "brcond" lowering
|
||||||
|
|
@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
|
||||||
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
case ISD::FLT_ROUNDS_: return LowerFLT_ROUNDS_(Op, DAG);
|
||||||
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
|
case ISD::CTLZ: return LowerCTLZ(Op, DAG);
|
||||||
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
|
case ISD::CTTZ: return LowerCTTZ(Op, DAG);
|
||||||
|
case ISD::MUL: return LowerMUL_V2I64(Op, DAG);
|
||||||
case ISD::SADDO:
|
case ISD::SADDO:
|
||||||
case ISD::UADDO:
|
case ISD::UADDO:
|
||||||
case ISD::SSUBO:
|
case ISD::SSUBO:
|
||||||
|
|
|
||||||
|
|
@ -597,6 +597,7 @@ namespace llvm {
|
||||||
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
|
SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
|
||||||
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
|
SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
|
||||||
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
|
SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
|
||||||
|
SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
|
||||||
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
|
SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
|
||||||
|
|
||||||
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
|
SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
|
||||||
|
|
|
||||||
|
|
@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
|
||||||
{ X86::PMINSWrr, X86::PMINSWrm },
|
{ X86::PMINSWrr, X86::PMINSWrm },
|
||||||
{ X86::PMINUBrr, X86::PMINUBrm },
|
{ X86::PMINUBrr, X86::PMINUBrm },
|
||||||
{ X86::PMULDQrr, X86::PMULDQrm },
|
{ X86::PMULDQrr, X86::PMULDQrm },
|
||||||
{ X86::PMULDQrr_int, X86::PMULDQrm_int },
|
|
||||||
{ X86::PMULHUWrr, X86::PMULHUWrm },
|
{ X86::PMULHUWrr, X86::PMULHUWrm },
|
||||||
{ X86::PMULHWrr, X86::PMULHWrm },
|
{ X86::PMULHWrr, X86::PMULHWrm },
|
||||||
{ X86::PMULLDrr, X86::PMULLDrm },
|
{ X86::PMULLDrr, X86::PMULLDrm },
|
||||||
|
|
|
||||||
|
|
@ -3313,12 +3313,13 @@ defm PMAXUD : SS41I_binop_rm_int<0x3F, "pmaxud",
|
||||||
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
defm PMAXUW : SS41I_binop_rm_int<0x3E, "pmaxuw",
|
||||||
int_x86_sse41_pmaxuw, 1>;
|
int_x86_sse41_pmaxuw, 1>;
|
||||||
|
|
||||||
|
defm PMULDQ : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
|
||||||
|
|
||||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
|
||||||
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
|
(PCMPEQQrr VR128:$src1, VR128:$src2)>;
|
||||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
|
||||||
(PCMPEQQrm VR128:$src1, addr:$src2)>;
|
(PCMPEQQrm VR128:$src1, addr:$src2)>;
|
||||||
|
|
||||||
|
|
||||||
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
|
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
|
multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
|
||||||
|
|
@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in {
|
||||||
}
|
}
|
||||||
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
|
defm PMULLD : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
|
||||||
int_x86_sse41_pmulld, 1>;
|
int_x86_sse41_pmulld, 1>;
|
||||||
defm PMULDQ : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
|
|
||||||
int_x86_sse41_pmuldq, 1>;
|
|
||||||
|
|
||||||
|
|
||||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
|
||||||
let Constraints = "$src1 = $dst" in {
|
let Constraints = "$src1 = $dst" in {
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue