Fixed x86 code generation of multiply for v2i64. It was incorrect for SSE4.1.
llvm-svn: 61211
This commit is contained in:
		
							parent
							
								
									6e5f4bc1e7
								
							
						
					
					
						commit
						998fd29ce1
					
				| 
						 | 
					@ -687,6 +687,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 | 
				
			||||||
    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
 | 
					    setOperationAction(ISD::ADD,                MVT::v8i16, Legal);
 | 
				
			||||||
    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
 | 
					    setOperationAction(ISD::ADD,                MVT::v4i32, Legal);
 | 
				
			||||||
    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
 | 
					    setOperationAction(ISD::ADD,                MVT::v2i64, Legal);
 | 
				
			||||||
 | 
					    setOperationAction(ISD::MUL,                MVT::v2i64, Custom);
 | 
				
			||||||
    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
 | 
					    setOperationAction(ISD::SUB,                MVT::v16i8, Legal);
 | 
				
			||||||
    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
 | 
					    setOperationAction(ISD::SUB,                MVT::v8i16, Legal);
 | 
				
			||||||
    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
 | 
					    setOperationAction(ISD::SUB,                MVT::v4i32, Legal);
 | 
				
			||||||
| 
						 | 
					@ -758,7 +759,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
 | 
				
			||||||
  if (Subtarget->hasSSE41()) {
 | 
					  if (Subtarget->hasSSE41()) {
 | 
				
			||||||
    // FIXME: Do we need to handle scalar-to-vector here?
 | 
					    // FIXME: Do we need to handle scalar-to-vector here?
 | 
				
			||||||
    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
 | 
					    setOperationAction(ISD::MUL,                MVT::v4i32, Legal);
 | 
				
			||||||
    setOperationAction(ISD::MUL,                MVT::v2i64, Legal);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // i8 and i16 vectors are custom , because the source register and source
 | 
					    // i8 and i16 vectors are custom , because the source register and source
 | 
				
			||||||
    // source memory operand types are not the same width.  f32 vectors are
 | 
					    // source memory operand types are not the same width.  f32 vectors are
 | 
				
			||||||
| 
						 | 
					@ -6136,6 +6136,50 @@ SDValue X86TargetLowering::LowerCTTZ(SDValue Op, SelectionDAG &DAG) {
 | 
				
			||||||
  return Op;
 | 
					  return Op;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/// EmitV2I64BinaryIntrinsic - Build an ISD::INTRINSIC_WO_CHAIN node of type
					/// VT that applies the intrinsic IntNo to the operands Op0 and Op1.  The
					/// intrinsic ID is handed to the node as an i32 constant operand.
					static SDValue EmitV2I64BinaryIntrinsic(SelectionDAG &DAG, MVT VT,
					                                        unsigned IntNo,
					                                        SDValue Op0, SDValue Op1) {
					  return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, VT,
					                     DAG.getConstant(IntNo, MVT::i32), Op0, Op1);
					}

					/// LowerMUL_V2I64 - Custom lowering of a v2i64 multiply.
					///
					/// The result is assembled from three x86_sse2_pmulu_dq partial products,
					/// following this recipe (a and b are the operands of the multiply):
					///
					///   Ahi    = psrlq(a, 32)
					///   Bhi    = psrlq(b, 32)
					///   AloBlo = pmuludq(a,   b)
					///   AloBhi = pmuludq(a,   Bhi)
					///   AhiBlo = pmuludq(Ahi, b)
					///   result = AloBlo + psllq(AloBhi, 32) + psllq(AhiBlo, 32)
					///
					/// Only MVT::v2i64 is supported; any other value type trips the assert.
					SDValue X86TargetLowering::LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG) {
					  MVT VT = Op.getValueType();
					  assert(VT == MVT::v2i64 && "Only know how to lower V2I64 multiply");

					  SDValue LHS = Op.getOperand(0);
					  SDValue RHS = Op.getOperand(1);

					  // Shift each operand right by 32 to obtain the "hi" inputs.
					  SDValue LHSHi =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_psrli_q,
					                             LHS, DAG.getConstant(32, MVT::i32));
					  SDValue RHSHi =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_psrli_q,
					                             RHS, DAG.getConstant(32, MVT::i32));

					  // The three partial products: lo*lo, lo*hi and hi*lo.
					  SDValue LoLo =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_pmulu_dq,
					                             LHS, RHS);
					  SDValue LoHi =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_pmulu_dq,
					                             LHS, RHSHi);
					  SDValue HiLo =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_pmulu_dq,
					                             LHSHi, RHS);

					  // Shift both cross products left by 32 before they are summed.
					  SDValue LoHiShifted =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_pslli_q,
					                             LoHi, DAG.getConstant(32, MVT::i32));
					  SDValue HiLoShifted =
					    EmitV2I64BinaryIntrinsic(DAG, VT, Intrinsic::x86_sse2_pslli_q,
					                             HiLo, DAG.getConstant(32, MVT::i32));

					  // result = LoLo + LoHiShifted + HiLoShifted
					  SDValue Sum = DAG.getNode(ISD::ADD, VT, LoLo, LoHiShifted);
					  return DAG.getNode(ISD::ADD, VT, Sum, HiLoShifted);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
 | 
					SDValue X86TargetLowering::LowerXALUO(SDValue Op, SelectionDAG &DAG) {
 | 
				
			||||||
  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
 | 
					  // Lower the "add/sub/mul with overflow" instruction into a regular ins plus
 | 
				
			||||||
  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
 | 
					  // a "setcc" instruction that checks the overflow flag. The "brcond" lowering
 | 
				
			||||||
| 
						 | 
					@ -6305,6 +6349,7 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
 | 
				
			||||||
  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
 | 
					  case ISD::FLT_ROUNDS_:        return LowerFLT_ROUNDS_(Op, DAG);
 | 
				
			||||||
  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
 | 
					  case ISD::CTLZ:               return LowerCTLZ(Op, DAG);
 | 
				
			||||||
  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
 | 
					  case ISD::CTTZ:               return LowerCTTZ(Op, DAG);
 | 
				
			||||||
 | 
					  case ISD::MUL:                return LowerMUL_V2I64(Op, DAG);
 | 
				
			||||||
  case ISD::SADDO:
 | 
					  case ISD::SADDO:
 | 
				
			||||||
  case ISD::UADDO:
 | 
					  case ISD::UADDO:
 | 
				
			||||||
  case ISD::SSUBO:
 | 
					  case ISD::SSUBO:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -597,6 +597,7 @@ namespace llvm {
 | 
				
			||||||
    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
 | 
					    SDValue LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
 | 
					    SDValue LowerCTLZ(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
 | 
					    SDValue LowerCTTZ(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
 | 
					    SDValue LowerMUL_V2I64(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
 | 
					    SDValue LowerXALUO(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
 | 
					    SDValue LowerCMP_SWAP(SDValue Op, SelectionDAG &DAG);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -581,7 +581,6 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
 | 
				
			||||||
    { X86::PMINSWrr,        X86::PMINSWrm },
 | 
					    { X86::PMINSWrr,        X86::PMINSWrm },
 | 
				
			||||||
    { X86::PMINUBrr,        X86::PMINUBrm },
 | 
					    { X86::PMINUBrr,        X86::PMINUBrm },
 | 
				
			||||||
    { X86::PMULDQrr,        X86::PMULDQrm },
 | 
					    { X86::PMULDQrr,        X86::PMULDQrm },
 | 
				
			||||||
    { X86::PMULDQrr_int,    X86::PMULDQrm_int },
 | 
					 | 
				
			||||||
    { X86::PMULHUWrr,       X86::PMULHUWrm },
 | 
					    { X86::PMULHUWrr,       X86::PMULHUWrm },
 | 
				
			||||||
    { X86::PMULHWrr,        X86::PMULHWrm },
 | 
					    { X86::PMULHWrr,        X86::PMULHWrm },
 | 
				
			||||||
    { X86::PMULLDrr,        X86::PMULLDrm },
 | 
					    { X86::PMULLDrr,        X86::PMULLDrm },
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3313,12 +3313,13 @@ defm PMAXUD       : SS41I_binop_rm_int<0x3F, "pmaxud",
 | 
				
			||||||
defm PMAXUW       : SS41I_binop_rm_int<0x3E, "pmaxuw",
 | 
					defm PMAXUW       : SS41I_binop_rm_int<0x3E, "pmaxuw",
 | 
				
			||||||
                                       int_x86_sse41_pmaxuw, 1>;
 | 
					                                       int_x86_sse41_pmaxuw, 1>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					defm PMULDQ       : SS41I_binop_rm_int<0x28, "pmuldq", int_x86_sse41_pmuldq, 1>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
 | 
					def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, VR128:$src2)),
 | 
				
			||||||
          (PCMPEQQrr VR128:$src1, VR128:$src2)>;
 | 
					          (PCMPEQQrr VR128:$src1, VR128:$src2)>;
 | 
				
			||||||
def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
 | 
					def : Pat<(v2i64 (X86pcmpeqq VR128:$src1, (memop addr:$src2))),
 | 
				
			||||||
          (PCMPEQQrm VR128:$src1, addr:$src2)>;
 | 
					          (PCMPEQQrm VR128:$src1, addr:$src2)>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					 | 
				
			||||||
/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
 | 
					/// SS41I_binop_rm_int - Simple SSE 4.1 binary operator
 | 
				
			||||||
let Constraints = "$src1 = $dst" in {
 | 
					let Constraints = "$src1 = $dst" in {
 | 
				
			||||||
  multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
 | 
					  multiclass SS41I_binop_patint<bits<8> opc, string OpcodeStr, ValueType OpVT,
 | 
				
			||||||
| 
						 | 
					@ -3353,9 +3354,6 @@ let Constraints = "$src1 = $dst" in {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
defm PMULLD       : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
 | 
					defm PMULLD       : SS41I_binop_patint<0x40, "pmulld", v4i32, mul,
 | 
				
			||||||
                                       int_x86_sse41_pmulld, 1>;
 | 
					                                       int_x86_sse41_pmulld, 1>;
 | 
				
			||||||
defm PMULDQ       : SS41I_binop_patint<0x28, "pmuldq", v2i64, mul,
 | 
					 | 
				
			||||||
                                       int_x86_sse41_pmuldq, 1>;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
 | 
					/// SS41I_binop_rmi_int - SSE 4.1 binary operator with 8-bit immediate
 | 
				
			||||||
let Constraints = "$src1 = $dst" in {
 | 
					let Constraints = "$src1 = $dst" in {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue