forked from OSchip/llvm-project
				
			Custom lower FMA intrinsics to target-specific nodes and remove the patterns.
llvm-svn: 162534
This commit is contained in:
		
							parent
							
								
									fe6eb67b12
								
							
						
					
					
						commit
						663d160adb
					
				| 
						 | 
				
			
			@ -10077,6 +10077,78 @@ X86TargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const
 | 
			
		|||
    SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::i32);
 | 
			
		||||
    return DAG.getNode(Opcode, dl, VTs, NewOps.data(), NewOps.size());
 | 
			
		||||
  }
 | 
			
		||||
  case Intrinsic::x86_fma_vfmadd_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmadd_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsub_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsub_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmadd_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmadd_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmsub_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmsub_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmaddsub_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmaddsub_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsubadd_ps:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsubadd_pd:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmadd_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmadd_pd_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsub_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsub_pd_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmadd_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmadd_pd_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmsub_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfnmsub_pd_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmaddsub_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmaddsub_pd_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsubadd_ps_256:
 | 
			
		||||
  case Intrinsic::x86_fma_vfmsubadd_pd_256: {
 | 
			
		||||
    // Only lower intrinsics if FMA is enabled. FMA4 still uses patterns.
 | 
			
		||||
    if (!Subtarget->hasFMA())
 | 
			
		||||
      return SDValue();
 | 
			
		||||
 | 
			
		||||
    unsigned Opc;
 | 
			
		||||
    switch (IntNo) {
 | 
			
		||||
    default: llvm_unreachable("Impossible intrinsic");  // Can't reach here.
 | 
			
		||||
    case Intrinsic::x86_fma_vfmadd_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmadd_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmadd_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmadd_pd_256:
 | 
			
		||||
      Opc = X86ISD::FMADD;
 | 
			
		||||
      break;
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsub_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsub_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsub_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsub_pd_256:
 | 
			
		||||
      Opc = X86ISD::FMSUB;
 | 
			
		||||
      break;
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmadd_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmadd_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmadd_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmadd_pd_256:
 | 
			
		||||
      Opc = X86ISD::FNMADD;
 | 
			
		||||
      break;
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmsub_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmsub_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmsub_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfnmsub_pd_256:
 | 
			
		||||
      Opc = X86ISD::FNMSUB;
 | 
			
		||||
      break;
 | 
			
		||||
    case Intrinsic::x86_fma_vfmaddsub_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmaddsub_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmaddsub_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmaddsub_pd_256:
 | 
			
		||||
      Opc = X86ISD::FMADDSUB;
 | 
			
		||||
      break;
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsubadd_ps:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsubadd_pd:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsubadd_ps_256:
 | 
			
		||||
    case Intrinsic::x86_fma_vfmsubadd_pd_256:
 | 
			
		||||
      Opc = X86ISD::FMSUBADD;
 | 
			
		||||
      break;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    return DAG.getNode(Opc, dl, Op.getValueType(), Op.getOperand(1),
 | 
			
		||||
                       Op.getOperand(2), Op.getOperand(3));
 | 
			
		||||
  }
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -112,137 +112,6 @@ let ExeDomain = SSEPackedDouble in {
 | 
			
		|||
                               v4f64>, VEX_W;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
let Predicates = [HasFMA] in {
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMADDSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFMADDSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMSUBADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFMSUBADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMADDSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFMADDSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMSUBADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFMSUBADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMADDSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFMADDSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFMSUBADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFMSUBADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmadd_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsub_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMADDSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmaddsub_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFMADDSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFMSUBADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfmsubadd_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFMSUBADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFNMADDPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFNMADDPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFNMSUBPSr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_ps VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv4f32 addr:$src3)),
 | 
			
		||||
            (VFNMSUBPSr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFNMADDPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFNMADDPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFNMSUBPSr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_ps_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv8f32 addr:$src3)),
 | 
			
		||||
            (VFNMSUBPSr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFNMADDPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFNMADDPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1, VR128:$src3),
 | 
			
		||||
            (VFNMSUBPDr213r VR128:$src1, VR128:$src2, VR128:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_pd VR128:$src2, VR128:$src1,
 | 
			
		||||
             (memopv2f64 addr:$src3)),
 | 
			
		||||
            (VFNMSUBPDr213m VR128:$src1, VR128:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFNMADDPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmadd_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFNMADDPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1, VR256:$src3),
 | 
			
		||||
            (VFNMSUBPDr213rY VR256:$src1, VR256:$src2, VR256:$src3)>;
 | 
			
		||||
  def : Pat<(int_x86_fma_vfnmsub_pd_256 VR256:$src2, VR256:$src1,
 | 
			
		||||
             (memopv4f64 addr:$src3)),
 | 
			
		||||
            (VFNMSUBPDr213mY VR256:$src1, VR256:$src2, addr:$src3)>;
 | 
			
		||||
 | 
			
		||||
} // Predicates = [HasFMA]
 | 
			
		||||
 | 
			
		||||
let Constraints = "$src1 = $dst" in {
 | 
			
		||||
multiclass fma3s_rm<bits<8> opc, string OpcodeStr, X86MemOperand x86memop,
 | 
			
		||||
                    RegisterClass RC, ValueType OpVT, PatFrag mem_frag,
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -205,7 +205,7 @@ public:
 | 
			
		|||
  bool hasAES() const { return HasAES; }
 | 
			
		||||
  bool hasPCLMUL() const { return HasPCLMUL; }
 | 
			
		||||
  bool hasFMA() const { return HasFMA; }
 | 
			
		||||
  // FIXME: Favor FMA when both are enabled. Is this right?
 | 
			
		||||
  // FIXME: Favor FMA when both are enabled. Is this the right thing to do?
 | 
			
		||||
  bool hasFMA4() const { return HasFMA4 && !HasFMA; }
 | 
			
		||||
  bool hasXOP() const { return HasXOP; }
 | 
			
		||||
  bool hasMOVBE() const { return HasMOVBE; }
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
		Reference in New Issue