forked from OSchip/llvm-project
				
			
							parent
							
								
									954eae0ed4
								
							
						
					
					
						commit
						cc3c2b3946
					
				| 
						 | 
					@ -2374,6 +2374,12 @@ const char* AMDGPUTargetLowering::getTargetNodeName(unsigned Opcode) const {
 | 
				
			||||||
  NODE_NAME_CASE(FMIN_LEGACY)
 | 
					  NODE_NAME_CASE(FMIN_LEGACY)
 | 
				
			||||||
  NODE_NAME_CASE(SMIN)
 | 
					  NODE_NAME_CASE(SMIN)
 | 
				
			||||||
  NODE_NAME_CASE(UMIN)
 | 
					  NODE_NAME_CASE(UMIN)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(FMAX3)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(SMAX3)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(UMAX3)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(FMIN3)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(SMIN3)
 | 
				
			||||||
 | 
					  NODE_NAME_CASE(UMIN3)
 | 
				
			||||||
  NODE_NAME_CASE(URECIP)
 | 
					  NODE_NAME_CASE(URECIP)
 | 
				
			||||||
  NODE_NAME_CASE(DIV_SCALE)
 | 
					  NODE_NAME_CASE(DIV_SCALE)
 | 
				
			||||||
  NODE_NAME_CASE(DIV_FMAS)
 | 
					  NODE_NAME_CASE(DIV_FMAS)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -210,6 +210,12 @@ enum {
 | 
				
			||||||
  FMIN_LEGACY,
 | 
					  FMIN_LEGACY,
 | 
				
			||||||
  SMIN,
 | 
					  SMIN,
 | 
				
			||||||
  UMIN,
 | 
					  UMIN,
 | 
				
			||||||
 | 
					  FMAX3,
 | 
				
			||||||
 | 
					  SMAX3,
 | 
				
			||||||
 | 
					  UMAX3,
 | 
				
			||||||
 | 
					  FMIN3,
 | 
				
			||||||
 | 
					  SMIN3,
 | 
				
			||||||
 | 
					  UMIN3,
 | 
				
			||||||
  URECIP,
 | 
					  URECIP,
 | 
				
			||||||
  DIV_SCALE,
 | 
					  DIV_SCALE,
 | 
				
			||||||
  DIV_FMAS,
 | 
					  DIV_FMAS,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -84,7 +84,7 @@ def AMDGPUfmin_legacy : SDNode<"AMDGPUISD::FMIN_LEGACY", SDTFPBinOp,
 | 
				
			||||||
  [SDNPAssociative]
 | 
					  [SDNPAssociative]
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// out = min(a, b) a snd b are signed ints
 | 
					// out = min(a, b) a and b are signed ints
 | 
				
			||||||
def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
 | 
					def AMDGPUsmin : SDNode<"AMDGPUISD::SMIN", SDTIntBinOp,
 | 
				
			||||||
  [SDNPCommutative, SDNPAssociative]
 | 
					  [SDNPCommutative, SDNPAssociative]
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
| 
						 | 
					@ -94,6 +94,37 @@ def AMDGPUumin : SDNode<"AMDGPUISD::UMIN", SDTIntBinOp,
 | 
				
			||||||
  [SDNPCommutative, SDNPAssociative]
 | 
					  [SDNPCommutative, SDNPAssociative]
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// FIXME: TableGen doesn't like commutative instructions with more
 | 
				
			||||||
 | 
					// than 2 operands.
 | 
				
			||||||
 | 
					// out = max(a, b, c) a, b and c are floats
 | 
				
			||||||
 | 
					def AMDGPUfmax3 : SDNode<"AMDGPUISD::FMAX3", SDTFPTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// out = max(a, b, c) a, b and c are signed ints
 | 
				
			||||||
 | 
					def AMDGPUsmax3 : SDNode<"AMDGPUISD::SMAX3", AMDGPUDTIntTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// out = max(a, b, c) a, b and c are unsigned ints
 | 
				
			||||||
 | 
					def AMDGPUumax3 : SDNode<"AMDGPUISD::UMAX3", AMDGPUDTIntTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// out = min(a, b, c) a, b and c are floats
 | 
				
			||||||
 | 
					def AMDGPUfmin3 : SDNode<"AMDGPUISD::FMIN3", SDTFPTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// out = min(a, b, c) a, b and c are signed ints
 | 
				
			||||||
 | 
					def AMDGPUsmin3 : SDNode<"AMDGPUISD::SMIN3", AMDGPUDTIntTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// out = min(a, b, c) a, b and c are unsigned ints
 | 
				
			||||||
 | 
					def AMDGPUumin3 : SDNode<"AMDGPUISD::UMIN3", AMDGPUDTIntTernaryOp,
 | 
				
			||||||
 | 
					  [/*SDNPCommutative, SDNPAssociative*/]
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
 | 
					def AMDGPUcvt_f32_ubyte0 : SDNode<"AMDGPUISD::CVT_F32_UBYTE0",
 | 
				
			||||||
  SDTIntToFPOp, []>;
 | 
					  SDTIntToFPOp, []>;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -231,6 +231,8 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  setTargetDAGCombine(ISD::FADD);
 | 
					  setTargetDAGCombine(ISD::FADD);
 | 
				
			||||||
  setTargetDAGCombine(ISD::FSUB);
 | 
					  setTargetDAGCombine(ISD::FSUB);
 | 
				
			||||||
 | 
					  setTargetDAGCombine(ISD::FMINNUM);
 | 
				
			||||||
 | 
					  setTargetDAGCombine(ISD::FMAXNUM);
 | 
				
			||||||
  setTargetDAGCombine(ISD::SELECT_CC);
 | 
					  setTargetDAGCombine(ISD::SELECT_CC);
 | 
				
			||||||
  setTargetDAGCombine(ISD::SETCC);
 | 
					  setTargetDAGCombine(ISD::SETCC);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1314,6 +1316,61 @@ SDValue SITargetLowering::performSHLPtrCombine(SDNode *N,
 | 
				
			||||||
  return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
 | 
					  return DAG.getNode(ISD::ADD, SL, VT, ShlX, COffset);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static unsigned minMaxOpcToMin3Max3Opc(unsigned Opc) {
 | 
				
			||||||
 | 
					  switch (Opc) {
 | 
				
			||||||
 | 
					  case ISD::FMAXNUM:
 | 
				
			||||||
 | 
					    return AMDGPUISD::FMAX3;
 | 
				
			||||||
 | 
					  case AMDGPUISD::SMAX:
 | 
				
			||||||
 | 
					    return AMDGPUISD::SMAX3;
 | 
				
			||||||
 | 
					  case AMDGPUISD::UMAX:
 | 
				
			||||||
 | 
					    return AMDGPUISD::UMAX3;
 | 
				
			||||||
 | 
					  case ISD::FMINNUM:
 | 
				
			||||||
 | 
					    return AMDGPUISD::FMIN3;
 | 
				
			||||||
 | 
					  case AMDGPUISD::SMIN:
 | 
				
			||||||
 | 
					    return AMDGPUISD::SMIN3;
 | 
				
			||||||
 | 
					  case AMDGPUISD::UMIN:
 | 
				
			||||||
 | 
					    return AMDGPUISD::UMIN3;
 | 
				
			||||||
 | 
					  default:
 | 
				
			||||||
 | 
					    llvm_unreachable("Not a min/max opcode");
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					SDValue SITargetLowering::performMin3Max3Combine(SDNode *N,
 | 
				
			||||||
 | 
					                                                 DAGCombinerInfo &DCI) const {
 | 
				
			||||||
 | 
					  SelectionDAG &DAG = DCI.DAG;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  unsigned Opc = N->getOpcode();
 | 
				
			||||||
 | 
					  SDValue Op0 = N->getOperand(0);
 | 
				
			||||||
 | 
					  SDValue Op1 = N->getOperand(1);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Only do this if the inner op has one use, since otherwise this just increases
 | 
				
			||||||
 | 
					  // register pressure for no benefit.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // max(max(a, b), c)
 | 
				
			||||||
 | 
					  if (Op0.getOpcode() == Opc && Op0.hasOneUse()) {
 | 
				
			||||||
 | 
					    SDLoc DL(N);
 | 
				
			||||||
 | 
					    return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
 | 
				
			||||||
 | 
					                       DL,
 | 
				
			||||||
 | 
					                       N->getValueType(0),
 | 
				
			||||||
 | 
					                       Op0.getOperand(0),
 | 
				
			||||||
 | 
					                       Op0.getOperand(1),
 | 
				
			||||||
 | 
					                       Op1);
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // max(a, max(b, c))
 | 
				
			||||||
 | 
					  if (Op1.getOpcode() == Opc && Op1.hasOneUse()) {
 | 
				
			||||||
 | 
					    SDLoc DL(N);
 | 
				
			||||||
 | 
					    return DAG.getNode(minMaxOpcToMin3Max3Opc(Opc),
 | 
				
			||||||
 | 
					                       DL,
 | 
				
			||||||
 | 
					                       N->getValueType(0),
 | 
				
			||||||
 | 
					                       Op0,
 | 
				
			||||||
 | 
					                       Op1.getOperand(0),
 | 
				
			||||||
 | 
					                       Op1.getOperand(1));
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  return SDValue();
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
 | 
					SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
 | 
				
			||||||
                                            DAGCombinerInfo &DCI) const {
 | 
					                                            DAGCombinerInfo &DCI) const {
 | 
				
			||||||
  SelectionDAG &DAG = DCI.DAG;
 | 
					  SelectionDAG &DAG = DCI.DAG;
 | 
				
			||||||
| 
						 | 
					@ -1341,6 +1398,17 @@ SDValue SITargetLowering::PerformDAGCombine(SDNode *N,
 | 
				
			||||||
      }
 | 
					      }
 | 
				
			||||||
      break;
 | 
					      break;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					  case ISD::FMAXNUM: // TODO: What about fmax_legacy?
 | 
				
			||||||
 | 
					  case ISD::FMINNUM:
 | 
				
			||||||
 | 
					  case AMDGPUISD::SMAX:
 | 
				
			||||||
 | 
					  case AMDGPUISD::SMIN:
 | 
				
			||||||
 | 
					  case AMDGPUISD::UMAX:
 | 
				
			||||||
 | 
					  case AMDGPUISD::UMIN: {
 | 
				
			||||||
 | 
					    if (DCI.getDAGCombineLevel() >= AfterLegalizeDAG &&
 | 
				
			||||||
 | 
					        getTargetMachine().getOptLevel() > CodeGenOpt::None)
 | 
				
			||||||
 | 
					      return performMin3Max3Combine(N, DCI);
 | 
				
			||||||
 | 
					    break;
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  case AMDGPUISD::CVT_F32_UBYTE0:
 | 
					  case AMDGPUISD::CVT_F32_UBYTE0:
 | 
				
			||||||
  case AMDGPUISD::CVT_F32_UBYTE1:
 | 
					  case AMDGPUISD::CVT_F32_UBYTE1:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -59,6 +59,8 @@ class SITargetLowering : public AMDGPUTargetLowering {
 | 
				
			||||||
                               unsigned AS,
 | 
					                               unsigned AS,
 | 
				
			||||||
                               DAGCombinerInfo &DCI) const;
 | 
					                               DAGCombinerInfo &DCI) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  SDValue performMin3Max3Combine(SDNode *N, DAGCombinerInfo &DCI) const;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
public:
 | 
					public:
 | 
				
			||||||
  SITargetLowering(TargetMachine &tm);
 | 
					  SITargetLowering(TargetMachine &tm);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1573,15 +1573,27 @@ defm V_ALIGNBYTE_B32 : VOP3Inst <vop3<0x14f>, "v_alignbyte_b32",
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
 | 
					defm V_MULLIT_F32 : VOP3Inst <vop3<0x150>, "v_mullit_f32",
 | 
				
			||||||
  VOP_F32_F32_F32_F32>;
 | 
					  VOP_F32_F32_F32_F32>;
 | 
				
			||||||
////def V_MIN3_F32 : VOP3_MIN3 <0x00000151, "v_min3_f32", []>;
 | 
					defm V_MIN3_F32 : VOP3Inst <vop3<0x151>, "v_min3_f32",
 | 
				
			||||||
////def V_MIN3_I32 : VOP3_MIN3 <0x00000152, "v_min3_i32", []>;
 | 
					  VOP_F32_F32_F32_F32, AMDGPUfmin3>;
 | 
				
			||||||
////def V_MIN3_U32 : VOP3_MIN3 <0x00000153, "v_min3_u32", []>;
 | 
					
 | 
				
			||||||
////def V_MAX3_F32 : VOP3_MAX3 <0x00000154, "v_max3_f32", []>;
 | 
					defm V_MIN3_I32 : VOP3Inst <vop3<0x152>, "v_min3_i32",
 | 
				
			||||||
////def V_MAX3_I32 : VOP3_MAX3 <0x00000155, "v_max3_i32", []>;
 | 
					  VOP_I32_I32_I32_I32, AMDGPUsmin3
 | 
				
			||||||
////def V_MAX3_U32 : VOP3_MAX3 <0x00000156, "v_max3_u32", []>;
 | 
					>;
 | 
				
			||||||
////def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
 | 
					defm V_MIN3_U32 : VOP3Inst <vop3<0x153>, "v_min3_u32",
 | 
				
			||||||
////def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
 | 
					  VOP_I32_I32_I32_I32, AMDGPUumin3
 | 
				
			||||||
////def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
 | 
					>;
 | 
				
			||||||
 | 
					defm V_MAX3_F32 : VOP3Inst <vop3<0x154>, "v_max3_f32",
 | 
				
			||||||
 | 
					  VOP_F32_F32_F32_F32, AMDGPUfmax3
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					defm V_MAX3_I32 : VOP3Inst <vop3<0x155>, "v_max3_i32",
 | 
				
			||||||
 | 
					  VOP_I32_I32_I32_I32, AMDGPUsmax3
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					defm V_MAX3_U32 : VOP3Inst <vop3<0x156>, "v_max3_u32",
 | 
				
			||||||
 | 
					  VOP_I32_I32_I32_I32, AMDGPUumax3
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					//def V_MED3_F32 : VOP3_MED3 <0x00000157, "v_med3_f32", []>;
 | 
				
			||||||
 | 
					//def V_MED3_I32 : VOP3_MED3 <0x00000158, "v_med3_i32", []>;
 | 
				
			||||||
 | 
					//def V_MED3_U32 : VOP3_MED3 <0x00000159, "v_med3_u32", []>;
 | 
				
			||||||
//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
 | 
					//def V_SAD_U8 : VOP3_U8 <0x0000015a, "v_sad_u8", []>;
 | 
				
			||||||
//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
 | 
					//def V_SAD_HI_U8 : VOP3_U8 <0x0000015b, "v_sad_hi_u8", []>;
 | 
				
			||||||
//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
 | 
					//def V_SAD_U16 : VOP3_U16 <0x0000015c, "v_sad_u16", []>;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,38 @@
 | 
				
			||||||
 | 
					; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare float @llvm.maxnum.f32(float, float) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; SI-LABEL: {{^}}test_fmax3_olt_0:
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGA:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGB:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGC:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 | 
				
			||||||
 | 
					; SI: buffer_store_dword [[RESULT]],
 | 
				
			||||||
 | 
					; SI: s_endpgm
 | 
				
			||||||
 | 
					define void @test_fmax3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %a = load float addrspace(1)* %aptr, align 4
 | 
				
			||||||
 | 
					  %b = load float addrspace(1)* %bptr, align 4
 | 
				
			||||||
 | 
					  %c = load float addrspace(1)* %cptr, align 4
 | 
				
			||||||
 | 
					  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
 | 
				
			||||||
 | 
					  %f1 = call float @llvm.maxnum.f32(float %f0, float %c) nounwind readnone
 | 
				
			||||||
 | 
					  store float %f1, float addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Commute operand of second fmax
 | 
				
			||||||
 | 
					; SI-LABEL: {{^}}test_fmax3_olt_1:
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGA:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGB:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGC:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: v_max3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 | 
				
			||||||
 | 
					; SI: buffer_store_dword [[RESULT]],
 | 
				
			||||||
 | 
					; SI: s_endpgm
 | 
				
			||||||
 | 
					define void @test_fmax3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %a = load float addrspace(1)* %aptr, align 4
 | 
				
			||||||
 | 
					  %b = load float addrspace(1)* %bptr, align 4
 | 
				
			||||||
 | 
					  %c = load float addrspace(1)* %cptr, align 4
 | 
				
			||||||
 | 
					  %f0 = call float @llvm.maxnum.f32(float %a, float %b) nounwind readnone
 | 
				
			||||||
 | 
					  %f1 = call float @llvm.maxnum.f32(float %c, float %f0) nounwind readnone
 | 
				
			||||||
 | 
					  store float %f1, float addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,38 @@
 | 
				
			||||||
 | 
					; RUN: llc -march=r600 -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare float @llvm.minnum.f32(float, float) nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; SI-LABEL: {{^}}test_fmin3_olt_0:
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGA:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGB:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGC:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 | 
				
			||||||
 | 
					; SI: buffer_store_dword [[RESULT]],
 | 
				
			||||||
 | 
					; SI: s_endpgm
 | 
				
			||||||
 | 
					define void @test_fmin3_olt_0(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %a = load float addrspace(1)* %aptr, align 4
 | 
				
			||||||
 | 
					  %b = load float addrspace(1)* %bptr, align 4
 | 
				
			||||||
 | 
					  %c = load float addrspace(1)* %cptr, align 4
 | 
				
			||||||
 | 
					  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
 | 
				
			||||||
 | 
					  %f1 = call float @llvm.minnum.f32(float %f0, float %c) nounwind readnone
 | 
				
			||||||
 | 
					  store float %f1, float addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; Commute operand of second fmin
 | 
				
			||||||
 | 
					; SI-LABEL: {{^}}test_fmin3_olt_1:
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGA:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGB:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: buffer_load_dword [[REGC:v[0-9]+]]
 | 
				
			||||||
 | 
					; SI: v_min3_f32 [[RESULT:v[0-9]+]], [[REGC]], [[REGB]], [[REGA]]
 | 
				
			||||||
 | 
					; SI: buffer_store_dword [[RESULT]],
 | 
				
			||||||
 | 
					; SI: s_endpgm
 | 
				
			||||||
 | 
					define void @test_fmin3_olt_1(float addrspace(1)* %out, float addrspace(1)* %aptr, float addrspace(1)* %bptr, float addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %a = load float addrspace(1)* %aptr, align 4
 | 
				
			||||||
 | 
					  %b = load float addrspace(1)* %bptr, align 4
 | 
				
			||||||
 | 
					  %c = load float addrspace(1)* %cptr, align 4
 | 
				
			||||||
 | 
					  %f0 = call float @llvm.minnum.f32(float %a, float %b) nounwind readnone
 | 
				
			||||||
 | 
					  %f1 = call float @llvm.minnum.f32(float %c, float %f0) nounwind readnone
 | 
				
			||||||
 | 
					  store float %f1, float addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,41 @@
 | 
				
			||||||
 | 
					; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_imax3_sgt_i32
 | 
				
			||||||
 | 
					; SI: v_max3_i32
 | 
				
			||||||
 | 
					define void @v_test_imax3_sgt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %icmp0 = icmp sgt i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					  %icmp1 = icmp sgt i32 %i0, %c
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %i0, i32 %c
 | 
				
			||||||
 | 
					  store i32 %i1, i32 addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_umax3_ugt_i32
 | 
				
			||||||
 | 
					; SI: v_max3_u32
 | 
				
			||||||
 | 
					define void @v_test_umax3_ugt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %icmp0 = icmp ugt i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					  %icmp1 = icmp ugt i32 %i0, %c
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %i0, i32 %c
 | 
				
			||||||
 | 
					  store i32 %i1, i32 addrspace(1)* %out, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,111 @@
 | 
				
			||||||
 | 
					; RUN: llc -march=r600 -mcpu=SI < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_imin3_slt_i32
 | 
				
			||||||
 | 
					; SI: v_min3_i32
 | 
				
			||||||
 | 
					define void @v_test_imin3_slt_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %icmp0 = icmp slt i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					  %icmp1 = icmp slt i32 %i0, %c
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %i0, i32 %c
 | 
				
			||||||
 | 
					  store i32 %i1, i32 addrspace(1)* %outgep, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_umin3_ult_i32
 | 
				
			||||||
 | 
					; SI: v_min3_u32
 | 
				
			||||||
 | 
					define void @v_test_umin3_ult_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					  %outgep = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %icmp0 = icmp ult i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					  %icmp1 = icmp ult i32 %i0, %c
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %i0, i32 %c
 | 
				
			||||||
 | 
					  store i32 %i1, i32 addrspace(1)* %outgep, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_umin_umin_umin
 | 
				
			||||||
 | 
					; SI: v_min_i32
 | 
				
			||||||
 | 
					; SI: v_min3_i32
 | 
				
			||||||
 | 
					define void @v_test_umin_umin_umin(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %tid2 = mul i32 %tid, 2
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
 | 
				
			||||||
 | 
					  %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
 | 
				
			||||||
 | 
					  %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %d = load i32 addrspace(1)* %gep3, align 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp0 = icmp slt i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp1 = icmp slt i32 %c, %d
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %c, i32 %d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp2 = icmp slt i32 %i0, %i1
 | 
				
			||||||
 | 
					  %i2 = select i1 %icmp2, i32 %i0, i32 %i1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  store i32 %i2, i32 addrspace(1)* %outgep1, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; FUNC-LABEL: @v_test_umin3_2_uses
 | 
				
			||||||
 | 
					; SI-NOT: v_min3
 | 
				
			||||||
 | 
					define void @v_test_umin3_2_uses(i32 addrspace(1)* %out, i32 addrspace(1)* %aptr, i32 addrspace(1)* %bptr, i32 addrspace(1)* %cptr) nounwind {
 | 
				
			||||||
 | 
					  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
 | 
					  %tid2 = mul i32 %tid, 2
 | 
				
			||||||
 | 
					  %gep0 = getelementptr i32 addrspace(1)* %aptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep1 = getelementptr i32 addrspace(1)* %bptr, i32 %tid
 | 
				
			||||||
 | 
					  %gep2 = getelementptr i32 addrspace(1)* %cptr, i32 %tid
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %gep3 = getelementptr i32 addrspace(1)* %aptr, i32 %tid2
 | 
				
			||||||
 | 
					  %gep4 = getelementptr i32 addrspace(1)* %bptr, i32 %tid2
 | 
				
			||||||
 | 
					  %gep5 = getelementptr i32 addrspace(1)* %cptr, i32 %tid2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %outgep0 = getelementptr i32 addrspace(1)* %out, i32 %tid
 | 
				
			||||||
 | 
					  %outgep1 = getelementptr i32 addrspace(1)* %out, i32 %tid2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %a = load i32 addrspace(1)* %gep0, align 4
 | 
				
			||||||
 | 
					  %b = load i32 addrspace(1)* %gep1, align 4
 | 
				
			||||||
 | 
					  %c = load i32 addrspace(1)* %gep2, align 4
 | 
				
			||||||
 | 
					  %d = load i32 addrspace(1)* %gep3, align 4
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp0 = icmp slt i32 %a, %b
 | 
				
			||||||
 | 
					  %i0 = select i1 %icmp0, i32 %a, i32 %b
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp1 = icmp slt i32 %c, %d
 | 
				
			||||||
 | 
					  %i1 = select i1 %icmp1, i32 %c, i32 %d
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  %icmp2 = icmp slt i32 %i0, %c
 | 
				
			||||||
 | 
					  %i2 = select i1 %icmp2, i32 %i0, i32 %c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  store i32 %i2, i32 addrspace(1)* %outgep0, align 4
 | 
				
			||||||
 | 
					  store i32 %i0, i32 addrspace(1)* %outgep1, align 4
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in New Issue