forked from OSchip/llvm-project
				
			R600: Swap the legality of rotl and rotr
The hardware supports rotr and not rotl.

llvm-svn: 182285
This commit is contained in:
		
							parent
							
								
									1cfd7a50bb
								
							
						
					
					
						commit
						5643c4ac72
					
				| 
						 | 
					@ -46,6 +46,9 @@ AMDGPUTargetLowering::AMDGPUTargetLowering(TargetMachine &TM) :
 | 
				
			||||||
  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 | 
					  setOperationAction(ISD::FFLOOR, MVT::f32, Legal);
 | 
				
			||||||
  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
 | 
					  setOperationAction(ISD::FRINT,  MVT::f32, Legal);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // The hardware supports ROTR, but not ROTL
 | 
				
			||||||
 | 
					  setOperationAction(ISD::ROTL, MVT::i32, Expand);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Lower floating point store/load to integer store/load to reduce the number
 | 
					  // Lower floating point store/load to integer store/load to reduce the number
 | 
				
			||||||
  // of patterns in tablegen.
 | 
					  // of patterns in tablegen.
 | 
				
			||||||
  setOperationAction(ISD::STORE, MVT::f32, Promote);
 | 
					  setOperationAction(ISD::STORE, MVT::f32, Promote);
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -115,7 +115,6 @@ enum {
 | 
				
			||||||
  RET_FLAG,
 | 
					  RET_FLAG,
 | 
				
			||||||
  BRANCH_COND,
 | 
					  BRANCH_COND,
 | 
				
			||||||
  // End AMDIL ISD Opcodes
 | 
					  // End AMDIL ISD Opcodes
 | 
				
			||||||
  BITALIGN,
 | 
					 | 
				
			||||||
  BUFFER_STORE,
 | 
					  BUFFER_STORE,
 | 
				
			||||||
  DWORDADDR,
 | 
					  DWORDADDR,
 | 
				
			||||||
  FRACT,
 | 
					  FRACT,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -23,12 +23,6 @@ def AMDGPUDTIntTernaryOp : SDTypeProfile<1, 3, [
 | 
				
			||||||
// AMDGPU DAG Nodes
 | 
					// AMDGPU DAG Nodes
 | 
				
			||||||
//
 | 
					//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
// out = ((a << 32) | b) >> c
 | 
					 | 
				
			||||||
//
 | 
					 | 
				
			||||||
// Can be used to optimize rotl:
 | 
					 | 
				
			||||||
// rotl(a, b) = bitalign(a, a, 32 - b)
 | 
					 | 
				
			||||||
def AMDGPUbitalign : SDNode<"AMDGPUISD::BITALIGN", AMDGPUDTIntTernaryOp>;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
// This argument to this node is a dword address.
 | 
					// This argument to this node is a dword address.
 | 
				
			||||||
def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 | 
					def AMDGPUdwordaddr : SDNode<"AMDGPUISD::DWORDADDR", SDTIntUnaryOp>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -295,6 +295,12 @@ class BFEPattern <Instruction BFE> : Pat <
 | 
				
			||||||
  (BFE $x, $y, $z)
 | 
					  (BFE $x, $y, $z)
 | 
				
			||||||
>;
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					// rotr pattern
 | 
				
			||||||
 | 
					class ROTRPattern <Instruction BIT_ALIGN> : Pat <
 | 
				
			||||||
 | 
					  (rotr i32:$src0, i32:$src1),
 | 
				
			||||||
 | 
					  (BIT_ALIGN $src0, $src0, $src1)
 | 
				
			||||||
 | 
					>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include "R600Instructions.td"
 | 
					include "R600Instructions.td"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
include "SIInstrInfo.td"
 | 
					include "SIInstrInfo.td"
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -138,8 +138,6 @@ void AMDGPUTargetLowering::InitAMDILLowering() {
 | 
				
			||||||
    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 | 
					    setOperationAction(ISD::SMUL_LOHI, VT, Expand);
 | 
				
			||||||
    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 | 
					    setOperationAction(ISD::UMUL_LOHI, VT, Expand);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // GPU doesn't have a rotl, rotr, or byteswap instruction
 | 
					 | 
				
			||||||
    setOperationAction(ISD::ROTR, VT, Expand);
 | 
					 | 
				
			||||||
    setOperationAction(ISD::BSWAP, VT, Expand);
 | 
					    setOperationAction(ISD::BSWAP, VT, Expand);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // GPU doesn't have any counting operators
 | 
					    // GPU doesn't have any counting operators
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -72,8 +72,6 @@ R600TargetLowering::R600TargetLowering(TargetMachine &TM) :
 | 
				
			||||||
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 | 
					  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 | 
				
			||||||
  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
 | 
					  setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i1, Custom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  setOperationAction(ISD::ROTL, MVT::i32, Custom);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 | 
					  setOperationAction(ISD::SELECT_CC, MVT::f32, Custom);
 | 
				
			||||||
  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 | 
					  setOperationAction(ISD::SELECT_CC, MVT::i32, Custom);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -480,7 +478,6 @@ using namespace llvm::AMDGPUIntrinsic;
 | 
				
			||||||
SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 | 
					SDValue R600TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
 | 
				
			||||||
  switch (Op.getOpcode()) {
 | 
					  switch (Op.getOpcode()) {
 | 
				
			||||||
  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 | 
					  default: return AMDGPUTargetLowering::LowerOperation(Op, DAG);
 | 
				
			||||||
  case ISD::ROTL: return LowerROTL(Op, DAG);
 | 
					 | 
				
			||||||
  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
 | 
					  case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
 | 
				
			||||||
  case ISD::SELECT: return LowerSELECT(Op, DAG);
 | 
					  case ISD::SELECT: return LowerSELECT(Op, DAG);
 | 
				
			||||||
  case ISD::STORE: return LowerSTORE(Op, DAG);
 | 
					  case ISD::STORE: return LowerSTORE(Op, DAG);
 | 
				
			||||||
| 
						 | 
					@ -765,18 +762,6 @@ SDValue R600TargetLowering::LowerFrameIndex(SDValue Op, SelectionDAG &DAG) const
 | 
				
			||||||
  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
 | 
					  return DAG.getConstant(Offset * 4 * TFL->getStackWidth(MF), MVT::i32);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
SDValue R600TargetLowering::LowerROTL(SDValue Op, SelectionDAG &DAG) const {
 | 
					 | 
				
			||||||
  DebugLoc DL = Op.getDebugLoc();
 | 
					 | 
				
			||||||
  EVT VT = Op.getValueType();
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  return DAG.getNode(AMDGPUISD::BITALIGN, DL, VT,
 | 
					 | 
				
			||||||
                     Op.getOperand(0),
 | 
					 | 
				
			||||||
                     Op.getOperand(0),
 | 
					 | 
				
			||||||
                     DAG.getNode(ISD::SUB, DL, VT,
 | 
					 | 
				
			||||||
                                 DAG.getConstant(32, MVT::i32),
 | 
					 | 
				
			||||||
                                 Op.getOperand(1)));
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
bool R600TargetLowering::isZero(SDValue Op) const {
 | 
					bool R600TargetLowering::isZero(SDValue Op) const {
 | 
				
			||||||
  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
 | 
					  if(ConstantSDNode *Cst = dyn_cast<ConstantSDNode>(Op)) {
 | 
				
			||||||
    return Cst->isNullValue();
 | 
					    return Cst->isNullValue();
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -1635,10 +1635,8 @@ let Predicates = [isEGorCayman] in {
 | 
				
			||||||
  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
 | 
					  def BFI_INT_eg : R600_3OP <0x06, "BFI_INT", [], VecALU>;
 | 
				
			||||||
  defm : BFIPatterns <BFI_INT_eg>;
 | 
					  defm : BFIPatterns <BFI_INT_eg>;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT",
 | 
					  def BIT_ALIGN_INT_eg : R600_3OP <0xC, "BIT_ALIGN_INT", [], VecALU>;
 | 
				
			||||||
    [(set i32:$dst, (AMDGPUbitalign i32:$src0, i32:$src1, i32:$src2))],
 | 
					  def : ROTRPattern <BIT_ALIGN_INT_eg>;
 | 
				
			||||||
    VecALU
 | 
					 | 
				
			||||||
  >;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  def MULADD_eg : MULADD_Common<0x14>;
 | 
					  def MULADD_eg : MULADD_Common<0x14>;
 | 
				
			||||||
  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
 | 
					  def MULADD_IEEE_eg : MULADD_IEEE_Common<0x18>;
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -0,0 +1,29 @@
 | 
				
			||||||
 | 
					; RUN: llc < %s -debug-only=isel -march=r600 -mcpu=redwood -o - 2>&1 | FileCheck %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK: rotr
 | 
				
			||||||
 | 
					; CHECK: @rotr
 | 
				
			||||||
 | 
					; CHECK: BIT_ALIGN_INT
 | 
				
			||||||
 | 
					define void @rotr(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = sub i32 32, %y
 | 
				
			||||||
 | 
					  %1 = shl i32 %x, %0
 | 
				
			||||||
 | 
					  %2 = lshr i32 %x, %y
 | 
				
			||||||
 | 
					  %3 = or i32 %1, %2
 | 
				
			||||||
 | 
					  store i32 %3, i32 addrspace(1)* %in
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					; CHECK: rotr
 | 
				
			||||||
 | 
					; CHECK: @rotl
 | 
				
			||||||
 | 
					; CHECK: SUB_INT {{\** T[0-9]+\.[XYZW]}}, literal.x
 | 
				
			||||||
 | 
					; CHECK-NEXT: 32
 | 
				
			||||||
 | 
					; CHECK: BIT_ALIGN_INT {{\** T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], T[0-9]+\.[XYZW], PV.[xyzw]}}
 | 
				
			||||||
 | 
					define void @rotl(i32 addrspace(1)* %in, i32 %x, i32 %y) {
 | 
				
			||||||
 | 
					entry:
 | 
				
			||||||
 | 
					  %0 = shl i32 %x, %y
 | 
				
			||||||
 | 
					  %1 = sub i32 32, %y
 | 
				
			||||||
 | 
					  %2 = lshr i32 %x, %1
 | 
				
			||||||
 | 
					  %3 = or i32 %0, %2
 | 
				
			||||||
 | 
					  store i32 %3, i32 addrspace(1)* %in
 | 
				
			||||||
 | 
					  ret void
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
		Loading…
	
		Reference in New Issue