R600/SI: Add support for i8 and i16 private loads/stores

llvm-svn: 199823

commit e93736057f
parent ec59e75ef2
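The patch teaches the R600/SI backends to custom-lower i8 and i16 loads and stores to private memory, which AMDGPUTargetLowering models as an array of 32-bit registers accessed through REGISTER_LOAD/REGISTER_STORE nodes. As a rough standalone sketch of the arithmetic the new AMDGPUTargetLowering::LowerLOAD below emits (illustrative only, not part of the diff; the function and variable names here are invented):

// Standalone model of the lowered DAG sequence: REGISTER_LOAD of (ptr >> 2),
// shift right by (ptr & 3) * 8, mask to 8/16 bits, then SHL+SRA for SEXTLOAD.
#include <cassert>
#include <cstdint>

// 'regs' stands in for the register file backing private (scratch) memory.
uint32_t loadPrivateSubDword(const uint32_t *regs, uint32_t byteAddr,
                             unsigned bits /* 8 or 16 */, bool signExt) {
  uint32_t mask = (bits == 8) ? 0xffu : 0xffffu;
  uint32_t word = regs[byteAddr >> 2];      // REGISTER_LOAD of (ptr >> 2)
  uint32_t shift = (byteAddr & 0x3u) * 8;   // ByteIdx << 3
  uint32_t val = (word >> shift) & mask;    // SRL, then AND with Mask
  if (signExt) {                            // SEXTLOAD: SHL then SRA by (32 - width)
    unsigned s = 32 - bits;
    val = static_cast<uint32_t>(static_cast<int32_t>(val << s) >> s);
  }
  return val;
}

int main() {
  uint32_t regs[1] = {0x8081A2B3u};
  assert(loadPrivateSubDword(regs, 1, 8, false) == 0xA2u);        // zext of byte 1
  assert(loadPrivateSubDword(regs, 3, 8, true) == 0xFFFFFF80u);   // sext of byte 3
  assert(loadPrivateSubDword(regs, 2, 16, true) == 0xFFFF8081u);  // sext of high half
  return 0;
}

As the hunks below show, the R600 and SI LowerLOAD overrides first call into this shared AMDGPUTargetLowering helper and fall back to their existing paths when it returns an empty SDValue.
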
@@ -589,18 +589,96 @@ SDValue AMDGPUTargetLowering::SplitVectorStore(SDValue Op,
   return DAG.getNode(ISD::TokenFactor, SL, MVT::Other, &Chains[0], NumElts);
 }
 
+SDValue AMDGPUTargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  LoadSDNode *Load = cast<LoadSDNode>(Op);
+  ISD::LoadExtType ExtType = Load->getExtensionType();
+
+  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS ||
+      ExtType == ISD::NON_EXTLOAD || Load->getMemoryVT().bitsGE(MVT::i32))
+    return SDValue();
+
+
+  EVT VT = Op.getValueType();
+  EVT MemVT = Load->getMemoryVT();
+  unsigned Mask = 0;
+  if (Load->getMemoryVT() == MVT::i8) {
+    Mask = 0xff;
+  } else if (Load->getMemoryVT() == MVT::i16) {
+    Mask = 0xffff;
+  }
+  SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
+                            DAG.getConstant(2, MVT::i32));
+  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+                            Load->getChain(), Ptr,
+                            DAG.getTargetConstant(0, MVT::i32),
+                            Op.getOperand(2));
+  SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32,
+                                Load->getBasePtr(),
+                                DAG.getConstant(0x3, MVT::i32));
+  SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                 DAG.getConstant(3, MVT::i32));
+  Ret = DAG.getNode(ISD::SRL, DL, MVT::i32, Ret, ShiftAmt);
+  Ret = DAG.getNode(ISD::AND, DL, MVT::i32, Ret,
+                    DAG.getConstant(Mask, MVT::i32));
+  if (ExtType == ISD::SEXTLOAD) {
+    SDValue SExtShift = DAG.getConstant(
+        VT.getSizeInBits() - MemVT.getSizeInBits(), MVT::i32);
+    Ret = DAG.getNode(ISD::SHL, DL, MVT::i32, Ret, SExtShift);
+    Ret = DAG.getNode(ISD::SRA, DL, MVT::i32, Ret, SExtShift);
+  }
+
+  return Ret;
+}
+
 SDValue AMDGPUTargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
+  SDLoc DL(Op);
   SDValue Result = AMDGPUTargetLowering::MergeVectorStore(Op, DAG);
   if (Result.getNode()) {
     return Result;
   }
 
   StoreSDNode *Store = cast<StoreSDNode>(Op);
+  SDValue Chain = Store->getChain();
   if ((Store->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS ||
        Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS) &&
       Store->getValue().getValueType().isVector()) {
     return SplitVectorStore(Op, DAG);
   }
 
+  if (Store->getAddressSpace() == AMDGPUAS::PRIVATE_ADDRESS &&
+      Store->getMemoryVT().bitsLT(MVT::i32)) {
+    unsigned Mask = 0;
+    if (Store->getMemoryVT() == MVT::i8) {
+      Mask = 0xff;
+    } else if (Store->getMemoryVT() == MVT::i16) {
+      Mask = 0xffff;
+    }
+    SDValue TruncPtr = DAG.getZExtOrTrunc(Store->getBasePtr(), DL, MVT::i32);
+    SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, TruncPtr,
+                              DAG.getConstant(2, MVT::i32));
+    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                              Chain, Ptr, DAG.getTargetConstant(0, MVT::i32));
+    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, TruncPtr,
+                                  DAG.getConstant(0x3, MVT::i32));
+    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                   DAG.getConstant(3, MVT::i32));
+    SDValue SExtValue = DAG.getNode(ISD::SIGN_EXTEND, DL, MVT::i32,
+                                    Store->getValue());
+    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, SExtValue,
+                                      DAG.getConstant(Mask, MVT::i32));
+    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                       MaskedValue, ShiftAmt);
+    SDValue DstMask = DAG.getNode(ISD::SHL, DL, MVT::i32, DAG.getConstant(Mask, MVT::i32),
+                                  ShiftAmt);
+    DstMask = DAG.getNode(ISD::XOR, DL, MVT::i32, DstMask,
+                          DAG.getConstant(0xffffffff, MVT::i32));
+    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+
+    SDValue Value = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+    return DAG.getNode(AMDGPUISD::REGISTER_STORE, DL, MVT::Other,
+                       Chain, Value, Ptr, DAG.getTargetConstant(0, MVT::i32));
+  }
   return SDValue();
 }
+
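The LowerSTORE path in the hunk above is the matching read-modify-write: load the dword that contains the target byte or halfword, clear that lane with a shifted mask, OR in the shifted value, and emit a REGISTER_STORE of the merged dword. A minimal standalone sketch under the same caveats (illustrative only, names invented; the diff's sign-extend-then-mask of the stored value is collapsed into a plain mask here):

#include <cassert>
#include <cstdint>

// Read-modify-write of the dword that contains the i8/i16 store target.
void storePrivateSubDword(uint32_t *regs, uint32_t byteAddr, uint32_t value,
                          unsigned bits /* 8 or 16 */) {
  uint32_t mask = (bits == 8) ? 0xffu : 0xffffu;
  uint32_t word = regs[byteAddr >> 2];      // REGISTER_LOAD of (ptr >> 2)
  uint32_t shift = (byteAddr & 0x3u) * 8;   // ByteIdx << 3
  uint32_t laneMask = mask << shift;        // DstMask before the XOR
  word &= ~laneMask;                        // XOR with 0xffffffff, then AND
  word |= (value & mask) << shift;          // OR in the shifted value
  regs[byteAddr >> 2] = word;               // REGISTER_STORE back to (ptr >> 2)
}

int main() {
  uint32_t regs[1] = {0xAABBCCDDu};
  storePrivateSubDword(regs, 1, 0x7Fu, 8);     // replace byte 1
  assert(regs[0] == 0xAABB7FDDu);
  storePrivateSubDword(regs, 2, 0x1234u, 16);  // replace the upper halfword
  assert(regs[0] == 0x12347FDDu);
  return 0;
}
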
@@ -54,6 +54,7 @@ protected:
   /// \brief Split a vector load into multiple scalar loads.
   SDValue SplitVectorLoad(const SDValue &Op, SelectionDAG &DAG) const;
   SDValue SplitVectorStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue LowerLOAD(SDValue Op, SelectionDAG &DAG) const;
   SDValue LowerSTORE(SDValue Op, SelectionDAG &DAG) const;
   bool isHWTrueValue(SDValue Op) const;
   bool isHWFalseValue(SDValue Op) const;
@@ -1113,6 +1113,10 @@ SDValue R600TargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
     return SDValue();
   }
 
+  SDValue Ret = AMDGPUTargetLowering::LowerSTORE(Op, DAG);
+  if (Ret.getNode()) {
+    return Ret;
+  }
   // Lowering for indirect addressing
 
   const MachineFunction &MF = DAG.getMachineFunction();

@@ -1204,6 +1208,15 @@ SDValue R600TargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const
   SDValue Ptr = Op.getOperand(1);
   SDValue LoweredLoad;
 
+  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+  if (Ret.getNode()) {
+    SDValue Ops[2];
+    Ops[0] = Ret;
+    Ops[1] = Chain;
+    return DAG.getMergeValues(Ops, 2, DL);
+  }
+
+
   if (LoadNode->getAddressSpace() == AMDGPUAS::LOCAL_ADDRESS && VT.isVector()) {
     SDValue MergedValues[2] = {
       SplitVectorLoad(Op, DAG),
@@ -125,11 +125,17 @@ SITargetLowering::SITargetLowering(TargetMachine &TM) :
   setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom);
 
   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, Expand);
-  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Custom);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v8i16, Expand);
   setLoadExtAction(ISD::SEXTLOAD, MVT::v16i16, Expand);
 
+  setLoadExtAction(ISD::EXTLOAD, MVT::i8, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i16, Custom);
+  setLoadExtAction(ISD::EXTLOAD, MVT::i32, Expand);
   setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+  setTruncStoreAction(MVT::i32, MVT::i8, Custom);
+  setTruncStoreAction(MVT::i32, MVT::i16, Custom);
   setTruncStoreAction(MVT::f64, MVT::f32, Expand);
   setTruncStoreAction(MVT::i64, MVT::i32, Expand);
   setTruncStoreAction(MVT::i128, MVT::i64, Expand);
@@ -700,21 +706,26 @@ SDValue SITargetLowering::LowerBRCOND(SDValue BRCOND,
 SDValue SITargetLowering::LowerLOAD(SDValue Op, SelectionDAG &DAG) const {
   SDLoc DL(Op);
   LoadSDNode *Load = cast<LoadSDNode>(Op);
+  SDValue Ret = AMDGPUTargetLowering::LowerLOAD(Op, DAG);
+  SDValue MergedValues[2];
+  MergedValues[1] = Load->getChain();
+  if (Ret.getNode()) {
+    MergedValues[0] = Ret;
+    return DAG.getMergeValues(MergedValues, 2, DL);
+  }
 
-  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS)
+  if (Load->getAddressSpace() != AMDGPUAS::PRIVATE_ADDRESS) {
     return SDValue();
+  }
 
   SDValue Ptr = DAG.getNode(ISD::SRL, DL, MVT::i32, Load->getBasePtr(),
                             DAG.getConstant(2, MVT::i32));
-  SDValue Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
+  Ret = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, Op.getValueType(),
                     Load->getChain(), Ptr,
                     DAG.getTargetConstant(0, MVT::i32),
                     Op.getOperand(2));
-  SDValue MergedValues[2] = {
-    Ret,
-    Load->getChain()
-  };
+  MergedValues[0] = Ret;
   return DAG.getMergeValues(MergedValues, 2, DL);
 }
@@ -796,7 +807,34 @@ SDValue SITargetLowering::LowerSTORE(SDValue Op, SelectionDAG &DAG) const {
   SDValue Chain = Store->getChain();
   SmallVector<SDValue, 8> Values;
 
-  if (VT == MVT::i64) {
+  if (Store->isTruncatingStore()) {
+    unsigned Mask = 0;
+    if (Store->getMemoryVT() == MVT::i8) {
+      Mask = 0xff;
+    } else if (Store->getMemoryVT() == MVT::i16) {
+      Mask = 0xffff;
+    }
+    SDValue Dst = DAG.getNode(AMDGPUISD::REGISTER_LOAD, DL, MVT::i32,
+                              Chain, Store->getBasePtr(),
+                              DAG.getConstant(0, MVT::i32));
+    SDValue ByteIdx = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getBasePtr(),
+                                  DAG.getConstant(0x3, MVT::i32));
+    SDValue ShiftAmt = DAG.getNode(ISD::SHL, DL, MVT::i32, ByteIdx,
+                                   DAG.getConstant(3, MVT::i32));
+    SDValue MaskedValue = DAG.getNode(ISD::AND, DL, MVT::i32, Store->getValue(),
+                                      DAG.getConstant(Mask, MVT::i32));
+    SDValue ShiftedValue = DAG.getNode(ISD::SHL, DL, MVT::i32,
+                                       MaskedValue, ShiftAmt);
+    SDValue RotrAmt = DAG.getNode(ISD::SUB, DL, MVT::i32,
+                                  DAG.getConstant(32, MVT::i32), ShiftAmt);
+    SDValue DstMask = DAG.getNode(ISD::ROTR, DL, MVT::i32,
+                                  DAG.getConstant(Mask, MVT::i32),
+                                  RotrAmt);
+    Dst = DAG.getNode(ISD::AND, DL, MVT::i32, Dst, DstMask);
+    Dst = DAG.getNode(ISD::OR, DL, MVT::i32, Dst, ShiftedValue);
+
+    Values.push_back(Dst);
+  } else if (VT == MVT::i64) {
     for (unsigned i = 0; i < 2; ++i) {
       Values.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32,
                        Store->getValue(), DAG.getConstant(i, MVT::i32)));
@@ -2,7 +2,7 @@
 
 ; EG-LABEL: @anyext_load_i8:
 ; EG: AND_INT
-; EG-NEXT: 255
+; EG: 255
 define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32 addrspace(1)* %cast, align 1

@@ -14,8 +14,9 @@ define void @anyext_load_i8(i8 addrspace(1)* nocapture noalias %out, i8 addrspac
 
 ; EG-LABEL: @anyext_load_i16:
 ; EG: AND_INT
-; EG: LSHL
-; EG: 65535
+; EG: AND_INT
+; EG-DAG: 65535
+; EG-DAG: -65536
 define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrspace(1)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(1)* %src to i32 addrspace(1)*
   %load = load i32 addrspace(1)* %cast, align 1

@@ -27,7 +28,7 @@ define void @anyext_load_i16(i16 addrspace(1)* nocapture noalias %out, i16 addrs
 
 ; EG-LABEL: @anyext_load_lds_i8:
 ; EG: AND_INT
-; EG-NEXT: 255
+; EG: 255
 define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i8 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32 addrspace(3)* %cast, align 1

@@ -39,8 +40,9 @@ define void @anyext_load_lds_i8(i8 addrspace(3)* nocapture noalias %out, i8 addr
 
 ; EG-LABEL: @anyext_load_lds_i16:
 ; EG: AND_INT
-; EG: LSHL
-; EG: 65535
+; EG: AND_INT
+; EG-DAG: 65535
+; EG-DAG: -65536
 define void @anyext_load_lds_i16(i16 addrspace(3)* nocapture noalias %out, i16 addrspace(3)* nocapture noalias %src) nounwind {
   %cast = bitcast i16 addrspace(3)* %src to i32 addrspace(3)*
   %load = load i32 addrspace(3)* %cast, align 1
@@ -1,10 +1,11 @@
-; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK
-; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK
+; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck %s --check-prefix=R600-CHECK --check-prefix=FUNC
+; RUN: llc < %s -march=r600 -mcpu=SI | FileCheck %s --check-prefix=SI-CHECK --check-prefix=FUNC
 
 ; This test checks that uses and defs of the AR register happen in the same
 ; instruction clause.
 
-; R600-CHECK-LABEL: @mova_same_clause
+; FUNC-LABEL: @mova_same_clause
+
 ; R600-CHECK: MOVA_INT
 ; R600-CHECK-NOT: ALU clause
 ; R600-CHECK: 0 + AR.x

@@ -12,7 +13,6 @@
 ; R600-CHECK-NOT: ALU clause
 ; R600-CHECK: 0 + AR.x
 
-; SI-CHECK-LABEL: @mova_same_clause
 ; SI-CHECK: V_READFIRSTLANE
 ; SI-CHECK: V_MOVRELD
 ; SI-CHECK: S_CBRANCH

@@ -46,9 +46,8 @@ entry:
 ; XXX: This generated code has unnecessary MOVs, we should be able to optimize
 ; this.
 
-; R600-CHECK-LABEL: @multiple_structs
+; FUNC-LABEL: @multiple_structs
 ; R600-CHECK-NOT: MOVA_INT
-; SI-CHECK-LABEL: @multiple_structs
 ; SI-CHECK-NOT: V_MOVREL
 %struct.point = type { i32, i32 }
 

@@ -77,9 +76,8 @@ entry:
 ; loads and stores should be lowered to copies, so there shouldn't be any
 ; MOVA instructions.
 
-; R600-CHECK-LABEL: @direct_loop
+; FUNC-LABEL: @direct_loop
 ; R600-CHECK-NOT: MOVA_INT
-; SI-CHECK-LABEL: @direct_loop
 ; SI-CHECK-NOT: V_MOVREL
 
 define void @direct_loop(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {

@@ -113,3 +111,48 @@ for.end:
   store i32 %value, i32 addrspace(1)* %out
   ret void
 }
+
+; FUNC-LABEL: @short_array
+
+; R600-CHECK: MOV {{\** *}}T{{[0-9]\.[XYZW]}}, literal
+; R600-CHECK: 65536
+; R600-CHECK: MOVA_INT
+
+; SI-CHECK: V_MOV_B32_e32 v{{[0-9]}}, 65536
+; SI-CHECK: V_MOVRELS_B32_e32
+define void @short_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = alloca [2 x i16]
+  %1 = getelementptr [2 x i16]* %0, i32 0, i32 0
+  %2 = getelementptr [2 x i16]* %0, i32 0, i32 1
+  store i16 0, i16* %1
+  store i16 1, i16* %2
+  %3 = getelementptr [2 x i16]* %0, i32 0, i32 %index
+  %4 = load i16* %3
+  %5 = sext i16 %4 to i32
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+}
+
+; FUNC-LABEL: @char_array
+
+; R600-CHECK: OR_INT {{\** *}}T{{[0-9]\.[XYZW]}}, {{[PVT0-9]+\.[XYZW]}}, literal
+; R600-CHECK: 256
+; R600-CHECK: MOVA_INT
+
+; SI-CHECK: V_OR_B32_e32 v{{[0-9]}}, 256
+; SI-CHECK: V_MOVRELS_B32_e32
+define void @char_array(i32 addrspace(1)* %out, i32 %index) {
+entry:
+  %0 = alloca [2 x i8]
+  %1 = getelementptr [2 x i8]* %0, i32 0, i32 0
+  %2 = getelementptr [2 x i8]* %0, i32 0, i32 1
+  store i8 0, i8* %1
+  store i8 1, i8* %2
+  %3 = getelementptr [2 x i8]* %0, i32 0, i32 %index
+  %4 = load i8* %3
+  %5 = sext i8 %4 to i32
+  store i32 %5, i32 addrspace(1)* %out
+  ret void
+
+}