forked from OSchip/llvm-project
				
			Revert "ScheduleDAGInstrs: Rework schedule graph builder."
This works mostly fine but breaks some stage 1 builders when compiling compiler-rt on i386. Revert for further investigation as I can't see an obvious cause/fix. This reverts commit r254577. llvm-svn: 254586
This commit is contained in:
		
							parent
							
								
									311fef6ea5
								
							
						
					
					
						commit
						2fd672a221
					
				| 
						 | 
					@ -33,26 +33,15 @@ namespace llvm {
 | 
				
			||||||
  /// An individual mapping from virtual register number to SUnit.
 | 
					  /// An individual mapping from virtual register number to SUnit.
 | 
				
			||||||
  struct VReg2SUnit {
 | 
					  struct VReg2SUnit {
 | 
				
			||||||
    unsigned VirtReg;
 | 
					    unsigned VirtReg;
 | 
				
			||||||
    LaneBitmask LaneMask;
 | 
					 | 
				
			||||||
    SUnit *SU;
 | 
					    SUnit *SU;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU)
 | 
					    VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {}
 | 
				
			||||||
      : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {}
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    unsigned getSparseSetIndex() const {
 | 
					    unsigned getSparseSetIndex() const {
 | 
				
			||||||
      return TargetRegisterInfo::virtReg2Index(VirtReg);
 | 
					      return TargetRegisterInfo::virtReg2Index(VirtReg);
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// Mapping from virtual register to SUnit including an operand index.
 | 
					 | 
				
			||||||
  struct VReg2SUnitOperIdx : public VReg2SUnit {
 | 
					 | 
				
			||||||
    unsigned OperandIndex;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask,
 | 
					 | 
				
			||||||
                      unsigned OperandIndex, SUnit *SU)
 | 
					 | 
				
			||||||
      : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {}
 | 
					 | 
				
			||||||
  };
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  /// Record a physical register access.
 | 
					  /// Record a physical register access.
 | 
				
			||||||
  /// For non-data-dependent uses, OpIdx == -1.
 | 
					  /// For non-data-dependent uses, OpIdx == -1.
 | 
				
			||||||
  struct PhysRegSUOper {
 | 
					  struct PhysRegSUOper {
 | 
				
			||||||
| 
						 | 
					@ -80,10 +69,7 @@ namespace llvm {
 | 
				
			||||||
  /// Track local uses of virtual registers. These uses are gathered by the DAG
 | 
					  /// Track local uses of virtual registers. These uses are gathered by the DAG
 | 
				
			||||||
  /// builder and may be consulted by the scheduler to avoid iterating an entire
 | 
					  /// builder and may be consulted by the scheduler to avoid iterating an entire
 | 
				
			||||||
  /// vreg use list.
 | 
					  /// vreg use list.
 | 
				
			||||||
  typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2SUnitMultiMap;
 | 
					  typedef SparseMultiSet<VReg2SUnit, VirtReg2IndexFunctor> VReg2UseMap;
 | 
				
			||||||
 | 
					 | 
				
			||||||
  typedef SparseMultiSet<VReg2SUnitOperIdx, VirtReg2IndexFunctor>
 | 
					 | 
				
			||||||
    VReg2SUnitOperIdxMultiMap;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
 | 
					  /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of
 | 
				
			||||||
  /// MachineInstrs.
 | 
					  /// MachineInstrs.
 | 
				
			||||||
| 
						 | 
					@ -109,9 +95,6 @@ namespace llvm {
 | 
				
			||||||
    /// it has taken responsibility for scheduling the terminator correctly.
 | 
					    /// it has taken responsibility for scheduling the terminator correctly.
 | 
				
			||||||
    bool CanHandleTerminators;
 | 
					    bool CanHandleTerminators;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Whether lane masks should get tracked.
 | 
					 | 
				
			||||||
    bool TrackLaneMasks;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// State specific to the current scheduling region.
 | 
					    /// State specific to the current scheduling region.
 | 
				
			||||||
    /// ------------------------------------------------
 | 
					    /// ------------------------------------------------
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -134,7 +117,7 @@ namespace llvm {
 | 
				
			||||||
    /// After calling BuildSchedGraph, each vreg used in the scheduling region
 | 
					    /// After calling BuildSchedGraph, each vreg used in the scheduling region
 | 
				
			||||||
    /// is mapped to a set of SUnits. These include all local vreg uses, not
 | 
					    /// is mapped to a set of SUnits. These include all local vreg uses, not
 | 
				
			||||||
    /// just the uses for a singly defined vreg.
 | 
					    /// just the uses for a singly defined vreg.
 | 
				
			||||||
    VReg2SUnitMultiMap VRegUses;
 | 
					    VReg2UseMap VRegUses;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// State internal to DAG building.
 | 
					    /// State internal to DAG building.
 | 
				
			||||||
    /// -------------------------------
 | 
					    /// -------------------------------
 | 
				
			||||||
| 
						 | 
					@ -146,12 +129,8 @@ namespace llvm {
 | 
				
			||||||
    Reg2SUnitsMap Defs;
 | 
					    Reg2SUnitsMap Defs;
 | 
				
			||||||
    Reg2SUnitsMap Uses;
 | 
					    Reg2SUnitsMap Uses;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// Tracks the last instruction(s) in this region defining each virtual
 | 
					    /// Track the last instruction in this region defining each virtual register.
 | 
				
			||||||
    /// register. There may be multiple current definitions for a register with
 | 
					    VReg2SUnitMap VRegDefs;
 | 
				
			||||||
    /// disjunct lanemasks.
 | 
					 | 
				
			||||||
    VReg2SUnitMultiMap CurrentVRegDefs;
 | 
					 | 
				
			||||||
    /// Tracks the last instructions in this region using each virtual register.
 | 
					 | 
				
			||||||
    VReg2SUnitOperIdxMultiMap CurrentVRegUses;
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// PendingLoads - Remember where unknown loads are after the most recent
 | 
					    /// PendingLoads - Remember where unknown loads are after the most recent
 | 
				
			||||||
    /// unknown store, as we iterate. As with Defs and Uses, this is here
 | 
					    /// unknown store, as we iterate. As with Defs and Uses, this is here
 | 
				
			||||||
| 
						 | 
					@ -221,8 +200,7 @@ namespace llvm {
 | 
				
			||||||
    /// input.
 | 
					    /// input.
 | 
				
			||||||
    void buildSchedGraph(AliasAnalysis *AA,
 | 
					    void buildSchedGraph(AliasAnalysis *AA,
 | 
				
			||||||
                         RegPressureTracker *RPTracker = nullptr,
 | 
					                         RegPressureTracker *RPTracker = nullptr,
 | 
				
			||||||
                         PressureDiffs *PDiffs = nullptr,
 | 
					                         PressureDiffs *PDiffs = nullptr);
 | 
				
			||||||
                         bool TrackLaneMasks = false);
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
    /// addSchedBarrierDeps - Add dependencies from instructions in the current
 | 
					    /// addSchedBarrierDeps - Add dependencies from instructions in the current
 | 
				
			||||||
    /// list of instructions being scheduled to scheduling barrier. We want to
 | 
					    /// list of instructions being scheduled to scheduling barrier. We want to
 | 
				
			||||||
| 
						 | 
					@ -269,12 +247,6 @@ namespace llvm {
 | 
				
			||||||
    /// Other adjustments may be made to the instruction if necessary. Return
 | 
					    /// Other adjustments may be made to the instruction if necessary. Return
 | 
				
			||||||
    /// true if the operand has been deleted, false if not.
 | 
					    /// true if the operand has been deleted, false if not.
 | 
				
			||||||
    bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
 | 
					    bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    /// Returns a mask for which lanes get read/written by the given (register)
 | 
					 | 
				
			||||||
    /// machine operand.
 | 
					 | 
				
			||||||
    LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    void collectVRegUses(SUnit *SU);
 | 
					 | 
				
			||||||
  };
 | 
					  };
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  /// newSUnit - Creates a new SUnit and return a ptr to it.
 | 
					  /// newSUnit - Creates a new SUnit and return a ptr to it.
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,12 +13,12 @@
 | 
				
			||||||
//===----------------------------------------------------------------------===//
 | 
					//===----------------------------------------------------------------------===//
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 | 
					#include "llvm/CodeGen/ScheduleDAGInstrs.h"
 | 
				
			||||||
#include "llvm/ADT/IntEqClasses.h"
 | 
					 | 
				
			||||||
#include "llvm/ADT/MapVector.h"
 | 
					#include "llvm/ADT/MapVector.h"
 | 
				
			||||||
#include "llvm/ADT/SmallPtrSet.h"
 | 
					#include "llvm/ADT/SmallPtrSet.h"
 | 
				
			||||||
#include "llvm/ADT/SmallSet.h"
 | 
					#include "llvm/ADT/SmallSet.h"
 | 
				
			||||||
#include "llvm/Analysis/AliasAnalysis.h"
 | 
					#include "llvm/Analysis/AliasAnalysis.h"
 | 
				
			||||||
#include "llvm/Analysis/ValueTracking.h"
 | 
					#include "llvm/Analysis/ValueTracking.h"
 | 
				
			||||||
 | 
					#include "llvm/CodeGen/LiveIntervalAnalysis.h"
 | 
				
			||||||
#include "llvm/CodeGen/MachineFunctionPass.h"
 | 
					#include "llvm/CodeGen/MachineFunctionPass.h"
 | 
				
			||||||
#include "llvm/CodeGen/MachineFrameInfo.h"
 | 
					#include "llvm/CodeGen/MachineFrameInfo.h"
 | 
				
			||||||
#include "llvm/CodeGen/MachineInstrBuilder.h"
 | 
					#include "llvm/CodeGen/MachineInstrBuilder.h"
 | 
				
			||||||
| 
						 | 
					@ -55,7 +55,7 @@ ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
 | 
				
			||||||
                                     bool RemoveKillFlags)
 | 
					                                     bool RemoveKillFlags)
 | 
				
			||||||
    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS),
 | 
					    : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS),
 | 
				
			||||||
      RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
 | 
					      RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false),
 | 
				
			||||||
      TrackLaneMasks(false), FirstDbgValue(nullptr) {
 | 
					      FirstDbgValue(nullptr) {
 | 
				
			||||||
  DbgValues.clear();
 | 
					  DbgValues.clear();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  const TargetSubtargetInfo &ST = mf.getSubtarget();
 | 
					  const TargetSubtargetInfo &ST = mf.getSubtarget();
 | 
				
			||||||
| 
						 | 
					@ -363,20 +363,6 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
 | 
					 | 
				
			||||||
{
 | 
					 | 
				
			||||||
  unsigned Reg = MO.getReg();
 | 
					 | 
				
			||||||
  // No point in tracking lanemasks if we don't have interesting subregisters.
 | 
					 | 
				
			||||||
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
 | 
					 | 
				
			||||||
  if (!RC.HasDisjunctSubRegs)
 | 
					 | 
				
			||||||
    return ~0u;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  unsigned SubReg = MO.getSubReg();
 | 
					 | 
				
			||||||
  if (SubReg == 0)
 | 
					 | 
				
			||||||
    return RC.getLaneMask();
 | 
					 | 
				
			||||||
  return TRI->getSubRegIndexLaneMask(SubReg);
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// addVRegDefDeps - Add register output and data dependencies from this SUnit
 | 
					/// addVRegDefDeps - Add register output and data dependencies from this SUnit
 | 
				
			||||||
/// to instructions that occur later in the same scheduling region if they read
 | 
					/// to instructions that occur later in the same scheduling region if they read
 | 
				
			||||||
/// from or write to the virtual register defined at OperIdx.
 | 
					/// from or write to the virtual register defined at OperIdx.
 | 
				
			||||||
| 
						 | 
					@ -384,106 +370,35 @@ LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
 | 
				
			||||||
/// TODO: Hoist loop induction variable increments. This has to be
 | 
					/// TODO: Hoist loop induction variable increments. This has to be
 | 
				
			||||||
/// reevaluated. Generally, IV scheduling should be done before coalescing.
 | 
					/// reevaluated. Generally, IV scheduling should be done before coalescing.
 | 
				
			||||||
void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
 | 
					void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
 | 
				
			||||||
  MachineInstr *MI = SU->getInstr();
 | 
					  const MachineInstr *MI = SU->getInstr();
 | 
				
			||||||
  MachineOperand &MO = MI->getOperand(OperIdx);
 | 
					  unsigned Reg = MI->getOperand(OperIdx).getReg();
 | 
				
			||||||
  unsigned Reg = MO.getReg();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  LaneBitmask DefLaneMask;
 | 
					  // Singly defined vregs do not have output/anti dependencies.
 | 
				
			||||||
  LaneBitmask KillLaneMask;
 | 
					  // The current operand is a def, so we have at least one.
 | 
				
			||||||
  if (TrackLaneMasks) {
 | 
					  // Check here if there are any others...
 | 
				
			||||||
    bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
 | 
					 | 
				
			||||||
    DefLaneMask = getLaneMaskForMO(MO);
 | 
					 | 
				
			||||||
    // If we have a <read-undef> flag, none of the lane values comes from an
 | 
					 | 
				
			||||||
    // earlier instruction.
 | 
					 | 
				
			||||||
    KillLaneMask = IsKill ? ~0u : DefLaneMask;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Clear undef flag, we'll re-add it later once we know which subregister
 | 
					 | 
				
			||||||
    // Def is first.
 | 
					 | 
				
			||||||
    MO.setIsUndef(false);
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    DefLaneMask = ~0u;
 | 
					 | 
				
			||||||
    KillLaneMask = ~0u;
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  if (MO.isDead()) {
 | 
					 | 
				
			||||||
    assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() &&
 | 
					 | 
				
			||||||
           "Dead defs should have no uses");
 | 
					 | 
				
			||||||
  } else {
 | 
					 | 
				
			||||||
    // Add data dependence to all uses we found so far.
 | 
					 | 
				
			||||||
    const TargetSubtargetInfo &ST = MF.getSubtarget();
 | 
					 | 
				
			||||||
    for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
 | 
					 | 
				
			||||||
         E = CurrentVRegUses.end(); I != E; /*empty*/) {
 | 
					 | 
				
			||||||
      LaneBitmask LaneMask = I->LaneMask;
 | 
					 | 
				
			||||||
      // Ignore uses of other lanes.
 | 
					 | 
				
			||||||
      if ((LaneMask & KillLaneMask) == 0) {
 | 
					 | 
				
			||||||
        ++I;
 | 
					 | 
				
			||||||
        continue;
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      if ((LaneMask & DefLaneMask) != 0) {
 | 
					 | 
				
			||||||
        SUnit *UseSU = I->SU;
 | 
					 | 
				
			||||||
        MachineInstr *Use = UseSU->getInstr();
 | 
					 | 
				
			||||||
        SDep Dep(SU, SDep::Data, Reg);
 | 
					 | 
				
			||||||
        Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
 | 
					 | 
				
			||||||
                                                        I->OperandIndex));
 | 
					 | 
				
			||||||
        ST.adjustSchedDependency(SU, UseSU, Dep);
 | 
					 | 
				
			||||||
        UseSU->addPred(Dep);
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
      LaneMask &= ~KillLaneMask;
 | 
					 | 
				
			||||||
      // If we found a Def for all lanes of this use, remove it from the list.
 | 
					 | 
				
			||||||
      if (LaneMask != 0) {
 | 
					 | 
				
			||||||
        I->LaneMask = LaneMask;
 | 
					 | 
				
			||||||
        ++I;
 | 
					 | 
				
			||||||
      } else
 | 
					 | 
				
			||||||
        I = CurrentVRegUses.erase(I);
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Shortcut: Singly defined vregs do not have output/anti dependencies.
 | 
					 | 
				
			||||||
  if (MRI.hasOneDef(Reg))
 | 
					  if (MRI.hasOneDef(Reg))
 | 
				
			||||||
    return;
 | 
					    return;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Add output dependence to the next nearest defs of this vreg.
 | 
					  // Add output dependence to the next nearest def of this vreg.
 | 
				
			||||||
  //
 | 
					  //
 | 
				
			||||||
  // Unless this definition is dead, the output dependence should be
 | 
					  // Unless this definition is dead, the output dependence should be
 | 
				
			||||||
  // transitively redundant with antidependencies from this definition's
 | 
					  // transitively redundant with antidependencies from this definition's
 | 
				
			||||||
  // uses. We're conservative for now until we have a way to guarantee the uses
 | 
					  // uses. We're conservative for now until we have a way to guarantee the uses
 | 
				
			||||||
  // are not eliminated sometime during scheduling. The output dependence edge
 | 
					  // are not eliminated sometime during scheduling. The output dependence edge
 | 
				
			||||||
  // is also useful if output latency exceeds def-use latency.
 | 
					  // is also useful if output latency exceeds def-use latency.
 | 
				
			||||||
  LaneBitmask LaneMask = DefLaneMask;
 | 
					  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
 | 
				
			||||||
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
 | 
					  if (DefI == VRegDefs.end())
 | 
				
			||||||
                                     CurrentVRegDefs.end())) {
 | 
					    VRegDefs.insert(VReg2SUnit(Reg, SU));
 | 
				
			||||||
    // Ignore defs for other lanes.
 | 
					  else {
 | 
				
			||||||
    if ((V2SU.LaneMask & LaneMask) == 0)
 | 
					    SUnit *DefSU = DefI->SU;
 | 
				
			||||||
      continue;
 | 
					    if (DefSU != SU && DefSU != &ExitSU) {
 | 
				
			||||||
    // Add an output dependence.
 | 
					 | 
				
			||||||
    SUnit *DefSU = V2SU.SU;
 | 
					 | 
				
			||||||
    // Ignore additional defs of the same lanes in one instruction. This can
 | 
					 | 
				
			||||||
    // happen because lanemasks are shared for targets with too many
 | 
					 | 
				
			||||||
    // subregisters. We also use some representation tricks/hacks where we
 | 
					 | 
				
			||||||
    // add super-register defs/uses, to imply that although we only access parts
 | 
					 | 
				
			||||||
    // of the reg we care about the full one.
 | 
					 | 
				
			||||||
    if (DefSU == SU)
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
      SDep Dep(SU, SDep::Output, Reg);
 | 
					      SDep Dep(SU, SDep::Output, Reg);
 | 
				
			||||||
      Dep.setLatency(
 | 
					      Dep.setLatency(
 | 
				
			||||||
        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
 | 
					        SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
 | 
				
			||||||
      DefSU->addPred(Dep);
 | 
					      DefSU->addPred(Dep);
 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Update current definition. This can get tricky if the def was about a
 | 
					 | 
				
			||||||
    // bigger lanemask before. We then have to shrink it and create a new
 | 
					 | 
				
			||||||
    // VReg2SUnit for the non-overlapping part.
 | 
					 | 
				
			||||||
    LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
 | 
					 | 
				
			||||||
    LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
 | 
					 | 
				
			||||||
    if (NonOverlapMask != 0)
 | 
					 | 
				
			||||||
      CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU));
 | 
					 | 
				
			||||||
    V2SU.SU = SU;
 | 
					 | 
				
			||||||
    V2SU.LaneMask = OverlapMask;
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
  // If there was no CurrentVRegDefs entry for some lanes yet, create one.
 | 
					    DefI->SU = SU;
 | 
				
			||||||
  if (LaneMask != 0)
 | 
					  }
 | 
				
			||||||
    CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
 | 
					 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// addVRegUseDeps - Add a register data dependency if the instruction that
 | 
					/// addVRegUseDeps - Add a register data dependency if the instruction that
 | 
				
			||||||
| 
						 | 
					@ -493,26 +408,49 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
 | 
				
			||||||
///
 | 
					///
 | 
				
			||||||
/// TODO: Handle ExitSU "uses" properly.
 | 
					/// TODO: Handle ExitSU "uses" properly.
 | 
				
			||||||
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
 | 
					void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
 | 
				
			||||||
  const MachineInstr *MI = SU->getInstr();
 | 
					  MachineInstr *MI = SU->getInstr();
 | 
				
			||||||
  const MachineOperand &MO = MI->getOperand(OperIdx);
 | 
					  unsigned Reg = MI->getOperand(OperIdx).getReg();
 | 
				
			||||||
  unsigned Reg = MO.getReg();
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Remember the use. Data dependencies will be added when we find the def.
 | 
					  // Record this local VReg use.
 | 
				
			||||||
  LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u;
 | 
					  VReg2UseMap::iterator UI = VRegUses.find(Reg);
 | 
				
			||||||
  CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
 | 
					  for (; UI != VRegUses.end(); ++UI) {
 | 
				
			||||||
 | 
					    if (UI->SU == SU)
 | 
				
			||||||
  // Add antidependences to the following defs of the vreg.
 | 
					      break;
 | 
				
			||||||
  for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
 | 
					 | 
				
			||||||
                                     CurrentVRegDefs.end())) {
 | 
					 | 
				
			||||||
    // Ignore defs for unrelated lanes.
 | 
					 | 
				
			||||||
    LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
 | 
					 | 
				
			||||||
    if ((PrevDefLaneMask & LaneMask) == 0)
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
    if (V2SU.SU == SU)
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
 | 
					 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					  if (UI == VRegUses.end())
 | 
				
			||||||
 | 
					    VRegUses.insert(VReg2SUnit(Reg, SU));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Lookup this operand's reaching definition.
 | 
				
			||||||
 | 
					  assert(LIS && "vreg dependencies requires LiveIntervals");
 | 
				
			||||||
 | 
					  LiveQueryResult LRQ
 | 
				
			||||||
 | 
					    = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI));
 | 
				
			||||||
 | 
					  VNInfo *VNI = LRQ.valueIn();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // VNI will be valid because MachineOperand::readsReg() is checked by caller.
 | 
				
			||||||
 | 
					  assert(VNI && "No value to read by operand");
 | 
				
			||||||
 | 
					  MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def);
 | 
				
			||||||
 | 
					  // Phis and other noninstructions (after coalescing) have a NULL Def.
 | 
				
			||||||
 | 
					  if (Def) {
 | 
				
			||||||
 | 
					    SUnit *DefSU = getSUnit(Def);
 | 
				
			||||||
 | 
					    if (DefSU) {
 | 
				
			||||||
 | 
					      // The reaching Def lives within this scheduling region.
 | 
				
			||||||
 | 
					      // Create a data dependence.
 | 
				
			||||||
 | 
					      SDep dep(DefSU, SDep::Data, Reg);
 | 
				
			||||||
 | 
					      // Adjust the dependence latency using operand def/use information, then
 | 
				
			||||||
 | 
					      // allow the target to perform its own adjustments.
 | 
				
			||||||
 | 
					      int DefOp = Def->findRegisterDefOperandIdx(Reg);
 | 
				
			||||||
 | 
					      dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx));
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					      const TargetSubtargetInfo &ST = MF.getSubtarget();
 | 
				
			||||||
 | 
					      ST.adjustSchedDependency(DefSU, SU, const_cast<SDep &>(dep));
 | 
				
			||||||
 | 
					      SU->addPred(dep);
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					  // Add antidependence to the following def of the vreg it uses.
 | 
				
			||||||
 | 
					  VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg);
 | 
				
			||||||
 | 
					  if (DefI != VRegDefs.end() && DefI->SU != SU)
 | 
				
			||||||
 | 
					    DefI->SU->addPred(SDep(SU, SDep::Anti, Reg));
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/// Return true if MI is an instruction we are unable to reason about
 | 
					/// Return true if MI is an instruction we are unable to reason about
 | 
				
			||||||
| 
						 | 
					@ -795,42 +733,17 @@ void ScheduleDAGInstrs::initSUnits() {
 | 
				
			||||||
  }
 | 
					  }
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) {
 | 
					 | 
				
			||||||
  const MachineInstr *MI = SU->getInstr();
 | 
					 | 
				
			||||||
  for (const MachineOperand &MO : MI->operands()) {
 | 
					 | 
				
			||||||
    if (!MO.isReg())
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
    if (!MO.isUse() && (MO.getSubReg() == 0 || !TrackLaneMasks))
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    unsigned Reg = MO.getReg();
 | 
					 | 
				
			||||||
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
 | 
					 | 
				
			||||||
      continue;
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    // Record this local VReg use.
 | 
					 | 
				
			||||||
    VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg);
 | 
					 | 
				
			||||||
    for (; UI != VRegUses.end(); ++UI) {
 | 
					 | 
				
			||||||
      if (UI->SU == SU)
 | 
					 | 
				
			||||||
        break;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    if (UI == VRegUses.end())
 | 
					 | 
				
			||||||
      VRegUses.insert(VReg2SUnit(Reg, 0, SU));
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
}
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
/// If RegPressure is non-null, compute register pressure as a side effect. The
 | 
					/// If RegPressure is non-null, compute register pressure as a side effect. The
 | 
				
			||||||
/// DAG builder is an efficient place to do it because it already visits
 | 
					/// DAG builder is an efficient place to do it because it already visits
 | 
				
			||||||
/// operands.
 | 
					/// operands.
 | 
				
			||||||
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 | 
					void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 | 
				
			||||||
                                        RegPressureTracker *RPTracker,
 | 
					                                        RegPressureTracker *RPTracker,
 | 
				
			||||||
                                        PressureDiffs *PDiffs,
 | 
					                                        PressureDiffs *PDiffs) {
 | 
				
			||||||
                                        bool TrackLaneMasks) {
 | 
					 | 
				
			||||||
  const TargetSubtargetInfo &ST = MF.getSubtarget();
 | 
					  const TargetSubtargetInfo &ST = MF.getSubtarget();
 | 
				
			||||||
  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
 | 
					  bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
 | 
				
			||||||
                                                       : ST.useAA();
 | 
					                                                       : ST.useAA();
 | 
				
			||||||
  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
 | 
					  AliasAnalysis *AAForDep = UseAA ? AA : nullptr;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  this->TrackLaneMasks = TrackLaneMasks;
 | 
					 | 
				
			||||||
  MISUnitMap.clear();
 | 
					  MISUnitMap.clear();
 | 
				
			||||||
  ScheduleDAG::clearDAG();
 | 
					  ScheduleDAG::clearDAG();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -864,14 +777,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 | 
				
			||||||
  Defs.setUniverse(TRI->getNumRegs());
 | 
					  Defs.setUniverse(TRI->getNumRegs());
 | 
				
			||||||
  Uses.setUniverse(TRI->getNumRegs());
 | 
					  Uses.setUniverse(TRI->getNumRegs());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
 | 
					  assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs");
 | 
				
			||||||
  assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
 | 
					 | 
				
			||||||
  unsigned NumVirtRegs = MRI.getNumVirtRegs();
 | 
					 | 
				
			||||||
  CurrentVRegDefs.setUniverse(NumVirtRegs);
 | 
					 | 
				
			||||||
  CurrentVRegUses.setUniverse(NumVirtRegs);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  VRegUses.clear();
 | 
					  VRegUses.clear();
 | 
				
			||||||
  VRegUses.setUniverse(NumVirtRegs);
 | 
					  VRegDefs.setUniverse(MRI.getNumVirtRegs());
 | 
				
			||||||
 | 
					  VRegUses.setUniverse(MRI.getNumVirtRegs());
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Model data dependencies between instructions being scheduled and the
 | 
					  // Model data dependencies between instructions being scheduled and the
 | 
				
			||||||
  // ExitSU.
 | 
					  // ExitSU.
 | 
				
			||||||
| 
						 | 
					@ -899,7 +808,6 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 | 
				
			||||||
      RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
 | 
					      RPTracker->recede(/*LiveUses=*/nullptr, PDiff);
 | 
				
			||||||
      assert(RPTracker->getPos() == std::prev(MII) &&
 | 
					      assert(RPTracker->getPos() == std::prev(MII) &&
 | 
				
			||||||
             "RPTracker can't find MI");
 | 
					             "RPTracker can't find MI");
 | 
				
			||||||
      collectVRegUses(SU);
 | 
					 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    assert(
 | 
					    assert(
 | 
				
			||||||
| 
						 | 
					@ -1149,8 +1057,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA,
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  Defs.clear();
 | 
					  Defs.clear();
 | 
				
			||||||
  Uses.clear();
 | 
					  Uses.clear();
 | 
				
			||||||
  CurrentVRegDefs.clear();
 | 
					  VRegDefs.clear();
 | 
				
			||||||
  CurrentVRegUses.clear();
 | 
					 | 
				
			||||||
  PendingLoads.clear();
 | 
					  PendingLoads.clear();
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -6,7 +6,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}width_2d:
 | 
					; FUNC-LABEL: {{^}}width_2d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].Z
 | 
					; EG: MOV [[VAL]], KC0[2].Z
 | 
				
			||||||
define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
					define void @width_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
				
			||||||
                       i32 addrspace(1)* %out) {
 | 
					                       i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -19,7 +19,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}width_3d:
 | 
					; FUNC-LABEL: {{^}}width_3d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].Z
 | 
					; EG: MOV [[VAL]], KC0[2].Z
 | 
				
			||||||
define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
					define void @width_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
				
			||||||
                       i32 addrspace(1)* %out) {
 | 
					                       i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -36,7 +36,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}height_2d:
 | 
					; FUNC-LABEL: {{^}}height_2d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].W
 | 
					; EG: MOV [[VAL]], KC0[2].W
 | 
				
			||||||
define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
					define void @height_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
				
			||||||
                        i32 addrspace(1)* %out) {
 | 
					                        i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -49,7 +49,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}height_3d:
 | 
					; FUNC-LABEL: {{^}}height_3d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].W
 | 
					; EG: MOV [[VAL]], KC0[2].W
 | 
				
			||||||
define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
					define void @height_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
				
			||||||
                        i32 addrspace(1)* %out) {
 | 
					                        i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -66,7 +66,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}depth_3d:
 | 
					; FUNC-LABEL: {{^}}depth_3d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[3].X
 | 
					; EG: MOV [[VAL]], KC0[3].X
 | 
				
			||||||
define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
					define void @depth_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
				
			||||||
                       i32 addrspace(1)* %out) {
 | 
					                       i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -83,7 +83,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}data_type_2d:
 | 
					; FUNC-LABEL: {{^}}data_type_2d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[3].Y
 | 
					; EG: MOV [[VAL]], KC0[3].Y
 | 
				
			||||||
define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
					define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
				
			||||||
                           i32 addrspace(1)* %out) {
 | 
					                           i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -96,7 +96,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}data_type_3d:
 | 
					; FUNC-LABEL: {{^}}data_type_3d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[3].Y
 | 
					; EG: MOV [[VAL]], KC0[3].Y
 | 
				
			||||||
define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
					define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
				
			||||||
                                     i32 addrspace(1)* %out) {
 | 
					                                     i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -113,7 +113,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}channel_order_2d:
 | 
					; FUNC-LABEL: {{^}}channel_order_2d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[3].Z
 | 
					; EG: MOV [[VAL]], KC0[3].Z
 | 
				
			||||||
define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
					define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in,
 | 
				
			||||||
                               i32 addrspace(1)* %out) {
 | 
					                               i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -126,7 +126,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}channel_order_3d:
 | 
					; FUNC-LABEL: {{^}}channel_order_3d:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[3].Z
 | 
					; EG: MOV [[VAL]], KC0[3].Z
 | 
				
			||||||
define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
					define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in,
 | 
				
			||||||
                                         i32 addrspace(1)* %out) {
 | 
					                                         i32 addrspace(1)* %out) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -145,7 +145,7 @@ entry:
 | 
				
			||||||
;
 | 
					;
 | 
				
			||||||
; FUNC-LABEL: {{^}}image_arg_2nd:
 | 
					; FUNC-LABEL: {{^}}image_arg_2nd:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[4].Z
 | 
					; EG: MOV [[VAL]], KC0[4].Z
 | 
				
			||||||
define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
 | 
					define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1,
 | 
				
			||||||
                            i32 %x,
 | 
					                            i32 %x,
 | 
				
			||||||
                            %opencl.image2d_t addrspace(1)* %in2,
 | 
					                            %opencl.image2d_t addrspace(1)* %in2,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,8 +7,8 @@
 | 
				
			||||||
; ADD_INT literal.x KC0[2].Z, 5
 | 
					; ADD_INT literal.x KC0[2].Z, 5
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}i32_literal:
 | 
					; CHECK: {{^}}i32_literal:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
				
			||||||
; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 5
 | 
					; CHECK-NEXT: 5
 | 
				
			||||||
define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @i32_literal(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -24,8 +24,8 @@ entry:
 | 
				
			||||||
; ADD literal.x KC0[2].Z, 5.0
 | 
					; ADD literal.x KC0[2].Z, 5.0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}float_literal:
 | 
					; CHECK: {{^}}float_literal:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
				
			||||||
; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.0
 | 
					; CHECK-NEXT: 1084227584(5.0
 | 
				
			||||||
define void @float_literal(float addrspace(1)* %out, float %in) {
 | 
					define void @float_literal(float addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -4,7 +4,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}read_workdim:
 | 
					; FUNC-LABEL: {{^}}read_workdim:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].Z
 | 
					; EG: MOV [[VAL]], KC0[2].Z
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,7 +3,7 @@
 | 
				
			||||||
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
 | 
					; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; R600: {{^}}amdgpu_trunc:
 | 
					; R600: {{^}}amdgpu_trunc:
 | 
				
			||||||
; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
					; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z
 | 
				
			||||||
; SI: {{^}}amdgpu_trunc:
 | 
					; SI: {{^}}amdgpu_trunc:
 | 
				
			||||||
; SI: v_trunc_f32
 | 
					; SI: v_trunc_f32
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,7 +5,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}local_size_x:
 | 
					; FUNC-LABEL: {{^}}local_size_x:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[1].Z
 | 
					; EG: MOV [[VAL]], KC0[1].Z
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
 | 
				
			||||||
| 
						 | 
					@ -23,7 +23,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}local_size_y:
 | 
					; FUNC-LABEL: {{^}}local_size_y:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[1].W
 | 
					; EG: MOV [[VAL]], KC0[1].W
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
 | 
				
			||||||
| 
						 | 
					@ -38,7 +38,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}local_size_z:
 | 
					; FUNC-LABEL: {{^}}local_size_z:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV * [[VAL]], KC0[2].X
 | 
					; EG: MOV [[VAL]], KC0[2].X
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) {
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}or_i1:
 | 
					; FUNC-LABEL: {{^}}or_i1:
 | 
				
			||||||
; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}}
 | 
					; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
 | 
					; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}]
 | 
				
			||||||
define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
 | 
					define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) {
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -5,8 +5,8 @@
 | 
				
			||||||
; SET*DX10 instructions.
 | 
					; SET*DX10 instructions.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_une_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_une_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -19,8 +19,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_une_select_i32:
 | 
					; CHECK: {{^}}fcmp_une_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -31,8 +31,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_oeq_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_oeq_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -45,8 +45,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_oeq_select_i32:
 | 
					; CHECK: {{^}}fcmp_oeq_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -57,8 +57,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_ogt_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_ogt_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -71,8 +71,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_ogt_select_i32:
 | 
					; CHECK: {{^}}fcmp_ogt_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -83,8 +83,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_oge_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_oge_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -97,8 +97,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_oge_select_i32:
 | 
					; CHECK: {{^}}fcmp_oge_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x,
 | 
				
			||||||
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -109,8 +109,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_ole_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_ole_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 | 
				
			||||||
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -123,8 +123,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_ole_select_i32:
 | 
					; CHECK: {{^}}fcmp_ole_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 | 
				
			||||||
; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -135,8 +135,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_olt_select_fptosi:
 | 
					; CHECK: {{^}}fcmp_olt_select_fptosi:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 | 
				
			||||||
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -149,8 +149,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK: {{^}}fcmp_olt_select_i32:
 | 
					; CHECK: {{^}}fcmp_olt_select_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z,
 | 
				
			||||||
; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z,
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
					define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone
 | 
				
			||||||
; SI: buffer_store_dword [[EXTRACT]],
 | 
					; SI: buffer_store_dword [[EXTRACT]],
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 | 
					; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]]
 | 
				
			||||||
; EG: LSHR * [[ADDR]]
 | 
					; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1
 | 
				
			||||||
; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1
 | 
					; EG-NEXT: LSHR * [[ADDR]]
 | 
				
			||||||
define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
  %shl = shl i32 %in, 31
 | 
					  %shl = shl i32 %in, 31
 | 
				
			||||||
  %sext = ashr i32 %shl, 31
 | 
					  %sext = ashr i32 %shl, 31
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
 | 
				
			||||||
  ret void
 | 
					  ret void
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;EG-LABEL: {{^}}shl_i64:
 | 
					;EG: {{^}}shl_i64:
 | 
				
			||||||
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
					;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
				
			||||||
;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
					;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
				
			||||||
;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
				
			||||||
;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
					;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
					;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
				
			||||||
;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
					;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
				
			||||||
;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}}
 | 
					;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}}
 | 
				
			||||||
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 | 
					;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 | 
				
			||||||
;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 | 
					;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 | 
				
			||||||
| 
						 | 
					@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 | 
				
			||||||
  ret void
 | 
					  ret void
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
;EG-LABEL: {{^}}shl_v2i64:
 | 
					;EG: {{^}}shl_v2i64:
 | 
				
			||||||
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 | 
					;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]]
 | 
				
			||||||
;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 | 
					;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]]
 | 
				
			||||||
;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
 | 
					;EG-DAG: LSHR {{\*? *}}[[COMPSHA]]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -70,11 +70,11 @@ entry:
 | 
				
			||||||
;EG-LABEL: {{^}}ashr_i64_2:
 | 
					;EG-LABEL: {{^}}ashr_i64_2:
 | 
				
			||||||
;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
					;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
				
			||||||
;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
					;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
				
			||||||
;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
				
			||||||
;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
					;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
					;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
				
			||||||
;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
					;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
				
			||||||
;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}}
 | 
					;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
 | 
				
			||||||
;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 | 
					;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 | 
				
			||||||
;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 | 
					;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal
 | 
				
			||||||
;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
					; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]]
 | 
				
			||||||
; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
					; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}}
 | 
				
			||||||
 | 
					; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
				
			||||||
; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1
 | 
					 | 
				
			||||||
; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
					; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]]
 | 
				
			||||||
; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
					; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}}
 | 
				
			||||||
; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}}
 | 
					; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
 | 
				
			||||||
 | 
					; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}}
 | 
				
			||||||
; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
					; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal
 | 
				
			||||||
; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}}
 | 
					; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}}
 | 
				
			||||||
; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]]
 | 
					 | 
				
			||||||
; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 | 
					; EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0
 | 
				
			||||||
define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 | 
					define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
 | 
				
			||||||
  %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
 | 
					  %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -3,8 +3,8 @@
 | 
				
			||||||
; These tests are for condition codes that are not supported by the hardware
 | 
					; These tests are for condition codes that are not supported by the hardware
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}slt:
 | 
					; CHECK-LABEL: {{^}}slt:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 5(7.006492e-45)
 | 
					; CHECK-NEXT: 5(7.006492e-45)
 | 
				
			||||||
define void @slt(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @slt(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -15,8 +15,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}ult_i32:
 | 
					; CHECK-LABEL: {{^}}ult_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 5(7.006492e-45)
 | 
					; CHECK-NEXT: 5(7.006492e-45)
 | 
				
			||||||
define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @ult_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -40,8 +40,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}ult_float_native:
 | 
					; CHECK-LABEL: {{^}}ult_float_native:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
				
			||||||
; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 | 
					; CHECK-NEXT: LSHR *
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @ult_float_native(float addrspace(1)* %out, float %in) {
 | 
					define void @ult_float_native(float addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -52,8 +52,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}olt:
 | 
					; CHECK-LABEL: {{^}}olt:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR *
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @olt(float addrspace(1)* %out, float %in) {
 | 
					define void @olt(float addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -64,8 +64,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}sle:
 | 
					; CHECK-LABEL: {{^}}sle:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 6(8.407791e-45)
 | 
					; CHECK-NEXT: 6(8.407791e-45)
 | 
				
			||||||
define void @sle(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @sle(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -76,8 +76,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}ule_i32:
 | 
					; CHECK-LABEL: {{^}}ule_i32:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR
 | 
				
			||||||
; CHECK-NEXT: 6(8.407791e-45)
 | 
					; CHECK-NEXT: 6(8.407791e-45)
 | 
				
			||||||
define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
					define void @ule_i32(i32 addrspace(1)* %out, i32 %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -101,8 +101,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}ule_float_native:
 | 
					; CHECK-LABEL: {{^}}ule_float_native:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x
 | 
				
			||||||
; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}}
 | 
					; CHECK-NEXT: LSHR *
 | 
				
			||||||
; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
					; CHECK-NEXT: 1084227584(5.000000e+00)
 | 
				
			||||||
define void @ule_float_native(float addrspace(1)* %out, float %in) {
 | 
					define void @ule_float_native(float addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					@ -113,8 +113,8 @@ entry:
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; CHECK-LABEL: {{^}}ole:
 | 
					; CHECK-LABEL: {{^}}ole:
 | 
				
			||||||
; CHECK: LSHR
 | 
					; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z
 | 
				
			||||||
; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z
 | 
					; CHECK-NEXT: LSHR *
 | 
				
			||||||
; CHECK-NEXT:1084227584(5.000000e+00)
 | 
					; CHECK-NEXT:1084227584(5.000000e+00)
 | 
				
			||||||
define void @ole(float addrspace(1)* %out, float %in) {
 | 
					define void @ole(float addrspace(1)* %out, float %in) {
 | 
				
			||||||
entry:
 | 
					entry:
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -7,7 +7,7 @@
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}ngroups_x:
 | 
					; FUNC-LABEL: {{^}}ngroups_x:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
 | 
					; EG: MOV [[VAL]], KC0[0].X
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; HSA: .amd_kernel_code_t
 | 
					; HSA: .amd_kernel_code_t
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -38,7 +38,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}ngroups_y:
 | 
					; FUNC-LABEL: {{^}}ngroups_y:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
 | 
					; EG: MOV [[VAL]], KC0[0].Y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
 | 
				
			||||||
| 
						 | 
					@ -53,7 +53,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}ngroups_z:
 | 
					; FUNC-LABEL: {{^}}ngroups_z:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
 | 
					; EG: MOV [[VAL]], KC0[0].Z
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
 | 
				
			||||||
| 
						 | 
					@ -68,7 +68,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}global_size_x:
 | 
					; FUNC-LABEL: {{^}}global_size_x:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
 | 
					; EG: MOV [[VAL]], KC0[0].W
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
 | 
				
			||||||
| 
						 | 
					@ -83,7 +83,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}global_size_y:
 | 
					; FUNC-LABEL: {{^}}global_size_y:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
 | 
					; EG: MOV [[VAL]], KC0[1].X
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
 | 
				
			||||||
| 
						 | 
					@ -98,7 +98,7 @@ entry:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}global_size_z:
 | 
					; FUNC-LABEL: {{^}}global_size_z:
 | 
				
			||||||
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
					; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
 | 
				
			||||||
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
 | 
					; EG: MOV [[VAL]], KC0[1].Y
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
 | 
					; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
 | 
				
			||||||
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
 | 
					; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; FUNC-LABEL: {{^}}xor_i1:
 | 
					; FUNC-LABEL: {{^}}xor_i1:
 | 
				
			||||||
; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}}
 | 
					; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
 | 
					; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}}
 | 
				
			||||||
; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
 | 
					; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}}
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue